satif-ai 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- satif_ai-0.1.1/LICENSE +21 -0
- satif_ai-0.1.1/PKG-INFO +21 -0
- satif_ai-0.1.1/README.md +1 -0
- satif_ai-0.1.1/pyproject.toml +67 -0
- satif_ai-0.1.1/satif_ai/__init__.py +0 -0
- satif_ai-0.1.1/satif_ai/adapters/__init__.py +0 -0
- satif_ai-0.1.1/satif_ai/adapters/tidy.py +462 -0
- satif_ai-0.1.1/satif_ai/code_builders/__init__.py +0 -0
- satif_ai-0.1.1/satif_ai/code_builders/adaptation.py +9 -0
- satif_ai-0.1.1/satif_ai/code_builders/transformation.py +152 -0
- satif_ai-0.1.1/satif_ai/plot_builders/__init__.py +0 -0
- satif_ai-0.1.1/satif_ai/plot_builders/agent.py +204 -0
- satif_ai-0.1.1/satif_ai/plot_builders/prompt.py +92 -0
- satif_ai-0.1.1/satif_ai/plot_builders/tool.py +146 -0
satif_ai-0.1.1/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 SyncPulse, Inc.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
satif_ai-0.1.1/PKG-INFO
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: satif-ai
|
3
|
+
Version: 0.1.1
|
4
|
+
Summary: AI Agents for Satif
|
5
|
+
License: MIT
|
6
|
+
Author: Bryan Djafer
|
7
|
+
Author-email: bryan.djafer@syncpulse.fr
|
8
|
+
Requires-Python: >=3.10,<4.0
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
15
|
+
Requires-Dist: openai-agents (>=0.0.9,<0.0.10)
|
16
|
+
Requires-Dist: satif-sdk (>=0.1.0,<1.0.0)
|
17
|
+
Requires-Dist: sdif-mcp (>=0.1.0,<1.0.0)
|
18
|
+
Description-Content-Type: text/markdown
|
19
|
+
|
20
|
+
# SATIF AI
|
21
|
+
|
satif_ai-0.1.1/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# SATIF AI
|
@@ -0,0 +1,67 @@
|
|
1
|
+
[project]
|
2
|
+
name = "satif-ai"
|
3
|
+
version = "0.1.1"
|
4
|
+
description = "AI Agents for Satif"
|
5
|
+
authors = [
|
6
|
+
{name = "Bryan Djafer", email = "bryan.djafer@syncpulse.fr"}
|
7
|
+
]
|
8
|
+
license = "MIT"
|
9
|
+
readme = "README.md"
|
10
|
+
|
11
|
+
requires-python = ">=3.10,<4.0"
|
12
|
+
|
13
|
+
[tool.poetry.dependencies]
|
14
|
+
openai-agents = ">=0.0.9,<0.0.10"
|
15
|
+
satif-sdk = ">=0.1.0,<1.0.0"
|
16
|
+
sdif-mcp = ">=0.1.0,<1.0.0"
|
17
|
+
|
18
|
+
[build-system]
|
19
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
20
|
+
build-backend = "poetry.core.masonry.api"
|
21
|
+
|
22
|
+
[project.scripts]
|
23
|
+
satif = "satif.cli:main"
|
24
|
+
|
25
|
+
[tool.poetry.group.dev.dependencies]
|
26
|
+
pytest = "^8.3.5"
|
27
|
+
satif-core = {path = "../core", develop = true}
|
28
|
+
satif-sdk = {path = "../sdk", develop = true}
|
29
|
+
sdif-mcp = {path = "../mcp", develop = true}
|
30
|
+
sdif-db = {path = "../sdif", develop = true}
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
[tool.ruff]
|
35
|
+
lint.select = [
|
36
|
+
"E", # pycodestyle
|
37
|
+
"F", # pyflakes
|
38
|
+
"I", # isort
|
39
|
+
"D", # pydocstyle
|
40
|
+
"D401", # First line should be in imperative mood
|
41
|
+
"T201",
|
42
|
+
"UP",
|
43
|
+
]
|
44
|
+
lint.ignore = [
|
45
|
+
"UP006",
|
46
|
+
"UP007",
|
47
|
+
# We actually do want to import from typing_extensions
|
48
|
+
"UP035",
|
49
|
+
# Relax the convention by _not_ requiring documentation for every function parameter.
|
50
|
+
"D417",
|
51
|
+
"E501",
|
52
|
+
"D100",
|
53
|
+
"D103",
|
54
|
+
"D102",
|
55
|
+
"D101",
|
56
|
+
"D104",
|
57
|
+
"D106",
|
58
|
+
"D107",
|
59
|
+
"D205",
|
60
|
+
"D212",
|
61
|
+
"D401",
|
62
|
+
"T201",
|
63
|
+
]
|
64
|
+
[tool.ruff.lint.per-file-ignores]
|
65
|
+
"tests/*" = ["D", "UP"]
|
66
|
+
[tool.ruff.lint.pydocstyle]
|
67
|
+
convention = "google"
|
File without changes
|
File without changes
|
@@ -0,0 +1,462 @@
|
|
1
|
+
import inspect
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import re
|
5
|
+
import shutil
|
6
|
+
import sqlite3
|
7
|
+
import tempfile
|
8
|
+
from pathlib import Path
|
9
|
+
from typing import Optional
|
10
|
+
|
11
|
+
# MCP and Agent imports
|
12
|
+
from agents import Agent, Runner, function_tool
|
13
|
+
from agents.mcp.server import MCPServerStdio
|
14
|
+
from libs.core.satif_core.types import Datasource
|
15
|
+
from mcp import ClientSession
|
16
|
+
|
17
|
+
# SATIF imports
|
18
|
+
from satif_core.adapters.base import Adapter
|
19
|
+
from satif_sdk import SDIFDatabase
|
20
|
+
from satif_sdk.adapters.code import AdapterError, CodeAdapter
|
21
|
+
|
22
|
+
logger = logging.getLogger(__name__)
|
23
|
+
|
24
|
+
|
25
|
+
# --- Tidy Transformation Prompt ---
|
26
|
+
# System prompt for the tidying agent.
# NOTE: this template is rendered with ``str.format`` (placeholders:
# ``input_schema``, ``input_sample``, ``sdif_database_methods``), so any literal
# brace that must survive rendering is doubled (``{{e}}``).
TIDY_TRANSFORMATION_PROMPT = """
You are an expert Data Tidying Agent for SDIF databases.
Your task is to write Python code to transform tables within a given SDIF database into a 'tidy' format, modifying the database *in place*.

**Tidy Data Principles:**
1. Each variable forms a column.
2. Each observation forms a row.
3. Each type of observational unit forms a table (you might need to create new tables).

**Input SDIF Context:**
You will be provided with:
- The schema of the input SDIF database (`input_schema`).
- A sample analysis of the input SDIF database (`input_sample`).

<input_schema>
{input_schema}
</input_schema>

<input_sample>
{input_sample}
</input_sample>

**Available `SDIFDatabase` Methods:**
Here are the public methods you can call on the `db` object passed to your `adapt_sdif` function:
```python
{sdif_database_methods}
```

**Your Goal:**
Generate Python code for an adaptation function named `adapt_sdif`. This function MUST:
- Accept an `SDIFDatabase` object (`db`) which represents the database to be modified.
- Perform tidying operations directly on this `db` instance using its available methods (see list above) and potentially pandas for intermediate processing.
- Examples of operations:
    - Read a messy table: `df = db.read_table('messy_table')`
    - Tidy the DataFrame using pandas (melt, split, etc.).
    - Write the tidy DataFrame back: `db.write_dataframe(tidy_df, 'tidy_table_name', source_id=1, if_exists='replace')` (obtain a source_id if needed or use a default).
    - Drop the original messy table if desired: `db.drop_table('messy_table')`
- The function should modify the `db` object **in place** and MUST return `None`.

**Tools Available:**
- `execute_sql(query: str)`: Execute a read-only SQL query against the **input** SDIF database to inspect data further before generating adaptation code.
- `execute_tidy_adaptation(code: str)`: Execute the Python code string containing your `adapt_sdif` function.
    - This tool will run your code against a **copy** of the input SDIF.
    - It will return a **sample analysis** of the **modified SDIF file**.

**Workflow:**
1. **Analyze:** Examine the `input_schema` and `input_sample`. Use `execute_sql` if needed to understand the data structure and identify tables needing tidying.
2. **Code:** Write the `adapt_sdif` Python function using `SDIFDatabase` methods and pandas.
3. **Execute & Verify:** Use `execute_tidy_adaptation` with your code.
4. **Review:** Examine the returned `output_sample_analysis`. Check if the tables in the modified SDIF meet the tidy data principles.
5. **Refine:** If the output is not tidy, modify your Python code and repeat step 3.
6. **Finalize:** Once the `output_sample_analysis` confirms the data is tidy, respond **only** with the final, validated Python code string for the `adapt_sdif` function enclosed in triple backticks (```python ... ```). Do not include any other text before or after the code block.

**Example `adapt_sdif` function:**
```python
import pandas as pd
from satif_sdk.adapters.code import AdapterError  # Import for raising errors if needed
from typing import Dict, Any

# Assume input db has table 'wide_sales' with columns: 'Region', 'Q1_Sales', 'Q2_Sales'

def adapt_sdif(db: SDIFDatabase) -> None:
    try:
        # Get a default source_id or create one if needed
        sources = db.list_sources()
        source_id = sources[0]['source_id'] if sources else db.add_source('tidy_adapter', 'script')

        # Read the table to tidy
        df_sales = db.read_table('wide_sales')

        # Tidy using pandas
        tidy_sales = pd.melt(df_sales,
                             id_vars=['Region'],
                             value_vars=['Q1_Sales', 'Q2_Sales'],
                             var_name='Quarter',
                             value_name='Sales')
        tidy_sales['Quarter'] = tidy_sales['Quarter'].str.replace('_Sales', '')

        # Write the tidy table back, replacing the original if desired,
        # or writing to a new table.
        # Here, we write to a new table and drop the old one.
        db.write_dataframe(tidy_sales,
                           'tidy_sales_data',
                           source_id=source_id,
                           if_exists='replace',  # Replace if 'tidy_sales_data' already exists
                           description="Tidied sales data")

        # Optionally drop the original table
        db.drop_table('wide_sales')

    except Exception as e:
        print(f"Error during tidying: {{e}}")  # Log errors
        # Re-raise the exception to signal failure to the execution framework
        raise AdapterError(f"Error in adapt_sdif: {{e}}") from e

    # IMPORTANT: Function must return None (can be implicit)
    return None
```

**Important:**
- Your Python code string MUST define the `adapt_sdif(db: SDIFDatabase)` function.
- The function MUST return `None`.
- Use `db.write_dataframe` with `if_exists='replace'` or `if_exists='append'` or write to new tables and potentially use `db.drop_table` for the old ones.
- Handle potential errors during data reading or processing within your function and raise an `AdapterError` or similar to indicate failure.
- Ensure pandas and other necessary libraries (like `typing`, `AdapterError`) are imported within the code string if you use them.
"""
|
132
|
+
|
133
|
+
# --- Global context for tools ---
|
134
|
+
# These will be set within the TidyAdapter instance when adapt is called
|
135
|
+
# Shared mutable state read by the agent tool; every slot starts out unset (None).
#   copied_input_sdif_path   - working copy of the input SDIF
#   temp_dir                 - temporary directory holding that copy
#   current_output_sdif_path - path generated by the tool
TOOL_CONTEXT: dict[str, Optional[Path]] = dict.fromkeys(
    (
        "copied_input_sdif_path",
        "temp_dir",
        "current_output_sdif_path",
    )
)
|
140
|
+
|
141
|
+
|
142
|
+
def _discard_tool_output() -> None:
    """Best-effort removal of the output file recorded in ``TOOL_CONTEXT``."""
    stale_path = TOOL_CONTEXT.get("current_output_sdif_path")
    if not stale_path:
        return
    try:
        Path(stale_path).unlink()
    except OSError:
        # Includes FileNotFoundError: nothing was written, or it is already gone.
        pass


@function_tool
async def execute_tidy_adaptation(code: str) -> str:
    """
    Tool implementation for the agent to execute the tidying adaptation code.
    Runs the code against a *copy* of the input SDIF, creating a *new* output SDIF,
    and returns a sample analysis of the modified output.

    Args:
        code: Python source defining the `adapt_sdif(db)` function to execute.
    """
    # NOTE: the docstring above is passed through the `function_tool` decorator,
    # so it is kept agent-oriented rather than maintainer-oriented.
    copied_input_path = TOOL_CONTEXT.get("copied_input_sdif_path")
    temp_dir = TOOL_CONTEXT.get("temp_dir")

    if not copied_input_path or not copied_input_path.exists():
        return (
            "Error: Input SDIF copy not found for transformation. Tool context issue."
        )
    if not temp_dir:
        return "Error: Temporary directory not set up. Tool context issue."

    # Placeholder path for this run. CodeAdapter derives its own output path,
    # so this value is replaced as soon as the real path is known.
    tool_output_sdif_path = temp_dir / "tidy_adaptation_output.sdif"
    TOOL_CONTEXT["current_output_sdif_path"] = tool_output_sdif_path

    logger.info(
        f"Executing adaptation code via tool. Output will be: {tool_output_sdif_path}"
    )

    try:
        # 1. Run the agent-provided code through CodeAdapter; the returned path
        #    is the newly created, adapted file.
        adapter = CodeAdapter(
            function=code,
            function_name="adapt_sdif",  # As specified in prompt
            output_suffix="_adapted_tool_run",  # Give tool runs a distinct suffix
        )
        adapted_sdif_path = adapter.adapt(copied_input_path)

        # Record the real output immediately so that a failure while analysing
        # it removes the file that was actually produced.
        TOOL_CONTEXT["current_output_sdif_path"] = adapted_sdif_path

        # 2. Get sample analysis of the *adapted* SDIF file
        with SDIFDatabase(adapted_sdif_path, read_only=True) as adapted_db_read:
            analysis = adapted_db_read.get_sample_analysis(
                num_sample_rows=5, top_n_common_values=5
            )
        return json.dumps({"output_sample_analysis": analysis})

    except AdapterError as e:
        logger.error(f"Error during adaptation code execution via tool: {e}")
        _discard_tool_output()
        return f"Error: Adaptation code failed: {e}"
    except (sqlite3.Error, ValueError, TypeError, FileNotFoundError) as e:
        logger.error(f"Error analyzing adapted SDIF or file issue: {e}")
        _discard_tool_output()
        return f"Error: Failed to analyze adapted SDIF: {e}"
    except Exception as e:
        # Errors are reported back to the agent as text instead of propagating.
        logger.exception("Unexpected error in execute_tidy_adaptation tool")
        _discard_tool_output()
        return f"Unexpected Error: {e}"
|
225
|
+
|
226
|
+
|
227
|
+
class TidyAdapter(Adapter):
    """
    Uses an AI agent (via agents library) to generate and execute Python code
    that transforms tables in an SDIF file into a tidy format using CodeAdapter.
    """

    def __init__(
        self,
        mcp_server: MCPServerStdio,  # Use the server instance
        mcp_session: ClientSession,  # Use the client session
        llm_model: str = "o4-mini",  # Specify the LLM model
        max_iterations: int = 5,
    ):
        """
        Initialize the TidyAdapter.

        Args:
            mcp_server: An instance of MCPServerStdio for agent communication.
            mcp_session: An instance of ClientSession for resource/prompt fetching.
            llm_model: Name of the language model to use for the agent.
            max_iterations: Maximum number of attempts the agent gets to refine the code.
                Stored only; it is not currently forwarded to the Runner.
        """
        self.mcp_server = mcp_server
        self.mcp_session = mcp_session
        self.llm_model = llm_model
        self.max_iterations = max_iterations

        # Temporary environment, populated by _setup_temp_env and cleared by
        # _cleanup_temp_env.
        self._temp_dir: Optional[Path] = None
        self._copied_input_sdif_path: Optional[Path] = None

    def _get_sdif_methods(self) -> str:
        """
        Introspect SDIFDatabase and return its public method signatures.

        Returns:
            One ``db.<name>(<params>)`` line per public method, sorted, or a
            comment line if introspection fails entirely.
        """
        signatures = []
        # Exclude known internal/base methods explicitly
        exclude_methods = {
            "__init__",
            "__enter__",
            "__exit__",
            "__del__",
            "_validate_connection",
            "_create_metadata_tables",
        }
        try:
            for name, member in inspect.getmembers(SDIFDatabase, inspect.isfunction):
                if name.startswith("_") or name in exclude_methods:
                    continue
                try:
                    sig = inspect.signature(member)
                    # The signatures are rendered as calls on a bound `db`
                    # object, so the implicit `self` parameter is dropped.
                    params = list(sig.parameters.values())
                    if params and params[0].name == "self":
                        sig = sig.replace(parameters=params[1:])
                    signatures.append(f"db.{name}{sig}")
                except (ValueError, TypeError) as e:
                    # Some callables do not expose an introspectable signature.
                    logger.debug(f"Could not get signature for method {name}: {e}")
                    signatures.append(f"db.{name}(...) # Signature unavailable")

            return "\n".join(sorted(signatures))
        except Exception as e:
            logger.error(f"Failed to introspect SDIFDatabase methods: {e}")
            return "# Failed to retrieve method signatures."

    def _setup_temp_env(self, input_sdif_path: Path) -> Path:
        """
        Create a temporary directory, copy the input SDIF into it and publish
        both paths in ``TOOL_CONTEXT``.

        Args:
            input_sdif_path: Path of the SDIF file to copy.

        Returns:
            Path of the copy inside the temporary directory.

        Raises:
            Exception: Whatever ``shutil.copy`` raises; the temporary directory
                is removed before the error propagates.
        """
        self._temp_dir = Path(tempfile.mkdtemp(prefix="satif_tidy_adapter_"))
        self._copied_input_sdif_path = (
            self._temp_dir / f"input_copy_{input_sdif_path.name}"
        )
        try:
            shutil.copy(input_sdif_path, self._copied_input_sdif_path)
        except Exception:
            # adapt() calls this method before entering its try/finally, so a
            # failed copy would otherwise leave the fresh directory behind.
            self._cleanup_temp_env()
            raise
        logger.info(
            f"Copied input SDIF to temporary location: {self._copied_input_sdif_path}"
        )
        # Set global tool context
        TOOL_CONTEXT["copied_input_sdif_path"] = self._copied_input_sdif_path
        TOOL_CONTEXT["temp_dir"] = self._temp_dir
        TOOL_CONTEXT["current_output_sdif_path"] = None  # Reset output path context
        return self._copied_input_sdif_path

    def _cleanup_temp_env(self):
        """Remove the temporary directory and reset the shared tool context."""
        if self._temp_dir and self._temp_dir.exists():
            try:
                shutil.rmtree(self._temp_dir)
                logger.info(f"Cleaned up temporary directory: {self._temp_dir}")
            except Exception as e:
                # Cleanup is best-effort; a leftover directory must not mask
                # the result (or error) of the adaptation itself.
                logger.error(
                    f"Error cleaning up temporary directory {self._temp_dir}: {e}"
                )
        # Clear global tool context
        TOOL_CONTEXT["copied_input_sdif_path"] = None
        TOOL_CONTEXT["temp_dir"] = None
        TOOL_CONTEXT["current_output_sdif_path"] = None
        self._temp_dir = None
        self._copied_input_sdif_path = None

    def parse_code(self, code_text: str) -> Optional[str]:
        """
        Extract Python code from the first markdown code block in ``code_text``.

        Args:
            code_text: Raw text returned by the agent.

        Returns:
            The stripped code, the whole stripped text when it has no code
            fence but looks like an ``adapt_sdif`` definition, or ``None``.
        """
        match = re.search(r"```(?:python)?(.*?)```", code_text, re.DOTALL)
        if match:
            return match.group(1).strip()

        # No markdown block: accept the raw text only if it contains the
        # markers expected in valid adaptation code (less reliable).
        if "def adapt_sdif(" in code_text and "SDIFDatabase" in code_text:
            logger.warning(
                "No markdown code block found, attempting to use entire response as code."
            )
            return code_text.strip()
        return None  # Indicate no valid code found

    async def adapt(self, sdif_database: SDIFDatabase) -> Datasource:
        """
        Transforms the data in the input SDIF to be tidy using an AI agent.

        Args:
            sdif_database: The input SDIF database instance. Connection will be closed.

        Returns:
            Path to the new SDIF file containing the tidied data.

        Raises:
            FileNotFoundError: If the input SDIF path doesn't exist.
            RuntimeError: If the agent fails to produce valid tidy code.
            Exception: For unexpected errors during the process.
        """
        input_path = Path(sdif_database.path)
        if not input_path.exists():
            raise FileNotFoundError(f"Input SDIF file not found: {input_path}")

        # Ensure the input DB connection is closed before copying. A failure
        # here is tolerated, but recorded instead of being dropped silently.
        try:
            sdif_database.close()
        except Exception as e:
            logger.debug(f"Ignoring error while closing input SDIF connection: {e}")

        copied_input_path = self._setup_temp_env(input_path)

        try:
            # Get Initial Context using SDIFDatabase methods directly
            with SDIFDatabase(copied_input_path, read_only=True) as db:
                input_schema_dict = db.get_schema()
                input_sample_dict = db.get_sample_analysis()

            # Instantiate the Agent
            agent = Agent(
                name="Tidy SDIF Adapter Agent",
                mcp_servers=[self.mcp_server],
                tools=[execute_tidy_adaptation],  # Use the decorated tools
                model=self.llm_model,
            )

            # Run the agent with the fully rendered prompt as its input
            logger.info(f"Running Tidy Agent with model {self.llm_model}...")
            result = await Runner.run(
                agent,
                input=TIDY_TRANSFORMATION_PROMPT.format(
                    input_schema=json.dumps(input_schema_dict, indent=2),
                    input_sample=json.dumps(input_sample_dict, indent=2),
                    sdif_database_methods=self._get_sdif_methods(),
                ),
            )

            if not result or not result.final_output:
                raise RuntimeError("Agent execution failed or returned no output.")

            logger.info(
                f"Agent finished. Final output message:\n{result.final_output[:500]}..."
            )

            # Parse the final code from the agent's response
            final_code = self.parse_code(result.final_output)

            if not final_code:
                raise RuntimeError(
                    "Agent failed to produce valid final Python code in its response."
                    f" Full response:\n{result.final_output}"
                )

            logger.info(
                "Successfully parsed final adaptation code from agent response."
            )

            # Execute the *final* code using CodeAdapter directly to create the definitive output
            logger.info("Executing final adaptation code...")
            final_adapter = CodeAdapter(
                function=final_code,
                function_name="adapt_sdif",
                output_suffix="_tidy_final",  # Use a distinct suffix for the final output
            )
            # Adapt the *original* copied input path
            final_adapted_path = final_adapter.adapt(copied_input_path)

            # Move the final output next to the original input file so it
            # survives the removal of the temporary directory.
            persistent_output_path = (
                input_path.parent / final_adapted_path.name
            ).resolve()
            if persistent_output_path.exists():
                logger.warning(
                    f"Overwriting existing file at final destination: {persistent_output_path}"
                )
                persistent_output_path.unlink()

            shutil.move(str(final_adapted_path), persistent_output_path)
            logger.info(
                f"Successfully generated final tidy SDIF: {persistent_output_path}"
            )

            return persistent_output_path

        except Exception as e:
            logger.exception(f"Error during TidyAdapter adapt process: {e}")
            raise
        finally:
            # Always clean up temporary files
            self._cleanup_temp_env()
|
File without changes
|
@@ -0,0 +1,152 @@
|
|
1
|
+
import os
|
2
|
+
import re
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Dict, List, Optional, Union
|
5
|
+
|
6
|
+
from agents import Agent, Runner, function_tool
|
7
|
+
from agents.mcp.server import MCPServerStdio
|
8
|
+
from mcp import ClientSession
|
9
|
+
from satif_core import AsyncCodeBuilder, CodeBuilder, SDIFDatabase
|
10
|
+
from satif_sdk.comparators import get_comparator
|
11
|
+
from satif_sdk.representers import get_representer
|
12
|
+
from satif_sdk.transformers import CodeTransformer
|
13
|
+
|
14
|
+
# Global variables for transformation
|
15
|
+
INPUT_SDIF_PATH: Optional[Path] = None
|
16
|
+
OUTPUT_TARGET_FILES: Optional[Dict[Union[str, Path], str]] = None
|
17
|
+
|
18
|
+
|
19
|
+
@function_tool
async def execute_transformation(code: str) -> str:
    """Executes the transformation code on the input and returns the
    comparison difference between the transformed output and the target output example.

    Args:
        code: The code to execute on the input.
    """
    # The module-level context is populated by TransformationAsyncCodeBuilder.build.
    if INPUT_SDIF_PATH is None or OUTPUT_TARGET_FILES is None:
        return "Error: Transformation context not initialized"

    code_transformer = CodeTransformer(function=code)
    generated_output_path = code_transformer.export(INPUT_SDIF_PATH)

    comparisons = []

    if os.path.isdir(generated_output_path):
        # Directory output: compare each expected file with its target example.
        generated_files = set(os.listdir(generated_output_path))

        for output_base_file, output_target_file_name in OUTPUT_TARGET_FILES.items():
            if output_target_file_name not in generated_files:
                comparisons.append(
                    f"Error: {output_target_file_name} not found in the generated output"
                )
                continue

            generated_file_path = os.path.join(
                generated_output_path, output_target_file_name
            )
            # The comparator is selected by file extension.
            comparator = get_comparator(output_target_file_name.split(".")[-1])
            comparison = comparator.compare(generated_file_path, output_base_file)
            comparisons.append(
                f"Comparison for {generated_file_path} [SOURCE] with {output_target_file_name} [TARGET]: {comparison}"
            )
    elif len(OUTPUT_TARGET_FILES) == 1:
        # Single-file output: only meaningful when exactly one target exists.
        output_file, output_target_file_name = next(iter(OUTPUT_TARGET_FILES.items()))
        # Keys may be Path objects (they always are when the targets were given
        # as a list), and Path has no .split(); go through str() first.
        comparator = get_comparator(str(output_file).split(".")[-1])
        comparison = comparator.compare(generated_output_path, output_file)
        comparisons.append(
            f"Comparison for {generated_output_path} [SOURCE] with {output_target_file_name} [TARGET]: {comparison}"
        )
    else:
        comparisons.append(
            "Error: Single output file generated but multiple target files expected"
        )

    return "\n".join(comparisons)
|
72
|
+
|
73
|
+
|
74
|
+
class TransformationCodeBuilder(CodeBuilder):
    """Synchronous transformation code builder; ``build`` is currently a stub."""

    def __init__(self, output_example: Path | List[Path] | Dict[str, Path]):
        # Only stored here; nothing in this class reads it yet.
        self.output_example = output_example

    def build(
        self, sdif: Path | SDIFDatabase, instructions: Optional[str] = None
    ) -> str:
        """Placeholder: performs no work and returns ``None``."""
        return None
|
84
|
+
|
85
|
+
|
86
|
+
class TransformationAsyncCodeBuilder(AsyncCodeBuilder):
|
87
|
+
"""This class is used to build a transformation code that will be used to transform a SDIF database into a set of files following the format of the given output files."""
|
88
|
+
|
89
|
+
def __init__(
|
90
|
+
self,
|
91
|
+
mcp_server: MCPServerStdio,
|
92
|
+
mcp_session: ClientSession,
|
93
|
+
llm_model: str = "o3-mini",
|
94
|
+
):
|
95
|
+
self.mcp_server = mcp_server
|
96
|
+
self.mcp_session = mcp_session
|
97
|
+
self.llm_model = llm_model
|
98
|
+
|
99
|
+
async def build(
|
100
|
+
self,
|
101
|
+
sdif: Path,
|
102
|
+
output_target_files: Dict[Union[str, Path], str] | List[Path],
|
103
|
+
output_sdif: Optional[Path] = None,
|
104
|
+
instructions: Optional[str] = None,
|
105
|
+
) -> str:
|
106
|
+
global INPUT_SDIF_PATH, OUTPUT_TARGET_FILES
|
107
|
+
INPUT_SDIF_PATH = Path(sdif)
|
108
|
+
|
109
|
+
if isinstance(output_target_files, list):
|
110
|
+
OUTPUT_TARGET_FILES = {file: file.name for file in output_target_files}
|
111
|
+
else:
|
112
|
+
OUTPUT_TARGET_FILES = output_target_files
|
113
|
+
|
114
|
+
input_schema = await self.mcp_session.read_resource(f"schema://{sdif}")
|
115
|
+
input_sample = await self.mcp_session.read_resource(f"sample://{sdif}")
|
116
|
+
|
117
|
+
output_schema = await self.mcp_session.read_resource(f"schema://{output_sdif}")
|
118
|
+
output_sample = await self.mcp_session.read_resource(f"sample://{output_sdif}")
|
119
|
+
output_representation = {
|
120
|
+
file: get_representer(file).represent(file)
|
121
|
+
for file in list(OUTPUT_TARGET_FILES.keys())
|
122
|
+
}
|
123
|
+
|
124
|
+
prompt = await self.mcp_session.get_prompt(
|
125
|
+
"create_transformation",
|
126
|
+
arguments={
|
127
|
+
"input_file": INPUT_SDIF_PATH.name,
|
128
|
+
"input_schema": input_schema.contents[0].text,
|
129
|
+
"input_sample": input_sample.contents[0].text,
|
130
|
+
"output_files": str(list(OUTPUT_TARGET_FILES.values())),
|
131
|
+
"output_schema": output_schema.contents[0].text,
|
132
|
+
"output_sample": output_sample.contents[0].text,
|
133
|
+
"output_representation": str(output_representation),
|
134
|
+
},
|
135
|
+
)
|
136
|
+
agent = Agent(
|
137
|
+
name="Transformation Builder",
|
138
|
+
mcp_servers=[self.mcp_server],
|
139
|
+
tools=[execute_transformation],
|
140
|
+
model=self.llm_model,
|
141
|
+
)
|
142
|
+
result = await Runner.run(agent, prompt.messages[0].content.text)
|
143
|
+
transformation_code = self.parse_code(result.final_output)
|
144
|
+
return transformation_code
|
145
|
+
|
146
|
+
def parse_code(self, code) -> str:
    """Extract the source code from an LLM answer.

    Returns the content of the first triple-backtick fenced block, without
    its language tag, or the whole stripped text when no fenced block exists.

    Args:
        code: Raw text produced by the agent.

    Returns:
        The extracted code with surrounding whitespace removed.
    """
    # Accept the common Python fence tags in any letter case. The trailing
    # \b keeps inline code such as ```python_var = 1``` from losing its
    # "python" prefix to the tag matcher.
    match = re.search(
        r"```(?:(?:python3?|py)\b)?(.*?)```",
        code,
        re.DOTALL | re.IGNORECASE,
    )
    if match:
        return match.group(1).strip()
    # No fenced block: assume the whole answer is code.
    return code.strip()
|
File without changes
|
@@ -0,0 +1,204 @@
|
|
1
|
+
import logging
|
2
|
+
import re
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Optional, Union
|
5
|
+
|
6
|
+
from agents import Agent, Runner
|
7
|
+
from agents.mcp import MCPServerStdio
|
8
|
+
from mcp import ClientSession
|
9
|
+
|
10
|
+
from satif_ai.plot_builders.prompt import PLOTTING_AGENT_PROMPT
|
11
|
+
from satif_ai.plot_builders.tool import PLOTTING_TOOL_CONTEXT, execute_plotting_code
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class PlottingAgent:
    """Agent that generates Plotly plots from SDIF data based on user instructions.

    The agent is given the schema and a sample of the SDIF file (fetched via
    the MCP session), writes a plotting script and runs it through the
    ``execute_plotting_code`` tool. Per-run parameters are handed to that tool
    through the module-level ``PLOTTING_TOOL_CONTEXT`` dict, so overlapping
    ``generate_plot`` calls in one process would share that state.
    """

    def __init__(
        self,
        mcp_server: MCPServerStdio,
        mcp_session: ClientSession,
        llm_model: str = "o4-mini",
    ):
        """Store the MCP server/session and the model name used for the agent."""
        self.mcp_server = mcp_server
        self.mcp_session = mcp_session
        self.llm_model = llm_model

    @staticmethod
    def _path_candidates(raw: str) -> list:
        """Return the path strings worth trying for *raw*, most literal first.

        The second candidate drops decoration a model commonly adds around a
        path (backticks, quotes, markdown emphasis, a trailing period).
        """
        text = raw.strip()
        if not text:
            # An empty string would become Path("."), i.e. the working directory.
            return []
        candidates = [text]
        cleaned = text.lstrip("`'\"* ").rstrip("`'\"* .")
        if cleaned and cleaned != text:
            candidates.append(cleaned)
        return candidates

    def _parse_final_path(self, final_text: str) -> Optional[Path]:
        """Extract the path of the generated plot from the agent's final text.

        Looks first for the tool's "Success: Plot saved to <path>" message and
        then, as a fallback, for a reply consisting only of a path to
        ``plot.html``. Only paths that exist on disk are returned.

        Args:
            final_text: Final output text of the agent run.

        Returns:
            The plot path, or None when no existing path could be found.
        """
        match = re.search(r"Success: Plot saved to (.*)", final_text)
        if match:
            for candidate in self._path_candidates(match.group(1)):
                try:
                    p = Path(candidate)
                    if not p.exists():
                        continue
                    if p.is_absolute() and p.name.endswith(".html"):
                        return p
                    # Less ideal (relative or not named *.html) but the file exists.
                    logger.warning(
                        f"Parsed path {p} is not an absolute .html path but exists. Accepting."
                    )
                    return p.resolve()  # Return resolved absolute path
                except (OSError, ValueError, RuntimeError) as e:
                    logger.warning(f"Error validating parsed path '{candidate}': {e}")

        # Fallback: the agent replied with just the path instead of the message.
        if "plot.html" in final_text:
            for candidate in self._path_candidates(final_text):
                if Path(candidate).name != "plot.html":
                    continue
                try:
                    potential_path = Path(candidate).resolve()
                    if potential_path.exists():
                        logger.warning(
                            "Agent returned path directly instead of success message."
                        )
                        return potential_path
                except (OSError, ValueError, RuntimeError) as e:
                    logger.debug(f"Could not resolve fallback path '{candidate}': {e}")

        return None

    async def generate_plot(
        self, sdif_path: Union[str, Path], instructions: str
    ) -> Optional[Path]:
        """
        Generates a Plotly plot HTML file based on instructions and SDIF data.

        Args:
            sdif_path: Path to the input SDIF database file.
            instructions: Natural language instructions for the plot.

        Returns:
            Path to the generated HTML plot file. Failures are reported by
            raising rather than by returning None.

        Raises:
            FileNotFoundError: If the input SDIF file does not exist.
            RuntimeError: If agent execution fails or context cannot be fetched or plot fails.
            Exception: For other unexpected errors.
        """
        input_path = sdif_path
        # Fail fast with the documented exception; the tool needs the file
        # to exist locally as well.
        if not Path(input_path).exists():
            raise FileNotFoundError(f"Input SDIF file not found: {input_path}")

        # Set tool context (read by execute_plotting_code during the agent run)
        PLOTTING_TOOL_CONTEXT["input_sdif_path"] = input_path
        PLOTTING_TOOL_CONTEXT["user_instructions"] = instructions
        PLOTTING_TOOL_CONTEXT["output_plot_path"] = None

        try:
            # Get Initial Context from MCP Resources
            logger.info(f"Fetching schema and sample for {input_path}...")
            try:
                input_path_str = str(input_path)
                schema_uri = f"schema://{input_path_str}"
                sample_uri = f"sample://{input_path_str}"
                logger.debug(f"Requesting schema URI: {schema_uri}")
                logger.debug(f"Requesting sample URI: {sample_uri}")

                input_schema_resource = await self.mcp_session.read_resource(schema_uri)
                input_sample_resource = await self.mcp_session.read_resource(sample_uri)

                input_schema_str = (
                    input_schema_resource.contents[0].text
                    if input_schema_resource.contents
                    else "Error: Could not get schema (empty response)."
                )
                input_sample_str = (
                    input_sample_resource.contents[0].text
                    if input_sample_resource.contents
                    else "Error: Could not get sample (empty response)."
                )
            except Exception as mcp_err:
                logger.error(f"Failed to get schema/sample via MCP: {mcp_err}")
                raise RuntimeError(
                    f"Failed to get required context via MCP: {mcp_err}"
                ) from mcp_err

            # Format the prompt
            formatted_prompt = PLOTTING_AGENT_PROMPT.format(
                input_sdif_path=str(input_path),
                input_schema=input_schema_str,
                input_sample=input_sample_str,
                user_instructions=instructions,
            )

            # Instantiate the Agent
            agent = Agent(
                name="Plotting Agent",
                mcp_servers=[self.mcp_server],
                tools=[execute_plotting_code],
                model=self.llm_model,
            )

            # Run the agent
            logger.info(f"Running Plotting Agent with model {self.llm_model}...")
            result = await Runner.run(
                agent,
                input=formatted_prompt,
            )

            if not result or not result.final_output:
                raise RuntimeError(
                    "Plotting agent execution failed or returned no output."
                )

            agent_final_output_text = result.final_output
            logger.info(
                f"Plotting Agent finished. Final output:\n{agent_final_output_text}"
            )

            # Attempt to parse the path from the agent's final confirmation
            final_plot_path = self._parse_final_path(agent_final_output_text)
            if final_plot_path:  # Path found and exists
                logger.info(
                    f"Successfully confirmed plot generation at: {final_plot_path}"
                )
                return final_plot_path

            # The tool records the path it produced; trust it when the
            # agent's wording could not be parsed.
            final_plot_path_from_context = PLOTTING_TOOL_CONTEXT.get(
                "output_plot_path"
            )
            if (
                final_plot_path_from_context
                and Path(final_plot_path_from_context).exists()
            ):
                logger.warning(
                    "Parsed path from final output failed, but tool context has valid path."
                )
                return Path(final_plot_path_from_context)

            logger.error(
                "Agent finished, but could not confirm successful plot generation or find output file."
            )
            # Include agent output in error for debugging
            raise RuntimeError(
                f"Agent finished, but plot generation failed or output path couldn't be determined. Agent final output: '{agent_final_output_text}'"
            )

        except Exception as e:
            logger.exception(f"Error during PlottingAgent generate_plot: {e}")
            raise
        finally:
            # Always clear the shared context, even when the run failed.
            PLOTTING_TOOL_CONTEXT.pop("input_sdif_path", None)
            PLOTTING_TOOL_CONTEXT.pop("user_instructions", None)
            PLOTTING_TOOL_CONTEXT.pop("output_plot_path", None)
            logger.debug("Cleared plotting tool context.")
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# satif_ai/plot_builders/prompt.py
|
2
|
+
|
3
|
+
PLOTTING_AGENT_PROMPT = """
|
4
|
+
You are an expert Data Visualization Agent specialized in creating insightful and interactive plots using Plotly from data stored in SDIF (SQLite) databases. You are autonomous and **must not ask clarifying questions**.
|
5
|
+
|
6
|
+
**Goal:** Generate Python **script code** to create a Plotly visualization based on user instructions and data within the provided SDIF file. **Critically analyze the data (schema, sample) and instructions to infer the user's likely analytical goal. Prepare and transform the data as needed (e.g., cleaning types, handling missing values appropriately for the plot, calculating new fields), choose the most appropriate chart type (e.g., line for trends, bar for comparisons, scatter for correlations, histogram for distributions) and apply necessary data transformations (grouping, aggregation, pivoting) to best represent the data and answer the implied question in the instructions.** Use standard visualization best practices. Your objective is to produce an effective plot, not engage in conversation.
|
7
|
+
|
8
|
+
**Execution Context:**
|
9
|
+
Your code will be executed in an environment where the following variables are **already defined**:
|
10
|
+
- `db`: An instance of `SDIFDatabase`, connected in read-only mode to the input SDIF file (`{input_sdif_path}`).
|
11
|
+
- `instructions`: A string containing the user's request (`{user_instructions}`).
|
12
|
+
|
13
|
+
**Input SDIF Context:**
|
14
|
+
You have access to the following information about the input SDIF database (accessible via the `db` object):
|
15
|
+
|
16
|
+
<input_schema>
|
17
|
+
{input_schema}
|
18
|
+
</input_schema>
|
19
|
+
|
20
|
+
<input_sample>
|
21
|
+
{input_sample}
|
22
|
+
</input_sample>
|
23
|
+
|
24
|
+
**Available Tools:**
|
25
|
+
1. `execute_sql(query: str) -> str`: Execute a read-only SQL query against the **input** SDIF database (using the available `db` object, e.g., `db.query(...)`) to inspect data further *before* writing your main plotting code. Use this only if absolutely necessary to confirm data characteristics crucial for choosing the **correct** plot type or transformation (e.g., checking cardinality for grouping, range for binning).
|
26
|
+
2. `execute_plotting_code(code: str) -> str`: Executes the Python **script code** you generate. Your script **MUST** use the pre-defined `db` and `instructions` variables, generate a Plotly figure, and **save it to an HTML file** named `plot.html` in the current directory (e.g., `fig.write_html('plot.html')`). This tool will return the absolute path to the generated 'plot.html' on success, or an error message on failure.
|
27
|
+
|
28
|
+
**Workflow:**
|
29
|
+
1. **Analyze & Infer & Select:** Carefully review the user instructions, input schema, and sample data. **Infer the analytical goal. Based on the data types, cardinality, and instructions, determine the necessary data preparation steps (cleaning, type conversion, handling missing values suitable for the plot), select the *most appropriate* Plotly chart type, and identify required data aggregations (e.g., sum, mean, count) or transformations (e.g., grouping, calculating percentages, date extraction) needed to create an insightful visualization.** Do not ask for clarification.
|
30
|
+
2. **Explore (Minimal Use):** Only use `execute_sql` if essential for confirming data properties needed for your chosen preparation/chart/transformation strategy.
|
31
|
+
3. **Code Generation:** Write Python **script code** (NOT a function definition) that:
|
32
|
+
* Imports necessary libraries (`pandas as pd`, `plotly.express as px` or `plotly.graph_objects as go`).
|
33
|
+
* Uses the pre-defined `db` object to read the relevant data.
|
34
|
+
* Uses the `instructions` string variable if helpful for parameterizing the plot (e.g., titles).
|
35
|
+
* **Performs the necessary data preparation (cleaning, type conversion, handling NaNs/nulls appropriately) and transformations/aggregations identified in step 1 using pandas.**
|
36
|
+
* Creates the Plotly figure using the **chosen appropriate chart type** and the prepared/transformed/aggregated data. Make axes labels clear and add an informative title.
|
37
|
+
* **Crucially:** Saves the figure using `fig.write_html('plot.html')`.
|
38
|
+
4. **Execute:** Call the `execute_plotting_code` tool with your generated Python script code string. **You must call this tool.**
|
39
|
+
5. **Finalize:**
|
40
|
+
* **If `execute_plotting_code` returns a success message:** Respond **only** with the success message provided by the tool (e.g., "Success: Plot saved to /path/to/plot.html").
|
41
|
+
* **If `execute_plotting_code` returns an error message:** Respond **only** with the error message provided by the tool.
|
42
|
+
|
43
|
+
**Example Script Code (Illustrating Transformation & Chart Choice):**
|
44
|
+
```python
|
45
|
+
import pandas as pd
|
46
|
+
import plotly.express as px
|
47
|
+
import plotly.graph_objects as go # Import go if needed
|
48
|
+
|
49
|
+
# Assume 'db' and 'instructions' are pre-defined
|
50
|
+
# Assume instructions = "Show average monthly revenue trend"
|
51
|
+
try:
|
52
|
+
# Infer table and columns (e.g., 'transactions' with 'date', 'revenue')
|
53
|
+
df = db.read_table('transactions')
|
54
|
+
|
55
|
+
# --- Data Preparation ---
|
56
|
+
# Ensure date is datetime type
|
57
|
+
df['date'] = pd.to_datetime(df['date'], errors='coerce')
|
58
|
+
# Ensure revenue is numeric, handle errors (e.g., fill with 0 or drop)
|
59
|
+
df['revenue'] = pd.to_numeric(df['revenue'], errors='coerce').fillna(0)
|
60
|
+
# Drop rows where date conversion failed if necessary for plot
|
61
|
+
df = df.dropna(subset=['date'])
|
62
|
+
|
63
|
+
# --- Transformation for Plot ---
|
64
|
+
# Infer appropriate transformation: Group by month and calculate mean revenue
|
65
|
+
df['month'] = df['date'].dt.to_period('M').astype(str)
|
66
|
+
df_agg = df.groupby('month')['revenue'].mean().reset_index()
|
67
|
+
|
68
|
+
# --- Plotting ---
|
69
|
+
# Infer appropriate chart type: Line chart for trend
|
70
|
+
title = f"Average Monthly Revenue Trend (based on: {{instructions[:30]}}...)"
|
71
|
+
fig = px.line(df_agg, x='month', y='revenue', title=title, markers=True,
|
72
|
+
labels={{'revenue':'Average Revenue', 'month':'Month'}}) # Clear labels
|
73
|
+
|
74
|
+
# Save plot - THIS IS REQUIRED
|
75
|
+
output_path = 'plot.html'
|
76
|
+
fig.write_html(output_path)
|
77
|
+
print(f"Plot successfully saved to {{output_path}}") # Optional print
|
78
|
+
|
79
|
+
except Exception as e:
|
80
|
+
print(f"Error during plotting script execution: {{e}}")
|
81
|
+
raise # Re-raise exception
|
82
|
+
```
|
83
|
+
|
84
|
+
**CRITICAL INSTRUCTIONS:**
|
85
|
+
- **DO NOT ask clarifying questions.** Analyze the data and instructions to infer the best approach.
|
86
|
+
- **Prepare and transform the data as needed before plotting (handle types, NaNs, aggregate, etc.).**
|
87
|
+
- **Choose the MOST APPROPRIATE chart type.**
|
88
|
+
- **You MUST generate Python script code, NOT a function definition.**
|
89
|
+
- **Your script code MUST use the pre-defined `db` and `instructions` variables.**
|
90
|
+
- **You MUST call the `execute_plotting_code` tool with your generated script code.**
|
91
|
+
- **Your final response MUST be ONLY the exact success or error message returned by the `execute_plotting_code` tool.** No extra explanations or conversation.
|
92
|
+
"""
|
@@ -0,0 +1,146 @@
|
|
1
|
+
import logging
|
2
|
+
import sqlite3 # Ensure sqlite3 is imported if needed
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
from agents import function_tool
|
7
|
+
from satif_sdk import SDIFDatabase
|
8
|
+
from satif_sdk.code_executors import CodeExecutionError, LocalCodeExecutor
|
9
|
+
|
10
|
+
logger = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
# Module-level context for the plotting tool (similar to TidyAdapter).
# execute_plotting_code reads "input_sdif_path" and "user_instructions" from
# it and records the generated file under "output_plot_path".
PLOTTING_TOOL_CONTEXT = dict.fromkeys(
    ("input_sdif_path", "user_instructions", "output_plot_path")
)
|
18
|
+
|
19
|
+
|
20
|
+
@function_tool
async def execute_plotting_code(code: str) -> str:
    """
    Executes the provided Python plotting script code.
    The code should use the pre-defined 'db' (SDIFDatabase instance)
    and 'instructions' (string) variables.
    The code MUST save the generated Plotly plot to 'plot.html'.
    Returns the path to the saved plot on success or an error message.
    """
    # NOTE(review): the docstring above is presumably what `function_tool`
    # exposes to the model as the tool description, so it is kept
    # agent-facing; implementation notes live in comments instead.
    #
    # Every outcome is reported as a string ("Success: ..." or an error
    # message) so the agent can read it and react; nothing is raised.
    input_sdif_path = PLOTTING_TOOL_CONTEXT.get("input_sdif_path")
    user_instructions = PLOTTING_TOOL_CONTEXT.get("user_instructions")

    if not input_sdif_path:
        return "Error: Input SDIF path not found in tool context."
    # Check existence *before* trying to open
    if not Path(input_sdif_path).exists():
        return f"Error: Input SDIF file not found at {input_sdif_path}."
    if user_instructions is None:
        # Allow empty instructions, pass empty string
        user_instructions = ""
        logger.warning(
            "User instructions not found in tool context, using empty string."
        )

    # Use LocalCodeExecutor - WARNING: Insecure for untrusted code
    executor = LocalCodeExecutor()

    expected_output_filename = "plot.html"
    # Resolve path relative to the current working directory
    expected_output_path = Path(expected_output_filename).resolve()

    # Clear any previous plot. A stale file that cannot be removed would make
    # the existence check below report success for a plot that this run did
    # not produce, so that case is an error.
    try:
        expected_output_path.unlink()
    except FileNotFoundError:
        pass  # Nothing to clear.
    except OSError as e:
        logger.error(
            f"Could not remove existing plot file {expected_output_path}: {e}"
        )
        return f"Error: Could not remove existing plot file {expected_output_path}: {e}"

    db_instance = None
    try:
        # Use read-only mode as the code should only read for plotting
        db_instance = SDIFDatabase(input_sdif_path, read_only=True)

        # Names made available to the executed script
        code_context = {
            "db": db_instance,
            "instructions": user_instructions,
        }

        # The agent's code is a script, not a function. The preamble imports
        # SDIFDatabase from the same package this module imports it from, so
        # the script cannot fail on an import this module did not need.
        script_code = f"""
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from satif_sdk import SDIFDatabase # Make class available for type hints etc.
import sqlite3 # For potential use by pandas/db interaction

# Pre-defined variables available:
# db: SDIFDatabase instance connected to {input_sdif_path}
# instructions: str = User's instructions

# --- User's Script Code Start ---
{code}
# --- User's Script Code End ---
"""
        logger.debug(f"Executing plotting script code:\n---\n{code[:500]}...\n---")

        # LocalCodeExecutor.execute expects 'db' and 'datasource' in its signature.
        # The real db instance is passed both positionally (for the signature)
        # and through extra_context (so the script sees it as `db`).
        executor.execute(
            code=script_code,
            db=db_instance,
            datasource=None,  # Not needed
            extra_context=code_context,
        )

        # Check if the expected output file was created
        if expected_output_path.exists():
            logger.info(
                f"Plotting code executed successfully. Output: {expected_output_path}"
            )
            PLOTTING_TOOL_CONTEXT["output_plot_path"] = expected_output_path
            return f"Success: Plot saved to {expected_output_path}"

        logger.error(
            "Plotting code executed but output file 'plot.html' not found."
        )
        return "Error: Code executed (possibly with internal errors), but the expected output file 'plot.html' was not created."

    except (
        CodeExecutionError,
        sqlite3.Error,
        pd.io.sql.DatabaseError,
        FileNotFoundError,
        KeyError,
        ValueError,
        TypeError,
    ) as e:
        logger.error(f"Error executing plotting code or accessing DB: {e}")
        error_message = f"Error executing plotting code: {e}"
        # Give the agent a more specific hint for common mistakes. The error
        # may arrive directly or chained as the cause of the executor's error.
        if "no such table" in str(e).lower():
            error_message = f"Error executing plotting code: Table not found. {e}"
        elif isinstance(e, KeyError) or isinstance(e.__cause__, KeyError):
            # Pandas KeyError on column access
            error_message = f"Error executing plotting code: Column not found or data processing error. {e}"

        return error_message
    except Exception as e:
        logger.exception("Unexpected error during plotting code execution via tool.")
        return f"Unexpected Error during execution: {e}"
    finally:
        # Ensure the db instance created here is closed
        if db_instance:
            try:
                db_instance.close()
                logger.debug("Closed DB instance in plotting tool.")
            except Exception as close_err:
                logger.error(f"Error closing DB instance in plotting tool: {close_err}")
|