satif-ai 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- satif_ai/__init__.py +19 -0
- satif_ai/adapters/tidy.py +19 -38
- satif_ai/standardize.py +112 -0
- satif_ai/standardizers/ai.py +485 -0
- satif_ai/standardizers/ai_csv.py +47 -129
- satif_ai/transform.py +121 -0
- satif_ai/{code_builders/transformation.py → transformation_builders/syncpulse.py} +28 -36
- satif_ai/utils/__init__.py +5 -0
- satif_ai/utils/merge_sdif.py +22 -0
- satif_ai/utils/openai_mcp.py +97 -0
- satif_ai/utils/zip.py +120 -0
- {satif_ai-0.2.7.dist-info → satif_ai-0.2.9.dist-info}/METADATA +4 -3
- satif_ai-0.2.9.dist-info/RECORD +19 -0
- satif_ai/code_builders/adaptation.py +0 -9
- satif_ai/plot_builders/__init__.py +0 -0
- satif_ai/plot_builders/agent.py +0 -204
- satif_ai/plot_builders/prompt.py +0 -92
- satif_ai/plot_builders/tool.py +0 -146
- satif_ai-0.2.7.dist-info/RECORD +0 -17
- /satif_ai/{code_builders → transformation_builders}/__init__.py +0 -0
- {satif_ai-0.2.7.dist-info → satif_ai-0.2.9.dist-info}/LICENSE +0 -0
- {satif_ai-0.2.7.dist-info → satif_ai-0.2.9.dist-info}/WHEEL +0 -0
- {satif_ai-0.2.7.dist-info → satif_ai-0.2.9.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Any
|
3
|
+
|
4
|
+
from agents.mcp.server import CallToolResult, MCPServer, MCPTool
|
5
|
+
from fastmcp import FastMCP
|
6
|
+
|
7
|
+
logger = logging.getLogger(__name__)
|
8
|
+
|
9
|
+
|
10
|
+
class OpenAICompatibleMCP(MCPServer):
    """Expose an in-process ``FastMCP`` instance through the ``MCPServer`` interface.

    The wrapper does not start or stop the FastMCP server; it only tracks a
    local "connected" flag and forwards tool listing and tool invocation to
    the wrapped instance.
    """

    def __init__(self, mcp: FastMCP):
        """Wrap ``mcp`` without connecting; call :meth:`connect` before use.

        Args:
            mcp: The FastMCP instance whose tools are exposed.
        """
        # NOTE(review): newer Agents SDK releases appear to set instance state
        # in MCPServer.__init__; calling it is harmless when the base class
        # defines no initializer. Confirm against the pinned SDK version.
        super().__init__()
        self.mcp = mcp
        self._is_connected = False  # Track connection state

    async def connect(self) -> None:
        """Mark the wrapper as connected.

        No action is taken on the FastMCP instance itself: its lifecycle
        (run, stop) is managed outside this wrapper.
        """
        logger.info(
            "OpenAICompatibleMCP: Simulating connection to FastMCP server '%s'.",
            self.mcp.name,
        )
        self._is_connected = True

    @property
    def name(self) -> str:
        """A readable name for the server (the wrapped FastMCP name)."""
        return self.mcp.name

    async def cleanup(self) -> None:
        """Mark the wrapper as disconnected.

        As with :meth:`connect`, the actual server cleanup happens elsewhere.
        """
        logger.info(
            "OpenAICompatibleMCP: Simulating cleanup for FastMCP server '%s'.",
            self.mcp.name,
        )
        self._is_connected = False

    async def list_tools(self) -> list[MCPTool]:
        """List the tools available on the server.

        Returns:
            One MCP tool description per tool registered on the FastMCP
            instance, named after its registration key.

        Raises:
            RuntimeError: If called before :meth:`connect`.
            Exception: Any error raised by FastMCP is logged and re-raised.
        """
        if not self._is_connected:
            raise RuntimeError(
                "OpenAICompatibleMCP.list_tools called while not connected."
            )

        try:
            # get_tools() yields a mapping of registration key -> FastMCP tool;
            # each tool converts itself to the MCP wire type.
            fastmcp_tools = await self.mcp.get_tools()
            return [tool.to_mcp_tool(name=key) for key, tool in fastmcp_tools.items()]
        except Exception as e:
            logger.exception(
                "Error listing tools from FastMCP server '%s': %s", self.mcp.name, e
            )
            # Bare raise keeps the original traceback intact.
            raise

    async def call_tool(
        self, tool_name: str, arguments: dict[str, Any] | None
    ) -> CallToolResult:
        """Invoke a tool on the server.

        Unlike :meth:`list_tools`, failures are reported in-band: a
        disconnected wrapper or a failing tool yields a ``CallToolResult``
        with ``isError=True`` instead of raising.

        Args:
            tool_name: Name of the tool to call.
            arguments: Tool arguments; ``None`` is treated as no arguments.

        Returns:
            The tool's content on success, or a single text item describing
            the error.
        """
        if not self._is_connected:
            logger.warning(
                "OpenAICompatibleMCP.call_tool '%s' called while not connected.",
                tool_name,
            )
            return CallToolResult(
                content=[{"type": "text", "text": "Server not connected"}], isError=True
            )

        try:
            # _mcp_call_tool is a protected FastMCP member; its result is used
            # directly as the 'content' of the CallToolResult.
            content = await self.mcp._mcp_call_tool(tool_name, arguments or {})
            return CallToolResult(content=content, isError=False)
        except Exception as e:
            logger.exception(
                "Error calling tool '%s' on FastMCP server '%s': %s",
                tool_name,
                self.mcp.name,
                e,
            )
            error_message = f"Error calling tool '{tool_name}': {type(e).__name__}: {e}"
            # Content must be a list of valid MCP content items, even for errors;
            # a text item is the safe choice.
            return CallToolResult(
                content=[{"type": "text", "text": error_message}], isError=True
            )
|
satif_ai/utils/zip.py
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
import asyncio
|
2
|
+
import logging
|
3
|
+
import zipfile
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import List, Tuple
|
6
|
+
|
7
|
+
logger = logging.getLogger(__name__)

# Constants for ZIP file processing, kept local to this utility or passed as args if needed
_IGNORED_ZIP_MEMBER_PREFIXES = ("__MACOSX/",)
_IGNORED_ZIP_FILENAME_PREFIXES = ("._",)
_IGNORED_ZIP_FILENAMES = (".DS_Store",)


async def extract_zip_archive_async(
    zip_path: Path,
    extract_to: Path,
    ignored_member_prefixes: Tuple[str, ...] = _IGNORED_ZIP_MEMBER_PREFIXES,
    ignored_filename_prefixes: Tuple[str, ...] = _IGNORED_ZIP_FILENAME_PREFIXES,
    ignored_filenames: Tuple[str, ...] = _IGNORED_ZIP_FILENAMES,
) -> List[Path]:
    """
    Asynchronously extracts a ZIP archive to a specified directory, filtering out ignored files.

    The blocking work runs in a worker thread via ``asyncio.to_thread``.
    Members whose name is absolute or contains a ``..`` component are logged
    and NOT extracted. Ignored files (e.g. ``__MACOSX/`` entries) are still
    written to disk but are left out of the returned list.

    Args:
        zip_path: Path to the ZIP archive.
        extract_to: Directory where the contents will be extracted (created if missing).
        ignored_member_prefixes: Tuple of member path prefixes (POSIX-style,
            relative to ``extract_to``) to ignore.
        ignored_filename_prefixes: Tuple of filename prefixes to ignore.
        ignored_filenames: Tuple of exact filenames to ignore.

    Returns:
        A list of paths to the files found under ``extract_to`` after
        extraction, minus ignored ones. Files already present in
        ``extract_to`` before the call are included as well.

    Raises:
        ValueError: If the zip_path is invalid or corrupted.
        RuntimeError: If any other error occurs during extraction.
    """

    def is_unsafe_member(member_name: str) -> bool:
        # Absolute names and any '..' path component could escape extract_to.
        normalized = member_name.replace("\\", "/")
        return normalized.startswith("/") or ".." in normalized.split("/")

    def is_ignored(relative_path: Path) -> bool:
        # as_posix() keeps '__MACOSX/'-style prefixes matching on Windows too.
        member_path = relative_path.as_posix()
        filename = relative_path.name
        return (
            member_path.startswith(tuple(ignored_member_prefixes))
            or filename.startswith(tuple(ignored_filename_prefixes))
            or filename in ignored_filenames
        )

    def blocking_extract() -> List[Path]:
        # Function-scope import: os.walk is used instead of Path.walk(), which
        # only exists on Python 3.12+ while the package supports 3.10+.
        import os

        extracted_file_paths: List[Path] = []
        logger.info(f"Extracting ZIP archive '{zip_path.name}' to '{extract_to}'...")
        try:
            extract_to.mkdir(parents=True, exist_ok=True)

            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                # Security: only members that pass the path check are handed
                # to extractall, so "Skipping" in the log is actually true.
                safe_members = []
                for member_name in zip_ref.namelist():
                    if is_unsafe_member(member_name):
                        logger.error(
                            f"Skipping potentially unsafe path in ZIP: {member_name}"
                        )
                        continue
                    safe_members.append(member_name)

                zip_ref.extractall(extract_to, members=safe_members)

            # Collect every *file* under extract_to, applying the ignore
            # filters so ignored files are never returned even though
            # extractall created them.
            for root, _, files in os.walk(extract_to):
                for filename in files:
                    full_path = Path(root) / filename
                    if is_ignored(full_path.relative_to(extract_to)):
                        logger.debug(f"Ignoring file post-extraction: {full_path}")
                    else:
                        extracted_file_paths.append(full_path)

            if not extracted_file_paths:
                logger.warning(
                    f"ZIP archive '{zip_path.name}' is empty or contains no processable files after filtering."
                )
            else:
                logger.info(
                    f"Successfully extracted {len(extracted_file_paths)} file(s) from '{zip_path.name}'."
                )
            return extracted_file_paths
        except zipfile.BadZipFile as e:
            logger.error(
                f"Invalid or corrupted ZIP file: {zip_path.name}", exc_info=True
            )
            raise ValueError(f"Invalid or corrupted ZIP file: {zip_path.name}") from e
        except Exception as e:
            logger.error(
                f"Failed to extract ZIP archive '{zip_path.name}': {e}", exc_info=True
            )
            raise RuntimeError(
                f"Unexpected error during ZIP extraction for '{zip_path.name}'"
            ) from e

    return await asyncio.to_thread(blocking_extract)
|
@@ -1,10 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: satif-ai
|
3
|
-
Version: 0.2.7
|
3
|
+
Version: 0.2.9
|
4
4
|
Summary: AI Agents for Satif
|
5
5
|
License: MIT
|
6
|
-
Author:
|
7
|
-
|
6
|
+
Author: Syncpulse
|
7
|
+
Maintainer: Bryan Djafer
|
8
|
+
Maintainer-email: bryan.djafer@syncpulse.fr
|
8
9
|
Requires-Python: >=3.10,<4.0
|
9
10
|
Classifier: License :: OSI Approved :: MIT License
|
10
11
|
Classifier: Programming Language :: Python :: 3
|
@@ -0,0 +1,19 @@
|
|
1
|
+
satif_ai/__init__.py,sha256=cqJ6Kd9IolVodPi9yOBPnfhYQXH5a1JgRB3HfLOtP_4,611
|
2
|
+
satif_ai/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
satif_ai/adapters/tidy.py,sha256=lcJXFmzEgCFy1W57kgbMOkoFTPLOkrvHC6NHVRKn-04,18549
|
4
|
+
satif_ai/standardize.py,sha256=TgAB_nhcHY8zqlfT1PpgfgSswqdE-ly-dheQz-7NC7Q,5674
|
5
|
+
satif_ai/standardizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
satif_ai/standardizers/ai.py,sha256=5vv-Rs6s_9FA21uM2iepTsbv6f3adZ8wFteOcW53z_s,21458
|
7
|
+
satif_ai/standardizers/ai_csv.py,sha256=tMibsTp55sHJ56r7cYKjb5b0Hm6rdnV3TeA0EppIWJg,25371
|
8
|
+
satif_ai/transform.py,sha256=iy9prkBCknRcsSXWOY_NwtNojQVcRW_luYFwkcjOnPw,5600
|
9
|
+
satif_ai/transformation_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
satif_ai/transformation_builders/syncpulse.py,sha256=c59BZicNnqs3NDKpflBAPqw42pGb6nYB2Zps0ChGyaM,11368
|
11
|
+
satif_ai/utils/__init__.py,sha256=F-usaCt_vX872mXvtukuZdNMPnkVqDb8RaDgox2uow4,212
|
12
|
+
satif_ai/utils/merge_sdif.py,sha256=-BXsCaLDHEtKOQRWOKyVCNefFwkyVygFQs8NeeFONFA,663
|
13
|
+
satif_ai/utils/openai_mcp.py,sha256=duCQZXG0mBs9DOOFIUvzraJhxD2IDzegWO9iOiLfFwY,3938
|
14
|
+
satif_ai/utils/zip.py,sha256=G_GK8629Iw0TLFCQJfnqOscv7MoKF5zdzxvEAbL7Gss,5186
|
15
|
+
satif_ai-0.2.9.dist-info/LICENSE,sha256=kS8EN6yAaGZd7V5z6GKSn_x3ozcZltrfRky4vMPRCw8,1072
|
16
|
+
satif_ai-0.2.9.dist-info/METADATA,sha256=Vq62i6fUx8sKaM2mYVqRfGReHCTcFG_P6mW1otnx8GY,696
|
17
|
+
satif_ai-0.2.9.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
18
|
+
satif_ai-0.2.9.dist-info/entry_points.txt,sha256=Mz2SwYALjktap1bF-Q3EWBgiZVNT6QJCVsCs_fCV33Y,43
|
19
|
+
satif_ai-0.2.9.dist-info/RECORD,,
|
File without changes
|
satif_ai/plot_builders/agent.py
DELETED
@@ -1,204 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
import re
|
3
|
-
from pathlib import Path
|
4
|
-
from typing import Optional, Union
|
5
|
-
|
6
|
-
from agents import Agent, Runner
|
7
|
-
from agents.mcp import MCPServerStdio
|
8
|
-
from mcp import ClientSession
|
9
|
-
|
10
|
-
from satif_ai.plot_builders.prompt import PLOTTING_AGENT_PROMPT
|
11
|
-
from satif_ai.plot_builders.tool import PLOTTING_TOOL_CONTEXT, execute_plotting_code
|
12
|
-
|
13
|
-
logger = logging.getLogger(__name__)
|
14
|
-
|
15
|
-
|
16
|
-
class PlottingAgent:
    """Agent that generates Plotly plots from SDIF data based on user instructions."""

    def __init__(
        self,
        mcp_server: MCPServerStdio,
        mcp_session: ClientSession,
        llm_model: str = "o4-mini",
    ):
        """Store the MCP server/session and the model name used for each run.

        Args:
            mcp_server: MCP server handed to the agent as a tool source.
            mcp_session: MCP client session used to read schema/sample resources.
            llm_model: Model identifier passed to the agent.
        """
        self.mcp_server = mcp_server
        self.mcp_session = mcp_session
        self.llm_model = llm_model

    def _parse_final_path(self, final_text: str) -> Optional[Path]:
        """Extract an existing plot path from the agent's final message.

        Two shapes are recognised: a ``Success: Plot saved to <path>`` line,
        and (as a fallback) a message that is itself a path ending in
        ``plot.html``. Only paths that exist on disk are returned.

        Args:
            final_text: The agent's final output text.

        Returns:
            The plot path, or ``None`` when no existing path can be extracted.
        """
        match = re.search(r"Success: Plot saved to (.*)", final_text)
        if match:
            path_str = match.group(1).strip()
            try:
                p = Path(path_str)
                # Preferred case: an absolute .html path that exists.
                if p.is_absolute() and p.name.endswith(".html") and p.exists():
                    return p
                # Any other existing path (relative, or not .html) is accepted
                # but flagged; it is resolved to an absolute path.
                if p.exists():
                    logger.warning(
                        f"Parsed path {p} is not an absolute .html path but exists. Accepting."
                    )
                    return p.resolve()
            except Exception as e:
                logger.warning(f"Error validating parsed path '{path_str}': {e}")

        # Fallback: the agent may have returned the bare path instead of the
        # success message.
        if "plot.html" in final_text:
            potential_path_str = final_text.strip()
            if Path(potential_path_str).name == "plot.html":
                try:
                    potential_path = Path(potential_path_str).resolve()
                    if potential_path.exists():
                        logger.warning(
                            "Agent returned path directly instead of success message."
                        )
                        return potential_path
                except Exception as e:
                    # Best effort only: an unusable path means "not found".
                    logger.debug(
                        f"Could not validate fallback path '{potential_path_str}': {e}"
                    )

        return None

    async def generate_plot(
        self, sdif_path: Union[str, Path], instructions: str
    ) -> Optional[Path]:
        """
        Generates a Plotly plot HTML file based on instructions and SDIF data.

        The method publishes its inputs through the module-level
        ``PLOTTING_TOOL_CONTEXT`` dict for the plotting tool and clears them
        again when done. NOTE: because that dict is shared, concurrent calls
        would overwrite each other's context.

        Args:
            sdif_path: Path to the input SDIF database file.
            instructions: Natural language instructions for the plot.

        Returns:
            Path to the generated HTML plot file. Every failure path raises,
            so ``None`` is not returned in practice.

        Raises:
            RuntimeError: If the schema/sample cannot be fetched via MCP
                (presumably including a missing input file; this method does
                not check existence itself), if the agent returns no output,
                or if no existing plot file can be determined.
            Exception: Other unexpected errors are logged and re-raised.
        """
        input_path = sdif_path
        # Set tool context
        PLOTTING_TOOL_CONTEXT["input_sdif_path"] = input_path
        PLOTTING_TOOL_CONTEXT["user_instructions"] = instructions
        PLOTTING_TOOL_CONTEXT["output_plot_path"] = None

        try:
            # Get initial context from MCP resources
            logger.info(f"Fetching schema and sample for {input_path}...")
            try:
                input_path_str = str(input_path)
                schema_uri = f"schema://{input_path_str}"
                sample_uri = f"sample://{input_path_str}"
                logger.debug(f"Requesting schema URI: {schema_uri}")
                logger.debug(f"Requesting sample URI: {sample_uri}")

                input_schema_resource = await self.mcp_session.read_resource(schema_uri)
                input_sample_resource = await self.mcp_session.read_resource(sample_uri)

                input_schema_str = (
                    input_schema_resource.contents[0].text
                    if input_schema_resource.contents
                    else "Error: Could not get schema (empty response)."
                )
                input_sample_str = (
                    input_sample_resource.contents[0].text
                    if input_sample_resource.contents
                    else "Error: Could not get sample (empty response)."
                )
            except Exception as mcp_err:
                logger.error(f"Failed to get schema/sample via MCP: {mcp_err}")
                raise RuntimeError(
                    f"Failed to get required context via MCP: {mcp_err}"
                ) from mcp_err

            # Format the prompt
            formatted_prompt = PLOTTING_AGENT_PROMPT.format(
                input_sdif_path=str(input_path),
                input_schema=input_schema_str,
                input_sample=input_sample_str,
                user_instructions=instructions,
            )

            # Instantiate the Agent
            agent = Agent(
                name="Plotting Agent",
                mcp_servers=[self.mcp_server],
                tools=[execute_plotting_code],
                model=self.llm_model,
            )

            # Run the agent
            logger.info(f"Running Plotting Agent with model {self.llm_model}...")
            result = await Runner.run(
                agent,
                input=formatted_prompt,
            )

            if not result or not result.final_output:
                raise RuntimeError(
                    "Plotting agent execution failed or returned no output."
                )

            agent_final_output_text = result.final_output
            logger.info(
                f"Plotting Agent finished. Final output:\n{agent_final_output_text}"
            )

            # Attempt to parse the path from the agent's final confirmation
            final_plot_path = self._parse_final_path(agent_final_output_text)
            if final_plot_path:  # Path found and exists
                logger.info(
                    f"Successfully confirmed plot generation at: {final_plot_path}"
                )
                return final_plot_path

            # Second chance: the tool may have recorded the output path itself.
            final_plot_path_from_context = PLOTTING_TOOL_CONTEXT.get(
                "output_plot_path"
            )
            if final_plot_path_from_context:
                # Coerce so a path stored as a plain string is handled too.
                context_path = Path(final_plot_path_from_context)
                if context_path.exists():
                    logger.warning(
                        "Parsed path from final output failed, but tool context has valid path."
                    )
                    return context_path

            logger.error(
                "Agent finished, but could not confirm successful plot generation or find output file."
            )
            # Include agent output in error for debugging
            raise RuntimeError(
                f"Agent finished, but plot generation failed or output path couldn't be determined. Agent final output: '{agent_final_output_text}'"
            )

        except Exception as e:
            logger.exception(f"Error during PlottingAgent generate_plot: {e}")
            raise
        finally:
            # Always clear the shared context, even on failure.
            PLOTTING_TOOL_CONTEXT.pop("input_sdif_path", None)
            PLOTTING_TOOL_CONTEXT.pop("user_instructions", None)
            PLOTTING_TOOL_CONTEXT.pop("output_plot_path", None)
            logger.debug("Cleared plotting tool context.")
|
satif_ai/plot_builders/prompt.py
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
# satif_ai/plot_builders/prompt.py
|
2
|
-
|
3
|
-
PLOTTING_AGENT_PROMPT = """
|
4
|
-
You are an expert Data Visualization Agent specialized in creating insightful and interactive plots using Plotly from data stored in SDIF (SQLite) databases. You are autonomous and **must not ask clarifying questions**.
|
5
|
-
|
6
|
-
**Goal:** Generate Python **script code** to create a Plotly visualization based on user instructions and data within the provided SDIF file. **Critically analyze the data (schema, sample) and instructions to infer the user's likely analytical goal. Prepare and transform the data as needed (e.g., cleaning types, handling missing values appropriately for the plot, calculating new fields), choose the most appropriate chart type (e.g., line for trends, bar for comparisons, scatter for correlations, histogram for distributions) and apply necessary data transformations (grouping, aggregation, pivoting) to best represent the data and answer the implied question in the instructions.** Use standard visualization best practices. Your objective is to produce an effective plot, not engage in conversation.
|
7
|
-
|
8
|
-
**Execution Context:**
|
9
|
-
Your code will be executed in an environment where the following variables are **already defined**:
|
10
|
-
- `db`: An instance of `SDIFDatabase`, connected in read-only mode to the input SDIF file (`{input_sdif_path}`).
|
11
|
-
- `instructions`: A string containing the user's request (`{user_instructions}`).
|
12
|
-
|
13
|
-
**Input SDIF Context:**
|
14
|
-
You have access to the following information about the input SDIF database (accessible via the `db` object):
|
15
|
-
|
16
|
-
<input_schema>
|
17
|
-
{input_schema}
|
18
|
-
</input_schema>
|
19
|
-
|
20
|
-
<input_sample>
|
21
|
-
{input_sample}
|
22
|
-
</input_sample>
|
23
|
-
|
24
|
-
**Available Tools:**
|
25
|
-
1. `execute_sql(query: str) -> str`: Execute a read-only SQL query against the **input** SDIF database (using the available `db` object, e.g., `db.query(...)`) to inspect data further *before* writing your main plotting code. Use this only if absolutely necessary to confirm data characteristics crucial for choosing the **correct** plot type or transformation (e.g., checking cardinality for grouping, range for binning).
|
26
|
-
2. `execute_plotting_code(code: str) -> str`: Executes the Python **script code** you generate. Your script **MUST** use the pre-defined `db` and `instructions` variables, generate a Plotly figure, and **save it to an HTML file** named `plot.html` in the current directory (e.g., `fig.write_html('plot.html')`). This tool will return the absolute path to the generated 'plot.html' on success, or an error message on failure.
|
27
|
-
|
28
|
-
**Workflow:**
|
29
|
-
1. **Analyze & Infer & Select:** Carefully review the user instructions, input schema, and sample data. **Infer the analytical goal. Based on the data types, cardinality, and instructions, determine the necessary data preparation steps (cleaning, type conversion, handling missing values suitable for the plot), select the *most appropriate* Plotly chart type, and identify required data aggregations (e.g., sum, mean, count) or transformations (e.g., grouping, calculating percentages, date extraction) needed to create an insightful visualization.** Do not ask for clarification.
|
30
|
-
2. **Explore (Minimal Use):** Only use `execute_sql` if essential for confirming data properties needed for your chosen preparation/chart/transformation strategy.
|
31
|
-
3. **Code Generation:** Write Python **script code** (NOT a function definition) that:
|
32
|
-
* Imports necessary libraries (`pandas as pd`, `plotly.express as px` or `plotly.graph_objects as go`).
|
33
|
-
* Uses the pre-defined `db` object to read the relevant data.
|
34
|
-
* Uses the `instructions` string variable if helpful for parameterizing the plot (e.g., titles).
|
35
|
-
* **Performs the necessary data preparation (cleaning, type conversion, handling NaNs/nulls appropriately) and transformations/aggregations identified in step 1 using pandas.**
|
36
|
-
* Creates the Plotly figure using the **chosen appropriate chart type** and the prepared/transformed/aggregated data. Make axes labels clear and add an informative title.
|
37
|
-
* **Crucially:** Saves the figure using `fig.write_html('plot.html')`.
|
38
|
-
4. **Execute:** Call the `execute_plotting_code` tool with your generated Python script code string. **You must call this tool.**
|
39
|
-
5. **Finalize:**
|
40
|
-
* **If `execute_plotting_code` returns a success message:** Respond **only** with the success message provided by the tool (e.g., "Success: Plot saved to /path/to/plot.html").
|
41
|
-
* **If `execute_plotting_code` returns an error message:** Respond **only** with the error message provided by the tool.
|
42
|
-
|
43
|
-
**Example Script Code (Illustrating Transformation & Chart Choice):**
|
44
|
-
```python
|
45
|
-
import pandas as pd
|
46
|
-
import plotly.express as px
|
47
|
-
import plotly.graph_objects as go # Import go if needed
|
48
|
-
|
49
|
-
# Assume 'db' and 'instructions' are pre-defined
|
50
|
-
# Assume instructions = "Show average monthly revenue trend"
|
51
|
-
try:
|
52
|
-
# Infer table and columns (e.g., 'transactions' with 'date', 'revenue')
|
53
|
-
df = db.read_table('transactions')
|
54
|
-
|
55
|
-
# --- Data Preparation ---
|
56
|
-
# Ensure date is datetime type
|
57
|
-
df['date'] = pd.to_datetime(df['date'], errors='coerce')
|
58
|
-
# Ensure revenue is numeric, handle errors (e.g., fill with 0 or drop)
|
59
|
-
df['revenue'] = pd.to_numeric(df['revenue'], errors='coerce').fillna(0)
|
60
|
-
# Drop rows where date conversion failed if necessary for plot
|
61
|
-
df = df.dropna(subset=['date'])
|
62
|
-
|
63
|
-
# --- Transformation for Plot ---
|
64
|
-
# Infer appropriate transformation: Group by month and calculate mean revenue
|
65
|
-
df['month'] = df['date'].dt.to_period('M').astype(str)
|
66
|
-
df_agg = df.groupby('month')['revenue'].mean().reset_index()
|
67
|
-
|
68
|
-
# --- Plotting ---
|
69
|
-
# Infer appropriate chart type: Line chart for trend
|
70
|
-
title = f"Average Monthly Revenue Trend (based on: {{instructions[:30]}}...)"
|
71
|
-
fig = px.line(df_agg, x='month', y='revenue', title=title, markers=True,
|
72
|
-
labels={{'revenue':'Average Revenue', 'month':'Month'}}) # Clear labels
|
73
|
-
|
74
|
-
# Save plot - THIS IS REQUIRED
|
75
|
-
output_path = 'plot.html'
|
76
|
-
fig.write_html(output_path)
|
77
|
-
print(f"Plot successfully saved to {{output_path}}") # Optional print
|
78
|
-
|
79
|
-
except Exception as e:
|
80
|
-
print(f"Error during plotting script execution: {{e}}")
|
81
|
-
raise # Re-raise exception
|
82
|
-
```
|
83
|
-
|
84
|
-
**CRITICAL INSTRUCTIONS:**
|
85
|
-
- **DO NOT ask clarifying questions.** Analyze the data and instructions to infer the best approach.
|
86
|
-
- **Prepare and transform the data as needed before plotting (handle types, NaNs, aggregate, etc.).**
|
87
|
-
- **Choose the MOST APPROPRIATE chart type.**
|
88
|
-
- **You MUST generate Python script code, NOT a function definition.**
|
89
|
-
- **Your script code MUST use the pre-defined `db` and `instructions` variables.**
|
90
|
-
- **You MUST call the `execute_plotting_code` tool with your generated script code.**
|
91
|
-
- **Your final response MUST be ONLY the exact success or error message returned by the `execute_plotting_code` tool.** No extra explanations or conversation.
|
92
|
-
"""
|