satif-ai 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- satif_ai/code_builders/transformation.py +49 -11
- satif_ai/standardizers/ai_csv.py +46 -128
- {satif_ai-0.2.6.dist-info → satif_ai-0.2.8.dist-info}/METADATA +1 -1
- satif_ai-0.2.8.dist-info/RECORD +13 -0
- satif_ai/plot_builders/__init__.py +0 -0
- satif_ai/plot_builders/agent.py +0 -204
- satif_ai/plot_builders/prompt.py +0 -92
- satif_ai/plot_builders/tool.py +0 -146
- satif_ai-0.2.6.dist-info/RECORD +0 -17
- {satif_ai-0.2.6.dist-info → satif_ai-0.2.8.dist-info}/LICENSE +0 -0
- {satif_ai-0.2.6.dist-info → satif_ai-0.2.8.dist-info}/WHEEL +0 -0
- {satif_ai-0.2.6.dist-info → satif_ai-0.2.8.dist-info}/entry_points.txt +0 -0
satif_ai/code_builders/transformation.py
CHANGED

@@ -19,6 +19,37 @@ OUTPUT_TARGET_FILES: Optional[Dict[Union[str, Path], str]] = None
 SCHEMA_ONLY: Optional[bool] = None


+def _format_comparison_output(
+    comparison_result: Dict[str, Any],
+    schema_only_mode: Optional[bool],
+    source_file_display_name: str,
+    target_file_display_name: str,
+) -> str:
+    """
+    Formats the comparison result string, with special handling for schema_only mode
+    where files are equivalent due to being empty.
+    """
+    base_message_prefix = f"Comparison for {source_file_display_name} [SOURCE] with {target_file_display_name} [TARGET]:"
+
+    if schema_only_mode is True and comparison_result.get("are_equivalent") is True:
+        details = comparison_result.get("details", {})
+        row_comparison = details.get("row_comparison", {})
+
+        row_count1 = row_comparison.get("row_count1")
+        row_count2 = row_comparison.get("row_count2")
+
+        if (
+            isinstance(row_count1, (int, float))
+            and row_count1 == 0
+            and isinstance(row_count2, (int, float))
+            and row_count2 == 0
+        ):
+            return f"{base_message_prefix} Files have the same headers but are both empty (no data rows). This should not happen. Please verify the instructions and try again."
+
+    # Default formatting if the special condition isn't met
+    return f"{base_message_prefix} {comparison_result}"
+
+
 @function_tool
 async def execute_transformation(code: str) -> str:
     """Executes the transformation code on the input and returns the
@@ -34,7 +65,7 @@ async def execute_transformation(code: str) -> str:
     generated_output_path = code_transformer.export(INPUT_SDIF_PATH)

     comparisons = []
-    comparator_kwargs = {
+    comparator_kwargs = {}
     if SCHEMA_ONLY:
         comparator_kwargs["check_structure_only"] = True

@@ -54,9 +85,13 @@ async def execute_transformation(code: str) -> str:
                 comparison = comparator.compare(
                     generated_file_path, output_base_file, **comparator_kwargs
                 )
-
-
+                formatted_message = _format_comparison_output(
+                    comparison,
+                    SCHEMA_ONLY,
+                    generated_file_path,
+                    output_target_file_name,
                 )
+                comparisons.append(formatted_message)
             else:
                 comparisons.append(
                     f"Error: {output_target_file_name} not found in the generated output"
@@ -70,9 +105,13 @@ async def execute_transformation(code: str) -> str:
             comparison = comparator.compare(
                 generated_output_path, output_file, **comparator_kwargs
             )
-
-
+            formatted_message = _format_comparison_output(
+                comparison,
+                SCHEMA_ONLY,
+                str(generated_output_path),
+                output_target_file_name,
             )
+            comparisons.append(formatted_message)
         else:
             comparisons.append(
                 "Error: Single output file generated but multiple target files expected"
@@ -111,7 +150,7 @@ class TransformationAsyncCodeBuilder(AsyncCodeBuilder):
         sdif: Path,  # This will now be relative to project root (MCP server CWD)
         output_target_files: Dict[Union[str, Path], str] | List[Path],
         output_sdif: Optional[Path] = None,  # This will now be relative or None
-        instructions:
+        instructions: str = "",
         schema_only: bool = False,
         representer_options_for_build: Optional[Dict[str, Any]] = None,
     ) -> str:
@@ -220,11 +259,10 @@ class TransformationAsyncCodeBuilder(AsyncCodeBuilder):
                 "output_schema": output_schema_text,
                 "output_sample": output_sample_text
                 if not SCHEMA_ONLY
-                else "Sample not available.",
-                "output_representation": str(
-
-
-                "instructions": instructions,
+                else "Sample not available. File is empty (no data).",
+                "output_representation": str(output_representation),
+                "instructions": instructions
+                or "No instructions provided. Use the output example.",
             },
         )
         agent = Agent(
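For reference, a minimal sketch of how the new `_format_comparison_output` helper behaves (the shape of the comparator's result dict is assumed from the keys the helper reads above; file names are illustrative):

    comparison = {
        "are_equivalent": True,
        "details": {"row_comparison": {"row_count1": 0, "row_count2": 0}},
    }
    message = _format_comparison_output(comparison, True, "generated.csv", "orders.csv")
    # In schema-only mode with two empty files, the helper returns the special warning:
    # "Comparison for generated.csv [SOURCE] with orders.csv [TARGET]: Files have the
    #  same headers but are both empty (no data rows). ..."
    # Any other result falls through to "<prefix> <raw comparison dict>".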
satif_ai/standardizers/ai_csv.py
CHANGED
@@ -12,12 +12,13 @@ from agents import Agent, Runner, function_tool
 from agents.mcp.server import MCPServerStdio
 from charset_normalizer import detect
 from mcp import ClientSession
-from satif_core.types import Datasource, SDIFPath
+from satif_core.types import Datasource, SDIFPath, StandardizationResult
 from satif_sdk.standardizers.csv import (
+    CSVStandardizer,
+)
+from satif_sdk.utils import (
     DELIMITER_SAMPLE_SIZE,
     ENCODING_SAMPLE_SIZE,
-    CSVStandardizer,
-    SkipColumnsConfig,
 )

 logger = logging.getLogger(__name__)
@@ -50,7 +51,7 @@ You are an expert CSV Data Standardization Agent. Your mission is to analyze a g

 3. **Column Analysis and Definition:**
     *   For **each column** you identify that should be included in the final table:
-        *   `
+        *   `original_identifier` (string): This is how the column is found in the *raw CSV data*.
             *   If `has_header` is true, this is the **exact original header name** from the CSV.
             *   If `has_header` is false, this is a **string representation of the 0-based column index** (e.g., "0", "1", "2").
         *   `final_column_name` (string): This is the desired name for the column in the SDIF database table. It **MUST** be:
@@ -76,7 +77,7 @@ You are an expert CSV Data Standardization Agent. Your mission is to analyze a g
   "skip_rows": 0, // Integer for initial N, or sorted list of 0-based indices e.g. [0, 1, 5]
   "columns": [
     {{
-      "
+      "original_identifier": "original_header_or_index_string",
       "final_column_name": "sanitized_snake_case_name",
       "description": null // Or string value. Null or omit if not generated.
     }}
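For illustration, an analysis result matching this structure might look like the following hypothetical example for a headerless, semicolon-delimited file (values invented; only the keys the standardizer later consumes are shown):

    ai_params = {
        "table_name": "sales",
        "table_description": None,
        "encoding": "utf-8",
        "delimiter": ";",
        "has_header": False,
        "skip_rows": [0, 1],  # 0-based indices of metadata rows to skip
        "columns": [
            {"original_identifier": "0", "final_column_name": "order_date", "description": None},
            {"original_identifier": "1", "final_column_name": "amount_eur", "description": None},
        ],
    }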
@@ -88,19 +89,18 @@ You are an expert CSV Data Standardization Agent. Your mission is to analyze a g
 **Tools Available:**
 - `read_csv_sample(encoding: str, delimiter: str, skip_initial_rows: int = 0, row_limit: int = 20, include_row_indices: bool = False)`: Reads a sample from the *beginning* of the file. Crucial for header and initial structure.
 - `read_raw_lines(encoding: str, line_limit: int = 50, start_line: int = 0)`: Reads raw lines. Useful for finding specific rows to skip (empty, repeated headers, footers) by their 0-based index.
-- `get_file_chunk(encoding: str, start_byte: int = 0, end_byte: int = 4096)`: Reads a raw chunk. Good for diagnosing encoding/delimiter issues if `read_csv_sample` returns garbled data or errors.

 **General Workflow Guidance:**
-1. **Initial Probe & Core Params:** Use `read_csv_sample` with initial hints (and `include_row_indices=True`) to examine the first few rows. Verify/correct `encoding` and `delimiter`. If `read_csv_sample` reports errors or shows garbled data
+1. **Initial Probe & Core Params:** Use `read_csv_sample` with initial hints (and `include_row_indices=True`) to examine the first few rows. Verify/correct `encoding` and `delimiter`. If `read_csv_sample` reports errors or shows garbled data. Determine `has_header` by looking at the first non-skipped row.
 2. **Identify Skip Rows:**
     *   If there's metadata/comments at the top, determine how many initial rows to skip and use that for `skip_rows` (integer value).
     *   Use `read_raw_lines` to scan for other rows to skip (e.g., empty lines, comment lines, repeated headers mid-file, summary footers). Collect all 0-based indices of such rows. If you have specific indices, `skip_rows` should be a sorted list of these indices. If you only skip initial N rows, it's an integer.
 3. **Column Identification & Definition:**
     *   After settling `skip_rows` and `has_header`, call `read_csv_sample` again with `skip_initial_rows` set appropriately (if `skip_rows` is an int) to see the clean data rows and the header (if present).
-    *   If `has_header` is true, the first row from this clean sample gives you the `
-    *   If `has_header` is false, the `
+    *   If `has_header` is true, the first row from this clean sample gives you the `original_identifier` values (original header names).
+    *   If `has_header` is false, the `original_identifier` for each column will be its 0-based index as a string (e.g., "0", "1", "2", ... for as many columns as you see in the first data row).
     *   For each column you decide to include:
-        *   Determine its `
+        *   Determine its `original_identifier`.
         *   Create a clean, descriptive `final_column_name` (snake_case).
         *   If (and ONLY IF) necessary, write a `description` for that column.
 4. **Table Naming & Description:** Based on the clean data and column names, formulate a `table_name` and, if valuable, a `table_description`.
@@ -273,60 +273,6 @@ async def read_raw_lines(
     )


-@function_tool
-async def get_file_chunk(
-    encoding: str, start_byte: int | None, end_byte: int | None
-) -> str:
-    if start_byte is None:
-        start_byte = 0
-    if end_byte is None:
-        end_byte = 4096
-    context = _CURRENT_AI_CSV_TOOL_CONTEXT.get()
-    if not context or not context.file_path or not context.file_path.exists():
-        return json.dumps({"error": "File path not found in tool context."})
-    if start_byte < 0 or end_byte < start_byte:
-        return json.dumps({"error": "Invalid byte range specified."})
-
-    chunk_text = ""
-    error_message = None
-    bytes_read = 0
-    try:
-        with open(context.file_path, "rb") as fb:
-            file_size = context.file_path.stat().st_size
-            effective_start_byte = min(start_byte, file_size)
-            fb.seek(effective_start_byte)
-            bytes_to_read = max(0, min(end_byte, file_size) - effective_start_byte)
-            if bytes_to_read > 0:
-                chunk_bytes = fb.read(bytes_to_read)
-                bytes_read = len(chunk_bytes)
-                chunk_text = chunk_bytes.decode(encoding, errors="replace")
-            else:
-                chunk_text = ""
-        return json.dumps(
-            {
-                "chunk": chunk_text,
-                "bytes_read": bytes_read,
-                "requested_range": [start_byte, end_byte],
-                "error": None,
-            }
-        )
-    except (UnicodeDecodeError, ValueError) as e:
-        error_message = f"Failed to decode file chunk: {e}. Used encoding '{encoding}'."
-    except OSError as e:
-        error_message = f"File read error: {e}."
-    except Exception as e:
-        logger.error(f"Unexpected error in get_file_chunk tool: {e}", exc_info=True)
-        error_message = f"Unexpected error reading file chunk: {str(e)}"
-    return json.dumps(
-        {
-            "error": error_message,
-            "chunk": chunk_text,
-            "bytes_read": bytes_read,
-            "requested_range": [start_byte, end_byte],
-        }
-    )
-
-
 # --- AICSVStandardizer Class ---
 class AICSVStandardizer(CSVStandardizer):  # Inherits from the enhanced CSVStandardizer
     def __init__(
@@ -337,19 +283,18 @@ class AICSVStandardizer(CSVStandardizer): # Inherits from the enhanced CSVStand
         # --- Initial Hints (Optional) ---
         initial_delimiter: Optional[str] = None,
         initial_encoding: Optional[str] = None,
-        # --- Base Class Args Passthrough (some will be overridden by AI) ---
-        default_skip_columns: SkipColumnsConfig = None,  # Keep for base if AI doesn't define cols
     ):
+        # AI will determine the file_configs
         super().__init__(
-            delimiter=None,
-            encoding=None,
-            has_header=True,
-            skip_rows=0,
-            skip_columns=
-            descriptions=None,
-            table_names=None,
-            file_configs=None,
-            column_definitions=None,
+            delimiter=None,
+            encoding=None,
+            has_header=True,
+            skip_rows=0,
+            skip_columns=None,
+            descriptions=None,
+            table_names=None,
+            file_configs=None,
+            column_definitions=None,
         )

         self.mcp_servers = [mcp_server] if mcp_server else []
@@ -357,7 +302,6 @@ class AICSVStandardizer(CSVStandardizer): # Inherits from the enhanced CSVStand
         self.llm_model = llm_model
         self._initial_delimiter_hint = initial_delimiter
         self._initial_encoding_hint = initial_encoding
-        # self.generate_description from prompt structure (table_description, column descriptions)

     async def _get_initial_guesses(self, file_path: Path) -> Tuple[str, str]:
         """Helper to get initial encoding and delimiter guesses for a single file."""
@@ -419,7 +363,7 @@ class AICSVStandardizer(CSVStandardizer): # Inherits from the enhanced CSVStand
         agent = Agent(
             name="CSV Detail Analyzer Agent",
             mcp_servers=self.mcp_servers,
-            tools=[read_csv_sample, read_raw_lines
+            tools=[read_csv_sample, read_raw_lines],
             model=self.llm_model,
         )
         logger.info(f"Running CSV Detail Analyzer Agent for {file_path.name}...")
@@ -469,7 +413,7 @@ class AICSVStandardizer(CSVStandardizer): # Inherits from the enhanced CSVStand
                    raise ValueError(
                        f"Each item in 'columns' list must be a dictionary. Found: {type(col_spec)}"
                    )
                req_col_keys = {"
+                req_col_keys = {"original_identifier", "final_column_name"}
                 if not req_col_keys.issubset(col_spec.keys()):
                     missing_col_keys = req_col_keys - col_spec.keys()
                     raise ValueError(
@@ -520,7 +464,7 @@ class AICSVStandardizer(CSVStandardizer): # Inherits from the enhanced CSVStand
         overwrite: bool = False,
         config: Optional[Dict[str, Any]] = None,
         **kwargs,
-    ) ->
+    ) -> StandardizationResult:
         output_path_obj = Path(output_path)

         input_paths: List[Path]
@@ -545,8 +489,6 @@ class AICSVStandardizer(CSVStandardizer): # Inherits from the enhanced CSVStand
                    f"Input CSV file not found or is not a file: {input_file_path}"
                )

-            # Create a task for each file's analysis
-            # Need to wrap _get_initial_guesses and _run_analysis_agent in a single async co-routine for gather
             async def analyze_file_task(file_path_for_task: Path):
                 logger.info(
                     f"--- Starting AI Analysis for file: {file_path_for_task.name} ---"
@@ -554,86 +496,62 @@ class AICSVStandardizer(CSVStandardizer): # Inherits from the enhanced CSVStand
                 enc_guess, delim_guess = await self._get_initial_guesses(
                     file_path_for_task
                 )
-
+                # Store the raw AI output for this file, potentially to add to StandardizationResult later
+                # This requires _run_analysis_agent to return the raw JSON string or parsed dict
+                ai_params_for_file = await self._run_analysis_agent(
                     file_path_for_task, enc_guess, delim_guess
                 )
+                return file_path_for_task, ai_params_for_file  # Return path with params

-            ai_analysis_tasks.append(
-                analyze_file_task(input_file_path)
-            )  # Pass the path to the task
+            ai_analysis_tasks.append(analyze_file_task(input_file_path))

         logger.info(f"Starting AI analysis for {len(ai_analysis_tasks)} CSV file(s)...")
+        all_ai_params_results_with_paths: List[Tuple[Path, Dict[str, Any]]] = []
         try:
-
+            all_ai_params_results_with_paths = await asyncio.gather(*ai_analysis_tasks)
         except Exception as e:
             logger.exception(f"Critical error during concurrent AI analysis phase: {e}")
             raise RuntimeError("AI analysis phase failed.") from e

         logger.info(
-            f"AI analysis complete for all {len(
+            f"AI analysis complete for all {len(all_ai_params_results_with_paths)} file(s)."
         )

-        # Aggregate parameters for the base CSVStandardizer
-        all_ai_table_names: List[str] = []
-        all_ai_table_descriptions: List[Optional[str]] = []
         all_ai_file_configs: List[Dict[str, Any]] = []
-        all_ai_column_definitions: List[
-            List[Dict[str, Any]]
-        ] = []  # List of lists of col_specs
-
-        for i, ai_params in enumerate(all_ai_params_results):
-            current_file_path = input_paths[i]  # Get corresponding input path
-            logger.info(f"Aggregating AI parameters for: {current_file_path.name}")
-            logger.info(f" AI Table Name: {ai_params['table_name']}")
-            logger.info(f" AI Encoding: {ai_params['encoding']}")
-            logger.info(f" AI Delimiter: '{ai_params['delimiter']}'")
-            logger.info(f" AI Has Header: {ai_params['has_header']}")
-            logger.info(f" AI Skip Rows: {ai_params['skip_rows']}")
-            logger.info(
-                f" AI Table Description: {ai_params.get('table_description') if ai_params.get('table_description') is not None else 'N/A'}"
-            )
-            # logger.info(f" AI Column Definitions ({len(ai_params['columns'])} cols): {ai_params['columns'][:2]}...") # Log a sample

-
-
+        for file_path, ai_params in all_ai_params_results_with_paths:
+            logger.info(f"Aggregating AI parameters for: {file_path.name}")

-
+            file_conf_for_base = {
+                "table_name": ai_params["table_name"],
+                "description": ai_params.get("table_description"),
                 "encoding": ai_params["encoding"],
                 "delimiter": ai_params["delimiter"],
                 "has_header": ai_params["has_header"],
                 "skip_rows": ai_params["skip_rows"],
-                "
+                "column_definitions": ai_params["columns"],
             }
-            all_ai_file_configs.append(
-            all_ai_column_definitions.append(
-                ai_params["columns"]
-            )  # This is List[Dict], so we append it directly
+            all_ai_file_configs.append(file_conf_for_base)

-
-
-            "Initializing final CSVStandardizer with aggregated AI parameters..."
-        )
-        final_processor = CSVStandardizer(
-            table_names=all_ai_table_names,
-            descriptions=all_ai_table_descriptions,
-            file_configs=all_ai_file_configs,
-            column_definitions=all_ai_column_definitions,
-            skip_columns=self.default_skip_columns,  # Fallback, though ideally not used if AI defines all columns
+        logger.debug(
+            f"Initializing final CSVStandardizer with aggregated AI parameters: {all_ai_file_configs}"
         )
+        final_processor = CSVStandardizer(file_configs=all_ai_file_configs)

         try:
             logger.info(
                 f"Executing batch standardization for {len(input_paths)} file(s)..."
             )
-
-                datasource=input_paths,
+            standardization_result = final_processor.standardize(
+                datasource=input_paths,
                 output_path=output_path_obj,
                 overwrite=overwrite,
             )
             logger.info(
-                f"AI CSV Standardization complete
+                f"AI CSV Standardization complete. Output: {standardization_result.output_path}"
             )
-
+
+            return standardization_result
         except Exception as e:
             logger.exception(
                 f"Error during final batch standardization step using AI parameters: {e}"
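Taken together, the reworked aggregation path can be sketched as follows; a simplified, hypothetical per-file config assembled from the agent's output and handed to the base CSVStandardizer, with the standardize call and StandardizationResult.output_path usage mirroring the lines added above (values are illustrative):

    file_conf_for_base = {
        "table_name": "sales",
        "description": None,
        "encoding": "utf-8",
        "delimiter": ";",
        "has_header": False,
        "skip_rows": [0, 1],
        "column_definitions": [
            {"original_identifier": "0", "final_column_name": "order_date", "description": None},
        ],
    }
    final_processor = CSVStandardizer(file_configs=[file_conf_for_base])
    standardization_result = final_processor.standardize(
        datasource=[Path("sales.csv")],
        output_path=Path("sales.sdif"),
        overwrite=True,
    )
    print(standardization_result.output_path)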
satif_ai-0.2.8.dist-info/RECORD
ADDED

@@ -0,0 +1,13 @@
+satif_ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+satif_ai/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+satif_ai/adapters/tidy.py,sha256=2oYj7Gz3vOQtzcpoJI4JbftWlMKvOWL8rdwthjg-zUE,19884
+satif_ai/code_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+satif_ai/code_builders/adaptation.py,sha256=E29YM0S6pMtAfB0uzSUexoeWKwXfF8iJVyYUCKWQz5k,188
+satif_ai/code_builders/transformation.py,sha256=5B7a6lDv-gqQo83F8fQeSw2gHpDgznoDfjXsASkLc60,11870
+satif_ai/standardizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+satif_ai/standardizers/ai_csv.py,sha256=c0CKnIib610GgwGqaF8NaqT_P4pZ2BupO-BTSNuIhoc,25385
+satif_ai-0.2.8.dist-info/LICENSE,sha256=kS8EN6yAaGZd7V5z6GKSn_x3ozcZltrfRky4vMPRCw8,1072
+satif_ai-0.2.8.dist-info/METADATA,sha256=CZBbNd1A-KL8eoOzmmz7bW3ue4HGOC2Qic60wQ-v6z8,670
+satif_ai-0.2.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+satif_ai-0.2.8.dist-info/entry_points.txt,sha256=Mz2SwYALjktap1bF-Q3EWBgiZVNT6QJCVsCs_fCV33Y,43
+satif_ai-0.2.8.dist-info/RECORD,,
satif_ai/plot_builders/__init__.py
File without changes
satif_ai/plot_builders/agent.py
DELETED
@@ -1,204 +0,0 @@
-import logging
-import re
-from pathlib import Path
-from typing import Optional, Union
-
-from agents import Agent, Runner
-from agents.mcp import MCPServerStdio
-from mcp import ClientSession
-
-from satif_ai.plot_builders.prompt import PLOTTING_AGENT_PROMPT
-from satif_ai.plot_builders.tool import PLOTTING_TOOL_CONTEXT, execute_plotting_code
-
-logger = logging.getLogger(__name__)
-
-
-class PlottingAgent:
-    """Agent that generates Plotly plots from SDIF data based on user instructions."""
-
-    def __init__(
-        self,
-        mcp_server: MCPServerStdio,
-        mcp_session: ClientSession,
-        llm_model: str = "o4-mini",
-    ):
-        self.mcp_server = mcp_server
-        self.mcp_session = mcp_session
-        self.llm_model = llm_model
-
-    def _parse_final_path(self, final_text: str) -> Optional[Path]:
-        """Extracts the path from the success message."""
-        # Regex to find the path after "Success: Plot saved to "
-        match = re.search(r"Success: Plot saved to (.*)", final_text)
-        if match:
-            path_str = match.group(1).strip()
-            try:
-                p = Path(path_str)
-                # Check if it seems plausible (e.g., ends with .html and absolute)
-                # Check for existence here is important
-                if p.is_absolute() and p.name.endswith(".html") and p.exists():
-                    return p
-                elif (
-                    p.exists()
-                ):  # Accept relative path if it exists (less ideal but maybe happens)
-                    logger.warning(
-                        f"Parsed path {p} is not absolute but exists. Accepting."
-                    )
-                    return p.resolve()  # Return resolved absolute path
-            except Exception as e:
-                logger.warning(f"Error validating parsed path '{path_str}': {e}")
-                pass
-        # Fallback checks remain the same
-        if "plot.html" in final_text:
-            potential_path_str = final_text.strip()
-            # Try to extract if it's just the path
-            if Path(potential_path_str).name == "plot.html":
-                try:
-                    potential_path = Path(
-                        potential_path_str
-                    ).resolve()  # Resolve relative paths
-                    if potential_path.exists():
-                        logger.warning(
-                            "Agent returned path directly instead of success message."
-                        )
-                        return potential_path
-                except Exception:
-                    pass
-
-        return None
-
-    async def generate_plot(
-        self, sdif_path: Union[str, Path], instructions: str
-    ) -> Optional[Path]:
-        """
-        Generates a Plotly plot HTML file based on instructions and SDIF data.
-
-        Args:
-            sdif_path: Path to the input SDIF database file.
-            instructions: Natural language instructions for the plot.
-
-        Returns:
-            Path to the generated HTML plot file, or None if generation failed.
-
-        Raises:
-            FileNotFoundError: If the input SDIF file does not exist.
-            RuntimeError: If agent execution fails or context cannot be fetched or plot fails.
-            Exception: For other unexpected errors.
-        """
-        input_path = sdif_path
-        # Set tool context
-        PLOTTING_TOOL_CONTEXT["input_sdif_path"] = input_path
-        PLOTTING_TOOL_CONTEXT["user_instructions"] = instructions
-        PLOTTING_TOOL_CONTEXT["output_plot_path"] = None
-
-        agent_final_output_text = (
-            "Agent did not produce final output."  # Default message
-        )
-
-        try:
-            # Get Initial Context from MCP Resources
-            logger.info(
-                f"Fetching schema and sample for {input_path}..."
-            )  # Changed level to INFO
-            input_schema_str = "Error: Could not get schema."
-            input_sample_str = "Error: Could not get sample."
-            try:
-                input_path_str = str(input_path)
-                schema_uri = f"schema://{input_path_str}"
-                sample_uri = f"sample://{input_path_str}"
-                logger.debug(f"Requesting schema URI: {schema_uri}")
-                logger.debug(f"Requesting sample URI: {sample_uri}")
-
-                input_schema_resource = await self.mcp_session.read_resource(schema_uri)
-                input_sample_resource = await self.mcp_session.read_resource(sample_uri)
-
-                input_schema_str = (
-                    input_schema_resource.contents[0].text
-                    if input_schema_resource.contents
-                    else "Error: Could not get schema (empty response)."
-                )
-                input_sample_str = (
-                    input_sample_resource.contents[0].text
-                    if input_sample_resource.contents
-                    else "Error: Could not get sample (empty response)."
-                )
-
-            except Exception as mcp_err:
-                logger.error(f"Failed to get schema/sample via MCP: {mcp_err}")
-                raise RuntimeError(
-                    f"Failed to get required context via MCP: {mcp_err}"
-                ) from mcp_err
-
-            # Format the prompt
-            formatted_prompt = PLOTTING_AGENT_PROMPT.format(
-                input_sdif_path=str(input_path),
-                input_schema=input_schema_str,
-                input_sample=input_sample_str,
-                user_instructions=instructions,
-            )
-
-            # Instantiate the Agent
-            agent = Agent(
-                name="Plotting Agent",
-                mcp_servers=[self.mcp_server],
-                tools=[execute_plotting_code],
-                model=self.llm_model,
-            )
-
-            # Run the agent
-            logger.info(f"Running Plotting Agent with model {self.llm_model}...")
-            result = await Runner.run(
-                agent,
-                input=formatted_prompt,
-            )
-
-            if not result or not result.final_output:
-                raise RuntimeError(
-                    "Plotting agent execution failed or returned no output."
-                )
-
-            agent_final_output_text = (
-                result.final_output
-            )  # Store for potential error message
-            logger.info(
-                f"Plotting Agent finished. Final output:\n{agent_final_output_text}"
-            )
-
-            # Attempt to parse the path from the agent's final confirmation
-            final_plot_path = self._parse_final_path(agent_final_output_text)
-
-            if final_plot_path:  # Path found and exists
-                logger.info(
-                    f"Successfully confirmed plot generation at: {final_plot_path}"
-                )
-                return final_plot_path
-            else:
-                final_plot_path_from_context = PLOTTING_TOOL_CONTEXT.get(
-                    "output_plot_path"
-                )
-                if (
-                    final_plot_path_from_context
-                    and final_plot_path_from_context.exists()
-                ):
-                    logger.warning(
-                        "Parsed path from final output failed, but tool context has valid path."
-                    )
-                    return final_plot_path_from_context
-                else:
-                    logger.error(
-                        "Agent finished, but could not confirm successful plot generation or find output file."
-                    )
-                    # Include agent output in error for debugging
-                    raise RuntimeError(
-                        f"Agent finished, but plot generation failed or output path couldn't be determined. Agent final output: '{agent_final_output_text}'"
-                    )  # Modified Error
-
-        except Exception as e:
-            logger.exception(f"Error during PlottingAgent generate_plot: {e}")
-            raise  # Re-raise other exceptions
-        finally:
-            # Robust context cleanup using pop
-            PLOTTING_TOOL_CONTEXT.pop("input_sdif_path", None)
-            PLOTTING_TOOL_CONTEXT.pop("user_instructions", None)
-            PLOTTING_TOOL_CONTEXT.pop("output_plot_path", None)
-            logger.debug("Cleared plotting tool context.")
satif_ai/plot_builders/prompt.py
DELETED
@@ -1,92 +0,0 @@
-# satif/plot_builders/prompt.py
-
-PLOTTING_AGENT_PROMPT = """
-You are an expert Data Visualization Agent specialized in creating insightful and interactive plots using Plotly from data stored in SDIF (SQLite) databases. You are autonomous and **must not ask clarifying questions**.
-
-**Goal:** Generate Python **script code** to create a Plotly visualization based on user instructions and data within the provided SDIF file. **Critically analyze the data (schema, sample) and instructions to infer the user's likely analytical goal. Prepare and transform the data as needed (e.g., cleaning types, handling missing values appropriately for the plot, calculating new fields), choose the most appropriate chart type (e.g., line for trends, bar for comparisons, scatter for correlations, histogram for distributions) and apply necessary data transformations (grouping, aggregation, pivoting) to best represent the data and answer the implied question in the instructions.** Use standard visualization best practices. Your objective is to produce an effective plot, not engage in conversation.
-
-**Execution Context:**
-Your code will be executed in an environment where the following variables are **already defined**:
--   `db`: An instance of `SDIFDatabase`, connected in read-only mode to the input SDIF file (`{input_sdif_path}`).
--   `instructions`: A string containing the user's request (`{user_instructions}`).
-
-**Input SDIF Context:**
-You have access to the following information about the input SDIF database (accessible via the `db` object):
-
-<input_schema>
-{input_schema}
-</input_schema>
-
-<input_sample>
-{input_sample}
-</input_sample>
-
-**Available Tools:**
-1.  `execute_sql(query: str) -> str`: Execute a read-only SQL query against the **input** SDIF database (using the available `db` object, e.g., `db.query(...)`) to inspect data further *before* writing your main plotting code. Use this only if absolutely necessary to confirm data characteristics crucial for choosing the **correct** plot type or transformation (e.g., checking cardinality for grouping, range for binning).
-2.  `execute_plotting_code(code: str) -> str`: Executes the Python **script code** you generate. Your script **MUST** use the pre-defined `db` and `instructions` variables, generate a Plotly figure, and **save it to an HTML file** named `plot.html` in the current directory (e.g., `fig.write_html('plot.html')`). This tool will return the absolute path to the generated 'plot.html' on success, or an error message on failure.
-
-**Workflow:**
-1.  **Analyze & Infer & Select:** Carefully review the user instructions, input schema, and sample data. **Infer the analytical goal. Based on the data types, cardinality, and instructions, determine the necessary data preparation steps (cleaning, type conversion, handling missing values suitable for the plot), select the *most appropriate* Plotly chart type, and identify required data aggregations (e.g., sum, mean, count) or transformations (e.g., grouping, calculating percentages, date extraction) needed to create an insightful visualization.** Do not ask for clarification.
-2.  **Explore (Minimal Use):** Only use `execute_sql` if essential for confirming data properties needed for your chosen preparation/chart/transformation strategy.
-3.  **Code Generation:** Write Python **script code** (NOT a function definition) that:
-    *   Imports necessary libraries (`pandas as pd`, `plotly.express as px` or `plotly.graph_objects as go`).
-    *   Uses the pre-defined `db` object to read the relevant data.
-    *   Uses the `instructions` string variable if helpful for parameterizing the plot (e.g., titles).
-    *   **Performs the necessary data preparation (cleaning, type conversion, handling NaNs/nulls appropriately) and transformations/aggregations identified in step 1 using pandas.**
-    *   Creates the Plotly figure using the **chosen appropriate chart type** and the prepared/transformed/aggregated data. Make axes labels clear and add an informative title.
-    *   **Crucially:** Saves the figure using `fig.write_html('plot.html')`.
-4.  **Execute:** Call the `execute_plotting_code` tool with your generated Python script code string. **You must call this tool.**
-5.  **Finalize:**
-    *   **If `execute_plotting_code` returns a success message:** Respond **only** with the success message provided by the tool (e.g., "Success: Plot saved to /path/to/plot.html").
-    *   **If `execute_plotting_code` returns an error message:** Respond **only** with the error message provided by the tool.
-
-**Example Script Code (Illustrating Transformation & Chart Choice):**
-```python
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go  # Import go if needed
-
-# Assume 'db' and 'instructions' are pre-defined
-# Assume instructions = "Show average monthly revenue trend"
-try:
-    # Infer table and columns (e.g., 'transactions' with 'date', 'revenue')
-    df = db.read_table('transactions')
-
-    # --- Data Preparation ---
-    # Ensure date is datetime type
-    df['date'] = pd.to_datetime(df['date'], errors='coerce')
-    # Ensure revenue is numeric, handle errors (e.g., fill with 0 or drop)
-    df['revenue'] = pd.to_numeric(df['revenue'], errors='coerce').fillna(0)
-    # Drop rows where date conversion failed if necessary for plot
-    df = df.dropna(subset=['date'])
-
-    # --- Transformation for Plot ---
-    # Infer appropriate transformation: Group by month and calculate mean revenue
-    df['month'] = df['date'].dt.to_period('M').astype(str)
-    df_agg = df.groupby('month')['revenue'].mean().reset_index()
-
-    # --- Plotting ---
-    # Infer appropriate chart type: Line chart for trend
-    title = f"Average Monthly Revenue Trend (based on: {{instructions[:30]}}...)"
-    fig = px.line(df_agg, x='month', y='revenue', title=title, markers=True,
-                  labels={{'revenue':'Average Revenue', 'month':'Month'}})  # Clear labels
-
-    # Save plot - THIS IS REQUIRED
-    output_path = 'plot.html'
-    fig.write_html(output_path)
-    print(f"Plot successfully saved to {{output_path}}")  # Optional print
-
-except Exception as e:
-    print(f"Error during plotting script execution: {{e}}")
-    raise  # Re-raise exception
-```
-
-**CRITICAL INSTRUCTIONS:**
-- **DO NOT ask clarifying questions.** Analyze the data and instructions to infer the best approach.
-- **Prepare and transform the data as needed before plotting (handle types, NaNs, aggregate, etc.).**
-- **Choose the MOST APPROPRIATE chart type.**
-- **You MUST generate Python script code, NOT a function definition.**
-- **Your script code MUST use the pre-defined `db` and `instructions` variables.**
-- **You MUST call the `execute_plotting_code` tool with your generated script code.**
-- **Your final response MUST be ONLY the exact success or error message returned by the `execute_plotting_code` tool.** No extra explanations or conversation.
-"""
satif_ai/plot_builders/tool.py
DELETED
@@ -1,146 +0,0 @@
-import logging
-import sqlite3  # Ensure sqlite3 is imported if needed
-from pathlib import Path
-
-import pandas as pd
-from agents import function_tool
-from satif_sdk import SDIFDatabase
-from satif_sdk.code_executors import CodeExecutionError, LocalCodeExecutor
-
-logger = logging.getLogger(__name__)
-
-# Global or context for the tool (similar to TidyAdapter)
-PLOTTING_TOOL_CONTEXT = {
-    "input_sdif_path": None,
-    "user_instructions": None,
-    "output_plot_path": None,
-}
-
-
-@function_tool
-async def execute_plotting_code(code: str) -> str:
-    """
-    Executes the provided Python plotting script code.
-    The code should use the pre-defined 'db' (SDIFDatabase instance)
-    and 'instructions' (string) variables.
-    The code MUST save the generated Plotly plot to 'plot.html'.
-    Returns the path to the saved plot on success or an error message.
-    """
-    input_sdif_path = PLOTTING_TOOL_CONTEXT.get("input_sdif_path")
-    user_instructions = PLOTTING_TOOL_CONTEXT.get("user_instructions")
-
-    if not input_sdif_path:
-        return "Error: Input SDIF path not found in tool context."
-    # Check existence *before* trying to open
-    if not Path(input_sdif_path).exists():
-        return f"Error: Input SDIF file not found at {input_sdif_path}."
-    if user_instructions is None:
-        # Allow empty instructions, pass empty string
-        user_instructions = ""
-        logger.warning(
-            "User instructions not found in tool context, using empty string."
-        )
-        # return "Error: User instructions not found in tool context."
-
-    # Use LocalCodeExecutor - WARNING: Insecure for untrusted code
-    executor = LocalCodeExecutor()
-
-    expected_output_filename = "plot.html"
-    # Resolve path relative to the current working directory
-    expected_output_path = Path(expected_output_filename).resolve()
-
-    # Clear previous plot if it exists
-    if expected_output_path.exists():
-        try:
-            expected_output_path.unlink()
-        except OSError as e:
-            logger.error(
-                f"Could not remove existing plot file {expected_output_path}: {e}"
-            )
-
-    # Prepare the extra context for the executor
-    db_instance = None
-    try:
-        # Instantiate the DB instance to be passed to the code
-        # Use read-only mode as the code should only read for plotting
-        db_instance = SDIFDatabase(input_sdif_path, read_only=True)
-
-        # Define the context that will be available to the executed code
-        code_context = {
-            "db": db_instance,
-            "instructions": user_instructions,
-            # Add any other context variables if needed by the executor/code
-        }
-
-        # The code provided by the agent is now expected to be a script
-        script_code = f"""
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from pathlib import Path
-from satif.sdif_database.database import SDIFDatabase  # Make class available for type hints etc.
-import sqlite3  # For potential use by pandas/db interaction
-
-# Pre-defined variables available:
-# db: SDIFDatabase instance connected to {input_sdif_path}
-# instructions: str = User's instructions
-
-# --- User's Script Code Start ---
-{code}
-# --- User's Script Code End ---
-"""
-        logger.debug(f"Executing plotting script code:\n---\n{code[:500]}...\n---")
-
-        # LocalCodeExecutor.execute expects 'db' and 'datasource' in its signature
-        # We pass our *actual* db instance as part of extra_context which overrides
-        # the dummy 'db' argument passed for signature compliance.
-        executor.execute(
-            code=script_code,
-            db=db_instance,  # Pass instance for signature, but it's also in extra_context
-            datasource=None,  # Not needed
-            extra_context=code_context,  # Pass db and instructions here
-        )
-
-        # Check if the expected output file was created
-        if expected_output_path.exists():
-            logger.info(
-                f"Plotting code executed successfully. Output: {expected_output_path}"
-            )
-            PLOTTING_TOOL_CONTEXT["output_plot_path"] = expected_output_path
-            return f"Success: Plot saved to {expected_output_path}"
-        else:
-            logger.error(
-                "Plotting code executed but output file 'plot.html' not found."
-            )
-            # Check if the error might be within the script's own error handling
-            # (This requires parsing the execution output, which LocalExecutor doesn't provide easily)
-            return "Error: Code executed (possibly with internal errors), but the expected output file 'plot.html' was not created."
-
-    except (
-        CodeExecutionError,
-        sqlite3.Error,
-        FileNotFoundError,
-        ValueError,
-        TypeError,
-    ) as e:
-        logger.error(f"Error executing plotting code or accessing DB: {e}")
-        # Attempt to provide more specific feedback if possible
-        error_message = f"Error executing plotting code: {e}"
-        # Look for common issues like table not found
-        if isinstance(e, pd.io.sql.DatabaseError) and "no such table" in str(e).lower():
-            error_message = f"Error executing plotting code: Table not found. {e}"
-        elif isinstance(e, KeyError):  # Pandas KeyError on column access
-            error_message = f"Error executing plotting code: Column not found or data processing error. {e}"
-
-        return error_message  # Return the formatted error
-    except Exception as e:
-        logger.exception("Unexpected error during plotting code execution via tool.")
-        return f"Unexpected Error during execution: {e}"
-    finally:
-        # Ensure the db instance created here is closed
-        if db_instance:
-            try:
-                db_instance.close()
-                logger.debug("Closed DB instance in plotting tool.")
-            except Exception as close_err:
-                logger.error(f"Error closing DB instance in plotting tool: {close_err}")
satif_ai-0.2.6.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
-satif_ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-satif_ai/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-satif_ai/adapters/tidy.py,sha256=2oYj7Gz3vOQtzcpoJI4JbftWlMKvOWL8rdwthjg-zUE,19884
-satif_ai/code_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-satif_ai/code_builders/adaptation.py,sha256=E29YM0S6pMtAfB0uzSUexoeWKwXfF8iJVyYUCKWQz5k,188
-satif_ai/code_builders/transformation.py,sha256=mO_kGYl6QYvErW1rVaToDYJ2rpE36hUmKC7HjGl4ytI,10432
-satif_ai/plot_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-satif_ai/plot_builders/agent.py,sha256=Ncw7SL9qkpRN0hw76ezSo1K8vVQK6gcXFp8x8VFwqUI,8291
-satif_ai/plot_builders/prompt.py,sha256=m0W1SsxnB9_FhIYRumkthImJbK-7KUm4dygN3kjAXGk,6877
-satif_ai/plot_builders/tool.py,sha256=MeLnG_wFoITSVWZcNFsQLCi157O4L3wItQgolBa4fAw,5994
-satif_ai/standardizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-satif_ai/standardizers/ai_csv.py,sha256=AAeTt7eqFAtayxF2b95Z_K_lnMdwBBnv2Cn-qTEpMp8,29499
-satif_ai-0.2.6.dist-info/LICENSE,sha256=kS8EN6yAaGZd7V5z6GKSn_x3ozcZltrfRky4vMPRCw8,1072
-satif_ai-0.2.6.dist-info/METADATA,sha256=qM5JQ9OJfC2zTlYlp6XyyrLdzuL1N45hYWsjgUSQxAM,670
-satif_ai-0.2.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-satif_ai-0.2.6.dist-info/entry_points.txt,sha256=Mz2SwYALjktap1bF-Q3EWBgiZVNT6QJCVsCs_fCV33Y,43
-satif_ai-0.2.6.dist-info/RECORD,,
{satif_ai-0.2.6.dist-info → satif_ai-0.2.8.dist-info}/LICENSE
File without changes

{satif_ai-0.2.6.dist-info → satif_ai-0.2.8.dist-info}/WHEEL
File without changes

{satif_ai-0.2.6.dist-info → satif_ai-0.2.8.dist-info}/entry_points.txt
File without changes