satif-ai 0.2.8__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,11 @@
  Metadata-Version: 2.3
  Name: satif-ai
- Version: 0.2.8
+ Version: 0.2.9
  Summary: AI Agents for Satif
  License: MIT
- Author: Bryan Djafer
- Author-email: bryan.djafer@syncpulse.fr
+ Author: Syncpulse
+ Maintainer: Bryan Djafer
+ Maintainer-email: bryan.djafer@syncpulse.fr
  Requires-Python: >=3.10,<4.0
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python :: 3
@@ -1,8 +1,11 @@
  [project]
  name = "satif-ai"
- version = "0.2.8"
+ version = "0.2.9"
  description = "AI Agents for Satif"
  authors = [
+ {name = "Syncpulse"}
+ ]
+ maintainers = [
  {name = "Bryan Djafer", email = "bryan.djafer@syncpulse.fr"}
  ]
  license = "MIT"
@@ -0,0 +1,19 @@
+ from .adapters.tidy import TidyAdapter
+ from .standardize import astandardize
+ from .standardizers.ai import AIStandardizer
+ from .standardizers.ai_csv import AICSVStandardizer
+ from .transform import atransform
+ from .transformation_builders.syncpulse import SyncpulseTransformationBuilder
+ from .utils import OpenAICompatibleMCP, extract_zip_archive_async, merge_sdif_files
+
+ __all__ = [
+ "astandardize",
+ "atransform",
+ "TidyAdapter",
+ "AICSVStandardizer",
+ "AIStandardizer",
+ "SyncpulseTransformationBuilder",
+ "OpenAICompatibleMCP",
+ "extract_zip_archive_async",
+ "merge_sdif_files",
+ ]
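
The new package-level `__init__.py` above makes the high-level entry points importable directly from `satif_ai`. As a hedged sketch (the file paths and event-loop setup below are illustrative, not part of this diff):

    import asyncio
    from satif_ai import astandardize

    async def main() -> None:
        # Placeholder paths; astandardize writes a single SDIF SQLite file.
        result = await astandardize("./input/orders.csv", "./output/orders.sdif", overwrite=True)
        print(result.output_path)

    asyncio.run(main())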
@@ -6,23 +6,19 @@ import shutil
  import sqlite3
  import tempfile
  from pathlib import Path
- from typing import Optional
+ from typing import Optional, Union

- # MCP and Agent imports
  from agents import Agent, Runner, function_tool
  from agents.mcp.server import MCPServerStdio
  from mcp import ClientSession
-
- # SATIF imports
  from satif_core.adapters.base import Adapter
- from satif_core.types import Datasource
+ from satif_core.types import Datasource, SDIFPath
  from satif_sdk import SDIFDatabase
  from satif_sdk.adapters.code import AdapterError, CodeAdapter

  logger = logging.getLogger(__name__)


- # --- Tidy Transformation Prompt ---
  TIDY_TRANSFORMATION_PROMPT = """
  You are an expert Data Tidying Agent for SDIF databases.
  Your task is to write Python code to transform tables within a given SDIF database into a 'tidy' format, modifying the database *in place*.
@@ -130,12 +126,11 @@ def adapt_sdif(db: SDIFDatabase) -> None:
  - Ensure pandas and other necessary libraries (like `typing`, `AdapterError`) are imported within the code string if you use them.
  """

- # --- Global context for tools ---
- # These will be set within the TidyAdapter instance when adapt is called
+
  TOOL_CONTEXT = {
  "copied_input_sdif_path": None,
  "temp_dir": None,
- "current_output_sdif_path": None, # Path generated by the tool
+ "current_output_sdif_path": None,
  }


@@ -167,13 +162,10 @@ async def execute_tidy_adaptation(code: str) -> str:
  )

  try:
- # 1. Instantiate CodeAdapter with the provided code
- # It will operate on a *copy* specified by copied_input_path
- # and write to a *new* file (_adapted suffix by default).
  adapter = CodeAdapter(
  function=code,
- function_name="adapt_sdif", # As specified in prompt
- output_suffix="_adapted_tool_run", # Give tool runs a distinct suffix
+ function_name="adapt_sdif",
+ output_suffix="_adapted_tool_run",
  )
  # Run the adaptation. It copies `copied_input_path` and modifies the copy.
  # The returned path is the newly created, adapted file.
@@ -232,9 +224,9 @@ class TidyAdapter(Adapter):

  def __init__(
  self,
- mcp_server: MCPServerStdio, # Use the server instance
- mcp_session: ClientSession, # Use the client session
- llm_model: str = "o4-mini", # Specify the LLM model
+ mcp_server: MCPServerStdio,
+ mcp_session: ClientSession,
+ llm_model: str = "o4-mini",
  max_iterations: int = 5,
  ):
  """
@@ -339,12 +331,12 @@ class TidyAdapter(Adapter):
  return code_text.strip()
  return None # Indicate no valid code found

- async def adapt(self, sdif_database: SDIFDatabase) -> Datasource:
+ async def adapt(self, sdif: Union[SDIFPath, SDIFDatabase]) -> Datasource:
  """
  Transforms the data in the input SDIF to be tidy using an AI agent.

  Args:
- sdif_database: The input SDIF database instance. Connection will be closed.
+ sdif: The input SDIF database instance. Connection will be closed.

  Returns:
  Path to the new SDIF file containing the tidied data.
@@ -354,13 +346,16 @@ class TidyAdapter(Adapter):
  RuntimeError: If the agent fails to produce valid tidy code.
  Exception: For unexpected errors during the process.
  """
- input_path = Path(sdif_database.path)
+ if isinstance(sdif, SDIFDatabase):
+ input_path = Path(sdif.path)
+ else:
+ input_path = sdif
  if not input_path.exists():
  raise FileNotFoundError(f"Input SDIF file not found: {input_path}")

  # Ensure the input DB connection is closed before copying
  try:
- sdif_database.close()
+ sdif.close()
  except Exception:
  pass

@@ -372,17 +367,14 @@ class TidyAdapter(Adapter):
  input_schema_dict = db.get_schema()
  input_sample_dict = db.get_sample_analysis()

- # Get SDIFDatabase method signatures
  sdif_methods_str = self._get_sdif_methods()

- # Prepare context for the prompt
  initial_context = {
  "input_schema": json.dumps(input_schema_dict, indent=2),
  "input_sample": json.dumps(input_sample_dict, indent=2),
  "sdif_database_methods": sdif_methods_str,
  }

- # Instantiate the Agent
  agent = Agent(
  name="Tidy SDIF Adapter Agent",
  mcp_servers=[self.mcp_server],
@@ -390,8 +382,6 @@ class TidyAdapter(Adapter):
  model=self.llm_model,
  )

- # Run the agent using the Runner
- # Pass the prompt and initial context
  logger.info(f"Running Tidy Agent with model {self.llm_model}...")
  result = await Runner.run(
  agent,
@@ -409,7 +399,6 @@ class TidyAdapter(Adapter):
  f"Agent finished. Final output message:\n{result.final_output[:500]}..."
  )

- # Parse the final code from the agent's response
  final_code = self.parse_code(result.final_output)

  if not final_code:
@@ -421,20 +410,16 @@ class TidyAdapter(Adapter):
  logger.info(
  "Successfully parsed final adaptation code from agent response."
  )
- # print(f"--- Final Code ---\n{final_code}\n------------------") # Debugging

- # Execute the *final* code using CodeAdapter directly to create the definitive output
  logger.info("Executing final adaptation code...")
  final_adapter = CodeAdapter(
  function=final_code,
  function_name="adapt_sdif",
- output_suffix="_tidy_final", # Use a distinct suffix for the final output
+ output_suffix="_tidy_final",
  )
- # Adapt the *original* copied input path
+
  final_adapted_path = final_adapter.adapt(copied_input_path)

- # Move the final successful output SDIF to a persistent location
- # Example: place it next to the original input file
  persistent_output_path = (
  input_path.parent / final_adapted_path.name
  ).resolve()
@@ -444,9 +429,7 @@ class TidyAdapter(Adapter):
  )
  persistent_output_path.unlink()

- shutil.move(
- str(final_adapted_path), persistent_output_path
- ) # Move needs strings sometimes
+ shutil.move(str(final_adapted_path), persistent_output_path)
  logger.info(
  f"Successfully generated final tidy SDIF: {persistent_output_path}"
  )
@@ -455,8 +438,6 @@ class TidyAdapter(Adapter):

  except Exception as e:
  logger.exception(f"Error during TidyAdapter adapt process: {e}")
- # Re-raise or handle as appropriate
  raise
  finally:
- # Always clean up temporary files
  self._cleanup_temp_env()
@@ -0,0 +1,112 @@
+ from pathlib import Path
+ from typing import Any, Dict, Optional, Union
+
+ from satif_core.standardizers.base import AsyncStandardizer
+ from satif_core.types import Datasource, FilePath, SDIFPath, StandardizationResult
+
+ from satif_ai.adapters.tidy import TidyAdapter
+ from satif_ai.standardizers.ai import AIStandardizer
+
+
+ async def astandardize(
+ datasource: Datasource,
+ output_path: SDIFPath,
+ *,
+ overwrite: bool = False,
+ sdif_schema: Optional[Union[FilePath, Dict[str, Any]]] = None,
+ tidy_adapter: Union[bool, TidyAdapter] = False,
+ config: Optional[Dict[str, Any]] = None,
+ standardizer: Optional[AsyncStandardizer] = None,
+ mcp_server: Optional[Any] = None,
+ mcp_session: Optional[Any] = None,
+ llm_model: Optional[str] = None,
+ ) -> StandardizationResult:
+ """
+ Asynchronously standardizes a datasource into a single, canonical SDIF SQLite file.
+
+ This function serves as the primary entry point for the SATIF standardization layer.
+ It orchestrates the conversion of various input file formats (e.g., CSV, Excel, PDF)
+ from the provided datasource into a unified SDIF (Standard Data Interchange Format)
+ SQLite file. The process may involve AI-driven parsing, schema adaptation, and
+ data tidying, depending on the configuration and the capabilities of the
+ underlying standardizer.
+
+ Args:
+ datasource: The source of the data to be standardized. This can be a
+ single file path (str or Path), a list of file paths, or other
+ datasource types supported by the chosen standardizer.
+ output_path: The path (str or Path) where the output SDIF SQLite database file
+ will be created (e.g., "./output/my_data.sdif").
+ overwrite: If True, an existing SDIF file at `output_path` will be
+ overwritten. Defaults to False.
+ sdif_schema: Optional. Path to an SDIF schema definition file (e.g., a JSON file)
+ or a dictionary representing the schema. If provided, the
+ standardization process (specifically if using the default
+ `AIStandardizer`) may attempt to adapt the data to this
+ target schema.
+ tidy_adapter: Optional. If True, a default `TidyAdapter` may be used.
+ Alternatively, a specific `TidyAdapter` instance can be provided
+ to perform data tidying processes (e.g., cleaning, normalization,
+ restructuring tables). If False (default), no explicit tidying
+ step is initiated by this top-level function, though underlying
+ standardizers might perform their own internal tidying.
+ The specifics depend on the standardizer's capabilities.
+ config: Optional. A dictionary for advanced or standardizer-specific
+ configurations. This config is passed directly to the
+ `standardize` method of the chosen standardizer.
+ standardizer: Optional. An instance of an `AsyncStandardizer` subclass.
+ If provided, this instance will be used for standardization.
+ This allows for using pre-configured or custom standardizers.
+ If None, a default `AIStandardizer` is instantiated using
+ `mcp_server`, `mcp_session`, `llm_model`, `sdif_schema`,
+ and `tidy_adapter`.
+ mcp_server: Optional. The MCP (Model Context Protocol) server instance.
+ Used if `standardizer` is None for the default `AIStandardizer`.
+ mcp_session: Optional. The MCP session or transport object.
+ Used if `standardizer` is None for the default `AIStandardizer`.
+ llm_model: Optional. The language model to be used by the default `AIStandardizer`
+ if no `standardizer` instance is provided (e.g., "gpt-4o").
+ Each standardizer may have its own default model.
+
+ Returns:
+ A `StandardizationResult` object containing:
+ - `output_path`: The absolute `Path` to the created or updated SDIF database file.
+ - `file_configs`: An optional dictionary detailing configurations used for
+ each processed input file, if applicable and returned by
+ the standardizer.
+
+ Raises:
+ FileNotFoundError: If the `datasource` (or parts of it) does not exist.
+ FileExistsError: If `output_path` exists and `overwrite` is False.
+ ValueError: If input arguments are invalid (e.g., unsupported datasource type).
+ RuntimeError: For general errors during the standardization process.
+ Specific exceptions may also be raised by the underlying
+ standardizer implementation.
+ """
+ if standardizer is None:
+ standardizer = AIStandardizer(
+ mcp_server=mcp_server,
+ mcp_session=mcp_session,
+ llm_model=llm_model,
+ sdif_schema=sdif_schema,
+ tidy_adapter=tidy_adapter
+ if isinstance(tidy_adapter, TidyAdapter)
+ else (TidyAdapter() if tidy_adapter else None),
+ )
+
+ result = await standardizer.standardize(
+ datasource=datasource,
+ output_path=output_path,
+ overwrite=overwrite,
+ config=config,
+ )
+
+ output_sdif_path = (
+ Path(result.output_path)
+ if isinstance(result.output_path, str)
+ else result.output_path
+ )
+
+ return StandardizationResult(
+ output_path=output_sdif_path, file_configs=result.file_configs
+ )
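
As the body above shows, when no `standardizer` is supplied, `astandardize` assembles a default `AIStandardizer` and turns `tidy_adapter=True` into a fresh `TidyAdapter()`. A hedged call sketch (paths and model name are illustrative; enabling the tidy adapter would additionally need MCP server/session wiring for the tidy agent):

    from satif_ai import astandardize

    # Inside an async context:
    result = await astandardize(
        datasource=["./data/a.csv", "./data/b.csv"],
        output_path="./out/combined.sdif",
        overwrite=True,
        llm_model="gpt-4o",
        tidy_adapter=False,
    )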
@@ -0,0 +1,485 @@
+ import asyncio
+ import logging
+ import shutil
+ import tempfile
+ import uuid
+ from collections import defaultdict
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple, Type, Union
+
+ from satif_core.standardizers.base import AsyncStandardizer
+ from satif_core.types import Datasource, FilePath, SDIFPath, StandardizationResult
+
+ from satif_ai.adapters.tidy import TidyAdapter
+ from satif_ai.utils.merge_sdif import merge_sdif_files
+ from satif_ai.utils.zip import extract_zip_archive_async
+
+ from .ai_csv import AICSVStandardizer
+
+ logger = logging.getLogger(__name__)
+
+
+ class AIStandardizer(AsyncStandardizer):
+ """
+ Orchestrates the standardization of various file types using specialized AI standardizers.
+ It processes a datasource, which can include individual files or ZIP archives.
+ Files are dispatched to appropriate AI agents (e.g., AICSVStandardizer),
+ and their SDIF outputs are merged into a single, final SDIF.
+ """
+
+ def __init__(
+ self,
+ mcp_server: Optional[Any] = None,
+ mcp_session: Optional[Any] = None,
+ llm_model: Optional[str] = None,
+ sdif_schema: Optional[Union[FilePath, Dict[str, Any]]] = None,
+ tidy_adapter: Optional[TidyAdapter] = None,
+ ):
+ self.mcp_server = mcp_server
+ self.mcp_session = mcp_session
+ self.llm_model = llm_model
+ self.sdif_schema = sdif_schema # TODO: Implement schema adaptation logic
+ self.tidy_adapter = tidy_adapter # TODO: Implement tidying logic
+
+ self.ai_standardizer_map: Dict[str, Type[AsyncStandardizer]] = {
+ ".csv": AICSVStandardizer,
+ # Future standardizers:
+ # ".xlsx": AIXLSXStandardizer,
+ # ".pdf": AIPDFStandardizer,
+ # ".json": AIJSONStandardizer,
+ # ".xml": AIXMLStandardizer,
+ }
+ for ext, standardizer_class in self.ai_standardizer_map.items():
+ if not issubclass(standardizer_class, AsyncStandardizer):
+ raise TypeError(
+ f"Standardizer for '{ext}' ({standardizer_class.__name__}) "
+ "must inherit from AsyncStandardizer."
+ )
+
+ def _get_ai_standardizer_class(
+ self, extension: str
+ ) -> Optional[Type[AsyncStandardizer]]:
+ return self.ai_standardizer_map.get(extension.lower())
+
+ async def _resolve_input_files(
+ self, datasource: Datasource, temp_processing_dir: Path
+ ) -> List[Path]:
+ """
+ Resolves the input datasource to a list of individual file paths.
+ Handles single files, lists of files, and extracts ZIP archives.
+ """
+ input_file_paths: List[Path] = []
+ raw_paths_to_check: List[Union[str, Path]] = []
+
+ if isinstance(datasource, (str, Path)):
+ raw_paths_to_check = [datasource]
+ elif isinstance(datasource, list) and all(
+ isinstance(p, (str, Path)) for p in datasource
+ ):
+ raw_paths_to_check = datasource
+ else:
+ # This also catches the case where datasource is an empty list initially
+ raise ValueError(
+ "Datasource must be a non-empty file path (string or Path) or a non-empty list of such paths."
+ )
+
+ if not raw_paths_to_check: # Should be caught by above, but defensive
+ raise ValueError("No input datasource paths provided.")
+
+ for raw_path_item in raw_paths_to_check:
+ raw_path = Path(raw_path_item).resolve()
+ if not raw_path.exists():
+ raise FileNotFoundError(f"Input path not found: {raw_path}")
+
+ if raw_path.is_file():
+ if raw_path.suffix.lower() == ".zip":
+ zip_extract_target = (
+ temp_processing_dir
+ / f"extracted_{raw_path.stem}_{uuid.uuid4().hex[:8]}"
+ )
+ try:
+ extracted_from_zip = await extract_zip_archive_async(
+ raw_path, zip_extract_target
+ )
+ input_file_paths.extend(extracted_from_zip)
+ except Exception as e_zip:
+ logger.error(
+ f"Failed to extract ZIP archive '{raw_path}': {e_zip}",
+ exc_info=True,
+ )
+ # Decide if one failed zip should stop all, or just be skipped.
+ # For now, skipping problematic zips.
+ continue
+ else:
+ input_file_paths.append(raw_path)
+ elif raw_path.is_dir():
+ logger.info(f"Processing directory datasource: {raw_path}")
+ for child_item in raw_path.iterdir():
+ if child_item.is_file():
+ input_file_paths.append(child_item)
+ # Deeper recursion to be implemented.
+ else:
+ logger.warning(
+ f"Input path '{raw_path}' is not a file or directory and will be ignored."
+ )
+
+ if not input_file_paths:
+ # This means all inputs were invalid, unresolvable, or zips failed etc.
+ logger.error("No processable files found after resolving datasource.")
+ raise ValueError("Datasource resolution resulted in no processable files.")
+ return input_file_paths
+
+ def _group_files_by_standardizer(
+ self, file_paths: List[Path]
+ ) -> Tuple[Dict[Type[AsyncStandardizer], List[Path]], List[Path]]:
+ """Groups files by the AI standardizer responsible for them based on extension."""
+ grouped: Dict[Type[AsyncStandardizer], List[Path]] = defaultdict(list)
+ unsupported_files: List[Path] = []
+ for file_path in file_paths:
+ standardizer_class = self._get_ai_standardizer_class(file_path.suffix)
+ if standardizer_class:
+ grouped[standardizer_class].append(file_path)
+ else:
+ unsupported_files.append(file_path)
+ if unsupported_files:
+ logger.warning(
+ f"Unsupported files found and will be ignored: "
+ f"{[str(f.name) for f in unsupported_files]}"
+ )
+ return grouped, unsupported_files
+
+ async def _process_file_groups(
+ self,
+ grouped_files: Dict[Type[AsyncStandardizer], List[Path]],
+ temp_sdif_dir: Path,
+ config: Optional[Dict[str, Any]],
+ **kwargs,
+ ) -> Tuple[List[Path], List[Dict[str, Any]]]:
+ """
+ Processes groups of files using their respective AI standardizers.
+ Child standardizers are expected to produce a single SDIF SQLite file.
+
+ Returns:
+ A tuple containing:
+ - List of Paths to successfully created intermediate SDIF SQLite files.
+ - List of aggregated file configurations from child standardizers.
+ """
+ processing_tasks = []
+ standardizer_instances_info = []
+
+ for standardizer_class, files_in_group in grouped_files.items():
+ if not files_in_group:
+ continue
+
+ standardizer_init_kwargs = {}
+ # TODO: Pass standardizer-specific config from main 'config' if available for this standardizer_class
+
+ try:
+ ai_child_standardizer = standardizer_class(
+ mcp_server=self.mcp_server,
+ mcp_session=self.mcp_session,
+ llm_model=self.llm_model,
+ **standardizer_init_kwargs,
+ )
+ except Exception as e:
+ logger.error(
+ f"Failed to initialize standardizer {standardizer_class.__name__} for '{files_in_group[0].name}': {e}",
+ exc_info=True,
+ )
+ raise RuntimeError(
+ f"Initialization failed for {standardizer_class.__name__}: {e}"
+ )
+
+ # Generate a unique filename for the intermediate SDIF SQLite file
+ intermediate_sdif_filename = f"intermediate_{standardizer_class.__name__}_{uuid.uuid4().hex[:12]}.sdif"
+ intermediate_sdif_file_path = temp_sdif_dir / intermediate_sdif_filename
+
+ logger.info(
+ f"Queueing standardization for {len(files_in_group)} file(s) "
+ f"with {standardizer_class.__name__} (output file: {intermediate_sdif_file_path})"
+ )
+
+ task = ai_child_standardizer.standardize(
+ datasource=files_in_group,
+ output_path=intermediate_sdif_file_path,
+ overwrite=True, # Temporary intermediate files are always new/overwritten
+ config=config,
+ **kwargs,
+ )
+ processing_tasks.append(task)
+ standardizer_instances_info.append(
+ {
+ "class_name": standardizer_class.__name__,
+ "output_file": intermediate_sdif_file_path,
+ }
+ )
+
+ gathered_outputs = await asyncio.gather(
+ *processing_tasks, return_exceptions=True
+ )
+
+ successful_intermediate_sdif_files: List[Path] = []
+ aggregated_file_configs: List[Dict[str, Any]] = []
+
+ for i, result_or_exc in enumerate(gathered_outputs):
+ info = standardizer_instances_info[i]
+ expected_output_file: Path = info["output_file"]
+
+ if isinstance(result_or_exc, StandardizationResult):
+ # Child standardizer's output_path should be a file path.
+ child_reported_output_file = Path(result_or_exc.output_path)
+
+ if not child_reported_output_file.is_file():
+ logger.error(
+ f"Standardizer {info['class_name']} reported success, but its output path "
+ f"'{child_reported_output_file}' is not a file or does not exist. Skipping."
+ )
+ continue # Skip this problematic result
+
+ if (
+ child_reported_output_file.resolve()
+ != expected_output_file.resolve()
+ ):
+ logger.warning(
+ f"Standardizer {info['class_name']} reported output file '{child_reported_output_file}' "
+ f"which differs from expected '{expected_output_file}'. Using reported path."
+ )
+
+ logger.info(
+ f"Successfully standardized group with {info['class_name']}. "
+ f"Intermediate SDIF file: {child_reported_output_file}"
+ )
+ successful_intermediate_sdif_files.append(child_reported_output_file)
+ if result_or_exc.file_configs:
+ aggregated_file_configs.extend(result_or_exc.file_configs)
+
+ elif isinstance(result_or_exc, Exception):
+ logger.error(
+ f"Standardization by {info['class_name']} for target '{expected_output_file}' failed: {result_or_exc}",
+ exc_info=result_or_exc,
+ )
+ # Optionally, try to clean up the expected_output_file if it was created before erroring
+ if expected_output_file.exists():
+ try:
+ expected_output_file.unlink()
+ except OSError:
+ pass
+
+ return successful_intermediate_sdif_files, aggregated_file_configs
+
+ async def _consolidate_results(
+ self,
+ intermediate_sdif_files: List[Path],
+ aggregated_file_configs: Optional[List[Dict[str, Any]]],
+ final_sdif_file_target: Path,
+ overwrite: bool,
+ ) -> StandardizationResult:
+ """
+ Merges or moves intermediate SDIF SQLite files to the final target SDIF SQLite file.
+ Cleans up intermediate files.
+ """
+ if not intermediate_sdif_files:
+ raise RuntimeError(
+ "No intermediate SDIF files were successfully generated to consolidate."
+ )
+
+ final_sdif_file_target.parent.mkdir(parents=True, exist_ok=True)
+
+ if final_sdif_file_target.exists():
+ if not overwrite:
+ raise FileExistsError(
+ f"Final output file {final_sdif_file_target} already exists and overwrite is False."
+ )
+ logger.info(
+ f"Overwriting existing final output file: {final_sdif_file_target}"
+ )
+ try:
+ final_sdif_file_target.unlink()
+ except OSError as e_unlink:
+ logger.error(
+ f"Could not delete existing file {final_sdif_file_target}: {e_unlink}"
+ )
+ raise # Re-raise as this is critical for overwrite
+
+ final_sdif_path_str: str
+ if len(intermediate_sdif_files) == 1:
+ source_sqlite_file = intermediate_sdif_files[0]
+ logger.info(
+ f"Moving single intermediate SDIF SQLite file '{source_sqlite_file}' to final output '{final_sdif_file_target}'."
+ )
+ try:
+ shutil.move(str(source_sqlite_file), str(final_sdif_file_target))
+ final_sdif_path_str = str(final_sdif_file_target)
+ except Exception as e_move:
+ logger.error(
+ f"Failed to move {source_sqlite_file} to {final_sdif_file_target}: {e_move}"
+ )
+ # Attempt to copy as a fallback, then try to remove source
+ try:
+ shutil.copy2(str(source_sqlite_file), str(final_sdif_file_target))
+ final_sdif_path_str = str(final_sdif_file_target)
+ source_sqlite_file.unlink(
+ missing_ok=True
+ ) # Try to clean up source after copy
+ except Exception as e_copy_fallback:
+ logger.error(
+ f"Fallback copy also failed for {source_sqlite_file}: {e_copy_fallback}"
+ )
+ raise RuntimeError(
+ f"Could not place intermediate file into final location: {e_copy_fallback}"
+ ) from e_copy_fallback
+ else:
+ logger.info(
+ f"Merging {len(intermediate_sdif_files)} intermediate SDIF SQLite files into '{final_sdif_file_target}'."
+ )
+ # merge_sdif_files must accept a list of source SQLite file paths and a target SQLite file path.
+ merged_target_path = await merge_sdif_files(
+ intermediate_sdif_files,
+ final_sdif_file_target,
+ overwrite=False, # We handled overwrite for final_sdif_file_target
+ )
+ final_sdif_path_str = str(merged_target_path)
+
+ # Clean up original intermediate files (they have been moved or their content merged)
+ for temp_file in intermediate_sdif_files:
+ if (
+ temp_file.exists()
+ and temp_file.resolve() != Path(final_sdif_path_str).resolve()
+ ): # Don't delete the final file if it was one of the intermediates (single file case)
+ try:
+ temp_file.unlink()
+ logger.debug(f"Cleaned up intermediate file: {temp_file}")
+ except Exception as e_clean_file:
+ logger.warning(
+ f"Error cleaning up intermediate file {temp_file}: {e_clean_file}"
+ )
+
+ logger.info(
+ f"Consolidation complete. Final SDIF SQLite file: {final_sdif_path_str}"
+ )
+ return StandardizationResult(
+ output_path=Path(final_sdif_path_str),
+ file_configs=aggregated_file_configs if aggregated_file_configs else None,
+ )
+
+ async def standardize(
+ self,
+ datasource: Datasource,
+ output_path: SDIFPath, # Expected to be the path to the target *SDIF file*
+ *,
+ overwrite: bool = False,
+ config: Optional[Dict[str, Any]] = None,
+ **kwargs,
+ ) -> StandardizationResult:
+ """
+ Standardizes datasource to a single SDIF SQLite file.
+
+ Args:
+ datasource: Source data (file path, list of paths, or directory path).
+ output_path: Path to the target output SDIF SQLite file (e.g., "./output/data.sdif").
+ overwrite: If True, overwrite existing output file. Defaults to False.
+ config: Optional configuration dictionary for standardizers.
+ **kwargs: Additional arguments passed to child standardizers.
+
+ Returns:
+ StandardizationResult with the path to the created SDIF SQLite file.
+ """
+ logger.info(
+ f"AIStandardizer starting process for output SDIF file: {output_path}"
+ )
+ final_sdif_file_target = Path(output_path).resolve()
+
+ if final_sdif_file_target.is_dir():
+ raise ValueError(
+ f"Target output_path '{final_sdif_file_target}' is a directory. "
+ "It must be a full file path for the target SDIF SQLite database (e.g., data.sqlite or data.sdif)."
+ )
+ if not final_sdif_file_target.suffix:
+ logger.warning(
+ f"Target output_path '{final_sdif_file_target}' has no file extension. "
+ "It should be a path to an SDIF SQLite database file (e.g., data.sqlite or data.sdif)."
+ )
+ elif final_sdif_file_target.suffix.lower() not in (".sdif", ".sqlite", ".db"):
+ logger.warning(
+ f"Target output_path '{final_sdif_file_target}' does not have a common SQLite extension. "
+ "Ensure this is the intended SQLite file path."
+ )
+
+ # Create a unique temporary directory for this standardization run
+ # This directory will hold intermediate files and ZIP extractions.
+ run_temp_dir = Path(tempfile.mkdtemp(prefix="satif_aistd_run_"))
+ intermediate_sdif_files_dir = run_temp_dir / "intermediate_sdif_files"
+ intermediate_sdif_files_dir.mkdir(parents=True, exist_ok=True)
+ file_processing_temp_dir = run_temp_dir / "file_processing_temp"
+ file_processing_temp_dir.mkdir(parents=True, exist_ok=True)
+
+ try:
+ # 1. Resolve input datasource to a list of processable file paths
+ resolved_files = await self._resolve_input_files(
+ datasource, file_processing_temp_dir
+ )
+ logger.info(f"Resolved {len(resolved_files)} file(s) for standardization.")
+
+ # 2. Group files by the AI standardizer responsible for them
+ grouped_by_std, unsupported = self._group_files_by_standardizer(
+ resolved_files
+ )
+ if not grouped_by_std:
+ user_message = (
+ "No files found that can be handled by configured AI standardizers."
+ )
+ if unsupported:
+ user_message += (
+ f" Unsupported files: {[str(f.name) for f in unsupported]}"
+ )
+ raise ValueError(user_message)
+
+ logger.debug(
+ f"File groups for standardization: { {cls.__name__: [f.name for f in paths] for cls, paths in grouped_by_std.items()} }"
+ )
+
+ # 3. Process each group of files, generating intermediate SDIF SQLite files
+ (
+ intermediate_sdif_files,
+ aggregated_file_configs,
+ ) = await self._process_file_groups(
+ grouped_by_std, intermediate_sdif_files_dir, config, **kwargs
+ )
+
+ if not intermediate_sdif_files:
+ raise RuntimeError(
+ "No intermediate SDIF SQLite files were successfully generated."
+ )
+ logger.info(
+ f"Successfully generated {len(intermediate_sdif_files)} intermediate SDIF SQLite file(s)."
+ )
+
+ # 4. Consolidate intermediate SDIF files into the final target file
+ final_result = await self._consolidate_results(
+ intermediate_sdif_files,
+ aggregated_file_configs,
+ final_sdif_file_target,
+ overwrite,
+ )
+
+ logger.info(
+ f"AIStandardizer process completed. Final SDIF file at: {final_result.output_path}"
+ )
+ return final_result
+
+ except Exception as e:
+ logger.error(f"AIStandardizer failed: {e}", exc_info=True)
+ if isinstance(e, (ValueError, FileNotFoundError, FileExistsError)):
+ raise
+ raise RuntimeError(f"AIStandardizer processing error: {e}") from e
+ finally:
+ # Clean up the entire temporary directory for this run
+ if run_temp_dir.exists():
+ try:
+ shutil.rmtree(run_temp_dir)
+ logger.info(f"Cleaned up temporary run directory: {run_temp_dir}")
+ except Exception as e_clean:
+ logger.error(
+ f"Error cleaning up temporary run directory {run_temp_dir}: {e_clean}",
+ exc_info=True,
+ )
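
Per the class above, `AIStandardizer` resolves the datasource (extracting ZIP archives and walking directories), groups files by extension (currently only `.csv` is mapped), runs the per-group standardizers concurrently, and consolidates their intermediate SDIF files. A minimal, hedged sketch of direct use (paths are illustrative):

    from satif_ai.standardizers.ai import AIStandardizer

    standardizer = AIStandardizer()  # all constructor arguments are optional
    # Inside an async context; ZIP members are extracted and dispatched automatically.
    result = await standardizer.standardize(
        datasource="./data/archive.zip",
        output_path="./out/archive.sdif",
        overwrite=True,
    )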
@@ -37,7 +37,7 @@ You are an expert CSV Data Standardization Agent. Your mission is to analyze a g
  - Encoding: {initial_encoding}
  - Delimiter: '{initial_delimiter}'

- **Your Comprehensive Task:**
+ **Your Task:**

  1. **Core Parsing Parameters:**
  * Determine the correct file `encoding` (string, e.g., "utf-8", "latin-1").
@@ -0,0 +1,121 @@
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+ from fastmcp import FastMCP
+ from fastmcp.client.transports import FastMCPTransport
+ from satif_core.code_executors.base import CodeExecutor
+ from satif_core.transformation_builders.base import AsyncTransformationBuilder
+ from satif_core.types import (
+ FilePath,
+ SDIFPath,
+ TransformationResult,
+ )
+ from satif_sdk.code_executors.local_executor import LocalCodeExecutor
+ from satif_sdk.transformers.code import CodeTransformer
+ from sdif_mcp.server import mcp
+
+ from satif_ai.transformation_builders.syncpulse import SyncpulseTransformationBuilder
+ from satif_ai.utils.openai_mcp import OpenAICompatibleMCP
+
+
+ async def atransform(
+ sdif: SDIFPath,
+ output_target_files: Dict[FilePath, str] | List[FilePath] | FilePath,
+ instructions: Optional[str] = None,
+ output_path: FilePath = Path("."),
+ *,
+ transformation_code: Optional[str] = None,
+ transformation_builder: Optional[AsyncTransformationBuilder] = None,
+ code_executor: Optional[CodeExecutor] = None,
+ mcp_server: Optional[FastMCP] = None,
+ mcp_transport: Optional[FastMCPTransport] = None,
+ llm_model: str = "o4-mini",
+ schema_only: bool = False,
+ representer_kwargs: Optional[Dict[str, Any]] = None,
+ ) -> TransformationResult:
+ """
+ Asynchronously transforms an SDIF (Standard Data Interchange Format) input using
+ an AI-generated or provided transformation code.
+
+ This function orchestrates the process of:
+ 1. Optionally generating transformation code using an AI model via a `CodeBuilder`
42
+ if `transformation_code` is not provided.
43
+ explicitly passed.
44
+ 2. Executing the transformation code using a `CodeTransformer` and a `CodeExecutor`.
45
+ 3. Exporting the results to the specified output.
46
+
47
+ Args:
48
+ sdif: Path to the input SDIF file or an `SDIFDatabase` object.
49
+ output_target_files: A dictionary mapping original example file paths (or string identifiers)
50
+ to their desired agent-facing filenames, or a list of output example
51
+ file paths, or a single output file path. These are used by the AI to understand the target
52
+ format and structure, and also by the `CodeTransformer` to determine
53
+ output filenames if the transformation result keys match.
54
+ instructions: Optional. Natural language instructions for the AI to generate
55
+ the transformation code. Used if `transformation_code` is None.
56
+ transformation_code: Optional. Pre-existing Python code for the transformation.
57
+ If None, code will be generated by the `transformation_builder`.
58
+ transformation_builder: Optional. An `AsyncTransformationBuilder` instance responsible for generating
59
+ the transformation code if `transformation_code` is not provided.
60
+ If None, a `SyncpulseTransformationBuilder` is instantiated.
+ code_executor: Optional. A `CodeExecutor` instance for running the transformation
+ code. If None, a `LocalCodeExecutor` is used.
+ mcp_server: Optional. A `FastMCP` server instance for the AI code builder.
+ Defaults to the global `mcp` instance if `transformation_builder` is None.
+ mcp_transport: Optional. A `FastMCPTransport` instance for communication with
+ the `mcp_server`. Defaults to a new transport using `mcp_server`
+ if `transformation_builder` is None.
+ llm_model: The language model to use for code generation (e.g., "o4-mini").
+ Used if `transformation_builder` is None.
+ schema_only: If True, the transformation aims to match only the schema (headers)
+ of the `output_target_files`, and input samples may be omitted or marked
+ as empty for the AI. This is useful for structural transformations
+ without processing actual data rows.
+ representer_kwargs: Optional dictionary of keyword arguments to pass to the
+ representer when analyzing `output_target_files`.
+
+ Returns:
+ A `TransformationResult` object containing the path to the output
+ and the transformation code used.
+ """
+ if transformation_builder is None:
+ if mcp_server is None:
+ mcp_server = mcp
+
+ if mcp_transport is None:
+ mcp_transport = FastMCPTransport(mcp=mcp_server)
+
+ openai_compatible_mcp = OpenAICompatibleMCP(mcp=mcp_server)
+ await openai_compatible_mcp.connect()
+
+ transformation_builder = SyncpulseTransformationBuilder(
+ mcp_server=openai_compatible_mcp,
+ mcp_session=mcp_transport,
+ llm_model=llm_model,
+ )
+
+ if transformation_code is None:
+ function_code = await transformation_builder.build(
+ sdif=sdif,
+ output_target_files=output_target_files,
+ instructions=instructions,
+ schema_only=schema_only,
+ representer_kwargs=representer_kwargs,
+ )
+ else:
+ function_code = transformation_code
+
+ if code_executor is None:
+ code_executor = LocalCodeExecutor()
+
+ transformer = CodeTransformer(
+ function=function_code,
+ code_executor=code_executor,
+ )
+
+ output_path = transformer.export(
+ sdif=sdif,
+ output_path=output_path,
+ )
+
+ return TransformationResult(output_path=output_path, function_code=function_code)
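
Since `atransform` only builds a `SyncpulseTransformationBuilder` when no `transformation_code` is given, the simplest call is the AI-driven one. A hedged sketch using only the parameters documented above (paths and instructions are illustrative):

    from satif_ai import atransform

    # Inside an async context:
    result = await atransform(
        sdif="./out/combined.sdif",
        output_target_files=["./examples/report.csv"],
        instructions="Produce one row per customer with total order value.",
        output_path="./exports",
    )
    print(result.output_path)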
@@ -8,7 +8,9 @@ from typing import Any, Dict, List, Optional, Union
  from agents import Agent, Runner, function_tool
  from agents.mcp.server import MCPServer
  from mcp import ClientSession
- from satif_core import AsyncCodeBuilder, CodeBuilder, SDIFDatabase
+ from satif_core import AsyncTransformationBuilder
+ from satif_core.types import FilePath
+ from satif_sdk.code_executors.local_executor import LocalCodeExecutor
  from satif_sdk.comparators import get_comparator
  from satif_sdk.representers import get_representer
  from satif_sdk.transformers import CodeTransformer
@@ -61,7 +63,10 @@ async def execute_transformation(code: str) -> str:
  if INPUT_SDIF_PATH is None or OUTPUT_TARGET_FILES is None:
  return "Error: Transformation context not initialized"

- code_transformer = CodeTransformer(function=code)
+ code_transformer = CodeTransformer(
+ function=code,
+ code_executor=LocalCodeExecutor(disable_security_warning=True),
+ )
  generated_output_path = code_transformer.export(INPUT_SDIF_PATH)

  comparisons = []
@@ -120,19 +125,7 @@ async def execute_transformation(code: str) -> str:
  return "\n".join(comparisons)


- class TransformationCodeBuilder(CodeBuilder):
- def __init__(self, output_example: Path | List[Path] | Dict[str, Path]):
- self.output_example = output_example
-
- def build(
- self,
- sdif: Path | SDIFDatabase,
- instructions: Optional[str] = None,
- ) -> str:
- pass
-
-
- class TransformationAsyncCodeBuilder(AsyncCodeBuilder):
+ class SyncpulseTransformationBuilder(AsyncTransformationBuilder):
  """This class is used to build a transformation code that will be used to transform a SDIF database into a set of files following the format of the given output files."""

  def __init__(
@@ -147,23 +140,18 @@ class TransformationAsyncCodeBuilder(AsyncCodeBuilder):

  async def build(
  self,
- sdif: Path, # This will now be relative to project root (MCP server CWD)
- output_target_files: Dict[Union[str, Path], str] | List[Path],
- output_sdif: Optional[Path] = None, # This will now be relative or None
+ sdif: Path,
+ output_target_files: Dict[FilePath, str] | List[FilePath] | FilePath,
+ output_sdif: Optional[Path] = None,
  instructions: str = "",
  schema_only: bool = False,
- representer_options_for_build: Optional[Dict[str, Any]] = None,
+ representer_kwargs: Optional[Dict[str, Any]] = None,
  ) -> str:
  global INPUT_SDIF_PATH, OUTPUT_TARGET_FILES, SCHEMA_ONLY
- # INPUT_SDIF_PATH is used by execute_transformation tool, needs to be accessible from where that tool runs.
- # If execute_transformation runs in the same process as the builder, absolute path is fine.
- # If it were a separate context, this might need adjustment.
- # For now, assume execute_transformation can access absolute paths if needed for its *input SDIF*.
- # However, the sdif for MCP URIs must be relative.
+
  INPUT_SDIF_PATH = Path(sdif).resolve()
  SCHEMA_ONLY = schema_only
- # Paths for MCP URIs are now expected to be relative to MCP server CWD (project root)
- # So, use them directly as strings.
+ # We must encode the path because special characters are not allowed in mcp read_resource()
  input_sdif_mcp_uri_path = base64.b64encode(str(sdif).encode()).decode()
  output_sdif_mcp_uri_path = (
  base64.b64encode(str(output_sdif).encode()).decode()
@@ -205,9 +193,14 @@ class TransformationAsyncCodeBuilder(AsyncCodeBuilder):

  # OUTPUT_TARGET_FILES keys are absolute paths to original example files for local reading by representers/comparators.
  # Values are agent-facing filenames.
- if isinstance(output_target_files, list):
+ if isinstance(output_target_files, FilePath):
+ OUTPUT_TARGET_FILES = {
+ Path(output_target_files).resolve(): Path(output_target_files).name
+ }
+ elif isinstance(output_target_files, list):
  OUTPUT_TARGET_FILES = {
- file_path.resolve(): file_path.name for file_path in output_target_files
+ Path(file_path).resolve(): Path(file_path).name
+ for file_path in output_target_files
  }
  elif isinstance(output_target_files, dict):
  temp_map = {}
@@ -229,7 +222,7 @@ class TransformationAsyncCodeBuilder(AsyncCodeBuilder):
  # Representer uses the absolute path (file_key_abs_path) to read the example file.
  representer = get_representer(file_key_abs_path)
  representation, used_params = representer.represent(
- file_key_abs_path, **(representer_options_for_build or {})
+ file_key_abs_path, **(representer_kwargs or {})
  )
  output_representation[agent_facing_name] = {
  "representation": representation,
@@ -0,0 +1,5 @@
+ from .merge_sdif import merge_sdif_files
+ from .openai_mcp import OpenAICompatibleMCP
+ from .zip import extract_zip_archive_async
+
+ __all__ = ["merge_sdif_files", "extract_zip_archive_async", "OpenAICompatibleMCP"]
@@ -0,0 +1,22 @@
+ from pathlib import Path
+ from typing import List
+
+
+ async def merge_sdif_files(sdif_paths: List[Path], output_dir: Path) -> Path:
+ """Placeholder function to merge multiple SDIF files into one.
+
+ Args:
+ sdif_paths: A list of paths to the SDIF files to merge.
+ output_dir: The directory where the merged file should be saved.
+
+ Returns:
+ Path to the merged SDIF file.
+ """
+ if not sdif_paths:
+ raise ValueError("No SDIF files provided for merging.")
+
+ if len(sdif_paths) == 1:
+ return sdif_paths[0] # No merge needed
+
+ # TODO: Implement SDIF merge
+ raise NotImplementedError("Merge not implemented yet.")
@@ -0,0 +1,97 @@
+ import logging
+ from typing import Any
+
+ from agents.mcp.server import CallToolResult, MCPServer, MCPTool
+ from fastmcp import FastMCP
+
+ logger = logging.getLogger(__name__)
+
+
+ class OpenAICompatibleMCP(MCPServer):
+ def __init__(self, mcp: FastMCP):
+ self.mcp = mcp
+ self._is_connected = False # Track connection state
+
+ async def connect(self):
+ """Connect to the server.
+ For FastMCP, connection is managed externally when the server is run.
+ This method marks the wrapper as connected.
+ """
+ # Assuming FastMCP instance is already running and configured.
+ # No specific connect action required for the FastMCP instance itself here,
+ # as its lifecycle (run, stop) is managed outside this wrapper.
+ logger.info(
+ f"OpenAICompatibleMCP: Simulating connection to FastMCP server '{self.mcp.name}'."
+ )
+ self._is_connected = True
+
+ @property
+ def name(self) -> str:
+ """A readable name for the server."""
+ return self.mcp.name
+
+ async def cleanup(self):
+ """Cleanup the server.
+ For FastMCP, cleanup is managed externally. This method marks the wrapper as disconnected.
+ """
+ # Similar to connect, actual server cleanup is external.
+ logger.info(
+ f"OpenAICompatibleMCP: Simulating cleanup for FastMCP server '{self.mcp.name}'."
+ )
+ self._is_connected = False
+
+ async def list_tools(self) -> list[MCPTool]:
+ """List the tools available on the server."""
+ if not self._is_connected:
+ # Or raise an error, depending on desired behavior for disconnected state
+ raise RuntimeError(
+ "OpenAICompatibleMCP.list_tools called while not connected."
+ )
+
+ # FastMCP's get_tools() returns a dict[str, fastmcp.tools.tool.Tool]
+ # Each fastmcp.tools.tool.Tool has a to_mcp_tool(name=key) method
+ # MCPTool is an alias for mcp.types.Tool
+ try:
+ fastmcp_tools = await self.mcp.get_tools()
+ mcp_tools_list = [
+ tool.to_mcp_tool(name=key) for key, tool in fastmcp_tools.items()
+ ]
+ return mcp_tools_list
+ except Exception as e:
+ logger.error(
+ f"Error listing tools from FastMCP server '{self.mcp.name}': {e}",
+ exc_info=True,
+ )
+ raise e
+
+ async def call_tool(
+ self, tool_name: str, arguments: dict[str, Any] | None
+ ) -> CallToolResult:
+ """Invoke a tool on the server."""
+ if not self._is_connected:
+ logger.warning(
+ f"OpenAICompatibleMCP.call_tool '{tool_name}' called while not connected."
+ )
+ # Return an error CallToolResult
+ return CallToolResult(
+ content=[{"type": "text", "text": "Server not connected"}], isError=True
+ )
+
+ try:
+ # FastMCP's _mcp_call_tool is a protected member, but seems to be what we need.
+ # It returns: list[TextContent | ImageContent | EmbeddedResource]
+ # This matches the 'content' part of CallToolResult.
+ # We need to handle potential errors and wrap the result.
+ content = await self.mcp._mcp_call_tool(tool_name, arguments or {})
+ return CallToolResult(content=content, isError=False)
+ except Exception as e:
+ logger.error(
+ f"Error calling tool '{tool_name}' on FastMCP server '{self.mcp.name}': {e}",
+ exc_info=True,
+ )
+ error_message = f"Error calling tool '{tool_name}': {type(e).__name__}: {e}"
+ # Ensure content is a list of valid MCP content items, even for errors.
+ # A TextContent is a safe choice.
+ return CallToolResult(
+ content=[{"type": "text", "text": error_message}], isError=True
+ )
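
The wrapper above adapts a `FastMCP` instance to the `MCPServer` interface expected by the `agents` SDK. A hedged sketch of wiring it up (the server name and tool are illustrative, not part of this diff):

    from fastmcp import FastMCP
    from satif_ai.utils.openai_mcp import OpenAICompatibleMCP

    fast_mcp = FastMCP("demo")

    @fast_mcp.tool()
    def ping() -> str:
        return "pong"

    # Inside an async context:
    server = OpenAICompatibleMCP(mcp=fast_mcp)
    await server.connect()
    print([tool.name for tool in await server.list_tools()])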
@@ -0,0 +1,120 @@
+ import asyncio
+ import logging
+ import zipfile
+ from pathlib import Path
+ from typing import List, Tuple
+
+ logger = logging.getLogger(__name__)
+
+ # Constants for ZIP file processing, kept local to this utility or passed as args if needed
+ _IGNORED_ZIP_MEMBER_PREFIXES = ("__MACOSX/",)
+ _IGNORED_ZIP_FILENAME_PREFIXES = ("._",)
+ _IGNORED_ZIP_FILENAMES = (".DS_Store",)
+
+
+ async def extract_zip_archive_async(
+ zip_path: Path,
+ extract_to: Path,
+ ignored_member_prefixes: Tuple[str, ...] = _IGNORED_ZIP_MEMBER_PREFIXES,
+ ignored_filename_prefixes: Tuple[str, ...] = _IGNORED_ZIP_FILENAME_PREFIXES,
+ ignored_filenames: Tuple[str, ...] = _IGNORED_ZIP_FILENAMES,
+ ) -> List[Path]:
+ """
+ Asynchronously extracts a ZIP archive to a specified directory, filtering out ignored files.
+
+ Args:
+ zip_path: Path to the ZIP archive.
+ extract_to: Directory where the contents will be extracted.
+ ignored_member_prefixes: Tuple of member path prefixes to ignore.
+ ignored_filename_prefixes: Tuple of filename prefixes to ignore.
+ ignored_filenames: Tuple of exact filenames to ignore.
+
+ Returns:
+ A list of paths to the successfully extracted files.
+
+ Raises:
+ ValueError: If the zip_path is invalid or corrupted.
+ RuntimeError: If any other error occurs during extraction.
+ """
+
+ def blocking_extract() -> List[Path]:
+ extracted_file_paths = []
+ logger.info(f"Extracting ZIP archive '{zip_path.name}' to '{extract_to}'...")
+ try:
+ extract_to.mkdir(
+ parents=True, exist_ok=True
+ ) # Ensure extract_to directory exists
+
+ with zipfile.ZipFile(zip_path, "r") as zip_ref:
+ # Security: Preliminary check for unsafe paths before extraction
+ for member_name in zip_ref.namelist():
+ if member_name.startswith(("/", "..")):
+ logger.error(
+ f"Skipping potentially unsafe path in ZIP: {member_name}"
+ )
+ # Depending on security policy, might raise an error here
+ continue
+
+ # Extract all members
+ zip_ref.extractall(extract_to)
+
+ # After extractall, collect all *file* paths, applying filters
+ # This second pass of filtering ensures that even if extractall creates them,
+ # we don't return paths to ignored files.
+ for root, _, files in extract_to.walk():
+ for filename in files:
+ full_path = root / filename
+ # Create a path relative to 'extract_to' to check against member prefixes
+ # This ensures that '__MACOSX/file.txt' is correctly ignored,
+ # not just a top-level '__MACOSX' directory.
+ try:
+ relative_path_to_check = full_path.relative_to(extract_to)
+ except ValueError:
+ # This can happen if full_path is not under extract_to,
+ # which ideally shouldn't occur if zip_ref.extractall worked as expected
+ # and target_path checks were effective.
+ logger.warning(
+ f"File {full_path} seems to be outside extraction root {extract_to}. Skipping."
+ )
+ continue
+
+ path_str_to_check_prefixes = str(relative_path_to_check)
+
+ if not (
+ any(
+ path_str_to_check_prefixes.startswith(p)
+ for p in ignored_member_prefixes
+ )
+ or any(
+ full_path.name.startswith(p)
+ for p in ignored_filename_prefixes
+ )
+ or full_path.name in ignored_filenames
+ ):
+ extracted_file_paths.append(full_path)
+ else:
+ logger.debug(f"Ignoring file post-extraction: {full_path}")
+
+ if not extracted_file_paths:
+ logger.warning(
+ f"ZIP archive '{zip_path.name}' is empty or contains no processable files after filtering."
+ )
+ else:
+ logger.info(
+ f"Successfully extracted {len(extracted_file_paths)} file(s) from '{zip_path.name}'."
+ )
+ return extracted_file_paths
+ except zipfile.BadZipFile as e:
+ logger.error(
+ f"Invalid or corrupted ZIP file: {zip_path.name}", exc_info=True
+ )
+ raise ValueError(f"Invalid or corrupted ZIP file: {zip_path.name}") from e
+ except Exception as e:
+ logger.error(
+ f"Failed to extract ZIP archive '{zip_path.name}': {e}", exc_info=True
+ )
+ raise RuntimeError(
+ f"Unexpected error during ZIP extraction for '{zip_path.name}'"
+ ) from e
+
+ return await asyncio.to_thread(blocking_extract)
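
`extract_zip_archive_async` runs the blocking extraction in a worker thread via `asyncio.to_thread` and filters out macOS metadata entries after extraction. A hedged usage sketch (paths are illustrative):

    from pathlib import Path
    from satif_ai.utils.zip import extract_zip_archive_async

    # Inside an async context:
    files = await extract_zip_archive_async(
        Path("./data/archive.zip"),
        Path("./tmp/extracted"),
    )
    print(f"Extracted {len(files)} file(s)")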
@@ -1,9 +0,0 @@
- from satif_core.code_builders.base import AsyncCodeBuilder, CodeBuilder
-
-
- class AdaptationCodeBuilder(CodeBuilder):
- pass
-
-
- class AdaptationAsyncCodeBuilder(AsyncCodeBuilder):
- pass