satif-ai 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,485 @@
+ import asyncio
+ import logging
+ import shutil
+ import tempfile
+ import uuid
+ from collections import defaultdict
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple, Type, Union
+
+ from satif_core.standardizers.base import AsyncStandardizer
+ from satif_core.types import Datasource, FilePath, SDIFPath, StandardizationResult
+
+ from satif_ai.adapters.tidy import TidyAdapter
+ from satif_ai.utils.merge_sdif import merge_sdif_files
+ from satif_ai.utils.zip import extract_zip_archive_async
+
+ from .ai_csv import AICSVStandardizer
+
+ logger = logging.getLogger(__name__)
+
+
+ class AIStandardizer(AsyncStandardizer):
+     """
+     Orchestrates the standardization of various file types using specialized AI standardizers.
+     It processes a datasource, which can include individual files or ZIP archives.
+     Files are dispatched to appropriate AI agents (e.g., AICSVStandardizer),
+     and their SDIF outputs are merged into a single, final SDIF.
+     """
+
+     def __init__(
+         self,
+         mcp_server: Optional[Any] = None,
+         mcp_session: Optional[Any] = None,
+         llm_model: Optional[str] = None,
+         sdif_schema: Optional[Union[FilePath, Dict[str, Any]]] = None,
+         tidy_adapter: Optional[TidyAdapter] = None,
+     ):
+         self.mcp_server = mcp_server
+         self.mcp_session = mcp_session
+         self.llm_model = llm_model
+         self.sdif_schema = sdif_schema  # TODO: Implement schema adaptation logic
+         self.tidy_adapter = tidy_adapter  # TODO: Implement tidying logic
+
+         self.ai_standardizer_map: Dict[str, Type[AsyncStandardizer]] = {
+             ".csv": AICSVStandardizer,
+             # Future standardizers:
+             # ".xlsx": AIXLSXStandardizer,
+             # ".pdf": AIPDFStandardizer,
+             # ".json": AIJSONStandardizer,
+             # ".xml": AIXMLStandardizer,
+         }
+         for ext, standardizer_class in self.ai_standardizer_map.items():
+             if not issubclass(standardizer_class, AsyncStandardizer):
+                 raise TypeError(
+                     f"Standardizer for '{ext}' ({standardizer_class.__name__}) "
+                     "must inherit from AsyncStandardizer."
+                 )
+
+     def _get_ai_standardizer_class(
+         self, extension: str
+     ) -> Optional[Type[AsyncStandardizer]]:
+         return self.ai_standardizer_map.get(extension.lower())
+
+     async def _resolve_input_files(
+         self, datasource: Datasource, temp_processing_dir: Path
+     ) -> List[Path]:
+         """
+         Resolves the input datasource to a list of individual file paths.
+         Handles single files, lists of files, and extracts ZIP archives.
+         """
+         input_file_paths: List[Path] = []
+         raw_paths_to_check: List[Union[str, Path]] = []
+
+         if isinstance(datasource, (str, Path)):
+             raw_paths_to_check = [datasource]
+         elif isinstance(datasource, list) and all(
+             isinstance(p, (str, Path)) for p in datasource
+         ):
+             raw_paths_to_check = datasource
+         else:
+             # This also catches the case where datasource is an empty list initially
+             raise ValueError(
+                 "Datasource must be a non-empty file path (string or Path) or a non-empty list of such paths."
+             )
+
+         if not raw_paths_to_check:  # Should be caught by above, but defensive
+             raise ValueError("No input datasource paths provided.")
+
+         for raw_path_item in raw_paths_to_check:
+             raw_path = Path(raw_path_item).resolve()
+             if not raw_path.exists():
+                 raise FileNotFoundError(f"Input path not found: {raw_path}")
+
+             if raw_path.is_file():
+                 if raw_path.suffix.lower() == ".zip":
+                     zip_extract_target = (
+                         temp_processing_dir
+                         / f"extracted_{raw_path.stem}_{uuid.uuid4().hex[:8]}"
+                     )
+                     try:
+                         extracted_from_zip = await extract_zip_archive_async(
+                             raw_path, zip_extract_target
+                         )
+                         input_file_paths.extend(extracted_from_zip)
+                     except Exception as e_zip:
+                         logger.error(
+                             f"Failed to extract ZIP archive '{raw_path}': {e_zip}",
+                             exc_info=True,
+                         )
+                         # Decide if one failed zip should stop all, or just be skipped.
+                         # For now, skipping problematic zips.
+                         continue
+                 else:
+                     input_file_paths.append(raw_path)
+             elif raw_path.is_dir():
+                 logger.info(f"Processing directory datasource: {raw_path}")
+                 for child_item in raw_path.iterdir():
+                     if child_item.is_file():
+                         input_file_paths.append(child_item)
+                 # Deeper recursion to be implemented.
+             else:
+                 logger.warning(
+                     f"Input path '{raw_path}' is not a file or directory and will be ignored."
+                 )
+
+         if not input_file_paths:
+             # This means all inputs were invalid, unresolvable, or zips failed etc.
+             logger.error("No processable files found after resolving datasource.")
+             raise ValueError("Datasource resolution resulted in no processable files.")
+         return input_file_paths
+
+     def _group_files_by_standardizer(
+         self, file_paths: List[Path]
+     ) -> Tuple[Dict[Type[AsyncStandardizer], List[Path]], List[Path]]:
+         """Groups files by the AI standardizer responsible for them based on extension."""
+         grouped: Dict[Type[AsyncStandardizer], List[Path]] = defaultdict(list)
+         unsupported_files: List[Path] = []
+         for file_path in file_paths:
+             standardizer_class = self._get_ai_standardizer_class(file_path.suffix)
+             if standardizer_class:
+                 grouped[standardizer_class].append(file_path)
+             else:
+                 unsupported_files.append(file_path)
+         if unsupported_files:
+             logger.warning(
+                 f"Unsupported files found and will be ignored: "
+                 f"{[str(f.name) for f in unsupported_files]}"
+             )
+         return grouped, unsupported_files
+
+     async def _process_file_groups(
+         self,
+         grouped_files: Dict[Type[AsyncStandardizer], List[Path]],
+         temp_sdif_dir: Path,
+         config: Optional[Dict[str, Any]],
+         **kwargs,
+     ) -> Tuple[List[Path], List[Dict[str, Any]]]:
+         """
+         Processes groups of files using their respective AI standardizers.
+         Child standardizers are expected to produce a single SDIF SQLite file.
+
+         Returns:
+             A tuple containing:
+             - List of Paths to successfully created intermediate SDIF SQLite files.
+             - List of aggregated file configurations from child standardizers.
+         """
+         processing_tasks = []
+         standardizer_instances_info = []
+
+         for standardizer_class, files_in_group in grouped_files.items():
+             if not files_in_group:
+                 continue
+
+             standardizer_init_kwargs = {}
+             # TODO: Pass standardizer-specific config from main 'config' if available for this standardizer_class
+
+             try:
+                 ai_child_standardizer = standardizer_class(
+                     mcp_server=self.mcp_server,
+                     mcp_session=self.mcp_session,
+                     llm_model=self.llm_model,
+                     **standardizer_init_kwargs,
+                 )
+             except Exception as e:
+                 logger.error(
+                     f"Failed to initialize standardizer {standardizer_class.__name__} for '{files_in_group[0].name}': {e}",
+                     exc_info=True,
+                 )
+                 raise RuntimeError(
+                     f"Initialization failed for {standardizer_class.__name__}: {e}"
+                 )
+
+             # Generate a unique filename for the intermediate SDIF SQLite file
+             intermediate_sdif_filename = f"intermediate_{standardizer_class.__name__}_{uuid.uuid4().hex[:12]}.sdif"
+             intermediate_sdif_file_path = temp_sdif_dir / intermediate_sdif_filename
+
+             logger.info(
+                 f"Queueing standardization for {len(files_in_group)} file(s) "
+                 f"with {standardizer_class.__name__} (output file: {intermediate_sdif_file_path})"
+             )
+
+             task = ai_child_standardizer.standardize(
+                 datasource=files_in_group,
+                 output_path=intermediate_sdif_file_path,
+                 overwrite=True,  # Temporary intermediate files are always new/overwritten
+                 config=config,
+                 **kwargs,
+             )
+             processing_tasks.append(task)
+             standardizer_instances_info.append(
+                 {
+                     "class_name": standardizer_class.__name__,
+                     "output_file": intermediate_sdif_file_path,
+                 }
+             )
+
+         gathered_outputs = await asyncio.gather(
+             *processing_tasks, return_exceptions=True
+         )
+
+         successful_intermediate_sdif_files: List[Path] = []
+         aggregated_file_configs: List[Dict[str, Any]] = []
+
+         for i, result_or_exc in enumerate(gathered_outputs):
+             info = standardizer_instances_info[i]
+             expected_output_file: Path = info["output_file"]
+
+             if isinstance(result_or_exc, StandardizationResult):
+                 # Child standardizer's output_path should be a file path.
+                 child_reported_output_file = Path(result_or_exc.output_path)
+
+                 if not child_reported_output_file.is_file():
+                     logger.error(
+                         f"Standardizer {info['class_name']} reported success, but its output path "
+                         f"'{child_reported_output_file}' is not a file or does not exist. Skipping."
+                     )
+                     continue  # Skip this problematic result
+
+                 if (
+                     child_reported_output_file.resolve()
+                     != expected_output_file.resolve()
+                 ):
+                     logger.warning(
+                         f"Standardizer {info['class_name']} reported output file '{child_reported_output_file}' "
+                         f"which differs from expected '{expected_output_file}'. Using reported path."
+                     )
+
+                 logger.info(
+                     f"Successfully standardized group with {info['class_name']}. "
+                     f"Intermediate SDIF file: {child_reported_output_file}"
+                 )
+                 successful_intermediate_sdif_files.append(child_reported_output_file)
+                 if result_or_exc.file_configs:
+                     aggregated_file_configs.extend(result_or_exc.file_configs)
+
+             elif isinstance(result_or_exc, Exception):
+                 logger.error(
+                     f"Standardization by {info['class_name']} for target '{expected_output_file}' failed: {result_or_exc}",
+                     exc_info=result_or_exc,
+                 )
+                 # Optionally, try to clean up the expected_output_file if it was created before erroring
+                 if expected_output_file.exists():
+                     try:
+                         expected_output_file.unlink()
+                     except OSError:
+                         pass
+
+         return successful_intermediate_sdif_files, aggregated_file_configs
+
+     async def _consolidate_results(
+         self,
+         intermediate_sdif_files: List[Path],
+         aggregated_file_configs: Optional[List[Dict[str, Any]]],
+         final_sdif_file_target: Path,
+         overwrite: bool,
+     ) -> StandardizationResult:
+         """
+         Merges or moves intermediate SDIF SQLite files to the final target SDIF SQLite file.
+         Cleans up intermediate files.
+         """
+         if not intermediate_sdif_files:
+             raise RuntimeError(
+                 "No intermediate SDIF files were successfully generated to consolidate."
+             )
+
+         final_sdif_file_target.parent.mkdir(parents=True, exist_ok=True)
+
+         if final_sdif_file_target.exists():
+             if not overwrite:
+                 raise FileExistsError(
+                     f"Final output file {final_sdif_file_target} already exists and overwrite is False."
+                 )
+             logger.info(
+                 f"Overwriting existing final output file: {final_sdif_file_target}"
+             )
+             try:
+                 final_sdif_file_target.unlink()
+             except OSError as e_unlink:
+                 logger.error(
+                     f"Could not delete existing file {final_sdif_file_target}: {e_unlink}"
+                 )
+                 raise  # Re-raise as this is critical for overwrite
+
+         final_sdif_path_str: str
+         if len(intermediate_sdif_files) == 1:
+             source_sqlite_file = intermediate_sdif_files[0]
+             logger.info(
+                 f"Moving single intermediate SDIF SQLite file '{source_sqlite_file}' to final output '{final_sdif_file_target}'."
+             )
+             try:
+                 shutil.move(str(source_sqlite_file), str(final_sdif_file_target))
+                 final_sdif_path_str = str(final_sdif_file_target)
+             except Exception as e_move:
+                 logger.error(
+                     f"Failed to move {source_sqlite_file} to {final_sdif_file_target}: {e_move}"
+                 )
+                 # Attempt to copy as a fallback, then try to remove source
+                 try:
+                     shutil.copy2(str(source_sqlite_file), str(final_sdif_file_target))
+                     final_sdif_path_str = str(final_sdif_file_target)
+                     source_sqlite_file.unlink(
+                         missing_ok=True
+                     )  # Try to clean up source after copy
+                 except Exception as e_copy_fallback:
+                     logger.error(
+                         f"Fallback copy also failed for {source_sqlite_file}: {e_copy_fallback}"
+                     )
+                     raise RuntimeError(
+                         f"Could not place intermediate file into final location: {e_copy_fallback}"
+                     ) from e_copy_fallback
+         else:
+             logger.info(
+                 f"Merging {len(intermediate_sdif_files)} intermediate SDIF SQLite files into '{final_sdif_file_target}'."
+             )
+             # merge_sdif_files must accept a list of source SQLite file paths and a target SQLite file path.
+             merged_target_path = await merge_sdif_files(
+                 intermediate_sdif_files,
+                 final_sdif_file_target,
+                 overwrite=False,  # We handled overwrite for final_sdif_file_target
+             )
+             final_sdif_path_str = str(merged_target_path)
+
+         # Clean up original intermediate files (they have been moved or their content merged)
+         for temp_file in intermediate_sdif_files:
+             if (
+                 temp_file.exists()
+                 and temp_file.resolve() != Path(final_sdif_path_str).resolve()
+             ):  # Don't delete the final file if it was one of the intermediates (single file case)
+                 try:
+                     temp_file.unlink()
+                     logger.debug(f"Cleaned up intermediate file: {temp_file}")
+                 except Exception as e_clean_file:
+                     logger.warning(
+                         f"Error cleaning up intermediate file {temp_file}: {e_clean_file}"
+                     )
+
+         logger.info(
+             f"Consolidation complete. Final SDIF SQLite file: {final_sdif_path_str}"
+         )
+         return StandardizationResult(
+             output_path=Path(final_sdif_path_str),
+             file_configs=aggregated_file_configs if aggregated_file_configs else None,
+         )
+
+     async def standardize(
+         self,
+         datasource: Datasource,
+         output_path: SDIFPath,  # Expected to be the path to the target *SDIF file*
+         *,
+         overwrite: bool = False,
+         config: Optional[Dict[str, Any]] = None,
+         **kwargs,
+     ) -> StandardizationResult:
+         """
+         Standardizes datasource to a single SDIF SQLite file.
+
+         Args:
+             datasource: Source data (file path, list of paths, or directory path).
+             output_path: Path to the target output SDIF SQLite file (e.g., "./output/data.sdif").
+             overwrite: If True, overwrite existing output file. Defaults to False.
+             config: Optional configuration dictionary for standardizers.
+             **kwargs: Additional arguments passed to child standardizers.
+
+         Returns:
+             StandardizationResult with the path to the created SDIF SQLite file.
+         """
+         logger.info(
+             f"AIStandardizer starting process for output SDIF file: {output_path}"
+         )
+         final_sdif_file_target = Path(output_path).resolve()
+
+         if final_sdif_file_target.is_dir():
+             raise ValueError(
+                 f"Target output_path '{final_sdif_file_target}' is a directory. "
+                 "It must be a full file path for the target SDIF SQLite database (e.g., data.sqlite or data.sdif)."
+             )
+         if not final_sdif_file_target.suffix:
+             logger.warning(
+                 f"Target output_path '{final_sdif_file_target}' has no file extension. "
+                 "It should be a path to an SDIF SQLite database file (e.g., data.sqlite or data.sdif)."
+             )
+         elif final_sdif_file_target.suffix.lower() not in (".sdif", ".sqlite", ".db"):
+             logger.warning(
+                 f"Target output_path '{final_sdif_file_target}' does not have a common SQLite extension. "
+                 "Ensure this is the intended SQLite file path."
+             )
+
+         # Create a unique temporary directory for this standardization run.
+         # This directory will hold intermediate files and ZIP extractions.
+         run_temp_dir = Path(tempfile.mkdtemp(prefix="satif_aistd_run_"))
+         intermediate_sdif_files_dir = run_temp_dir / "intermediate_sdif_files"
+         intermediate_sdif_files_dir.mkdir(parents=True, exist_ok=True)
+         file_processing_temp_dir = run_temp_dir / "file_processing_temp"
+         file_processing_temp_dir.mkdir(parents=True, exist_ok=True)
+
+         try:
+             # 1. Resolve input datasource to a list of processable file paths
+             resolved_files = await self._resolve_input_files(
+                 datasource, file_processing_temp_dir
+             )
+             logger.info(f"Resolved {len(resolved_files)} file(s) for standardization.")
+
+             # 2. Group files by the AI standardizer responsible for them
+             grouped_by_std, unsupported = self._group_files_by_standardizer(
+                 resolved_files
+             )
+             if not grouped_by_std:
+                 user_message = (
+                     "No files found that can be handled by configured AI standardizers."
+                 )
+                 if unsupported:
+                     user_message += (
+                         f" Unsupported files: {[str(f.name) for f in unsupported]}"
+                     )
+                 raise ValueError(user_message)
+
+             logger.debug(
+                 f"File groups for standardization: { {cls.__name__: [f.name for f in paths] for cls, paths in grouped_by_std.items()} }"
+             )
+
+             # 3. Process each group of files, generating intermediate SDIF SQLite files
+             (
+                 intermediate_sdif_files,
+                 aggregated_file_configs,
+             ) = await self._process_file_groups(
+                 grouped_by_std, intermediate_sdif_files_dir, config, **kwargs
+             )
+
+             if not intermediate_sdif_files:
+                 raise RuntimeError(
+                     "No intermediate SDIF SQLite files were successfully generated."
+                 )
+             logger.info(
+                 f"Successfully generated {len(intermediate_sdif_files)} intermediate SDIF SQLite file(s)."
+             )
+
+             # 4. Consolidate intermediate SDIF files into the final target file
+             final_result = await self._consolidate_results(
+                 intermediate_sdif_files,
+                 aggregated_file_configs,
+                 final_sdif_file_target,
+                 overwrite,
+             )
+
+             logger.info(
+                 f"AIStandardizer process completed. Final SDIF file at: {final_result.output_path}"
+             )
+             return final_result
+
+         except Exception as e:
+             logger.error(f"AIStandardizer failed: {e}", exc_info=True)
+             if isinstance(e, (ValueError, FileNotFoundError, FileExistsError)):
+                 raise
+             raise RuntimeError(f"AIStandardizer processing error: {e}") from e
+         finally:
+             # Clean up the entire temporary directory for this run
+             if run_temp_dir.exists():
+                 try:
+                     shutil.rmtree(run_temp_dir)
+                     logger.info(f"Cleaned up temporary run directory: {run_temp_dir}")
+                 except Exception as e_clean:
+                     logger.error(
+                         f"Error cleaning up temporary run directory {run_temp_dir}: {e_clean}",
+                         exc_info=True,
+                     )
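
For orientation, here is a minimal usage sketch of the `AIStandardizer` added above. The import path, input file names, and model name are assumptions for illustration (the diff does not show this module's path); the constructor and `standardize` signature are taken from the code in the diff.

```python
import asyncio
from pathlib import Path

# Import path assumed from the package layout; adjust to where AIStandardizer actually lives.
from satif_ai.standardizers.ai import AIStandardizer


async def main() -> None:
    standardizer = AIStandardizer(llm_model="o4-mini")  # model name is illustrative
    # CSV files (and CSVs inside ZIP archives) are dispatched to AICSVStandardizer;
    # the intermediate SDIF outputs are then consolidated into one SDIF SQLite file.
    result = await standardizer.standardize(
        datasource=[Path("input/orders.csv"), Path("input/archive.zip")],
        output_path=Path("output/data.sdif"),
        overwrite=True,
    )
    print(result.output_path)


asyncio.run(main())
```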
@@ -37,7 +37,7 @@ You are an expert CSV Data Standardization Agent. Your mission is to analyze a g
  - Encoding: {initial_encoding}
  - Delimiter: '{initial_delimiter}'
 
- **Your Comprehensive Task:**
+ **Your Task:**
 
  1. **Core Parsing Parameters:**
  * Determine the correct file `encoding` (string, e.g., "utf-8", "latin-1").
satif_ai/transform.py ADDED
@@ -0,0 +1,121 @@
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+ from fastmcp import FastMCP
+ from fastmcp.client.transports import FastMCPTransport
+ from satif_core.code_executors.base import CodeExecutor
+ from satif_core.transformation_builders.base import AsyncTransformationBuilder
+ from satif_core.types import (
+     FilePath,
+     SDIFPath,
+     TransformationResult,
+ )
+ from satif_sdk.code_executors.local_executor import LocalCodeExecutor
+ from satif_sdk.transformers.code import CodeTransformer
+ from sdif_mcp.server import mcp
+
+ from satif_ai.transformation_builders.syncpulse import SyncpulseTransformationBuilder
+ from satif_ai.utils.openai_mcp import OpenAICompatibleMCP
+
+
+ async def atransform(
+     sdif: SDIFPath,
+     output_target_files: Dict[FilePath, str] | List[FilePath] | FilePath,
+     instructions: Optional[str] = None,
+     output_path: FilePath = Path("."),
+     *,
+     transformation_code: Optional[str] = None,
+     transformation_builder: Optional[AsyncTransformationBuilder] = None,
+     code_executor: Optional[CodeExecutor] = None,
+     mcp_server: Optional[FastMCP] = None,
+     mcp_transport: Optional[FastMCPTransport] = None,
+     llm_model: str = "o4-mini",
+     schema_only: bool = False,
+     representer_kwargs: Optional[Dict[str, Any]] = None,
+ ) -> TransformationResult:
+     """
+     Asynchronously transforms an SDIF (Standard Data Interchange Format) input using
+     AI-generated or user-provided transformation code.
+
+     This function orchestrates the process of:
+     1. Optionally generating transformation code using an AI model via a transformation
+        builder if `transformation_code` is not explicitly provided.
+     2. Executing the transformation code using a `CodeTransformer` and a `CodeExecutor`.
+     3. Exporting the results to the specified output.
+
+     Args:
+         sdif: Path to the input SDIF file or an `SDIFDatabase` object.
+         output_target_files: A dictionary mapping original example file paths (or string identifiers)
+             to their desired agent-facing filenames, or a list of output example
+             file paths, or a single output file path. These are used by the AI to understand the target
+             format and structure, and also by the `CodeTransformer` to determine
+             output filenames if the transformation result keys match.
+         instructions: Optional. Natural language instructions for the AI to generate
+             the transformation code. Used if `transformation_code` is None.
+         transformation_code: Optional. Pre-existing Python code for the transformation.
+             If None, code will be generated by the `transformation_builder`.
+         transformation_builder: Optional. An `AsyncTransformationBuilder` instance responsible for generating
+             the transformation code if `transformation_code` is not provided.
+             If None, a `SyncpulseTransformationBuilder` is instantiated.
+         code_executor: Optional. A `CodeExecutor` instance for running the transformation
+             code. If None, a `LocalCodeExecutor` is used.
+         mcp_server: Optional. A `FastMCP` server instance for the AI code builder.
+             Defaults to the global `mcp` instance if `transformation_builder` is None.
+         mcp_transport: Optional. A `FastMCPTransport` instance for communication with
+             the `mcp_server`. Defaults to a new transport using `mcp_server`
+             if `transformation_builder` is None.
+         llm_model: The language model to use for code generation (e.g., "o4-mini").
+             Used if `transformation_builder` is None.
+         schema_only: If True, the transformation aims to match only the schema (headers)
+             of the `output_target_files`, and input samples may be omitted or marked
+             as empty for the AI. This is useful for structural transformations
+             without processing actual data rows.
+         representer_kwargs: Optional dictionary of keyword arguments to pass to the
+             representer when analyzing `output_target_files`.
+
+     Returns:
+         A `TransformationResult` object containing the path to the output
+         and the transformation code used.
+     """
+     if transformation_builder is None:
+         if mcp_server is None:
+             mcp_server = mcp
+
+         if mcp_transport is None:
+             mcp_transport = FastMCPTransport(mcp=mcp_server)
+
+         openai_compatible_mcp = OpenAICompatibleMCP(mcp=mcp_server)
+         await openai_compatible_mcp.connect()
+
+         transformation_builder = SyncpulseTransformationBuilder(
+             mcp_server=openai_compatible_mcp,
+             mcp_session=mcp_transport,
+             llm_model=llm_model,
+         )
+
+     if transformation_code is None:
+         function_code = await transformation_builder.build(
+             sdif=sdif,
+             output_target_files=output_target_files,
+             instructions=instructions,
+             schema_only=schema_only,
+             representer_kwargs=representer_kwargs,
+         )
+     else:
+         function_code = transformation_code
+
+     if code_executor is None:
+         code_executor = LocalCodeExecutor()
+
+     transformer = CodeTransformer(
+         function=function_code,
+         code_executor=code_executor,
+     )
+
+     output_path = transformer.export(
+         sdif=sdif,
+         output_path=output_path,
+     )
+
+     return TransformationResult(output_path=output_path, function_code=function_code)
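
And a minimal usage sketch of the new `atransform` helper (the SDIF path, target file, and instructions are illustrative; the signature and defaults are the ones added above):

```python
import asyncio
from pathlib import Path

from satif_ai.transform import atransform


async def main() -> None:
    # With no transformation_code and no transformation_builder supplied, atransform
    # builds the code with SyncpulseTransformationBuilder (default MCP server,
    # llm_model="o4-mini") and executes it with LocalCodeExecutor.
    result = await atransform(
        sdif=Path("output/data.sdif"),
        output_target_files=[Path("examples/monthly_report.csv")],
        instructions="Aggregate orders by month and write one row per month.",
        output_path=Path("transformed"),
    )
    print(result.output_path)


asyncio.run(main())
```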