satif-ai 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
satif_ai/standardizers/ai.py CHANGED
@@ -63,6 +63,38 @@ class AIStandardizer(AsyncStandardizer):
     ) -> Optional[Type[AsyncStandardizer]]:
         return self.ai_standardizer_map.get(extension.lower())
 
+    def _resolve_file_path(
+        self, raw_path_item: Union[str, Path], temp_processing_dir: Path
+    ) -> List[Path]:
+        """
+        Resolves a single input path to a list of file paths.
+        This method contains blocking file system operations.
+        """
+        raw_path = Path(raw_path_item).resolve()
+        input_file_paths: List[Path] = []
+
+        if not raw_path.exists():
+            raise FileNotFoundError(f"Input path not found: {raw_path}")
+
+        if raw_path.is_file():
+            if raw_path.suffix.lower() == ".zip":
+                # Zip extraction is handled asynchronously in the calling method
+                return [raw_path]
+            else:
+                input_file_paths.append(raw_path)
+        elif raw_path.is_dir():
+            logger.info(f"Processing directory datasource: {raw_path}")
+            for child_item in raw_path.iterdir():
+                if child_item.is_file():
+                    input_file_paths.append(child_item)
+            # Deeper recursion to be implemented.
+        else:
+            logger.warning(
+                f"Input path '{raw_path}' is not a file or directory and will be ignored."
+            )
+
+        return input_file_paths
+
     async def _resolve_input_files(
         self, datasource: Datasource, temp_processing_dir: Path
     ) -> List[Path]:
@@ -70,8 +102,8 @@ class AIStandardizer(AsyncStandardizer):
         Resolves the input datasource to a list of individual file paths.
         Handles single files, lists of files, and extracts ZIP archives.
         """
-        input_file_paths: List[Path] = []
         raw_paths_to_check: List[Union[str, Path]] = []
+        all_input_file_paths: List[Path] = []
 
         if isinstance(datasource, (str, Path)):
             raw_paths_to_check = [datasource]
@@ -88,12 +120,13 @@ class AIStandardizer(AsyncStandardizer):
         if not raw_paths_to_check:  # Should be caught by above, but defensive
             raise ValueError("No input datasource paths provided.")
 
+        # Process each path item in a thread to avoid blocking the event loop
         for raw_path_item in raw_paths_to_check:
-            raw_path = Path(raw_path_item).resolve()
-            if not raw_path.exists():
-                raise FileNotFoundError(f"Input path not found: {raw_path}")
+            resolved_paths = await asyncio.to_thread(
+                self._resolve_file_path, raw_path_item, temp_processing_dir
+            )
 
-            if raw_path.is_file():
+            for raw_path in resolved_paths:
                 if raw_path.suffix.lower() == ".zip":
                     zip_extract_target = (
                         temp_processing_dir
@@ -103,7 +136,7 @@ class AIStandardizer(AsyncStandardizer):
                         extracted_from_zip = await extract_zip_archive_async(
                             raw_path, zip_extract_target
                         )
-                        input_file_paths.extend(extracted_from_zip)
+                        all_input_file_paths.extend(extracted_from_zip)
                     except Exception as e_zip:
                         logger.error(
                             f"Failed to extract ZIP archive '{raw_path}': {e_zip}",
@@ -113,23 +146,14 @@ class AIStandardizer(AsyncStandardizer):
                         # For now, skipping problematic zips.
                         continue
                 else:
-                    input_file_paths.append(raw_path)
-            elif raw_path.is_dir():
-                logger.info(f"Processing directory datasource: {raw_path}")
-                for child_item in raw_path.iterdir():
-                    if child_item.is_file():
-                        input_file_paths.append(child_item)
-                # Deeper recursion to be implemeted.
-            else:
-                logger.warning(
-                    f"Input path '{raw_path}' is not a file or directory and will be ignored."
-                )
+                    all_input_file_paths.append(raw_path)
 
-        if not input_file_paths:
+        if not all_input_file_paths:
             # This means all inputs were invalid, unresolvable, or zips failed etc.
             logger.error("No processable files found after resolving datasource.")
             raise ValueError("Datasource resolution resulted in no processable files.")
-        return input_file_paths
+
+        return all_input_file_paths
 
     def _group_files_by_standardizer(
         self, file_paths: List[Path]
@@ -269,7 +293,7 @@ class AIStandardizer(AsyncStandardizer):
 
         return successful_intermediate_sdif_files, aggregated_file_configs
 
-    async def _consolidate_results(
+    def _consolidate_results(
         self,
         intermediate_sdif_files: List[Path],
         aggregated_file_configs: Optional[List[Dict[str, Any]]],
@@ -362,6 +386,59 @@ class AIStandardizer(AsyncStandardizer):
             file_configs=aggregated_file_configs if aggregated_file_configs else None,
         )
 
+    async def _setup_workspace(
+        self, output_path: Path, overwrite: bool
+    ) -> Tuple[Path, Path, Path]:
+        """
+        Sets up the temporary workspace directories and validates the output path.
+        Contains blocking file system operations.
+        """
+        final_sdif_file_target = output_path.resolve()
+
+        if final_sdif_file_target.is_dir():
+            raise ValueError(
+                f"Target output_path '{final_sdif_file_target}' is a directory. "
+                "It must be a full file path for the target SDIF SQLite database (e.g., data.sqlite or data.sdif)."
+            )
+        if not final_sdif_file_target.suffix:
+            logger.warning(
+                f"Target output_path '{final_sdif_file_target}' has no file extension. "
+                "It should be a path to an SDIF SQLite database file (e.g., data.sqlite or data.sdif)."
+            )
+        elif final_sdif_file_target.suffix.lower() not in (".sdif", ".sqlite", ".db"):
+            logger.warning(
+                f"Target output_path '{final_sdif_file_target}' does not have a common SQLite extension. "
+                "Ensure this is the intended SQLite file path."
+            )
+
+        # Create a unique temporary directory for this standardization run
+        run_temp_dir = Path(tempfile.mkdtemp(prefix="satif_aistd_run_"))
+        intermediate_sdif_files_dir = run_temp_dir / "intermediate_sdif_files"
+        intermediate_sdif_files_dir.mkdir(parents=True, exist_ok=True)
+        file_processing_temp_dir = run_temp_dir / "file_processing_temp"
+        file_processing_temp_dir.mkdir(parents=True, exist_ok=True)
+
+        return (
+            final_sdif_file_target,
+            intermediate_sdif_files_dir,
+            file_processing_temp_dir,
+        )
+
+    async def _cleanup_workspace(self, run_temp_dir: Path) -> None:
+        """
+        Cleans up the temporary workspace directory.
+        Contains blocking file system operations.
+        """
+        if run_temp_dir.exists():
+            try:
+                await asyncio.to_thread(shutil.rmtree, run_temp_dir)
+                logger.info(f"Cleaned up temporary run directory: {run_temp_dir}")
+            except Exception as e_clean:
+                logger.error(
+                    f"Error cleaning up temporary run directory {run_temp_dir}: {e_clean}",
+                    exc_info=True,
+                )
+
     async def standardize(
         self,
         datasource: Datasource,
@@ -387,31 +464,15 @@ class AIStandardizer(AsyncStandardizer):
         logger.info(
             f"AIStandardizer starting process for output SDIF file: {output_path}"
         )
-        final_sdif_file_target = Path(output_path).resolve()
 
-        if final_sdif_file_target.is_dir():
-            raise ValueError(
-                f"Target output_path '{final_sdif_file_target}' is a directory. "
-                "It must be a full file path for the target SDIF SQLite database (e.g., data.sqlite or data.sdif)."
-            )
-        if not final_sdif_file_target.suffix:
-            logger.warning(
-                f"Target output_path '{final_sdif_file_target}' has no file extension. "
-                "It should be a path to an SDIF SQLite database file (e.g., data.sqlite or data.sdif)."
-            )
-        elif final_sdif_file_target.suffix.lower() not in (".sdif", ".sqlite", ".db"):
-            logger.warning(
-                f"Target output_path '{final_sdif_file_target}' does not have a common SQLite extension. "
-                "Ensure this is the intended SQLite file path."
-            )
+        # Setup workspace and validate output path - moved to a separate async function
+        (
+            final_sdif_file_target,
+            intermediate_sdif_files_dir,
+            file_processing_temp_dir,
+        ) = await asyncio.to_thread(self._setup_workspace, Path(output_path), overwrite)
 
-        # Create a unique temporary directory for this standardization run
-        # This directory will hold intermediate files and ZIP extractions.
-        run_temp_dir = Path(tempfile.mkdtemp(prefix="satif_aistd_run_"))
-        intermediate_sdif_files_dir = run_temp_dir / "intermediate_sdif_files"
-        intermediate_sdif_files_dir.mkdir(parents=True, exist_ok=True)
-        file_processing_temp_dir = run_temp_dir / "file_processing_temp"
-        file_processing_temp_dir.mkdir(parents=True, exist_ok=True)
+        run_temp_dir = file_processing_temp_dir.parent
 
         try:
             resolved_files = await self._resolve_input_files(
@@ -419,9 +480,11 @@ class AIStandardizer(AsyncStandardizer):
             )
             logger.info(f"Resolved {len(resolved_files)} file(s) for standardization.")
 
-            grouped_by_std, unsupported = self._group_files_by_standardizer(
-                resolved_files
+            # File grouping - potentially move to a thread if the list is very large
+            grouped_by_std, unsupported = await asyncio.to_thread(
+                self._group_files_by_standardizer, resolved_files
             )
+
             if not grouped_by_std:
                 user_message = (
                     "No files found that can be handled by configured AI standardizers."
@@ -451,7 +514,8 @@ class AIStandardizer(AsyncStandardizer):
                 f"Successfully generated {len(intermediate_sdif_files)} intermediate SDIF SQLite file(s)."
             )
 
-            final_result = await self._consolidate_results(
+            final_result = await asyncio.to_thread(
+                self._consolidate_results,
                 intermediate_sdif_files,
                 aggregated_file_configs,
                 final_sdif_file_target,
@@ -469,13 +533,5 @@ class AIStandardizer(AsyncStandardizer):
                 raise
             raise RuntimeError(f"AIStandardizer processing error: {e}") from e
         finally:
-            # Clean up the entire temporary directory for this run
-            if run_temp_dir.exists():
-                try:
-                    shutil.rmtree(run_temp_dir)
-                    logger.info(f"Cleaned up temporary run directory: {run_temp_dir}")
-                except Exception as e_clean:
-                    logger.error(
-                        f"Error cleaning up temporary run directory {run_temp_dir}: {e_clean}",
-                        exc_info=True,
-                    )
+            # Clean up using a dedicated async method
+            await self._cleanup_workspace(run_temp_dir)
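
The recurring change in the hunks above is to keep blocking file-system work off the event loop by pushing it into a worker thread. A minimal, self-contained sketch of that pattern (illustrative names only, not code from this package):

```python
# Sketch of the asyncio.to_thread offloading pattern used by the new
# _resolve_file_path / _setup_workspace / _cleanup_workspace helpers.
# All names below are hypothetical.
import asyncio
from pathlib import Path
from typing import List


def list_files(root: Path) -> List[Path]:
    # Blocking file-system scan; safe to run in a worker thread.
    return [p for p in root.iterdir() if p.is_file()]


async def resolve(root: Path) -> List[Path]:
    # asyncio.to_thread runs the blocking call in the default thread
    # pool, keeping the event loop responsive in the meantime.
    return await asyncio.to_thread(list_files, root)


if __name__ == "__main__":
    print(asyncio.run(resolve(Path("."))))
```

`asyncio.to_thread` ships with Python 3.9+, so it is available across this package's supported range (Requires-Python >=3.10).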
satif_ai/transform.py CHANGED
@@ -90,7 +90,6 @@ async def atransform(
     # If code isn't provided, we need a builder. If a builder isn't provided, we create one.
     if current_transformation_code is None:
         if active_builder is None:
-            # Create SyncpulseTransformationBuilder
            _effective_mcp_server = mcp_server if mcp_server is not None else mcp
 
            _openai_mcp_instance = OpenAICompatibleMCP(mcp=_effective_mcp_server)
@@ -137,7 +136,6 @@ async def atransform(
     if current_transformation_code is None:
         raise ValueError("Transformation code could not be obtained or generated.")
 
-    # Code Executor and Transformation
     _code_executor = code_executor if code_executor is not None else LocalCodeExecutor()
 
     transformer = CodeTransformer(
satif_ai/transformation_builders/syncpulse.py CHANGED
@@ -2,6 +2,7 @@ import base64
 import os
 import re
 from collections import defaultdict
+from contextvars import ContextVar
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
@@ -15,10 +16,15 @@ from satif_sdk.comparators import get_comparator
 from satif_sdk.representers import get_representer
 from satif_sdk.transformers import CodeTransformer
 
-# Global variables for transformation
-INPUT_SDIF_PATH: Optional[Path] = None
-OUTPUT_TARGET_FILES: Optional[Dict[Union[str, Path], str]] = None
-SCHEMA_ONLY: Optional[bool] = None
+CONTEXT_INPUT_SDIF_PATH: ContextVar[Optional[Path]] = ContextVar(
+    "CONTEXT_INPUT_SDIF_PATH", default=None
+)
+CONTEXT_OUTPUT_TARGET_FILES: ContextVar[Optional[Dict[Union[str, Path], str]]] = (
+    ContextVar("CONTEXT_OUTPUT_TARGET_FILES", default=None)
+)
+CONTEXT_SCHEMA_ONLY: ContextVar[Optional[bool]] = ContextVar(
+    "CONTEXT_SCHEMA_ONLY", default=None
+)
 
 
 def _format_comparison_output(
@@ -60,18 +66,22 @@ async def execute_transformation(code: str) -> str:
     Args:
         code: The code to execute on the input.
     """
-    if INPUT_SDIF_PATH is None or OUTPUT_TARGET_FILES is None:
-        return "Error: Transformation context not initialized"
+    input_sdif_path = CONTEXT_INPUT_SDIF_PATH.get()
+    output_target_files_dict = CONTEXT_OUTPUT_TARGET_FILES.get()
+    schema_only_flag = CONTEXT_SCHEMA_ONLY.get()
+
+    if input_sdif_path is None or output_target_files_dict is None:
+        return "Error: Transformation context not initialized correctly via contextvars"
 
     code_transformer = CodeTransformer(
         function=code,
         code_executor=LocalCodeExecutor(disable_security_warning=True),
     )
-    generated_output_path = code_transformer.export(INPUT_SDIF_PATH)
+    generated_output_path = code_transformer.export(input_sdif_path)
 
     comparisons = []
     comparator_kwargs = {}
-    if SCHEMA_ONLY:
+    if schema_only_flag:
         comparator_kwargs["check_structure_only"] = True
 
     if os.path.isdir(generated_output_path):
@@ -81,7 +91,7 @@ async def execute_transformation(code: str) -> str:
         for (
             output_base_file,
             output_target_file_name,
-        ) in OUTPUT_TARGET_FILES.items():
+        ) in output_target_files_dict.items():
             if output_target_file_name in generated_files:
                 generated_file_path = os.path.join(
                     generated_output_path, output_target_file_name
@@ -92,7 +102,7 @@ async def execute_transformation(code: str) -> str:
                 )
                 formatted_message = _format_comparison_output(
                     comparison,
-                    SCHEMA_ONLY,
+                    schema_only_flag,
                     generated_file_path,
                     output_target_file_name,
                 )
@@ -103,16 +113,18 @@ async def execute_transformation(code: str) -> str:
                )
    else:
        # If it's a single file, ensure there's only one target and compare
-        if len(OUTPUT_TARGET_FILES) == 1:
-            output_file = list(OUTPUT_TARGET_FILES.keys())[0]
-            output_target_file_name = list(OUTPUT_TARGET_FILES.values())[0]
-            comparator = get_comparator(output_file.split(".")[-1])
+        if len(output_target_files_dict) == 1:
+            output_file = list(output_target_files_dict.keys())[0]
+            output_target_file_name = list(output_target_files_dict.values())[0]
+            comparator = get_comparator(
+                str(output_file).split(".")[-1]
+            )  # Ensure output_file is string for split
            comparison = comparator.compare(
                generated_output_path, output_file, **comparator_kwargs
            )
            formatted_message = _format_comparison_output(
                comparison,
-                SCHEMA_ONLY,
+                schema_only_flag,
                str(generated_output_path),
                output_target_file_name,
            )
@@ -147,126 +159,144 @@ class SyncpulseTransformationBuilder(AsyncTransformationBuilder):
         schema_only: bool = False,
         representer_kwargs: Optional[Dict[str, Any]] = None,
     ) -> str:
-        global INPUT_SDIF_PATH, OUTPUT_TARGET_FILES, SCHEMA_ONLY
-
-        INPUT_SDIF_PATH = Path(sdif).resolve()
-        SCHEMA_ONLY = schema_only
-        # We must encode the path because special characters are not allowed in mcp read_resource()
-        input_sdif_mcp_uri_path = base64.b64encode(str(sdif).encode()).decode()
-        output_sdif_mcp_uri_path = (
-            base64.b64encode(str(output_sdif).encode()).decode()
-            if output_sdif
-            else None
-        )
-
-        input_schema = await self.mcp_session.read_resource(
-            f"schema://{input_sdif_mcp_uri_path}"
-        )
-        input_sample = await self.mcp_session.read_resource(
-            f"sample://{input_sdif_mcp_uri_path}"
-        )
-
-        output_schema_text = "N/A"
-        output_sample_text = "N/A"
-        if output_sdif_mcp_uri_path:
-            try:
-                output_schema_content = await self.mcp_session.read_resource(
-                    f"schema://{output_sdif_mcp_uri_path}"
-                )
-                if output_schema_content.contents:
-                    output_schema_text = output_schema_content.contents[0].text
-            except Exception as e:
-                print(
-                    f"Warning: Could not read schema for output_sdif {output_sdif_mcp_uri_path}: {e}"
-                )
-
-            try:
-                output_sample_content = await self.mcp_session.read_resource(
-                    f"sample://{output_sdif_mcp_uri_path}"
-                )
-                if output_sample_content.contents:
-                    output_sample_text = output_sample_content.contents[0].text
-            except Exception as e:
-                print(
-                    f"Warning: Could not read sample for output_sdif {output_sdif_mcp_uri_path}: {e}"
-                )
+        resolved_input_sdif_path = Path(sdif).resolve()
 
         # OUTPUT_TARGET_FILES keys are absolute paths to original example files for local reading by representers/comparators.
         # Values are agent-facing filenames.
+        resolved_output_target_files: Dict[Union[str, Path], str]
         if isinstance(output_target_files, FilePath):
-            OUTPUT_TARGET_FILES = {
+            resolved_output_target_files = {
                 Path(output_target_files).resolve(): Path(output_target_files).name
             }
         elif isinstance(output_target_files, list):
-            OUTPUT_TARGET_FILES = {
+            resolved_output_target_files = {
                 Path(file_path).resolve(): Path(file_path).name
                 for file_path in output_target_files
             }
         elif isinstance(output_target_files, dict):
             temp_map = {}
             for k, v in output_target_files.items():
-                if isinstance(k, Path):
-                    temp_map[k.resolve()] = v
-                else:
-                    temp_map[k] = v
-            OUTPUT_TARGET_FILES = temp_map
+                # Resolve Path keys to absolute paths
+                key_to_resolve = k
+                if (
+                    isinstance(key_to_resolve, str) and Path(key_to_resolve).exists()
+                ):  # Check if string is a valid path
+                    key_to_resolve = Path(key_to_resolve)
+
+                if isinstance(key_to_resolve, Path):
+                    temp_map[key_to_resolve.resolve()] = v
+                else:  # Keep non-Path keys as they are (e.g. if it's already a resolved string path from somewhere else)
+                    temp_map[key_to_resolve] = v
+            resolved_output_target_files = temp_map
         else:
-            OUTPUT_TARGET_FILES = {}
+            resolved_output_target_files = {}
+
+        token_input_path = CONTEXT_INPUT_SDIF_PATH.set(resolved_input_sdif_path)
+        token_output_files = CONTEXT_OUTPUT_TARGET_FILES.set(
+            resolved_output_target_files
+        )
+        token_schema_only = CONTEXT_SCHEMA_ONLY.set(schema_only)
 
-        output_representation = defaultdict(dict)
-        if OUTPUT_TARGET_FILES:
-            for file_key_abs_path in list(OUTPUT_TARGET_FILES.keys()):
-                agent_facing_name = OUTPUT_TARGET_FILES[file_key_abs_path]
-                print(f"Representing {agent_facing_name} from {file_key_abs_path}")
+        try:
+            # We must encode the path because special characters are not allowed in mcp read_resource()
+            input_sdif_mcp_uri_path = base64.b64encode(
+                str(resolved_input_sdif_path).encode()
+            ).decode()
+            output_sdif_mcp_uri_path = (
+                base64.b64encode(str(output_sdif).encode()).decode()
+                if output_sdif
+                else None
+            )
+
+            input_schema = await self.mcp_session.read_resource(
+                f"schema://{input_sdif_mcp_uri_path}"
+            )
+            input_sample = await self.mcp_session.read_resource(
+                f"sample://{input_sdif_mcp_uri_path}"
+            )
+
+            output_schema_text = "N/A"
+            output_sample_text = "N/A"
+            if output_sdif_mcp_uri_path:
                 try:
-                    # Representer uses the absolute path (file_key_abs_path) to read the example file.
-                    representer = get_representer(file_key_abs_path)
-                    representation, used_params = representer.represent(
-                        file_key_abs_path, **(representer_kwargs or {})
+                    output_schema_content = await self.mcp_session.read_resource(
+                        f"schema://{output_sdif_mcp_uri_path}"
                     )
-                    output_representation[agent_facing_name] = {
-                        "representation": representation,
-                        "used_params": used_params,
-                    }
+                    if output_schema_content.contents:
+                        output_schema_text = output_schema_content.contents[0].text
                 except Exception as e:
                     print(
-                        f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
+                        f"Warning: Could not read schema for output_sdif {output_sdif_mcp_uri_path}: {e}"
+                    )
+
+                try:
+                    output_sample_content = await self.mcp_session.read_resource(
+                        f"sample://{output_sdif_mcp_uri_path}"
                     )
-                    output_representation[agent_facing_name] = (
-                        f"Error representing file: {e}"
+                    if output_sample_content.contents:
+                        output_sample_text = output_sample_content.contents[0].text
+                except Exception as e:
+                    print(
+                        f"Warning: Could not read sample for output_sdif {output_sdif_mcp_uri_path}: {e}"
                     )
+            output_representation = defaultdict(dict)
+            if resolved_output_target_files:
+                for file_key_abs_path in list(resolved_output_target_files.keys()):
+                    agent_facing_name = resolved_output_target_files[file_key_abs_path]
+                    try:
+                        # Representer uses the absolute path (file_key_abs_path) to read the example file.
+                        representer = get_representer(file_key_abs_path)
+                        representation, used_params = representer.represent(
+                            file_key_abs_path, **(representer_kwargs or {})
+                        )
+                        output_representation[agent_facing_name] = {
+                            "representation": representation,
+                            "used_params": used_params,
+                        }
+                    except Exception as e:
+                        print(
+                            f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
+                        )
+                        output_representation[agent_facing_name] = (
+                            f"Error representing file: {e}"
+                        )
 
-        prompt = await self.mcp_session.get_prompt(
-            "create_transformation",
-            arguments={
-                "input_file": Path(
-                    input_sdif_mcp_uri_path
-                ).name,  # Display name for prompt (from relative path)
-                "input_schema": input_schema.contents[0].text
-                if input_schema.contents
-                else "Error reading input schema",
-                "input_sample": input_sample.contents[0].text
-                if input_sample.contents
-                else "Error reading input sample",
-                "output_files": str(list(OUTPUT_TARGET_FILES.values())),
-                "output_schema": output_schema_text,
-                "output_sample": output_sample_text
-                if not SCHEMA_ONLY
-                else "Sample not available. File is empty (no data).",
-                "output_representation": str(output_representation),
-                "instructions": instructions
-                or "No instructions provided. Use the output example.",
-            },
-        )
-        agent = Agent(
-            name="Transformation Builder",
-            mcp_servers=[self.mcp_server],
-            tools=[execute_transformation],
-            model=self.llm_model,
-        )
-        result = await Runner.run(agent, prompt.messages[0].content.text)
-        transformation_code = self.parse_code(result.final_output)
-        return transformation_code
+            prompt = await self.mcp_session.get_prompt(
+                "create_transformation",
+                arguments={
+                    "input_file": Path(
+                        input_sdif_mcp_uri_path  # Use the original sdif path for display name logic if needed
+                    ).name,
+                    "input_schema": input_schema.contents[0].text
+                    if input_schema.contents
+                    else "Error reading input schema",
+                    "input_sample": input_sample.contents[0].text
+                    if input_sample.contents
+                    else "Error reading input sample",
+                    "output_files": str(list(resolved_output_target_files.values())),
+                    "output_schema": output_schema_text,
+                    "output_sample": output_sample_text
+                    if not schema_only
+                    else "Sample not available. File is empty (no data).",
+                    "output_representation": str(output_representation),
+                    "instructions": instructions
+                    or "No instructions provided. Use the output example.",
+                },
+            )
+            agent = Agent(
+                name="Transformation Builder",
+                mcp_servers=[self.mcp_server],
+                tools=[execute_transformation],
+                model=self.llm_model,
+            )
+            result = await Runner.run(agent, prompt.messages[0].content.text)
+            transformation_code = self.parse_code(result.final_output)
+            return transformation_code
+        finally:
+            # Reset context variables after the task is done
+            CONTEXT_INPUT_SDIF_PATH.reset(token_input_path)
+            CONTEXT_OUTPUT_TARGET_FILES.reset(token_output_files)
+            CONTEXT_SCHEMA_ONLY.reset(token_schema_only)
 
     def parse_code(self, code) -> str:
         match = re.search(r"```(?:python)?(.*?)```", code, re.DOTALL)
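
The hunk above swaps module-level globals for `contextvars.ContextVar`, so concurrent builds no longer overwrite each other's state. The set/try/finally-reset discipline it follows, in isolation (hypothetical names, not code from this package):

```python
# Sketch of the ContextVar token set/reset pattern adopted above.
import asyncio
from contextvars import ContextVar
from pathlib import Path
from typing import Optional

CURRENT_INPUT: ContextVar[Optional[Path]] = ContextVar("CURRENT_INPUT", default=None)


async def build(input_path: Path) -> None:
    # set() returns a token that remembers the previous value.
    token = CURRENT_INPUT.set(input_path.resolve())
    try:
        # Each concurrently running task sees its own value here,
        # unlike with a module-level global.
        assert CURRENT_INPUT.get() == input_path.resolve()
    finally:
        # Restore the previous value even if the body raises.
        CURRENT_INPUT.reset(token)


if __name__ == "__main__":
    asyncio.run(build(Path(".")))
```

Because a `ContextVar` value set in one asyncio task is invisible to sibling tasks on the same loop, two builders can run concurrently without clobbering each other's input path, target files, or schema-only flag.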
satif_ai-0.2.12.dist-info/METADATA ADDED
@@ -0,0 +1,175 @@
+Metadata-Version: 2.3
+Name: satif-ai
+Version: 0.2.12
+Summary: AI Agents for Satif
+License: MIT
+Author: Syncpulse
+Maintainer: Bryan Djafer
+Maintainer-email: bryan.djafer@syncpulse.fr
+Requires-Python: >=3.10,<3.14
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Provides-Extra: xlsx
+Requires-Dist: openai-agents (>=0.0.9,<0.0.10)
+Requires-Dist: satif-sdk (>=0.1.0,<1.0.0)
+Requires-Dist: sdif-mcp (>=0.1.0,<1.0.0)
+Description-Content-Type: text/markdown
+
+# SATIF AI
+
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![Python Version](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
+[![Status: Experimental](https://img.shields.io/badge/Status-Experimental-orange.svg)](https://github.com/syncpulse-solutions/satif)
+
+AI toolkit for transforming any input files into any output files.
+
+## ⚠️ Disclaimer
+
+**EXPERIMENTAL STATUS**: This package is in early development and not production-ready. The API may change significantly between versions.
+
+**BLOCKING I/O**: Despite the async API, some operations may contain blocking I/O. This package should be used for testing and experimental purposes only.
+
+## Installation
+
+```bash
+pip install satif-ai
+```
+
+## Overview
+
+SATIF AI enables automated transformation of heterogeneous data sources (CSV, Excel, PDF, XML, etc.) into any desired output format in 2 steps:
+
+1. **Standardization**: Ingests heterogeneous source files (CSV, Excel, PDF, XML, etc.) and transforms them into SDIF, a structured intermediate format.
+2. **Transformation**: Applies business logic to the standardized data to generate the target output files, with transformation code generated by AI.
+
+## Key Features
+
+- **Any Format Support**: Process virtually any input, even challenging unstructured content (PDFs, complex Excel sheets)
+- **AI-Powered Code Generation**: Automatically generate transformation code from examples and natural language instructions
+- **Robust Schema Enforcement**: Handle input data drift and schema inconsistencies through configurable validation
+- **SQL-Based Data Processing**: Query and manipulate all data using SQL
+- **Decoupled Processing Stages**: Standardize once, transform many times with different logic
+
+## Usage
+
+### Basic Workflow
+
+```python
+import asyncio
+from satif_ai import astandardize, atransform
+
+async def main():
+    # Step 1: Standardize input files into SDIF
+    sdif_path = await astandardize(
+        datasource=["data.csv", "reference.xlsx"],
+        output_path="standardized.sdif",
+        overwrite=True
+    )
+
+    # Step 2: Transform SDIF into desired output using AI
+    await atransform(
+        sdif=sdif_path,
+        output_target_files="output.json",
+        instructions="Extract customer IDs and purchase totals, calculate the average purchase value per customer, and output as JSON with customer_id and avg_purchase_value fields.",
+        llm_model="o4-mini"  # Choose AI model based on needs
+    )
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+## Architecture
+
+```
+┌─────────────────┐     ┌───────────────────────┐     ┌─────────────────┐
+│  Source Files   │────▶│ Standardization Layer │────▶│    SDIF File    │
+│  CSV/Excel/PDF/ │     │                       │     │ (SQLite-based)  │
+│  XML/JSON/etc.  │     └───────────────────────┘     └────────┬────────┘
+└─────────────────┘                                            │
+
+┌─────────────────┐     ┌───────────────────────┐              │
+│  Output Files   │◀────│ Transformation Layer  │◀─────────────┘
+│   Any format    │     │ (AI-generated code)   │
+└─────────────────┘     └───────────────────────┘
+```
+
+SDIF (Standardized Data Interoperable Format) is the intermediate SQLite-based format that:
+
+- Stores structured tables alongside JSON objects and binary media
+- Maintains rich metadata about data origins and relationships
+- Provides direct SQL queryability for complex transformations
+
+## Documentation
+
+For detailed documentation, examples, and advanced features, visit [SATIF Documentation](https://satif.io/docs).
+
+## Contributing
+
+Contributions are welcome! Whether it's bug reports, feature requests, or code contributions, please feel free to get involved.
+
+### Contribution Workflow
+
+1. **Fork the repository** on GitHub.
+2. **Clone your fork** locally:
+
+   ```bash
+   git clone https://github.com/syncpulse-solutions/satif.git
+   cd satif/libs/ai
+   ```
+3. **Create a new branch** for your feature or bug fix:
+
+   ```bash
+   git checkout -b feature/your-feature-name
+   ```
+
+   or
+
+   ```bash
+   git checkout -b fix/your-bug-fix-name
+   ```
+4. **Set up the development environment** as described in the [From Source (for Development)](#from-source-for-development) section:
+
+   ```bash
+   make install  # or poetry install
+   ```
+5. **Make your changes.** Ensure your code follows the project's style guidelines.
+6. **Format and lint your code:**
+
+   ```bash
+   make format
+   make lint
+   ```
+7. **Run type checks:**
+
+   ```bash
+   make typecheck
+   ```
+8. **Run tests** to ensure your changes don't break existing functionality:
+
+   ```bash
+   make test
+   ```
+
+   To also generate a coverage report:
+
+   ```bash
+   make coverage
+   ```
+9. **Commit your changes** with a clear and descriptive commit message.
10. **Push your changes** to your fork on GitHub:
+
+    ```bash
+    git push origin feature/your-feature-name
+    ```
+11. **Submit a Pull Request (PR)** to the `main` branch of the original `syncpulse-solutions/satif` repository.
+
+## License
+
+This project is licensed under the MIT License.
+
+Maintainer: Bryan Djafer (bryan.djafer@syncpulse.fr)
+
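
The README above describes SDIF as a plain SQLite-based file that is directly queryable with SQL. Under only that assumption, a standardized output can be inspected with Python's standard library (the file name matches the usage example above; table names vary by input):

```python
# Sketch: listing the tables inside an SDIF output with sqlite3,
# assuming only that SDIF is a SQLite database as the README states.
import sqlite3

con = sqlite3.connect("standardized.sdif")
try:
    rows = con.execute(
        "SELECT name FROM sqlite_master WHERE type = 'table'"
    ).fetchall()
    print("tables:", [r[0] for r in rows])
finally:
    con.close()
```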
satif_ai-0.2.12.dist-info/RECORD CHANGED
@@ -3,18 +3,18 @@ satif_ai/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 satif_ai/adapters/tidy.py,sha256=1g7Wcq8agAZhaAqQDhhD8yh3iO5gZ4mwdKHsiNN3hHY,18540
 satif_ai/standardize.py,sha256=TgAB_nhcHY8zqlfT1PpgfgSswqdE-ly-dheQz-7NC7Q,5674
 satif_ai/standardizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-satif_ai/standardizers/ai.py,sha256=jtYM-ChjLtkpFaubz980CTCNAoC33iYxB3pq0_hn2lU,21045
+satif_ai/standardizers/ai.py,sha256=2dz5LC5mAM7G1ZpDJPb7whdYIBLfwIPFOFNZJIhHxvk,22920
 satif_ai/standardizers/ai_csv.py,sha256=LbCRaLleujQRgSRRyt9ujbED-PIGRq1J8zRnejGM5nc,25437
 satif_ai/standardizers/ai_xlsx.py,sha256=558Bzfy8WGuk5mdnjMvvtakQXcU3rmwK3ykPjpXKwmQ,15863
-satif_ai/transform.py,sha256=g5XNeVCIKUgDW3UIhf02MN9xkXnWF3EJXS0Eig_hfD8,7677
+satif_ai/transform.py,sha256=CoaCtIvJjJuIJ2HgU_yU8QZVGi73PcJNfke9w3sDBoc,7586
 satif_ai/transformation_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-satif_ai/transformation_builders/syncpulse.py,sha256=c59BZicNnqs3NDKpflBAPqw42pGb6nYB2Zps0ChGyaM,11368
+satif_ai/transformation_builders/syncpulse.py,sha256=WhvS-HTzs7DQj-tfJ12Xk2rYGYdn8pv_x5jtU7WN2h4,13258
 satif_ai/utils/__init__.py,sha256=F-usaCt_vX872mXvtukuZdNMPnkVqDb8RaDgox2uow4,212
 satif_ai/utils/merge_sdif.py,sha256=y4C6pgkdyer0QugroFKUck4Eud4Ap-tJzM-eclMo3Rw,25629
 satif_ai/utils/openai_mcp.py,sha256=duCQZXG0mBs9DOOFIUvzraJhxD2IDzegWO9iOiLfFwY,3938
 satif_ai/utils/zip.py,sha256=G_GK8629Iw0TLFCQJfnqOscv7MoKF5zdzxvEAbL7Gss,5186
-satif_ai-0.2.10.dist-info/LICENSE,sha256=kS8EN6yAaGZd7V5z6GKSn_x3ozcZltrfRky4vMPRCw8,1072
-satif_ai-0.2.10.dist-info/METADATA,sha256=O5QWv8YJFtB5AIniv0LRgmSgpEaRLVdlz8WHZAru1X8,719
-satif_ai-0.2.10.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-satif_ai-0.2.10.dist-info/entry_points.txt,sha256=Mz2SwYALjktap1bF-Q3EWBgiZVNT6QJCVsCs_fCV33Y,43
-satif_ai-0.2.10.dist-info/RECORD,,
+satif_ai-0.2.12.dist-info/LICENSE,sha256=kS8EN6yAaGZd7V5z6GKSn_x3ozcZltrfRky4vMPRCw8,1072
+satif_ai-0.2.12.dist-info/METADATA,sha256=m89TCjz21zi-fPOei5CRxxWbNxIghiMGDEQgWpRxt_U,6485
+satif_ai-0.2.12.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+satif_ai-0.2.12.dist-info/entry_points.txt,sha256=Mz2SwYALjktap1bF-Q3EWBgiZVNT6QJCVsCs_fCV33Y,43
+satif_ai-0.2.12.dist-info/RECORD,,
satif_ai-0.2.10.dist-info/METADATA DELETED
@@ -1,23 +0,0 @@
-Metadata-Version: 2.3
-Name: satif-ai
-Version: 0.2.10
-Summary: AI Agents for Satif
-License: MIT
-Author: Syncpulse
-Maintainer: Bryan Djafer
-Maintainer-email: bryan.djafer@syncpulse.fr
-Requires-Python: >=3.10,<3.14
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Provides-Extra: xlsx
-Requires-Dist: openai-agents (>=0.0.9,<0.0.10)
-Requires-Dist: satif-sdk (>=0.1.0,<1.0.0)
-Requires-Dist: sdif-mcp (>=0.1.0,<1.0.0)
-Description-Content-Type: text/markdown
-
-# SATIF AI
-