satif-ai 0.2.8__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,19 @@
 Metadata-Version: 2.3
 Name: satif-ai
-Version: 0.2.8
+Version: 0.2.10
 Summary: AI Agents for Satif
 License: MIT
-Author: Bryan Djafer
-Author-email: bryan.djafer@syncpulse.fr
-Requires-Python: >=3.10,<4.0
+Author: Syncpulse
+Maintainer: Bryan Djafer
+Maintainer-email: bryan.djafer@syncpulse.fr
+Requires-Python: >=3.10,<3.14
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Provides-Extra: xlsx
 Requires-Dist: openai-agents (>=0.0.9,<0.0.10)
 Requires-Dist: satif-sdk (>=0.1.0,<1.0.0)
 Requires-Dist: sdif-mcp (>=0.1.0,<1.0.0)
@@ -1,14 +1,17 @@
 [project]
 name = "satif-ai"
-version = "0.2.8"
+version = "0.2.10"
 description = "AI Agents for Satif"
 authors = [
+    {name = "Syncpulse"}
+]
+maintainers = [
     {name = "Bryan Djafer", email = "bryan.djafer@syncpulse.fr"}
 ]
 license = "MIT"
 readme = "README.md"

-requires-python = ">=3.10,<4.0"
+requires-python = ">=3.10,<3.14"

 [tool.poetry.dependencies]
 openai-agents = ">=0.0.9,<0.0.10"
@@ -19,6 +22,9 @@ sdif-mcp = ">=0.1.0,<1.0.0"
 requires = ["poetry-core>=2.0.0,<3.0.0"]
 build-backend = "poetry.core.masonry.api"

+[tool.poetry.extras]
+xlsx = ["xlsx-to-sdif"]
+
 [project.scripts]
 satif-ai = "satif.cli:main"

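Packaging note (an inference from the metadata above, not text from the diff): with `Provides-Extra: xlsx` declared and `xlsx = ["xlsx-to-sdif"]` registered under `[tool.poetry.extras]`, downstream users would opt in to the spreadsheet support with something like `pip install "satif-ai[xlsx]"` (or `poetry add satif-ai -E xlsx`), which pulls in the `xlsx-to-sdif` dependency.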
@@ -28,6 +34,7 @@ satif-core = {path = "../core", develop = true}
 satif-sdk = {path = "../sdk", develop = true}
 sdif-mcp = {path = "../mcp", develop = true}
 sdif-db = {path = "../sdif", develop = true}
+xlsx-to-sdif = {path = "../xlsx-to-sdif", develop = true}
 ipykernel = "^6.29.5"


@@ -0,0 +1,19 @@
+from .adapters.tidy import TidyAdapter
+from .standardize import astandardize
+from .standardizers.ai import AIStandardizer
+from .standardizers.ai_csv import AICSVStandardizer
+from .transform import atransform
+from .transformation_builders.syncpulse import SyncpulseTransformationBuilder
+from .utils import OpenAICompatibleMCP, extract_zip_archive_async, merge_sdif_files
+
+__all__ = [
+    "astandardize",
+    "atransform",
+    "TidyAdapter",
+    "AICSVStandardizer",
+    "AIStandardizer",
+    "SyncpulseTransformationBuilder",
+    "OpenAICompatibleMCP",
+    "extract_zip_archive_async",
+    "merge_sdif_files",
+]
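For orientation, a minimal sketch (not part of the released files) of what this new top-level module, which appears to be the package's `__init__`, enables: the flattened public API lets consumers import the main entry points directly from the package root.

from satif_ai import TidyAdapter, astandardize, atransform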
@@ -6,23 +6,19 @@ import shutil
 import sqlite3
 import tempfile
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union

-# MCP and Agent imports
 from agents import Agent, Runner, function_tool
-from agents.mcp.server import MCPServerStdio
+from agents.mcp.server import MCPServer
 from mcp import ClientSession
-
-# SATIF imports
 from satif_core.adapters.base import Adapter
-from satif_core.types import Datasource
+from satif_core.types import Datasource, SDIFPath
 from satif_sdk import SDIFDatabase
 from satif_sdk.adapters.code import AdapterError, CodeAdapter

 logger = logging.getLogger(__name__)


-# --- Tidy Transformation Prompt ---
 TIDY_TRANSFORMATION_PROMPT = """
 You are an expert Data Tidying Agent for SDIF databases.
 Your task is to write Python code to transform tables within a given SDIF database into a 'tidy' format, modifying the database *in place*.
@@ -130,12 +126,11 @@ def adapt_sdif(db: SDIFDatabase) -> None:
 - Ensure pandas and other necessary libraries (like `typing`, `AdapterError`) are imported within the code string if you use them.
 """

-# --- Global context for tools ---
-# These will be set within the TidyAdapter instance when adapt is called
+
 TOOL_CONTEXT = {
     "copied_input_sdif_path": None,
     "temp_dir": None,
-    "current_output_sdif_path": None,  # Path generated by the tool
+    "current_output_sdif_path": None,
 }


@@ -167,13 +162,10 @@ async def execute_tidy_adaptation(code: str) -> str:
         )

    try:
-        # 1. Instantiate CodeAdapter with the provided code
-        #    It will operate on a *copy* specified by copied_input_path
-        #    and write to a *new* file (_adapted suffix by default).
        adapter = CodeAdapter(
            function=code,
-            function_name="adapt_sdif",  # As specified in prompt
-            output_suffix="_adapted_tool_run",  # Give tool runs a distinct suffix
+            function_name="adapt_sdif",
+            output_suffix="_adapted_tool_run",
        )
        # Run the adaptation. It copies `copied_input_path` and modifies the copy.
        # The returned path is the newly created, adapted file.
@@ -232,16 +224,16 @@ class TidyAdapter(Adapter):

    def __init__(
        self,
-        mcp_server: MCPServerStdio,  # Use the server instance
-        mcp_session: ClientSession,  # Use the client session
-        llm_model: str = "o4-mini",  # Specify the LLM model
+        mcp_server: MCPServer,
+        mcp_session: ClientSession,
+        llm_model: str = "o4-mini",
        max_iterations: int = 5,
    ):
        """
        Initialize the TidyAdapter.

        Args:
-            mcp_server: An instance of MCPServerStdio for agent communication.
+            mcp_server: An instance of MCPServer for agent communication.
            mcp_session: An instance of ClientSession for resource/prompt fetching.
            llm_model: Name of the language model to use for the agent.
            max_iterations: Maximum number of attempts the agent gets to refine the code.
@@ -339,12 +331,12 @@ class TidyAdapter(Adapter):
            return code_text.strip()
        return None  # Indicate no valid code found

-    async def adapt(self, sdif_database: SDIFDatabase) -> Datasource:
+    async def adapt(self, sdif: Union[SDIFPath, SDIFDatabase]) -> Datasource:
        """
        Transforms the data in the input SDIF to be tidy using an AI agent.

        Args:
-            sdif_database: The input SDIF database instance. Connection will be closed.
+            sdif: The input SDIF database instance. Connection will be closed.

        Returns:
            Path to the new SDIF file containing the tidied data.
@@ -354,13 +346,16 @@
            RuntimeError: If the agent fails to produce valid tidy code.
            Exception: For unexpected errors during the process.
        """
-        input_path = Path(sdif_database.path)
+        if isinstance(sdif, SDIFDatabase):
+            input_path = Path(sdif.path)
+        else:
+            input_path = Path(sdif)
        if not input_path.exists():
            raise FileNotFoundError(f"Input SDIF file not found: {input_path}")

        # Ensure the input DB connection is closed before copying
        try:
-            sdif_database.close()
+            sdif.close()
        except Exception:
            pass

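Illustrative sketch of the widened adapt() signature (assumptions: `server` is a connected MCP server and `session` an `mcp.ClientSession`, and `./input.sdif` exists; the file name and helper function are hypothetical, only the TidyAdapter API comes from the diff):

from satif_ai.adapters.tidy import TidyAdapter

async def tidy_example(server, session):
    adapter = TidyAdapter(mcp_server=server, mcp_session=session)
    # The adapter now accepts a plain SDIF path...
    tidy_path = await adapter.adapt("./input.sdif")
    # ...or, as before, an already-open satif_sdk SDIFDatabase instance.
    return tidy_path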
@@ -372,17 +367,14 @@
            input_schema_dict = db.get_schema()
            input_sample_dict = db.get_sample_analysis()

-            # Get SDIFDatabase method signatures
            sdif_methods_str = self._get_sdif_methods()

-            # Prepare context for the prompt
            initial_context = {
                "input_schema": json.dumps(input_schema_dict, indent=2),
                "input_sample": json.dumps(input_sample_dict, indent=2),
                "sdif_database_methods": sdif_methods_str,
            }

-            # Instantiate the Agent
            agent = Agent(
                name="Tidy SDIF Adapter Agent",
                mcp_servers=[self.mcp_server],
@@ -390,8 +382,6 @@
                model=self.llm_model,
            )

-            # Run the agent using the Runner
-            # Pass the prompt and initial context
            logger.info(f"Running Tidy Agent with model {self.llm_model}...")
            result = await Runner.run(
                agent,
@@ -409,7 +399,6 @@
                f"Agent finished. Final output message:\n{result.final_output[:500]}..."
            )

-            # Parse the final code from the agent's response
            final_code = self.parse_code(result.final_output)

            if not final_code:
@@ -421,20 +410,16 @@
            logger.info(
                "Successfully parsed final adaptation code from agent response."
            )
-            # print(f"--- Final Code ---\n{final_code}\n------------------") # Debugging

-            # Execute the *final* code using CodeAdapter directly to create the definitive output
            logger.info("Executing final adaptation code...")
            final_adapter = CodeAdapter(
                function=final_code,
                function_name="adapt_sdif",
-                output_suffix="_tidy_final",  # Use a distinct suffix for the final output
+                output_suffix="_tidy_final",
            )
-            # Adapt the *original* copied input path
+
            final_adapted_path = final_adapter.adapt(copied_input_path)

-            # Move the final successful output SDIF to a persistent location
-            # Example: place it next to the original input file
            persistent_output_path = (
                input_path.parent / final_adapted_path.name
            ).resolve()
@@ -444,9 +429,7 @@
            )
            persistent_output_path.unlink()

-            shutil.move(
-                str(final_adapted_path), persistent_output_path
-            )  # Move needs strings sometimes
+            shutil.move(str(final_adapted_path), persistent_output_path)
            logger.info(
                f"Successfully generated final tidy SDIF: {persistent_output_path}"
            )
@@ -455,8 +438,6 @@

        except Exception as e:
            logger.exception(f"Error during TidyAdapter adapt process: {e}")
-            # Re-raise or handle as appropriate
            raise
        finally:
-            # Always clean up temporary files
            self._cleanup_temp_env()
@@ -0,0 +1,112 @@
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+from satif_core.standardizers.base import AsyncStandardizer
+from satif_core.types import Datasource, FilePath, SDIFPath, StandardizationResult
+
+from satif_ai.adapters.tidy import TidyAdapter
+from satif_ai.standardizers.ai import AIStandardizer
+
+
+async def astandardize(
+    datasource: Datasource,
+    output_path: SDIFPath,
+    *,
+    overwrite: bool = False,
+    sdif_schema: Optional[Union[FilePath, Dict[str, Any]]] = None,
+    tidy_adapter: Union[bool, TidyAdapter] = False,
+    config: Optional[Dict[str, Any]] = None,
+    standardizer: Optional[AsyncStandardizer] = None,
+    mcp_server: Optional[Any] = None,
+    mcp_session: Optional[Any] = None,
+    llm_model: Optional[str] = None,
+) -> StandardizationResult:
+    """
+    Asynchronously standardizes a datasource into a single, canonical SDIF SQLite file.
+
+    This function serves as the primary entry point for the SATIF standardization layer.
+    It orchestrates the conversion of various input file formats (e.g., CSV, Excel, PDF)
+    from the provided datasource into a unified SDIF (Standard Data Interchange Format)
+    SQLite file. The process may involve AI-driven parsing, schema adaptation, and
+    data tidying, depending on the configuration and the capabilities of the
+    underlying standardizer.
+
+    Args:
+        datasource: The source of the data to be standardized. This can be a
+            single file path (str or Path), a list of file paths, or other
+            datasource types supported by the chosen standardizer.
+        output_path: The path (str or Path) where the output SDIF SQLite database file
+            will be created (e.g., "./output/my_data.sdif").
+        overwrite: If True, an existing SDIF file at `output_path` will be
+            overwritten. Defaults to False.
+        sdif_schema: Optional. Path to an SDIF schema definition file (e.g., a JSON file)
+            or a dictionary representing the schema. If provided, the
+            standardization process (specifically if using the default
+            `AIStandardizer`) may attempt to adapt the data to this
+            target schema.
+        tidy_adapter: Optional. If True, a default `TidyAdapter` may be used.
+            Alternatively, a specific `TidyAdapter` instance can be provided
+            to perform data tidying processes (e.g., cleaning, normalization,
+            restructuring tables). If False (default), no explicit tidying
+            step is initiated by this top-level function, though underlying
+            standardizers might perform their own internal tidying.
+            The specifics depend on the standardizer's capabilities.
+        config: Optional. A dictionary for advanced or standardizer-specific
+            configurations. This config is passed directly to the
+            `standardize` method of the chosen standardizer.
+        standardizer: Optional. An instance of an `AsyncStandardizer` subclass.
+            If provided, this instance will be used for standardization.
+            This allows for using pre-configured or custom standardizers.
+            If None, a default `AIStandardizer` is instantiated using
+            `mcp_server`, `mcp_session`, `llm_model`, `sdif_schema`,
+            and `tidy_adapter`.
+        mcp_server: Optional. The MCP (Model Coordination Platform) server instance.
+            Used if `standardizer` is None for the default `AIStandardizer`.
+        mcp_session: Optional. The MCP session or transport object.
+            Used if `standardizer` is None for the default `AIStandardizer`.
+        llm_model: Optional. The language model to be used by the default `AIStandardizer`
+            if no `standardizer` instance is provided (e.g., "gpt-4o").
+            Each standardizer may have its own default model.
+
+    Returns:
+        A `StandardizationResult` object containing:
+            - `output_path`: The absolute `Path` to the created or updated SDIF database file.
+            - `file_configs`: An optional dictionary detailing configurations used for
+                each processed input file, if applicable and returned by
+                the standardizer.
+
+    Raises:
+        FileNotFoundError: If the `datasource` (or parts of it) does not exist.
+        FileExistsError: If `output_path` exists and `overwrite` is False.
+        ValueError: If input arguments are invalid (e.g., unsupported datasource type).
+        RuntimeError: For general errors during the standardization process.
+            Specific exceptions may also be raised by the underlying
+            standardizer implementation.
+    """
+    if standardizer is None:
+        standardizer = AIStandardizer(
+            mcp_server=mcp_server,
+            mcp_session=mcp_session,
+            llm_model=llm_model,
+            sdif_schema=sdif_schema,
+            tidy_adapter=tidy_adapter
+            if isinstance(tidy_adapter, TidyAdapter)
+            else (TidyAdapter() if tidy_adapter else None),
+        )
+
+    result = await standardizer.standardize(
+        datasource=datasource,
+        output_path=output_path,
+        overwrite=overwrite,
+        config=config,
+    )
+
+    output_sdif_path = (
+        Path(result.output_path)
+        if isinstance(result.output_path, str)
+        else result.output_path
+    )
+
+    return StandardizationResult(
+        output_path=output_sdif_path, file_configs=result.file_configs
+    )
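A minimal usage sketch for the new astandardize entry point (assumptions: `mcp_server` and `mcp_session` are already-initialized MCP objects, `./invoices.csv` exists, and the model name is only the example given in the docstring; this snippet does not ship with the package):

from satif_ai import astandardize

async def standardize_example(mcp_server, mcp_session):
    result = await astandardize(
        datasource=["./invoices.csv"],          # one or more input files
        output_path="./output/invoices.sdif",   # SDIF SQLite file to create
        overwrite=True,
        mcp_server=mcp_server,
        mcp_session=mcp_session,
        llm_model="gpt-4o",                     # example model name from the docstring
    )
    print(result.output_path)                   # absolute Path to the SDIF database
    return result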