satif-ai 0.1.2__tar.gz → 0.2.1__tar.gz
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their public registry.
- {satif_ai-0.1.2 → satif_ai-0.2.1}/PKG-INFO +1 -1
- {satif_ai-0.1.2 → satif_ai-0.2.1}/pyproject.toml +1 -1
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/standardizers/ai_csv.py +120 -141
- {satif_ai-0.1.2 → satif_ai-0.2.1}/LICENSE +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/README.md +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/__init__.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/adapters/__init__.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/adapters/tidy.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/code_builders/__init__.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/code_builders/adaptation.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/code_builders/transformation.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/plot_builders/__init__.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/plot_builders/agent.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/plot_builders/prompt.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/plot_builders/tool.py +0 -0
- {satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/standardizers/__init__.py +0 -0
{satif_ai-0.1.2 → satif_ai-0.2.1}/satif_ai/standardizers/ai_csv.py

@@ -1,10 +1,11 @@
+import asyncio
 import contextvars
 import csv
 import json
 import logging
 import re
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 import clevercsv
 from agents import Agent, Runner, function_tool
@@ -358,6 +359,51 @@ class AICSVStandardizer(CSVStandardizer):  # Inherits from the enhanced CSVStandardizer
         self._initial_encoding_hint = initial_encoding
         # self.generate_description from prompt structure (table_description, column descriptions)
 
+    async def _get_initial_guesses(self, file_path: Path) -> Tuple[str, str]:
+        """Helper to get initial encoding and delimiter guesses for a single file."""
+        encoding_guess = self._initial_encoding_hint
+        if not encoding_guess:
+            try:
+                with open(file_path, "rb") as fb_enc:
+                    enc_sample = fb_enc.read(ENCODING_SAMPLE_SIZE)
+                    detected_enc_info = detect(enc_sample) if enc_sample else None
+                    encoding_guess = (
+                        detected_enc_info["encoding"]
+                        if detected_enc_info and detected_enc_info["encoding"]
+                        else "utf-8"
+                    )
+            except Exception as e:
+                logger.warning(
+                    f"Initial encoding detection for {file_path.name} failed: {e}. Using utf-8."
+                )
+                encoding_guess = "utf-8"
+
+        delimiter_guess = self._initial_delimiter_hint
+        if not delimiter_guess:
+            try:
+                with open(
+                    file_path, encoding=encoding_guess, errors="ignore"
+                ) as f_delim_sample:
+                    delim_sample_text = f_delim_sample.read(DELIMITER_SAMPLE_SIZE)
+                    if delim_sample_text:
+                        sniffer = clevercsv.Sniffer()
+                        dialect = sniffer.sniff(delim_sample_text)
+                        delimiter_guess = (
+                            dialect.delimiter if dialect and dialect.delimiter else ","
+                        )
+                    else:
+                        delimiter_guess = ","
+            except Exception as e:
+                logger.warning(
+                    f"Initial delimiter detection for {file_path.name} failed ({e}). Using ','."
+                )
+                delimiter_guess = ","
+
+        logger.info(
+            f"Initial guesses for {file_path.name} - Encoding: {encoding_guess}, Delimiter: '{delimiter_guess}'"
+        )
+        return encoding_guess, delimiter_guess
+
     async def _run_analysis_agent(
         self,
         file_path: Path,
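Note on the new helper: it layers two independent guesses, a byte-level encoding sniff (via `detect`, whose import sits outside this hunk) followed by a delimiter sniff on the decoded text via `clevercsv`. A minimal standalone sketch of the same two-stage pattern, assuming `detect` comes from `charset-normalizer` (chardet's `detect` has the same shape) and using hypothetical sample sizes in place of the module's `ENCODING_SAMPLE_SIZE`/`DELIMITER_SAMPLE_SIZE` constants:

```python
from pathlib import Path
from typing import Tuple

import clevercsv
from charset_normalizer import detect  # assumption: the module may import chardet's detect instead

ENCODING_SAMPLE_SIZE = 64 * 1024   # hypothetical sizes; the real constants
DELIMITER_SAMPLE_SIZE = 16 * 1024  # are defined elsewhere in ai_csv.py

def guess_encoding_and_delimiter(path: Path) -> Tuple[str, str]:
    # Stage 1: guess the encoding from a raw byte sample.
    raw = path.read_bytes()[:ENCODING_SAMPLE_SIZE]
    info = detect(raw) if raw else None
    encoding = (info or {}).get("encoding") or "utf-8"

    # Stage 2: sniff the delimiter from a decoded text sample.
    text = path.read_text(encoding=encoding, errors="ignore")[:DELIMITER_SAMPLE_SIZE]
    try:
        dialect = clevercsv.Sniffer().sniff(text)
        delimiter = (dialect.delimiter if dialect else None) or ","
    except Exception:
        delimiter = ","
    return encoding, delimiter

print(guess_encoding_and_delimiter(Path("sample.csv")))  # hypothetical file
```

Decoding with `errors="ignore"` before sniffing mirrors the helper above: a wrong encoding guess degrades the delimiter sniff instead of raising.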
@@ -469,24 +515,21 @@ class AICSVStandardizer(CSVStandardizer):  # Inherits from the enhanced CSVStandardizer
     async def standardize(
         self,
         datasource: Datasource,
-        output_path: SDIFPath,
+        output_path: SDIFPath,
         *,
         overwrite: bool = False,
         config: Optional[Dict[str, Any]] = None,
         **kwargs,
     ) -> Path:
         output_path_obj = Path(output_path)
+
+        input_paths: List[Path]
         if isinstance(datasource, (str, Path)):
             input_paths = [Path(datasource)]
         elif isinstance(datasource, list) and all(
             isinstance(p, (str, Path)) for p in datasource
         ):
             input_paths = [Path(p) for p in datasource]
-            if len(input_paths) > 1:
-                logger.warning(
-                    "AICSVStandardizer currently processes one CSV file at a time for detailed AI analysis. Using the first file only."
-                )
-                input_paths = [input_paths[0]]  # Process one file if multiple given
         else:
             raise TypeError(
                 "datasource must be a file path string/Path object or a list of such paths."
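The deleted guard is the behavioral headline of this release: `standardize` no longer truncates a multi-file datasource to its first entry. A usage sketch, assuming the constructor's arguments are all optional; the file names are hypothetical:

```python
import asyncio
from pathlib import Path

from satif_ai.standardizers.ai_csv import AICSVStandardizer

async def main() -> None:
    standardizer = AICSVStandardizer()  # encoding/delimiter hints are optional
    # As of 0.2.1, every path in the list is analyzed and standardized.
    out = await standardizer.standardize(
        datasource=[Path("orders.csv"), Path("customers.csv")],  # hypothetical files
        output_path=Path("output.sdif"),
        overwrite=True,
    )
    print(f"SDIF database written to {out}")

asyncio.run(main())
```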
@@ -495,168 +538,104 @@ class AICSVStandardizer(CSVStandardizer):  # Inherits from the enhanced CSVStandardizer
         if not input_paths:
             raise ValueError("No input datasource provided.")
 
-
-
-
-
-
-                raise FileNotFoundError(
-                    f"Input CSV file not found or is not a file: {input_path}"
-                )
-
-        logger.info(f"--- AI Analysis for file: {input_path.name} ---")
-
-        # 1. Initial Guesses for AI
-        initial_encoding_guess = self._initial_encoding_hint
-        if not initial_encoding_guess:
-            try:
-                # Use base class's _detect_encoding, need an instance or make it static/helper
-                # For simplicity, re-implement or call a static version if available.
-                # Here, we simulate it for now or assume base standardizer's helper is callable.
-                with open(input_path, "rb") as fb_enc:
-                    enc_sample = fb_enc.read(ENCODING_SAMPLE_SIZE)
-                    detected_enc_info = detect(enc_sample) if enc_sample else None
-                    initial_encoding_guess = (
-                        detected_enc_info["encoding"]
-                        if detected_enc_info and detected_enc_info["encoding"]
-                        else "utf-8"
+        ai_analysis_tasks = []
+        for input_file_path in input_paths:
+            if not input_file_path.exists() or not input_file_path.is_file():
+                raise FileNotFoundError(
+                    f"Input CSV file not found or is not a file: {input_file_path}"
                 )
+
+            # Create a task for each file's analysis
+            # Need to wrap _get_initial_guesses and _run_analysis_agent in a single async co-routine for gather
+            async def analyze_file_task(file_path_for_task: Path):
                 logger.info(
-                    f"
+                    f"--- Starting AI Analysis for file: {file_path_for_task.name} ---"
                 )
-
-
-                    f"Initial encoding detection failed: {e}. Using utf-8 as fallback guess."
+                enc_guess, delim_guess = await self._get_initial_guesses(
+                    file_path_for_task
                 )
-
-
-            logger.info(
-                f"Using provided initial encoding hint: {initial_encoding_guess}"
-            )
-
-        initial_delimiter_guess = self._initial_delimiter_hint
-        if not initial_delimiter_guess:
-            try:
-                with open(
-                    input_path, encoding=initial_encoding_guess, errors="ignore"
-                ) as f_delim_sample:
-                    delim_sample_text = f_delim_sample.read(DELIMITER_SAMPLE_SIZE)
-                    if delim_sample_text:
-                        # Simulate base class's _detect_delimiter
-                        sniffer = clevercsv.Sniffer()
-                        dialect = sniffer.sniff(delim_sample_text)
-                        initial_delimiter_guess = dialect.delimiter if dialect else ","
-                        logger.info(
-                            f"Initial delimiter guess (detected): '{initial_delimiter_guess}'"
-                        )
-                    else:
-                        initial_delimiter_guess = ","  # Fallback
-                        logger.warning(
-                            f"File empty/small, defaulting delimiter guess to ',' for {input_path.name}"
-                        )
-            except Exception as e:
-                logger.warning(
-                    f"Initial delimiter detection failed ({e}). Using ',' as fallback guess for {input_path.name}."
+                return await self._run_analysis_agent(
+                    file_path_for_task, enc_guess, delim_guess
                 )
-                initial_delimiter_guess = ","
-        else:
-            logger.info(
-                f"Using provided initial delimiter hint: '{initial_delimiter_guess}'"
-            )
 
-
+            ai_analysis_tasks.append(
+                analyze_file_task(input_file_path)
+            )  # Pass the path to the task
+
+        logger.info(f"Starting AI analysis for {len(ai_analysis_tasks)} CSV file(s)...")
         try:
-
-                input_path,
-                initial_encoding_guess,
-                initial_delimiter_guess,
-            )
+            all_ai_params_results = await asyncio.gather(*ai_analysis_tasks)
         except Exception as e:
-            logger.exception(
-
+            logger.exception(f"Critical error during concurrent AI analysis phase: {e}")
+            raise RuntimeError("AI analysis phase failed.") from e
+
+        logger.info(
+            f"AI analysis complete for all {len(all_ai_params_results)} file(s)."
+        )
+
+        # Aggregate parameters for the base CSVStandardizer
+        all_ai_table_names: List[str] = []
+        all_ai_table_descriptions: List[Optional[str]] = []
+        all_ai_file_configs: List[Dict[str, Any]] = []
+        all_ai_column_definitions: List[
+            List[Dict[str, Any]]
+        ] = []  # List of lists of col_specs
+
+        for i, ai_params in enumerate(all_ai_params_results):
+            current_file_path = input_paths[i]  # Get corresponding input path
+            logger.info(f"Aggregating AI parameters for: {current_file_path.name}")
+            logger.info(f"  AI Table Name: {ai_params['table_name']}")
+            logger.info(f"  AI Encoding: {ai_params['encoding']}")
+            logger.info(f"  AI Delimiter: '{ai_params['delimiter']}'")
+            logger.info(f"  AI Has Header: {ai_params['has_header']}")
+            logger.info(f"  AI Skip Rows: {ai_params['skip_rows']}")
+            logger.info(
+                f"  AI Table Description: {ai_params.get('table_description') if ai_params.get('table_description') is not None else 'N/A'}"
             )
-
-
-
-
-
-
-            # Base class expects: List[Optional[Dict[str, List[Dict[str, Any]]]]]
-            # For a single file, it's List containing one Dict: [{table_name: [col_specs...]}]
-            # Or, if base class is adapted, List containing one List: [[col_specs...]]
-
-        # The AI output `ai_params["columns"]` is already in the format:
-        # [{"identifier_in_csv": ..., "final_column_name": ..., "description": ...}, ...]
-        # This is exactly what the enhanced CSVStandardizer's `_setup_columns` expects for `defined_columns_spec`
-        # when `column_definitions` is a list containing this list of specs.
-
-        ai_column_definitions = [
-            ai_params["columns"]
-        ]  # Wrap the list of col specs for the single file/table
-
-        # The base CSVStandardizer will use its own _sanitize_name for the table name from AI.
-        # We provide it via table_names list.
-        ai_table_name = [ai_params["table_name"]]
-        ai_table_description = [
-            ai_params.get("table_description")
-        ]  # List of one description
-
-        # File-specific config for the base standardizer
-        # For a single file, this will be a list containing one dictionary.
-        file_specific_config = [
-            {
+            # logger.info(f"  AI Column Definitions ({len(ai_params['columns'])} cols): {ai_params['columns'][:2]}...")  # Log a sample
+
+            all_ai_table_names.append(ai_params["table_name"])
+            all_ai_table_descriptions.append(ai_params.get("table_description"))
+
+            file_conf = {
                 "encoding": ai_params["encoding"],
                 "delimiter": ai_params["delimiter"],
                 "has_header": ai_params["has_header"],
                 "skip_rows": ai_params["skip_rows"],
-                #
-                # as column selection is implicit in the provided definitions.
-                "skip_columns": None,  # Explicitly set to None
+                "skip_columns": None,  # Column selection is handled by column_definitions
             }
-
+            all_ai_file_configs.append(file_conf)
+            all_ai_column_definitions.append(
+                ai_params["columns"]
+            )  # This is List[Dict], so we append it directly
 
-
-        logger.info(f"  Table Name: {ai_table_name[0]}")
-        logger.info(f"  Encoding: {file_specific_config[0]['encoding']}")
-        logger.info(f"  Delimiter: '{file_specific_config[0]['delimiter']}'")
-        logger.info(f"  Has Header: {file_specific_config[0]['has_header']}")
-        logger.info(f"  Skip Rows: {file_specific_config[0]['skip_rows']}")
+        # Instantiate the base CSVStandardizer with aggregated AI-derived parameters
         logger.info(
-
+            "Initializing final CSVStandardizer with aggregated AI parameters..."
         )
-        logger.info(f"  Column Definitions ({len(ai_column_definitions[0])} cols):")
-        for i, c_def in enumerate(ai_column_definitions[0]):
-            logger.info(
-                f"    {i + 1}. ID in CSV: '{c_def['identifier_in_csv']}', Final Name: '{c_def['final_column_name']}', Desc: '{c_def.get('description', 'N/A')}'"
-            )
-
-        # 4. Call Base Class Standardizer Logic with AI-derived parameters
-        # We instantiate a new CSVStandardizer configured by the AI for this specific file.
         final_processor = CSVStandardizer(
-
-
-
-
-
-            # default_skip_columns from __init__ can remain as a very deep fallback if AI somehow fails for columns
-            skip_columns=self.default_skip_columns,
+            table_names=all_ai_table_names,
+            descriptions=all_ai_table_descriptions,
+            file_configs=all_ai_file_configs,
+            column_definitions=all_ai_column_definitions,
+            skip_columns=self.default_skip_columns,  # Fallback, though ideally not used if AI defines all columns
         )
 
         try:
-
+            logger.info(
+                f"Executing batch standardization for {len(input_paths)} file(s)..."
+            )
             result_path = final_processor.standardize(
-                datasource=
+                datasource=input_paths,  # Pass the original list of Path objects
                 output_path=output_path_obj,
                 overwrite=overwrite,
             )
             logger.info(
-                f"AI CSV Standardization complete for
+                f"AI CSV Standardization complete for all files. Output: {result_path}"
             )
             return result_path
         except Exception as e:
             logger.exception(
-                f"Error during final standardization step using AI parameters
+                f"Error during final batch standardization step using AI parameters: {e}"
             )
-            raise RuntimeError(
-                f"Final standardization step failed for {input_path.name}."
-            ) from e
+            raise RuntimeError("Final batch standardization step failed.") from e
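The aggregation loop above depends on an ordering guarantee: `asyncio.gather` returns results in the order its awaitables were passed, so `all_ai_params_results[i]` always corresponds to `input_paths[i]` even though the per-file analyses run concurrently. Note also that the diff defines `analyze_file_task` inside the loop and calls it immediately with `input_file_path` rather than closing over the loop variable, which sidesteps Python's late-binding closure pitfall. A self-contained sketch of that fan-out/aggregate shape, with a stub in place of the real `_get_initial_guesses` + `_run_analysis_agent` pipeline:

```python
import asyncio
from pathlib import Path
from typing import Any, Dict, List

async def analyze_file(path: Path) -> Dict[str, Any]:
    # Stub for the per-file AI analysis (initial guesses + agent run).
    await asyncio.sleep(0.1)  # simulate I/O-bound agent work
    return {"table_name": path.stem, "delimiter": ","}

async def main() -> None:
    input_paths = [Path("a.csv"), Path("b.csv"), Path("c.csv")]  # hypothetical
    tasks = [analyze_file(p) for p in input_paths]
    # gather preserves argument order, so results line up with input_paths.
    results: List[Dict[str, Any]] = await asyncio.gather(*tasks)
    for path, params in zip(input_paths, results):
        print(f"{path.name}: table={params['table_name']}")

asyncio.run(main())
```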