iatoolkit 1.9.0__py3-none-any.whl → 1.15.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- iatoolkit/__init__.py +1 -1
- iatoolkit/common/routes.py +1 -1
- iatoolkit/common/util.py +8 -123
- iatoolkit/core.py +1 -0
- iatoolkit/infra/connectors/file_connector.py +10 -2
- iatoolkit/infra/connectors/google_drive_connector.py +3 -0
- iatoolkit/infra/connectors/local_file_connector.py +3 -0
- iatoolkit/infra/connectors/s3_connector.py +24 -1
- iatoolkit/infra/llm_providers/deepseek_adapter.py +17 -1
- iatoolkit/infra/llm_providers/gemini_adapter.py +117 -18
- iatoolkit/infra/llm_providers/openai_adapter.py +175 -18
- iatoolkit/infra/llm_response.py +13 -0
- iatoolkit/locales/en.yaml +47 -2
- iatoolkit/locales/es.yaml +45 -1
- iatoolkit/repositories/llm_query_repo.py +44 -33
- iatoolkit/services/company_context_service.py +294 -133
- iatoolkit/services/dispatcher_service.py +1 -1
- iatoolkit/services/knowledge_base_service.py +26 -4
- iatoolkit/services/llm_client_service.py +58 -2
- iatoolkit/services/prompt_service.py +236 -330
- iatoolkit/services/query_service.py +37 -18
- iatoolkit/services/storage_service.py +92 -0
- iatoolkit/static/js/chat_filepond.js +188 -63
- iatoolkit/static/js/chat_main.js +105 -52
- iatoolkit/static/styles/chat_iatoolkit.css +96 -0
- iatoolkit/system_prompts/query_main.prompt +24 -41
- iatoolkit/templates/chat.html +15 -6
- iatoolkit/views/base_login_view.py +1 -1
- iatoolkit/views/categories_api_view.py +43 -3
- iatoolkit/views/chat_view.py +1 -1
- iatoolkit/views/login_view.py +1 -1
- iatoolkit/views/prompt_api_view.py +1 -1
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/METADATA +1 -1
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/RECORD +38 -37
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/WHEEL +0 -0
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE +0 -0
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE_COMMUNITY.md +0 -0
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/top_level.txt +0 -0
iatoolkit/services/company_context_service.py

```diff
@@ -7,9 +7,11 @@ from iatoolkit.common.util import Utility
 from iatoolkit.services.configuration_service import ConfigurationService
 from iatoolkit.common.interfaces.asset_storage import AssetRepository, AssetType
 from iatoolkit.services.sql_service import SqlService
-from iatoolkit.common.exceptions import IAToolkitException
 import logging
+import yaml
 from injector import inject
+from typing import List, Dict
+import os
 
 
 class CompanyContextService:
```
```diff
@@ -46,8 +48,9 @@ class CompanyContextService:
             logging.warning(f"Could not load Markdown context for '{company_short_name}': {e}")
 
         # 2. Context from company-specific SQL databases
+        db_tables = []
         try:
-            sql_context = self._get_sql_schema_context(company_short_name)
+            sql_context, db_tables = self._get_sql_enriched_context(company_short_name)
             if sql_context:
                 context_parts.append(sql_context)
         except Exception as e:
```
```diff
@@ -55,7 +58,7 @@ class CompanyContextService:
 
         # 3. Context from yaml (schema/*.yaml) files
         try:
-            yaml_schema_context = self._get_yaml_schema_context(company_short_name)
+            yaml_schema_context = self._get_yaml_schema_context(company_short_name, db_tables)
             if yaml_schema_context:
                 context_parts.append(yaml_schema_context)
         except Exception as e:
```
```diff
@@ -64,149 +67,96 @@ class CompanyContextService:
         # Join all parts with a clear separator
         return "\n\n---\n\n".join(context_parts)
 
-    def _get_static_file_context(self, company_short_name: str) -> str:
-        # Get context from .md files using the repository
-        static_context = ''
 
-        try:
-            # 1. List markdown files in the context "folder"
-            # Note: The repo handles where this folder actually is (FS or DB)
-            md_files = self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md')
-
-            for filename in md_files:
-                try:
-                    # 2. Read content
-                    content = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, filename)
-                    static_context += content + "\n"  # Append content
-                except Exception as e:
-                    logging.warning(f"Error reading context file {filename}: {e}")
-
-        except Exception as e:
-            # If listing fails (e.g. folder doesn't exist), just log and return empty
-            logging.warning(f"Error listing context files for {company_short_name}: {e}")
-
-        return static_context
-
-    def _get_sql_schema_context(self, company_short_name: str) -> str:
+    def _get_sql_enriched_context(self, company_short_name: str):
         """
-        Generates the SQL …
+        Generates the SQL context for the LLM using the enriched schema logic.
+        It iterates over configured databases, fetches their enriched structure,
+        and formats it into a prompt-friendly string.
         """
         data_sources_config = self.config_service.get_configuration(company_short_name, 'data_sources')
         if not data_sources_config or not data_sources_config.get('sql'):
-            return ''
+            return '', []
+
+        context_output = []
+        db_tables = []
 
-        sql_context = ''
         for source in data_sources_config.get('sql', []):
             db_name = source.get('database')
             if not db_name:
                 continue
 
-            # get database schema definition, for this source.
-            database_schema_name = source.get('schema', 'public')
-
             try:
-                # 1. Get the …
-                … (old lines 111–165 are blank in this extraction; their content is not recoverable) …
+                # 1. Get the Enriched Schema (Physical + YAML)
+                enriched_structure = self.get_enriched_database_schema(company_short_name, db_name)
+                if not enriched_structure:
+                    continue
+
+                # 2. Build Header for this Database
+                db_context = f"***Database (`database_key`)***: {db_name}\n"
+
+                # Optional: Add DB description from config if available (useful context)
+                db_desc = source.get('description', '')
+                if db_desc:
+                    db_context += f"**Description:** {db_desc}\n"
+
+                db_context += (
+                    f"IMPORTANT: To query this database you MUST use the service/tool "
+                    f"**iat_sql_query**, with `database_key='{db_name}'`.\n"
+                )
+
+                # 3. Format Tables
+                for table_name, table_data in enriched_structure.items():
+                    table_desc = table_data.get('description', '')
+                    columns = table_data.get('columns', [])
+
+                    # Table Header
+                    table_str = f"\nTable: **{table_name}**"
+                    if table_desc:
+                        table_str += f"\nDescription: {table_desc}"
 
-                    if table_prefix and table_name.startswith(table_prefix):
-                        schema_object_name = table_name[len(table_prefix):]
-                    else:
-                        # Priority 4: Default to the table name itself.
-                        schema_object_name = table_name
-
-                    # 7. define the list of columns to exclude, (local vs. global).
-                    local_exclude_columns = table_config.get('exclude_columns')
-                    final_exclude_columns = local_exclude_columns if local_exclude_columns is not None else global_exclude_columns
-
-                    # 8. Build the table definition dictionary manually using the structure data
-                    json_dict = {
-                        "table": table_name,
-                        "schema": database_schema_name,
-                        "description": f"The table belongs to the **`{database_schema_name}`** schema.",
-                        "fields": []
-                    }
-
-                    if schema_object_name:
-                        json_dict["description"] += (
-                            f"The meaning of each field in this table is detailed in the **`{schema_object_name}`** object."
-                        )
-
-                    for col in table_data.get('columns', []):
-                        name = col["name"]
-                        if name in final_exclude_columns:
-                            continue
-
-                        json_dict["fields"].append({
-                            "name": name,
-                            "type": col["type"]
-                        })
-
-                    # Append as string representation of dict (consistent with previous behavior)
-                    sql_context += "\n\n" + str(json_dict)
-
-            except (KeyError, RuntimeError) as e:
-                logging.warning(f"Could not generate schema for table '{table_name}': {e}")
-
-        if sql_context:
-            sql_context = "These are the SQL databases you can query using the **`iat_sql_service`**: \n" + sql_context
-        return sql_context
-
-    def _get_yaml_schema_context(self, company_short_name: str) -> str:
+                    table_str += "\nColumns:"
+
+                    # Format Columns
+                    for col in columns:
+                        col_name = col.get('name')
+                        col_type = col.get('type', 'unknown')
+                        col_desc = col.get('description', '')
+                        col_props = col.get('properties')  # Nested JSONB structure
+
+                        col_line = f"\n - `{col_name}` ({col_type})"
+                        if col_desc:
+                            col_line += f": {col_desc}"
+
+                        table_str += col_line
+
+                        # If it has nested properties (JSONB enriched from YAML), format them
+                        if col_props:
+                            table_str += "\n"
+                            table_str += self._format_json_schema(col_props, 2)  # Indent level 2
+
+                    db_context += table_str
+
+                    # collect the table names for later use
+                    db_tables.append(
+                        {'db_name': db_name,
+                         'table_name': table_name,
+                         }
+                    )
+
+                context_output.append(db_context)
+
+            except Exception as e:
+                logging.warning(f"Could not generate enriched SQL context for '{db_name}': {e}")
+
+        if not context_output:
+            return "", []
+
+        header = "These are the SQL databases you can query using the **`iat_sql_service`**. The schema below includes enriched metadata:\n"
+        return header + "\n\n---\n\n".join(context_output), db_tables
+
+    def _get_yaml_schema_context(self, company_short_name: str, db_tables: List[Dict]) -> str:
         # Get context from .yaml schema files using the repository
         yaml_schema_context = ''
 
```
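The new `_get_sql_enriched_context` replaces the old dict-dump format with a Markdown-style block per database that embeds the routing instruction for the `iat_sql_query` tool. A minimal standalone sketch of the string it builds, inlining the formatting loop on a hypothetical enriched structure (the `sales`/`customers` names are illustrative, not taken from the package):

```python
# Sketch of the prompt text the new _get_sql_enriched_context produces.
# The enriched structure below is hypothetical.
enriched_structure = {
    "customers": {
        "description": "One row per registered customer.",
        "columns": [
            {"name": "id", "type": "integer"},
            {"name": "email", "type": "varchar", "description": "Primary contact address"},
        ],
    }
}

db_name = "sales"
db_context = f"***Database (`database_key`)***: {db_name}\n"
db_context += (
    f"IMPORTANT: To query this database you MUST use the service/tool "
    f"**iat_sql_query**, with `database_key='{db_name}'`.\n"
)

for table_name, table_data in enriched_structure.items():
    table_str = f"\nTable: **{table_name}**"
    if table_data.get("description"):
        table_str += f"\nDescription: {table_data['description']}"
    table_str += "\nColumns:"
    for col in table_data.get("columns", []):
        line = f"\n - `{col.get('name')}` ({col.get('type', 'unknown')})"
        if col.get("description"):
            line += f": {col['description']}"
        table_str += line
    db_context += table_str

print(db_context)
```

Embedding the tool name and `database_key` directly in the per-database header is what lets the model route queries without a separate lookup step.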
```diff
@@ -215,6 +165,18 @@ class CompanyContextService:
             schema_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA, extension='.yaml')
 
             for filename in schema_files:
+                # skip tables that are already in the SQL context
+                if '-' in filename:
+                    dbname, f = filename.split("-", 1)
+                    table_name = f.split('.')[0]
+
+                    exists = any(
+                        item["db_name"] == dbname and item["table_name"] == table_name
+                        for item in db_tables
+                    )
+                    if exists:
+                        continue
+
                 try:
                     # 2. Read content
                     content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, filename)
```
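The skip above keys on the same `<database>-<table>.yaml` naming convention that `get_enriched_database_schema` uses later in this diff, so a table documented in the SQL context is not described twice. A quick sketch of the dedup decision with hypothetical file names:

```python
# How the dedup check parses schema file names (names hypothetical).
db_tables = [{"db_name": "sales", "table_name": "customers"}]

for filename in ["sales-customers.yaml", "glossary.yaml"]:
    skip = False
    if "-" in filename:
        dbname, rest = filename.split("-", 1)
        table_name = rest.split(".")[0]
        skip = any(
            item["db_name"] == dbname and item["table_name"] == table_name
            for item in db_tables
        )
    print(filename, "-> skipped" if skip else "-> processed")
# sales-customers.yaml -> skipped (already covered by the SQL context)
# glossary.yaml -> processed
```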
```diff
@@ -225,7 +187,7 @@ class CompanyContextService:
                     # 4. Generate markdown description from the dict
                     if schema_dict:
                         # We use generate_schema_table which accepts a dict directly
-                        yaml_schema_context += self.…
+                        yaml_schema_context += self.generate_schema_table(schema_dict)
 
                 except Exception as e:
                     logging.warning(f"Error processing schema file {filename}: {e}")
```
```diff
@@ -233,4 +195,203 @@ class CompanyContextService:
         except Exception as e:
             logging.warning(f"Error listing schema files for {company_short_name}: {e}")
 
-        return yaml_schema_context
+        return yaml_schema_context
+
+    def generate_schema_table(self, schema: dict) -> str:
+        if not schema or not isinstance(schema, dict):
+            return ""
+
+        # root detection
+        keys = list(schema.keys())
+        if not keys:
+            return ""
+
+        root_name = keys[0]
+        root_data = schema[root_name]
+        output = [f"\n### Objeto: `{root_name}`"]
+
+        # table description
+        root_description = root_data.get('description', '')
+        if root_description:
+            clean_desc = root_description.replace('\n', ' ').strip()
+            output.append(f"##Descripción: {clean_desc}")
+
+        # extract columns and properties from the root object
+        # priority: columns > properties > fields
+        properties = root_data.get('columns', root_data.get('properties', {}))
+        if properties:
+            output.append("**Estructura de Datos:**")
+
+            # use indent_level 0 for the main columns
+            # call recursive function to format the properties
+            output.append(self._format_json_schema(properties, 0))
+        else:
+            output.append("\n_Sin definición de estructura._")
+
+        return "\n".join(output)
+
+    def _format_json_schema(self, properties: dict, indent_level: int) -> str:
+        output = []
+        indent_str = ' ' * indent_level
+
+        if not isinstance(properties, dict):
+            return ""
+
+        for name, details in properties.items():
+            if not isinstance(details, dict): continue
+
+            description = details.get('description', '')
+            data_type = details.get('type', 'any')
+
+            # Visual normalization: jsonb -> object
+            if data_type and data_type.lower() == 'jsonb':
+                data_type = 'object'
+
+            line = f"{indent_str}- **`{name}`**"
+            if data_type:
+                line += f" ({data_type})"
+            if description:
+                clean_desc = description.replace('\n', ' ').strip()
+                line += f": {clean_desc}"
+
+            output.append(line)
+
+            # Recursion: look for children in 'properties' or 'fields'
+            children = details.get('properties', details.get('fields'))
+
+            # Array case (items -> properties)
+            if not children and details.get('items'):
+                items = details['items']
+                if isinstance(items, dict):
+                    if items.get('description'):
+                        output.append(f"{indent_str}  _Items: {items['description']}_")
+                    children = items.get('properties', items.get('fields'))
+
+            if children:
+                output.append(self._format_json_schema(children, indent_level + 1))
+
+        return "\n".join(output)
+
+
+    def _get_static_file_context(self, company_short_name: str) -> str:
+        # Get context from .md files using the repository
+        static_context = ''
+
+        try:
+            # 1. List markdown files in the context "folder"
+            # Note: The repo handles where this folder actually is (FS or DB)
+            md_files = self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md')
+
+            for filename in md_files:
+                try:
+                    # 2. Read content
+                    content = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, filename)
+                    static_context += content + "\n"  # Append content
+                except Exception as e:
+                    logging.warning(f"Error reading context file {filename}: {e}")
+
+        except Exception as e:
+            # If listing fails (e.g. folder doesn't exist), just log and return empty
+            logging.warning(f"Error listing context files for {company_short_name}: {e}")
+
+        return static_context
+
+    def get_enriched_database_schema(self, company_short_name: str, db_name: str) -> dict:
+        """
+        Retrieves the physical database structure and enriches it with metadata
+        found in the AssetRepository (YAML files).
+        """
+        try:
+            # 1. Physical Structure (Real Source)
+            structure = self.sql_service.get_database_structure(company_short_name, db_name)
+
+            # 2. YAML files
+            available_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA)
+            files_map = {}
+            for f in available_files:
+                clean = f.lower().replace('.yaml', '').replace('.yml', '')
+                if '-' not in clean:
+                    continue  # skip non-table files
+
+                dbname, table = clean.split("-", 1)
+                # filter by the database
+                if dbname != db_name:
+                    continue
+                files_map[table] = f
+
+            logging.debug(f"🔍 [CompanyContextService] Enriching schema for {db_name}. Files found: {len(files_map)}")
+
+            # 3. merge the physical structure with the YAML files
+            for table_name, table_data in structure.items():
+                t_name = table_name.lower().strip()
+
+                real_filename = files_map.get(t_name)
+                if not real_filename:
+                    continue
+
+                try:
+                    content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, real_filename)
+                    if not content:
+                        continue
+
+                    meta = yaml.safe_load(content) or {}
+
+                    # detect root, usually table name
+                    root_data = meta.get(table_name) or meta.get(t_name)
+                    if not root_data and len(meta) == 1:
+                        root_data = list(meta.values())[0]
+
+                    if not root_data:
+                        continue
+
+                    # A. Table description
+                    if 'description' in root_data:
+                        table_data['description'] = root_data['description']
+
+                    # B. get the map of columns from the YAML
+                    yaml_cols = root_data.get('columns', root_data.get('fields', {}))
+
+                    # --- LEGACY ADAPTER: List -> Dictionary ---
+                    if isinstance(yaml_cols, list):
+                        temp_map = {}
+                        for c in yaml_cols:
+                            if isinstance(c, dict) and 'name' in c:
+                                col_name = c['name']
+                                temp_map[col_name] = c
+                        yaml_cols = temp_map
+                    # --------------------------------------------
+
+                    if isinstance(yaml_cols, dict):
+                        # map in lower case for lookup
+                        y_cols_lower = {str(k).lower(): v for k, v in yaml_cols.items()}
+
+                        # Iterate over columns
+                        for col in table_data.get('columns', []):
+                            c_name = str(col['name']).lower()  # Real DB Name
+
+                            if c_name in y_cols_lower:
+                                y_col = y_cols_lower[c_name]
+
+                                # copy the basic metadata from the YAML
+                                if y_col.get('description'): col['description'] = y_col['description']
+                                if y_col.get('pii'): col['pii'] = y_col['pii']
+                                if y_col.get('synonyms'): col['synonyms'] = y_col['synonyms']
+
+                                # C. inject the json schema from the YAML
+                                props = y_col.get('properties')
+                                if props:
+                                    col['properties'] = props
+                    else:
+                        if yaml_cols:
+                            logging.warning(f"⚠️ [CompanyContextService] Unrecognized column format in {real_filename}")
+
+                except Exception as e:
+                    logging.error(f"❌ Error processing schema file {real_filename}: {e}")
+
+            return structure
+
+        except Exception as e:
+            logging.exception(f"Error generating enriched schema for {db_name}")
+            # Depending on policy, re-raise or return empty structure
+            raise e
+
```
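To illustrate the enrichment flow that `get_enriched_database_schema` implements, here is a self-contained sketch under stated assumptions: a hypothetical `sales-customers.yaml` asset, a hand-written physical structure standing in for `sql_service.get_database_structure`, and PyYAML installed. It reproduces the root detection, the legacy list-to-dict adapter, and the column merge:

```python
# Sketch of the YAML-driven enrichment; all names are hypothetical.
import yaml

# Content of a hypothetical schema asset "sales-customers.yaml"
# (the `<db>-<table>.yaml` convention the merge code keys on).
content = """
customers:
  description: One row per registered customer.
  columns:            # dict form; a legacy list form is also accepted
    email:
      type: varchar
      description: Primary contact address
      pii: true
    profile:
      type: jsonb
      properties:
        age: {type: integer, description: Self-reported age}
"""

# Hypothetical physical structure as the SQL service would report it.
structure = {
    "customers": {
        "columns": [
            {"name": "id", "type": "integer"},
            {"name": "email", "type": "varchar"},
            {"name": "profile", "type": "jsonb"},
        ]
    }
}

meta = yaml.safe_load(content) or {}
table_data = structure["customers"]
root_data = meta.get("customers")  # root detection: key matches table name

if "description" in root_data:
    table_data["description"] = root_data["description"]

yaml_cols = root_data.get("columns", root_data.get("fields", {}))
if isinstance(yaml_cols, list):  # legacy adapter: list -> dict
    yaml_cols = {c["name"]: c for c in yaml_cols if isinstance(c, dict) and "name" in c}

# case-insensitive merge of YAML metadata onto the physical columns
y_cols_lower = {str(k).lower(): v for k, v in yaml_cols.items()}
for col in table_data.get("columns", []):
    y_col = y_cols_lower.get(str(col["name"]).lower())
    if not y_col:
        continue
    for key in ("description", "pii", "synonyms", "properties"):
        if y_col.get(key):
            col[key] = y_col[key]

print(structure["customers"]["description"])  # One row per registered customer.
print(structure["customers"]["columns"][1])   # email now carries description + pii
```

The physical structure stays the source of truth: YAML can only annotate columns that actually exist, which keeps the generated context honest even when schema files go stale.

iatoolkit/services/dispatcher_service.py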
```diff
@@ -106,7 +106,7 @@ class Dispatcher:
         if self.tool_service.is_system_tool(function_name):
             # this is the system function to be executed.
             handler = self.tool_service.get_system_handler(function_name)
-            logging.…
+            logging.debug(
                 f"Calling system handler [{function_name}] "
                 f"with company_short_name={company_short_name} "
                 f"and kwargs={kwargs}"
```
iatoolkit/services/knowledge_base_service.py

```diff
@@ -382,27 +382,49 @@ class KnowledgeBaseService:
     def sync_collection_types(self, company_short_name: str, categories_config: list):
         """
         This should be called during company initialization or configuration reload.
+        Syncs DB collection types with the provided list.
+        Also updates the configuration YAML.
         """
         company = self.profile_service.get_company_by_short_name(company_short_name)
         if not company:
             raise IAToolkitException(IAToolkitException.ErrorType.INVALID_NAME,
-                                     … (old lines 389–390 are blank in this extraction; content not recoverable) …
+                                     f'Company {company_short_name} not found')
 
         session = self.document_repo.session
+
+        # 1. Get existing types
         existing_types = session.query(CollectionType).filter_by(company_id=company.id).all()
         existing_names = {ct.name: ct for ct in existing_types}
 
+        # 2. Add new types
+        current_config_names = set()
         for cat_name in categories_config:
+            current_config_names.add(cat_name)
             if cat_name not in existing_names:
                 new_type = CollectionType(company_id=company.id, name=cat_name)
                 session.add(new_type)
 
-        # …
-        # …
+        # 3. Delete types not in config
+        # Note: This might cascade delete documents depending on FK setup.
+        # Assuming safe deletion is desired here to match "Sync" behavior.
+        for existing_ct in existing_types:
+            if existing_ct.name not in current_config_names:
+                session.delete(existing_ct)
 
         session.commit()
 
+        # 4. Update Configuration YAML
+        # Lazy import to avoid circular dependency
+        from iatoolkit import current_iatoolkit
+        from iatoolkit.services.configuration_service import ConfigurationService
+        config_service = current_iatoolkit().get_injector().get(ConfigurationService)
+
+        config_service.update_configuration_key(
+            company_short_name,
+            "knowledge_base.collections",
+            categories_config
+        )
+
     def get_collection_names(self, company_short_name: str) -> List[str]:
         """
         Retrieves the names of all collections defined for a specific company.
```