PyPI - iatoolkit - Versions diffs - 1.4.2__py3-none-any.whl → 1.9.0__py3-none-any.whl - Mend

iatoolkit 1.4.2__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

iatoolkit/__init__.py +1 -1
iatoolkit/common/interfaces/database_provider.py +13 -8
iatoolkit/common/routes.py +24 -6
iatoolkit/common/util.py +21 -1
iatoolkit/infra/connectors/file_connector_factory.py +1 -0
iatoolkit/infra/connectors/s3_connector.py +4 -2
iatoolkit/locales/en.yaml +72 -5
iatoolkit/locales/es.yaml +71 -4
iatoolkit/repositories/database_manager.py +27 -47
iatoolkit/repositories/llm_query_repo.py +29 -7
iatoolkit/repositories/models.py +16 -7
iatoolkit/services/company_context_service.py +44 -20
iatoolkit/services/configuration_service.py +227 -71
iatoolkit/services/dispatcher_service.py +0 -3
iatoolkit/services/knowledge_base_service.py +14 -1
iatoolkit/services/load_documents_service.py +10 -3
iatoolkit/services/prompt_service.py +210 -29
iatoolkit/services/sql_service.py +17 -0
iatoolkit/templates/chat.html +2 -1
iatoolkit/views/categories_api_view.py +71 -0
iatoolkit/views/configuration_api_view.py +163 -0
iatoolkit/views/prompt_api_view.py +88 -7
{iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/METADATA +1 -1
{iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/RECORD +28 -27
iatoolkit/views/load_company_configuration_api_view.py +0 -49
{iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/WHEEL +0 -0
{iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/licenses/LICENSE +0 -0
{iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/licenses/LICENSE_COMMUNITY.md +0 -0
{iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/top_level.txt +0 -0

iatoolkit/services/company_context_service.py CHANGED Viewed

@@ -104,16 +104,17 @@ class CompanyContextService:
                 continue
             # get database schema definition, for this source.
-            database_schema_name = source.get('schema')
+            database_schema_name = source.get('schema', 'public')
             try:
-                db_provider = self.sql_service.get_database_provider(company_short_name, db_name)
+                # 1. Get the full database structure at once using the SQL service
+                db_structure = self.sql_service.get_database_structure(company_short_name, db_name)
             except IAToolkitException as e:
-                logging.warning(f"Could not get DB provider for '{db_name}': {e}")
+                logging.warning(f"Could not get DB structure for '{db_name}': {e}")
                 continue
             db_description = source.get('description', '')
-            sql_context = f"***Database (`database_key`)***: {db_name}\n"
+            sql_context += f"***Database (`database_key`)***: {db_name}\n"
             if db_description:
                 sql_context += (
@@ -131,28 +132,32 @@ class CompanyContextService:
                 f"Use exactly: `database_key='{db_name}'`.\n"
             )
-            # 1. get the list of tables to process.
+            # 2. get the list of tables to process based on structure and config
             tables_to_process = []
             if source.get('include_all_tables', False):
-                all_tables = db_provider.get_all_table_names()
+                # Use keys from the fetched structure
+                all_tables = list(db_structure.keys())
                 tables_to_exclude = set(source.get('exclude_tables', []))
                 tables_to_process = [t for t in all_tables if t not in tables_to_exclude]
             elif 'tables' in source:
-                # if not include_all_tables, use the list of tables explicitly specified in the map.
-                tables_to_process = list(source['tables'].keys())
+                # Use keys from the config map, but check if they exist in DB structure
+                config_tables = list(source['tables'].keys())
+                tables_to_process = [t for t in config_tables if t in db_structure]
-            # 2. get the global settings and overrides.
+            # 3. get the global settings and overrides.
             global_exclude_columns = source.get('exclude_columns', [])
             table_prefix = source.get('table_prefix')
             table_overrides = source.get('tables', {})
-            # 3. iterate over the tables.
+            # 4. iterate over the tables.
             for table_name in tables_to_process:
                 try:
-                    # 4. get the table specific configuration.
+                    table_data = db_structure[table_name]
+                    # 5. get the table specific configuration.
                     table_config = table_overrides.get(table_name, {})
-                    # 5. define the schema object name, using the override if it exists.
+                    # 6. define the schema object name, using the override if it exists.
                     # Priority 1: Explicit override from the 'tables' map.
                     schema_object_name = table_config.get('schema_name')
@@ -164,17 +169,36 @@ class CompanyContextService:
                             # Priority 4: Default to the table name itself.
                             schema_object_name = table_name
-                    # 6. define the list of columns to exclude, (local vs. global).
+                    # 7. define the list of columns to exclude, (local vs. global).
                     local_exclude_columns = table_config.get('exclude_columns')
                     final_exclude_columns = local_exclude_columns if local_exclude_columns is not None else global_exclude_columns
-                    # 7. get the table schema definition.
-                    table_definition = db_provider.get_table_description(
-                        table_name=table_name,
-                        schema_object_name=schema_object_name,
-                        exclude_columns=final_exclude_columns
-                    )
-                    sql_context += table_definition
+                    # 8. Build the table definition dictionary manually using the structure data
+                    json_dict = {
+                        "table": table_name,
+                        "schema": database_schema_name,
+                        "description": f"The table belongs to the **`{database_schema_name}`** schema.",
+                        "fields": []
+                    }
+                    if schema_object_name:
+                        json_dict["description"] += (
+                            f"The meaning of each field in this table is detailed in the **`{schema_object_name}`** object."
+                        )
+                    for col in table_data.get('columns', []):
+                        name = col["name"]
+                        if name in final_exclude_columns:
+                            continue
+                        json_dict["fields"].append({
+                            "name": name,
+                            "type": col["type"]
+                        })
+                    # Append as string representation of dict (consistent with previous behavior)
+                    sql_context += "\n\n" + str(json_dict)
                 except (KeyError, RuntimeError) as e:
                     logging.warning(f"Could not generate schema for table '{table_name}': {e}")

iatoolkit/services/configuration_service.py CHANGED Viewed

@@ -38,41 +38,11 @@ class ConfigurationService:
         if company_short_name not in self._loaded_configs:
             self._loaded_configs[company_short_name] = self._load_and_merge_configs(company_short_name)
-    def get_configuration(self, company_short_name: str, content_key: str):
-        """
-        Public method to provide a specific section of a company's configuration.
-        It uses a cache to avoid reading files from disk on every call.
-        """
-        self._ensure_config_loaded(company_short_name)
-        return self._loaded_configs[company_short_name].get(content_key)
-    def get_llm_configuration(self, company_short_name: str):
-        """
-        Convenience helper to obtain the 'llm' configuration block for a company.
-        Kept separate from get_configuration() to avoid coupling tests that
-        assert the number of calls to get_configuration().
-        """
-        default_llm_model = None
-        available_llm_models = []
-        self._ensure_config_loaded(company_short_name)
-        llm_config = self._loaded_configs[company_short_name].get("llm")
-        if llm_config:
-            default_llm_model = llm_config.get("model")
-            available_llm_models = llm_config.get('available_models') or []
-        # fallback: if no explicit list of models is provided, use the default model
-        if not available_llm_models and default_llm_model:
-            available_llm_models = [{
-                "id": default_llm_model,
-                "label": default_llm_model,
-                "description": "Modelo por defecto configurado para esta compañía."
-            }]
-        return default_llm_model, available_llm_models
     def load_configuration(self, company_short_name: str):
         """
         Main entry point for configuring a company instance.
         This method is invoked by the dispatcher for each registered company.
+        And for the configurator, for editing the configuration of a company.
         """
         logging.info(f"⚙️  Starting configuration for company '{company_short_name}'...")
@@ -97,44 +67,109 @@ class ConfigurationService:
         logging.info(f"✅ Company '{company_short_name}' configured successfully.")
         return config, errors
-    def _load_and_merge_configs(self, company_short_name: str) -> dict:
+    def get_configuration(self, company_short_name: str, content_key: str):
         """
-        Loads the main company.yaml and merges data from supplementary files
-        specified in the 'content_files' section using AssetRepository.
+        Public method to provide a specific section of a company's configuration.
+        It uses a cache to avoid reading files from disk on every call.
+        """
+        self._ensure_config_loaded(company_short_name)
+        return self._loaded_configs[company_short_name].get(content_key)
+    def update_configuration_key(self, company_short_name: str, key: str, value) -> tuple[dict, list[str]]:
+        """
+        Updates a specific key in the company's configuration file, validates the result,
+        and saves it to the asset repository if valid.
+        Args:
+            company_short_name: The company identifier.
+            key: The configuration key to update (supports dot notation, e.g., 'llm.model').
+            value: The new value for the key.
+        Returns:
+            A tuple containing the updated configuration dict and a list of error strings (if any).
         """
+        # 1. Load raw config from file (to avoid working with merged supplementary files if possible,
+        # but for simplicity we load the main yaml structure)
         main_config_filename = "company.yaml"
-        # verify existence of the main configuration file
         if not self.asset_repo.exists(company_short_name, AssetType.CONFIG, main_config_filename):
-            # raise FileNotFoundError(f"Main configuration file not found: {main_config_filename}")
-            logging.exception(f"Main configuration file not found: {main_config_filename}")
+            raise FileNotFoundError(f"Configuration file not found for {company_short_name}")
-            # return the minimal configuration needed for starting the IAToolkit
-            # this is a for solving a chicken/egg problem when trying to migrate the configuration
-            # from filesystem to database in enterprise installation
-            # see create_assets cli command in enterprise-iatoolkit)
-            return {
-                'id': company_short_name,
-                'name': company_short_name,
-                'llm': {'model': 'gpt-5', 'provider_api_keys': {'openai':''} },
-                }
+        yaml_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, main_config_filename)
+        config = self.utility.load_yaml_from_string(yaml_content) or {}
+        # 2. Update the key in the dictionary
+        self._set_nested_value(config, key, value)
+        # 3. Validate the new configuration structure
+        errors = self._validate_configuration(company_short_name, config)
+        if errors:
+            logging.warning(f"Configuration update failed validation: {errors}")
+            return config, errors
+        # 4. Save back to repository
+        # Assuming Utility has a method to dump YAML. If not, standard yaml library would be needed.
+        # For this example, we assume self.utility.dump_yaml_to_string exists.
+        new_yaml_content = self.utility.dump_yaml_to_string(config)
+        self.asset_repo.write_text(company_short_name, AssetType.CONFIG, main_config_filename, new_yaml_content)
+        # 5. Invalidate cache so next reads get the new version
+        if company_short_name in self._loaded_configs:
+            del self._loaded_configs[company_short_name]
+        return config, []
+    def add_configuration_key(self, company_short_name: str, parent_key: str, key: str, value) -> tuple[dict, list[str]]:
+        """
+        Adds a new key-value pair under a specific parent key in the configuration.
+        Args:
+            company_short_name: The company identifier.
+            parent_key: The parent configuration key under which to add the new key (e.g., 'llm').
+            key: The new key name to add.
+            value: The value for the new key.
+        Returns:
+            A tuple containing the updated configuration dict and a list of error strings (if any).
+        """
+        # 1. Load raw config from file
+        main_config_filename = "company.yaml"
+        if not self.asset_repo.exists(company_short_name, AssetType.CONFIG, main_config_filename):
+            raise FileNotFoundError(f"Configuration file not found for {company_short_name}")
-        # read text and parse
         yaml_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, main_config_filename)
-        config = self.utility.load_yaml_from_string(yaml_content)
-        if not config:
-            return {}
+        config = self.utility.load_yaml_from_string(yaml_content) or {}
-        # Load and merge supplementary content files (e.g., onboarding_cards)
-        for key, filename in config.get('help_files', {}).items():
-            if self.asset_repo.exists(company_short_name, AssetType.CONFIG, filename):
-                supp_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, filename)
-                config[key] = self.utility.load_yaml_from_string(supp_content)
-            else:
-                logging.warning(f"⚠️  Warning: Content file not found: {filename}")
-                config[key] = None
+        # 2. Construct full path and set the value
+        # If parent_key is provided, we append the new key to it (e.g., 'llm.new_setting')
+        full_path = f"{parent_key}.{key}" if parent_key else key
+        self._set_nested_value(config, full_path, value)
-        return config
+        # 3. Validate the new configuration structure
+        errors = self._validate_configuration(company_short_name, config)
+        if errors:
+            logging.warning(f"Configuration add failed validation: {errors}")
+            return config, errors
+        # 4. Save back to repository
+        new_yaml_content = self.utility.dump_yaml_to_string(config)
+        self.asset_repo.write_text(company_short_name, AssetType.CONFIG, main_config_filename, new_yaml_content)
+        # 5. Invalidate cache
+        if company_short_name in self._loaded_configs:
+            del self._loaded_configs[company_short_name]
+        return config, []
+    def validate_configuration(self, company_short_name: str) -> list[str]:
+        """
+        Public method to trigger validation of the current configuration.
+        """
+        config = self._load_and_merge_configs(company_short_name)
+        return self._validate_configuration(company_short_name, config)
     def _register_company_database(self, config: dict) -> Company:
         # register the company in the database: create_or_update logic
@@ -239,13 +274,11 @@ class ConfigurationService:
         from iatoolkit.services.prompt_service import PromptService
         prompt_service = current_iatoolkit().get_injector().get(PromptService)
-        prompts_config = config.get('prompts', [])
-        categories_config = config.get('prompt_categories', [])
+        prompt_list, categories_config = self._get_prompt_config(config)
         prompt_service.sync_company_prompts(
             company_short_name=company_short_name,
-            prompts_config=prompts_config,
-            categories_config=categories_config
+            prompt_list=prompt_list,
+            categories_config=categories_config,
         )
     def _register_knowledge_base(self, company_short_name: str, config: dict):
@@ -260,7 +293,6 @@ class ConfigurationService:
         # sync collection types in database
         knowledge_base.sync_collection_types(company_short_name, categories_config)
     def _validate_configuration(self, company_short_name: str, config: dict):
         """
         Validates the structure and consistency of the company.yaml configuration.
@@ -328,8 +360,10 @@ class ConfigurationService:
                 add_error(f"tools[{i}]", "'params' key must be a dictionary.")
         # 6. Prompts
-        category_set = set(config.get("prompt_categories", []))
-        for i, prompt in enumerate(config.get("prompts", [])):
+        prompt_list, categories_config = self._get_prompt_config(config)
+        category_set = set(categories_config)
+        for i, prompt in enumerate(prompt_list):
             prompt_name = prompt.get("name")
             if not prompt_name:
                 add_error(f"prompts[{i}]", "Missing required key: 'name'")
@@ -343,10 +377,12 @@ class ConfigurationService:
                     add_error(f"prompts[{i}]", "Missing required key: 'description'")
             prompt_cat = prompt.get("category")
-            if not prompt_cat:
-                add_error(f"prompts[{i}]", "Missing required key: 'category'")
-            elif prompt_cat not in category_set:
-                add_error(f"prompts[{i}]", f"Category '{prompt_cat}' is not defined in 'prompt_categories'.")
+            prompt_type = prompt.get("prompt_type", 'company').lower()
+            if prompt_type == 'company':
+                if not prompt_cat:
+                    add_error(f"prompts[{i}]", "Missing required key: 'category'")
+                elif prompt_cat not in category_set:
+                    add_error(f"prompts[{i}]", f"Category '{prompt_cat}' is not defined in 'prompt_categories'.")
         # 7. User Feedback
         feedback_config = config.get("parameters", {}).get("user_feedback", {})
@@ -393,3 +429,123 @@ class ConfigurationService:
         return errors
+    def _set_nested_value(self, data: dict, key: str, value):
+        """
+        Helper to set a value in a nested dictionary or list using dot notation (e.g. 'llm.model', 'tools.0.name').
+        Handles traversal through both dictionaries and lists.
+        """
+        keys = key.split('.')
+        current = data
+        # Traverse up to the parent of the target key
+        for i, k in enumerate(keys[:-1]):
+            if isinstance(current, dict):
+                # If it's a dict, we can traverse or create the path
+                current = current.setdefault(k, {})
+            elif isinstance(current, list):
+                # If it's a list, we MUST use an integer index
+                try:
+                    idx = int(k)
+                    # Allow accessing existing index
+                    current = current[idx]
+                except (ValueError, IndexError) as e:
+                    raise ValueError(
+                        f"Invalid path: cannot access index '{k}' in list at '{'.'.join(keys[:i + 1])}'") from e
+            else:
+                raise ValueError(
+                    f"Invalid path: '{k}' is not a container (got {type(current)}) at '{'.'.join(keys[:i + 1])}'")
+        # Set the final value
+        last_key = keys[-1]
+        if isinstance(current, dict):
+            current[last_key] = value
+        elif isinstance(current, list):
+            try:
+                idx = int(last_key)
+                # If index equals length, it means append
+                if idx == len(current):
+                    current.append(value)
+                elif 0 <= idx < len(current):
+                    current[idx] = value
+                else:
+                    raise IndexError(f"Index {idx} out of range for list of size {len(current)}")
+            except (ValueError, IndexError) as e:
+                raise ValueError(f"Invalid path: cannot assign to index '{last_key}' in list") from e
+        else:
+            raise ValueError(f"Cannot assign value to non-container type {type(current)} at '{key}'")
+    def get_llm_configuration(self, company_short_name: str):
+        """
+        Convenience helper to obtain the 'llm' configuration block for a company.
+        Kept separate from get_configuration() to avoid coupling tests that
+        assert the number of calls to get_configuration().
+        """
+        default_llm_model = None
+        available_llm_models = []
+        self._ensure_config_loaded(company_short_name)
+        llm_config = self._loaded_configs[company_short_name].get("llm")
+        if llm_config:
+            default_llm_model = llm_config.get("model")
+            available_llm_models = llm_config.get('available_models') or []
+        # fallback: if no explicit list of models is provided, use the default model
+        if not available_llm_models and default_llm_model:
+            available_llm_models = [{
+                "id": default_llm_model,
+                "label": default_llm_model,
+                "description": "Modelo por defecto configurado para esta compañía."
+            }]
+        return default_llm_model, available_llm_models
+    def _load_and_merge_configs(self, company_short_name: str) -> dict:
+        """
+        Loads the main company.yaml and merges data from supplementary files
+        specified in the 'content_files' section using AssetRepository.
+        """
+        main_config_filename = "company.yaml"
+        # verify existence of the main configuration file
+        if not self.asset_repo.exists(company_short_name, AssetType.CONFIG, main_config_filename):
+            # raise FileNotFoundError(f"Main configuration file not found: {main_config_filename}")
+            logging.exception(f"Main configuration file not found: {main_config_filename}")
+            # return the minimal configuration needed for starting the IAToolkit
+            # this is a for solving a chicken/egg problem when trying to migrate the configuration
+            # from filesystem to database in enterprise installation
+            # see create_assets cli command in enterprise-iatoolkit)
+            return {
+                'id': company_short_name,
+                'name': company_short_name,
+                'llm': {'model': 'gpt-5', 'provider_api_keys': {'openai':''} },
+                }
+        # read text and parse
+        yaml_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, main_config_filename)
+        config = self.utility.load_yaml_from_string(yaml_content)
+        if not config:
+            return {}
+        # Load and merge supplementary content files (e.g., onboarding_cards)
+        for key, filename in config.get('help_files', {}).items():
+            if self.asset_repo.exists(company_short_name, AssetType.CONFIG, filename):
+                supp_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, filename)
+                config[key] = self.utility.load_yaml_from_string(supp_content)
+            else:
+                logging.warning(f"⚠️  Warning: Content file not found: {filename}")
+                config[key] = None
+        return config
+    def _get_prompt_config(self, config):
+        prompts_config = config.get('prompts', {})
+        if isinstance(prompts_config, dict):
+            prompt_list = prompts_config.get('prompt_list', [])
+            categories_config = prompts_config.get('prompt_categories', [])
+        else:
+            prompt_list = config.get('prompts', [])
+            categories_config = config.get('prompt_categories', [])
+        return prompt_list, categories_config

iatoolkit/services/dispatcher_service.py CHANGED Viewed

@@ -87,9 +87,6 @@ class Dispatcher:
             # system tools registration
             self.tool_service.register_system_tools()
-            # system prompts registration
-            self.prompt_service.register_system_prompts()
         except Exception as e:
             self.llmquery_repo.rollback()
             raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR, str(e))

iatoolkit/services/knowledge_base_service.py CHANGED Viewed

@@ -308,7 +308,7 @@ class KnowledgeBaseService:
         # filter by collection
         if collection:
-            query = query.join(CollectionType).filter(CollectionType.name == collection)
+            query = query.join(Document.collection_type).filter(CollectionType.name == collection)
         # Filter by user identifier
         if user_identifier:
@@ -403,6 +403,19 @@ class KnowledgeBaseService:
         session.commit()
+    def get_collection_names(self, company_short_name: str) -> List[str]:
+        """
+        Retrieves the names of all collections defined for a specific company.
+        """
+        company = self.profile_service.get_company_by_short_name(company_short_name)
+        if not company:
+            logging.warning(f"Company {company_short_name} not found when listing collections.")
+            return []
+        session = self.document_repo.session
+        collections = session.query(CollectionType).filter_by(company_id=company.id).all()
+        return [c.name for c in collections]
     def _get_collection_type_id(self, company_id: int, collection_name: str) -> Optional[int]:
         """Helper to get ID by name"""
         if not collection_name:

iatoolkit/services/load_documents_service.py CHANGED Viewed

@@ -65,17 +65,24 @@ class LoadDocumentsService:
                 logging.warning(f"Source '{source_name}' not found in configuration for company '{company.short_name}'. Skipping.")
                 continue
+            collection = source_config.get('collection')
+            if not collection:
+                logging.warning(
+                    f"Document Source '{source_name}' missing collection definition en company.yaml, Skipping.")
+                continue
             try:
-                logging.info(f"Processing source '{source_name}' for company '{company.short_name}'...")
+                logging.info(f"company {company.short_name}: loading source '{source_name}' into collection '{collection}'...")
                 # Combine the base connector configuration with the specific path from the source.
                 full_connector_config = base_connector_config.copy()
                 full_connector_config['path'] = source_config.get('path')
+                full_connector_config['folder'] = source_config.get('folder')
                 # Prepare the context for the callback function.
                 context = {
                     'company': company,
-                    'collection': source_config.get('metadata', {}).get('collection'),
+                    'collection': collection,
                     'metadata': source_config.get('metadata', {})
                 }
@@ -132,7 +139,7 @@ class LoadDocumentsService:
                 company=company,
                 filename=filename,
                 content=content,
-                collection=predefined_metadata.get('collection'),
+                collection=context.get('collection'),
                 metadata=predefined_metadata
             )

iatoolkit 1.4.2__py3-none-any.whl → 1.9.0__py3-none-any.whl

iatoolkit 1.4.2__py3-none-any.whl → 1.9.0__py3-none-any.whl