iatoolkit 1.4.2__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. iatoolkit/__init__.py +1 -1
  2. iatoolkit/common/interfaces/database_provider.py +13 -8
  3. iatoolkit/common/routes.py +24 -6
  4. iatoolkit/common/util.py +21 -1
  5. iatoolkit/infra/connectors/file_connector_factory.py +1 -0
  6. iatoolkit/infra/connectors/s3_connector.py +4 -2
  7. iatoolkit/locales/en.yaml +72 -5
  8. iatoolkit/locales/es.yaml +71 -4
  9. iatoolkit/repositories/database_manager.py +27 -47
  10. iatoolkit/repositories/llm_query_repo.py +29 -7
  11. iatoolkit/repositories/models.py +16 -7
  12. iatoolkit/services/company_context_service.py +44 -20
  13. iatoolkit/services/configuration_service.py +227 -71
  14. iatoolkit/services/dispatcher_service.py +0 -3
  15. iatoolkit/services/knowledge_base_service.py +14 -1
  16. iatoolkit/services/load_documents_service.py +10 -3
  17. iatoolkit/services/prompt_service.py +210 -29
  18. iatoolkit/services/sql_service.py +17 -0
  19. iatoolkit/templates/chat.html +2 -1
  20. iatoolkit/views/categories_api_view.py +71 -0
  21. iatoolkit/views/configuration_api_view.py +163 -0
  22. iatoolkit/views/prompt_api_view.py +88 -7
  23. {iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/METADATA +1 -1
  24. {iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/RECORD +28 -27
  25. iatoolkit/views/load_company_configuration_api_view.py +0 -49
  26. {iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/WHEEL +0 -0
  27. {iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/licenses/LICENSE +0 -0
  28. {iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/licenses/LICENSE_COMMUNITY.md +0 -0
  29. {iatoolkit-1.4.2.dist-info → iatoolkit-1.9.0.dist-info}/top_level.txt +0 -0
@@ -104,16 +104,17 @@ class CompanyContextService:
104
104
  continue
105
105
 
106
106
  # get database schema definition, for this source.
107
- database_schema_name = source.get('schema')
107
+ database_schema_name = source.get('schema', 'public')
108
108
 
109
109
  try:
110
- db_provider = self.sql_service.get_database_provider(company_short_name, db_name)
110
+ # 1. Get the full database structure at once using the SQL service
111
+ db_structure = self.sql_service.get_database_structure(company_short_name, db_name)
111
112
  except IAToolkitException as e:
112
- logging.warning(f"Could not get DB provider for '{db_name}': {e}")
113
+ logging.warning(f"Could not get DB structure for '{db_name}': {e}")
113
114
  continue
114
115
 
115
116
  db_description = source.get('description', '')
116
- sql_context = f"***Database (`database_key`)***: {db_name}\n"
117
+ sql_context += f"***Database (`database_key`)***: {db_name}\n"
117
118
 
118
119
  if db_description:
119
120
  sql_context += (
@@ -131,28 +132,32 @@ class CompanyContextService:
131
132
  f"Use exactly: `database_key='{db_name}'`.\n"
132
133
  )
133
134
 
134
- # 1. get the list of tables to process.
135
+ # 2. get the list of tables to process based on structure and config
135
136
  tables_to_process = []
136
137
  if source.get('include_all_tables', False):
137
- all_tables = db_provider.get_all_table_names()
138
+ # Use keys from the fetched structure
139
+ all_tables = list(db_structure.keys())
138
140
  tables_to_exclude = set(source.get('exclude_tables', []))
139
141
  tables_to_process = [t for t in all_tables if t not in tables_to_exclude]
140
142
  elif 'tables' in source:
141
- # if not include_all_tables, use the list of tables explicitly specified in the map.
142
- tables_to_process = list(source['tables'].keys())
143
+ # Use keys from the config map, but check if they exist in DB structure
144
+ config_tables = list(source['tables'].keys())
145
+ tables_to_process = [t for t in config_tables if t in db_structure]
143
146
 
144
- # 2. get the global settings and overrides.
147
+ # 3. get the global settings and overrides.
145
148
  global_exclude_columns = source.get('exclude_columns', [])
146
149
  table_prefix = source.get('table_prefix')
147
150
  table_overrides = source.get('tables', {})
148
151
 
149
- # 3. iterate over the tables.
152
+ # 4. iterate over the tables.
150
153
  for table_name in tables_to_process:
151
154
  try:
152
- # 4. get the table specific configuration.
155
+ table_data = db_structure[table_name]
156
+
157
+ # 5. get the table specific configuration.
153
158
  table_config = table_overrides.get(table_name, {})
154
159
 
155
- # 5. define the schema object name, using the override if it exists.
160
+ # 6. define the schema object name, using the override if it exists.
156
161
  # Priority 1: Explicit override from the 'tables' map.
157
162
  schema_object_name = table_config.get('schema_name')
158
163
 
@@ -164,17 +169,36 @@ class CompanyContextService:
164
169
  # Priority 4: Default to the table name itself.
165
170
  schema_object_name = table_name
166
171
 
167
- # 6. define the list of columns to exclude, (local vs. global).
172
+ # 7. define the list of columns to exclude, (local vs. global).
168
173
  local_exclude_columns = table_config.get('exclude_columns')
169
174
  final_exclude_columns = local_exclude_columns if local_exclude_columns is not None else global_exclude_columns
170
175
 
171
- # 7. get the table schema definition.
172
- table_definition = db_provider.get_table_description(
173
- table_name=table_name,
174
- schema_object_name=schema_object_name,
175
- exclude_columns=final_exclude_columns
176
- )
177
- sql_context += table_definition
176
+ # 8. Build the table definition dictionary manually using the structure data
177
+ json_dict = {
178
+ "table": table_name,
179
+ "schema": database_schema_name,
180
+ "description": f"The table belongs to the **`{database_schema_name}`** schema.",
181
+ "fields": []
182
+ }
183
+
184
+ if schema_object_name:
185
+ json_dict["description"] += (
186
+ f"The meaning of each field in this table is detailed in the **`{schema_object_name}`** object."
187
+ )
188
+
189
+ for col in table_data.get('columns', []):
190
+ name = col["name"]
191
+ if name in final_exclude_columns:
192
+ continue
193
+
194
+ json_dict["fields"].append({
195
+ "name": name,
196
+ "type": col["type"]
197
+ })
198
+
199
+ # Append as string representation of dict (consistent with previous behavior)
200
+ sql_context += "\n\n" + str(json_dict)
201
+
178
202
  except (KeyError, RuntimeError) as e:
179
203
  logging.warning(f"Could not generate schema for table '{table_name}': {e}")
180
204
 
@@ -38,41 +38,11 @@ class ConfigurationService:
38
38
  if company_short_name not in self._loaded_configs:
39
39
  self._loaded_configs[company_short_name] = self._load_and_merge_configs(company_short_name)
40
40
 
41
- def get_configuration(self, company_short_name: str, content_key: str):
42
- """
43
- Public method to provide a specific section of a company's configuration.
44
- It uses a cache to avoid reading files from disk on every call.
45
- """
46
- self._ensure_config_loaded(company_short_name)
47
- return self._loaded_configs[company_short_name].get(content_key)
48
-
49
- def get_llm_configuration(self, company_short_name: str):
50
- """
51
- Convenience helper to obtain the 'llm' configuration block for a company.
52
- Kept separate from get_configuration() to avoid coupling tests that
53
- assert the number of calls to get_configuration().
54
- """
55
- default_llm_model = None
56
- available_llm_models = []
57
- self._ensure_config_loaded(company_short_name)
58
- llm_config = self._loaded_configs[company_short_name].get("llm")
59
- if llm_config:
60
- default_llm_model = llm_config.get("model")
61
- available_llm_models = llm_config.get('available_models') or []
62
-
63
- # fallback: if no explicit list of models is provided, use the default model
64
- if not available_llm_models and default_llm_model:
65
- available_llm_models = [{
66
- "id": default_llm_model,
67
- "label": default_llm_model,
68
- "description": "Modelo por defecto configurado para esta compañía."
69
- }]
70
- return default_llm_model, available_llm_models
71
-
72
41
  def load_configuration(self, company_short_name: str):
73
42
  """
74
43
  Main entry point for configuring a company instance.
75
44
  This method is invoked by the dispatcher for each registered company.
45
+ And for the configurator, for editing the configuration of a company.
76
46
  """
77
47
  logging.info(f"⚙️ Starting configuration for company '{company_short_name}'...")
78
48
 
@@ -97,44 +67,109 @@ class ConfigurationService:
97
67
  logging.info(f"✅ Company '{company_short_name}' configured successfully.")
98
68
  return config, errors
99
69
 
100
- def _load_and_merge_configs(self, company_short_name: str) -> dict:
70
+ def get_configuration(self, company_short_name: str, content_key: str):
101
71
  """
102
- Loads the main company.yaml and merges data from supplementary files
103
- specified in the 'content_files' section using AssetRepository.
72
+ Public method to provide a specific section of a company's configuration.
73
+ It uses a cache to avoid reading files from disk on every call.
74
+ """
75
+ self._ensure_config_loaded(company_short_name)
76
+ return self._loaded_configs[company_short_name].get(content_key)
77
+
78
+ def update_configuration_key(self, company_short_name: str, key: str, value) -> tuple[dict, list[str]]:
79
+ """
80
+ Updates a specific key in the company's configuration file, validates the result,
81
+ and saves it to the asset repository if valid.
82
+
83
+ Args:
84
+ company_short_name: The company identifier.
85
+ key: The configuration key to update (supports dot notation, e.g., 'llm.model').
86
+ value: The new value for the key.
87
+
88
+ Returns:
89
+ A tuple containing the updated configuration dict and a list of error strings (if any).
104
90
  """
91
+ # 1. Load raw config from file (to avoid working with merged supplementary files if possible,
92
+ # but for simplicity we load the main yaml structure)
105
93
  main_config_filename = "company.yaml"
106
94
 
107
- # verify existence of the main configuration file
108
95
  if not self.asset_repo.exists(company_short_name, AssetType.CONFIG, main_config_filename):
109
- # raise FileNotFoundError(f"Main configuration file not found: {main_config_filename}")
110
- logging.exception(f"Main configuration file not found: {main_config_filename}")
96
+ raise FileNotFoundError(f"Configuration file not found for {company_short_name}")
111
97
 
112
- # return the minimal configuration needed for starting the IAToolkit
113
- # this is a for solving a chicken/egg problem when trying to migrate the configuration
114
- # from filesystem to database in enterprise installation
115
- # see create_assets cli command in enterprise-iatoolkit)
116
- return {
117
- 'id': company_short_name,
118
- 'name': company_short_name,
119
- 'llm': {'model': 'gpt-5', 'provider_api_keys': {'openai':''} },
120
- }
98
+ yaml_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, main_config_filename)
99
+ config = self.utility.load_yaml_from_string(yaml_content) or {}
100
+
101
+ # 2. Update the key in the dictionary
102
+ self._set_nested_value(config, key, value)
103
+
104
+ # 3. Validate the new configuration structure
105
+ errors = self._validate_configuration(company_short_name, config)
106
+
107
+ if errors:
108
+ logging.warning(f"Configuration update failed validation: {errors}")
109
+ return config, errors
110
+
111
+ # 4. Save back to repository
112
+ # Assuming Utility has a method to dump YAML. If not, standard yaml library would be needed.
113
+ # For this example, we assume self.utility.dump_yaml_to_string exists.
114
+ new_yaml_content = self.utility.dump_yaml_to_string(config)
115
+ self.asset_repo.write_text(company_short_name, AssetType.CONFIG, main_config_filename, new_yaml_content)
116
+
117
+ # 5. Invalidate cache so next reads get the new version
118
+ if company_short_name in self._loaded_configs:
119
+ del self._loaded_configs[company_short_name]
120
+
121
+ return config, []
122
+
123
+ def add_configuration_key(self, company_short_name: str, parent_key: str, key: str, value) -> tuple[dict, list[str]]:
124
+ """
125
+ Adds a new key-value pair under a specific parent key in the configuration.
126
+
127
+ Args:
128
+ company_short_name: The company identifier.
129
+ parent_key: The parent configuration key under which to add the new key (e.g., 'llm').
130
+ key: The new key name to add.
131
+ value: The value for the new key.
132
+
133
+ Returns:
134
+ A tuple containing the updated configuration dict and a list of error strings (if any).
135
+ """
136
+ # 1. Load raw config from file
137
+ main_config_filename = "company.yaml"
138
+
139
+ if not self.asset_repo.exists(company_short_name, AssetType.CONFIG, main_config_filename):
140
+ raise FileNotFoundError(f"Configuration file not found for {company_short_name}")
121
141
 
122
- # read text and parse
123
142
  yaml_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, main_config_filename)
124
- config = self.utility.load_yaml_from_string(yaml_content)
125
- if not config:
126
- return {}
143
+ config = self.utility.load_yaml_from_string(yaml_content) or {}
127
144
 
128
- # Load and merge supplementary content files (e.g., onboarding_cards)
129
- for key, filename in config.get('help_files', {}).items():
130
- if self.asset_repo.exists(company_short_name, AssetType.CONFIG, filename):
131
- supp_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, filename)
132
- config[key] = self.utility.load_yaml_from_string(supp_content)
133
- else:
134
- logging.warning(f"⚠️ Warning: Content file not found: {filename}")
135
- config[key] = None
145
+ # 2. Construct full path and set the value
146
+ # If parent_key is provided, we append the new key to it (e.g., 'llm.new_setting')
147
+ full_path = f"{parent_key}.{key}" if parent_key else key
148
+ self._set_nested_value(config, full_path, value)
136
149
 
137
- return config
150
+ # 3. Validate the new configuration structure
151
+ errors = self._validate_configuration(company_short_name, config)
152
+
153
+ if errors:
154
+ logging.warning(f"Configuration add failed validation: {errors}")
155
+ return config, errors
156
+
157
+ # 4. Save back to repository
158
+ new_yaml_content = self.utility.dump_yaml_to_string(config)
159
+ self.asset_repo.write_text(company_short_name, AssetType.CONFIG, main_config_filename, new_yaml_content)
160
+
161
+ # 5. Invalidate cache
162
+ if company_short_name in self._loaded_configs:
163
+ del self._loaded_configs[company_short_name]
164
+
165
+ return config, []
166
+
167
+ def validate_configuration(self, company_short_name: str) -> list[str]:
168
+ """
169
+ Public method to trigger validation of the current configuration.
170
+ """
171
+ config = self._load_and_merge_configs(company_short_name)
172
+ return self._validate_configuration(company_short_name, config)
138
173
 
139
174
  def _register_company_database(self, config: dict) -> Company:
140
175
  # register the company in the database: create_or_update logic
@@ -239,13 +274,11 @@ class ConfigurationService:
239
274
  from iatoolkit.services.prompt_service import PromptService
240
275
  prompt_service = current_iatoolkit().get_injector().get(PromptService)
241
276
 
242
- prompts_config = config.get('prompts', [])
243
- categories_config = config.get('prompt_categories', [])
244
-
277
+ prompt_list, categories_config = self._get_prompt_config(config)
245
278
  prompt_service.sync_company_prompts(
246
279
  company_short_name=company_short_name,
247
- prompts_config=prompts_config,
248
- categories_config=categories_config
280
+ prompt_list=prompt_list,
281
+ categories_config=categories_config,
249
282
  )
250
283
 
251
284
  def _register_knowledge_base(self, company_short_name: str, config: dict):
@@ -260,7 +293,6 @@ class ConfigurationService:
260
293
  # sync collection types in database
261
294
  knowledge_base.sync_collection_types(company_short_name, categories_config)
262
295
 
263
-
264
296
  def _validate_configuration(self, company_short_name: str, config: dict):
265
297
  """
266
298
  Validates the structure and consistency of the company.yaml configuration.
@@ -328,8 +360,10 @@ class ConfigurationService:
328
360
  add_error(f"tools[{i}]", "'params' key must be a dictionary.")
329
361
 
330
362
  # 6. Prompts
331
- category_set = set(config.get("prompt_categories", []))
332
- for i, prompt in enumerate(config.get("prompts", [])):
363
+ prompt_list, categories_config = self._get_prompt_config(config)
364
+
365
+ category_set = set(categories_config)
366
+ for i, prompt in enumerate(prompt_list):
333
367
  prompt_name = prompt.get("name")
334
368
  if not prompt_name:
335
369
  add_error(f"prompts[{i}]", "Missing required key: 'name'")
@@ -343,10 +377,12 @@ class ConfigurationService:
343
377
  add_error(f"prompts[{i}]", "Missing required key: 'description'")
344
378
 
345
379
  prompt_cat = prompt.get("category")
346
- if not prompt_cat:
347
- add_error(f"prompts[{i}]", "Missing required key: 'category'")
348
- elif prompt_cat not in category_set:
349
- add_error(f"prompts[{i}]", f"Category '{prompt_cat}' is not defined in 'prompt_categories'.")
380
+ prompt_type = prompt.get("prompt_type", 'company').lower()
381
+ if prompt_type == 'company':
382
+ if not prompt_cat:
383
+ add_error(f"prompts[{i}]", "Missing required key: 'category'")
384
+ elif prompt_cat not in category_set:
385
+ add_error(f"prompts[{i}]", f"Category '{prompt_cat}' is not defined in 'prompt_categories'.")
350
386
 
351
387
  # 7. User Feedback
352
388
  feedback_config = config.get("parameters", {}).get("user_feedback", {})
@@ -393,3 +429,123 @@ class ConfigurationService:
393
429
 
394
430
  return errors
395
431
 
432
+
433
+ def _set_nested_value(self, data: dict, key: str, value):
434
+ """
435
+ Helper to set a value in a nested dictionary or list using dot notation (e.g. 'llm.model', 'tools.0.name').
436
+ Handles traversal through both dictionaries and lists.
437
+ """
438
+ keys = key.split('.')
439
+ current = data
440
+
441
+ # Traverse up to the parent of the target key
442
+ for i, k in enumerate(keys[:-1]):
443
+ if isinstance(current, dict):
444
+ # If it's a dict, we can traverse or create the path
445
+ current = current.setdefault(k, {})
446
+ elif isinstance(current, list):
447
+ # If it's a list, we MUST use an integer index
448
+ try:
449
+ idx = int(k)
450
+ # Allow accessing existing index
451
+ current = current[idx]
452
+ except (ValueError, IndexError) as e:
453
+ raise ValueError(
454
+ f"Invalid path: cannot access index '{k}' in list at '{'.'.join(keys[:i + 1])}'") from e
455
+ else:
456
+ raise ValueError(
457
+ f"Invalid path: '{k}' is not a container (got {type(current)}) at '{'.'.join(keys[:i + 1])}'")
458
+
459
+ # Set the final value
460
+ last_key = keys[-1]
461
+ if isinstance(current, dict):
462
+ current[last_key] = value
463
+ elif isinstance(current, list):
464
+ try:
465
+ idx = int(last_key)
466
+ # If index equals length, it means append
467
+ if idx == len(current):
468
+ current.append(value)
469
+ elif 0 <= idx < len(current):
470
+ current[idx] = value
471
+ else:
472
+ raise IndexError(f"Index {idx} out of range for list of size {len(current)}")
473
+ except (ValueError, IndexError) as e:
474
+ raise ValueError(f"Invalid path: cannot assign to index '{last_key}' in list") from e
475
+ else:
476
+ raise ValueError(f"Cannot assign value to non-container type {type(current)} at '{key}'")
477
+
478
+ def get_llm_configuration(self, company_short_name: str):
479
+ """
480
+ Convenience helper to obtain the 'llm' configuration block for a company.
481
+ Kept separate from get_configuration() to avoid coupling tests that
482
+ assert the number of calls to get_configuration().
483
+ """
484
+ default_llm_model = None
485
+ available_llm_models = []
486
+ self._ensure_config_loaded(company_short_name)
487
+ llm_config = self._loaded_configs[company_short_name].get("llm")
488
+ if llm_config:
489
+ default_llm_model = llm_config.get("model")
490
+ available_llm_models = llm_config.get('available_models') or []
491
+
492
+ # fallback: if no explicit list of models is provided, use the default model
493
+ if not available_llm_models and default_llm_model:
494
+ available_llm_models = [{
495
+ "id": default_llm_model,
496
+ "label": default_llm_model,
497
+ "description": "Modelo por defecto configurado para esta compañía."
498
+ }]
499
+ return default_llm_model, available_llm_models
500
+
501
+
502
+ def _load_and_merge_configs(self, company_short_name: str) -> dict:
503
+ """
504
+ Loads the main company.yaml and merges data from supplementary files
505
+ specified in the 'content_files' section using AssetRepository.
506
+ """
507
+ main_config_filename = "company.yaml"
508
+
509
+ # verify existence of the main configuration file
510
+ if not self.asset_repo.exists(company_short_name, AssetType.CONFIG, main_config_filename):
511
+ # raise FileNotFoundError(f"Main configuration file not found: {main_config_filename}")
512
+ logging.exception(f"Main configuration file not found: {main_config_filename}")
513
+
514
+ # return the minimal configuration needed for starting the IAToolkit
515
+ # this is a for solving a chicken/egg problem when trying to migrate the configuration
516
+ # from filesystem to database in enterprise installation
517
+ # see create_assets cli command in enterprise-iatoolkit)
518
+ return {
519
+ 'id': company_short_name,
520
+ 'name': company_short_name,
521
+ 'llm': {'model': 'gpt-5', 'provider_api_keys': {'openai':''} },
522
+ }
523
+
524
+ # read text and parse
525
+ yaml_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, main_config_filename)
526
+ config = self.utility.load_yaml_from_string(yaml_content)
527
+ if not config:
528
+ return {}
529
+
530
+ # Load and merge supplementary content files (e.g., onboarding_cards)
531
+ for key, filename in config.get('help_files', {}).items():
532
+ if self.asset_repo.exists(company_short_name, AssetType.CONFIG, filename):
533
+ supp_content = self.asset_repo.read_text(company_short_name, AssetType.CONFIG, filename)
534
+ config[key] = self.utility.load_yaml_from_string(supp_content)
535
+ else:
536
+ logging.warning(f"⚠️ Warning: Content file not found: {filename}")
537
+ config[key] = None
538
+
539
+ return config
540
+
541
+ def _get_prompt_config(self, config):
542
+ prompts_config = config.get('prompts', {})
543
+ if isinstance(prompts_config, dict):
544
+ prompt_list = prompts_config.get('prompt_list', [])
545
+ categories_config = prompts_config.get('prompt_categories', [])
546
+ else:
547
+ prompt_list = config.get('prompts', [])
548
+ categories_config = config.get('prompt_categories', [])
549
+
550
+ return prompt_list, categories_config
551
+
@@ -87,9 +87,6 @@ class Dispatcher:
87
87
  # system tools registration
88
88
  self.tool_service.register_system_tools()
89
89
 
90
- # system prompts registration
91
- self.prompt_service.register_system_prompts()
92
-
93
90
  except Exception as e:
94
91
  self.llmquery_repo.rollback()
95
92
  raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR, str(e))
@@ -308,7 +308,7 @@ class KnowledgeBaseService:
308
308
 
309
309
  # filter by collection
310
310
  if collection:
311
- query = query.join(CollectionType).filter(CollectionType.name == collection)
311
+ query = query.join(Document.collection_type).filter(CollectionType.name == collection)
312
312
 
313
313
  # Filter by user identifier
314
314
  if user_identifier:
@@ -403,6 +403,19 @@ class KnowledgeBaseService:
403
403
 
404
404
  session.commit()
405
405
 
406
+ def get_collection_names(self, company_short_name: str) -> List[str]:
407
+ """
408
+ Retrieves the names of all collections defined for a specific company.
409
+ """
410
+ company = self.profile_service.get_company_by_short_name(company_short_name)
411
+ if not company:
412
+ logging.warning(f"Company {company_short_name} not found when listing collections.")
413
+ return []
414
+
415
+ session = self.document_repo.session
416
+ collections = session.query(CollectionType).filter_by(company_id=company.id).all()
417
+ return [c.name for c in collections]
418
+
406
419
  def _get_collection_type_id(self, company_id: int, collection_name: str) -> Optional[int]:
407
420
  """Helper to get ID by name"""
408
421
  if not collection_name:
@@ -65,17 +65,24 @@ class LoadDocumentsService:
65
65
  logging.warning(f"Source '{source_name}' not found in configuration for company '{company.short_name}'. Skipping.")
66
66
  continue
67
67
 
68
+ collection = source_config.get('collection')
69
+ if not collection:
70
+ logging.warning(
71
+ f"Document Source '{source_name}' missing collection definition en company.yaml, Skipping.")
72
+ continue
73
+
68
74
  try:
69
- logging.info(f"Processing source '{source_name}' for company '{company.short_name}'...")
75
+ logging.info(f"company {company.short_name}: loading source '{source_name}' into collection '{collection}'...")
70
76
 
71
77
  # Combine the base connector configuration with the specific path from the source.
72
78
  full_connector_config = base_connector_config.copy()
73
79
  full_connector_config['path'] = source_config.get('path')
80
+ full_connector_config['folder'] = source_config.get('folder')
74
81
 
75
82
  # Prepare the context for the callback function.
76
83
  context = {
77
84
  'company': company,
78
- 'collection': source_config.get('metadata', {}).get('collection'),
85
+ 'collection': collection,
79
86
  'metadata': source_config.get('metadata', {})
80
87
  }
81
88
 
@@ -132,7 +139,7 @@ class LoadDocumentsService:
132
139
  company=company,
133
140
  filename=filename,
134
141
  content=content,
135
- collection=predefined_metadata.get('collection'),
142
+ collection=context.get('collection'),
136
143
  metadata=predefined_metadata
137
144
  )
138
145