iatoolkit 1.7.0__py3-none-any.whl → 1.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. iatoolkit/__init__.py +1 -1
  2. iatoolkit/common/routes.py +16 -3
  3. iatoolkit/common/util.py +8 -123
  4. iatoolkit/core.py +1 -0
  5. iatoolkit/infra/connectors/file_connector.py +10 -2
  6. iatoolkit/infra/connectors/google_drive_connector.py +3 -0
  7. iatoolkit/infra/connectors/local_file_connector.py +3 -0
  8. iatoolkit/infra/connectors/s3_connector.py +24 -1
  9. iatoolkit/infra/llm_providers/deepseek_adapter.py +17 -1
  10. iatoolkit/infra/llm_providers/gemini_adapter.py +117 -18
  11. iatoolkit/infra/llm_providers/openai_adapter.py +175 -18
  12. iatoolkit/infra/llm_response.py +13 -0
  13. iatoolkit/locales/en.yaml +82 -4
  14. iatoolkit/locales/es.yaml +79 -4
  15. iatoolkit/repositories/llm_query_repo.py +51 -18
  16. iatoolkit/repositories/models.py +16 -7
  17. iatoolkit/services/company_context_service.py +294 -133
  18. iatoolkit/services/configuration_service.py +140 -121
  19. iatoolkit/services/dispatcher_service.py +1 -4
  20. iatoolkit/services/knowledge_base_service.py +26 -4
  21. iatoolkit/services/llm_client_service.py +58 -2
  22. iatoolkit/services/prompt_service.py +251 -164
  23. iatoolkit/services/query_service.py +37 -18
  24. iatoolkit/services/storage_service.py +92 -0
  25. iatoolkit/static/js/chat_filepond.js +188 -63
  26. iatoolkit/static/js/chat_main.js +105 -52
  27. iatoolkit/static/styles/chat_iatoolkit.css +96 -0
  28. iatoolkit/system_prompts/query_main.prompt +24 -41
  29. iatoolkit/templates/chat.html +15 -6
  30. iatoolkit/views/base_login_view.py +1 -1
  31. iatoolkit/views/categories_api_view.py +111 -0
  32. iatoolkit/views/chat_view.py +1 -1
  33. iatoolkit/views/configuration_api_view.py +1 -1
  34. iatoolkit/views/login_view.py +1 -1
  35. iatoolkit/views/prompt_api_view.py +88 -7
  36. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/METADATA +1 -1
  37. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/RECORD +41 -39
  38. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/WHEEL +0 -0
  39. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE +0 -0
  40. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE_COMMUNITY.md +0 -0
  41. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/top_level.txt +0 -0
iatoolkit/repositories/models.py

@@ -17,6 +17,19 @@ import enum
 class Base(DeclarativeBase):
     pass

+
+class DocumentStatus(str, enum.Enum):
+    PENDING = "pending"
+    PROCESSING = "processing"
+    ACTIVE = "active"
+    FAILED = "failed"
+
+class PromptType(str, enum.Enum):
+    SYSTEM = "system"
+    COMPANY = "company"
+    AGENT = "agent"
+
+
 # relation table for many-to-many relationship between companies and users
 user_company = Table('iat_user_company',
                      Base.metadata,
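
Both enums mix in `str`, so members compare equal to their literal values and serialize cleanly; that is what lets the `Prompt` model further down default a plain `String` column to `PromptType.COMPANY.value`. A minimal sketch of the behavior (standard library only, enum bodies copied from the hunk above):

```python
import enum

class PromptType(str, enum.Enum):
    SYSTEM = "system"
    COMPANY = "company"
    AGENT = "agent"

# A str-mixin enum compares equal to its raw value...
assert PromptType.COMPANY == "company"
assert PromptType.COMPANY.value == "company"
# ...and a value read back from the database round-trips to the member.
assert PromptType("agent") is PromptType.AGENT
```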
@@ -149,11 +162,6 @@ class Tool(Base):
         return {column.key: getattr(self, column.key) for column in class_mapper(self.__class__).columns}


-class DocumentStatus(str, enum.Enum):
-    PENDING = "pending"
-    PROCESSING = "processing"
-    ACTIVE = "active"
-    FAILED = "failed"


 class CollectionType(Base):
@@ -290,12 +298,13 @@ class Prompt(Base):
     description = Column(String, nullable=False)
     filename = Column(String, nullable=False)
     active = Column(Boolean, default=True)
-    is_system_prompt = Column(Boolean, default=False)
+    prompt_type = Column(String, default=PromptType.COMPANY.value, nullable=False)
     order = Column(Integer, nullable=True, default=0)
     category_id = Column(Integer, ForeignKey('iat_prompt_categories.id'), nullable=True)
     custom_fields = Column(JSON, nullable=False, default=[])
-
     created_at = Column(DateTime, default=datetime.now)
+    def to_dict(self):
+        return {column.key: getattr(self, column.key) for column in class_mapper(self.__class__).columns}

     company = relationship("Company", back_populates="prompts")
     category = relationship("PromptCategory", back_populates="prompts")
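
Note that the `is_system_prompt` boolean is dropped in favor of the three-valued `prompt_type` column. The diff ships no data migration; a backfill would presumably map `True` to `'system'` and everything else to the `'company'` default. A hypothetical sketch (the `iat_prompts` table name and the DSN are assumptions, not taken from this diff):

```python
# Hypothetical backfill for the is_system_prompt -> prompt_type change.
from sqlalchemy import create_engine, text

engine = create_engine("postgresql://user:pass@localhost/iatoolkit")  # placeholder DSN

with engine.begin() as conn:
    # Run before the old boolean column is dropped.
    conn.execute(text(
        "UPDATE iat_prompts "
        "SET prompt_type = CASE WHEN is_system_prompt THEN 'system' ELSE 'company' END"
    ))
```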
iatoolkit/services/company_context_service.py

@@ -7,9 +7,11 @@ from iatoolkit.common.util import Utility
 from iatoolkit.services.configuration_service import ConfigurationService
 from iatoolkit.common.interfaces.asset_storage import AssetRepository, AssetType
 from iatoolkit.services.sql_service import SqlService
-from iatoolkit.common.exceptions import IAToolkitException
 import logging
+import yaml
 from injector import inject
+from typing import List, Dict
+import os


 class CompanyContextService:
@@ -46,8 +48,9 @@ class CompanyContextService:
             logging.warning(f"Could not load Markdown context for '{company_short_name}': {e}")

         # 2. Context from company-specific SQL databases
+        db_tables = []
         try:
-            sql_context = self._get_sql_schema_context(company_short_name)
+            sql_context, db_tables = self._get_sql_enriched_context(company_short_name)
             if sql_context:
                 context_parts.append(sql_context)
         except Exception as e:
@@ -55,7 +58,7 @@ class CompanyContextService:

         # 3. Context from yaml (schema/*.yaml) files
         try:
-            yaml_schema_context = self._get_yaml_schema_context(company_short_name)
+            yaml_schema_context = self._get_yaml_schema_context(company_short_name, db_tables)
             if yaml_schema_context:
                 context_parts.append(yaml_schema_context)
         except Exception as e:
@@ -64,149 +67,96 @@ class CompanyContextService:
         # Join all parts with a clear separator
         return "\n\n---\n\n".join(context_parts)

-    def _get_static_file_context(self, company_short_name: str) -> str:
-        # Get context from .md files using the repository
-        static_context = ''

-        try:
-            # 1. List markdown files in the context "folder"
-            # Note: The repo handles where this folder actually is (FS or DB)
-            md_files = self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md')
-
-            for filename in md_files:
-                try:
-                    # 2. Read content
-                    content = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, filename)
-                    static_context += content + "\n" # Append content
-                except Exception as e:
-                    logging.warning(f"Error reading context file {filename}: {e}")
-
-        except Exception as e:
-            # If listing fails (e.g. folder doesn't exist), just log and return empty
-            logging.warning(f"Error listing context files for {company_short_name}: {e}")
-
-        return static_context
-
-    def _get_sql_schema_context(self, company_short_name: str) -> str:
+    def _get_sql_enriched_context(self, company_short_name: str):
         """
-        Generates the SQL schema context by inspecting live database connections
-        based on the flexible company.yaml configuration.
-        It supports including all tables and providing specific overrides for a subset of them.
+        Generates the SQL context for the LLM using the enriched schema logic.
+        It iterates over configured databases, fetches their enriched structure,
+        and formats it into a prompt-friendly string.
         """
         data_sources_config = self.config_service.get_configuration(company_short_name, 'data_sources')
         if not data_sources_config or not data_sources_config.get('sql'):
-            return ''
+            return '', []
+
+        context_output = []
+        db_tables = []

-        sql_context = ''
         for source in data_sources_config.get('sql', []):
             db_name = source.get('database')
             if not db_name:
                 continue

-            # get database schema definition, for this source.
-            database_schema_name = source.get('schema', 'public')
-
             try:
-                # 1. Get the full database structure at once using the SQL service
-                db_structure = self.sql_service.get_database_structure(company_short_name, db_name)
-            except IAToolkitException as e:
-                logging.warning(f"Could not get DB structure for '{db_name}': {e}")
-                continue
+                # 1. Get the Enriched Schema (Physical + YAML)
+                enriched_structure = self.get_enriched_database_schema(company_short_name, db_name)
+                if not enriched_structure:
+                    continue
+
+                # 2. Build Header for this Database
+                db_context = f"***Database (`database_key`)***: {db_name}\n"
+
+                # Optional: Add DB description from config if available (useful context)
+                db_desc = source.get('description', '')
+                if db_desc:
+                    db_context += f"**Description:** {db_desc}\n"
+
+                db_context += (
+                    f"IMPORTANT: To query this database you MUST use the service/tool "
+                    f"**iat_sql_query**, with `database_key='{db_name}'`.\n"
+                )

-            db_description = source.get('description', '')
-            sql_context += f"***Database (`database_key`)***: {db_name}\n"
+                # 3. Format Tables
+                for table_name, table_data in enriched_structure.items():
+                    table_desc = table_data.get('description', '')
+                    columns = table_data.get('columns', [])

-            if db_description:
-                sql_context += (
-                    f"**Description:** : {db_description}\n"
-                )
+                    # Table Header
+                    table_str = f"\nTable: **{table_name}**"
+                    if table_desc:
+                        table_str += f"\nDescription: {table_desc}"

-            sql_context += (
-                f"IMPORTANT: To query this database you MUST use the service/tool "
-                f"**iat_sql_query**, with `database_key={db_name}`.\n"
-            )
-
-            sql_context += (
-                f"IMPORTANT: The value of **database_key** is ALWAYS the literal string "
-                f"'{db_name}'. Do not invent or infer alternative names. "
-                f"Use exactly: `database_key='{db_name}'`.\n"
-            )
-
-            # 2. get the list of tables to process based on structure and config
-            tables_to_process = []
-            if source.get('include_all_tables', False):
-                # Use keys from the fetched structure
-                all_tables = list(db_structure.keys())
-                tables_to_exclude = set(source.get('exclude_tables', []))
-                tables_to_process = [t for t in all_tables if t not in tables_to_exclude]
-            elif 'tables' in source:
-                # Use keys from the config map, but check if they exist in DB structure
-                config_tables = list(source['tables'].keys())
-                tables_to_process = [t for t in config_tables if t in db_structure]
-
-            # 3. get the global settings and overrides.
-            global_exclude_columns = source.get('exclude_columns', [])
-            table_prefix = source.get('table_prefix')
-            table_overrides = source.get('tables', {})
-
-            # 4. iterate over the tables.
-            for table_name in tables_to_process:
-                try:
-                    table_data = db_structure[table_name]
-
-                    # 5. get the table specific configuration.
-                    table_config = table_overrides.get(table_name, {})
-
-                    # 6. define the schema object name, using the override if it exists.
-                    # Priority 1: Explicit override from the 'tables' map.
-                    schema_object_name = table_config.get('schema_name')
-
-                    if not schema_object_name:
-                        # Priority 3: Automatic prefix stripping.
-                        if table_prefix and table_name.startswith(table_prefix):
-                            schema_object_name = table_name[len(table_prefix):]
-                        else:
-                            # Priority 4: Default to the table name itself.
-                            schema_object_name = table_name
-
-                    # 7. define the list of columns to exclude, (local vs. global).
-                    local_exclude_columns = table_config.get('exclude_columns')
-                    final_exclude_columns = local_exclude_columns if local_exclude_columns is not None else global_exclude_columns
-
-                    # 8. Build the table definition dictionary manually using the structure data
-                    json_dict = {
-                        "table": table_name,
-                        "schema": database_schema_name,
-                        "description": f"The table belongs to the **`{database_schema_name}`** schema.",
-                        "fields": []
-                    }
-
-                    if schema_object_name:
-                        json_dict["description"] += (
-                            f"The meaning of each field in this table is detailed in the **`{schema_object_name}`** object."
-                        )
-
-                    for col in table_data.get('columns', []):
-                        name = col["name"]
-                        if name in final_exclude_columns:
-                            continue
-
-                        json_dict["fields"].append({
-                            "name": name,
-                            "type": col["type"]
-                        })
-
-                    # Append as string representation of dict (consistent with previous behavior)
-                    sql_context += "\n\n" + str(json_dict)
-
-                except (KeyError, RuntimeError) as e:
-                    logging.warning(f"Could not generate schema for table '{table_name}': {e}")
-
-        if sql_context:
-            sql_context = "These are the SQL databases you can query using the **`iat_sql_service`**: \n" + sql_context
-        return sql_context
-
-    def _get_yaml_schema_context(self, company_short_name: str) -> str:
+                    table_str += "\nColumns:"
+
+                    # Format Columns
+                    for col in columns:
+                        col_name = col.get('name')
+                        col_type = col.get('type', 'unknown')
+                        col_desc = col.get('description', '')
+                        col_props = col.get('properties') # Nested JSONB structure
+
+                        col_line = f"\n - `{col_name}` ({col_type})"
+                        if col_desc:
+                            col_line += f": {col_desc}"
+
+                        table_str += col_line
+
+                        # If it has nested properties (JSONB enriched from YAML), format them
+                        if col_props:
+                            table_str += "\n"
+                            table_str += self._format_json_schema(col_props, 2) # Indent level 2
+
+                    db_context += table_str
+
+                    # collect the table names for later use
+                    db_tables.append(
+                        {'db_name': db_name,
+                         'table_name': table_name,
+                         }
+                    )
+
+                context_output.append(db_context)
+
+            except Exception as e:
+                logging.warning(f"Could not generate enriched SQL context for '{db_name}': {e}")
+
+        if not context_output:
+            return "", []
+
+        header = "These are the SQL databases you can query using the **`iat_sql_service`**. The schema below includes enriched metadata:\n"
+        return header + "\n\n---\n\n".join(context_output), db_tables
+
+
+    def _get_yaml_schema_context(self, company_short_name: str, db_tables: List[Dict]) -> str:
         # Get context from .yaml schema files using the repository
         yaml_schema_context = ''

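
To make the new formatting concrete: given one enriched table, the loop above emits a prompt block roughly like the comment below (a sketch; the `orders` table and its columns are invented, and the exact whitespace follows the f-strings in the hunk):

```python
# Hypothetical enriched structure, shaped like the dicts
# get_enriched_database_schema() returns for one database.
enriched_structure = {
    "orders": {
        "description": "Customer orders",
        "columns": [
            {"name": "id", "type": "integer", "description": "Primary key"},
            {"name": "payload", "type": "jsonb", "description": "Raw order document",
             "properties": {"total": {"type": "number", "description": "Order total"}}},
        ],
    }
}

# Rendered by the formatting loop above, roughly:
#   Table: **orders**
#   Description: Customer orders
#   Columns:
#    - `id` (integer): Primary key
#    - `payload` (jsonb): Raw order document
#     - **`total`** (number): Order total
```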
@@ -215,6 +165,18 @@ class CompanyContextService:
             schema_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA, extension='.yaml')

             for filename in schema_files:
+                # skip tables that are already in the SQL context
+                if '-' in filename:
+                    dbname, f = filename.split("-", 1)
+                    table_name = f.split('.')[0]
+
+                    exists = any(
+                        item["db_name"] == dbname and item["table_name"] == table_name
+                        for item in db_tables
+                    )
+                    if exists:
+                        continue
+
                 try:
                     # 2. Read content
                     content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, filename)
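
The skip condition keys off the same `<database>-<table>.yaml` naming convention that `get_enriched_database_schema` (later in this diff) uses to pair schema assets with physical tables. How a name decomposes under the split above, with an invented filename:

```python
filename = "sales-orders.yaml"  # hypothetical schema asset

dbname, f = filename.split("-", 1)  # -> "sales", "orders.yaml"
table_name = f.split('.')[0]        # -> "orders"

# If _get_sql_enriched_context already emitted this table, the raw
# YAML dump is skipped to avoid duplicating it in the prompt.
db_tables = [{"db_name": "sales", "table_name": "orders"}]
assert any(
    item["db_name"] == dbname and item["table_name"] == table_name
    for item in db_tables
)
```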
@@ -225,7 +187,7 @@
                     # 4. Generate markdown description from the dict
                     if schema_dict:
                         # We use generate_schema_table which accepts a dict directly
-                        yaml_schema_context += self.utility.generate_schema_table(schema_dict)
+                        yaml_schema_context += self.generate_schema_table(schema_dict)

                 except Exception as e:
                     logging.warning(f"Error processing schema file {filename}: {e}")
@@ -233,4 +195,203 @@
         except Exception as e:
             logging.warning(f"Error listing schema files for {company_short_name}: {e}")

-        return yaml_schema_context
+        return yaml_schema_context
+
+    def generate_schema_table(self, schema: dict) -> str:
+        if not schema or not isinstance(schema, dict):
+            return ""
+
+        # root detection
+        keys = list(schema.keys())
+        if not keys:
+            return ""
+
+        root_name = keys[0]
+        root_data = schema[root_name]
+        output = [f"\n### Objeto: `{root_name}`"]
+
+        # table description
+        root_description = root_data.get('description', '')
+        if root_description:
+            clean_desc = root_description.replace('\n', ' ').strip()
+            output.append(f"##Descripción: {clean_desc}")
+
+        # extract columns and properties from the root object
+        # priority: columns > properties > fields
+        properties = root_data.get('columns', root_data.get('properties', {}))
+        if properties:
+            output.append("**Estructura de Datos:**")
+
+            # use indent_level 0 for the main columns
+            # call recursive function to format the properties
+            output.append(self._format_json_schema(properties, 0))
+        else:
+            output.append("\n_Sin definición de estructura._")
+
+        return "\n".join(output)
+
+    def _format_json_schema(self, properties: dict, indent_level: int) -> str:
+        output = []
+        indent_str = ' ' * indent_level
+
+        if not isinstance(properties, dict):
+            return ""
+
+        for name, details in properties.items():
+            if not isinstance(details, dict): continue
+
+            description = details.get('description', '')
+            data_type = details.get('type', 'any')
+
+            # Visual normalization: jsonb -> object
+            if data_type and data_type.lower() == 'jsonb':
+                data_type = 'object'
+
+            line = f"{indent_str}- **`{name}`**"
+            if data_type:
+                line += f" ({data_type})"
+            if description:
+                clean_desc = description.replace('\n', ' ').strip()
+                line += f": {clean_desc}"
+
+            output.append(line)
+
+            # Recursion: look for children in 'properties', 'fields' or 'columns'
+            children = details.get('properties', details.get('fields'))
+
+            # Array case (items -> properties)
+            if not children and details.get('items'):
+                items = details['items']
+                if isinstance(items, dict):
+                    if items.get('description'):
+                        output.append(f"{indent_str} _Items: {items['description']}_")
+                    children = items.get('properties', items.get('fields'))
+
+            if children:
+                output.append(self._format_json_schema(children, indent_level + 1))
+
+        return "\n".join(output)
+
+
+    def _get_static_file_context(self, company_short_name: str) -> str:
+        # Get context from .md files using the repository
+        static_context = ''
+
+        try:
+            # 1. List markdown files in the context "folder"
+            # Note: The repo handles where this folder actually is (FS or DB)
+            md_files = self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md')
+
+            for filename in md_files:
+                try:
+                    # 2. Read content
+                    content = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, filename)
+                    static_context += content + "\n" # Append content
+                except Exception as e:
+                    logging.warning(f"Error reading context file {filename}: {e}")
+
+        except Exception as e:
+            # If listing fails (e.g. folder doesn't exist), just log and return empty
+            logging.warning(f"Error listing context files for {company_short_name}: {e}")
+
+        return static_context
+
+    def get_enriched_database_schema(self, company_short_name: str, db_name: str) -> dict:
+        """
+        Retrieves the physical database structure and enriches it with metadata
+        found in the AssetRepository (YAML files).
+        """
+        try:
+            # 1. Physical Structure (Real Source)
+            structure = self.sql_service.get_database_structure(company_short_name, db_name)
+
+            # 2. YAML files
+            available_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA)
+            files_map = {}
+            for f in available_files:
+                clean = f.lower().replace('.yaml', '').replace('.yml', '')
+                if '-' not in clean:
+                    continue # skip non-table files
+
+                dbname, table = clean.split("-", 1)
+                # filter by the database
+                if dbname != db_name:
+                    continue
+                files_map[table] = f
+
+            logging.debug(f"🔍 [CompanyContextService] Enriching schema for {db_name}. Files found: {len(files_map)}")
+
+            # 3. merge the physical structure with the YAML files
+            for table_name, table_data in structure.items():
+                t_name = table_name.lower().strip()
+
+                real_filename = files_map.get(t_name)
+                if not real_filename:
+                    continue
+
+                try:
+                    content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, real_filename)
+                    if not content:
+                        continue
+
+                    meta = yaml.safe_load(content) or {}
+
+                    # detect root, usually the table name
+                    root_data = meta.get(table_name) or meta.get(t_name)
+                    if not root_data and len(meta) == 1:
+                        root_data = list(meta.values())[0]
+
+                    if not root_data:
+                        continue
+
+                    # A. Table description
+                    if 'description' in root_data:
+                        table_data['description'] = root_data['description']
+
+                    # B. get the map of columns from the YAML
+                    yaml_cols = root_data.get('columns', root_data.get('fields', {}))
+
+                    # --- LEGACY ADAPTER: List -> Dictionary ---
+                    if isinstance(yaml_cols, list):
+                        temp_map = {}
+                        for c in yaml_cols:
+                            if isinstance(c, dict) and 'name' in c:
+                                col_name = c['name']
+                                temp_map[col_name] = c
+                        yaml_cols = temp_map
+                    # --------------------------------------------
+
+                    if isinstance(yaml_cols, dict):
+                        # map in lower case for lookup
+                        y_cols_lower = {str(k).lower(): v for k, v in yaml_cols.items()}
+
+                        # Iterate over columns
+                        for col in table_data.get('columns', []):
+                            c_name = str(col['name']).lower() # Real DB Name
+
+                            if c_name in y_cols_lower:
+                                y_col = y_cols_lower[c_name]
+
+                                # copy the basic metadata from the YAML
+                                if y_col.get('description'): col['description'] = y_col['description']
+                                if y_col.get('pii'): col['pii'] = y_col['pii']
+                                if y_col.get('synonyms'): col['synonyms'] = y_col['synonyms']
+
+                                # C. inject the json schema from the YAML
+                                props = y_col.get('properties')
+                                if props:
+                                    col['properties'] = props
+                    else:
+                        if yaml_cols:
+                            logging.warning(f"⚠️ [CompanyContextService] Unrecognized column format in {real_filename}")
+
+                except Exception as e:
+                    logging.error(f"❌ Error processing schema file {real_filename}: {e}")
+
+            return structure
+
+        except Exception as e:
+            logging.exception(f"Error generating enriched schema for {db_name}")
+            # Depending on policy, re-raise or return empty structure
+            raise e
+
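
For orientation, a schema asset that this merge logic accepts might look like the YAML below (all names invented). The root key matches the table, `columns` may be a dict or the legacy list form handled by the adapter above, and a column's `properties` block is what gets injected and later expanded by `_format_json_schema`:

```python
import yaml

# Hypothetical contents of a 'sales-orders.yaml' schema asset.
doc = """
orders:
  description: Customer orders placed through the web shop
  columns:
    status:
      description: Order lifecycle state
      synonyms: [state, stage]
    payload:
      type: jsonb
      description: Raw order document
      properties:
        total:
          type: number
          description: Order total in EUR
"""

meta = yaml.safe_load(doc)
root = meta["orders"]
# 'properties' is what gets copied onto the physical column entry.
assert root["columns"]["payload"]["properties"]["total"]["type"] == "number"
```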