iatoolkit 1.9.0__py3-none-any.whl → 1.15.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- iatoolkit/__init__.py +1 -1
- iatoolkit/common/routes.py +1 -1
- iatoolkit/common/util.py +8 -123
- iatoolkit/core.py +1 -0
- iatoolkit/infra/connectors/file_connector.py +10 -2
- iatoolkit/infra/connectors/google_drive_connector.py +3 -0
- iatoolkit/infra/connectors/local_file_connector.py +3 -0
- iatoolkit/infra/connectors/s3_connector.py +24 -1
- iatoolkit/infra/llm_providers/deepseek_adapter.py +17 -1
- iatoolkit/infra/llm_providers/gemini_adapter.py +117 -18
- iatoolkit/infra/llm_providers/openai_adapter.py +175 -18
- iatoolkit/infra/llm_response.py +13 -0
- iatoolkit/locales/en.yaml +47 -2
- iatoolkit/locales/es.yaml +45 -1
- iatoolkit/repositories/llm_query_repo.py +44 -33
- iatoolkit/services/company_context_service.py +294 -133
- iatoolkit/services/dispatcher_service.py +1 -1
- iatoolkit/services/knowledge_base_service.py +26 -4
- iatoolkit/services/llm_client_service.py +58 -2
- iatoolkit/services/prompt_service.py +236 -330
- iatoolkit/services/query_service.py +37 -18
- iatoolkit/services/storage_service.py +92 -0
- iatoolkit/static/js/chat_filepond.js +188 -63
- iatoolkit/static/js/chat_main.js +105 -52
- iatoolkit/static/styles/chat_iatoolkit.css +96 -0
- iatoolkit/system_prompts/query_main.prompt +24 -41
- iatoolkit/templates/chat.html +15 -6
- iatoolkit/views/base_login_view.py +1 -1
- iatoolkit/views/categories_api_view.py +43 -3
- iatoolkit/views/chat_view.py +1 -1
- iatoolkit/views/login_view.py +1 -1
- iatoolkit/views/prompt_api_view.py +1 -1
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/METADATA +1 -1
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/RECORD +38 -37
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/WHEEL +0 -0
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE +0 -0
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE_COMMUNITY.md +0 -0
- {iatoolkit-1.9.0.dist-info → iatoolkit-1.15.3.dist-info}/top_level.txt +0 -0
iatoolkit/services/company_context_service.py

```diff
@@ -7,9 +7,11 @@ from iatoolkit.common.util import Utility
 from iatoolkit.services.configuration_service import ConfigurationService
 from iatoolkit.common.interfaces.asset_storage import AssetRepository, AssetType
 from iatoolkit.services.sql_service import SqlService
-from iatoolkit.common.exceptions import IAToolkitException
 import logging
+import yaml
 from injector import inject
+from typing import List, Dict
+import os
 
 
 class CompanyContextService:
```
```diff
@@ -46,8 +48,9 @@ class CompanyContextService:
             logging.warning(f"Could not load Markdown context for '{company_short_name}': {e}")
 
         # 2. Context from company-specific SQL databases
+        db_tables = []
         try:
-            sql_context = self._get_sql_schema_context(company_short_name)
+            sql_context, db_tables = self._get_sql_enriched_context(company_short_name)
             if sql_context:
                 context_parts.append(sql_context)
         except Exception as e:
```
```diff
@@ -55,7 +58,7 @@ class CompanyContextService:
 
         # 3. Context from yaml (schema/*.yaml) files
         try:
-            yaml_schema_context = self._get_yaml_schema_context(company_short_name)
+            yaml_schema_context = self._get_yaml_schema_context(company_short_name, db_tables)
             if yaml_schema_context:
                 context_parts.append(yaml_schema_context)
         except Exception as e:
```
```diff
@@ -64,149 +67,96 @@ class CompanyContextService:
         # Join all parts with a clear separator
         return "\n\n---\n\n".join(context_parts)
 
-    def _get_static_file_context(self, company_short_name: str) -> str:
-        # Get context from .md files using the repository
-        static_context = ''
 
-        try:
-            # 1. List markdown files in the context "folder"
-            # Note: The repo handles where this folder actually is (FS or DB)
-            md_files = self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md')
-
-            for filename in md_files:
-                try:
-                    # 2. Read content
-                    content = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, filename)
-                    static_context += content + "\n"  # Append content
-                except Exception as e:
-                    logging.warning(f"Error reading context file {filename}: {e}")
-
-        except Exception as e:
-            # If listing fails (e.g. folder doesn't exist), just log and return empty
-            logging.warning(f"Error listing context files for {company_short_name}: {e}")
-
-        return static_context
-
-    def _get_sql_schema_context(self, company_short_name: str) -> str:
+    def _get_sql_enriched_context(self, company_short_name: str):
         """
-        Generates the SQL …
+        Generates the SQL context for the LLM using the enriched schema logic.
+        It iterates over configured databases, fetches their enriched structure,
+        and formats it into a prompt-friendly string.
         """
         data_sources_config = self.config_service.get_configuration(company_short_name, 'data_sources')
         if not data_sources_config or not data_sources_config.get('sql'):
-            return ''
+            return '', []
+
+        context_output = []
+        db_tables = []
 
-        sql_context = ''
         for source in data_sources_config.get('sql', []):
             db_name = source.get('database')
             if not db_name:
                 continue
 
-            # get database schema definition, for this source.
-            database_schema_name = source.get('schema', 'public')
-
             try:
-                # 1. Get the …
-                … (old lines 111–165 are blank in this extraction; their content is not recoverable) …
+                # 1. Get the Enriched Schema (Physical + YAML)
+                enriched_structure = self.get_enriched_database_schema(company_short_name, db_name)
+                if not enriched_structure:
+                    continue
+
+                # 2. Build Header for this Database
+                db_context = f"***Database (`database_key`)***: {db_name}\n"
+
+                # Optional: Add DB description from config if available (useful context)
+                db_desc = source.get('description', '')
+                if db_desc:
+                    db_context += f"**Description:** {db_desc}\n"
+
+                db_context += (
+                    f"IMPORTANT: To query this database you MUST use the service/tool "
+                    f"**iat_sql_query**, with `database_key='{db_name}'`.\n"
+                )
+
+                # 3. Format Tables
+                for table_name, table_data in enriched_structure.items():
+                    table_desc = table_data.get('description', '')
+                    columns = table_data.get('columns', [])
+
+                    # Table Header
+                    table_str = f"\nTable: **{table_name}**"
+                    if table_desc:
+                        table_str += f"\nDescription: {table_desc}"
 
-                    if table_prefix and table_name.startswith(table_prefix):
-                        schema_object_name = table_name[len(table_prefix):]
-                    else:
-                        # Priority 4: Default to the table name itself.
-                        schema_object_name = table_name
-
-                    # 7. define the list of columns to exclude, (local vs. global).
-                    local_exclude_columns = table_config.get('exclude_columns')
-                    final_exclude_columns = local_exclude_columns if local_exclude_columns is not None else global_exclude_columns
-
-                    # 8. Build the table definition dictionary manually using the structure data
-                    json_dict = {
-                        "table": table_name,
-                        "schema": database_schema_name,
-                        "description": f"The table belongs to the **`{database_schema_name}`** schema.",
-                        "fields": []
-                    }
-
-                    if schema_object_name:
-                        json_dict["description"] += (
-                            f"The meaning of each field in this table is detailed in the **`{schema_object_name}`** object."
-                        )
-
-                    for col in table_data.get('columns', []):
-                        name = col["name"]
-                        if name in final_exclude_columns:
-                            continue
-
-                        json_dict["fields"].append({
-                            "name": name,
-                            "type": col["type"]
-                        })
-
-                    # Append as string representation of dict (consistent with previous behavior)
-                    sql_context += "\n\n" + str(json_dict)
-
-            except (KeyError, RuntimeError) as e:
-                logging.warning(f"Could not generate schema for table '{table_name}': {e}")
-
-        if sql_context:
-            sql_context = "These are the SQL databases you can query using the **`iat_sql_service`**: \n" + sql_context
-        return sql_context
-
-    def _get_yaml_schema_context(self, company_short_name: str) -> str:
+                    table_str += "\nColumns:"
+
+                    # Format Columns
+                    for col in columns:
+                        col_name = col.get('name')
+                        col_type = col.get('type', 'unknown')
+                        col_desc = col.get('description', '')
+                        col_props = col.get('properties')  # Nested JSONB structure
+
+                        col_line = f"\n - `{col_name}` ({col_type})"
+                        if col_desc:
+                            col_line += f": {col_desc}"
+
+                        table_str += col_line
+
+                        # If it has nested properties (JSONB enriched from YAML), format them
+                        if col_props:
+                            table_str += "\n"
+                            table_str += self._format_json_schema(col_props, 2)  # Indent level 2
+
+                    db_context += table_str
+
+                    # collect the table names for later use
+                    db_tables.append(
+                        {'db_name': db_name,
+                         'table_name': table_name,
+                         }
+                    )
+
+                context_output.append(db_context)
+
+            except Exception as e:
+                logging.warning(f"Could not generate enriched SQL context for '{db_name}': {e}")
+
+        if not context_output:
+            return "", []
+
+        header = "These are the SQL databases you can query using the **`iat_sql_service`**. The schema below includes enriched metadata:\n"
+        return header + "\n\n---\n\n".join(context_output), db_tables
+
+    def _get_yaml_schema_context(self, company_short_name: str, db_tables: List[Dict]) -> str:
         # Get context from .yaml schema files using the repository
         yaml_schema_context = ''
 
```
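The new `_get_sql_enriched_context` replaces the old dict-dump format with a Markdown-style block per database that embeds the routing instruction for the `iat_sql_query` tool. A minimal standalone sketch of the string it builds, inlining the formatting loop on a hypothetical enriched structure (the `sales`/`customers` names are illustrative, not taken from the package):

```python
# Sketch of the prompt text the new _get_sql_enriched_context produces.
# The enriched structure below is hypothetical.
enriched_structure = {
    "customers": {
        "description": "One row per registered customer.",
        "columns": [
            {"name": "id", "type": "integer"},
            {"name": "email", "type": "varchar", "description": "Primary contact address"},
        ],
    }
}

db_name = "sales"
db_context = f"***Database (`database_key`)***: {db_name}\n"
db_context += (
    f"IMPORTANT: To query this database you MUST use the service/tool "
    f"**iat_sql_query**, with `database_key='{db_name}'`.\n"
)

for table_name, table_data in enriched_structure.items():
    table_str = f"\nTable: **{table_name}**"
    if table_data.get("description"):
        table_str += f"\nDescription: {table_data['description']}"
    table_str += "\nColumns:"
    for col in table_data.get("columns", []):
        line = f"\n - `{col.get('name')}` ({col.get('type', 'unknown')})"
        if col.get("description"):
            line += f": {col['description']}"
        table_str += line
    db_context += table_str

print(db_context)
```

Embedding the tool name and `database_key` directly in the per-database header is what lets the model route queries without a separate lookup step.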
```diff
@@ -215,6 +165,18 @@ class CompanyContextService:
             schema_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA, extension='.yaml')
 
             for filename in schema_files:
+                # skip tables that are already in the SQL context
+                if '-' in filename:
+                    dbname, f = filename.split("-", 1)
+                    table_name = f.split('.')[0]
+
+                    exists = any(
+                        item["db_name"] == dbname and item["table_name"] == table_name
+                        for item in db_tables
+                    )
+                    if exists:
+                        continue
+
                 try:
                     # 2. Read content
                     content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, filename)
```
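The skip above keys on the same `<database>-<table>.yaml` naming convention that `get_enriched_database_schema` uses later in this diff, so a table documented in the SQL context is not described twice. A quick sketch of the dedup decision with hypothetical file names:

```python
# How the dedup check parses schema file names (names hypothetical).
db_tables = [{"db_name": "sales", "table_name": "customers"}]

for filename in ["sales-customers.yaml", "glossary.yaml"]:
    skip = False
    if "-" in filename:
        dbname, rest = filename.split("-", 1)
        table_name = rest.split(".")[0]
        skip = any(
            item["db_name"] == dbname and item["table_name"] == table_name
            for item in db_tables
        )
    print(filename, "-> skipped" if skip else "-> processed")
# sales-customers.yaml -> skipped (already covered by the SQL context)
# glossary.yaml -> processed
```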
```diff
@@ -225,7 +187,7 @@ class CompanyContextService:
                     # 4. Generate markdown description from the dict
                     if schema_dict:
                         # We use generate_schema_table which accepts a dict directly
-                        yaml_schema_context += self.…
+                        yaml_schema_context += self.generate_schema_table(schema_dict)
 
                 except Exception as e:
                     logging.warning(f"Error processing schema file {filename}: {e}")
```
```diff
@@ -233,4 +195,203 @@ class CompanyContextService:
         except Exception as e:
             logging.warning(f"Error listing schema files for {company_short_name}: {e}")
 
-        return yaml_schema_context
+        return yaml_schema_context
+
+    def generate_schema_table(self, schema: dict) -> str:
+        if not schema or not isinstance(schema, dict):
+            return ""
+
+        # root detection
+        keys = list(schema.keys())
+        if not keys:
+            return ""
+
+        root_name = keys[0]
+        root_data = schema[root_name]
+        output = [f"\n### Objeto: `{root_name}`"]
+
+        # table description
+        root_description = root_data.get('description', '')
+        if root_description:
+            clean_desc = root_description.replace('\n', ' ').strip()
+            output.append(f"##Descripción: {clean_desc}")
+
+        # extract columns and properties from the root object
+        # priority: columns > properties > fields
+        properties = root_data.get('columns', root_data.get('properties', {}))
+        if properties:
+            output.append("**Estructura de Datos:**")
+
+            # use indent_level 0 for the main columns
+            # call recursive function to format the properties
+            output.append(self._format_json_schema(properties, 0))
+        else:
+            output.append("\n_Sin definición de estructura._")
+
+        return "\n".join(output)
+
+    def _format_json_schema(self, properties: dict, indent_level: int) -> str:
+        output = []
+        indent_str = ' ' * indent_level
+
+        if not isinstance(properties, dict):
+            return ""
+
+        for name, details in properties.items():
+            if not isinstance(details, dict): continue
+
+            description = details.get('description', '')
+            data_type = details.get('type', 'any')
+
+            # Visual normalization: jsonb -> object
+            if data_type and data_type.lower() == 'jsonb':
+                data_type = 'object'
+
+            line = f"{indent_str}- **`{name}`**"
+            if data_type:
+                line += f" ({data_type})"
+            if description:
+                clean_desc = description.replace('\n', ' ').strip()
+                line += f": {clean_desc}"
+
+            output.append(line)
+
+            # Recursion: look for children in 'properties' or 'fields'
+            children = details.get('properties', details.get('fields'))
+
+            # Array case (items -> properties)
+            if not children and details.get('items'):
+                items = details['items']
+                if isinstance(items, dict):
+                    if items.get('description'):
+                        output.append(f"{indent_str}  _Items: {items['description']}_")
+                    children = items.get('properties', items.get('fields'))
+
+            if children:
+                output.append(self._format_json_schema(children, indent_level + 1))
+
+        return "\n".join(output)
+
+
+    def _get_static_file_context(self, company_short_name: str) -> str:
+        # Get context from .md files using the repository
+        static_context = ''
+
+        try:
+            # 1. List markdown files in the context "folder"
+            # Note: The repo handles where this folder actually is (FS or DB)
+            md_files = self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md')
+
+            for filename in md_files:
+                try:
+                    # 2. Read content
+                    content = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, filename)
+                    static_context += content + "\n"  # Append content
+                except Exception as e:
+                    logging.warning(f"Error reading context file {filename}: {e}")
+
+        except Exception as e:
+            # If listing fails (e.g. folder doesn't exist), just log and return empty
+            logging.warning(f"Error listing context files for {company_short_name}: {e}")
+
+        return static_context
+
+    def get_enriched_database_schema(self, company_short_name: str, db_name: str) -> dict:
+        """
+        Retrieves the physical database structure and enriches it with metadata
+        found in the AssetRepository (YAML files).
+        """
+        try:
+            # 1. Physical Structure (Real Source)
+            structure = self.sql_service.get_database_structure(company_short_name, db_name)
+
+            # 2. YAML files
+            available_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA)
+            files_map = {}
+            for f in available_files:
+                clean = f.lower().replace('.yaml', '').replace('.yml', '')
+                if '-' not in clean:
+                    continue  # skip non-table files
+
+                dbname, table = clean.split("-", 1)
+                # filter by the database
+                if dbname != db_name:
+                    continue
+                files_map[table] = f
+
+            logging.debug(f"🔍 [CompanyContextService] Enriching schema for {db_name}. Files found: {len(files_map)}")
+
+            # 3. merge the physical structure with the YAML files
+            for table_name, table_data in structure.items():
+                t_name = table_name.lower().strip()
+
+                real_filename = files_map.get(t_name)
+                if not real_filename:
+                    continue
+
+                try:
+                    content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, real_filename)
+                    if not content:
+                        continue
+
+                    meta = yaml.safe_load(content) or {}
+
+                    # detect root, usually table name
+                    root_data = meta.get(table_name) or meta.get(t_name)
+                    if not root_data and len(meta) == 1:
+                        root_data = list(meta.values())[0]
+
+                    if not root_data:
+                        continue
+
+                    # A. Table description
+                    if 'description' in root_data:
+                        table_data['description'] = root_data['description']
+
+                    # B. get the map of columns from the YAML
+                    yaml_cols = root_data.get('columns', root_data.get('fields', {}))
+
+                    # --- LEGACY ADAPTER: List -> Dictionary ---
+                    if isinstance(yaml_cols, list):
+                        temp_map = {}
+                        for c in yaml_cols:
+                            if isinstance(c, dict) and 'name' in c:
+                                col_name = c['name']
+                                temp_map[col_name] = c
+                        yaml_cols = temp_map
+                    # --------------------------------------------
+
+                    if isinstance(yaml_cols, dict):
+                        # map in lower case for lookup
+                        y_cols_lower = {str(k).lower(): v for k, v in yaml_cols.items()}
+
+                        # Iterate over columns
+                        for col in table_data.get('columns', []):
+                            c_name = str(col['name']).lower()  # Real DB Name
+
+                            if c_name in y_cols_lower:
+                                y_col = y_cols_lower[c_name]
+
+                                # copy the basic metadata from the YAML
+                                if y_col.get('description'): col['description'] = y_col['description']
+                                if y_col.get('pii'): col['pii'] = y_col['pii']
+                                if y_col.get('synonyms'): col['synonyms'] = y_col['synonyms']
+
+                                # C. inject the json schema from the YAML
+                                props = y_col.get('properties')
+                                if props:
+                                    col['properties'] = props
+                    else:
+                        if yaml_cols:
+                            logging.warning(f"⚠️ [CompanyContextService] Unrecognized column format in {real_filename}")
+
+                except Exception as e:
+                    logging.error(f"❌ Error processing schema file {real_filename}: {e}")
+
+            return structure
+
+        except Exception as e:
+            logging.exception(f"Error generating enriched schema for {db_name}")
+            # Depending on policy, re-raise or return empty structure
+            raise e
+
```
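To illustrate the enrichment flow that `get_enriched_database_schema` implements, here is a self-contained sketch under stated assumptions: a hypothetical `sales-customers.yaml` asset, a hand-written physical structure standing in for `sql_service.get_database_structure`, and PyYAML installed. It reproduces the root detection, the legacy list-to-dict adapter, and the column merge:

```python
# Sketch of the YAML-driven enrichment; all names are hypothetical.
import yaml

# Content of a hypothetical schema asset "sales-customers.yaml"
# (the `<db>-<table>.yaml` convention the merge code keys on).
content = """
customers:
  description: One row per registered customer.
  columns:            # dict form; a legacy list form is also accepted
    email:
      type: varchar
      description: Primary contact address
      pii: true
    profile:
      type: jsonb
      properties:
        age: {type: integer, description: Self-reported age}
"""

# Hypothetical physical structure as the SQL service would report it.
structure = {
    "customers": {
        "columns": [
            {"name": "id", "type": "integer"},
            {"name": "email", "type": "varchar"},
            {"name": "profile", "type": "jsonb"},
        ]
    }
}

meta = yaml.safe_load(content) or {}
table_data = structure["customers"]
root_data = meta.get("customers")  # root detection: key matches table name

if "description" in root_data:
    table_data["description"] = root_data["description"]

yaml_cols = root_data.get("columns", root_data.get("fields", {}))
if isinstance(yaml_cols, list):  # legacy adapter: list -> dict
    yaml_cols = {c["name"]: c for c in yaml_cols if isinstance(c, dict) and "name" in c}

# case-insensitive merge of YAML metadata onto the physical columns
y_cols_lower = {str(k).lower(): v for k, v in yaml_cols.items()}
for col in table_data.get("columns", []):
    y_col = y_cols_lower.get(str(col["name"]).lower())
    if not y_col:
        continue
    for key in ("description", "pii", "synonyms", "properties"):
        if y_col.get(key):
            col[key] = y_col[key]

print(structure["customers"]["description"])  # One row per registered customer.
print(structure["customers"]["columns"][1])   # email now carries description + pii
```

The physical structure stays the source of truth: YAML can only annotate columns that actually exist, which keeps the generated context honest even when schema files go stale.

iatoolkit/services/dispatcher_service.py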
```diff
@@ -106,7 +106,7 @@ class Dispatcher:
         if self.tool_service.is_system_tool(function_name):
             # this is the system function to be executed.
             handler = self.tool_service.get_system_handler(function_name)
-            logging.…
+            logging.debug(
                 f"Calling system handler [{function_name}] "
                 f"with company_short_name={company_short_name} "
                 f"and kwargs={kwargs}"
```
iatoolkit/services/knowledge_base_service.py

```diff
@@ -382,27 +382,49 @@ class KnowledgeBaseService:
     def sync_collection_types(self, company_short_name: str, categories_config: list):
         """
         This should be called during company initialization or configuration reload.
+        Syncs DB collection types with the provided list.
+        Also updates the configuration YAML.
         """
         company = self.profile_service.get_company_by_short_name(company_short_name)
         if not company:
             raise IAToolkitException(IAToolkitException.ErrorType.INVALID_NAME,
-                                     … (old lines 389–390 are blank in this extraction; content not recoverable) …
+                                     f'Company {company_short_name} not found')
 
         session = self.document_repo.session
+
+        # 1. Get existing types
         existing_types = session.query(CollectionType).filter_by(company_id=company.id).all()
         existing_names = {ct.name: ct for ct in existing_types}
 
+        # 2. Add new types
+        current_config_names = set()
         for cat_name in categories_config:
+            current_config_names.add(cat_name)
             if cat_name not in existing_names:
                 new_type = CollectionType(company_id=company.id, name=cat_name)
                 session.add(new_type)
 
-        # …
-        # …
+        # 3. Delete types not in config
+        # Note: This might cascade delete documents depending on FK setup.
+        # Assuming safe deletion is desired here to match "Sync" behavior.
+        for existing_ct in existing_types:
+            if existing_ct.name not in current_config_names:
+                session.delete(existing_ct)
 
         session.commit()
 
+        # 4. Update Configuration YAML
+        # Lazy import to avoid circular dependency
+        from iatoolkit import current_iatoolkit
+        from iatoolkit.services.configuration_service import ConfigurationService
+        config_service = current_iatoolkit().get_injector().get(ConfigurationService)
+
+        config_service.update_configuration_key(
+            company_short_name,
+            "knowledge_base.collections",
+            categories_config
+        )
+
     def get_collection_names(self, company_short_name: str) -> List[str]:
         """
         Retrieves the names of all collections defined for a specific company.
```