iatoolkit 1.7.0__py3-none-any.whl → 1.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iatoolkit/__init__.py +1 -1
- iatoolkit/common/routes.py +16 -3
- iatoolkit/common/util.py +8 -123
- iatoolkit/core.py +1 -0
- iatoolkit/infra/connectors/file_connector.py +10 -2
- iatoolkit/infra/connectors/google_drive_connector.py +3 -0
- iatoolkit/infra/connectors/local_file_connector.py +3 -0
- iatoolkit/infra/connectors/s3_connector.py +24 -1
- iatoolkit/infra/llm_providers/deepseek_adapter.py +17 -1
- iatoolkit/infra/llm_providers/gemini_adapter.py +117 -18
- iatoolkit/infra/llm_providers/openai_adapter.py +175 -18
- iatoolkit/infra/llm_response.py +13 -0
- iatoolkit/locales/en.yaml +82 -4
- iatoolkit/locales/es.yaml +79 -4
- iatoolkit/repositories/llm_query_repo.py +51 -18
- iatoolkit/repositories/models.py +16 -7
- iatoolkit/services/company_context_service.py +294 -133
- iatoolkit/services/configuration_service.py +140 -121
- iatoolkit/services/dispatcher_service.py +1 -4
- iatoolkit/services/knowledge_base_service.py +26 -4
- iatoolkit/services/llm_client_service.py +58 -2
- iatoolkit/services/prompt_service.py +251 -164
- iatoolkit/services/query_service.py +37 -18
- iatoolkit/services/storage_service.py +92 -0
- iatoolkit/static/js/chat_filepond.js +188 -63
- iatoolkit/static/js/chat_main.js +105 -52
- iatoolkit/static/styles/chat_iatoolkit.css +96 -0
- iatoolkit/system_prompts/query_main.prompt +24 -41
- iatoolkit/templates/chat.html +15 -6
- iatoolkit/views/base_login_view.py +1 -1
- iatoolkit/views/categories_api_view.py +111 -0
- iatoolkit/views/chat_view.py +1 -1
- iatoolkit/views/configuration_api_view.py +1 -1
- iatoolkit/views/login_view.py +1 -1
- iatoolkit/views/prompt_api_view.py +88 -7
- {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/METADATA +1 -1
- {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/RECORD +41 -39
- {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/WHEEL +0 -0
- {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE +0 -0
- {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE_COMMUNITY.md +0 -0
- {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/top_level.txt +0 -0
iatoolkit/repositories/models.py
CHANGED
|
@@ -17,6 +17,19 @@ import enum
|
|
|
17
17
|
class Base(DeclarativeBase):
|
|
18
18
|
pass
|
|
19
19
|
|
|
20
|
+
|
|
21
|
+
class DocumentStatus(str, enum.Enum):
    """Status values for a document.

    The enum mixes in ``str``, so each member compares equal to its plain
    string value (for example ``DocumentStatus.ACTIVE == "active"``).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    ACTIVE = "active"
    FAILED = "failed"
|
|
26
|
+
|
|
27
|
+
class PromptType(str, enum.Enum):
    """Kinds of prompt; the values are the strings stored in ``Prompt.prompt_type``.

    The enum mixes in ``str``, so each member compares equal to its plain
    string value (for example ``PromptType.COMPANY == "company"``).
    """

    SYSTEM = "system"
    COMPANY = "company"
    AGENT = "agent"
|
|
31
|
+
|
|
32
|
+
|
|
20
33
|
# relation table for many-to-many relationship between companies and users
|
|
21
34
|
user_company = Table('iat_user_company',
|
|
22
35
|
Base.metadata,
|
|
@@ -149,11 +162,6 @@ class Tool(Base):
|
|
|
149
162
|
return {column.key: getattr(self, column.key) for column in class_mapper(self.__class__).columns}
|
|
150
163
|
|
|
151
164
|
|
|
152
|
-
class DocumentStatus(str, enum.Enum):
|
|
153
|
-
PENDING = "pending"
|
|
154
|
-
PROCESSING = "processing"
|
|
155
|
-
ACTIVE = "active"
|
|
156
|
-
FAILED = "failed"
|
|
157
165
|
|
|
158
166
|
|
|
159
167
|
class CollectionType(Base):
|
|
@@ -290,12 +298,13 @@ class Prompt(Base):
|
|
|
290
298
|
description = Column(String, nullable=False)
|
|
291
299
|
filename = Column(String, nullable=False)
|
|
292
300
|
active = Column(Boolean, default=True)
|
|
293
|
-
|
|
301
|
+
prompt_type = Column(String, default=PromptType.COMPANY.value, nullable=False)
|
|
294
302
|
order = Column(Integer, nullable=True, default=0)
|
|
295
303
|
category_id = Column(Integer, ForeignKey('iat_prompt_categories.id'), nullable=True)
|
|
296
304
|
custom_fields = Column(JSON, nullable=False, default=[])
|
|
297
|
-
|
|
298
305
|
created_at = Column(DateTime, default=datetime.now)
|
|
306
|
+
def to_dict(self):
|
|
307
|
+
return {column.key: getattr(self, column.key) for column in class_mapper(self.__class__).columns}
|
|
299
308
|
|
|
300
309
|
company = relationship("Company", back_populates="prompts")
|
|
301
310
|
category = relationship("PromptCategory", back_populates="prompts")
|
|
@@ -7,9 +7,11 @@ from iatoolkit.common.util import Utility
|
|
|
7
7
|
from iatoolkit.services.configuration_service import ConfigurationService
|
|
8
8
|
from iatoolkit.common.interfaces.asset_storage import AssetRepository, AssetType
|
|
9
9
|
from iatoolkit.services.sql_service import SqlService
|
|
10
|
-
from iatoolkit.common.exceptions import IAToolkitException
|
|
11
10
|
import logging
|
|
11
|
+
import yaml
|
|
12
12
|
from injector import inject
|
|
13
|
+
from typing import List, Dict
|
|
14
|
+
import os
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class CompanyContextService:
|
|
@@ -46,8 +48,9 @@ class CompanyContextService:
|
|
|
46
48
|
logging.warning(f"Could not load Markdown context for '{company_short_name}': {e}")
|
|
47
49
|
|
|
48
50
|
# 2. Context from company-specific SQL databases
|
|
51
|
+
db_tables = []
|
|
49
52
|
try:
|
|
50
|
-
sql_context = self.
|
|
53
|
+
sql_context, db_tables = self._get_sql_enriched_context(company_short_name)
|
|
51
54
|
if sql_context:
|
|
52
55
|
context_parts.append(sql_context)
|
|
53
56
|
except Exception as e:
|
|
@@ -55,7 +58,7 @@ class CompanyContextService:
|
|
|
55
58
|
|
|
56
59
|
# 3. Context from yaml (schema/*.yaml) files
|
|
57
60
|
try:
|
|
58
|
-
yaml_schema_context = self._get_yaml_schema_context(company_short_name)
|
|
61
|
+
yaml_schema_context = self._get_yaml_schema_context(company_short_name, db_tables)
|
|
59
62
|
if yaml_schema_context:
|
|
60
63
|
context_parts.append(yaml_schema_context)
|
|
61
64
|
except Exception as e:
|
|
@@ -64,149 +67,96 @@ class CompanyContextService:
|
|
|
64
67
|
# Join all parts with a clear separator
|
|
65
68
|
return "\n\n---\n\n".join(context_parts)
|
|
66
69
|
|
|
67
|
-
def _get_static_file_context(self, company_short_name: str) -> str:
|
|
68
|
-
# Get context from .md files using the repository
|
|
69
|
-
static_context = ''
|
|
70
70
|
|
|
71
|
-
|
|
72
|
-
# 1. List markdown files in the context "folder"
|
|
73
|
-
# Note: The repo handles where this folder actually is (FS or DB)
|
|
74
|
-
md_files = self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md')
|
|
75
|
-
|
|
76
|
-
for filename in md_files:
|
|
77
|
-
try:
|
|
78
|
-
# 2. Read content
|
|
79
|
-
content = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, filename)
|
|
80
|
-
static_context += content + "\n" # Append content
|
|
81
|
-
except Exception as e:
|
|
82
|
-
logging.warning(f"Error reading context file {filename}: {e}")
|
|
83
|
-
|
|
84
|
-
except Exception as e:
|
|
85
|
-
# If listing fails (e.g. folder doesn't exist), just log and return empty
|
|
86
|
-
logging.warning(f"Error listing context files for {company_short_name}: {e}")
|
|
87
|
-
|
|
88
|
-
return static_context
|
|
89
|
-
|
|
90
|
-
def _get_sql_schema_context(self, company_short_name: str) -> str:
|
|
71
|
+
def _get_sql_enriched_context(self, company_short_name: str) -> tuple:
    """
    Build the SQL section of the LLM context from the enriched database schemas.

    Iterates over the ``sql`` entries of the company's ``data_sources``
    configuration, fetches the enriched structure of each database through
    ``get_enriched_database_schema`` and renders it as prompt-friendly text.

    Args:
        company_short_name: Company whose configuration and schemas are read.

    Returns:
        A ``(context_text, db_tables)`` tuple. ``context_text`` is the rendered
        text ('' when nothing could be rendered) and ``db_tables`` is a list of
        ``{'db_name': ..., 'table_name': ...}`` dicts, one per table that was
        actually rendered into ``context_text``.

    A database whose context cannot be generated is logged and skipped; none
    of its tables are reported in ``db_tables``, so callers that use that list
    to skip YAML schema files do not lose tables that failed to render here.
    """
    data_sources_config = self.config_service.get_configuration(company_short_name, 'data_sources')
    if not data_sources_config or not data_sources_config.get('sql'):
        return '', []

    context_output = []
    db_tables = []

    for source in data_sources_config.get('sql', []):
        db_name = source.get('database')
        if not db_name:
            continue

        try:
            # 1. Get the Enriched Schema (Physical + YAML)
            enriched_structure = self.get_enriched_database_schema(company_short_name, db_name)
            if not enriched_structure:
                continue

            # 2. Build Header for this Database
            db_parts = [f"***Database (`database_key`)***: {db_name}\n"]

            # Optional: DB description from config, when available
            db_desc = source.get('description', '')
            if db_desc:
                db_parts.append(f"**Description:** {db_desc}\n")

            db_parts.append(
                f"IMPORTANT: To query this database you MUST use the service/tool "
                f"**iat_sql_query**, with `database_key='{db_name}'`.\n"
            )

            # 3. Format Tables. Table names are staged locally and only
            # published once the whole database rendered without error.
            source_tables = []
            for table_name, table_data in enriched_structure.items():
                table_desc = table_data.get('description', '')
                columns = table_data.get('columns', [])

                table_parts = [f"\nTable: **{table_name}**"]
                if table_desc:
                    table_parts.append(f"\nDescription: {table_desc}")
                table_parts.append("\nColumns:")

                for col in columns:
                    col_name = col.get('name')
                    col_type = col.get('type', 'unknown')
                    col_desc = col.get('description', '')
                    col_props = col.get('properties')  # Nested JSONB structure

                    col_line = f"\n - `{col_name}` ({col_type})"
                    if col_desc:
                        col_line += f": {col_desc}"
                    table_parts.append(col_line)

                    # Nested properties (JSONB enriched from YAML) are
                    # rendered below the column at indent level 2.
                    if col_props:
                        table_parts.append("\n")
                        table_parts.append(self._format_json_schema(col_props, 2))

                db_parts.append("".join(table_parts))
                source_tables.append({
                    'db_name': db_name,
                    'table_name': table_name,
                })

        except Exception as e:
            # Best effort: one broken database must not prevent the others
            # from being described.
            logging.warning(f"Could not generate enriched SQL context for '{db_name}': {e}")
            continue

        # Commit the database text and its table list together.
        context_output.append("".join(db_parts))
        db_tables.extend(source_tables)

    if not context_output:
        return "", []

    # NOTE(review): the header names `iat_sql_service` while the per-database
    # text names the tool `iat_sql_query` - confirm which name is intended.
    header = "These are the SQL databases you can query using the **`iat_sql_service`**. The schema below includes enriched metadata:\n"
    return header + "\n\n---\n\n".join(context_output), db_tables
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _get_yaml_schema_context(self, company_short_name: str, db_tables: List[Dict]) -> str:
|
|
210
160
|
# Get context from .yaml schema files using the repository
|
|
211
161
|
yaml_schema_context = ''
|
|
212
162
|
|
|
@@ -215,6 +165,18 @@ class CompanyContextService:
|
|
|
215
165
|
schema_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA, extension='.yaml')
|
|
216
166
|
|
|
217
167
|
for filename in schema_files:
|
|
168
|
+
# skip tables that are already in the SQL context
|
|
169
|
+
if '-' in filename:
|
|
170
|
+
dbname, f = filename.split("-", 1)
|
|
171
|
+
table_name = f.split('.')[0]
|
|
172
|
+
|
|
173
|
+
exists = any(
|
|
174
|
+
item["db_name"] == dbname and item["table_name"] == table_name
|
|
175
|
+
for item in db_tables
|
|
176
|
+
)
|
|
177
|
+
if exists:
|
|
178
|
+
continue
|
|
179
|
+
|
|
218
180
|
try:
|
|
219
181
|
# 2. Read content
|
|
220
182
|
content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, filename)
|
|
@@ -225,7 +187,7 @@ class CompanyContextService:
|
|
|
225
187
|
# 4. Generate markdown description from the dict
|
|
226
188
|
if schema_dict:
|
|
227
189
|
# We use generate_schema_table which accepts a dict directly
|
|
228
|
-
yaml_schema_context += self.
|
|
190
|
+
yaml_schema_context += self.generate_schema_table(schema_dict)
|
|
229
191
|
|
|
230
192
|
except Exception as e:
|
|
231
193
|
logging.warning(f"Error processing schema file {filename}: {e}")
|
|
@@ -233,4 +195,203 @@ class CompanyContextService:
|
|
|
233
195
|
except Exception as e:
|
|
234
196
|
logging.warning(f"Error listing schema files for {company_short_name}: {e}")
|
|
235
197
|
|
|
236
|
-
return yaml_schema_context
|
|
198
|
+
return yaml_schema_context
|
|
199
|
+
|
|
200
|
+
def generate_schema_table(self, schema: dict) -> str:
    """
    Render a schema dictionary (as loaded from a YAML schema file) as markdown.

    The first key of ``schema`` is treated as the root object name and its
    value as the object definition. The output contains a heading, the
    optional description and the formatted structure.

    Args:
        schema: Mapping whose first entry is ``{root_name: definition}``.

    Returns:
        The markdown text, or '' when ``schema`` is empty or not a dict.
    """
    if not schema or not isinstance(schema, dict):
        return ""

    # root detection: the first entry is the object being described
    root_name, root_data = next(iter(schema.items()))
    if not isinstance(root_data, dict):
        # e.g. `table_name:` with no body in the YAML file; render it as an
        # object without structure instead of failing on `.get`.
        root_data = {}

    output = [f"\n### Objeto: `{root_name}`"]

    # table description (newlines collapsed so it stays on one line)
    root_description = root_data.get('description', '')
    if root_description:
        clean_desc = str(root_description).replace('\n', ' ').strip()
        output.append(f"##Descripción: {clean_desc}")

    # priority: columns > properties > fields; the first non-empty one wins
    properties = next(
        (root_data[key] for key in ('columns', 'properties', 'fields') if root_data.get(key)),
        None,
    )
    if properties:
        output.append("**Estructura de Datos:**")
        # indent level 0 for the top-level columns; nesting is handled by
        # the recursive formatter
        output.append(self._format_json_schema(properties, 0))
    else:
        output.append("\n_Sin definición de estructura._")

    return "\n".join(output)
|
|
232
|
+
|
|
233
|
+
def _format_json_schema(self, properties: dict, indent_level: int) -> str:
    """
    Recursively format a mapping of property definitions as a markdown list.

    Each entry of ``properties`` maps a property name to a dict that may
    contain ``type``, ``description``, nested ``properties`` / ``fields`` /
    ``columns`` and, for arrays, an ``items`` definition.

    Args:
        properties: Mapping of property name to its definition dict.
        indent_level: Nesting depth; sets the leading indentation of the
            emitted lines.

    Returns:
        The formatted lines joined with newlines, or '' when ``properties``
        is not a dict or holds no dict-valued entries.
    """
    if not isinstance(properties, dict):
        return ""

    output = []
    indent_str = ' ' * indent_level

    for name, details in properties.items():
        if not isinstance(details, dict):
            continue

        description = details.get('description', '')
        data_type = details.get('type', 'any')

        # Visual normalization: jsonb -> object. The isinstance check keeps
        # non-string types (e.g. a YAML list of types) from crashing here.
        if isinstance(data_type, str) and data_type.lower() == 'jsonb':
            data_type = 'object'

        line = f"{indent_str}- **`{name}`**"
        if data_type:
            line += f" ({data_type})"
        if description:
            clean_desc = str(description).replace('\n', ' ').strip()
            line += f": {clean_desc}"

        output.append(line)

        # Recursion: look for children in 'properties', 'fields' or
        # 'columns'; the first non-empty one wins.
        children = details.get('properties') or details.get('fields') or details.get('columns')

        # Array case (items -> properties)
        items = details.get('items')
        if not children and isinstance(items, dict):
            if items.get('description'):
                output.append(f"{indent_str} _Items: {items['description']}_")
            children = items.get('properties') or items.get('fields')

        if children:
            nested = self._format_json_schema(children, indent_level + 1)
            # Skip empty results so malformed children do not leave a
            # stray blank line in the output.
            if nested:
                output.append(nested)

    return "\n".join(output)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _get_static_file_context(self, company_short_name: str) -> str:
    """
    Concatenate the company's markdown context files into a single string.

    Lists the ``.md`` files of type ``AssetType.CONTEXT`` through the asset
    repository and appends each file's text followed by a newline. A file
    that cannot be read is logged and skipped; if the listing itself fails,
    the failure is logged and whatever was collected so far is returned.
    """
    collected = []

    try:
        # The repository decides where the context "folder" actually lives.
        for md_name in self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md'):
            try:
                text = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, md_name)
                collected.append(text + "\n")
            except Exception as e:
                logging.warning(f"Error reading context file {md_name}: {e}")

    except Exception as e:
        # Listing failed (e.g. the folder doesn't exist): log and fall through.
        logging.warning(f"Error listing context files for {company_short_name}: {e}")

    return ''.join(collected)
|
|
298
|
+
|
|
299
|
+
def get_enriched_database_schema(self, company_short_name: str, db_name: str) -> dict:
    """
    Retrieve the physical database structure and enrich it with metadata
    found in the AssetRepository (YAML files).

    Schema files named ``<database>-<table>.yaml`` (or ``.yml``) are matched
    case-insensitively against ``db_name`` and the table names of the
    physical structure. For each match, the table description and the
    per-column ``description``, ``pii``, ``synonyms`` and ``properties``
    found in the YAML are copied onto the structure in place.

    Args:
        company_short_name: Company whose database and schema files are read.
        db_name: Name of the database to describe.

    Returns:
        The (mutated) structure returned by the SQL service, or an empty
        dict when the service reports no structure.

    Raises:
        Exception: Any error raised while fetching the structure or listing
            the schema files is logged and re-raised. Errors in a single
            schema file are logged and only skip that table's enrichment.
    """
    try:
        # 1. Physical Structure (Real Source)
        structure = self.sql_service.get_database_structure(company_short_name, db_name)
        if not structure:
            return {}

        # 2. YAML files: map lower-cased table name -> real file name
        available_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA)
        # File names are lower-cased below, so the database name must be too.
        target_db = str(db_name).lower()
        files_map = {}
        for f in available_files:
            clean = f.lower()
            # Strip only a trailing extension, never text inside the name.
            for ext in ('.yaml', '.yml'):
                if clean.endswith(ext):
                    clean = clean[:-len(ext)]
                    break
            if '-' not in clean:
                continue  # skip non-table files

            dbname, table = clean.split("-", 1)
            # filter by the database
            if dbname != target_db:
                continue
            files_map[table] = f

        logging.debug(f"🔍 [CompanyContextService] Enriching schema for {db_name}. Files found: {len(files_map)}")

        # 3. Merge the physical structure with the YAML metadata
        for table_name, table_data in structure.items():
            t_name = table_name.lower().strip()

            real_filename = files_map.get(t_name)
            if not real_filename:
                continue

            try:
                content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, real_filename)
                if not content:
                    continue

                meta = yaml.safe_load(content) or {}
                if not isinstance(meta, dict):
                    logging.warning(f"⚠️ [CompanyContextService] Unrecognized schema format in {real_filename}")
                    continue

                # detect root, usually the table name
                root_data = meta.get(table_name) or meta.get(t_name)
                if not root_data and len(meta) == 1:
                    root_data = next(iter(meta.values()))

                if not root_data:
                    continue
                if not isinstance(root_data, dict):
                    logging.warning(f"⚠️ [CompanyContextService] Unrecognized schema format in {real_filename}")
                    continue

                # A. Table description
                if 'description' in root_data:
                    table_data['description'] = root_data['description']

                # B. get the map of columns from the YAML
                yaml_cols = root_data.get('columns', root_data.get('fields', {}))

                # Legacy adapter: list of {'name': ...} dicts -> dict by name
                if isinstance(yaml_cols, list):
                    yaml_cols = {
                        c['name']: c
                        for c in yaml_cols
                        if isinstance(c, dict) and 'name' in c
                    }

                if not isinstance(yaml_cols, dict):
                    if yaml_cols:
                        logging.warning(f"⚠️ [CompanyContextService] Unrecognized column format in {real_filename}")
                    continue

                # map in lower case for lookup
                y_cols_lower = {str(k).lower(): v for k, v in yaml_cols.items()}

                for col in table_data.get('columns', []):
                    real_name = col.get('name')  # Real DB Name
                    if real_name is None:
                        continue

                    y_col = y_cols_lower.get(str(real_name).lower())
                    # Skip columns without metadata, and metadata that is
                    # not a mapping (e.g. a bare string), so one odd entry
                    # does not abort the rest of the table.
                    if not isinstance(y_col, dict):
                        continue

                    # C. copy basic metadata and the nested JSON schema
                    for key in ('description', 'pii', 'synonyms', 'properties'):
                        if y_col.get(key):
                            col[key] = y_col[key]

            except Exception as e:
                logging.error(f"❌ Error processing schema file {real_filename}: {e}")

        return structure

    except Exception:
        logging.exception(f"Error generating enriched schema for {db_name}")
        # Re-raise unchanged so callers decide how to handle the failure.
        raise
|
|
397
|
+
|