iatoolkit 0.91.1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. iatoolkit/__init__.py +6 -4
  2. iatoolkit/base_company.py +0 -16
  3. iatoolkit/cli_commands.py +3 -14
  4. iatoolkit/common/exceptions.py +1 -0
  5. iatoolkit/common/interfaces/__init__.py +0 -0
  6. iatoolkit/common/interfaces/asset_storage.py +34 -0
  7. iatoolkit/common/interfaces/database_provider.py +43 -0
  8. iatoolkit/common/model_registry.py +159 -0
  9. iatoolkit/common/routes.py +47 -5
  10. iatoolkit/common/util.py +32 -13
  11. iatoolkit/company_registry.py +5 -0
  12. iatoolkit/core.py +51 -20
  13. iatoolkit/infra/connectors/file_connector_factory.py +1 -0
  14. iatoolkit/infra/connectors/s3_connector.py +4 -2
  15. iatoolkit/infra/llm_providers/__init__.py +0 -0
  16. iatoolkit/infra/llm_providers/deepseek_adapter.py +278 -0
  17. iatoolkit/infra/{gemini_adapter.py → llm_providers/gemini_adapter.py} +11 -17
  18. iatoolkit/infra/{openai_adapter.py → llm_providers/openai_adapter.py} +41 -7
  19. iatoolkit/infra/llm_proxy.py +235 -134
  20. iatoolkit/infra/llm_response.py +5 -0
  21. iatoolkit/locales/en.yaml +158 -2
  22. iatoolkit/locales/es.yaml +158 -0
  23. iatoolkit/repositories/database_manager.py +52 -47
  24. iatoolkit/repositories/document_repo.py +7 -0
  25. iatoolkit/repositories/filesystem_asset_repository.py +36 -0
  26. iatoolkit/repositories/llm_query_repo.py +2 -0
  27. iatoolkit/repositories/models.py +72 -79
  28. iatoolkit/repositories/profile_repo.py +59 -3
  29. iatoolkit/repositories/vs_repo.py +22 -24
  30. iatoolkit/services/company_context_service.py +126 -53
  31. iatoolkit/services/configuration_service.py +299 -73
  32. iatoolkit/services/dispatcher_service.py +21 -3
  33. iatoolkit/services/file_processor_service.py +0 -5
  34. iatoolkit/services/history_manager_service.py +43 -24
  35. iatoolkit/services/knowledge_base_service.py +425 -0
  36. iatoolkit/{infra/llm_client.py → services/llm_client_service.py} +38 -29
  37. iatoolkit/services/load_documents_service.py +26 -48
  38. iatoolkit/services/profile_service.py +32 -4
  39. iatoolkit/services/prompt_service.py +32 -30
  40. iatoolkit/services/query_service.py +51 -26
  41. iatoolkit/services/sql_service.py +122 -74
  42. iatoolkit/services/tool_service.py +26 -11
  43. iatoolkit/services/user_session_context_service.py +115 -63
  44. iatoolkit/static/js/chat_main.js +44 -4
  45. iatoolkit/static/js/chat_model_selector.js +227 -0
  46. iatoolkit/static/js/chat_onboarding_button.js +1 -1
  47. iatoolkit/static/js/chat_reload_button.js +4 -1
  48. iatoolkit/static/styles/chat_iatoolkit.css +58 -2
  49. iatoolkit/static/styles/llm_output.css +34 -1
  50. iatoolkit/system_prompts/query_main.prompt +26 -2
  51. iatoolkit/templates/base.html +13 -0
  52. iatoolkit/templates/chat.html +45 -2
  53. iatoolkit/templates/onboarding_shell.html +0 -1
  54. iatoolkit/views/base_login_view.py +7 -2
  55. iatoolkit/views/chat_view.py +76 -0
  56. iatoolkit/views/configuration_api_view.py +163 -0
  57. iatoolkit/views/load_document_api_view.py +14 -10
  58. iatoolkit/views/login_view.py +8 -3
  59. iatoolkit/views/rag_api_view.py +216 -0
  60. iatoolkit/views/users_api_view.py +33 -0
  61. {iatoolkit-0.91.1.dist-info → iatoolkit-1.7.0.dist-info}/METADATA +4 -4
  62. {iatoolkit-0.91.1.dist-info → iatoolkit-1.7.0.dist-info}/RECORD +66 -58
  63. iatoolkit/repositories/tasks_repo.py +0 -52
  64. iatoolkit/services/search_service.py +0 -55
  65. iatoolkit/services/tasks_service.py +0 -188
  66. iatoolkit/views/tasks_api_view.py +0 -72
  67. iatoolkit/views/tasks_review_api_view.py +0 -55
  68. {iatoolkit-0.91.1.dist-info → iatoolkit-1.7.0.dist-info}/WHEEL +0 -0
  69. {iatoolkit-0.91.1.dist-info → iatoolkit-1.7.0.dist-info}/licenses/LICENSE +0 -0
  70. {iatoolkit-0.91.1.dist-info → iatoolkit-1.7.0.dist-info}/licenses/LICENSE_COMMUNITY.md +0 -0
  71. {iatoolkit-0.91.1.dist-info → iatoolkit-1.7.0.dist-info}/top_level.txt +0 -0
@@ -3,10 +3,11 @@
3
3
  #
4
4
  # IAToolkit is open source software.
5
5
 
6
- from iatoolkit.repositories.models import User, Company, ApiKey, UserFeedback
6
+ from iatoolkit.repositories.models import (User, Company, user_company,
7
+ ApiKey, UserFeedback, AccessLog)
7
8
  from injector import inject
8
9
  from iatoolkit.repositories.database_manager import DatabaseManager
9
- from sqlalchemy.orm import joinedload # Para cargar la relación eficientemente
10
+ from sqlalchemy import select, func, and_
10
11
 
11
12
 
12
13
  class ProfileRepo:
@@ -69,8 +70,63 @@ class ProfileRepo:
69
70
  def get_companies(self) -> list[Company]:
70
71
  return self.session.query(Company).all()
71
72
 
73
+ def get_user_role_in_company(self, company_id, user_id, ):
74
+ stmt = (
75
+ select(user_company.c.role)
76
+ .where(
77
+ user_company.c.user_id == user_id,
78
+ user_company.c.company_id == company_id,
79
+ )
80
+ )
81
+ result = self.session.execute(stmt).scalar_one_or_none()
82
+ return result
83
+
84
+ def get_companies_by_user_identifier(self, user_identifier: str) -> list:
85
+ """
86
+ Return all the companies to which the user belongs (by email),
87
+ and the role he has in each company.
88
+ """
89
+ return (
90
+ self.session.query(Company, user_company.c.role)
91
+ .join(user_company, Company.id == user_company.c.company_id)
92
+ .join(User, User.id == user_company.c.user_id)
93
+ .filter(User.email == user_identifier)
94
+ .all()
95
+ )
96
+
97
+ def get_company_users_with_details(self, company_short_name: str) -> list[dict]:
98
+ # returns the list of users in the company with their role and last access date
99
+
100
+ # subquery for last access date
101
+ last_access_sq = (
102
+ self.session.query(
103
+ AccessLog.user_identifier,
104
+ func.max(AccessLog.timestamp).label("max_ts")
105
+ )
106
+ .filter(AccessLog.company_short_name == company_short_name)
107
+ .group_by(AccessLog.user_identifier)
108
+ .subquery()
109
+ )
110
+
111
+ # main query
112
+ stmt = (
113
+ self.session.query(
114
+ User,
115
+ user_company.c.role,
116
+ last_access_sq.c.max_ts
117
+ )
118
+ .join(user_company, User.id == user_company.c.user_id)
119
+ .join(Company, Company.id == user_company.c.company_id)
120
+ .outerjoin(last_access_sq, User.email == last_access_sq.c.user_identifier)
121
+ .filter(Company.short_name == company_short_name)
122
+ )
123
+
124
+ results = stmt.all()
125
+
126
+ return results
127
+
72
128
  def create_company(self, new_company: Company):
73
- company = self.session.query(Company).filter_by(name=new_company.name).first()
129
+ company = self.session.query(Company).filter_by(short_name=new_company.short_name).first()
74
130
  if company:
75
131
  if company.parameters != new_company.parameters:
76
132
  company.parameters = new_company.parameters
@@ -9,6 +9,7 @@ from iatoolkit.common.exceptions import IAToolkitException
9
9
  from iatoolkit.repositories.database_manager import DatabaseManager
10
10
  from iatoolkit.services.embedding_service import EmbeddingService
11
11
  from iatoolkit.repositories.models import Document, VSDoc, Company
12
+ from typing import Dict
12
13
  import logging
13
14
 
14
15
 
@@ -38,8 +39,9 @@ class VSRepo:
38
39
  company_short_name: str,
39
40
  query_text: str,
40
41
  n_results=5,
41
- metadata_filter=None
42
- ) -> list[Document]:
42
+ metadata_filter=None,
43
+ collection_id: int = None
44
+ ) -> list[Dict]:
43
45
  """
44
46
  search documents similar to the query for a company
45
47
 
@@ -70,11 +72,12 @@ class VSRepo:
70
72
 
71
73
  # build the SQL query
72
74
  sql_query_parts = ["""
73
- SELECT iat_documents.id, \
75
+ SELECT iat_vsdocs.id, \
74
76
  iat_documents.filename, \
75
- iat_documents.content, \
77
+ iat_vsdocs.text, \
76
78
  iat_documents.content_b64, \
77
- iat_documents.meta
79
+ iat_documents.meta,
80
+ iat_documents.id
78
81
  FROM iat_vsdocs, \
79
82
  iat_documents
80
83
  WHERE iat_vsdocs.company_id = :company_id
@@ -88,6 +91,10 @@ class VSRepo:
88
91
  "n_results": n_results
89
92
  }
90
93
 
94
+ # Filter by Collection ID
95
+ if collection_id:
96
+ sql_query_parts.append(" AND iat_documents.collection_type_id = :collection_id")
97
+ params['collection_id'] = collection_id
91
98
 
92
99
  # add metadata filter, if exists
93
100
  if metadata_filter and isinstance(metadata_filter, dict):
@@ -117,17 +124,18 @@ class VSRepo:
117
124
  for row in rows:
118
125
  # create the document object with the data
119
126
  meta_data = row[4] if len(row) > 4 and row[4] is not None else {}
120
- doc = Document(
121
- id=row[0],
122
- company_id=company.id,
123
- filename=row[1],
124
- content=row[2],
125
- content_b64=row[3],
126
- meta=meta_data
127
+ vs_documents.append(
128
+ {
129
+ 'id': row[0],
130
+ 'document_id': row[5],
131
+ 'filename': row[1],
132
+ 'text': row[2],
133
+ 'meta': meta_data,
134
+ }
127
135
  )
128
- vs_documents.append(doc)
129
136
 
130
- return self.remove_duplicates_by_id(vs_documents)
137
+ return vs_documents
138
+
131
139
 
132
140
  except Exception as e:
133
141
  logging.error(f"Error en la consulta de documentos: {str(e)}")
@@ -138,13 +146,3 @@ class VSRepo:
138
146
  finally:
139
147
  self.session.close()
140
148
 
141
- def remove_duplicates_by_id(self, objects):
142
- unique_by_id = {}
143
- result = []
144
-
145
- for obj in objects:
146
- if obj.id not in unique_by_id:
147
- unique_by_id[obj.id] = True
148
- result.append(obj)
149
-
150
- return result
@@ -5,11 +5,11 @@
5
5
 
6
6
  from iatoolkit.common.util import Utility
7
7
  from iatoolkit.services.configuration_service import ConfigurationService
8
+ from iatoolkit.common.interfaces.asset_storage import AssetRepository, AssetType
8
9
  from iatoolkit.services.sql_service import SqlService
9
10
  from iatoolkit.common.exceptions import IAToolkitException
10
11
  import logging
11
12
  from injector import inject
12
- import os
13
13
 
14
14
 
15
15
  class CompanyContextService:
@@ -22,10 +22,12 @@ class CompanyContextService:
22
22
  def __init__(self,
23
23
  sql_service: SqlService,
24
24
  utility: Utility,
25
- config_service: ConfigurationService):
25
+ config_service: ConfigurationService,
26
+ asset_repo: AssetRepository):
26
27
  self.sql_service = sql_service
27
28
  self.utility = utility
28
29
  self.config_service = config_service
30
+ self.asset_repo = asset_repo
29
31
 
30
32
  def get_company_context(self, company_short_name: str) -> str:
31
33
  """
@@ -35,7 +37,7 @@ class CompanyContextService:
35
37
  """
36
38
  context_parts = []
37
39
 
38
- # 1. Context from Markdown (context/*.md) and yaml (schema/*.yaml) files
40
+ # 1. Context from Markdown (context/*.md) files
39
41
  try:
40
42
  md_context = self._get_static_file_context(company_short_name)
41
43
  if md_context:
@@ -43,7 +45,7 @@ class CompanyContextService:
43
45
  except Exception as e:
44
46
  logging.warning(f"Could not load Markdown context for '{company_short_name}': {e}")
45
47
 
46
- # 2. Context from company-specific database schemas (schema/*.yaml files)
48
+ # 2. Context from company-specific SQL databases
47
49
  try:
48
50
  sql_context = self._get_sql_schema_context(company_short_name)
49
51
  if sql_context:
@@ -51,29 +53,37 @@ class CompanyContextService:
51
53
  except Exception as e:
52
54
  logging.warning(f"Could not generate SQL context for '{company_short_name}': {e}")
53
55
 
56
+ # 3. Context from yaml (schema/*.yaml) files
57
+ try:
58
+ yaml_schema_context = self._get_yaml_schema_context(company_short_name)
59
+ if yaml_schema_context:
60
+ context_parts.append(yaml_schema_context)
61
+ except Exception as e:
62
+ logging.warning(f"Could not load Yaml context for '{company_short_name}': {e}")
63
+
54
64
  # Join all parts with a clear separator
55
65
  return "\n\n---\n\n".join(context_parts)
56
66
 
57
67
  def _get_static_file_context(self, company_short_name: str) -> str:
58
- # Get context from .md and .yaml schema files.
68
+ # Get context from .md files using the repository
59
69
  static_context = ''
60
70
 
61
- # Part 1: Markdown context files
62
- context_dir = f'companies/{company_short_name}/context'
63
- if os.path.exists(context_dir):
64
- context_files = self.utility.get_files_by_extension(context_dir, '.md', return_extension=True)
65
- for file in context_files:
66
- filepath = os.path.join(context_dir, file)
67
- static_context += self.utility.load_markdown_context(filepath)
68
-
69
- # Part 2: YAML schema files
70
- schema_dir = f'companies/{company_short_name}/schema'
71
- if os.path.exists(schema_dir):
72
- schema_files = self.utility.get_files_by_extension(schema_dir, '.yaml', return_extension=True)
73
- for file in schema_files:
74
- schema_name = file.split('.')[0] # Use full filename as entity name
75
- filepath = os.path.join(schema_dir, file)
76
- static_context += self.utility.generate_context_for_schema(schema_name, filepath)
71
+ try:
72
+ # 1. List markdown files in the context "folder"
73
+ # Note: The repo handles where this folder actually is (FS or DB)
74
+ md_files = self.asset_repo.list_files(company_short_name, AssetType.CONTEXT, extension='.md')
75
+
76
+ for filename in md_files:
77
+ try:
78
+ # 2. Read content
79
+ content = self.asset_repo.read_text(company_short_name, AssetType.CONTEXT, filename)
80
+ static_context += content + "\n" # Append content
81
+ except Exception as e:
82
+ logging.warning(f"Error reading context file {filename}: {e}")
83
+
84
+ except Exception as e:
85
+ # If listing fails (e.g. folder doesn't exist), just log and return empty
86
+ logging.warning(f"Error listing context files for {company_short_name}: {e}")
77
87
 
78
88
  return static_context
79
89
 
@@ -93,49 +103,63 @@ class CompanyContextService:
93
103
  if not db_name:
94
104
  continue
95
105
 
106
+ # get database schema definition, for this source.
107
+ database_schema_name = source.get('schema', 'public')
108
+
96
109
  try:
97
- db_manager = self.sql_service.get_database_manager(db_name)
110
+ # 1. Get the full database structure at once using the SQL service
111
+ db_structure = self.sql_service.get_database_structure(company_short_name, db_name)
98
112
  except IAToolkitException as e:
99
- logging.warning(f"Could not get DB manager for '{db_name}': {e}")
113
+ logging.warning(f"Could not get DB structure for '{db_name}': {e}")
100
114
  continue
101
115
 
102
116
  db_description = source.get('description', '')
103
- sql_context = f'***Base de datos (database_name)***: {db_name}\n'
104
- sql_context += f"**Descripción:**: {db_description}\n" if db_description else ""
105
- sql_context += "Para consultar esta base de datos debes utilizar el servicio ***iat_sql_query***.\n"
117
+ sql_context += f"***Database (`database_key`)***: {db_name}\n"
118
+
119
+ if db_description:
120
+ sql_context += (
121
+ f"**Description:** : {db_description}\n"
122
+ )
123
+
124
+ sql_context += (
125
+ f"IMPORTANT: To query this database you MUST use the service/tool "
126
+ f"**iat_sql_query**, with `database_key={db_name}`.\n"
127
+ )
128
+
129
+ sql_context += (
130
+ f"IMPORTANT: The value of **database_key** is ALWAYS the literal string "
131
+ f"'{db_name}'. Do not invent or infer alternative names. "
132
+ f"Use exactly: `database_key='{db_name}'`.\n"
133
+ )
106
134
 
107
- # 1. get the list of tables to process.
135
+ # 2. get the list of tables to process based on structure and config
108
136
  tables_to_process = []
109
137
  if source.get('include_all_tables', False):
110
- all_tables = db_manager.get_all_table_names()
138
+ # Use keys from the fetched structure
139
+ all_tables = list(db_structure.keys())
111
140
  tables_to_exclude = set(source.get('exclude_tables', []))
112
141
  tables_to_process = [t for t in all_tables if t not in tables_to_exclude]
113
142
  elif 'tables' in source:
114
- # if not include_all_tables, use the list of tables explicitly specified in the map.
115
- tables_to_process = list(source['tables'].keys())
143
+ # Use keys from the config map, but check if they exist in DB structure
144
+ config_tables = list(source['tables'].keys())
145
+ tables_to_process = [t for t in config_tables if t in db_structure]
116
146
 
117
- # 2. get the global settings and overrides.
147
+ # 3. get the global settings and overrides.
118
148
  global_exclude_columns = source.get('exclude_columns', [])
119
149
  table_prefix = source.get('table_prefix')
120
-
121
- # get the global schema definition, for this source.
122
- global_schema_name = source.get('schema')
123
-
124
150
  table_overrides = source.get('tables', {})
125
151
 
126
- # 3. iterate over the tables.
152
+ # 4. iterate over the tables.
127
153
  for table_name in tables_to_process:
128
154
  try:
129
- # 4. get the table specific configuration.
155
+ table_data = db_structure[table_name]
156
+
157
+ # 5. get the table specific configuration.
130
158
  table_config = table_overrides.get(table_name, {})
131
159
 
132
- # 5. define the schema object name, using the override if it exists.
160
+ # 6. define the schema object name, using the override if it exists.
133
161
  # Priority 1: Explicit override from the 'tables' map.
134
- schema_object_name = table_config.get('schema_object_name')
135
-
136
- # Priority 2: Global schema defined in the source.
137
- if not schema_object_name and global_schema_name:
138
- schema_object_name = global_schema_name
162
+ schema_object_name = table_config.get('schema_name')
139
163
 
140
164
  if not schema_object_name:
141
165
  # Priority 3: Automatic prefix stripping.
@@ -145,19 +169,68 @@ class CompanyContextService:
145
169
  # Priority 4: Default to the table name itself.
146
170
  schema_object_name = table_name
147
171
 
148
- # 6. define the list of columns to exclude, (local vs. global).
172
+ # 7. define the list of columns to exclude, (local vs. global).
149
173
  local_exclude_columns = table_config.get('exclude_columns')
150
174
  final_exclude_columns = local_exclude_columns if local_exclude_columns is not None else global_exclude_columns
151
175
 
152
- # 7. get the table schema definition.
153
- table_definition = db_manager.get_table_schema(
154
- table_name=table_name,
155
- db_schema=db_manager.schema,
156
- schema_object_name=schema_object_name,
157
- exclude_columns=final_exclude_columns
158
- )
159
- sql_context += table_definition
176
+ # 8. Build the table definition dictionary manually using the structure data
177
+ json_dict = {
178
+ "table": table_name,
179
+ "schema": database_schema_name,
180
+ "description": f"The table belongs to the **`{database_schema_name}`** schema.",
181
+ "fields": []
182
+ }
183
+
184
+ if schema_object_name:
185
+ json_dict["description"] += (
186
+ f"The meaning of each field in this table is detailed in the **`{schema_object_name}`** object."
187
+ )
188
+
189
+ for col in table_data.get('columns', []):
190
+ name = col["name"]
191
+ if name in final_exclude_columns:
192
+ continue
193
+
194
+ json_dict["fields"].append({
195
+ "name": name,
196
+ "type": col["type"]
197
+ })
198
+
199
+ # Append as string representation of dict (consistent with previous behavior)
200
+ sql_context += "\n\n" + str(json_dict)
201
+
160
202
  except (KeyError, RuntimeError) as e:
161
203
  logging.warning(f"Could not generate schema for table '{table_name}': {e}")
162
204
 
163
- return sql_context
205
+ if sql_context:
206
+ sql_context = "These are the SQL databases you can query using the **`iat_sql_service`**: \n" + sql_context
207
+ return sql_context
208
+
209
+ def _get_yaml_schema_context(self, company_short_name: str) -> str:
210
+ # Get context from .yaml schema files using the repository
211
+ yaml_schema_context = ''
212
+
213
+ try:
214
+ # 1. List yaml files in the schema "folder"
215
+ schema_files = self.asset_repo.list_files(company_short_name, AssetType.SCHEMA, extension='.yaml')
216
+
217
+ for filename in schema_files:
218
+ try:
219
+ # 2. Read content
220
+ content = self.asset_repo.read_text(company_short_name, AssetType.SCHEMA, filename)
221
+
222
+ # 3. Parse YAML content into a dict
223
+ schema_dict = self.utility.load_yaml_from_string(content)
224
+
225
+ # 4. Generate markdown description from the dict
226
+ if schema_dict:
227
+ # We use generate_schema_table which accepts a dict directly
228
+ yaml_schema_context += self.utility.generate_schema_table(schema_dict)
229
+
230
+ except Exception as e:
231
+ logging.warning(f"Error processing schema file {filename}: {e}")
232
+
233
+ except Exception as e:
234
+ logging.warning(f"Error listing schema files for {company_short_name}: {e}")
235
+
236
+ return yaml_schema_context