iatoolkit 0.71.4__py3-none-any.whl → 1.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iatoolkit/__init__.py +19 -7
- iatoolkit/base_company.py +1 -71
- iatoolkit/cli_commands.py +9 -21
- iatoolkit/common/exceptions.py +2 -0
- iatoolkit/common/interfaces/__init__.py +0 -0
- iatoolkit/common/interfaces/asset_storage.py +34 -0
- iatoolkit/common/interfaces/database_provider.py +38 -0
- iatoolkit/common/model_registry.py +159 -0
- iatoolkit/common/routes.py +53 -32
- iatoolkit/common/util.py +17 -12
- iatoolkit/company_registry.py +55 -14
- iatoolkit/{iatoolkit.py → core.py} +102 -72
- iatoolkit/infra/{mail_app.py → brevo_mail_app.py} +15 -37
- iatoolkit/infra/llm_providers/__init__.py +0 -0
- iatoolkit/infra/llm_providers/deepseek_adapter.py +278 -0
- iatoolkit/infra/{gemini_adapter.py → llm_providers/gemini_adapter.py} +11 -17
- iatoolkit/infra/{openai_adapter.py → llm_providers/openai_adapter.py} +41 -7
- iatoolkit/infra/llm_proxy.py +235 -134
- iatoolkit/infra/llm_response.py +5 -0
- iatoolkit/locales/en.yaml +134 -4
- iatoolkit/locales/es.yaml +293 -162
- iatoolkit/repositories/database_manager.py +92 -22
- iatoolkit/repositories/document_repo.py +7 -0
- iatoolkit/repositories/filesystem_asset_repository.py +36 -0
- iatoolkit/repositories/llm_query_repo.py +36 -22
- iatoolkit/repositories/models.py +86 -95
- iatoolkit/repositories/profile_repo.py +64 -13
- iatoolkit/repositories/vs_repo.py +31 -28
- iatoolkit/services/auth_service.py +1 -1
- iatoolkit/services/branding_service.py +1 -1
- iatoolkit/services/company_context_service.py +96 -39
- iatoolkit/services/configuration_service.py +329 -67
- iatoolkit/services/dispatcher_service.py +51 -227
- iatoolkit/services/document_service.py +10 -1
- iatoolkit/services/embedding_service.py +9 -6
- iatoolkit/services/excel_service.py +50 -2
- iatoolkit/services/file_processor_service.py +0 -5
- iatoolkit/services/history_manager_service.py +208 -0
- iatoolkit/services/jwt_service.py +1 -1
- iatoolkit/services/knowledge_base_service.py +412 -0
- iatoolkit/services/language_service.py +8 -2
- iatoolkit/services/license_service.py +82 -0
- iatoolkit/{infra/llm_client.py → services/llm_client_service.py} +42 -29
- iatoolkit/services/load_documents_service.py +18 -47
- iatoolkit/services/mail_service.py +171 -25
- iatoolkit/services/profile_service.py +69 -36
- iatoolkit/services/{prompt_manager_service.py → prompt_service.py} +136 -25
- iatoolkit/services/query_service.py +229 -203
- iatoolkit/services/sql_service.py +116 -34
- iatoolkit/services/tool_service.py +246 -0
- iatoolkit/services/user_feedback_service.py +18 -6
- iatoolkit/services/user_session_context_service.py +121 -51
- iatoolkit/static/images/iatoolkit_core.png +0 -0
- iatoolkit/static/images/iatoolkit_logo.png +0 -0
- iatoolkit/static/js/chat_feedback_button.js +1 -1
- iatoolkit/static/js/chat_help_content.js +4 -4
- iatoolkit/static/js/chat_main.js +61 -9
- iatoolkit/static/js/chat_model_selector.js +227 -0
- iatoolkit/static/js/chat_onboarding_button.js +1 -1
- iatoolkit/static/js/chat_reload_button.js +4 -1
- iatoolkit/static/styles/chat_iatoolkit.css +59 -3
- iatoolkit/static/styles/chat_public.css +28 -0
- iatoolkit/static/styles/documents.css +598 -0
- iatoolkit/static/styles/landing_page.css +223 -7
- iatoolkit/static/styles/llm_output.css +34 -1
- iatoolkit/system_prompts/__init__.py +0 -0
- iatoolkit/system_prompts/query_main.prompt +28 -3
- iatoolkit/system_prompts/sql_rules.prompt +47 -12
- iatoolkit/templates/_company_header.html +30 -5
- iatoolkit/templates/_login_widget.html +3 -3
- iatoolkit/templates/base.html +13 -0
- iatoolkit/templates/chat.html +45 -3
- iatoolkit/templates/forgot_password.html +3 -2
- iatoolkit/templates/onboarding_shell.html +1 -2
- iatoolkit/templates/signup.html +3 -0
- iatoolkit/views/base_login_view.py +8 -3
- iatoolkit/views/change_password_view.py +1 -1
- iatoolkit/views/chat_view.py +76 -0
- iatoolkit/views/forgot_password_view.py +9 -4
- iatoolkit/views/history_api_view.py +3 -3
- iatoolkit/views/home_view.py +4 -2
- iatoolkit/views/init_context_api_view.py +1 -1
- iatoolkit/views/llmquery_api_view.py +4 -3
- iatoolkit/views/load_company_configuration_api_view.py +49 -0
- iatoolkit/views/{file_store_api_view.py → load_document_api_view.py} +15 -11
- iatoolkit/views/login_view.py +25 -8
- iatoolkit/views/logout_api_view.py +10 -2
- iatoolkit/views/prompt_api_view.py +1 -1
- iatoolkit/views/rag_api_view.py +216 -0
- iatoolkit/views/root_redirect_view.py +22 -0
- iatoolkit/views/signup_view.py +12 -4
- iatoolkit/views/static_page_view.py +27 -0
- iatoolkit/views/users_api_view.py +33 -0
- iatoolkit/views/verify_user_view.py +1 -1
- iatoolkit-1.4.2.dist-info/METADATA +268 -0
- iatoolkit-1.4.2.dist-info/RECORD +133 -0
- iatoolkit-1.4.2.dist-info/licenses/LICENSE_COMMUNITY.md +15 -0
- iatoolkit/repositories/tasks_repo.py +0 -52
- iatoolkit/services/history_service.py +0 -37
- iatoolkit/services/search_service.py +0 -55
- iatoolkit/services/tasks_service.py +0 -188
- iatoolkit/templates/about.html +0 -13
- iatoolkit/templates/index.html +0 -145
- iatoolkit/templates/login_simulation.html +0 -45
- iatoolkit/views/external_login_view.py +0 -73
- iatoolkit/views/index_view.py +0 -14
- iatoolkit/views/login_simulation_view.py +0 -93
- iatoolkit/views/tasks_api_view.py +0 -72
- iatoolkit/views/tasks_review_api_view.py +0 -55
- iatoolkit-0.71.4.dist-info/METADATA +0 -276
- iatoolkit-0.71.4.dist-info/RECORD +0 -122
- {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/WHEEL +0 -0
- {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/licenses/LICENSE +0 -0
- {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/top_level.txt +0 -0
|
@@ -4,13 +4,9 @@
|
|
|
4
4
|
# IAToolkit is open source software.
|
|
5
5
|
|
|
6
6
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
7
|
-
from iatoolkit.services.prompt_manager_service import PromptService
|
|
8
|
-
from iatoolkit.services.sql_service import SqlService
|
|
7
|
+
from iatoolkit.services.prompt_service import PromptService
|
|
9
8
|
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
10
9
|
from iatoolkit.services.configuration_service import ConfigurationService
|
|
11
|
-
from iatoolkit.repositories.models import Company, Function
|
|
12
|
-
from iatoolkit.services.excel_service import ExcelService
|
|
13
|
-
from iatoolkit.services.mail_service import MailService
|
|
14
10
|
from iatoolkit.common.util import Utility
|
|
15
11
|
from injector import inject
|
|
16
12
|
import logging
|
|
@@ -23,28 +19,25 @@ class Dispatcher:
|
|
|
23
19
|
config_service: ConfigurationService,
|
|
24
20
|
prompt_service: PromptService,
|
|
25
21
|
llmquery_repo: LLMQueryRepo,
|
|
26
|
-
util: Utility,
|
|
27
|
-
sql_service: SqlService,
|
|
28
|
-
excel_service: ExcelService,
|
|
29
|
-
mail_service: MailService):
|
|
22
|
+
util: Utility,):
|
|
30
23
|
self.config_service = config_service
|
|
31
24
|
self.prompt_service = prompt_service
|
|
32
25
|
self.llmquery_repo = llmquery_repo
|
|
33
26
|
self.util = util
|
|
34
|
-
self.sql_service = sql_service
|
|
35
|
-
self.excel_service = excel_service
|
|
36
|
-
self.mail_service = mail_service
|
|
37
|
-
self.system_functions = _FUNCTION_LIST
|
|
38
|
-
self.system_prompts = _SYSTEM_PROMPT
|
|
39
27
|
|
|
28
|
+
self._tool_service = None
|
|
40
29
|
self._company_registry = None
|
|
41
30
|
self._company_instances = None
|
|
42
31
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
32
|
+
|
|
33
|
+
@property
|
|
34
|
+
def tool_service(self):
|
|
35
|
+
"""Lazy-loads and returns the ToolService instance to avoid circular imports."""
|
|
36
|
+
if self._tool_service is None:
|
|
37
|
+
from iatoolkit import current_iatoolkit
|
|
38
|
+
from iatoolkit.services.tool_service import ToolService
|
|
39
|
+
self._tool_service = current_iatoolkit().get_injector().get(ToolService)
|
|
40
|
+
return self._tool_service
|
|
48
41
|
|
|
49
42
|
@property
|
|
50
43
|
def company_registry(self):
|
|
@@ -65,87 +58,67 @@ class Dispatcher:
|
|
|
65
58
|
# initialize the system functions and prompts
|
|
66
59
|
self.setup_iatoolkit_system()
|
|
67
60
|
|
|
68
|
-
|
|
69
|
-
for
|
|
61
|
+
# Loads the configuration of every company: company.yaml file
|
|
62
|
+
for company_short_name, company_instance in self.company_instances.items():
|
|
70
63
|
try:
|
|
71
64
|
# read company configuration from company.yaml
|
|
72
|
-
self.config_service.load_configuration(
|
|
65
|
+
config, errors = self.config_service.load_configuration(company_short_name)
|
|
73
66
|
|
|
74
|
-
|
|
75
|
-
|
|
67
|
+
'''
|
|
68
|
+
if errors:
|
|
69
|
+
raise IAToolkitException(
|
|
70
|
+
IAToolkitException.ErrorType.CONFIG_ERROR,
|
|
71
|
+
'company.yaml validation errors'
|
|
72
|
+
)
|
|
73
|
+
'''
|
|
74
|
+
|
|
75
|
+
# complement the instance self data
|
|
76
|
+
company_instance.company_short_name = company_short_name
|
|
77
|
+
company_instance.company = config.get('company')
|
|
76
78
|
|
|
77
79
|
except Exception as e:
|
|
78
|
-
logging.error(f"❌ Failed to register configuration for '{
|
|
79
|
-
|
|
80
|
+
logging.error(f"❌ Failed to register configuration for '{company_short_name}': {e}")
|
|
81
|
+
raise e
|
|
80
82
|
|
|
81
83
|
return True
|
|
82
84
|
|
|
83
|
-
def _register_company_databases(self, company_name: str):
|
|
84
|
-
"""
|
|
85
|
-
Reads the data_sources config for a company and registers each
|
|
86
|
-
database with the central SqlService.
|
|
87
|
-
"""
|
|
88
|
-
logging.info(f" -> Registering databases for '{company_name}'...")
|
|
89
|
-
data_sources_config = self.config_service.get_configuration(company_name, 'data_sources')
|
|
90
|
-
|
|
91
|
-
if not data_sources_config or not data_sources_config.get('sql'):
|
|
92
|
-
return
|
|
93
|
-
|
|
94
|
-
for db_config in data_sources_config['sql']:
|
|
95
|
-
db_name = db_config.get('database')
|
|
96
|
-
db_env_var = db_config.get('connection_string_env')
|
|
97
|
-
|
|
98
|
-
# resolve the URI connection string from the environment variable
|
|
99
|
-
db_uri = os.getenv(db_env_var) if db_env_var else None
|
|
100
|
-
if not db_uri:
|
|
101
|
-
logging.error(
|
|
102
|
-
f"-> Skipping database registration for '{company_name}' due to missing 'database' name or invalid connection URI.")
|
|
103
|
-
return
|
|
104
|
-
|
|
105
|
-
self.sql_service.register_database(db_name, db_uri)
|
|
106
|
-
|
|
107
85
|
def setup_iatoolkit_system(self):
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
self.
|
|
111
|
-
Function(
|
|
112
|
-
company_id=None,
|
|
113
|
-
system_function=True,
|
|
114
|
-
name=function['function_name'],
|
|
115
|
-
description= function['description'],
|
|
116
|
-
parameters=function['parameters']
|
|
117
|
-
)
|
|
118
|
-
)
|
|
86
|
+
try:
|
|
87
|
+
# system tools registration
|
|
88
|
+
self.tool_service.register_system_tools()
|
|
119
89
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
order=1,
|
|
127
|
-
is_system_prompt=True,
|
|
128
|
-
)
|
|
129
|
-
i += 1
|
|
90
|
+
# system prompts registration
|
|
91
|
+
self.prompt_service.register_system_prompts()
|
|
92
|
+
|
|
93
|
+
except Exception as e:
|
|
94
|
+
self.llmquery_repo.rollback()
|
|
95
|
+
raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR, str(e))
|
|
130
96
|
|
|
131
97
|
|
|
132
|
-
def dispatch(self, company_short_name: str,
|
|
98
|
+
def dispatch(self, company_short_name: str, function_name: str, **kwargs) -> dict:
|
|
133
99
|
company_key = company_short_name.lower()
|
|
134
100
|
|
|
135
101
|
if company_key not in self.company_instances:
|
|
136
102
|
available_companies = list(self.company_instances.keys())
|
|
137
103
|
raise IAToolkitException(
|
|
138
104
|
IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
139
|
-
f"
|
|
105
|
+
f"Company '{company_short_name}' not configured. available companies: {available_companies}"
|
|
140
106
|
)
|
|
141
107
|
|
|
142
|
-
# check if action is a system function
|
|
143
|
-
if
|
|
144
|
-
|
|
108
|
+
# check if action is a system function using ToolService
|
|
109
|
+
if self.tool_service.is_system_tool(function_name):
|
|
110
|
+
# this is the system function to be executed.
|
|
111
|
+
handler = self.tool_service.get_system_handler(function_name)
|
|
112
|
+
logging.info(
|
|
113
|
+
f"Calling system handler [{function_name}] "
|
|
114
|
+
f"with company_short_name={company_short_name} "
|
|
115
|
+
f"and kwargs={kwargs}"
|
|
116
|
+
)
|
|
117
|
+
return handler(company_short_name, **kwargs)
|
|
145
118
|
|
|
146
119
|
company_instance = self.company_instances[company_short_name]
|
|
147
120
|
try:
|
|
148
|
-
return company_instance.handle_request(
|
|
121
|
+
return company_instance.handle_request(function_name, **kwargs)
|
|
149
122
|
except IAToolkitException as e:
|
|
150
123
|
# Si ya es una IAToolkitException, la relanzamos para preservar el tipo de error original.
|
|
151
124
|
raise e
|
|
@@ -153,158 +126,9 @@ class Dispatcher:
|
|
|
153
126
|
except Exception as e:
|
|
154
127
|
logging.exception(e)
|
|
155
128
|
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
156
|
-
f"Error
|
|
129
|
+
f"Error in function call '{function_name}': {str(e)}") from e
|
|
157
130
|
|
|
158
|
-
def get_company_services(self, company: Company) -> list[dict]:
|
|
159
|
-
# create the syntax with openai response syntax, for the company function list
|
|
160
|
-
tools = []
|
|
161
|
-
functions = self.llmquery_repo.get_company_functions(company)
|
|
162
|
-
|
|
163
|
-
for function in functions:
|
|
164
|
-
# make sure is always on
|
|
165
|
-
function.parameters["additionalProperties"] = False
|
|
166
|
-
|
|
167
|
-
ai_tool = {
|
|
168
|
-
"type": "function",
|
|
169
|
-
"name": function.name,
|
|
170
|
-
"description": function.description,
|
|
171
|
-
"parameters": function.parameters,
|
|
172
|
-
"strict": True
|
|
173
|
-
}
|
|
174
|
-
tools.append(ai_tool)
|
|
175
|
-
return tools
|
|
176
|
-
|
|
177
|
-
def get_user_info(self, company_name: str, user_identifier: str) -> dict:
|
|
178
|
-
if company_name not in self.company_instances:
|
|
179
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
180
|
-
f"company not configured: {company_name}")
|
|
181
|
-
|
|
182
|
-
# source 2: external company user
|
|
183
|
-
company_instance = self.company_instances[company_name]
|
|
184
|
-
try:
|
|
185
|
-
external_user_profile = company_instance.get_user_info(user_identifier)
|
|
186
|
-
except Exception as e:
|
|
187
|
-
logging.exception(e)
|
|
188
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
189
|
-
f"Error in get_user_info: {company_name}: {str(e)}") from e
|
|
190
|
-
|
|
191
|
-
return external_user_profile
|
|
192
131
|
|
|
193
132
|
def get_company_instance(self, company_name: str):
|
|
194
133
|
"""Returns the instance for a given company name."""
|
|
195
134
|
return self.company_instances.get(company_name)
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
# iatoolkit system prompts
|
|
199
|
-
_SYSTEM_PROMPT = [
|
|
200
|
-
{'name': 'query_main', 'description':'iatoolkit main prompt'},
|
|
201
|
-
{'name': 'format_styles', 'description':'output format styles'},
|
|
202
|
-
{'name': 'sql_rules', 'description':'instructions for SQL queries'}
|
|
203
|
-
]
|
|
204
|
-
|
|
205
|
-
# iatoolkit built-in functions (Tools)
|
|
206
|
-
_FUNCTION_LIST = [
|
|
207
|
-
{
|
|
208
|
-
"function_name": "iat_sql_query",
|
|
209
|
-
"description": "Servicio SQL de IAToolkit: debes utilizar este servicio para todas las consultas a base de datos.",
|
|
210
|
-
"parameters": {
|
|
211
|
-
"type": "object",
|
|
212
|
-
"properties": {
|
|
213
|
-
"database": {
|
|
214
|
-
"type": "string",
|
|
215
|
-
"description": "nombre de la base de datos a consultar: `database_name`"
|
|
216
|
-
},
|
|
217
|
-
"query": {
|
|
218
|
-
"type": "string",
|
|
219
|
-
"description": "string con la consulta en sql"
|
|
220
|
-
},
|
|
221
|
-
},
|
|
222
|
-
"required": ["database", "query"]
|
|
223
|
-
}
|
|
224
|
-
},
|
|
225
|
-
{
|
|
226
|
-
"function_name": "iat_generate_excel",
|
|
227
|
-
"description": "Generador de Excel."
|
|
228
|
-
"Genera un archivo Excel (.xlsx) a partir de una lista de diccionarios. "
|
|
229
|
-
"Cada diccionario representa una fila del archivo. "
|
|
230
|
-
"el archivo se guarda en directorio de descargas."
|
|
231
|
-
"retorna diccionario con filename, attachment_token (para enviar archivo por mail)"
|
|
232
|
-
"content_type y download_link",
|
|
233
|
-
"parameters": {
|
|
234
|
-
"type": "object",
|
|
235
|
-
"properties": {
|
|
236
|
-
"filename": {
|
|
237
|
-
"type": "string",
|
|
238
|
-
"description": "Nombre del archivo de salida (ejemplo: 'reporte.xlsx')",
|
|
239
|
-
"pattern": "^.+\\.xlsx?$"
|
|
240
|
-
},
|
|
241
|
-
"sheet_name": {
|
|
242
|
-
"type": "string",
|
|
243
|
-
"description": "Nombre de la hoja dentro del Excel",
|
|
244
|
-
"minLength": 1
|
|
245
|
-
},
|
|
246
|
-
"data": {
|
|
247
|
-
"type": "array",
|
|
248
|
-
"description": "Lista de diccionarios. Cada diccionario representa una fila.",
|
|
249
|
-
"minItems": 1,
|
|
250
|
-
"items": {
|
|
251
|
-
"type": "object",
|
|
252
|
-
"properties": {},
|
|
253
|
-
"additionalProperties": {
|
|
254
|
-
"anyOf": [
|
|
255
|
-
{"type": "string"},
|
|
256
|
-
{"type": "number"},
|
|
257
|
-
{"type": "boolean"},
|
|
258
|
-
{"type": "null"},
|
|
259
|
-
{
|
|
260
|
-
"type": "string",
|
|
261
|
-
"format": "date"
|
|
262
|
-
}
|
|
263
|
-
]
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
},
|
|
268
|
-
"required": ["filename", "sheet_name", "data"]
|
|
269
|
-
}
|
|
270
|
-
},
|
|
271
|
-
{
|
|
272
|
-
'function_name': "iat_send_email",
|
|
273
|
-
'description': "iatoolkit mail system. "
|
|
274
|
-
"envia mails cuando un usuario lo solicita."
|
|
275
|
-
"Si no te indican quien envia el correo utiliza la dirección iatoolkit@iatoolkit.com",
|
|
276
|
-
'parameters': {
|
|
277
|
-
"type": "object",
|
|
278
|
-
"properties": {
|
|
279
|
-
"from_email": {"type": "string","description": "dirección de correo electrónico que esta enviando el email."},
|
|
280
|
-
"recipient": {"type": "string", "description": "email del destinatario"},
|
|
281
|
-
"subject": {"type": "string", "description": "asunto del email"},
|
|
282
|
-
"body": {"type": "string", "description": "HTML del email"},
|
|
283
|
-
"attachments": {
|
|
284
|
-
"type": "array",
|
|
285
|
-
"description": "Lista de archivos adjuntos codificados en base64",
|
|
286
|
-
"items": {
|
|
287
|
-
"type": "object",
|
|
288
|
-
"properties": {
|
|
289
|
-
"filename": {
|
|
290
|
-
"type": "string",
|
|
291
|
-
"description": "Nombre del archivo con su extensión (ej. informe.pdf)"
|
|
292
|
-
},
|
|
293
|
-
"content": {
|
|
294
|
-
"type": "string",
|
|
295
|
-
"description": "Contenido del archivo en b64."
|
|
296
|
-
},
|
|
297
|
-
"attachment_token": {
|
|
298
|
-
"type": "string",
|
|
299
|
-
"description": "token para descargar el archivo."
|
|
300
|
-
}
|
|
301
|
-
},
|
|
302
|
-
"required": ["filename", "content", "attachment_token"],
|
|
303
|
-
"additionalProperties": False
|
|
304
|
-
}
|
|
305
|
-
}
|
|
306
|
-
},
|
|
307
|
-
"required": ["from_email","recipient", "subject", "body", "attachments"]
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
]
|
|
@@ -12,10 +12,15 @@ import pytesseract
|
|
|
12
12
|
from injector import inject
|
|
13
13
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
14
14
|
from iatoolkit.services.i18n_service import I18nService
|
|
15
|
+
from iatoolkit.services.excel_service import ExcelService
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
class DocumentService:
|
|
17
19
|
@inject
|
|
18
|
-
def __init__(self, i18n_service: I18nService):
|
|
20
|
+
def __init__(self,
|
|
21
|
+
excel_service: ExcelService,
|
|
22
|
+
i18n_service: I18nService):
|
|
23
|
+
self.excel_service = excel_service
|
|
19
24
|
self.i18n_service = i18n_service
|
|
20
25
|
|
|
21
26
|
# max number of pages to load
|
|
@@ -40,6 +45,10 @@ class DocumentService:
|
|
|
40
45
|
return self.read_scanned_pdf(file_content)
|
|
41
46
|
else:
|
|
42
47
|
return self.read_pdf(file_content)
|
|
48
|
+
elif filename.lower().endswith(('.xlsx', '.xls')):
|
|
49
|
+
return self.excel_service.read_excel(file_content)
|
|
50
|
+
elif filename.lower().endswith('.csv'):
|
|
51
|
+
return self.excel_service.read_csv(file_content)
|
|
43
52
|
else:
|
|
44
53
|
raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
|
|
45
54
|
"Formato de archivo desconocido")
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
import os
|
|
6
6
|
import base64
|
|
7
7
|
import numpy as np
|
|
8
|
-
from threading import Lock
|
|
9
8
|
from huggingface_hub import InferenceClient
|
|
10
9
|
from openai import OpenAI
|
|
11
10
|
from injector import inject
|
|
@@ -14,12 +13,14 @@ from iatoolkit.services.i18n_service import I18nService
|
|
|
14
13
|
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
15
14
|
import logging
|
|
16
15
|
|
|
16
|
+
|
|
17
17
|
# Wrapper classes to create a common interface for embedding clients
|
|
18
18
|
class EmbeddingClientWrapper:
|
|
19
19
|
"""Abstract base class for embedding client wrappers."""
|
|
20
|
-
def __init__(self, client, model: str):
|
|
20
|
+
def __init__(self, client, model: str, dimensions: int = 1536):
|
|
21
21
|
self.client = client
|
|
22
22
|
self.model = model
|
|
23
|
+
self.dimensions = dimensions
|
|
23
24
|
|
|
24
25
|
def get_embedding(self, text: str) -> list[float]:
|
|
25
26
|
"""Generates and returns an embedding for the given text."""
|
|
@@ -37,7 +38,9 @@ class OpenAIClientWrapper(EmbeddingClientWrapper):
|
|
|
37
38
|
def get_embedding(self, text: str) -> list[float]:
|
|
38
39
|
# The OpenAI API expects the input text to be clean
|
|
39
40
|
text = text.replace("\n", " ")
|
|
40
|
-
response = self.client.embeddings.create(input=[text], model=self.model)
|
|
41
|
+
response = self.client.embeddings.create(input=[text],
|
|
42
|
+
model=self.model,
|
|
43
|
+
dimensions=self.dimensions)
|
|
41
44
|
return response.data[0].embedding
|
|
42
45
|
|
|
43
46
|
# Factory and Service classes
|
|
@@ -68,6 +71,7 @@ class EmbeddingClientFactory:
|
|
|
68
71
|
if not provider:
|
|
69
72
|
raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
|
|
70
73
|
model = embedding_config.get('model')
|
|
74
|
+
dimensions = int(embedding_config.get('dimensions', "1536"))
|
|
71
75
|
|
|
72
76
|
api_key_name = embedding_config.get('api_key_name')
|
|
73
77
|
if not api_key_name:
|
|
@@ -83,12 +87,12 @@ class EmbeddingClientFactory:
|
|
|
83
87
|
if not model:
|
|
84
88
|
model='sentence-transformers/all-MiniLM-L6-v2'
|
|
85
89
|
client = InferenceClient(model=model, token=api_key)
|
|
86
|
-
wrapper = HuggingFaceClientWrapper(client, model)
|
|
90
|
+
wrapper = HuggingFaceClientWrapper(client, model, dimensions)
|
|
87
91
|
elif provider == 'openai':
|
|
88
92
|
client = OpenAI(api_key=api_key)
|
|
89
93
|
if not model:
|
|
90
94
|
model='text-embedding-ada-002'
|
|
91
|
-
wrapper = OpenAIClientWrapper(client, model)
|
|
95
|
+
wrapper = OpenAIClientWrapper(client, model, dimensions)
|
|
92
96
|
else:
|
|
93
97
|
raise NotImplementedError(f"Embedding provider '{provider}' is not implemented.")
|
|
94
98
|
|
|
@@ -126,7 +130,6 @@ class EmbeddingService:
|
|
|
126
130
|
|
|
127
131
|
# 2. Use the wrapper's common interface to get the embedding
|
|
128
132
|
embedding = client_wrapper.get_embedding(text)
|
|
129
|
-
|
|
130
133
|
# 3. Process the result
|
|
131
134
|
if to_base64:
|
|
132
135
|
return base64.b64encode(np.array(embedding, dtype=np.float32).tobytes()).decode('utf-8')
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
#
|
|
4
4
|
# IAToolkit is open source software.
|
|
5
5
|
|
|
6
|
+
from flask import current_app, jsonify
|
|
6
7
|
from iatoolkit.common.util import Utility
|
|
7
8
|
import pandas as pd
|
|
8
9
|
from uuid import uuid4
|
|
@@ -11,8 +12,9 @@ from iatoolkit.common.exceptions import IAToolkitException
|
|
|
11
12
|
from iatoolkit.services.i18n_service import I18nService
|
|
12
13
|
from injector import inject
|
|
13
14
|
import os
|
|
15
|
+
import io
|
|
14
16
|
import logging
|
|
15
|
-
|
|
17
|
+
import json
|
|
16
18
|
|
|
17
19
|
EXCEL_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
18
20
|
|
|
@@ -25,7 +27,53 @@ class ExcelService:
|
|
|
25
27
|
self.util = util
|
|
26
28
|
self.i18n_service = i18n_service
|
|
27
29
|
|
|
28
|
-
def
|
|
30
|
+
def read_excel(self, file_content: bytes) -> str:
|
|
31
|
+
"""
|
|
32
|
+
Reads an Excel file and converts its content to a JSON string.
|
|
33
|
+
- If the Excel file has a single sheet, it returns the JSON of that sheet.
|
|
34
|
+
- If it has multiple sheets, it returns a JSON object with sheet names as keys.
|
|
35
|
+
"""
|
|
36
|
+
try:
|
|
37
|
+
# Use a BytesIO object to allow pandas to read the in-memory byte content
|
|
38
|
+
file_like_object = io.BytesIO(file_content)
|
|
39
|
+
|
|
40
|
+
# Read all sheets into a dictionary of DataFrames
|
|
41
|
+
xls = pd.read_excel(file_like_object, sheet_name=None)
|
|
42
|
+
|
|
43
|
+
if len(xls) == 1:
|
|
44
|
+
# If only one sheet, return its JSON representation directly
|
|
45
|
+
sheet_name = list(xls.keys())[0]
|
|
46
|
+
return xls[sheet_name].to_json(orient='records', indent=4)
|
|
47
|
+
else:
|
|
48
|
+
# If multiple sheets, create a dictionary of JSON strings
|
|
49
|
+
sheets_json = {}
|
|
50
|
+
for sheet_name, df in xls.items():
|
|
51
|
+
sheets_json[sheet_name] = df.to_json(orient='records', indent=4)
|
|
52
|
+
return json.dumps(sheets_json, indent=4)
|
|
53
|
+
|
|
54
|
+
except Exception as e:
|
|
55
|
+
raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
|
|
56
|
+
self.i18n_service.t('errors.services.cannot_read_excel')) from e
|
|
57
|
+
|
|
58
|
+
def read_csv(self, file_content: bytes) -> str:
|
|
59
|
+
"""
|
|
60
|
+
Reads a CSV file and converts its content to a JSON string.
|
|
61
|
+
"""
|
|
62
|
+
try:
|
|
63
|
+
# Use a BytesIO object to allow pandas to read the in-memory byte content
|
|
64
|
+
file_like_object = io.BytesIO(file_content)
|
|
65
|
+
|
|
66
|
+
# Read the CSV into a DataFrame
|
|
67
|
+
df = pd.read_csv(file_like_object)
|
|
68
|
+
|
|
69
|
+
# Return JSON representation
|
|
70
|
+
return df.to_json(orient='records', indent=4)
|
|
71
|
+
|
|
72
|
+
except Exception as e:
|
|
73
|
+
raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
|
|
74
|
+
self.i18n_service.t('errors.services.cannot_read_csv')) from e
|
|
75
|
+
|
|
76
|
+
def excel_generator(self, company_short_name: str, **kwargs) -> str:
|
|
29
77
|
"""
|
|
30
78
|
Genera un Excel a partir de una lista de diccionarios.
|
|
31
79
|
|
|
@@ -74,9 +74,6 @@ class FileProcessor:
|
|
|
74
74
|
if not self._apply_filters(file_name):
|
|
75
75
|
continue
|
|
76
76
|
|
|
77
|
-
if self.config.echo:
|
|
78
|
-
print(f'loading: {file_name}')
|
|
79
|
-
|
|
80
77
|
content = self.connector.get_file_content(file_path)
|
|
81
78
|
|
|
82
79
|
# execute the callback function
|
|
@@ -87,8 +84,6 @@ class FileProcessor:
|
|
|
87
84
|
context=self.config.context)
|
|
88
85
|
self.processed_files += 1
|
|
89
86
|
|
|
90
|
-
logging.info(f"Successfully processed file: {file_path}")
|
|
91
|
-
|
|
92
87
|
except Exception as e:
|
|
93
88
|
logging.error(f"Error processing {file_path}: {e}")
|
|
94
89
|
if not self.config.continue_on_error:
|