iatoolkit 0.66.2__py3-none-any.whl → 0.71.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iatoolkit/__init__.py +2 -6
- iatoolkit/base_company.py +3 -31
- iatoolkit/cli_commands.py +1 -1
- iatoolkit/common/routes.py +5 -1
- iatoolkit/common/session_manager.py +2 -0
- iatoolkit/company_registry.py +1 -2
- iatoolkit/iatoolkit.py +13 -13
- iatoolkit/infra/llm_client.py +8 -12
- iatoolkit/infra/llm_proxy.py +38 -10
- iatoolkit/locales/en.yaml +25 -2
- iatoolkit/locales/es.yaml +27 -4
- iatoolkit/repositories/database_manager.py +8 -3
- iatoolkit/repositories/document_repo.py +1 -1
- iatoolkit/repositories/models.py +6 -8
- iatoolkit/repositories/profile_repo.py +0 -4
- iatoolkit/repositories/vs_repo.py +26 -20
- iatoolkit/services/auth_service.py +2 -2
- iatoolkit/services/branding_service.py +11 -7
- iatoolkit/services/company_context_service.py +155 -0
- iatoolkit/services/configuration_service.py +133 -0
- iatoolkit/services/dispatcher_service.py +75 -70
- iatoolkit/services/document_service.py +5 -2
- iatoolkit/services/embedding_service.py +146 -0
- iatoolkit/services/excel_service.py +15 -11
- iatoolkit/services/file_processor_service.py +4 -12
- iatoolkit/services/history_service.py +7 -7
- iatoolkit/services/i18n_service.py +4 -4
- iatoolkit/services/jwt_service.py +7 -9
- iatoolkit/services/language_service.py +29 -23
- iatoolkit/services/load_documents_service.py +100 -113
- iatoolkit/services/mail_service.py +9 -4
- iatoolkit/services/profile_service.py +10 -7
- iatoolkit/services/prompt_manager_service.py +20 -16
- iatoolkit/services/query_service.py +112 -43
- iatoolkit/services/search_service.py +11 -4
- iatoolkit/services/sql_service.py +57 -25
- iatoolkit/services/user_feedback_service.py +15 -13
- iatoolkit/static/js/chat_history_button.js +3 -5
- iatoolkit/static/js/chat_main.js +2 -17
- iatoolkit/static/js/chat_onboarding_button.js +6 -0
- iatoolkit/static/styles/chat_iatoolkit.css +69 -158
- iatoolkit/static/styles/chat_modal.css +1 -37
- iatoolkit/static/styles/onboarding.css +7 -0
- iatoolkit/system_prompts/query_main.prompt +2 -10
- iatoolkit/templates/change_password.html +1 -1
- iatoolkit/templates/chat.html +12 -4
- iatoolkit/templates/chat_modals.html +4 -0
- iatoolkit/templates/error.html +1 -1
- iatoolkit/templates/login_simulation.html +17 -6
- iatoolkit/templates/onboarding_shell.html +4 -1
- iatoolkit/views/base_login_view.py +7 -8
- iatoolkit/views/change_password_view.py +2 -3
- iatoolkit/views/embedding_api_view.py +65 -0
- iatoolkit/views/external_login_view.py +1 -1
- iatoolkit/views/file_store_api_view.py +1 -1
- iatoolkit/views/forgot_password_view.py +2 -4
- iatoolkit/views/help_content_api_view.py +9 -9
- iatoolkit/views/history_api_view.py +1 -1
- iatoolkit/views/home_view.py +2 -2
- iatoolkit/views/init_context_api_view.py +18 -17
- iatoolkit/views/llmquery_api_view.py +3 -2
- iatoolkit/views/login_simulation_view.py +14 -2
- iatoolkit/views/login_view.py +9 -9
- iatoolkit/views/signup_view.py +2 -4
- iatoolkit/views/verify_user_view.py +2 -4
- {iatoolkit-0.66.2.dist-info → iatoolkit-0.71.2.dist-info}/METADATA +40 -22
- iatoolkit-0.71.2.dist-info/RECORD +122 -0
- iatoolkit-0.71.2.dist-info/licenses/LICENSE +21 -0
- iatoolkit/services/help_content_service.py +0 -30
- iatoolkit/services/onboarding_service.py +0 -43
- iatoolkit-0.66.2.dist-info/RECORD +0 -119
- {iatoolkit-0.66.2.dist-info → iatoolkit-0.71.2.dist-info}/WHEEL +0 -0
- {iatoolkit-0.66.2.dist-info → iatoolkit-0.71.2.dist-info}/top_level.txt +0 -0
|
@@ -5,8 +5,9 @@
|
|
|
5
5
|
|
|
6
6
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
7
7
|
from iatoolkit.services.prompt_manager_service import PromptService
|
|
8
|
+
from iatoolkit.services.sql_service import SqlService
|
|
8
9
|
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
9
|
-
|
|
10
|
+
from iatoolkit.services.configuration_service import ConfigurationService
|
|
10
11
|
from iatoolkit.repositories.models import Company, Function
|
|
11
12
|
from iatoolkit.services.excel_service import ExcelService
|
|
12
13
|
from iatoolkit.services.mail_service import MailService
|
|
@@ -19,14 +20,18 @@ import os
|
|
|
19
20
|
class Dispatcher:
|
|
20
21
|
@inject
|
|
21
22
|
def __init__(self,
|
|
23
|
+
config_service: ConfigurationService,
|
|
22
24
|
prompt_service: PromptService,
|
|
23
25
|
llmquery_repo: LLMQueryRepo,
|
|
24
26
|
util: Utility,
|
|
27
|
+
sql_service: SqlService,
|
|
25
28
|
excel_service: ExcelService,
|
|
26
29
|
mail_service: MailService):
|
|
30
|
+
self.config_service = config_service
|
|
27
31
|
self.prompt_service = prompt_service
|
|
28
32
|
self.llmquery_repo = llmquery_repo
|
|
29
33
|
self.util = util
|
|
34
|
+
self.sql_service = sql_service
|
|
30
35
|
self.excel_service = excel_service
|
|
31
36
|
self.mail_service = mail_service
|
|
32
37
|
self.system_functions = _FUNCTION_LIST
|
|
@@ -38,6 +43,7 @@ class Dispatcher:
|
|
|
38
43
|
self.tool_handlers = {
|
|
39
44
|
"iat_generate_excel": self.excel_service.excel_generator,
|
|
40
45
|
"iat_send_email": self.mail_service.send_mail,
|
|
46
|
+
"iat_sql_query": self.sql_service.exec_sql
|
|
41
47
|
}
|
|
42
48
|
|
|
43
49
|
@property
|
|
@@ -55,17 +61,49 @@ class Dispatcher:
|
|
|
55
61
|
self._company_instances = self.company_registry.get_all_company_instances()
|
|
56
62
|
return self._company_instances
|
|
57
63
|
|
|
58
|
-
def
|
|
64
|
+
def load_company_configs(self):
|
|
59
65
|
# initialize the system functions and prompts
|
|
60
66
|
self.setup_iatoolkit_system()
|
|
61
67
|
|
|
62
|
-
"""
|
|
63
|
-
for
|
|
64
|
-
|
|
65
|
-
|
|
68
|
+
"""Loads the configuration of every company"""
|
|
69
|
+
for company_name, company_instance in self.company_instances.items():
|
|
70
|
+
try:
|
|
71
|
+
# read company configuration from company.yaml
|
|
72
|
+
self.config_service.load_configuration(company_name, company_instance)
|
|
73
|
+
|
|
74
|
+
# register the company databases
|
|
75
|
+
self._register_company_databases(company_name)
|
|
76
|
+
|
|
77
|
+
except Exception as e:
|
|
78
|
+
logging.error(f"❌ Failed to register configuration for '{company_name}': {e}")
|
|
79
|
+
continue
|
|
66
80
|
|
|
67
81
|
return True
|
|
68
82
|
|
|
83
|
+
def _register_company_databases(self, company_name: str):
|
|
84
|
+
"""
|
|
85
|
+
Reads the data_sources config for a company and registers each
|
|
86
|
+
database with the central SqlService.
|
|
87
|
+
"""
|
|
88
|
+
logging.info(f" -> Registering databases for '{company_name}'...")
|
|
89
|
+
data_sources_config = self.config_service.get_configuration(company_name, 'data_sources')
|
|
90
|
+
|
|
91
|
+
if not data_sources_config or not data_sources_config.get('sql'):
|
|
92
|
+
return
|
|
93
|
+
|
|
94
|
+
for db_config in data_sources_config['sql']:
|
|
95
|
+
db_name = db_config.get('database')
|
|
96
|
+
db_env_var = db_config.get('connection_string_env')
|
|
97
|
+
|
|
98
|
+
# resolve the URI connection string from the environment variable
|
|
99
|
+
db_uri = os.getenv(db_env_var) if db_env_var else None
|
|
100
|
+
if not db_uri:
|
|
101
|
+
logging.error(
|
|
102
|
+
f"-> Skipping database registration for '{company_name}' due to missing 'database' name or invalid connection URI.")
|
|
103
|
+
return
|
|
104
|
+
|
|
105
|
+
self.sql_service.register_database(db_name, db_uri)
|
|
106
|
+
|
|
69
107
|
def setup_iatoolkit_system(self):
|
|
70
108
|
# create system functions
|
|
71
109
|
for function in self.system_functions:
|
|
@@ -90,25 +128,22 @@ class Dispatcher:
|
|
|
90
128
|
)
|
|
91
129
|
i += 1
|
|
92
130
|
|
|
93
|
-
# register in the database every company class
|
|
94
|
-
for company in self.company_instances.values():
|
|
95
|
-
company.register_company()
|
|
96
131
|
|
|
97
|
-
def dispatch(self,
|
|
98
|
-
company_key =
|
|
132
|
+
def dispatch(self, company_short_name: str, action: str, **kwargs) -> dict:
|
|
133
|
+
company_key = company_short_name.lower()
|
|
99
134
|
|
|
100
135
|
if company_key not in self.company_instances:
|
|
101
136
|
available_companies = list(self.company_instances.keys())
|
|
102
137
|
raise IAToolkitException(
|
|
103
138
|
IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
104
|
-
f"Empresa '{
|
|
139
|
+
f"Empresa '{company_short_name}' no configurada. Empresas disponibles: {available_companies}"
|
|
105
140
|
)
|
|
106
141
|
|
|
107
142
|
# check if action is a system function
|
|
108
143
|
if action in self.tool_handlers:
|
|
109
144
|
return self.tool_handlers[action](**kwargs)
|
|
110
145
|
|
|
111
|
-
company_instance = self.company_instances[
|
|
146
|
+
company_instance = self.company_instances[company_short_name]
|
|
112
147
|
try:
|
|
113
148
|
return company_instance.handle_request(action, **kwargs)
|
|
114
149
|
except IAToolkitException as e:
|
|
@@ -120,37 +155,6 @@ class Dispatcher:
|
|
|
120
155
|
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
121
156
|
f"Error en function call '{action}': {str(e)}") from e
|
|
122
157
|
|
|
123
|
-
def get_company_context(self, company_name: str, **kwargs) -> str:
|
|
124
|
-
if company_name not in self.company_instances:
|
|
125
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
126
|
-
f"Empresa no configurada: {company_name}")
|
|
127
|
-
|
|
128
|
-
company_context = ''
|
|
129
|
-
|
|
130
|
-
# read the company context from this list of markdown files,
|
|
131
|
-
# company brief, credits, operation description, etc.
|
|
132
|
-
context_dir = os.path.join(os.getcwd(), f'companies/{company_name}/context')
|
|
133
|
-
context_files = self.util.get_files_by_extension(context_dir, '.md', return_extension=True)
|
|
134
|
-
for file in context_files:
|
|
135
|
-
filepath = os.path.join(context_dir, file)
|
|
136
|
-
company_context += self.util.load_markdown_context(filepath)
|
|
137
|
-
|
|
138
|
-
# add the schemas for every table or function call responses
|
|
139
|
-
schema_dir = os.path.join(os.getcwd(), f'companies/{company_name}/schema')
|
|
140
|
-
schema_files = self.util.get_files_by_extension(schema_dir, '.yaml', return_extension=True)
|
|
141
|
-
for file in schema_files:
|
|
142
|
-
schema_name = file.split('_')[0]
|
|
143
|
-
filepath = os.path.join(schema_dir, file)
|
|
144
|
-
company_context += self.util.generate_context_for_schema(schema_name, filepath)
|
|
145
|
-
|
|
146
|
-
company_instance = self.company_instances[company_name]
|
|
147
|
-
try:
|
|
148
|
-
return company_context + company_instance.get_company_context(**kwargs)
|
|
149
|
-
except Exception as e:
|
|
150
|
-
logging.exception(e)
|
|
151
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
152
|
-
f"Error en get_company_context de {company_name}: {str(e)}") from e
|
|
153
|
-
|
|
154
158
|
def get_company_services(self, company: Company) -> list[dict]:
|
|
155
159
|
# create the syntax with openai response syntax, for the company function list
|
|
156
160
|
tools = []
|
|
@@ -173,7 +177,7 @@ class Dispatcher:
|
|
|
173
177
|
def get_user_info(self, company_name: str, user_identifier: str) -> dict:
|
|
174
178
|
if company_name not in self.company_instances:
|
|
175
179
|
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
176
|
-
f"
|
|
180
|
+
f"company not configured: {company_name}")
|
|
177
181
|
|
|
178
182
|
# source 2: external company user
|
|
179
183
|
company_instance = self.company_instances[company_name]
|
|
@@ -182,48 +186,50 @@ class Dispatcher:
|
|
|
182
186
|
except Exception as e:
|
|
183
187
|
logging.exception(e)
|
|
184
188
|
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
185
|
-
f"Error
|
|
189
|
+
f"Error in get_user_info: {company_name}: {str(e)}") from e
|
|
186
190
|
|
|
187
191
|
return external_user_profile
|
|
188
192
|
|
|
189
|
-
def get_metadata_from_filename(self, company_name: str, filename: str) -> dict:
|
|
190
|
-
if company_name not in self.company_instances:
|
|
191
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
192
|
-
f"Empresa no configurada: {company_name}")
|
|
193
|
-
|
|
194
|
-
company_instance = self.company_instances[company_name]
|
|
195
|
-
try:
|
|
196
|
-
return company_instance.get_metadata_from_filename(filename)
|
|
197
|
-
except Exception as e:
|
|
198
|
-
logging.exception(e)
|
|
199
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
200
|
-
f"Error en get_metadata_from_filename de {company_name}: {str(e)}") from e
|
|
201
|
-
|
|
202
193
|
def get_company_instance(self, company_name: str):
|
|
203
194
|
"""Returns the instance for a given company name."""
|
|
204
195
|
return self.company_instances.get(company_name)
|
|
205
196
|
|
|
206
197
|
|
|
207
|
-
|
|
208
198
|
# iatoolkit system prompts
|
|
209
199
|
_SYSTEM_PROMPT = [
|
|
210
|
-
{'name': 'query_main', 'description':'main prompt
|
|
211
|
-
{'name': 'format_styles', 'description':'
|
|
212
|
-
{'name': 'sql_rules', 'description':'
|
|
200
|
+
{'name': 'query_main', 'description':'iatoolkit main prompt'},
|
|
201
|
+
{'name': 'format_styles', 'description':'output format styles'},
|
|
202
|
+
{'name': 'sql_rules', 'description':'instructions for SQL queries'}
|
|
213
203
|
]
|
|
214
204
|
|
|
215
|
-
|
|
216
|
-
# iatoolkit function calls
|
|
205
|
+
# iatoolkit built-in functions (Tools)
|
|
217
206
|
_FUNCTION_LIST = [
|
|
218
207
|
{
|
|
219
|
-
"
|
|
208
|
+
"function_name": "iat_sql_query",
|
|
209
|
+
"description": "Servicio SQL de IAToolkit: debes utilizar este servicio para todas las consultas a base de datos.",
|
|
210
|
+
"parameters": {
|
|
211
|
+
"type": "object",
|
|
212
|
+
"properties": {
|
|
213
|
+
"database": {
|
|
214
|
+
"type": "string",
|
|
215
|
+
"description": "nombre de la base de datos a consultar: `database_name`"
|
|
216
|
+
},
|
|
217
|
+
"query": {
|
|
218
|
+
"type": "string",
|
|
219
|
+
"description": "string con la consulta en sql"
|
|
220
|
+
},
|
|
221
|
+
},
|
|
222
|
+
"required": ["database", "query"]
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
{
|
|
226
|
+
"function_name": "iat_generate_excel",
|
|
220
227
|
"description": "Generador de Excel."
|
|
221
228
|
"Genera un archivo Excel (.xlsx) a partir de una lista de diccionarios. "
|
|
222
229
|
"Cada diccionario representa una fila del archivo. "
|
|
223
230
|
"el archivo se guarda en directorio de descargas."
|
|
224
231
|
"retorna diccionario con filename, attachment_token (para enviar archivo por mail)"
|
|
225
232
|
"content_type y download_link",
|
|
226
|
-
"function_name": "iat_generate_excel",
|
|
227
233
|
"parameters": {
|
|
228
234
|
"type": "object",
|
|
229
235
|
"properties": {
|
|
@@ -263,11 +269,10 @@ _FUNCTION_LIST = [
|
|
|
263
269
|
}
|
|
264
270
|
},
|
|
265
271
|
{
|
|
266
|
-
'
|
|
267
|
-
|
|
272
|
+
'function_name': "iat_send_email",
|
|
273
|
+
'description': "iatoolkit mail system. "
|
|
268
274
|
"envia mails cuando un usuario lo solicita."
|
|
269
275
|
"Si no te indican quien envia el correo utiliza la dirección iatoolkit@iatoolkit.com",
|
|
270
|
-
'function_name': "iat_send_email",
|
|
271
276
|
'parameters': {
|
|
272
277
|
"type": "object",
|
|
273
278
|
"properties": {
|
|
@@ -11,10 +11,13 @@ import os
|
|
|
11
11
|
import pytesseract
|
|
12
12
|
from injector import inject
|
|
13
13
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
14
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
14
15
|
|
|
15
16
|
class DocumentService:
|
|
16
17
|
@inject
|
|
17
|
-
def __init__(self):
|
|
18
|
+
def __init__(self, i18n_service: I18nService):
|
|
19
|
+
self.i18n_service = i18n_service
|
|
20
|
+
|
|
18
21
|
# max number of pages to load
|
|
19
22
|
self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "200"))
|
|
20
23
|
|
|
@@ -29,7 +32,7 @@ class DocumentService:
|
|
|
29
32
|
file_content = file_content.decode('utf-8')
|
|
30
33
|
except UnicodeDecodeError:
|
|
31
34
|
raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
|
|
32
|
-
|
|
35
|
+
self.i18n_service.t('errors.services.no_text_file'))
|
|
33
36
|
|
|
34
37
|
return file_content
|
|
35
38
|
elif filename.lower().endswith('.pdf'):
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# iatoolkit/services/embedding_service.py
|
|
2
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
3
|
+
# Product: IAToolkit
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import base64
|
|
7
|
+
import numpy as np
|
|
8
|
+
from threading import Lock
|
|
9
|
+
from huggingface_hub import InferenceClient
|
|
10
|
+
from openai import OpenAI
|
|
11
|
+
from injector import inject
|
|
12
|
+
from iatoolkit.services.configuration_service import ConfigurationService
|
|
13
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
14
|
+
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
15
|
+
import logging
|
|
16
|
+
|
|
17
|
+
# Wrapper classes to create a common interface for embedding clients
|
|
18
|
+
class EmbeddingClientWrapper:
|
|
19
|
+
"""Abstract base class for embedding client wrappers."""
|
|
20
|
+
def __init__(self, client, model: str):
|
|
21
|
+
self.client = client
|
|
22
|
+
self.model = model
|
|
23
|
+
|
|
24
|
+
def get_embedding(self, text: str) -> list[float]:
|
|
25
|
+
"""Generates and returns an embedding for the given text."""
|
|
26
|
+
raise NotImplementedError
|
|
27
|
+
|
|
28
|
+
class HuggingFaceClientWrapper(EmbeddingClientWrapper):
|
|
29
|
+
def get_embedding(self, text: str) -> list[float]:
|
|
30
|
+
embedding = self.client.feature_extraction(text)
|
|
31
|
+
# Ensure the output is a flat list of floats
|
|
32
|
+
if isinstance(embedding, list) and len(embedding) > 0 and isinstance(embedding[0], list):
|
|
33
|
+
return embedding[0]
|
|
34
|
+
return embedding
|
|
35
|
+
|
|
36
|
+
class OpenAIClientWrapper(EmbeddingClientWrapper):
|
|
37
|
+
def get_embedding(self, text: str) -> list[float]:
|
|
38
|
+
# The OpenAI API expects the input text to be clean
|
|
39
|
+
text = text.replace("\n", " ")
|
|
40
|
+
response = self.client.embeddings.create(input=[text], model=self.model)
|
|
41
|
+
return response.data[0].embedding
|
|
42
|
+
|
|
43
|
+
# Factory and Service classes
|
|
44
|
+
class EmbeddingClientFactory:
|
|
45
|
+
"""
|
|
46
|
+
Manages the lifecycle of embedding client wrappers for different companies.
|
|
47
|
+
It ensures that only one client wrapper is created per company, and it is thread-safe.
|
|
48
|
+
"""
|
|
49
|
+
@inject
|
|
50
|
+
def __init__(self, config_service: ConfigurationService):
|
|
51
|
+
self.config_service = config_service
|
|
52
|
+
self._clients = {} # Cache for storing initialized client wrappers
|
|
53
|
+
self._lock = Lock()
|
|
54
|
+
|
|
55
|
+
def get_client(self, company_short_name: str) -> EmbeddingClientWrapper:
|
|
56
|
+
"""
|
|
57
|
+
Retrieves a configured embedding client wrapper for a specific company.
|
|
58
|
+
If the client is not in the cache, it creates and stores it.
|
|
59
|
+
"""
|
|
60
|
+
if company_short_name in self._clients:
|
|
61
|
+
return self._clients[company_short_name]
|
|
62
|
+
|
|
63
|
+
with self._lock:
|
|
64
|
+
if company_short_name in self._clients:
|
|
65
|
+
return self._clients[company_short_name]
|
|
66
|
+
|
|
67
|
+
embedding_config = self.config_service.get_configuration(company_short_name, 'embedding_provider')
|
|
68
|
+
if not embedding_config:
|
|
69
|
+
raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
|
|
70
|
+
|
|
71
|
+
provider = embedding_config.get('provider')
|
|
72
|
+
if not provider:
|
|
73
|
+
raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
|
|
74
|
+
model = embedding_config.get('model')
|
|
75
|
+
|
|
76
|
+
api_key_name = embedding_config.get('api_key_name')
|
|
77
|
+
api_key = os.getenv(api_key_name)
|
|
78
|
+
if not api_key:
|
|
79
|
+
raise ValueError(f"Environment variable '{api_key_name}' is not set.")
|
|
80
|
+
|
|
81
|
+
# Logic to handle multiple providers
|
|
82
|
+
wrapper = None
|
|
83
|
+
if provider == 'huggingface':
|
|
84
|
+
if not model:
|
|
85
|
+
model='sentence-transformers/all-MiniLM-L6-v2'
|
|
86
|
+
client = InferenceClient(model=model, token=api_key)
|
|
87
|
+
wrapper = HuggingFaceClientWrapper(client, model)
|
|
88
|
+
elif provider == 'openai':
|
|
89
|
+
client = OpenAI(api_key=api_key)
|
|
90
|
+
if not model:
|
|
91
|
+
model='text-embedding-ada-002'
|
|
92
|
+
wrapper = OpenAIClientWrapper(client, model)
|
|
93
|
+
else:
|
|
94
|
+
raise NotImplementedError(f"Embedding provider '{provider}' is not implemented.")
|
|
95
|
+
|
|
96
|
+
logging.info(f"Embedding client for '{company_short_name}' created with model: {model} via {provider}")
|
|
97
|
+
self._clients[company_short_name] = wrapper
|
|
98
|
+
return wrapper
|
|
99
|
+
|
|
100
|
+
class EmbeddingService:
|
|
101
|
+
"""
|
|
102
|
+
A stateless service for generating text embeddings.
|
|
103
|
+
It relies on the EmbeddingClientFactory to get the correct,
|
|
104
|
+
company-specific embedding client on demand.
|
|
105
|
+
"""
|
|
106
|
+
@inject
|
|
107
|
+
def __init__(self,
|
|
108
|
+
client_factory: EmbeddingClientFactory,
|
|
109
|
+
profile_repo: ProfileRepo,
|
|
110
|
+
i18n_service: I18nService):
|
|
111
|
+
self.client_factory = client_factory
|
|
112
|
+
self.i18n_service = i18n_service
|
|
113
|
+
self.profile_repo = profile_repo
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def embed_text(self, company_short_name: str, text: str, to_base64: bool = False) -> list[float] | str:
|
|
117
|
+
"""
|
|
118
|
+
Generates the embedding for a given text using the appropriate company model.
|
|
119
|
+
"""
|
|
120
|
+
try:
|
|
121
|
+
company = self.profile_repo.get_company_by_short_name(company_short_name)
|
|
122
|
+
if not company:
|
|
123
|
+
raise ValueError(self.i18n_service.t('errors.company_not_found', company_short_name=company_short_name))
|
|
124
|
+
|
|
125
|
+
# 1. Get the correct client wrapper from the factory
|
|
126
|
+
client_wrapper = self.client_factory.get_client(company_short_name)
|
|
127
|
+
|
|
128
|
+
# 2. Use the wrapper's common interface to get the embedding
|
|
129
|
+
embedding = client_wrapper.get_embedding(text)
|
|
130
|
+
|
|
131
|
+
# 3. Process the result
|
|
132
|
+
if to_base64:
|
|
133
|
+
return base64.b64encode(np.array(embedding, dtype=np.float32).tobytes()).decode('utf-8')
|
|
134
|
+
|
|
135
|
+
return embedding
|
|
136
|
+
except Exception as e:
|
|
137
|
+
logging.error(f"Error generating embedding for text: {text[:80]}... - {e}")
|
|
138
|
+
raise
|
|
139
|
+
|
|
140
|
+
def get_model_name(self, company_short_name: str) -> str:
|
|
141
|
+
"""
|
|
142
|
+
Helper method to get the model name for a specific company.
|
|
143
|
+
"""
|
|
144
|
+
# Get the wrapper and return the model name from it
|
|
145
|
+
client_wrapper = self.client_factory.get_client(company_short_name)
|
|
146
|
+
return client_wrapper.model
|
|
@@ -8,6 +8,7 @@ import pandas as pd
|
|
|
8
8
|
from uuid import uuid4
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
11
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
11
12
|
from injector import inject
|
|
12
13
|
import os
|
|
13
14
|
import logging
|
|
@@ -18,8 +19,11 @@ EXCEL_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
|
18
19
|
|
|
19
20
|
class ExcelService:
|
|
20
21
|
@inject
|
|
21
|
-
def __init__(self,
|
|
22
|
+
def __init__(self,
|
|
23
|
+
util: Utility,
|
|
24
|
+
i18n_service: I18nService):
|
|
22
25
|
self.util = util
|
|
26
|
+
self.i18n_service = i18n_service
|
|
23
27
|
|
|
24
28
|
def excel_generator(self, **kwargs) -> str:
|
|
25
29
|
"""
|
|
@@ -42,11 +46,11 @@ class ExcelService:
|
|
|
42
46
|
# get the parameters
|
|
43
47
|
fname = kwargs.get('filename')
|
|
44
48
|
if not fname:
|
|
45
|
-
return '
|
|
49
|
+
return self.i18n_service.t('errors.services.no_output_file')
|
|
46
50
|
|
|
47
51
|
data = kwargs.get('data')
|
|
48
52
|
if not data or not isinstance(data, list):
|
|
49
|
-
return '
|
|
53
|
+
return self.i18n_service.t('errors.services.no_data_for_excel')
|
|
50
54
|
|
|
51
55
|
sheet_name = kwargs.get('sheet_name', 'hoja 1')
|
|
52
56
|
|
|
@@ -58,7 +62,7 @@ class ExcelService:
|
|
|
58
62
|
|
|
59
63
|
# 4. check that download directory is configured
|
|
60
64
|
if 'IATOOLKIT_DOWNLOAD_DIR' not in current_app.config:
|
|
61
|
-
return '
|
|
65
|
+
return self.i18n_service.t('errors.services.no_download_directory')
|
|
62
66
|
|
|
63
67
|
download_dir = current_app.config['IATOOLKIT_DOWNLOAD_DIR']
|
|
64
68
|
filepath = Path(download_dir) / token
|
|
@@ -77,28 +81,28 @@ class ExcelService:
|
|
|
77
81
|
|
|
78
82
|
except Exception as e:
|
|
79
83
|
raise IAToolkitException(IAToolkitException.ErrorType.CALL_ERROR,
|
|
80
|
-
'
|
|
84
|
+
self.i18n_service.t('errors.services.cannot_create_excel')) from e
|
|
81
85
|
|
|
82
86
|
def validate_file_access(self, filename):
|
|
83
87
|
try:
|
|
84
88
|
if not filename:
|
|
85
|
-
return jsonify({"error":
|
|
89
|
+
return jsonify({"error": self.i18n_service.t('errors.services.invalid_filename')})
|
|
86
90
|
# Prevent path traversal attacks
|
|
87
91
|
if '..' in filename or filename.startswith('/') or '\\' in filename:
|
|
88
|
-
return jsonify({"error":
|
|
92
|
+
return jsonify({"error": self.i18n_service.t('errors.services.invalid_filename')})
|
|
89
93
|
|
|
90
94
|
temp_dir = os.path.join(current_app.root_path, 'static', 'temp')
|
|
91
95
|
file_path = os.path.join(temp_dir, filename)
|
|
92
96
|
|
|
93
97
|
if not os.path.exists(file_path):
|
|
94
|
-
return jsonify({"error":
|
|
98
|
+
return jsonify({"error": self.i18n_service.t('errors.services.file_not_exist')})
|
|
95
99
|
|
|
96
100
|
if not os.path.isfile(file_path):
|
|
97
|
-
return jsonify({"error":
|
|
101
|
+
return jsonify({"error": self.i18n_service.t('errors.services.path_is_not_a_file')})
|
|
98
102
|
|
|
99
103
|
return None
|
|
100
104
|
|
|
101
105
|
except Exception as e:
|
|
102
|
-
error_msg = f"
|
|
106
|
+
error_msg = f"File validation error {filename}: {str(e)}"
|
|
103
107
|
logging.error(error_msg)
|
|
104
|
-
return jsonify({"error":
|
|
108
|
+
return jsonify({"error": self.i18n_service.t('errors.services.file_validation_error')})
|
|
@@ -52,27 +52,19 @@ class FileProcessor:
|
|
|
52
52
|
logger: Optional[logging.Logger] = None):
|
|
53
53
|
self.connector = connector
|
|
54
54
|
self.config = config
|
|
55
|
-
self.logger = logger or self._setup_logger()
|
|
56
55
|
self.processed_files = 0
|
|
57
56
|
|
|
58
|
-
def _setup_logger(self):
|
|
59
|
-
logging.basicConfig(
|
|
60
|
-
filename=self.config.log_file,
|
|
61
|
-
level=logging.INFO,
|
|
62
|
-
format='%(asctime)s - %(levelname)s - %(message)s'
|
|
63
|
-
)
|
|
64
|
-
return logging.getLogger(__name__)
|
|
65
57
|
|
|
66
58
|
def process_files(self):
|
|
67
59
|
# Fetches files from the connector, filters them, and processes them.
|
|
68
60
|
try:
|
|
69
61
|
files = self.connector.list_files()
|
|
70
62
|
except Exception as e:
|
|
71
|
-
|
|
63
|
+
logging.error(f"Error fetching files: {e}")
|
|
72
64
|
return False
|
|
73
65
|
|
|
74
66
|
if self.config.echo:
|
|
75
|
-
print(f'
|
|
67
|
+
print(f'loading {len(files)} files')
|
|
76
68
|
|
|
77
69
|
for file_info in files:
|
|
78
70
|
file_path = file_info['path']
|
|
@@ -95,10 +87,10 @@ class FileProcessor:
|
|
|
95
87
|
context=self.config.context)
|
|
96
88
|
self.processed_files += 1
|
|
97
89
|
|
|
98
|
-
|
|
90
|
+
logging.info(f"Successfully processed file: {file_path}")
|
|
99
91
|
|
|
100
92
|
except Exception as e:
|
|
101
|
-
|
|
93
|
+
logging.error(f"Error processing {file_path}: {e}")
|
|
102
94
|
if not self.config.continue_on_error:
|
|
103
95
|
raise e
|
|
104
96
|
|
|
@@ -6,32 +6,32 @@
|
|
|
6
6
|
from injector import inject
|
|
7
7
|
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
8
8
|
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
9
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class HistoryService:
|
|
12
13
|
@inject
|
|
13
14
|
def __init__(self, llm_query_repo: LLMQueryRepo,
|
|
14
|
-
profile_repo: ProfileRepo
|
|
15
|
+
profile_repo: ProfileRepo,
|
|
16
|
+
i18n_service: I18nService):
|
|
15
17
|
self.llm_query_repo = llm_query_repo
|
|
16
18
|
self.profile_repo = profile_repo
|
|
19
|
+
self.i18n_service = i18n_service
|
|
17
20
|
|
|
18
21
|
def get_history(self,
|
|
19
22
|
company_short_name: str,
|
|
20
23
|
user_identifier: str) -> dict:
|
|
21
24
|
try:
|
|
22
|
-
# validate company
|
|
23
25
|
company = self.profile_repo.get_company_by_short_name(company_short_name)
|
|
24
26
|
if not company:
|
|
25
|
-
return {
|
|
27
|
+
return {"error": self.i18n_service.t('errors.company_not_found', company_short_name=company_short_name)}
|
|
26
28
|
|
|
27
29
|
history = self.llm_query_repo.get_history(company, user_identifier)
|
|
28
|
-
|
|
29
30
|
if not history:
|
|
30
|
-
return {'message': '
|
|
31
|
+
return {'message': 'empty history', 'history': []}
|
|
31
32
|
|
|
32
33
|
history_list = [query.to_dict() for query in history]
|
|
33
|
-
|
|
34
|
-
return {'message': 'Historial obtenido correctamente', 'history': history_list}
|
|
34
|
+
return {'message': 'history loaded ok', 'history': history_list}
|
|
35
35
|
|
|
36
36
|
except Exception as e:
|
|
37
37
|
return {'error': str(e)}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# iatoolkit/services/i18n_service.py
|
|
2
2
|
import os
|
|
3
3
|
import logging
|
|
4
|
-
from injector import inject
|
|
4
|
+
from injector import inject, singleton
|
|
5
5
|
from iatoolkit.common.util import Utility
|
|
6
6
|
from iatoolkit.services.language_service import LanguageService
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
@singleton
|
|
9
9
|
class I18nService:
|
|
10
10
|
"""
|
|
11
11
|
Servicio centralizado para manejar la internacionalización (i18n).
|
|
@@ -27,7 +27,7 @@ class I18nService:
|
|
|
27
27
|
"""
|
|
28
28
|
locales_dir = os.path.join(os.path.dirname(__file__), '..', 'locales')
|
|
29
29
|
if not os.path.exists(locales_dir):
|
|
30
|
-
logging.error("
|
|
30
|
+
logging.error("Directory 'locales' not found.")
|
|
31
31
|
return
|
|
32
32
|
|
|
33
33
|
for filename in os.listdir(locales_dir):
|
|
@@ -37,7 +37,7 @@ class I18nService:
|
|
|
37
37
|
try:
|
|
38
38
|
self.translations[lang_code] = self.util.load_schema_from_yaml(filepath)
|
|
39
39
|
except Exception as e:
|
|
40
|
-
logging.error(f"
|
|
40
|
+
logging.error(f"Error while loading the translation file {filepath}: {e}")
|
|
41
41
|
|
|
42
42
|
def _get_nested_key(self, lang: str, key: str):
|
|
43
43
|
"""
|
|
@@ -20,8 +20,8 @@ class JWTService:
|
|
|
20
20
|
self.secret_key = app.config['JWT_SECRET_KEY']
|
|
21
21
|
self.algorithm = app.config['JWT_ALGORITHM']
|
|
22
22
|
except KeyError as e:
|
|
23
|
-
logging.error(f"
|
|
24
|
-
raise RuntimeError(f"
|
|
23
|
+
logging.error(f"missing JWT configuration: {e}.")
|
|
24
|
+
raise RuntimeError(f"missing JWT configuration variables: {e}")
|
|
25
25
|
|
|
26
26
|
def generate_chat_jwt(self,
|
|
27
27
|
company_short_name: str,
|
|
@@ -58,25 +58,23 @@ class JWTService:
|
|
|
58
58
|
|
|
59
59
|
# Validaciones adicionales
|
|
60
60
|
if payload.get('type') != 'chat_session':
|
|
61
|
-
logging.warning(f"
|
|
61
|
+
logging.warning(f"Invalid JWT type '{payload.get('type')}'")
|
|
62
62
|
return None
|
|
63
63
|
|
|
64
64
|
# user_identifier debe estar presente
|
|
65
65
|
if not payload.get('user_identifier'):
|
|
66
|
-
logging.warning(f"
|
|
66
|
+
logging.warning(f"missing user_identifier in JWT payload.")
|
|
67
67
|
return None
|
|
68
68
|
|
|
69
69
|
if not payload.get('company_short_name'):
|
|
70
|
-
logging.warning(f"
|
|
70
|
+
logging.warning(f"missing company_short_name in JWT payload.")
|
|
71
71
|
return None
|
|
72
72
|
|
|
73
|
-
logging.debug(
|
|
74
|
-
f"JWT validado exitosamente para company: {payload.get('company_short_name')}, user: {payload.get('external_user_id')}")
|
|
75
73
|
return payload
|
|
76
74
|
|
|
77
75
|
except jwt.InvalidTokenError as e:
|
|
78
|
-
logging.warning(f"
|
|
76
|
+
logging.warning(f"Invalid JWT token:: {e}")
|
|
79
77
|
return None
|
|
80
78
|
except Exception as e:
|
|
81
|
-
logging.error(f"
|
|
79
|
+
logging.error(f"unexpected error during JWT validation: {e}")
|
|
82
80
|
return None
|