iatoolkit 0.11.0__py3-none-any.whl → 0.71.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iatoolkit/__init__.py +2 -6
- iatoolkit/base_company.py +9 -29
- iatoolkit/cli_commands.py +1 -1
- iatoolkit/common/routes.py +96 -52
- iatoolkit/common/session_manager.py +2 -1
- iatoolkit/common/util.py +17 -27
- iatoolkit/company_registry.py +1 -2
- iatoolkit/iatoolkit.py +97 -53
- iatoolkit/infra/llm_client.py +15 -20
- iatoolkit/infra/llm_proxy.py +38 -10
- iatoolkit/infra/openai_adapter.py +1 -1
- iatoolkit/infra/redis_session_manager.py +48 -2
- iatoolkit/locales/en.yaml +167 -0
- iatoolkit/locales/es.yaml +163 -0
- iatoolkit/repositories/database_manager.py +23 -3
- iatoolkit/repositories/document_repo.py +1 -1
- iatoolkit/repositories/models.py +35 -10
- iatoolkit/repositories/profile_repo.py +3 -2
- iatoolkit/repositories/vs_repo.py +26 -20
- iatoolkit/services/auth_service.py +193 -0
- iatoolkit/services/branding_service.py +70 -25
- iatoolkit/services/company_context_service.py +155 -0
- iatoolkit/services/configuration_service.py +133 -0
- iatoolkit/services/dispatcher_service.py +80 -105
- iatoolkit/services/document_service.py +5 -2
- iatoolkit/services/embedding_service.py +146 -0
- iatoolkit/services/excel_service.py +30 -26
- iatoolkit/services/file_processor_service.py +4 -12
- iatoolkit/services/history_service.py +7 -16
- iatoolkit/services/i18n_service.py +104 -0
- iatoolkit/services/jwt_service.py +18 -29
- iatoolkit/services/language_service.py +83 -0
- iatoolkit/services/load_documents_service.py +100 -113
- iatoolkit/services/mail_service.py +9 -4
- iatoolkit/services/profile_service.py +152 -76
- iatoolkit/services/prompt_manager_service.py +20 -16
- iatoolkit/services/query_service.py +208 -96
- iatoolkit/services/search_service.py +11 -4
- iatoolkit/services/sql_service.py +57 -25
- iatoolkit/services/tasks_service.py +1 -1
- iatoolkit/services/user_feedback_service.py +72 -34
- iatoolkit/services/user_session_context_service.py +112 -54
- iatoolkit/static/images/fernando.jpeg +0 -0
- iatoolkit/static/js/chat_feedback_button.js +80 -0
- iatoolkit/static/js/chat_help_content.js +124 -0
- iatoolkit/static/js/chat_history_button.js +110 -0
- iatoolkit/static/js/chat_logout_button.js +36 -0
- iatoolkit/static/js/chat_main.js +135 -222
- iatoolkit/static/js/chat_onboarding_button.js +103 -0
- iatoolkit/static/js/chat_prompt_manager.js +94 -0
- iatoolkit/static/js/chat_reload_button.js +35 -0
- iatoolkit/static/styles/chat_iatoolkit.css +289 -210
- iatoolkit/static/styles/chat_modal.css +63 -77
- iatoolkit/static/styles/chat_public.css +107 -0
- iatoolkit/static/styles/landing_page.css +182 -0
- iatoolkit/static/styles/onboarding.css +176 -0
- iatoolkit/system_prompts/query_main.prompt +5 -22
- iatoolkit/templates/_company_header.html +20 -0
- iatoolkit/templates/_login_widget.html +42 -0
- iatoolkit/templates/base.html +40 -20
- iatoolkit/templates/change_password.html +57 -36
- iatoolkit/templates/chat.html +180 -86
- iatoolkit/templates/chat_modals.html +138 -68
- iatoolkit/templates/error.html +44 -8
- iatoolkit/templates/forgot_password.html +40 -23
- iatoolkit/templates/index.html +145 -0
- iatoolkit/templates/login_simulation.html +45 -0
- iatoolkit/templates/onboarding_shell.html +107 -0
- iatoolkit/templates/signup.html +63 -65
- iatoolkit/views/base_login_view.py +91 -0
- iatoolkit/views/change_password_view.py +56 -31
- iatoolkit/views/embedding_api_view.py +65 -0
- iatoolkit/views/external_login_view.py +61 -28
- iatoolkit/views/{file_store_view.py → file_store_api_view.py} +10 -3
- iatoolkit/views/forgot_password_view.py +27 -21
- iatoolkit/views/help_content_api_view.py +54 -0
- iatoolkit/views/history_api_view.py +56 -0
- iatoolkit/views/home_view.py +50 -23
- iatoolkit/views/index_view.py +14 -0
- iatoolkit/views/init_context_api_view.py +74 -0
- iatoolkit/views/llmquery_api_view.py +58 -0
- iatoolkit/views/login_simulation_view.py +93 -0
- iatoolkit/views/login_view.py +130 -37
- iatoolkit/views/logout_api_view.py +49 -0
- iatoolkit/views/profile_api_view.py +46 -0
- iatoolkit/views/{prompt_view.py → prompt_api_view.py} +10 -10
- iatoolkit/views/signup_view.py +41 -36
- iatoolkit/views/{tasks_view.py → tasks_api_view.py} +10 -36
- iatoolkit/views/tasks_review_api_view.py +55 -0
- iatoolkit/views/user_feedback_api_view.py +60 -0
- iatoolkit/views/verify_user_view.py +34 -29
- {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/METADATA +41 -23
- iatoolkit-0.71.2.dist-info/RECORD +122 -0
- iatoolkit-0.71.2.dist-info/licenses/LICENSE +21 -0
- iatoolkit/common/auth.py +0 -200
- iatoolkit/static/images/arrow_up.png +0 -0
- iatoolkit/static/images/diagrama_iatoolkit.jpg +0 -0
- iatoolkit/static/images/logo_clinica.png +0 -0
- iatoolkit/static/images/logo_iatoolkit.png +0 -0
- iatoolkit/static/images/logo_maxxa.png +0 -0
- iatoolkit/static/images/logo_notaria.png +0 -0
- iatoolkit/static/images/logo_tarjeta.png +0 -0
- iatoolkit/static/images/logo_umayor.png +0 -0
- iatoolkit/static/images/upload.png +0 -0
- iatoolkit/static/js/chat_feedback.js +0 -115
- iatoolkit/static/js/chat_history.js +0 -117
- iatoolkit/static/styles/chat_info.css +0 -53
- iatoolkit/templates/header.html +0 -31
- iatoolkit/templates/home.html +0 -199
- iatoolkit/templates/login.html +0 -43
- iatoolkit/templates/test.html +0 -9
- iatoolkit/views/chat_token_request_view.py +0 -98
- iatoolkit/views/chat_view.py +0 -58
- iatoolkit/views/download_file_view.py +0 -58
- iatoolkit/views/external_chat_login_view.py +0 -95
- iatoolkit/views/history_view.py +0 -57
- iatoolkit/views/llmquery_view.py +0 -65
- iatoolkit/views/tasks_review_view.py +0 -83
- iatoolkit/views/user_feedback_view.py +0 -74
- iatoolkit-0.11.0.dist-info/RECORD +0 -110
- {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/WHEEL +0 -0
- {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/top_level.txt +0 -0
|
@@ -5,12 +5,12 @@
|
|
|
5
5
|
|
|
6
6
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
7
7
|
from iatoolkit.services.prompt_manager_service import PromptService
|
|
8
|
+
from iatoolkit.services.sql_service import SqlService
|
|
8
9
|
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
9
|
-
|
|
10
|
+
from iatoolkit.services.configuration_service import ConfigurationService
|
|
10
11
|
from iatoolkit.repositories.models import Company, Function
|
|
11
12
|
from iatoolkit.services.excel_service import ExcelService
|
|
12
13
|
from iatoolkit.services.mail_service import MailService
|
|
13
|
-
from iatoolkit.common.session_manager import SessionManager
|
|
14
14
|
from iatoolkit.common.util import Utility
|
|
15
15
|
from injector import inject
|
|
16
16
|
import logging
|
|
@@ -20,14 +20,18 @@ import os
|
|
|
20
20
|
class Dispatcher:
|
|
21
21
|
@inject
|
|
22
22
|
def __init__(self,
|
|
23
|
+
config_service: ConfigurationService,
|
|
23
24
|
prompt_service: PromptService,
|
|
24
25
|
llmquery_repo: LLMQueryRepo,
|
|
25
26
|
util: Utility,
|
|
27
|
+
sql_service: SqlService,
|
|
26
28
|
excel_service: ExcelService,
|
|
27
29
|
mail_service: MailService):
|
|
30
|
+
self.config_service = config_service
|
|
28
31
|
self.prompt_service = prompt_service
|
|
29
32
|
self.llmquery_repo = llmquery_repo
|
|
30
33
|
self.util = util
|
|
34
|
+
self.sql_service = sql_service
|
|
31
35
|
self.excel_service = excel_service
|
|
32
36
|
self.mail_service = mail_service
|
|
33
37
|
self.system_functions = _FUNCTION_LIST
|
|
@@ -39,6 +43,7 @@ class Dispatcher:
|
|
|
39
43
|
self.tool_handlers = {
|
|
40
44
|
"iat_generate_excel": self.excel_service.excel_generator,
|
|
41
45
|
"iat_send_email": self.mail_service.send_mail,
|
|
46
|
+
"iat_sql_query": self.sql_service.exec_sql
|
|
42
47
|
}
|
|
43
48
|
|
|
44
49
|
@property
|
|
@@ -56,17 +61,49 @@ class Dispatcher:
|
|
|
56
61
|
self._company_instances = self.company_registry.get_all_company_instances()
|
|
57
62
|
return self._company_instances
|
|
58
63
|
|
|
59
|
-
def
|
|
64
|
+
def load_company_configs(self):
|
|
60
65
|
# initialize the system functions and prompts
|
|
61
66
|
self.setup_iatoolkit_system()
|
|
62
67
|
|
|
63
|
-
"""
|
|
64
|
-
for
|
|
65
|
-
|
|
66
|
-
|
|
68
|
+
"""Loads the configuration of every company"""
|
|
69
|
+
for company_name, company_instance in self.company_instances.items():
|
|
70
|
+
try:
|
|
71
|
+
# read company configuration from company.yaml
|
|
72
|
+
self.config_service.load_configuration(company_name, company_instance)
|
|
73
|
+
|
|
74
|
+
# register the company databases
|
|
75
|
+
self._register_company_databases(company_name)
|
|
76
|
+
|
|
77
|
+
except Exception as e:
|
|
78
|
+
logging.error(f"❌ Failed to register configuration for '{company_name}': {e}")
|
|
79
|
+
continue
|
|
67
80
|
|
|
68
81
|
return True
|
|
69
82
|
|
|
83
|
+
def _register_company_databases(self, company_name: str):
|
|
84
|
+
"""
|
|
85
|
+
Reads the data_sources config for a company and registers each
|
|
86
|
+
database with the central SqlService.
|
|
87
|
+
"""
|
|
88
|
+
logging.info(f" -> Registering databases for '{company_name}'...")
|
|
89
|
+
data_sources_config = self.config_service.get_configuration(company_name, 'data_sources')
|
|
90
|
+
|
|
91
|
+
if not data_sources_config or not data_sources_config.get('sql'):
|
|
92
|
+
return
|
|
93
|
+
|
|
94
|
+
for db_config in data_sources_config['sql']:
|
|
95
|
+
db_name = db_config.get('database')
|
|
96
|
+
db_env_var = db_config.get('connection_string_env')
|
|
97
|
+
|
|
98
|
+
# resolve the URI connection string from the environment variable
|
|
99
|
+
db_uri = os.getenv(db_env_var) if db_env_var else None
|
|
100
|
+
if not db_uri:
|
|
101
|
+
logging.error(
|
|
102
|
+
f"-> Skipping database registration for '{company_name}' due to missing 'database' name or invalid connection URI.")
|
|
103
|
+
return
|
|
104
|
+
|
|
105
|
+
self.sql_service.register_database(db_name, db_uri)
|
|
106
|
+
|
|
70
107
|
def setup_iatoolkit_system(self):
|
|
71
108
|
# create system functions
|
|
72
109
|
for function in self.system_functions:
|
|
@@ -91,25 +128,22 @@ class Dispatcher:
|
|
|
91
128
|
)
|
|
92
129
|
i += 1
|
|
93
130
|
|
|
94
|
-
# register in the database every company class
|
|
95
|
-
for company in self.company_instances.values():
|
|
96
|
-
company.register_company()
|
|
97
131
|
|
|
98
|
-
def dispatch(self,
|
|
99
|
-
company_key =
|
|
132
|
+
def dispatch(self, company_short_name: str, action: str, **kwargs) -> dict:
|
|
133
|
+
company_key = company_short_name.lower()
|
|
100
134
|
|
|
101
135
|
if company_key not in self.company_instances:
|
|
102
136
|
available_companies = list(self.company_instances.keys())
|
|
103
137
|
raise IAToolkitException(
|
|
104
138
|
IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
105
|
-
f"Empresa '{
|
|
139
|
+
f"Empresa '{company_short_name}' no configurada. Empresas disponibles: {available_companies}"
|
|
106
140
|
)
|
|
107
141
|
|
|
108
142
|
# check if action is a system function
|
|
109
143
|
if action in self.tool_handlers:
|
|
110
144
|
return self.tool_handlers[action](**kwargs)
|
|
111
145
|
|
|
112
|
-
company_instance = self.company_instances[
|
|
146
|
+
company_instance = self.company_instances[company_short_name]
|
|
113
147
|
try:
|
|
114
148
|
return company_instance.handle_request(action, **kwargs)
|
|
115
149
|
except IAToolkitException as e:
|
|
@@ -121,37 +155,6 @@ class Dispatcher:
|
|
|
121
155
|
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
122
156
|
f"Error en function call '{action}': {str(e)}") from e
|
|
123
157
|
|
|
124
|
-
def get_company_context(self, company_name: str, **kwargs) -> str:
|
|
125
|
-
if company_name not in self.company_instances:
|
|
126
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
127
|
-
f"Empresa no configurada: {company_name}")
|
|
128
|
-
|
|
129
|
-
company_context = ''
|
|
130
|
-
|
|
131
|
-
# read the company context from this list of markdown files,
|
|
132
|
-
# company brief, credits, operation description, etc.
|
|
133
|
-
context_dir = os.path.join(os.getcwd(), f'companies/{company_name}/context')
|
|
134
|
-
context_files = self.util.get_files_by_extension(context_dir, '.md', return_extension=True)
|
|
135
|
-
for file in context_files:
|
|
136
|
-
filepath = os.path.join(context_dir, file)
|
|
137
|
-
company_context += self.util.load_markdown_context(filepath)
|
|
138
|
-
|
|
139
|
-
# add the schemas for every table or function call responses
|
|
140
|
-
schema_dir = os.path.join(os.getcwd(), f'companies/{company_name}/schema')
|
|
141
|
-
schema_files = self.util.get_files_by_extension(schema_dir, '.yaml', return_extension=True)
|
|
142
|
-
for file in schema_files:
|
|
143
|
-
schema_name = file.split('_')[0]
|
|
144
|
-
filepath = os.path.join(schema_dir, file)
|
|
145
|
-
company_context += self.util.generate_context_for_schema(schema_name, filepath)
|
|
146
|
-
|
|
147
|
-
company_instance = self.company_instances[company_name]
|
|
148
|
-
try:
|
|
149
|
-
return company_context + company_instance.get_company_context(**kwargs)
|
|
150
|
-
except Exception as e:
|
|
151
|
-
logging.exception(e)
|
|
152
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
153
|
-
f"Error en get_company_context de {company_name}: {str(e)}") from e
|
|
154
|
-
|
|
155
158
|
def get_company_services(self, company: Company) -> list[dict]:
|
|
156
159
|
# create the syntax with openai response syntax, for the company function list
|
|
157
160
|
tools = []
|
|
@@ -171,89 +174,62 @@ class Dispatcher:
|
|
|
171
174
|
tools.append(ai_tool)
|
|
172
175
|
return tools
|
|
173
176
|
|
|
174
|
-
def get_user_info(self, company_name: str, user_identifier: str
|
|
175
|
-
if company_name not in self.company_instances:
|
|
176
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
177
|
-
f"Empresa no configurada: {company_name}")
|
|
178
|
-
|
|
179
|
-
raw_user_data = {}
|
|
180
|
-
if is_local_user:
|
|
181
|
-
# source 1: local user login into IAToolkit
|
|
182
|
-
raw_user_data = SessionManager.get('user', {})
|
|
183
|
-
else:
|
|
184
|
-
# source 2: external company user
|
|
185
|
-
company_instance = self.company_instances[company_name]
|
|
186
|
-
try:
|
|
187
|
-
raw_user_data = company_instance.get_user_info(user_identifier)
|
|
188
|
-
except Exception as e:
|
|
189
|
-
logging.exception(e)
|
|
190
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
191
|
-
f"Error en get_user_info de {company_name}: {str(e)}") from e
|
|
192
|
-
|
|
193
|
-
# always normalize the data for consistent structure
|
|
194
|
-
return self._normalize_user_data(raw_user_data, is_local_user)
|
|
195
|
-
|
|
196
|
-
def _normalize_user_data(self, raw_data: dict, is_local: bool) -> dict:
|
|
197
|
-
"""
|
|
198
|
-
Asegura que los datos del usuario siempre tengan una estructura consistente.
|
|
199
|
-
"""
|
|
200
|
-
# default values
|
|
201
|
-
normalized_user = {
|
|
202
|
-
"id": raw_data.get("id", 0),
|
|
203
|
-
"user_email": raw_data.get("email", ""),
|
|
204
|
-
"user_fullname": raw_data.get("user_fullname", ""),
|
|
205
|
-
"company_id": raw_data.get("company_id", 0),
|
|
206
|
-
"company_name": raw_data.get("company", ""),
|
|
207
|
-
"company_short_name": raw_data.get("company_short_name", ""),
|
|
208
|
-
"is_local": is_local,
|
|
209
|
-
"extras": raw_data.get("extras", {})
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
# get the extras from the raw data, if any
|
|
213
|
-
extras = raw_data.get("extras", {})
|
|
214
|
-
if isinstance(extras, dict):
|
|
215
|
-
normalized_user.update(extras)
|
|
216
|
-
|
|
217
|
-
return normalized_user
|
|
218
|
-
|
|
219
|
-
def get_metadata_from_filename(self, company_name: str, filename: str) -> dict:
|
|
177
|
+
def get_user_info(self, company_name: str, user_identifier: str) -> dict:
|
|
220
178
|
if company_name not in self.company_instances:
|
|
221
179
|
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
222
|
-
|
|
180
|
+
f"company not configured: {company_name}")
|
|
223
181
|
|
|
182
|
+
# source 2: external company user
|
|
224
183
|
company_instance = self.company_instances[company_name]
|
|
225
184
|
try:
|
|
226
|
-
|
|
185
|
+
external_user_profile = company_instance.get_user_info(user_identifier)
|
|
227
186
|
except Exception as e:
|
|
228
187
|
logging.exception(e)
|
|
229
188
|
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
230
|
-
|
|
189
|
+
f"Error in get_user_info: {company_name}: {str(e)}") from e
|
|
190
|
+
|
|
191
|
+
return external_user_profile
|
|
231
192
|
|
|
232
193
|
def get_company_instance(self, company_name: str):
|
|
233
194
|
"""Returns the instance for a given company name."""
|
|
234
195
|
return self.company_instances.get(company_name)
|
|
235
196
|
|
|
236
197
|
|
|
237
|
-
|
|
238
198
|
# iatoolkit system prompts
|
|
239
199
|
_SYSTEM_PROMPT = [
|
|
240
|
-
{'name': 'query_main', 'description':'main prompt
|
|
241
|
-
{'name': 'format_styles', 'description':'
|
|
242
|
-
{'name': 'sql_rules', 'description':'
|
|
200
|
+
{'name': 'query_main', 'description':'iatoolkit main prompt'},
|
|
201
|
+
{'name': 'format_styles', 'description':'output format styles'},
|
|
202
|
+
{'name': 'sql_rules', 'description':'instructions for SQL queries'}
|
|
243
203
|
]
|
|
244
204
|
|
|
245
|
-
|
|
246
|
-
# iatoolkit function calls
|
|
205
|
+
# iatoolkit built-in functions (Tools)
|
|
247
206
|
_FUNCTION_LIST = [
|
|
248
207
|
{
|
|
249
|
-
"
|
|
208
|
+
"function_name": "iat_sql_query",
|
|
209
|
+
"description": "Servicio SQL de IAToolkit: debes utilizar este servicio para todas las consultas a base de datos.",
|
|
210
|
+
"parameters": {
|
|
211
|
+
"type": "object",
|
|
212
|
+
"properties": {
|
|
213
|
+
"database": {
|
|
214
|
+
"type": "string",
|
|
215
|
+
"description": "nombre de la base de datos a consultar: `database_name`"
|
|
216
|
+
},
|
|
217
|
+
"query": {
|
|
218
|
+
"type": "string",
|
|
219
|
+
"description": "string con la consulta en sql"
|
|
220
|
+
},
|
|
221
|
+
},
|
|
222
|
+
"required": ["database", "query"]
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
{
|
|
226
|
+
"function_name": "iat_generate_excel",
|
|
250
227
|
"description": "Generador de Excel."
|
|
251
228
|
"Genera un archivo Excel (.xlsx) a partir de una lista de diccionarios. "
|
|
252
229
|
"Cada diccionario representa una fila del archivo. "
|
|
253
230
|
"el archivo se guarda en directorio de descargas."
|
|
254
231
|
"retorna diccionario con filename, attachment_token (para enviar archivo por mail)"
|
|
255
232
|
"content_type y download_link",
|
|
256
|
-
"function_name": "iat_generate_excel",
|
|
257
233
|
"parameters": {
|
|
258
234
|
"type": "object",
|
|
259
235
|
"properties": {
|
|
@@ -293,11 +269,10 @@ _FUNCTION_LIST = [
|
|
|
293
269
|
}
|
|
294
270
|
},
|
|
295
271
|
{
|
|
296
|
-
'
|
|
297
|
-
|
|
272
|
+
'function_name': "iat_send_email",
|
|
273
|
+
'description': "iatoolkit mail system. "
|
|
298
274
|
"envia mails cuando un usuario lo solicita."
|
|
299
275
|
"Si no te indican quien envia el correo utiliza la dirección iatoolkit@iatoolkit.com",
|
|
300
|
-
'function_name': "iat_send_email",
|
|
301
276
|
'parameters': {
|
|
302
277
|
"type": "object",
|
|
303
278
|
"properties": {
|
|
@@ -11,10 +11,13 @@ import os
|
|
|
11
11
|
import pytesseract
|
|
12
12
|
from injector import inject
|
|
13
13
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
14
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
14
15
|
|
|
15
16
|
class DocumentService:
|
|
16
17
|
@inject
|
|
17
|
-
def __init__(self):
|
|
18
|
+
def __init__(self, i18n_service: I18nService):
|
|
19
|
+
self.i18n_service = i18n_service
|
|
20
|
+
|
|
18
21
|
# max number of pages to load
|
|
19
22
|
self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "200"))
|
|
20
23
|
|
|
@@ -29,7 +32,7 @@ class DocumentService:
|
|
|
29
32
|
file_content = file_content.decode('utf-8')
|
|
30
33
|
except UnicodeDecodeError:
|
|
31
34
|
raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
|
|
32
|
-
|
|
35
|
+
self.i18n_service.t('errors.services.no_text_file'))
|
|
33
36
|
|
|
34
37
|
return file_content
|
|
35
38
|
elif filename.lower().endswith('.pdf'):
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# iatoolkit/services/embedding_service.py
|
|
2
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
3
|
+
# Product: IAToolkit
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import base64
|
|
7
|
+
import numpy as np
|
|
8
|
+
from threading import Lock
|
|
9
|
+
from huggingface_hub import InferenceClient
|
|
10
|
+
from openai import OpenAI
|
|
11
|
+
from injector import inject
|
|
12
|
+
from iatoolkit.services.configuration_service import ConfigurationService
|
|
13
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
14
|
+
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
15
|
+
import logging
|
|
16
|
+
|
|
17
|
+
# Wrapper classes to create a common interface for embedding clients
|
|
18
|
+
class EmbeddingClientWrapper:
|
|
19
|
+
"""Abstract base class for embedding client wrappers."""
|
|
20
|
+
def __init__(self, client, model: str):
|
|
21
|
+
self.client = client
|
|
22
|
+
self.model = model
|
|
23
|
+
|
|
24
|
+
def get_embedding(self, text: str) -> list[float]:
|
|
25
|
+
"""Generates and returns an embedding for the given text."""
|
|
26
|
+
raise NotImplementedError
|
|
27
|
+
|
|
28
|
+
class HuggingFaceClientWrapper(EmbeddingClientWrapper):
|
|
29
|
+
def get_embedding(self, text: str) -> list[float]:
|
|
30
|
+
embedding = self.client.feature_extraction(text)
|
|
31
|
+
# Ensure the output is a flat list of floats
|
|
32
|
+
if isinstance(embedding, list) and len(embedding) > 0 and isinstance(embedding[0], list):
|
|
33
|
+
return embedding[0]
|
|
34
|
+
return embedding
|
|
35
|
+
|
|
36
|
+
class OpenAIClientWrapper(EmbeddingClientWrapper):
|
|
37
|
+
def get_embedding(self, text: str) -> list[float]:
|
|
38
|
+
# The OpenAI API expects the input text to be clean
|
|
39
|
+
text = text.replace("\n", " ")
|
|
40
|
+
response = self.client.embeddings.create(input=[text], model=self.model)
|
|
41
|
+
return response.data[0].embedding
|
|
42
|
+
|
|
43
|
+
# Factory and Service classes
|
|
44
|
+
class EmbeddingClientFactory:
|
|
45
|
+
"""
|
|
46
|
+
Manages the lifecycle of embedding client wrappers for different companies.
|
|
47
|
+
It ensures that only one client wrapper is created per company, and it is thread-safe.
|
|
48
|
+
"""
|
|
49
|
+
@inject
|
|
50
|
+
def __init__(self, config_service: ConfigurationService):
|
|
51
|
+
self.config_service = config_service
|
|
52
|
+
self._clients = {} # Cache for storing initialized client wrappers
|
|
53
|
+
self._lock = Lock()
|
|
54
|
+
|
|
55
|
+
def get_client(self, company_short_name: str) -> EmbeddingClientWrapper:
|
|
56
|
+
"""
|
|
57
|
+
Retrieves a configured embedding client wrapper for a specific company.
|
|
58
|
+
If the client is not in the cache, it creates and stores it.
|
|
59
|
+
"""
|
|
60
|
+
if company_short_name in self._clients:
|
|
61
|
+
return self._clients[company_short_name]
|
|
62
|
+
|
|
63
|
+
with self._lock:
|
|
64
|
+
if company_short_name in self._clients:
|
|
65
|
+
return self._clients[company_short_name]
|
|
66
|
+
|
|
67
|
+
embedding_config = self.config_service.get_configuration(company_short_name, 'embedding_provider')
|
|
68
|
+
if not embedding_config:
|
|
69
|
+
raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
|
|
70
|
+
|
|
71
|
+
provider = embedding_config.get('provider')
|
|
72
|
+
if not provider:
|
|
73
|
+
raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
|
|
74
|
+
model = embedding_config.get('model')
|
|
75
|
+
|
|
76
|
+
api_key_name = embedding_config.get('api_key_name')
|
|
77
|
+
api_key = os.getenv(api_key_name)
|
|
78
|
+
if not api_key:
|
|
79
|
+
raise ValueError(f"Environment variable '{api_key_name}' is not set.")
|
|
80
|
+
|
|
81
|
+
# Logic to handle multiple providers
|
|
82
|
+
wrapper = None
|
|
83
|
+
if provider == 'huggingface':
|
|
84
|
+
if not model:
|
|
85
|
+
model='sentence-transformers/all-MiniLM-L6-v2'
|
|
86
|
+
client = InferenceClient(model=model, token=api_key)
|
|
87
|
+
wrapper = HuggingFaceClientWrapper(client, model)
|
|
88
|
+
elif provider == 'openai':
|
|
89
|
+
client = OpenAI(api_key=api_key)
|
|
90
|
+
if not model:
|
|
91
|
+
model='text-embedding-ada-002'
|
|
92
|
+
wrapper = OpenAIClientWrapper(client, model)
|
|
93
|
+
else:
|
|
94
|
+
raise NotImplementedError(f"Embedding provider '{provider}' is not implemented.")
|
|
95
|
+
|
|
96
|
+
logging.info(f"Embedding client for '{company_short_name}' created with model: {model} via {provider}")
|
|
97
|
+
self._clients[company_short_name] = wrapper
|
|
98
|
+
return wrapper
|
|
99
|
+
|
|
100
|
+
class EmbeddingService:
|
|
101
|
+
"""
|
|
102
|
+
A stateless service for generating text embeddings.
|
|
103
|
+
It relies on the EmbeddingClientFactory to get the correct,
|
|
104
|
+
company-specific embedding client on demand.
|
|
105
|
+
"""
|
|
106
|
+
@inject
|
|
107
|
+
def __init__(self,
|
|
108
|
+
client_factory: EmbeddingClientFactory,
|
|
109
|
+
profile_repo: ProfileRepo,
|
|
110
|
+
i18n_service: I18nService):
|
|
111
|
+
self.client_factory = client_factory
|
|
112
|
+
self.i18n_service = i18n_service
|
|
113
|
+
self.profile_repo = profile_repo
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def embed_text(self, company_short_name: str, text: str, to_base64: bool = False) -> list[float] | str:
|
|
117
|
+
"""
|
|
118
|
+
Generates the embedding for a given text using the appropriate company model.
|
|
119
|
+
"""
|
|
120
|
+
try:
|
|
121
|
+
company = self.profile_repo.get_company_by_short_name(company_short_name)
|
|
122
|
+
if not company:
|
|
123
|
+
raise ValueError(self.i18n_service.t('errors.company_not_found', company_short_name=company_short_name))
|
|
124
|
+
|
|
125
|
+
# 1. Get the correct client wrapper from the factory
|
|
126
|
+
client_wrapper = self.client_factory.get_client(company_short_name)
|
|
127
|
+
|
|
128
|
+
# 2. Use the wrapper's common interface to get the embedding
|
|
129
|
+
embedding = client_wrapper.get_embedding(text)
|
|
130
|
+
|
|
131
|
+
# 3. Process the result
|
|
132
|
+
if to_base64:
|
|
133
|
+
return base64.b64encode(np.array(embedding, dtype=np.float32).tobytes()).decode('utf-8')
|
|
134
|
+
|
|
135
|
+
return embedding
|
|
136
|
+
except Exception as e:
|
|
137
|
+
logging.error(f"Error generating embedding for text: {text[:80]}... - {e}")
|
|
138
|
+
raise
|
|
139
|
+
|
|
140
|
+
def get_model_name(self, company_short_name: str) -> str:
|
|
141
|
+
"""
|
|
142
|
+
Helper method to get the model name for a specific company.
|
|
143
|
+
"""
|
|
144
|
+
# Get the wrapper and return the model name from it
|
|
145
|
+
client_wrapper = self.client_factory.get_client(company_short_name)
|
|
146
|
+
return client_wrapper.model
|
|
@@ -8,6 +8,7 @@ import pandas as pd
|
|
|
8
8
|
from uuid import uuid4
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
11
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
11
12
|
from injector import inject
|
|
12
13
|
import os
|
|
13
14
|
import logging
|
|
@@ -18,35 +19,38 @@ EXCEL_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
|
18
19
|
|
|
19
20
|
class ExcelService:
|
|
20
21
|
@inject
|
|
21
|
-
def __init__(self,
|
|
22
|
+
def __init__(self,
|
|
23
|
+
util: Utility,
|
|
24
|
+
i18n_service: I18nService):
|
|
22
25
|
self.util = util
|
|
26
|
+
self.i18n_service = i18n_service
|
|
23
27
|
|
|
24
28
|
def excel_generator(self, **kwargs) -> str:
|
|
25
29
|
"""
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
30
|
+
Genera un Excel a partir de una lista de diccionarios.
|
|
31
|
+
|
|
32
|
+
Parámetros esperados en kwargs:
|
|
33
|
+
- filename: str (nombre lógico a mostrar, ej. "reporte_clientes.xlsx") [obligatorio]
|
|
34
|
+
- data: list[dict] (filas del excel) [obligatorio]
|
|
35
|
+
- sheet_name: str = "hoja 1"
|
|
36
|
+
|
|
37
|
+
Retorna:
|
|
38
|
+
{
|
|
39
|
+
"filename": "reporte.xlsx",
|
|
40
|
+
"attachment_token": "8b7f8a66-...-c1c3.xlsx",
|
|
41
|
+
"content_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
42
|
+
"download_link": "/download/8b7f8a66-...-c1c3.xlsx"
|
|
43
|
+
}
|
|
44
|
+
"""
|
|
41
45
|
try:
|
|
42
46
|
# get the parameters
|
|
43
47
|
fname = kwargs.get('filename')
|
|
44
48
|
if not fname:
|
|
45
|
-
return '
|
|
49
|
+
return self.i18n_service.t('errors.services.no_output_file')
|
|
46
50
|
|
|
47
51
|
data = kwargs.get('data')
|
|
48
52
|
if not data or not isinstance(data, list):
|
|
49
|
-
return '
|
|
53
|
+
return self.i18n_service.t('errors.services.no_data_for_excel')
|
|
50
54
|
|
|
51
55
|
sheet_name = kwargs.get('sheet_name', 'hoja 1')
|
|
52
56
|
|
|
@@ -58,7 +62,7 @@ class ExcelService:
|
|
|
58
62
|
|
|
59
63
|
# 4. check that download directory is configured
|
|
60
64
|
if 'IATOOLKIT_DOWNLOAD_DIR' not in current_app.config:
|
|
61
|
-
return '
|
|
65
|
+
return self.i18n_service.t('errors.services.no_download_directory')
|
|
62
66
|
|
|
63
67
|
download_dir = current_app.config['IATOOLKIT_DOWNLOAD_DIR']
|
|
64
68
|
filepath = Path(download_dir) / token
|
|
@@ -77,28 +81,28 @@ class ExcelService:
|
|
|
77
81
|
|
|
78
82
|
except Exception as e:
|
|
79
83
|
raise IAToolkitException(IAToolkitException.ErrorType.CALL_ERROR,
|
|
80
|
-
'
|
|
84
|
+
self.i18n_service.t('errors.services.cannot_create_excel')) from e
|
|
81
85
|
|
|
82
86
|
def validate_file_access(self, filename):
|
|
83
87
|
try:
|
|
84
88
|
if not filename:
|
|
85
|
-
return jsonify({"error":
|
|
89
|
+
return jsonify({"error": self.i18n_service.t('errors.services.invalid_filename')})
|
|
86
90
|
# Prevent path traversal attacks
|
|
87
91
|
if '..' in filename or filename.startswith('/') or '\\' in filename:
|
|
88
|
-
return jsonify({"error":
|
|
92
|
+
return jsonify({"error": self.i18n_service.t('errors.services.invalid_filename')})
|
|
89
93
|
|
|
90
94
|
temp_dir = os.path.join(current_app.root_path, 'static', 'temp')
|
|
91
95
|
file_path = os.path.join(temp_dir, filename)
|
|
92
96
|
|
|
93
97
|
if not os.path.exists(file_path):
|
|
94
|
-
return jsonify({"error":
|
|
98
|
+
return jsonify({"error": self.i18n_service.t('errors.services.file_not_exist')})
|
|
95
99
|
|
|
96
100
|
if not os.path.isfile(file_path):
|
|
97
|
-
return jsonify({"error":
|
|
101
|
+
return jsonify({"error": self.i18n_service.t('errors.services.path_is_not_a_file')})
|
|
98
102
|
|
|
99
103
|
return None
|
|
100
104
|
|
|
101
105
|
except Exception as e:
|
|
102
|
-
error_msg = f"
|
|
106
|
+
error_msg = f"File validation error {filename}: {str(e)}"
|
|
103
107
|
logging.error(error_msg)
|
|
104
|
-
return jsonify({"error":
|
|
108
|
+
return jsonify({"error": self.i18n_service.t('errors.services.file_validation_error')})
|
|
@@ -52,27 +52,19 @@ class FileProcessor:
|
|
|
52
52
|
logger: Optional[logging.Logger] = None):
|
|
53
53
|
self.connector = connector
|
|
54
54
|
self.config = config
|
|
55
|
-
self.logger = logger or self._setup_logger()
|
|
56
55
|
self.processed_files = 0
|
|
57
56
|
|
|
58
|
-
def _setup_logger(self):
|
|
59
|
-
logging.basicConfig(
|
|
60
|
-
filename=self.config.log_file,
|
|
61
|
-
level=logging.INFO,
|
|
62
|
-
format='%(asctime)s - %(levelname)s - %(message)s'
|
|
63
|
-
)
|
|
64
|
-
return logging.getLogger(__name__)
|
|
65
57
|
|
|
66
58
|
def process_files(self):
|
|
67
59
|
# Fetches files from the connector, filters them, and processes them.
|
|
68
60
|
try:
|
|
69
61
|
files = self.connector.list_files()
|
|
70
62
|
except Exception as e:
|
|
71
|
-
|
|
63
|
+
logging.error(f"Error fetching files: {e}")
|
|
72
64
|
return False
|
|
73
65
|
|
|
74
66
|
if self.config.echo:
|
|
75
|
-
print(f'
|
|
67
|
+
print(f'loading {len(files)} files')
|
|
76
68
|
|
|
77
69
|
for file_info in files:
|
|
78
70
|
file_path = file_info['path']
|
|
@@ -95,10 +87,10 @@ class FileProcessor:
|
|
|
95
87
|
context=self.config.context)
|
|
96
88
|
self.processed_files += 1
|
|
97
89
|
|
|
98
|
-
|
|
90
|
+
logging.info(f"Successfully processed file: {file_path}")
|
|
99
91
|
|
|
100
92
|
except Exception as e:
|
|
101
|
-
|
|
93
|
+
logging.error(f"Error processing {file_path}: {e}")
|
|
102
94
|
if not self.config.continue_on_error:
|
|
103
95
|
raise e
|
|
104
96
|
|
|
@@ -5,42 +5,33 @@
|
|
|
5
5
|
|
|
6
6
|
from injector import inject
|
|
7
7
|
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
8
|
-
|
|
9
8
|
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
10
|
-
from iatoolkit.
|
|
9
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
class HistoryService:
|
|
14
13
|
@inject
|
|
15
14
|
def __init__(self, llm_query_repo: LLMQueryRepo,
|
|
16
15
|
profile_repo: ProfileRepo,
|
|
17
|
-
|
|
16
|
+
i18n_service: I18nService):
|
|
18
17
|
self.llm_query_repo = llm_query_repo
|
|
19
18
|
self.profile_repo = profile_repo
|
|
20
|
-
self.
|
|
19
|
+
self.i18n_service = i18n_service
|
|
21
20
|
|
|
22
21
|
def get_history(self,
|
|
23
22
|
company_short_name: str,
|
|
24
|
-
|
|
25
|
-
local_user_id: int = 0) -> dict:
|
|
23
|
+
user_identifier: str) -> dict:
|
|
26
24
|
try:
|
|
27
|
-
user_identifier, _ = self.util.resolve_user_identifier(external_user_id, local_user_id)
|
|
28
|
-
if not user_identifier:
|
|
29
|
-
return {'error': "No se pudo resolver el identificador del usuario"}
|
|
30
|
-
|
|
31
|
-
# validate company
|
|
32
25
|
company = self.profile_repo.get_company_by_short_name(company_short_name)
|
|
33
26
|
if not company:
|
|
34
|
-
return {
|
|
27
|
+
return {"error": self.i18n_service.t('errors.company_not_found', company_short_name=company_short_name)}
|
|
35
28
|
|
|
36
29
|
history = self.llm_query_repo.get_history(company, user_identifier)
|
|
37
|
-
|
|
38
30
|
if not history:
|
|
39
|
-
return {'message': '
|
|
31
|
+
return {'message': 'empty history', 'history': []}
|
|
40
32
|
|
|
41
33
|
history_list = [query.to_dict() for query in history]
|
|
42
|
-
|
|
43
|
-
return {'message': 'Historial obtenido correctamente', 'history': history_list}
|
|
34
|
+
return {'message': 'history loaded ok', 'history': history_list}
|
|
44
35
|
|
|
45
36
|
except Exception as e:
|
|
46
37
|
return {'error': str(e)}
|