iatoolkit 0.71.4__py3-none-any.whl → 0.91.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iatoolkit/__init__.py +15 -5
- iatoolkit/base_company.py +4 -58
- iatoolkit/cli_commands.py +6 -7
- iatoolkit/common/exceptions.py +1 -0
- iatoolkit/common/routes.py +12 -28
- iatoolkit/common/util.py +7 -1
- iatoolkit/company_registry.py +50 -14
- iatoolkit/{iatoolkit.py → core.py} +54 -55
- iatoolkit/infra/{mail_app.py → brevo_mail_app.py} +15 -37
- iatoolkit/infra/llm_client.py +9 -5
- iatoolkit/locales/en.yaml +10 -2
- iatoolkit/locales/es.yaml +171 -162
- iatoolkit/repositories/database_manager.py +59 -14
- iatoolkit/repositories/llm_query_repo.py +34 -22
- iatoolkit/repositories/models.py +16 -18
- iatoolkit/repositories/profile_repo.py +5 -10
- iatoolkit/repositories/vs_repo.py +9 -4
- iatoolkit/services/auth_service.py +1 -1
- iatoolkit/services/branding_service.py +1 -1
- iatoolkit/services/company_context_service.py +19 -11
- iatoolkit/services/configuration_service.py +219 -46
- iatoolkit/services/dispatcher_service.py +31 -225
- iatoolkit/services/document_service.py +10 -1
- iatoolkit/services/embedding_service.py +9 -6
- iatoolkit/services/excel_service.py +50 -2
- iatoolkit/services/history_manager_service.py +189 -0
- iatoolkit/services/jwt_service.py +1 -1
- iatoolkit/services/language_service.py +8 -2
- iatoolkit/services/license_service.py +82 -0
- iatoolkit/services/mail_service.py +171 -25
- iatoolkit/services/profile_service.py +37 -32
- iatoolkit/services/{prompt_manager_service.py → prompt_service.py} +110 -1
- iatoolkit/services/query_service.py +192 -191
- iatoolkit/services/sql_service.py +63 -12
- iatoolkit/services/tool_service.py +231 -0
- iatoolkit/services/user_feedback_service.py +18 -6
- iatoolkit/services/user_session_context_service.py +18 -0
- iatoolkit/static/images/iatoolkit_core.png +0 -0
- iatoolkit/static/images/iatoolkit_logo.png +0 -0
- iatoolkit/static/js/chat_feedback_button.js +1 -1
- iatoolkit/static/js/chat_help_content.js +4 -4
- iatoolkit/static/js/chat_main.js +17 -5
- iatoolkit/static/js/chat_onboarding_button.js +1 -1
- iatoolkit/static/styles/chat_iatoolkit.css +1 -1
- iatoolkit/static/styles/chat_public.css +28 -0
- iatoolkit/static/styles/documents.css +598 -0
- iatoolkit/static/styles/landing_page.css +223 -7
- iatoolkit/system_prompts/__init__.py +0 -0
- iatoolkit/system_prompts/query_main.prompt +2 -1
- iatoolkit/system_prompts/sql_rules.prompt +47 -12
- iatoolkit/templates/_company_header.html +30 -5
- iatoolkit/templates/_login_widget.html +3 -3
- iatoolkit/templates/chat.html +1 -1
- iatoolkit/templates/forgot_password.html +3 -2
- iatoolkit/templates/onboarding_shell.html +1 -1
- iatoolkit/templates/signup.html +3 -0
- iatoolkit/views/base_login_view.py +1 -1
- iatoolkit/views/change_password_view.py +1 -1
- iatoolkit/views/forgot_password_view.py +9 -4
- iatoolkit/views/history_api_view.py +3 -3
- iatoolkit/views/home_view.py +4 -2
- iatoolkit/views/init_context_api_view.py +1 -1
- iatoolkit/views/llmquery_api_view.py +4 -3
- iatoolkit/views/{file_store_api_view.py → load_document_api_view.py} +1 -1
- iatoolkit/views/login_view.py +17 -5
- iatoolkit/views/logout_api_view.py +10 -2
- iatoolkit/views/prompt_api_view.py +1 -1
- iatoolkit/views/root_redirect_view.py +22 -0
- iatoolkit/views/signup_view.py +12 -4
- iatoolkit/views/static_page_view.py +27 -0
- iatoolkit/views/verify_user_view.py +1 -1
- iatoolkit-0.91.1.dist-info/METADATA +268 -0
- iatoolkit-0.91.1.dist-info/RECORD +125 -0
- iatoolkit-0.91.1.dist-info/licenses/LICENSE_COMMUNITY.md +15 -0
- iatoolkit/services/history_service.py +0 -37
- iatoolkit/templates/about.html +0 -13
- iatoolkit/templates/index.html +0 -145
- iatoolkit/templates/login_simulation.html +0 -45
- iatoolkit/views/external_login_view.py +0 -73
- iatoolkit/views/index_view.py +0 -14
- iatoolkit/views/login_simulation_view.py +0 -93
- iatoolkit-0.71.4.dist-info/METADATA +0 -276
- iatoolkit-0.71.4.dist-info/RECORD +0 -122
- {iatoolkit-0.71.4.dist-info → iatoolkit-0.91.1.dist-info}/WHEEL +0 -0
- {iatoolkit-0.71.4.dist-info → iatoolkit-0.91.1.dist-info}/licenses/LICENSE +0 -0
- {iatoolkit-0.71.4.dist-info → iatoolkit-0.91.1.dist-info}/top_level.txt +0 -0
|
@@ -4,13 +4,9 @@
|
|
|
4
4
|
# IAToolkit is open source software.
|
|
5
5
|
|
|
6
6
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
7
|
-
from iatoolkit.services.
|
|
8
|
-
from iatoolkit.services.sql_service import SqlService
|
|
7
|
+
from iatoolkit.services.prompt_service import PromptService
|
|
9
8
|
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
10
9
|
from iatoolkit.services.configuration_service import ConfigurationService
|
|
11
|
-
from iatoolkit.repositories.models import Company, Function
|
|
12
|
-
from iatoolkit.services.excel_service import ExcelService
|
|
13
|
-
from iatoolkit.services.mail_service import MailService
|
|
14
10
|
from iatoolkit.common.util import Utility
|
|
15
11
|
from injector import inject
|
|
16
12
|
import logging
|
|
@@ -23,28 +19,25 @@ class Dispatcher:
|
|
|
23
19
|
config_service: ConfigurationService,
|
|
24
20
|
prompt_service: PromptService,
|
|
25
21
|
llmquery_repo: LLMQueryRepo,
|
|
26
|
-
util: Utility,
|
|
27
|
-
sql_service: SqlService,
|
|
28
|
-
excel_service: ExcelService,
|
|
29
|
-
mail_service: MailService):
|
|
22
|
+
util: Utility,):
|
|
30
23
|
self.config_service = config_service
|
|
31
24
|
self.prompt_service = prompt_service
|
|
32
25
|
self.llmquery_repo = llmquery_repo
|
|
33
26
|
self.util = util
|
|
34
|
-
self.sql_service = sql_service
|
|
35
|
-
self.excel_service = excel_service
|
|
36
|
-
self.mail_service = mail_service
|
|
37
|
-
self.system_functions = _FUNCTION_LIST
|
|
38
|
-
self.system_prompts = _SYSTEM_PROMPT
|
|
39
27
|
|
|
28
|
+
self._tool_service = None
|
|
40
29
|
self._company_registry = None
|
|
41
30
|
self._company_instances = None
|
|
42
31
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
32
|
+
|
|
33
|
+
@property
|
|
34
|
+
def tool_service(self):
|
|
35
|
+
"""Lazy-loads and returns the ToolService instance to avoid circular imports."""
|
|
36
|
+
if self._tool_service is None:
|
|
37
|
+
from iatoolkit import current_iatoolkit
|
|
38
|
+
from iatoolkit.services.tool_service import ToolService
|
|
39
|
+
self._tool_service = current_iatoolkit().get_injector().get(ToolService)
|
|
40
|
+
return self._tool_service
|
|
48
41
|
|
|
49
42
|
@property
|
|
50
43
|
def company_registry(self):
|
|
@@ -65,87 +58,49 @@ class Dispatcher:
|
|
|
65
58
|
# initialize the system functions and prompts
|
|
66
59
|
self.setup_iatoolkit_system()
|
|
67
60
|
|
|
68
|
-
|
|
61
|
+
# Loads the configuration of every company: company.yaml file
|
|
69
62
|
for company_name, company_instance in self.company_instances.items():
|
|
70
63
|
try:
|
|
71
64
|
# read company configuration from company.yaml
|
|
72
65
|
self.config_service.load_configuration(company_name, company_instance)
|
|
73
66
|
|
|
74
|
-
# register the company databases
|
|
75
|
-
self._register_company_databases(company_name)
|
|
76
|
-
|
|
77
67
|
except Exception as e:
|
|
78
68
|
logging.error(f"❌ Failed to register configuration for '{company_name}': {e}")
|
|
79
|
-
|
|
69
|
+
raise e
|
|
80
70
|
|
|
81
71
|
return True
|
|
82
72
|
|
|
83
|
-
def _register_company_databases(self, company_name: str):
|
|
84
|
-
"""
|
|
85
|
-
Reads the data_sources config for a company and registers each
|
|
86
|
-
database with the central SqlService.
|
|
87
|
-
"""
|
|
88
|
-
logging.info(f" -> Registering databases for '{company_name}'...")
|
|
89
|
-
data_sources_config = self.config_service.get_configuration(company_name, 'data_sources')
|
|
90
|
-
|
|
91
|
-
if not data_sources_config or not data_sources_config.get('sql'):
|
|
92
|
-
return
|
|
93
|
-
|
|
94
|
-
for db_config in data_sources_config['sql']:
|
|
95
|
-
db_name = db_config.get('database')
|
|
96
|
-
db_env_var = db_config.get('connection_string_env')
|
|
97
|
-
|
|
98
|
-
# resolve the URI connection string from the environment variable
|
|
99
|
-
db_uri = os.getenv(db_env_var) if db_env_var else None
|
|
100
|
-
if not db_uri:
|
|
101
|
-
logging.error(
|
|
102
|
-
f"-> Skipping database registration for '{company_name}' due to missing 'database' name or invalid connection URI.")
|
|
103
|
-
return
|
|
104
|
-
|
|
105
|
-
self.sql_service.register_database(db_name, db_uri)
|
|
106
|
-
|
|
107
73
|
def setup_iatoolkit_system(self):
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
self.
|
|
111
|
-
Function(
|
|
112
|
-
company_id=None,
|
|
113
|
-
system_function=True,
|
|
114
|
-
name=function['function_name'],
|
|
115
|
-
description= function['description'],
|
|
116
|
-
parameters=function['parameters']
|
|
117
|
-
)
|
|
118
|
-
)
|
|
74
|
+
try:
|
|
75
|
+
# system tools registration
|
|
76
|
+
self.tool_service.register_system_tools()
|
|
119
77
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
order=1,
|
|
127
|
-
is_system_prompt=True,
|
|
128
|
-
)
|
|
129
|
-
i += 1
|
|
78
|
+
# system prompts registration
|
|
79
|
+
self.prompt_service.register_system_prompts()
|
|
80
|
+
|
|
81
|
+
except Exception as e:
|
|
82
|
+
self.llmquery_repo.rollback()
|
|
83
|
+
raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR, str(e))
|
|
130
84
|
|
|
131
85
|
|
|
132
|
-
def dispatch(self, company_short_name: str,
|
|
86
|
+
def dispatch(self, company_short_name: str, function_name: str, **kwargs) -> dict:
|
|
133
87
|
company_key = company_short_name.lower()
|
|
134
88
|
|
|
135
89
|
if company_key not in self.company_instances:
|
|
136
90
|
available_companies = list(self.company_instances.keys())
|
|
137
91
|
raise IAToolkitException(
|
|
138
92
|
IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
139
|
-
f"
|
|
93
|
+
f"Company '{company_short_name}' not configured. available companies: {available_companies}"
|
|
140
94
|
)
|
|
141
95
|
|
|
142
|
-
# check if action is a system function
|
|
143
|
-
if
|
|
144
|
-
|
|
96
|
+
# check if action is a system function using ToolService
|
|
97
|
+
if self.tool_service.is_system_tool(function_name):
|
|
98
|
+
handler = self.tool_service.get_system_handler(function_name)
|
|
99
|
+
return handler(company_short_name, **kwargs)
|
|
145
100
|
|
|
146
101
|
company_instance = self.company_instances[company_short_name]
|
|
147
102
|
try:
|
|
148
|
-
return company_instance.handle_request(
|
|
103
|
+
return company_instance.handle_request(function_name, **kwargs)
|
|
149
104
|
except IAToolkitException as e:
|
|
150
105
|
# Si ya es una IAToolkitException, la relanzamos para preservar el tipo de error original.
|
|
151
106
|
raise e
|
|
@@ -153,158 +108,9 @@ class Dispatcher:
|
|
|
153
108
|
except Exception as e:
|
|
154
109
|
logging.exception(e)
|
|
155
110
|
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
156
|
-
f"Error
|
|
111
|
+
f"Error in function call '{function_name}': {str(e)}") from e
|
|
157
112
|
|
|
158
|
-
def get_company_services(self, company: Company) -> list[dict]:
|
|
159
|
-
# create the syntax with openai response syntax, for the company function list
|
|
160
|
-
tools = []
|
|
161
|
-
functions = self.llmquery_repo.get_company_functions(company)
|
|
162
|
-
|
|
163
|
-
for function in functions:
|
|
164
|
-
# make sure is always on
|
|
165
|
-
function.parameters["additionalProperties"] = False
|
|
166
|
-
|
|
167
|
-
ai_tool = {
|
|
168
|
-
"type": "function",
|
|
169
|
-
"name": function.name,
|
|
170
|
-
"description": function.description,
|
|
171
|
-
"parameters": function.parameters,
|
|
172
|
-
"strict": True
|
|
173
|
-
}
|
|
174
|
-
tools.append(ai_tool)
|
|
175
|
-
return tools
|
|
176
|
-
|
|
177
|
-
def get_user_info(self, company_name: str, user_identifier: str) -> dict:
|
|
178
|
-
if company_name not in self.company_instances:
|
|
179
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
180
|
-
f"company not configured: {company_name}")
|
|
181
|
-
|
|
182
|
-
# source 2: external company user
|
|
183
|
-
company_instance = self.company_instances[company_name]
|
|
184
|
-
try:
|
|
185
|
-
external_user_profile = company_instance.get_user_info(user_identifier)
|
|
186
|
-
except Exception as e:
|
|
187
|
-
logging.exception(e)
|
|
188
|
-
raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
|
|
189
|
-
f"Error in get_user_info: {company_name}: {str(e)}") from e
|
|
190
|
-
|
|
191
|
-
return external_user_profile
|
|
192
113
|
|
|
193
114
|
def get_company_instance(self, company_name: str):
|
|
194
115
|
"""Returns the instance for a given company name."""
|
|
195
116
|
return self.company_instances.get(company_name)
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
# iatoolkit system prompts
|
|
199
|
-
_SYSTEM_PROMPT = [
|
|
200
|
-
{'name': 'query_main', 'description':'iatoolkit main prompt'},
|
|
201
|
-
{'name': 'format_styles', 'description':'output format styles'},
|
|
202
|
-
{'name': 'sql_rules', 'description':'instructions for SQL queries'}
|
|
203
|
-
]
|
|
204
|
-
|
|
205
|
-
# iatoolkit built-in functions (Tools)
|
|
206
|
-
_FUNCTION_LIST = [
|
|
207
|
-
{
|
|
208
|
-
"function_name": "iat_sql_query",
|
|
209
|
-
"description": "Servicio SQL de IAToolkit: debes utilizar este servicio para todas las consultas a base de datos.",
|
|
210
|
-
"parameters": {
|
|
211
|
-
"type": "object",
|
|
212
|
-
"properties": {
|
|
213
|
-
"database": {
|
|
214
|
-
"type": "string",
|
|
215
|
-
"description": "nombre de la base de datos a consultar: `database_name`"
|
|
216
|
-
},
|
|
217
|
-
"query": {
|
|
218
|
-
"type": "string",
|
|
219
|
-
"description": "string con la consulta en sql"
|
|
220
|
-
},
|
|
221
|
-
},
|
|
222
|
-
"required": ["database", "query"]
|
|
223
|
-
}
|
|
224
|
-
},
|
|
225
|
-
{
|
|
226
|
-
"function_name": "iat_generate_excel",
|
|
227
|
-
"description": "Generador de Excel."
|
|
228
|
-
"Genera un archivo Excel (.xlsx) a partir de una lista de diccionarios. "
|
|
229
|
-
"Cada diccionario representa una fila del archivo. "
|
|
230
|
-
"el archivo se guarda en directorio de descargas."
|
|
231
|
-
"retorna diccionario con filename, attachment_token (para enviar archivo por mail)"
|
|
232
|
-
"content_type y download_link",
|
|
233
|
-
"parameters": {
|
|
234
|
-
"type": "object",
|
|
235
|
-
"properties": {
|
|
236
|
-
"filename": {
|
|
237
|
-
"type": "string",
|
|
238
|
-
"description": "Nombre del archivo de salida (ejemplo: 'reporte.xlsx')",
|
|
239
|
-
"pattern": "^.+\\.xlsx?$"
|
|
240
|
-
},
|
|
241
|
-
"sheet_name": {
|
|
242
|
-
"type": "string",
|
|
243
|
-
"description": "Nombre de la hoja dentro del Excel",
|
|
244
|
-
"minLength": 1
|
|
245
|
-
},
|
|
246
|
-
"data": {
|
|
247
|
-
"type": "array",
|
|
248
|
-
"description": "Lista de diccionarios. Cada diccionario representa una fila.",
|
|
249
|
-
"minItems": 1,
|
|
250
|
-
"items": {
|
|
251
|
-
"type": "object",
|
|
252
|
-
"properties": {},
|
|
253
|
-
"additionalProperties": {
|
|
254
|
-
"anyOf": [
|
|
255
|
-
{"type": "string"},
|
|
256
|
-
{"type": "number"},
|
|
257
|
-
{"type": "boolean"},
|
|
258
|
-
{"type": "null"},
|
|
259
|
-
{
|
|
260
|
-
"type": "string",
|
|
261
|
-
"format": "date"
|
|
262
|
-
}
|
|
263
|
-
]
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
},
|
|
268
|
-
"required": ["filename", "sheet_name", "data"]
|
|
269
|
-
}
|
|
270
|
-
},
|
|
271
|
-
{
|
|
272
|
-
'function_name': "iat_send_email",
|
|
273
|
-
'description': "iatoolkit mail system. "
|
|
274
|
-
"envia mails cuando un usuario lo solicita."
|
|
275
|
-
"Si no te indican quien envia el correo utiliza la dirección iatoolkit@iatoolkit.com",
|
|
276
|
-
'parameters': {
|
|
277
|
-
"type": "object",
|
|
278
|
-
"properties": {
|
|
279
|
-
"from_email": {"type": "string","description": "dirección de correo electrónico que esta enviando el email."},
|
|
280
|
-
"recipient": {"type": "string", "description": "email del destinatario"},
|
|
281
|
-
"subject": {"type": "string", "description": "asunto del email"},
|
|
282
|
-
"body": {"type": "string", "description": "HTML del email"},
|
|
283
|
-
"attachments": {
|
|
284
|
-
"type": "array",
|
|
285
|
-
"description": "Lista de archivos adjuntos codificados en base64",
|
|
286
|
-
"items": {
|
|
287
|
-
"type": "object",
|
|
288
|
-
"properties": {
|
|
289
|
-
"filename": {
|
|
290
|
-
"type": "string",
|
|
291
|
-
"description": "Nombre del archivo con su extensión (ej. informe.pdf)"
|
|
292
|
-
},
|
|
293
|
-
"content": {
|
|
294
|
-
"type": "string",
|
|
295
|
-
"description": "Contenido del archivo en b64."
|
|
296
|
-
},
|
|
297
|
-
"attachment_token": {
|
|
298
|
-
"type": "string",
|
|
299
|
-
"description": "token para descargar el archivo."
|
|
300
|
-
}
|
|
301
|
-
},
|
|
302
|
-
"required": ["filename", "content", "attachment_token"],
|
|
303
|
-
"additionalProperties": False
|
|
304
|
-
}
|
|
305
|
-
}
|
|
306
|
-
},
|
|
307
|
-
"required": ["from_email","recipient", "subject", "body", "attachments"]
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
]
|
|
@@ -12,10 +12,15 @@ import pytesseract
|
|
|
12
12
|
from injector import inject
|
|
13
13
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
14
14
|
from iatoolkit.services.i18n_service import I18nService
|
|
15
|
+
from iatoolkit.services.excel_service import ExcelService
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
class DocumentService:
|
|
17
19
|
@inject
|
|
18
|
-
def __init__(self,
|
|
20
|
+
def __init__(self,
|
|
21
|
+
excel_service: ExcelService,
|
|
22
|
+
i18n_service: I18nService):
|
|
23
|
+
self.excel_service = excel_service
|
|
19
24
|
self.i18n_service = i18n_service
|
|
20
25
|
|
|
21
26
|
# max number of pages to load
|
|
@@ -40,6 +45,10 @@ class DocumentService:
|
|
|
40
45
|
return self.read_scanned_pdf(file_content)
|
|
41
46
|
else:
|
|
42
47
|
return self.read_pdf(file_content)
|
|
48
|
+
elif filename.lower().endswith(('.xlsx', '.xls')):
|
|
49
|
+
return self.excel_service.read_excel(file_content)
|
|
50
|
+
elif filename.lower().endswith('.csv'):
|
|
51
|
+
return self.excel_service.read_csv(file_content)
|
|
43
52
|
else:
|
|
44
53
|
raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
|
|
45
54
|
"Formato de archivo desconocido")
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
import os
|
|
6
6
|
import base64
|
|
7
7
|
import numpy as np
|
|
8
|
-
from threading import Lock
|
|
9
8
|
from huggingface_hub import InferenceClient
|
|
10
9
|
from openai import OpenAI
|
|
11
10
|
from injector import inject
|
|
@@ -14,12 +13,14 @@ from iatoolkit.services.i18n_service import I18nService
|
|
|
14
13
|
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
15
14
|
import logging
|
|
16
15
|
|
|
16
|
+
|
|
17
17
|
# Wrapper classes to create a common interface for embedding clients
|
|
18
18
|
class EmbeddingClientWrapper:
|
|
19
19
|
"""Abstract base class for embedding client wrappers."""
|
|
20
|
-
def __init__(self, client, model: str):
|
|
20
|
+
def __init__(self, client, model: str, dimensions: int = 1536):
|
|
21
21
|
self.client = client
|
|
22
22
|
self.model = model
|
|
23
|
+
self.dimensions = dimensions
|
|
23
24
|
|
|
24
25
|
def get_embedding(self, text: str) -> list[float]:
|
|
25
26
|
"""Generates and returns an embedding for the given text."""
|
|
@@ -37,7 +38,9 @@ class OpenAIClientWrapper(EmbeddingClientWrapper):
|
|
|
37
38
|
def get_embedding(self, text: str) -> list[float]:
|
|
38
39
|
# The OpenAI API expects the input text to be clean
|
|
39
40
|
text = text.replace("\n", " ")
|
|
40
|
-
response = self.client.embeddings.create(input=[text],
|
|
41
|
+
response = self.client.embeddings.create(input=[text],
|
|
42
|
+
model=self.model,
|
|
43
|
+
dimensions=self.dimensions)
|
|
41
44
|
return response.data[0].embedding
|
|
42
45
|
|
|
43
46
|
# Factory and Service classes
|
|
@@ -68,6 +71,7 @@ class EmbeddingClientFactory:
|
|
|
68
71
|
if not provider:
|
|
69
72
|
raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
|
|
70
73
|
model = embedding_config.get('model')
|
|
74
|
+
dimensions = int(embedding_config.get('dimensions', "1536"))
|
|
71
75
|
|
|
72
76
|
api_key_name = embedding_config.get('api_key_name')
|
|
73
77
|
if not api_key_name:
|
|
@@ -83,12 +87,12 @@ class EmbeddingClientFactory:
|
|
|
83
87
|
if not model:
|
|
84
88
|
model='sentence-transformers/all-MiniLM-L6-v2'
|
|
85
89
|
client = InferenceClient(model=model, token=api_key)
|
|
86
|
-
wrapper = HuggingFaceClientWrapper(client, model)
|
|
90
|
+
wrapper = HuggingFaceClientWrapper(client, model, dimensions)
|
|
87
91
|
elif provider == 'openai':
|
|
88
92
|
client = OpenAI(api_key=api_key)
|
|
89
93
|
if not model:
|
|
90
94
|
model='text-embedding-ada-002'
|
|
91
|
-
wrapper = OpenAIClientWrapper(client, model)
|
|
95
|
+
wrapper = OpenAIClientWrapper(client, model, dimensions)
|
|
92
96
|
else:
|
|
93
97
|
raise NotImplementedError(f"Embedding provider '{provider}' is not implemented.")
|
|
94
98
|
|
|
@@ -126,7 +130,6 @@ class EmbeddingService:
|
|
|
126
130
|
|
|
127
131
|
# 2. Use the wrapper's common interface to get the embedding
|
|
128
132
|
embedding = client_wrapper.get_embedding(text)
|
|
129
|
-
|
|
130
133
|
# 3. Process the result
|
|
131
134
|
if to_base64:
|
|
132
135
|
return base64.b64encode(np.array(embedding, dtype=np.float32).tobytes()).decode('utf-8')
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
#
|
|
4
4
|
# IAToolkit is open source software.
|
|
5
5
|
|
|
6
|
+
from flask import current_app, jsonify
|
|
6
7
|
from iatoolkit.common.util import Utility
|
|
7
8
|
import pandas as pd
|
|
8
9
|
from uuid import uuid4
|
|
@@ -11,8 +12,9 @@ from iatoolkit.common.exceptions import IAToolkitException
|
|
|
11
12
|
from iatoolkit.services.i18n_service import I18nService
|
|
12
13
|
from injector import inject
|
|
13
14
|
import os
|
|
15
|
+
import io
|
|
14
16
|
import logging
|
|
15
|
-
|
|
17
|
+
import json
|
|
16
18
|
|
|
17
19
|
EXCEL_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
18
20
|
|
|
@@ -25,7 +27,53 @@ class ExcelService:
|
|
|
25
27
|
self.util = util
|
|
26
28
|
self.i18n_service = i18n_service
|
|
27
29
|
|
|
28
|
-
def
|
|
30
|
+
def read_excel(self, file_content: bytes) -> str:
|
|
31
|
+
"""
|
|
32
|
+
Reads an Excel file and converts its content to a JSON string.
|
|
33
|
+
- If the Excel file has a single sheet, it returns the JSON of that sheet.
|
|
34
|
+
- If it has multiple sheets, it returns a JSON object with sheet names as keys.
|
|
35
|
+
"""
|
|
36
|
+
try:
|
|
37
|
+
# Use a BytesIO object to allow pandas to read the in-memory byte content
|
|
38
|
+
file_like_object = io.BytesIO(file_content)
|
|
39
|
+
|
|
40
|
+
# Read all sheets into a dictionary of DataFrames
|
|
41
|
+
xls = pd.read_excel(file_like_object, sheet_name=None)
|
|
42
|
+
|
|
43
|
+
if len(xls) == 1:
|
|
44
|
+
# If only one sheet, return its JSON representation directly
|
|
45
|
+
sheet_name = list(xls.keys())[0]
|
|
46
|
+
return xls[sheet_name].to_json(orient='records', indent=4)
|
|
47
|
+
else:
|
|
48
|
+
# If multiple sheets, create a dictionary of JSON strings
|
|
49
|
+
sheets_json = {}
|
|
50
|
+
for sheet_name, df in xls.items():
|
|
51
|
+
sheets_json[sheet_name] = df.to_json(orient='records', indent=4)
|
|
52
|
+
return json.dumps(sheets_json, indent=4)
|
|
53
|
+
|
|
54
|
+
except Exception as e:
|
|
55
|
+
raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
|
|
56
|
+
self.i18n_service.t('errors.services.cannot_read_excel')) from e
|
|
57
|
+
|
|
58
|
+
def read_csv(self, file_content: bytes) -> str:
|
|
59
|
+
"""
|
|
60
|
+
Reads a CSV file and converts its content to a JSON string.
|
|
61
|
+
"""
|
|
62
|
+
try:
|
|
63
|
+
# Use a BytesIO object to allow pandas to read the in-memory byte content
|
|
64
|
+
file_like_object = io.BytesIO(file_content)
|
|
65
|
+
|
|
66
|
+
# Read the CSV into a DataFrame
|
|
67
|
+
df = pd.read_csv(file_like_object)
|
|
68
|
+
|
|
69
|
+
# Return JSON representation
|
|
70
|
+
return df.to_json(orient='records', indent=4)
|
|
71
|
+
|
|
72
|
+
except Exception as e:
|
|
73
|
+
raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
|
|
74
|
+
self.i18n_service.t('errors.services.cannot_read_csv')) from e
|
|
75
|
+
|
|
76
|
+
def excel_generator(self, company_short_name: str, **kwargs) -> str:
|
|
29
77
|
"""
|
|
30
78
|
Genera un Excel a partir de una lista de diccionarios.
|
|
31
79
|
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import json
|
|
3
|
+
from typing import Dict, Any, Tuple, Optional
|
|
4
|
+
from iatoolkit.services.user_session_context_service import UserSessionContextService
|
|
5
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
6
|
+
from iatoolkit.infra.llm_client import llmClient
|
|
7
|
+
from iatoolkit.repositories.models import Company
|
|
8
|
+
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
9
|
+
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
10
|
+
from injector import inject
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HistoryManagerService:
|
|
14
|
+
"""
|
|
15
|
+
Manages conversation history for LLMs in a unified way.
|
|
16
|
+
Handles:
|
|
17
|
+
1. Server-side history (e.g., OpenAI response_ids).
|
|
18
|
+
2. Client-side history (e.g., Gemini message lists).
|
|
19
|
+
3. Database persistence retrieval (full chat history).
|
|
20
|
+
"""
|
|
21
|
+
TYPE_SERVER_SIDE = 'server_side' # For models like OpenAI
|
|
22
|
+
TYPE_CLIENT_SIDE = 'client_side' # For models like Gemini
|
|
23
|
+
|
|
24
|
+
GEMINI_MAX_TOKENS_CONTEXT_HISTORY = 200000
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@inject
|
|
28
|
+
def __init__(self,
|
|
29
|
+
session_context: UserSessionContextService,
|
|
30
|
+
i18n: I18nService,
|
|
31
|
+
llm_query_repo: LLMQueryRepo,
|
|
32
|
+
profile_repo: ProfileRepo,
|
|
33
|
+
llm_client: Optional[llmClient] = None):
|
|
34
|
+
self.session_context = session_context
|
|
35
|
+
self.i18n = i18n
|
|
36
|
+
self.llm_query_repo = llm_query_repo
|
|
37
|
+
self.profile_repo = profile_repo
|
|
38
|
+
self.llm_client = llm_client
|
|
39
|
+
|
|
40
|
+
def initialize_context(self,
|
|
41
|
+
company_short_name: str,
|
|
42
|
+
user_identifier: str,
|
|
43
|
+
history_type: str,
|
|
44
|
+
prepared_context: str,
|
|
45
|
+
company: Company, model: str) -> Dict[str, Any]:
|
|
46
|
+
"""
|
|
47
|
+
Initializes a new conversation history.
|
|
48
|
+
"""
|
|
49
|
+
# 1. Clear existing history
|
|
50
|
+
self.session_context.clear_llm_history(company_short_name, user_identifier)
|
|
51
|
+
|
|
52
|
+
if history_type == self.TYPE_SERVER_SIDE:
|
|
53
|
+
# OpenAI: Send system prompt to API and store the resulting ID
|
|
54
|
+
response_id = self.llm_client.set_company_context(
|
|
55
|
+
company=company,
|
|
56
|
+
company_base_context=prepared_context,
|
|
57
|
+
model=model
|
|
58
|
+
)
|
|
59
|
+
self.session_context.save_last_response_id(company_short_name, user_identifier, response_id)
|
|
60
|
+
self.session_context.save_initial_response_id(company_short_name, user_identifier, response_id)
|
|
61
|
+
return {'response_id': response_id}
|
|
62
|
+
|
|
63
|
+
elif history_type == self.TYPE_CLIENT_SIDE:
|
|
64
|
+
# Gemini: Store system prompt as the first message in the list
|
|
65
|
+
context_history = [{"role": "user", "content": prepared_context}]
|
|
66
|
+
self.session_context.save_context_history(company_short_name, user_identifier, context_history)
|
|
67
|
+
return {}
|
|
68
|
+
|
|
69
|
+
return {}
|
|
70
|
+
|
|
71
|
+
def populate_request_params(self,
|
|
72
|
+
handle: Any,
|
|
73
|
+
user_turn_prompt: str,
|
|
74
|
+
ignore_history: bool = False) -> bool:
|
|
75
|
+
"""
|
|
76
|
+
Populates the request_params within the HistoryHandle.
|
|
77
|
+
Returns True if a rebuild is needed, False otherwise.
|
|
78
|
+
"""
|
|
79
|
+
if handle.type == self.TYPE_SERVER_SIDE:
|
|
80
|
+
previous_response_id = None
|
|
81
|
+
if ignore_history:
|
|
82
|
+
previous_response_id = self.session_context.get_initial_response_id(handle.company_short_name,
|
|
83
|
+
handle.user_identifier)
|
|
84
|
+
else:
|
|
85
|
+
previous_response_id = self.session_context.get_last_response_id(handle.company_short_name,
|
|
86
|
+
handle.user_identifier)
|
|
87
|
+
|
|
88
|
+
if not previous_response_id:
|
|
89
|
+
handle.request_params = {}
|
|
90
|
+
return True # Needs rebuild
|
|
91
|
+
|
|
92
|
+
handle.request_params = {'previous_response_id': previous_response_id}
|
|
93
|
+
return False
|
|
94
|
+
|
|
95
|
+
elif handle.type == self.TYPE_CLIENT_SIDE:
|
|
96
|
+
context_history = self.session_context.get_context_history(handle.company_short_name,
|
|
97
|
+
handle.user_identifier) or []
|
|
98
|
+
|
|
99
|
+
if not context_history:
|
|
100
|
+
handle.request_params = {}
|
|
101
|
+
return True # Needs rebuild
|
|
102
|
+
|
|
103
|
+
if ignore_history and len(context_history) > 1:
|
|
104
|
+
# Keep only system prompt
|
|
105
|
+
context_history = [context_history[0]]
|
|
106
|
+
|
|
107
|
+
# For Gemini, we append the current user turn to the context sent to the API
|
|
108
|
+
context_history.append({"role": "user", "content": user_turn_prompt})
|
|
109
|
+
|
|
110
|
+
self._trim_context_history(context_history)
|
|
111
|
+
|
|
112
|
+
handle.request_params = {'context_history': context_history}
|
|
113
|
+
return False
|
|
114
|
+
|
|
115
|
+
handle.request_params = {}
|
|
116
|
+
return False
|
|
117
|
+
|
|
118
|
+
def update_history(self,
|
|
119
|
+
history_handle: Any,
|
|
120
|
+
user_turn_prompt: str,
|
|
121
|
+
response: Dict[str, Any]):
|
|
122
|
+
"""Saves or updates the history after a successful LLM call."""
|
|
123
|
+
|
|
124
|
+
# We access the type from the handle
|
|
125
|
+
history_type = history_handle.type
|
|
126
|
+
company_short_name = history_handle.company_short_name
|
|
127
|
+
user_identifier = history_handle.user_identifier
|
|
128
|
+
|
|
129
|
+
if history_type == self.TYPE_SERVER_SIDE:
|
|
130
|
+
if "response_id" in response:
|
|
131
|
+
self.session_context.save_last_response_id(company_short_name, user_identifier,
|
|
132
|
+
response["response_id"])
|
|
133
|
+
|
|
134
|
+
elif history_type == self.TYPE_CLIENT_SIDE:
|
|
135
|
+
context_history = self.session_context.get_context_history(company_short_name,
|
|
136
|
+
user_identifier) or []
|
|
137
|
+
# Ensure the user prompt is recorded if not already.
|
|
138
|
+
# We check content equality to handle the case where the previous message was
|
|
139
|
+
# also 'user' (e.g., System Prompt) but different content.
|
|
140
|
+
last_content = context_history[-1].get("content") if context_history else None
|
|
141
|
+
|
|
142
|
+
if last_content != user_turn_prompt:
|
|
143
|
+
context_history.append({"role": "user", "content": user_turn_prompt})
|
|
144
|
+
|
|
145
|
+
if response.get('output'):
|
|
146
|
+
context_history.append({"role": "model", "content": response['output']})
|
|
147
|
+
|
|
148
|
+
self.session_context.save_context_history(company_short_name, user_identifier, context_history)
|
|
149
|
+
|
|
150
|
+
def _trim_context_history(self, context_history: list):
|
|
151
|
+
"""Internal helper to keep token usage within limits for client-side history."""
|
|
152
|
+
if not context_history or len(context_history) <= 1:
|
|
153
|
+
return
|
|
154
|
+
try:
|
|
155
|
+
total_tokens = sum(self.llm_client.count_tokens(json.dumps(message)) for message in context_history)
|
|
156
|
+
except Exception as e:
|
|
157
|
+
logging.error(f"Error counting tokens for history: {e}.")
|
|
158
|
+
return
|
|
159
|
+
|
|
160
|
+
while total_tokens > self.GEMINI_MAX_TOKENS_CONTEXT_HISTORY and len(context_history) > 1:
|
|
161
|
+
try:
|
|
162
|
+
# Remove the oldest message after system prompt
|
|
163
|
+
removed_message = context_history.pop(1)
|
|
164
|
+
removed_tokens = self.llm_client.count_tokens(json.dumps(removed_message))
|
|
165
|
+
total_tokens -= removed_tokens
|
|
166
|
+
logging.warning(
|
|
167
|
+
f"History tokens exceed limit. Removed old message. New total: {total_tokens} tokens."
|
|
168
|
+
)
|
|
169
|
+
except IndexError:
|
|
170
|
+
break
|
|
171
|
+
|
|
172
|
+
# --- Database History Management (Legacy HistoryService) ---
|
|
173
|
+
|
|
174
|
+
def get_full_history(self, company_short_name: str, user_identifier: str) -> dict:
|
|
175
|
+
"""Retrieves the full persisted history from the database."""
|
|
176
|
+
try:
|
|
177
|
+
company = self.profile_repo.get_company_by_short_name(company_short_name)
|
|
178
|
+
if not company:
|
|
179
|
+
return {"error": self.i18n.t('errors.company_not_found', company_short_name=company_short_name)}
|
|
180
|
+
|
|
181
|
+
history = self.llm_query_repo.get_history(company, user_identifier)
|
|
182
|
+
if not history:
|
|
183
|
+
return {'message': 'empty history', 'history': []}
|
|
184
|
+
|
|
185
|
+
history_list = [query.to_dict() for query in history]
|
|
186
|
+
return {'message': 'history loaded ok', 'history': history_list}
|
|
187
|
+
|
|
188
|
+
except Exception as e:
|
|
189
|
+
return {'error': str(e)}
|