iatoolkit 0.71.4__py3-none-any.whl → 0.91.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. iatoolkit/__init__.py +15 -5
  2. iatoolkit/base_company.py +4 -58
  3. iatoolkit/cli_commands.py +6 -7
  4. iatoolkit/common/exceptions.py +1 -0
  5. iatoolkit/common/routes.py +12 -28
  6. iatoolkit/common/util.py +7 -1
  7. iatoolkit/company_registry.py +50 -14
  8. iatoolkit/{iatoolkit.py → core.py} +54 -55
  9. iatoolkit/infra/{mail_app.py → brevo_mail_app.py} +15 -37
  10. iatoolkit/infra/llm_client.py +9 -5
  11. iatoolkit/locales/en.yaml +10 -2
  12. iatoolkit/locales/es.yaml +171 -162
  13. iatoolkit/repositories/database_manager.py +59 -14
  14. iatoolkit/repositories/llm_query_repo.py +34 -22
  15. iatoolkit/repositories/models.py +16 -18
  16. iatoolkit/repositories/profile_repo.py +5 -10
  17. iatoolkit/repositories/vs_repo.py +9 -4
  18. iatoolkit/services/auth_service.py +1 -1
  19. iatoolkit/services/branding_service.py +1 -1
  20. iatoolkit/services/company_context_service.py +19 -11
  21. iatoolkit/services/configuration_service.py +219 -46
  22. iatoolkit/services/dispatcher_service.py +31 -225
  23. iatoolkit/services/document_service.py +10 -1
  24. iatoolkit/services/embedding_service.py +9 -6
  25. iatoolkit/services/excel_service.py +50 -2
  26. iatoolkit/services/history_manager_service.py +189 -0
  27. iatoolkit/services/jwt_service.py +1 -1
  28. iatoolkit/services/language_service.py +8 -2
  29. iatoolkit/services/license_service.py +82 -0
  30. iatoolkit/services/mail_service.py +171 -25
  31. iatoolkit/services/profile_service.py +37 -32
  32. iatoolkit/services/{prompt_manager_service.py → prompt_service.py} +110 -1
  33. iatoolkit/services/query_service.py +192 -191
  34. iatoolkit/services/sql_service.py +63 -12
  35. iatoolkit/services/tool_service.py +231 -0
  36. iatoolkit/services/user_feedback_service.py +18 -6
  37. iatoolkit/services/user_session_context_service.py +18 -0
  38. iatoolkit/static/images/iatoolkit_core.png +0 -0
  39. iatoolkit/static/images/iatoolkit_logo.png +0 -0
  40. iatoolkit/static/js/chat_feedback_button.js +1 -1
  41. iatoolkit/static/js/chat_help_content.js +4 -4
  42. iatoolkit/static/js/chat_main.js +17 -5
  43. iatoolkit/static/js/chat_onboarding_button.js +1 -1
  44. iatoolkit/static/styles/chat_iatoolkit.css +1 -1
  45. iatoolkit/static/styles/chat_public.css +28 -0
  46. iatoolkit/static/styles/documents.css +598 -0
  47. iatoolkit/static/styles/landing_page.css +223 -7
  48. iatoolkit/system_prompts/__init__.py +0 -0
  49. iatoolkit/system_prompts/query_main.prompt +2 -1
  50. iatoolkit/system_prompts/sql_rules.prompt +47 -12
  51. iatoolkit/templates/_company_header.html +30 -5
  52. iatoolkit/templates/_login_widget.html +3 -3
  53. iatoolkit/templates/chat.html +1 -1
  54. iatoolkit/templates/forgot_password.html +3 -2
  55. iatoolkit/templates/onboarding_shell.html +1 -1
  56. iatoolkit/templates/signup.html +3 -0
  57. iatoolkit/views/base_login_view.py +1 -1
  58. iatoolkit/views/change_password_view.py +1 -1
  59. iatoolkit/views/forgot_password_view.py +9 -4
  60. iatoolkit/views/history_api_view.py +3 -3
  61. iatoolkit/views/home_view.py +4 -2
  62. iatoolkit/views/init_context_api_view.py +1 -1
  63. iatoolkit/views/llmquery_api_view.py +4 -3
  64. iatoolkit/views/{file_store_api_view.py → load_document_api_view.py} +1 -1
  65. iatoolkit/views/login_view.py +17 -5
  66. iatoolkit/views/logout_api_view.py +10 -2
  67. iatoolkit/views/prompt_api_view.py +1 -1
  68. iatoolkit/views/root_redirect_view.py +22 -0
  69. iatoolkit/views/signup_view.py +12 -4
  70. iatoolkit/views/static_page_view.py +27 -0
  71. iatoolkit/views/verify_user_view.py +1 -1
  72. iatoolkit-0.91.1.dist-info/METADATA +268 -0
  73. iatoolkit-0.91.1.dist-info/RECORD +125 -0
  74. iatoolkit-0.91.1.dist-info/licenses/LICENSE_COMMUNITY.md +15 -0
  75. iatoolkit/services/history_service.py +0 -37
  76. iatoolkit/templates/about.html +0 -13
  77. iatoolkit/templates/index.html +0 -145
  78. iatoolkit/templates/login_simulation.html +0 -45
  79. iatoolkit/views/external_login_view.py +0 -73
  80. iatoolkit/views/index_view.py +0 -14
  81. iatoolkit/views/login_simulation_view.py +0 -93
  82. iatoolkit-0.71.4.dist-info/METADATA +0 -276
  83. iatoolkit-0.71.4.dist-info/RECORD +0 -122
  84. {iatoolkit-0.71.4.dist-info → iatoolkit-0.91.1.dist-info}/WHEEL +0 -0
  85. {iatoolkit-0.71.4.dist-info → iatoolkit-0.91.1.dist-info}/licenses/LICENSE +0 -0
  86. {iatoolkit-0.71.4.dist-info → iatoolkit-0.91.1.dist-info}/top_level.txt +0 -0
@@ -4,13 +4,9 @@
4
4
  # IAToolkit is open source software.
5
5
 
6
6
  from iatoolkit.common.exceptions import IAToolkitException
7
- from iatoolkit.services.prompt_manager_service import PromptService
8
- from iatoolkit.services.sql_service import SqlService
7
+ from iatoolkit.services.prompt_service import PromptService
9
8
  from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
10
9
  from iatoolkit.services.configuration_service import ConfigurationService
11
- from iatoolkit.repositories.models import Company, Function
12
- from iatoolkit.services.excel_service import ExcelService
13
- from iatoolkit.services.mail_service import MailService
14
10
  from iatoolkit.common.util import Utility
15
11
  from injector import inject
16
12
  import logging
@@ -23,28 +19,25 @@ class Dispatcher:
23
19
  config_service: ConfigurationService,
24
20
  prompt_service: PromptService,
25
21
  llmquery_repo: LLMQueryRepo,
26
- util: Utility,
27
- sql_service: SqlService,
28
- excel_service: ExcelService,
29
- mail_service: MailService):
22
+ util: Utility,):
30
23
  self.config_service = config_service
31
24
  self.prompt_service = prompt_service
32
25
  self.llmquery_repo = llmquery_repo
33
26
  self.util = util
34
- self.sql_service = sql_service
35
- self.excel_service = excel_service
36
- self.mail_service = mail_service
37
- self.system_functions = _FUNCTION_LIST
38
- self.system_prompts = _SYSTEM_PROMPT
39
27
 
28
+ self._tool_service = None
40
29
  self._company_registry = None
41
30
  self._company_instances = None
42
31
 
43
- self.tool_handlers = {
44
- "iat_generate_excel": self.excel_service.excel_generator,
45
- "iat_send_email": self.mail_service.send_mail,
46
- "iat_sql_query": self.sql_service.exec_sql
47
- }
32
+
33
+ @property
34
+ def tool_service(self):
35
+ """Lazy-loads and returns the ToolService instance to avoid circular imports."""
36
+ if self._tool_service is None:
37
+ from iatoolkit import current_iatoolkit
38
+ from iatoolkit.services.tool_service import ToolService
39
+ self._tool_service = current_iatoolkit().get_injector().get(ToolService)
40
+ return self._tool_service
48
41
 
49
42
  @property
50
43
  def company_registry(self):
@@ -65,87 +58,49 @@ class Dispatcher:
65
58
  # initialize the system functions and prompts
66
59
  self.setup_iatoolkit_system()
67
60
 
68
- """Loads the configuration of every company"""
61
+ # Loads the configuration of every company: company.yaml file
69
62
  for company_name, company_instance in self.company_instances.items():
70
63
  try:
71
64
  # read company configuration from company.yaml
72
65
  self.config_service.load_configuration(company_name, company_instance)
73
66
 
74
- # register the company databases
75
- self._register_company_databases(company_name)
76
-
77
67
  except Exception as e:
78
68
  logging.error(f"❌ Failed to register configuration for '{company_name}': {e}")
79
- continue
69
+ raise e
80
70
 
81
71
  return True
82
72
 
83
- def _register_company_databases(self, company_name: str):
84
- """
85
- Reads the data_sources config for a company and registers each
86
- database with the central SqlService.
87
- """
88
- logging.info(f" -> Registering databases for '{company_name}'...")
89
- data_sources_config = self.config_service.get_configuration(company_name, 'data_sources')
90
-
91
- if not data_sources_config or not data_sources_config.get('sql'):
92
- return
93
-
94
- for db_config in data_sources_config['sql']:
95
- db_name = db_config.get('database')
96
- db_env_var = db_config.get('connection_string_env')
97
-
98
- # resolve the URI connection string from the environment variable
99
- db_uri = os.getenv(db_env_var) if db_env_var else None
100
- if not db_uri:
101
- logging.error(
102
- f"-> Skipping database registration for '{company_name}' due to missing 'database' name or invalid connection URI.")
103
- return
104
-
105
- self.sql_service.register_database(db_name, db_uri)
106
-
107
73
  def setup_iatoolkit_system(self):
108
- # create system functions
109
- for function in self.system_functions:
110
- self.llmquery_repo.create_or_update_function(
111
- Function(
112
- company_id=None,
113
- system_function=True,
114
- name=function['function_name'],
115
- description= function['description'],
116
- parameters=function['parameters']
117
- )
118
- )
74
+ try:
75
+ # system tools registration
76
+ self.tool_service.register_system_tools()
119
77
 
120
- # create the system prompts
121
- i = 1
122
- for prompt in self.system_prompts:
123
- self.prompt_service.create_prompt(
124
- prompt_name=prompt['name'],
125
- description=prompt['description'],
126
- order=1,
127
- is_system_prompt=True,
128
- )
129
- i += 1
78
+ # system prompts registration
79
+ self.prompt_service.register_system_prompts()
80
+
81
+ except Exception as e:
82
+ self.llmquery_repo.rollback()
83
+ raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR, str(e))
130
84
 
131
85
 
132
- def dispatch(self, company_short_name: str, action: str, **kwargs) -> dict:
86
+ def dispatch(self, company_short_name: str, function_name: str, **kwargs) -> dict:
133
87
  company_key = company_short_name.lower()
134
88
 
135
89
  if company_key not in self.company_instances:
136
90
  available_companies = list(self.company_instances.keys())
137
91
  raise IAToolkitException(
138
92
  IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
139
- f"Empresa '{company_short_name}' no configurada. Empresas disponibles: {available_companies}"
93
+ f"Company '{company_short_name}' not configured. available companies: {available_companies}"
140
94
  )
141
95
 
142
- # check if action is a system function
143
- if action in self.tool_handlers:
144
- return self.tool_handlers[action](**kwargs)
96
+ # check if action is a system function using ToolService
97
+ if self.tool_service.is_system_tool(function_name):
98
+ handler = self.tool_service.get_system_handler(function_name)
99
+ return handler(company_short_name, **kwargs)
145
100
 
146
101
  company_instance = self.company_instances[company_short_name]
147
102
  try:
148
- return company_instance.handle_request(action, **kwargs)
103
+ return company_instance.handle_request(function_name, **kwargs)
149
104
  except IAToolkitException as e:
150
105
  # Si ya es una IAToolkitException, la relanzamos para preservar el tipo de error original.
151
106
  raise e
@@ -153,158 +108,9 @@ class Dispatcher:
153
108
  except Exception as e:
154
109
  logging.exception(e)
155
110
  raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
156
- f"Error en function call '{action}': {str(e)}") from e
111
+ f"Error in function call '{function_name}': {str(e)}") from e
157
112
 
158
- def get_company_services(self, company: Company) -> list[dict]:
159
- # create the syntax with openai response syntax, for the company function list
160
- tools = []
161
- functions = self.llmquery_repo.get_company_functions(company)
162
-
163
- for function in functions:
164
- # make sure is always on
165
- function.parameters["additionalProperties"] = False
166
-
167
- ai_tool = {
168
- "type": "function",
169
- "name": function.name,
170
- "description": function.description,
171
- "parameters": function.parameters,
172
- "strict": True
173
- }
174
- tools.append(ai_tool)
175
- return tools
176
-
177
- def get_user_info(self, company_name: str, user_identifier: str) -> dict:
178
- if company_name not in self.company_instances:
179
- raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
180
- f"company not configured: {company_name}")
181
-
182
- # source 2: external company user
183
- company_instance = self.company_instances[company_name]
184
- try:
185
- external_user_profile = company_instance.get_user_info(user_identifier)
186
- except Exception as e:
187
- logging.exception(e)
188
- raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
189
- f"Error in get_user_info: {company_name}: {str(e)}") from e
190
-
191
- return external_user_profile
192
113
 
193
114
  def get_company_instance(self, company_name: str):
194
115
  """Returns the instance for a given company name."""
195
116
  return self.company_instances.get(company_name)
196
-
197
-
198
- # iatoolkit system prompts
199
- _SYSTEM_PROMPT = [
200
- {'name': 'query_main', 'description':'iatoolkit main prompt'},
201
- {'name': 'format_styles', 'description':'output format styles'},
202
- {'name': 'sql_rules', 'description':'instructions for SQL queries'}
203
- ]
204
-
205
- # iatoolkit built-in functions (Tools)
206
- _FUNCTION_LIST = [
207
- {
208
- "function_name": "iat_sql_query",
209
- "description": "Servicio SQL de IAToolkit: debes utilizar este servicio para todas las consultas a base de datos.",
210
- "parameters": {
211
- "type": "object",
212
- "properties": {
213
- "database": {
214
- "type": "string",
215
- "description": "nombre de la base de datos a consultar: `database_name`"
216
- },
217
- "query": {
218
- "type": "string",
219
- "description": "string con la consulta en sql"
220
- },
221
- },
222
- "required": ["database", "query"]
223
- }
224
- },
225
- {
226
- "function_name": "iat_generate_excel",
227
- "description": "Generador de Excel."
228
- "Genera un archivo Excel (.xlsx) a partir de una lista de diccionarios. "
229
- "Cada diccionario representa una fila del archivo. "
230
- "el archivo se guarda en directorio de descargas."
231
- "retorna diccionario con filename, attachment_token (para enviar archivo por mail)"
232
- "content_type y download_link",
233
- "parameters": {
234
- "type": "object",
235
- "properties": {
236
- "filename": {
237
- "type": "string",
238
- "description": "Nombre del archivo de salida (ejemplo: 'reporte.xlsx')",
239
- "pattern": "^.+\\.xlsx?$"
240
- },
241
- "sheet_name": {
242
- "type": "string",
243
- "description": "Nombre de la hoja dentro del Excel",
244
- "minLength": 1
245
- },
246
- "data": {
247
- "type": "array",
248
- "description": "Lista de diccionarios. Cada diccionario representa una fila.",
249
- "minItems": 1,
250
- "items": {
251
- "type": "object",
252
- "properties": {},
253
- "additionalProperties": {
254
- "anyOf": [
255
- {"type": "string"},
256
- {"type": "number"},
257
- {"type": "boolean"},
258
- {"type": "null"},
259
- {
260
- "type": "string",
261
- "format": "date"
262
- }
263
- ]
264
- }
265
- }
266
- }
267
- },
268
- "required": ["filename", "sheet_name", "data"]
269
- }
270
- },
271
- {
272
- 'function_name': "iat_send_email",
273
- 'description': "iatoolkit mail system. "
274
- "envia mails cuando un usuario lo solicita."
275
- "Si no te indican quien envia el correo utiliza la dirección iatoolkit@iatoolkit.com",
276
- 'parameters': {
277
- "type": "object",
278
- "properties": {
279
- "from_email": {"type": "string","description": "dirección de correo electrónico que esta enviando el email."},
280
- "recipient": {"type": "string", "description": "email del destinatario"},
281
- "subject": {"type": "string", "description": "asunto del email"},
282
- "body": {"type": "string", "description": "HTML del email"},
283
- "attachments": {
284
- "type": "array",
285
- "description": "Lista de archivos adjuntos codificados en base64",
286
- "items": {
287
- "type": "object",
288
- "properties": {
289
- "filename": {
290
- "type": "string",
291
- "description": "Nombre del archivo con su extensión (ej. informe.pdf)"
292
- },
293
- "content": {
294
- "type": "string",
295
- "description": "Contenido del archivo en b64."
296
- },
297
- "attachment_token": {
298
- "type": "string",
299
- "description": "token para descargar el archivo."
300
- }
301
- },
302
- "required": ["filename", "content", "attachment_token"],
303
- "additionalProperties": False
304
- }
305
- }
306
- },
307
- "required": ["from_email","recipient", "subject", "body", "attachments"]
308
- }
309
- }
310
- ]
@@ -12,10 +12,15 @@ import pytesseract
12
12
  from injector import inject
13
13
  from iatoolkit.common.exceptions import IAToolkitException
14
14
  from iatoolkit.services.i18n_service import I18nService
15
+ from iatoolkit.services.excel_service import ExcelService
16
+
15
17
 
16
18
  class DocumentService:
17
19
  @inject
18
- def __init__(self, i18n_service: I18nService):
20
+ def __init__(self,
21
+ excel_service: ExcelService,
22
+ i18n_service: I18nService):
23
+ self.excel_service = excel_service
19
24
  self.i18n_service = i18n_service
20
25
 
21
26
  # max number of pages to load
@@ -40,6 +45,10 @@ class DocumentService:
40
45
  return self.read_scanned_pdf(file_content)
41
46
  else:
42
47
  return self.read_pdf(file_content)
48
+ elif filename.lower().endswith(('.xlsx', '.xls')):
49
+ return self.excel_service.read_excel(file_content)
50
+ elif filename.lower().endswith('.csv'):
51
+ return self.excel_service.read_csv(file_content)
43
52
  else:
44
53
  raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
45
54
  "Formato de archivo desconocido")
@@ -5,7 +5,6 @@
5
5
  import os
6
6
  import base64
7
7
  import numpy as np
8
- from threading import Lock
9
8
  from huggingface_hub import InferenceClient
10
9
  from openai import OpenAI
11
10
  from injector import inject
@@ -14,12 +13,14 @@ from iatoolkit.services.i18n_service import I18nService
14
13
  from iatoolkit.repositories.profile_repo import ProfileRepo
15
14
  import logging
16
15
 
16
+
17
17
  # Wrapper classes to create a common interface for embedding clients
18
18
  class EmbeddingClientWrapper:
19
19
  """Abstract base class for embedding client wrappers."""
20
- def __init__(self, client, model: str):
20
+ def __init__(self, client, model: str, dimensions: int = 1536):
21
21
  self.client = client
22
22
  self.model = model
23
+ self.dimensions = dimensions
23
24
 
24
25
  def get_embedding(self, text: str) -> list[float]:
25
26
  """Generates and returns an embedding for the given text."""
@@ -37,7 +38,9 @@ class OpenAIClientWrapper(EmbeddingClientWrapper):
37
38
  def get_embedding(self, text: str) -> list[float]:
38
39
  # The OpenAI API expects the input text to be clean
39
40
  text = text.replace("\n", " ")
40
- response = self.client.embeddings.create(input=[text], model=self.model)
41
+ response = self.client.embeddings.create(input=[text],
42
+ model=self.model,
43
+ dimensions=self.dimensions)
41
44
  return response.data[0].embedding
42
45
 
43
46
  # Factory and Service classes
@@ -68,6 +71,7 @@ class EmbeddingClientFactory:
68
71
  if not provider:
69
72
  raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
70
73
  model = embedding_config.get('model')
74
+ dimensions = int(embedding_config.get('dimensions', "1536"))
71
75
 
72
76
  api_key_name = embedding_config.get('api_key_name')
73
77
  if not api_key_name:
@@ -83,12 +87,12 @@ class EmbeddingClientFactory:
83
87
  if not model:
84
88
  model='sentence-transformers/all-MiniLM-L6-v2'
85
89
  client = InferenceClient(model=model, token=api_key)
86
- wrapper = HuggingFaceClientWrapper(client, model)
90
+ wrapper = HuggingFaceClientWrapper(client, model, dimensions)
87
91
  elif provider == 'openai':
88
92
  client = OpenAI(api_key=api_key)
89
93
  if not model:
90
94
  model='text-embedding-ada-002'
91
- wrapper = OpenAIClientWrapper(client, model)
95
+ wrapper = OpenAIClientWrapper(client, model, dimensions)
92
96
  else:
93
97
  raise NotImplementedError(f"Embedding provider '{provider}' is not implemented.")
94
98
 
@@ -126,7 +130,6 @@ class EmbeddingService:
126
130
 
127
131
  # 2. Use the wrapper's common interface to get the embedding
128
132
  embedding = client_wrapper.get_embedding(text)
129
-
130
133
  # 3. Process the result
131
134
  if to_base64:
132
135
  return base64.b64encode(np.array(embedding, dtype=np.float32).tobytes()).decode('utf-8')
@@ -3,6 +3,7 @@
3
3
  #
4
4
  # IAToolkit is open source software.
5
5
 
6
+ from flask import current_app, jsonify
6
7
  from iatoolkit.common.util import Utility
7
8
  import pandas as pd
8
9
  from uuid import uuid4
@@ -11,8 +12,9 @@ from iatoolkit.common.exceptions import IAToolkitException
11
12
  from iatoolkit.services.i18n_service import I18nService
12
13
  from injector import inject
13
14
  import os
15
+ import io
14
16
  import logging
15
- from flask import current_app, jsonify
17
+ import json
16
18
 
17
19
  EXCEL_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
18
20
 
@@ -25,7 +27,53 @@ class ExcelService:
25
27
  self.util = util
26
28
  self.i18n_service = i18n_service
27
29
 
28
- def excel_generator(self, **kwargs) -> str:
30
+ def read_excel(self, file_content: bytes) -> str:
31
+ """
32
+ Reads an Excel file and converts its content to a JSON string.
33
+ - If the Excel file has a single sheet, it returns the JSON of that sheet.
34
+ - If it has multiple sheets, it returns a JSON object with sheet names as keys.
35
+ """
36
+ try:
37
+ # Use a BytesIO object to allow pandas to read the in-memory byte content
38
+ file_like_object = io.BytesIO(file_content)
39
+
40
+ # Read all sheets into a dictionary of DataFrames
41
+ xls = pd.read_excel(file_like_object, sheet_name=None)
42
+
43
+ if len(xls) == 1:
44
+ # If only one sheet, return its JSON representation directly
45
+ sheet_name = list(xls.keys())[0]
46
+ return xls[sheet_name].to_json(orient='records', indent=4)
47
+ else:
48
+ # If multiple sheets, create a dictionary of JSON strings
49
+ sheets_json = {}
50
+ for sheet_name, df in xls.items():
51
+ sheets_json[sheet_name] = df.to_json(orient='records', indent=4)
52
+ return json.dumps(sheets_json, indent=4)
53
+
54
+ except Exception as e:
55
+ raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
56
+ self.i18n_service.t('errors.services.cannot_read_excel')) from e
57
+
58
+ def read_csv(self, file_content: bytes) -> str:
59
+ """
60
+ Reads a CSV file and converts its content to a JSON string.
61
+ """
62
+ try:
63
+ # Use a BytesIO object to allow pandas to read the in-memory byte content
64
+ file_like_object = io.BytesIO(file_content)
65
+
66
+ # Read the CSV into a DataFrame
67
+ df = pd.read_csv(file_like_object)
68
+
69
+ # Return JSON representation
70
+ return df.to_json(orient='records', indent=4)
71
+
72
+ except Exception as e:
73
+ raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
74
+ self.i18n_service.t('errors.services.cannot_read_csv')) from e
75
+
76
+ def excel_generator(self, company_short_name: str, **kwargs) -> str:
29
77
  """
30
78
  Genera un Excel a partir de una lista de diccionarios.
31
79
 
@@ -0,0 +1,189 @@
1
+ import logging
2
+ import json
3
+ from typing import Dict, Any, Tuple, Optional
4
+ from iatoolkit.services.user_session_context_service import UserSessionContextService
5
+ from iatoolkit.services.i18n_service import I18nService
6
+ from iatoolkit.infra.llm_client import llmClient
7
+ from iatoolkit.repositories.models import Company
8
+ from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
9
+ from iatoolkit.repositories.profile_repo import ProfileRepo
10
+ from injector import inject
11
+
12
+
13
+ class HistoryManagerService:
14
+ """
15
+ Manages conversation history for LLMs in a unified way.
16
+ Handles:
17
+ 1. Server-side history (e.g., OpenAI response_ids).
18
+ 2. Client-side history (e.g., Gemini message lists).
19
+ 3. Database persistence retrieval (full chat history).
20
+ """
21
+ TYPE_SERVER_SIDE = 'server_side' # For models like OpenAI
22
+ TYPE_CLIENT_SIDE = 'client_side' # For models like Gemini
23
+
24
+ GEMINI_MAX_TOKENS_CONTEXT_HISTORY = 200000
25
+
26
+
27
+ @inject
28
+ def __init__(self,
29
+ session_context: UserSessionContextService,
30
+ i18n: I18nService,
31
+ llm_query_repo: LLMQueryRepo,
32
+ profile_repo: ProfileRepo,
33
+ llm_client: Optional[llmClient] = None):
34
+ self.session_context = session_context
35
+ self.i18n = i18n
36
+ self.llm_query_repo = llm_query_repo
37
+ self.profile_repo = profile_repo
38
+ self.llm_client = llm_client
39
+
40
+ def initialize_context(self,
41
+ company_short_name: str,
42
+ user_identifier: str,
43
+ history_type: str,
44
+ prepared_context: str,
45
+ company: Company, model: str) -> Dict[str, Any]:
46
+ """
47
+ Initializes a new conversation history.
48
+ """
49
+ # 1. Clear existing history
50
+ self.session_context.clear_llm_history(company_short_name, user_identifier)
51
+
52
+ if history_type == self.TYPE_SERVER_SIDE:
53
+ # OpenAI: Send system prompt to API and store the resulting ID
54
+ response_id = self.llm_client.set_company_context(
55
+ company=company,
56
+ company_base_context=prepared_context,
57
+ model=model
58
+ )
59
+ self.session_context.save_last_response_id(company_short_name, user_identifier, response_id)
60
+ self.session_context.save_initial_response_id(company_short_name, user_identifier, response_id)
61
+ return {'response_id': response_id}
62
+
63
+ elif history_type == self.TYPE_CLIENT_SIDE:
64
+ # Gemini: Store system prompt as the first message in the list
65
+ context_history = [{"role": "user", "content": prepared_context}]
66
+ self.session_context.save_context_history(company_short_name, user_identifier, context_history)
67
+ return {}
68
+
69
+ return {}
70
+
71
+ def populate_request_params(self,
72
+ handle: Any,
73
+ user_turn_prompt: str,
74
+ ignore_history: bool = False) -> bool:
75
+ """
76
+ Populates the request_params within the HistoryHandle.
77
+ Returns True if a rebuild is needed, False otherwise.
78
+ """
79
+ if handle.type == self.TYPE_SERVER_SIDE:
80
+ previous_response_id = None
81
+ if ignore_history:
82
+ previous_response_id = self.session_context.get_initial_response_id(handle.company_short_name,
83
+ handle.user_identifier)
84
+ else:
85
+ previous_response_id = self.session_context.get_last_response_id(handle.company_short_name,
86
+ handle.user_identifier)
87
+
88
+ if not previous_response_id:
89
+ handle.request_params = {}
90
+ return True # Needs rebuild
91
+
92
+ handle.request_params = {'previous_response_id': previous_response_id}
93
+ return False
94
+
95
+ elif handle.type == self.TYPE_CLIENT_SIDE:
96
+ context_history = self.session_context.get_context_history(handle.company_short_name,
97
+ handle.user_identifier) or []
98
+
99
+ if not context_history:
100
+ handle.request_params = {}
101
+ return True # Needs rebuild
102
+
103
+ if ignore_history and len(context_history) > 1:
104
+ # Keep only system prompt
105
+ context_history = [context_history[0]]
106
+
107
+ # For Gemini, we append the current user turn to the context sent to the API
108
+ context_history.append({"role": "user", "content": user_turn_prompt})
109
+
110
+ self._trim_context_history(context_history)
111
+
112
+ handle.request_params = {'context_history': context_history}
113
+ return False
114
+
115
+ handle.request_params = {}
116
+ return False
117
+
118
+ def update_history(self,
119
+ history_handle: Any,
120
+ user_turn_prompt: str,
121
+ response: Dict[str, Any]):
122
+ """Saves or updates the history after a successful LLM call."""
123
+
124
+ # We access the type from the handle
125
+ history_type = history_handle.type
126
+ company_short_name = history_handle.company_short_name
127
+ user_identifier = history_handle.user_identifier
128
+
129
+ if history_type == self.TYPE_SERVER_SIDE:
130
+ if "response_id" in response:
131
+ self.session_context.save_last_response_id(company_short_name, user_identifier,
132
+ response["response_id"])
133
+
134
+ elif history_type == self.TYPE_CLIENT_SIDE:
135
+ context_history = self.session_context.get_context_history(company_short_name,
136
+ user_identifier) or []
137
+ # Ensure the user prompt is recorded if not already.
138
+ # We check content equality to handle the case where the previous message was
139
+ # also 'user' (e.g., System Prompt) but different content.
140
+ last_content = context_history[-1].get("content") if context_history else None
141
+
142
+ if last_content != user_turn_prompt:
143
+ context_history.append({"role": "user", "content": user_turn_prompt})
144
+
145
+ if response.get('output'):
146
+ context_history.append({"role": "model", "content": response['output']})
147
+
148
+ self.session_context.save_context_history(company_short_name, user_identifier, context_history)
149
+
150
+ def _trim_context_history(self, context_history: list):
151
+ """Internal helper to keep token usage within limits for client-side history."""
152
+ if not context_history or len(context_history) <= 1:
153
+ return
154
+ try:
155
+ total_tokens = sum(self.llm_client.count_tokens(json.dumps(message)) for message in context_history)
156
+ except Exception as e:
157
+ logging.error(f"Error counting tokens for history: {e}.")
158
+ return
159
+
160
+ while total_tokens > self.GEMINI_MAX_TOKENS_CONTEXT_HISTORY and len(context_history) > 1:
161
+ try:
162
+ # Remove the oldest message after system prompt
163
+ removed_message = context_history.pop(1)
164
+ removed_tokens = self.llm_client.count_tokens(json.dumps(removed_message))
165
+ total_tokens -= removed_tokens
166
+ logging.warning(
167
+ f"History tokens exceed limit. Removed old message. New total: {total_tokens} tokens."
168
+ )
169
+ except IndexError:
170
+ break
171
+
172
+ # --- Database History Management (Legacy HistoryService) ---
173
+
174
+ def get_full_history(self, company_short_name: str, user_identifier: str) -> dict:
175
+ """Retrieves the full persisted history from the database."""
176
+ try:
177
+ company = self.profile_repo.get_company_by_short_name(company_short_name)
178
+ if not company:
179
+ return {"error": self.i18n.t('errors.company_not_found', company_short_name=company_short_name)}
180
+
181
+ history = self.llm_query_repo.get_history(company, user_identifier)
182
+ if not history:
183
+ return {'message': 'empty history', 'history': []}
184
+
185
+ history_list = [query.to_dict() for query in history]
186
+ return {'message': 'history loaded ok', 'history': history_list}
187
+
188
+ except Exception as e:
189
+ return {'error': str(e)}