iatoolkit 0.71.4__py3-none-any.whl → 1.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. iatoolkit/__init__.py +19 -7
  2. iatoolkit/base_company.py +1 -71
  3. iatoolkit/cli_commands.py +9 -21
  4. iatoolkit/common/exceptions.py +2 -0
  5. iatoolkit/common/interfaces/__init__.py +0 -0
  6. iatoolkit/common/interfaces/asset_storage.py +34 -0
  7. iatoolkit/common/interfaces/database_provider.py +38 -0
  8. iatoolkit/common/model_registry.py +159 -0
  9. iatoolkit/common/routes.py +53 -32
  10. iatoolkit/common/util.py +17 -12
  11. iatoolkit/company_registry.py +55 -14
  12. iatoolkit/{iatoolkit.py → core.py} +102 -72
  13. iatoolkit/infra/{mail_app.py → brevo_mail_app.py} +15 -37
  14. iatoolkit/infra/llm_providers/__init__.py +0 -0
  15. iatoolkit/infra/llm_providers/deepseek_adapter.py +278 -0
  16. iatoolkit/infra/{gemini_adapter.py → llm_providers/gemini_adapter.py} +11 -17
  17. iatoolkit/infra/{openai_adapter.py → llm_providers/openai_adapter.py} +41 -7
  18. iatoolkit/infra/llm_proxy.py +235 -134
  19. iatoolkit/infra/llm_response.py +5 -0
  20. iatoolkit/locales/en.yaml +134 -4
  21. iatoolkit/locales/es.yaml +293 -162
  22. iatoolkit/repositories/database_manager.py +92 -22
  23. iatoolkit/repositories/document_repo.py +7 -0
  24. iatoolkit/repositories/filesystem_asset_repository.py +36 -0
  25. iatoolkit/repositories/llm_query_repo.py +36 -22
  26. iatoolkit/repositories/models.py +86 -95
  27. iatoolkit/repositories/profile_repo.py +64 -13
  28. iatoolkit/repositories/vs_repo.py +31 -28
  29. iatoolkit/services/auth_service.py +1 -1
  30. iatoolkit/services/branding_service.py +1 -1
  31. iatoolkit/services/company_context_service.py +96 -39
  32. iatoolkit/services/configuration_service.py +329 -67
  33. iatoolkit/services/dispatcher_service.py +51 -227
  34. iatoolkit/services/document_service.py +10 -1
  35. iatoolkit/services/embedding_service.py +9 -6
  36. iatoolkit/services/excel_service.py +50 -2
  37. iatoolkit/services/file_processor_service.py +0 -5
  38. iatoolkit/services/history_manager_service.py +208 -0
  39. iatoolkit/services/jwt_service.py +1 -1
  40. iatoolkit/services/knowledge_base_service.py +412 -0
  41. iatoolkit/services/language_service.py +8 -2
  42. iatoolkit/services/license_service.py +82 -0
  43. iatoolkit/{infra/llm_client.py → services/llm_client_service.py} +42 -29
  44. iatoolkit/services/load_documents_service.py +18 -47
  45. iatoolkit/services/mail_service.py +171 -25
  46. iatoolkit/services/profile_service.py +69 -36
  47. iatoolkit/services/{prompt_manager_service.py → prompt_service.py} +136 -25
  48. iatoolkit/services/query_service.py +229 -203
  49. iatoolkit/services/sql_service.py +116 -34
  50. iatoolkit/services/tool_service.py +246 -0
  51. iatoolkit/services/user_feedback_service.py +18 -6
  52. iatoolkit/services/user_session_context_service.py +121 -51
  53. iatoolkit/static/images/iatoolkit_core.png +0 -0
  54. iatoolkit/static/images/iatoolkit_logo.png +0 -0
  55. iatoolkit/static/js/chat_feedback_button.js +1 -1
  56. iatoolkit/static/js/chat_help_content.js +4 -4
  57. iatoolkit/static/js/chat_main.js +61 -9
  58. iatoolkit/static/js/chat_model_selector.js +227 -0
  59. iatoolkit/static/js/chat_onboarding_button.js +1 -1
  60. iatoolkit/static/js/chat_reload_button.js +4 -1
  61. iatoolkit/static/styles/chat_iatoolkit.css +59 -3
  62. iatoolkit/static/styles/chat_public.css +28 -0
  63. iatoolkit/static/styles/documents.css +598 -0
  64. iatoolkit/static/styles/landing_page.css +223 -7
  65. iatoolkit/static/styles/llm_output.css +34 -1
  66. iatoolkit/system_prompts/__init__.py +0 -0
  67. iatoolkit/system_prompts/query_main.prompt +28 -3
  68. iatoolkit/system_prompts/sql_rules.prompt +47 -12
  69. iatoolkit/templates/_company_header.html +30 -5
  70. iatoolkit/templates/_login_widget.html +3 -3
  71. iatoolkit/templates/base.html +13 -0
  72. iatoolkit/templates/chat.html +45 -3
  73. iatoolkit/templates/forgot_password.html +3 -2
  74. iatoolkit/templates/onboarding_shell.html +1 -2
  75. iatoolkit/templates/signup.html +3 -0
  76. iatoolkit/views/base_login_view.py +8 -3
  77. iatoolkit/views/change_password_view.py +1 -1
  78. iatoolkit/views/chat_view.py +76 -0
  79. iatoolkit/views/forgot_password_view.py +9 -4
  80. iatoolkit/views/history_api_view.py +3 -3
  81. iatoolkit/views/home_view.py +4 -2
  82. iatoolkit/views/init_context_api_view.py +1 -1
  83. iatoolkit/views/llmquery_api_view.py +4 -3
  84. iatoolkit/views/load_company_configuration_api_view.py +49 -0
  85. iatoolkit/views/{file_store_api_view.py → load_document_api_view.py} +15 -11
  86. iatoolkit/views/login_view.py +25 -8
  87. iatoolkit/views/logout_api_view.py +10 -2
  88. iatoolkit/views/prompt_api_view.py +1 -1
  89. iatoolkit/views/rag_api_view.py +216 -0
  90. iatoolkit/views/root_redirect_view.py +22 -0
  91. iatoolkit/views/signup_view.py +12 -4
  92. iatoolkit/views/static_page_view.py +27 -0
  93. iatoolkit/views/users_api_view.py +33 -0
  94. iatoolkit/views/verify_user_view.py +1 -1
  95. iatoolkit-1.4.2.dist-info/METADATA +268 -0
  96. iatoolkit-1.4.2.dist-info/RECORD +133 -0
  97. iatoolkit-1.4.2.dist-info/licenses/LICENSE_COMMUNITY.md +15 -0
  98. iatoolkit/repositories/tasks_repo.py +0 -52
  99. iatoolkit/services/history_service.py +0 -37
  100. iatoolkit/services/search_service.py +0 -55
  101. iatoolkit/services/tasks_service.py +0 -188
  102. iatoolkit/templates/about.html +0 -13
  103. iatoolkit/templates/index.html +0 -145
  104. iatoolkit/templates/login_simulation.html +0 -45
  105. iatoolkit/views/external_login_view.py +0 -73
  106. iatoolkit/views/index_view.py +0 -14
  107. iatoolkit/views/login_simulation_view.py +0 -93
  108. iatoolkit/views/tasks_api_view.py +0 -72
  109. iatoolkit/views/tasks_review_api_view.py +0 -55
  110. iatoolkit-0.71.4.dist-info/METADATA +0 -276
  111. iatoolkit-0.71.4.dist-info/RECORD +0 -122
  112. {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/WHEEL +0 -0
  113. {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/licenses/LICENSE +0 -0
  114. {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/top_level.txt +0 -0
@@ -4,13 +4,9 @@
4
4
  # IAToolkit is open source software.
5
5
 
6
6
  from iatoolkit.common.exceptions import IAToolkitException
7
- from iatoolkit.services.prompt_manager_service import PromptService
8
- from iatoolkit.services.sql_service import SqlService
7
+ from iatoolkit.services.prompt_service import PromptService
9
8
  from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
10
9
  from iatoolkit.services.configuration_service import ConfigurationService
11
- from iatoolkit.repositories.models import Company, Function
12
- from iatoolkit.services.excel_service import ExcelService
13
- from iatoolkit.services.mail_service import MailService
14
10
  from iatoolkit.common.util import Utility
15
11
  from injector import inject
16
12
  import logging
@@ -23,28 +19,25 @@ class Dispatcher:
23
19
  config_service: ConfigurationService,
24
20
  prompt_service: PromptService,
25
21
  llmquery_repo: LLMQueryRepo,
26
- util: Utility,
27
- sql_service: SqlService,
28
- excel_service: ExcelService,
29
- mail_service: MailService):
22
+ util: Utility,):
30
23
  self.config_service = config_service
31
24
  self.prompt_service = prompt_service
32
25
  self.llmquery_repo = llmquery_repo
33
26
  self.util = util
34
- self.sql_service = sql_service
35
- self.excel_service = excel_service
36
- self.mail_service = mail_service
37
- self.system_functions = _FUNCTION_LIST
38
- self.system_prompts = _SYSTEM_PROMPT
39
27
 
28
+ self._tool_service = None
40
29
  self._company_registry = None
41
30
  self._company_instances = None
42
31
 
43
- self.tool_handlers = {
44
- "iat_generate_excel": self.excel_service.excel_generator,
45
- "iat_send_email": self.mail_service.send_mail,
46
- "iat_sql_query": self.sql_service.exec_sql
47
- }
32
+
33
+ @property
34
+ def tool_service(self):
35
+ """Lazy-loads and returns the ToolService instance to avoid circular imports."""
36
+ if self._tool_service is None:
37
+ from iatoolkit import current_iatoolkit
38
+ from iatoolkit.services.tool_service import ToolService
39
+ self._tool_service = current_iatoolkit().get_injector().get(ToolService)
40
+ return self._tool_service
48
41
 
49
42
  @property
50
43
  def company_registry(self):
@@ -65,87 +58,67 @@ class Dispatcher:
65
58
  # initialize the system functions and prompts
66
59
  self.setup_iatoolkit_system()
67
60
 
68
- """Loads the configuration of every company"""
69
- for company_name, company_instance in self.company_instances.items():
61
+ # Loads the configuration of every company: company.yaml file
62
+ for company_short_name, company_instance in self.company_instances.items():
70
63
  try:
71
64
  # read company configuration from company.yaml
72
- self.config_service.load_configuration(company_name, company_instance)
65
+ config, errors = self.config_service.load_configuration(company_short_name)
73
66
 
74
- # register the company databases
75
- self._register_company_databases(company_name)
67
+ '''
68
+ if errors:
69
+ raise IAToolkitException(
70
+ IAToolkitException.ErrorType.CONFIG_ERROR,
71
+ 'company.yaml validation errors'
72
+ )
73
+ '''
74
+
75
+ # complement the instance self data
76
+ company_instance.company_short_name = company_short_name
77
+ company_instance.company = config.get('company')
76
78
 
77
79
  except Exception as e:
78
- logging.error(f"❌ Failed to register configuration for '{company_name}': {e}")
79
- continue
80
+ logging.error(f"❌ Failed to register configuration for '{company_short_name}': {e}")
81
+ raise e
80
82
 
81
83
  return True
82
84
 
83
- def _register_company_databases(self, company_name: str):
84
- """
85
- Reads the data_sources config for a company and registers each
86
- database with the central SqlService.
87
- """
88
- logging.info(f" -> Registering databases for '{company_name}'...")
89
- data_sources_config = self.config_service.get_configuration(company_name, 'data_sources')
90
-
91
- if not data_sources_config or not data_sources_config.get('sql'):
92
- return
93
-
94
- for db_config in data_sources_config['sql']:
95
- db_name = db_config.get('database')
96
- db_env_var = db_config.get('connection_string_env')
97
-
98
- # resolve the URI connection string from the environment variable
99
- db_uri = os.getenv(db_env_var) if db_env_var else None
100
- if not db_uri:
101
- logging.error(
102
- f"-> Skipping database registration for '{company_name}' due to missing 'database' name or invalid connection URI.")
103
- return
104
-
105
- self.sql_service.register_database(db_name, db_uri)
106
-
107
85
  def setup_iatoolkit_system(self):
108
- # create system functions
109
- for function in self.system_functions:
110
- self.llmquery_repo.create_or_update_function(
111
- Function(
112
- company_id=None,
113
- system_function=True,
114
- name=function['function_name'],
115
- description= function['description'],
116
- parameters=function['parameters']
117
- )
118
- )
86
+ try:
87
+ # system tools registration
88
+ self.tool_service.register_system_tools()
119
89
 
120
- # create the system prompts
121
- i = 1
122
- for prompt in self.system_prompts:
123
- self.prompt_service.create_prompt(
124
- prompt_name=prompt['name'],
125
- description=prompt['description'],
126
- order=1,
127
- is_system_prompt=True,
128
- )
129
- i += 1
90
+ # system prompts registration
91
+ self.prompt_service.register_system_prompts()
92
+
93
+ except Exception as e:
94
+ self.llmquery_repo.rollback()
95
+ raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR, str(e))
130
96
 
131
97
 
132
- def dispatch(self, company_short_name: str, action: str, **kwargs) -> dict:
98
+ def dispatch(self, company_short_name: str, function_name: str, **kwargs) -> dict:
133
99
  company_key = company_short_name.lower()
134
100
 
135
101
  if company_key not in self.company_instances:
136
102
  available_companies = list(self.company_instances.keys())
137
103
  raise IAToolkitException(
138
104
  IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
139
- f"Empresa '{company_short_name}' no configurada. Empresas disponibles: {available_companies}"
105
+ f"Company '{company_short_name}' not configured. available companies: {available_companies}"
140
106
  )
141
107
 
142
- # check if action is a system function
143
- if action in self.tool_handlers:
144
- return self.tool_handlers[action](**kwargs)
108
+ # check if action is a system function using ToolService
109
+ if self.tool_service.is_system_tool(function_name):
110
+ # this is the system function to be executed.
111
+ handler = self.tool_service.get_system_handler(function_name)
112
+ logging.info(
113
+ f"Calling system handler [{function_name}] "
114
+ f"with company_short_name={company_short_name} "
115
+ f"and kwargs={kwargs}"
116
+ )
117
+ return handler(company_short_name, **kwargs)
145
118
 
146
119
  company_instance = self.company_instances[company_short_name]
147
120
  try:
148
- return company_instance.handle_request(action, **kwargs)
121
+ return company_instance.handle_request(function_name, **kwargs)
149
122
  except IAToolkitException as e:
150
123
  # Si ya es una IAToolkitException, la relanzamos para preservar el tipo de error original.
151
124
  raise e
@@ -153,158 +126,9 @@ class Dispatcher:
153
126
  except Exception as e:
154
127
  logging.exception(e)
155
128
  raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
156
- f"Error en function call '{action}': {str(e)}") from e
129
+ f"Error in function call '{function_name}': {str(e)}") from e
157
130
 
158
- def get_company_services(self, company: Company) -> list[dict]:
159
- # create the syntax with openai response syntax, for the company function list
160
- tools = []
161
- functions = self.llmquery_repo.get_company_functions(company)
162
-
163
- for function in functions:
164
- # make sure is always on
165
- function.parameters["additionalProperties"] = False
166
-
167
- ai_tool = {
168
- "type": "function",
169
- "name": function.name,
170
- "description": function.description,
171
- "parameters": function.parameters,
172
- "strict": True
173
- }
174
- tools.append(ai_tool)
175
- return tools
176
-
177
- def get_user_info(self, company_name: str, user_identifier: str) -> dict:
178
- if company_name not in self.company_instances:
179
- raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
180
- f"company not configured: {company_name}")
181
-
182
- # source 2: external company user
183
- company_instance = self.company_instances[company_name]
184
- try:
185
- external_user_profile = company_instance.get_user_info(user_identifier)
186
- except Exception as e:
187
- logging.exception(e)
188
- raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
189
- f"Error in get_user_info: {company_name}: {str(e)}") from e
190
-
191
- return external_user_profile
192
131
 
193
132
  def get_company_instance(self, company_name: str):
194
133
  """Returns the instance for a given company name."""
195
134
  return self.company_instances.get(company_name)
196
-
197
-
198
- # iatoolkit system prompts
199
- _SYSTEM_PROMPT = [
200
- {'name': 'query_main', 'description':'iatoolkit main prompt'},
201
- {'name': 'format_styles', 'description':'output format styles'},
202
- {'name': 'sql_rules', 'description':'instructions for SQL queries'}
203
- ]
204
-
205
- # iatoolkit built-in functions (Tools)
206
- _FUNCTION_LIST = [
207
- {
208
- "function_name": "iat_sql_query",
209
- "description": "Servicio SQL de IAToolkit: debes utilizar este servicio para todas las consultas a base de datos.",
210
- "parameters": {
211
- "type": "object",
212
- "properties": {
213
- "database": {
214
- "type": "string",
215
- "description": "nombre de la base de datos a consultar: `database_name`"
216
- },
217
- "query": {
218
- "type": "string",
219
- "description": "string con la consulta en sql"
220
- },
221
- },
222
- "required": ["database", "query"]
223
- }
224
- },
225
- {
226
- "function_name": "iat_generate_excel",
227
- "description": "Generador de Excel."
228
- "Genera un archivo Excel (.xlsx) a partir de una lista de diccionarios. "
229
- "Cada diccionario representa una fila del archivo. "
230
- "el archivo se guarda en directorio de descargas."
231
- "retorna diccionario con filename, attachment_token (para enviar archivo por mail)"
232
- "content_type y download_link",
233
- "parameters": {
234
- "type": "object",
235
- "properties": {
236
- "filename": {
237
- "type": "string",
238
- "description": "Nombre del archivo de salida (ejemplo: 'reporte.xlsx')",
239
- "pattern": "^.+\\.xlsx?$"
240
- },
241
- "sheet_name": {
242
- "type": "string",
243
- "description": "Nombre de la hoja dentro del Excel",
244
- "minLength": 1
245
- },
246
- "data": {
247
- "type": "array",
248
- "description": "Lista de diccionarios. Cada diccionario representa una fila.",
249
- "minItems": 1,
250
- "items": {
251
- "type": "object",
252
- "properties": {},
253
- "additionalProperties": {
254
- "anyOf": [
255
- {"type": "string"},
256
- {"type": "number"},
257
- {"type": "boolean"},
258
- {"type": "null"},
259
- {
260
- "type": "string",
261
- "format": "date"
262
- }
263
- ]
264
- }
265
- }
266
- }
267
- },
268
- "required": ["filename", "sheet_name", "data"]
269
- }
270
- },
271
- {
272
- 'function_name': "iat_send_email",
273
- 'description': "iatoolkit mail system. "
274
- "envia mails cuando un usuario lo solicita."
275
- "Si no te indican quien envia el correo utiliza la dirección iatoolkit@iatoolkit.com",
276
- 'parameters': {
277
- "type": "object",
278
- "properties": {
279
- "from_email": {"type": "string","description": "dirección de correo electrónico que esta enviando el email."},
280
- "recipient": {"type": "string", "description": "email del destinatario"},
281
- "subject": {"type": "string", "description": "asunto del email"},
282
- "body": {"type": "string", "description": "HTML del email"},
283
- "attachments": {
284
- "type": "array",
285
- "description": "Lista de archivos adjuntos codificados en base64",
286
- "items": {
287
- "type": "object",
288
- "properties": {
289
- "filename": {
290
- "type": "string",
291
- "description": "Nombre del archivo con su extensión (ej. informe.pdf)"
292
- },
293
- "content": {
294
- "type": "string",
295
- "description": "Contenido del archivo en b64."
296
- },
297
- "attachment_token": {
298
- "type": "string",
299
- "description": "token para descargar el archivo."
300
- }
301
- },
302
- "required": ["filename", "content", "attachment_token"],
303
- "additionalProperties": False
304
- }
305
- }
306
- },
307
- "required": ["from_email","recipient", "subject", "body", "attachments"]
308
- }
309
- }
310
- ]
@@ -12,10 +12,15 @@ import pytesseract
12
12
  from injector import inject
13
13
  from iatoolkit.common.exceptions import IAToolkitException
14
14
  from iatoolkit.services.i18n_service import I18nService
15
+ from iatoolkit.services.excel_service import ExcelService
16
+
15
17
 
16
18
  class DocumentService:
17
19
  @inject
18
- def __init__(self, i18n_service: I18nService):
20
+ def __init__(self,
21
+ excel_service: ExcelService,
22
+ i18n_service: I18nService):
23
+ self.excel_service = excel_service
19
24
  self.i18n_service = i18n_service
20
25
 
21
26
  # max number of pages to load
@@ -40,6 +45,10 @@ class DocumentService:
40
45
  return self.read_scanned_pdf(file_content)
41
46
  else:
42
47
  return self.read_pdf(file_content)
48
+ elif filename.lower().endswith(('.xlsx', '.xls')):
49
+ return self.excel_service.read_excel(file_content)
50
+ elif filename.lower().endswith('.csv'):
51
+ return self.excel_service.read_csv(file_content)
43
52
  else:
44
53
  raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
45
54
  "Formato de archivo desconocido")
@@ -5,7 +5,6 @@
5
5
  import os
6
6
  import base64
7
7
  import numpy as np
8
- from threading import Lock
9
8
  from huggingface_hub import InferenceClient
10
9
  from openai import OpenAI
11
10
  from injector import inject
@@ -14,12 +13,14 @@ from iatoolkit.services.i18n_service import I18nService
14
13
  from iatoolkit.repositories.profile_repo import ProfileRepo
15
14
  import logging
16
15
 
16
+
17
17
  # Wrapper classes to create a common interface for embedding clients
18
18
  class EmbeddingClientWrapper:
19
19
  """Abstract base class for embedding client wrappers."""
20
- def __init__(self, client, model: str):
20
+ def __init__(self, client, model: str, dimensions: int = 1536):
21
21
  self.client = client
22
22
  self.model = model
23
+ self.dimensions = dimensions
23
24
 
24
25
  def get_embedding(self, text: str) -> list[float]:
25
26
  """Generates and returns an embedding for the given text."""
@@ -37,7 +38,9 @@ class OpenAIClientWrapper(EmbeddingClientWrapper):
37
38
  def get_embedding(self, text: str) -> list[float]:
38
39
  # The OpenAI API expects the input text to be clean
39
40
  text = text.replace("\n", " ")
40
- response = self.client.embeddings.create(input=[text], model=self.model)
41
+ response = self.client.embeddings.create(input=[text],
42
+ model=self.model,
43
+ dimensions=self.dimensions)
41
44
  return response.data[0].embedding
42
45
 
43
46
  # Factory and Service classes
@@ -68,6 +71,7 @@ class EmbeddingClientFactory:
68
71
  if not provider:
69
72
  raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
70
73
  model = embedding_config.get('model')
74
+ dimensions = int(embedding_config.get('dimensions', "1536"))
71
75
 
72
76
  api_key_name = embedding_config.get('api_key_name')
73
77
  if not api_key_name:
@@ -83,12 +87,12 @@ class EmbeddingClientFactory:
83
87
  if not model:
84
88
  model='sentence-transformers/all-MiniLM-L6-v2'
85
89
  client = InferenceClient(model=model, token=api_key)
86
- wrapper = HuggingFaceClientWrapper(client, model)
90
+ wrapper = HuggingFaceClientWrapper(client, model, dimensions)
87
91
  elif provider == 'openai':
88
92
  client = OpenAI(api_key=api_key)
89
93
  if not model:
90
94
  model='text-embedding-ada-002'
91
- wrapper = OpenAIClientWrapper(client, model)
95
+ wrapper = OpenAIClientWrapper(client, model, dimensions)
92
96
  else:
93
97
  raise NotImplementedError(f"Embedding provider '{provider}' is not implemented.")
94
98
 
@@ -126,7 +130,6 @@ class EmbeddingService:
126
130
 
127
131
  # 2. Use the wrapper's common interface to get the embedding
128
132
  embedding = client_wrapper.get_embedding(text)
129
-
130
133
  # 3. Process the result
131
134
  if to_base64:
132
135
  return base64.b64encode(np.array(embedding, dtype=np.float32).tobytes()).decode('utf-8')
@@ -3,6 +3,7 @@
3
3
  #
4
4
  # IAToolkit is open source software.
5
5
 
6
+ from flask import current_app, jsonify
6
7
  from iatoolkit.common.util import Utility
7
8
  import pandas as pd
8
9
  from uuid import uuid4
@@ -11,8 +12,9 @@ from iatoolkit.common.exceptions import IAToolkitException
11
12
  from iatoolkit.services.i18n_service import I18nService
12
13
  from injector import inject
13
14
  import os
15
+ import io
14
16
  import logging
15
- from flask import current_app, jsonify
17
+ import json
16
18
 
17
19
  EXCEL_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
18
20
 
@@ -25,7 +27,53 @@ class ExcelService:
25
27
  self.util = util
26
28
  self.i18n_service = i18n_service
27
29
 
28
- def excel_generator(self, **kwargs) -> str:
30
+ def read_excel(self, file_content: bytes) -> str:
31
+ """
32
+ Reads an Excel file and converts its content to a JSON string.
33
+ - If the Excel file has a single sheet, it returns the JSON of that sheet.
34
+ - If it has multiple sheets, it returns a JSON object with sheet names as keys.
35
+ """
36
+ try:
37
+ # Use a BytesIO object to allow pandas to read the in-memory byte content
38
+ file_like_object = io.BytesIO(file_content)
39
+
40
+ # Read all sheets into a dictionary of DataFrames
41
+ xls = pd.read_excel(file_like_object, sheet_name=None)
42
+
43
+ if len(xls) == 1:
44
+ # If only one sheet, return its JSON representation directly
45
+ sheet_name = list(xls.keys())[0]
46
+ return xls[sheet_name].to_json(orient='records', indent=4)
47
+ else:
48
+ # If multiple sheets, create a dictionary of JSON strings
49
+ sheets_json = {}
50
+ for sheet_name, df in xls.items():
51
+ sheets_json[sheet_name] = df.to_json(orient='records', indent=4)
52
+ return json.dumps(sheets_json, indent=4)
53
+
54
+ except Exception as e:
55
+ raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
56
+ self.i18n_service.t('errors.services.cannot_read_excel')) from e
57
+
58
+ def read_csv(self, file_content: bytes) -> str:
59
+ """
60
+ Reads a CSV file and converts its content to a JSON string.
61
+ """
62
+ try:
63
+ # Use a BytesIO object to allow pandas to read the in-memory byte content
64
+ file_like_object = io.BytesIO(file_content)
65
+
66
+ # Read the CSV into a DataFrame
67
+ df = pd.read_csv(file_like_object)
68
+
69
+ # Return JSON representation
70
+ return df.to_json(orient='records', indent=4)
71
+
72
+ except Exception as e:
73
+ raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
74
+ self.i18n_service.t('errors.services.cannot_read_csv')) from e
75
+
76
+ def excel_generator(self, company_short_name: str, **kwargs) -> str:
29
77
  """
30
78
  Genera un Excel a partir de una lista de diccionarios.
31
79
 
@@ -74,9 +74,6 @@ class FileProcessor:
74
74
  if not self._apply_filters(file_name):
75
75
  continue
76
76
 
77
- if self.config.echo:
78
- print(f'loading: {file_name}')
79
-
80
77
  content = self.connector.get_file_content(file_path)
81
78
 
82
79
  # execute the callback function
@@ -87,8 +84,6 @@ class FileProcessor:
87
84
  context=self.config.context)
88
85
  self.processed_files += 1
89
86
 
90
- logging.info(f"Successfully processed file: {file_path}")
91
-
92
87
  except Exception as e:
93
88
  logging.error(f"Error processing {file_path}: {e}")
94
89
  if not self.config.continue_on_error: