iatoolkit 0.3.9__py3-none-any.whl → 0.107.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of iatoolkit might be problematic. Click here for more details.

Files changed (150) hide show
  1. iatoolkit/__init__.py +27 -35
  2. iatoolkit/base_company.py +3 -35
  3. iatoolkit/cli_commands.py +18 -47
  4. iatoolkit/common/__init__.py +0 -0
  5. iatoolkit/common/exceptions.py +48 -0
  6. iatoolkit/common/interfaces/__init__.py +0 -0
  7. iatoolkit/common/interfaces/asset_storage.py +34 -0
  8. iatoolkit/common/interfaces/database_provider.py +39 -0
  9. iatoolkit/common/model_registry.py +159 -0
  10. iatoolkit/common/routes.py +138 -0
  11. iatoolkit/common/session_manager.py +26 -0
  12. iatoolkit/common/util.py +353 -0
  13. iatoolkit/company_registry.py +66 -29
  14. iatoolkit/core.py +514 -0
  15. iatoolkit/infra/__init__.py +5 -0
  16. iatoolkit/infra/brevo_mail_app.py +123 -0
  17. iatoolkit/infra/call_service.py +140 -0
  18. iatoolkit/infra/connectors/__init__.py +5 -0
  19. iatoolkit/infra/connectors/file_connector.py +17 -0
  20. iatoolkit/infra/connectors/file_connector_factory.py +57 -0
  21. iatoolkit/infra/connectors/google_cloud_storage_connector.py +53 -0
  22. iatoolkit/infra/connectors/google_drive_connector.py +68 -0
  23. iatoolkit/infra/connectors/local_file_connector.py +46 -0
  24. iatoolkit/infra/connectors/s3_connector.py +33 -0
  25. iatoolkit/infra/google_chat_app.py +57 -0
  26. iatoolkit/infra/llm_providers/__init__.py +0 -0
  27. iatoolkit/infra/llm_providers/deepseek_adapter.py +278 -0
  28. iatoolkit/infra/llm_providers/gemini_adapter.py +350 -0
  29. iatoolkit/infra/llm_providers/openai_adapter.py +124 -0
  30. iatoolkit/infra/llm_proxy.py +268 -0
  31. iatoolkit/infra/llm_response.py +45 -0
  32. iatoolkit/infra/redis_session_manager.py +122 -0
  33. iatoolkit/locales/en.yaml +222 -0
  34. iatoolkit/locales/es.yaml +225 -0
  35. iatoolkit/repositories/__init__.py +5 -0
  36. iatoolkit/repositories/database_manager.py +187 -0
  37. iatoolkit/repositories/document_repo.py +33 -0
  38. iatoolkit/repositories/filesystem_asset_repository.py +36 -0
  39. iatoolkit/repositories/llm_query_repo.py +105 -0
  40. iatoolkit/repositories/models.py +279 -0
  41. iatoolkit/repositories/profile_repo.py +171 -0
  42. iatoolkit/repositories/vs_repo.py +150 -0
  43. iatoolkit/services/__init__.py +5 -0
  44. iatoolkit/services/auth_service.py +193 -0
  45. {services → iatoolkit/services}/benchmark_service.py +7 -7
  46. iatoolkit/services/branding_service.py +153 -0
  47. iatoolkit/services/company_context_service.py +214 -0
  48. iatoolkit/services/configuration_service.py +375 -0
  49. iatoolkit/services/dispatcher_service.py +134 -0
  50. {services → iatoolkit/services}/document_service.py +20 -8
  51. iatoolkit/services/embedding_service.py +148 -0
  52. iatoolkit/services/excel_service.py +156 -0
  53. {services → iatoolkit/services}/file_processor_service.py +36 -21
  54. iatoolkit/services/history_manager_service.py +208 -0
  55. iatoolkit/services/i18n_service.py +104 -0
  56. iatoolkit/services/jwt_service.py +80 -0
  57. iatoolkit/services/language_service.py +89 -0
  58. iatoolkit/services/license_service.py +82 -0
  59. iatoolkit/services/llm_client_service.py +438 -0
  60. iatoolkit/services/load_documents_service.py +174 -0
  61. iatoolkit/services/mail_service.py +213 -0
  62. {services → iatoolkit/services}/profile_service.py +200 -101
  63. iatoolkit/services/prompt_service.py +303 -0
  64. iatoolkit/services/query_service.py +467 -0
  65. iatoolkit/services/search_service.py +55 -0
  66. iatoolkit/services/sql_service.py +169 -0
  67. iatoolkit/services/tool_service.py +246 -0
  68. iatoolkit/services/user_feedback_service.py +117 -0
  69. iatoolkit/services/user_session_context_service.py +213 -0
  70. iatoolkit/static/images/fernando.jpeg +0 -0
  71. iatoolkit/static/images/iatoolkit_core.png +0 -0
  72. iatoolkit/static/images/iatoolkit_logo.png +0 -0
  73. iatoolkit/static/js/chat_feedback_button.js +80 -0
  74. iatoolkit/static/js/chat_filepond.js +85 -0
  75. iatoolkit/static/js/chat_help_content.js +124 -0
  76. iatoolkit/static/js/chat_history_button.js +110 -0
  77. iatoolkit/static/js/chat_logout_button.js +36 -0
  78. iatoolkit/static/js/chat_main.js +401 -0
  79. iatoolkit/static/js/chat_model_selector.js +227 -0
  80. iatoolkit/static/js/chat_onboarding_button.js +103 -0
  81. iatoolkit/static/js/chat_prompt_manager.js +94 -0
  82. iatoolkit/static/js/chat_reload_button.js +38 -0
  83. iatoolkit/static/styles/chat_iatoolkit.css +559 -0
  84. iatoolkit/static/styles/chat_modal.css +133 -0
  85. iatoolkit/static/styles/chat_public.css +135 -0
  86. iatoolkit/static/styles/documents.css +598 -0
  87. iatoolkit/static/styles/landing_page.css +398 -0
  88. iatoolkit/static/styles/llm_output.css +148 -0
  89. iatoolkit/static/styles/onboarding.css +176 -0
  90. iatoolkit/system_prompts/__init__.py +0 -0
  91. iatoolkit/system_prompts/query_main.prompt +30 -23
  92. iatoolkit/system_prompts/sql_rules.prompt +47 -12
  93. iatoolkit/templates/_company_header.html +45 -0
  94. iatoolkit/templates/_login_widget.html +42 -0
  95. iatoolkit/templates/base.html +78 -0
  96. iatoolkit/templates/change_password.html +66 -0
  97. iatoolkit/templates/chat.html +337 -0
  98. iatoolkit/templates/chat_modals.html +185 -0
  99. iatoolkit/templates/error.html +51 -0
  100. iatoolkit/templates/forgot_password.html +51 -0
  101. iatoolkit/templates/onboarding_shell.html +106 -0
  102. iatoolkit/templates/signup.html +79 -0
  103. iatoolkit/views/__init__.py +5 -0
  104. iatoolkit/views/base_login_view.py +96 -0
  105. iatoolkit/views/change_password_view.py +116 -0
  106. iatoolkit/views/chat_view.py +76 -0
  107. iatoolkit/views/embedding_api_view.py +65 -0
  108. iatoolkit/views/forgot_password_view.py +75 -0
  109. iatoolkit/views/help_content_api_view.py +54 -0
  110. iatoolkit/views/history_api_view.py +56 -0
  111. iatoolkit/views/home_view.py +63 -0
  112. iatoolkit/views/init_context_api_view.py +74 -0
  113. iatoolkit/views/llmquery_api_view.py +59 -0
  114. iatoolkit/views/load_company_configuration_api_view.py +49 -0
  115. iatoolkit/views/load_document_api_view.py +65 -0
  116. iatoolkit/views/login_view.py +170 -0
  117. iatoolkit/views/logout_api_view.py +57 -0
  118. iatoolkit/views/profile_api_view.py +46 -0
  119. iatoolkit/views/prompt_api_view.py +37 -0
  120. iatoolkit/views/root_redirect_view.py +22 -0
  121. iatoolkit/views/signup_view.py +100 -0
  122. iatoolkit/views/static_page_view.py +27 -0
  123. iatoolkit/views/user_feedback_api_view.py +60 -0
  124. iatoolkit/views/users_api_view.py +33 -0
  125. iatoolkit/views/verify_user_view.py +60 -0
  126. iatoolkit-0.107.4.dist-info/METADATA +268 -0
  127. iatoolkit-0.107.4.dist-info/RECORD +132 -0
  128. iatoolkit-0.107.4.dist-info/licenses/LICENSE +21 -0
  129. iatoolkit-0.107.4.dist-info/licenses/LICENSE_COMMUNITY.md +15 -0
  130. {iatoolkit-0.3.9.dist-info → iatoolkit-0.107.4.dist-info}/top_level.txt +0 -1
  131. iatoolkit/iatoolkit.py +0 -413
  132. iatoolkit/system_prompts/arquitectura.prompt +0 -32
  133. iatoolkit-0.3.9.dist-info/METADATA +0 -252
  134. iatoolkit-0.3.9.dist-info/RECORD +0 -32
  135. services/__init__.py +0 -5
  136. services/api_service.py +0 -75
  137. services/dispatcher_service.py +0 -351
  138. services/excel_service.py +0 -98
  139. services/history_service.py +0 -45
  140. services/jwt_service.py +0 -91
  141. services/load_documents_service.py +0 -212
  142. services/mail_service.py +0 -62
  143. services/prompt_manager_service.py +0 -172
  144. services/query_service.py +0 -334
  145. services/search_service.py +0 -32
  146. services/sql_service.py +0 -42
  147. services/tasks_service.py +0 -188
  148. services/user_feedback_service.py +0 -67
  149. services/user_session_context_service.py +0 -85
  150. {iatoolkit-0.3.9.dist-info → iatoolkit-0.107.4.dist-info}/WHEEL +0 -0
@@ -0,0 +1,134 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Product: IAToolkit
3
+ #
4
+ # IAToolkit is open source software.
5
+
6
+ from iatoolkit.common.exceptions import IAToolkitException
7
+ from iatoolkit.services.prompt_service import PromptService
8
+ from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
9
+ from iatoolkit.services.configuration_service import ConfigurationService
10
+ from iatoolkit.common.util import Utility
11
+ from injector import inject
12
+ import logging
13
+ import os
14
+
15
+
16
class Dispatcher:
    """
    Routes a function call either to a system tool handler or to the
    handler of a registered company instance.

    ToolService, the company registry and the company instances are resolved
    lazily through properties the first time they are needed.
    """

    @inject
    def __init__(self,
                 config_service: ConfigurationService,
                 prompt_service: PromptService,
                 llmquery_repo: LLMQueryRepo,
                 util: Utility,):
        self.config_service = config_service
        self.prompt_service = prompt_service
        self.llmquery_repo = llmquery_repo
        self.util = util

        # Populated on first access by the properties below.
        self._tool_service = None
        self._company_registry = None
        self._company_instances = None

    @property
    def tool_service(self):
        """Lazy-loads and returns the ToolService instance to avoid circular imports."""
        if self._tool_service is None:
            from iatoolkit import current_iatoolkit
            from iatoolkit.services.tool_service import ToolService
            self._tool_service = current_iatoolkit().get_injector().get(ToolService)
        return self._tool_service

    @property
    def company_registry(self):
        """Lazy-loads and returns the CompanyRegistry instance."""
        if self._company_registry is None:
            from iatoolkit.company_registry import get_company_registry
            self._company_registry = get_company_registry()
        return self._company_registry

    @property
    def company_instances(self):
        """Lazy-loads and returns the instantiated company classes."""
        if self._company_instances is None:
            self._company_instances = self.company_registry.get_all_company_instances()
        return self._company_instances

    def load_company_configs(self):
        """
        Register the system tools and prompts, then load the configuration
        (company.yaml) of every registered company.

        Returns:
            True once every company configuration has been loaded.

        Raises:
            IAToolkitException: if the system tools or prompts cannot be registered.
            Exception: any error raised while loading a company configuration
                is logged and re-raised unchanged.
        """
        # initialize the system functions and prompts
        self.setup_iatoolkit_system()

        for company_short_name, company_instance in self.company_instances.items():
            try:
                # read company configuration from company.yaml
                config, errors = self.config_service.load_configuration(company_short_name)

                # The fatal validation check is disabled, so errors do not abort
                # loading; log them so they are not lost silently.
                if errors:
                    logging.warning(
                        f"company.yaml validation errors for '{company_short_name}': {errors}"
                    )

                # complement the instance with its own data
                company_instance.company_short_name = company_short_name
                company_instance.company = config.get('company')

            except Exception as e:
                logging.error(f"❌ Failed to register configuration for '{company_short_name}': {e}")
                # bare raise keeps the original traceback intact
                raise

        return True

    def setup_iatoolkit_system(self):
        """
        Register the system tools and the system prompts.

        Raises:
            IAToolkitException: (DATABASE_ERROR) if either registration fails;
                the LLM query repository is rolled back first.
        """
        try:
            # system tools registration
            self.tool_service.register_system_tools()

            # system prompts registration
            self.prompt_service.register_system_prompts()

        except Exception as e:
            self.llmquery_repo.rollback()
            raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR, str(e)) from e

    def dispatch(self, company_short_name: str, function_name: str, **kwargs) -> dict:
        """
        Execute `function_name` on behalf of a company.

        System tools are served by the handler registered in ToolService; any
        other function is delegated to the company instance's `handle_request`.

        Args:
            company_short_name: company identifier; matched case-insensitively
                (presumably the registry keys are lowercase - verify against
                the company registry).
            function_name: name of the tool/function to execute.
            **kwargs: arguments forwarded to the handler.

        Returns:
            Whatever the selected handler returns.

        Raises:
            IAToolkitException: if the company is not configured, or wrapping
                any unexpected error raised by the company handler.
        """
        company_key = company_short_name.lower()

        if company_key not in self.company_instances:
            available_companies = list(self.company_instances.keys())
            raise IAToolkitException(
                IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                f"Company '{company_short_name}' not configured. available companies: {available_companies}"
            )

        # check if action is a system function using ToolService
        if self.tool_service.is_system_tool(function_name):
            handler = self.tool_service.get_system_handler(function_name)
            logging.info(
                f"Calling system handler [{function_name}] "
                f"with company_short_name={company_short_name} "
                f"and kwargs={kwargs}"
            )
            return handler(company_short_name, **kwargs)

        # Look up with the same normalized key used for the membership check;
        # indexing with the raw name raised KeyError for mixed-case input.
        company_instance = self.company_instances[company_key]
        try:
            return company_instance.handle_request(function_name, **kwargs)
        except IAToolkitException:
            # Already an IAToolkitException: re-raise to preserve the original error type.
            raise
        except Exception as e:
            logging.exception(e)
            raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                                     f"Error in function call '{function_name}': {str(e)}") from e

    def get_company_instance(self, company_name: str):
        """Returns the instance for a given company name, or None if it is not registered."""
        return self.company_instances.get(company_name)
@@ -1,7 +1,7 @@
1
1
  # Copyright (c) 2024 Fernando Libedinsky
2
- # Producto: IAToolkit
3
- # Todos los derechos reservados.
4
- # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
2
+ # Product: IAToolkit
3
+ #
4
+ # IAToolkit is open source software.
5
5
 
6
6
  from docx import Document
7
7
  import fitz # PyMuPDF
@@ -10,26 +10,34 @@ import io
10
10
  import os
11
11
  import pytesseract
12
12
  from injector import inject
13
- from common.exceptions import IAToolkitException
13
+ from iatoolkit.common.exceptions import IAToolkitException
14
+ from iatoolkit.services.i18n_service import I18nService
15
+ from iatoolkit.services.excel_service import ExcelService
16
+
14
17
 
15
18
  class DocumentService:
16
19
  @inject
17
- def __init__(self):
20
+ def __init__(self,
21
+ excel_service: ExcelService,
22
+ i18n_service: I18nService):
23
+ self.excel_service = excel_service
24
+ self.i18n_service = i18n_service
25
+
18
26
  # max number of pages to load
19
- self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "10"))
27
+ self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "200"))
20
28
 
21
29
  def file_to_txt(self, filename, file_content):
22
30
  try:
23
31
  if filename.lower().endswith('.docx'):
24
32
  return self.read_docx(file_content)
25
- elif filename.lower().endswith('.txt'):
33
+ elif filename.lower().endswith('.txt') or filename.lower().endswith('.md'):
26
34
  if isinstance(file_content, bytes):
27
35
  try:
28
36
  # decode using UTF-8
29
37
  file_content = file_content.decode('utf-8')
30
38
  except UnicodeDecodeError:
31
39
  raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
32
- "El archivo no es texto o la codificación no es UTF-8")
40
+ self.i18n_service.t('errors.services.no_text_file'))
33
41
 
34
42
  return file_content
35
43
  elif filename.lower().endswith('.pdf'):
@@ -37,6 +45,10 @@ class DocumentService:
37
45
  return self.read_scanned_pdf(file_content)
38
46
  else:
39
47
  return self.read_pdf(file_content)
48
+ elif filename.lower().endswith(('.xlsx', '.xls')):
49
+ return self.excel_service.read_excel(file_content)
50
+ elif filename.lower().endswith('.csv'):
51
+ return self.excel_service.read_csv(file_content)
40
52
  else:
41
53
  raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
42
54
  "Formato de archivo desconocido")
@@ -0,0 +1,148 @@
1
+ # iatoolkit/services/embedding_service.py
2
+ # Copyright (c) 2024 Fernando Libedinsky
3
+ # Product: IAToolkit
4
+
5
+ import os
6
+ import base64
7
+ import numpy as np
8
+ from huggingface_hub import InferenceClient
9
+ from openai import OpenAI
10
+ from injector import inject
11
+ from iatoolkit.services.configuration_service import ConfigurationService
12
+ from iatoolkit.services.i18n_service import I18nService
13
+ from iatoolkit.repositories.profile_repo import ProfileRepo
14
+ import logging
15
+
16
+
17
+ # Wrapper classes to create a common interface for embedding clients
18
class EmbeddingClientWrapper:
    """
    Common interface shared by all embedding client wrappers.

    Holds the provider client together with the model name and the embedding
    size; subclasses supply the provider-specific `get_embedding`.
    """

    def __init__(self, client, model: str, dimensions: int = 1536):
        self.client, self.model, self.dimensions = client, model, dimensions

    def get_embedding(self, text: str) -> list[float]:
        """Return the embedding of `text`; must be overridden by subclasses."""
        raise NotImplementedError()
28
+
29
class HuggingFaceClientWrapper(EmbeddingClientWrapper):
    """Embedding wrapper backed by the client's `feature_extraction` call."""

    def get_embedding(self, text: str) -> list[float]:
        """
        Return the embedding of `text`.

        When the client answers with a non-empty list whose first item is
        itself a list, that first item is returned; any other result is
        passed through untouched.
        """
        result = self.client.feature_extraction(text)
        is_nested_list = (
            isinstance(result, list)
            and len(result) > 0
            and isinstance(result[0], list)
        )
        return result[0] if is_nested_list else result
36
+
37
class OpenAIClientWrapper(EmbeddingClientWrapper):
    """Embedding wrapper that calls the OpenAI embeddings endpoint."""

    # Name prefix of the legacy embedding family. The OpenAI API only accepts
    # the `dimensions` parameter for text-embedding-3 and later models.
    _NO_DIMENSIONS_PREFIX = "text-embedding-ada"

    def get_embedding(self, text: str) -> list[float]:
        """
        Return the embedding of `text` produced by `self.model`.

        `dimensions` is sent only for models that accept it; the legacy
        `text-embedding-ada-*` family (the factory's default model) rejects
        the parameter, so it is omitted there.
        """
        # The OpenAI API expects the input text to be clean
        text = text.replace("\n", " ")

        request = {"input": [text], "model": self.model}
        if not (self.model or "").startswith(self._NO_DIMENSIONS_PREFIX):
            request["dimensions"] = self.dimensions

        response = self.client.embeddings.create(**request)
        return response.data[0].embedding
45
+
46
+ # Factory and Service classes
47
class EmbeddingClientFactory:
    """
    Creates and caches one embedding client wrapper per company.

    NOTE(review): the cache is a plain dict with no locking, so concurrent
    first calls for the same company may each build a wrapper (the last one
    stored wins). Add a lock if strict single-instance semantics are needed.
    """

    @inject
    def __init__(self, config_service: ConfigurationService):
        self.config_service = config_service
        self._clients = {}  # Cache of initialized wrappers, keyed by company short name

    def get_client(self, company_short_name: str) -> EmbeddingClientWrapper:
        """
        Retrieves a configured embedding client wrapper for a specific company.
        If the client is not in the cache, it creates and stores it.

        Args:
            company_short_name: company whose `embedding_provider` configuration is used.

        Returns:
            The cached or newly created wrapper.

        Raises:
            ValueError: if the provider, the `api_key_name` setting, or the
                environment variable it names is missing.
            NotImplementedError: if the configured provider is not supported.
        """
        if company_short_name in self._clients:
            return self._clients[company_short_name]

        # Get the embedding provider and model from the company.yaml
        embedding_config = self.config_service.get_configuration(company_short_name, 'embedding_provider')
        if not embedding_config:
            raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")

        provider = embedding_config.get('provider')
        if not provider:
            raise ValueError(f"Embedding provider not configured for company '{company_short_name}'.")
        model = embedding_config.get('model')
        # `or` also covers an explicit empty/null value in the configuration
        dimensions = int(embedding_config.get('dimensions') or 1536)

        api_key_name = embedding_config.get('api_key_name')
        if not api_key_name:
            raise ValueError("Missing configuration for embedding_provider:api_key_name in config.yaml.")

        # The configuration stores only the NAME of the environment variable
        api_key = os.getenv(api_key_name)
        if not api_key:
            raise ValueError(f"Environment variable '{api_key_name}' is not set.")

        # Build the provider-specific wrapper, applying a default model when none is configured
        if provider == 'huggingface':
            model = model or 'sentence-transformers/all-MiniLM-L6-v2'
            client = InferenceClient(model=model, token=api_key)
            wrapper = HuggingFaceClientWrapper(client, model, dimensions)
        elif provider == 'openai':
            model = model or 'text-embedding-ada-002'
            client = OpenAI(api_key=api_key)
            wrapper = OpenAIClientWrapper(client, model, dimensions)
        else:
            raise NotImplementedError(f"Embedding provider '{provider}' is not implemented.")

        logging.debug(f"Embedding client for '{company_short_name}' created with model: {model} via {provider}")
        self._clients[company_short_name] = wrapper
        return wrapper
102
+
103
class EmbeddingService:
    """
    Generates text embeddings for a company.

    The provider-specific client is obtained on demand from the
    EmbeddingClientFactory for each request.
    """

    @inject
    def __init__(self,
                 client_factory: EmbeddingClientFactory,
                 profile_repo: ProfileRepo,
                 i18n_service: I18nService):
        self.client_factory = client_factory
        self.i18n_service = i18n_service
        self.profile_repo = profile_repo

    def embed_text(self, company_short_name: str, text: str, to_base64: bool = False) -> list[float] | str:
        """
        Compute the embedding of `text` with the company's configured client.

        Args:
            company_short_name: company whose embedding client is used.
            text: text to embed.
            to_base64: when True, return the float32 bytes of the vector
                encoded as a base64 string instead of the vector itself.

        Raises:
            ValueError: if the company does not exist.
            Exception: any failure is logged and re-raised unchanged.
        """
        try:
            if not self.profile_repo.get_company_by_short_name(company_short_name):
                raise ValueError(self.i18n_service.t('errors.company_not_found', company_short_name=company_short_name))

            # Ask the factory for the wrapper, then use its common interface
            wrapper = self.client_factory.get_client(company_short_name)
            vector = wrapper.get_embedding(text)

            if not to_base64:
                return vector

            raw_bytes = np.array(vector, dtype=np.float32).tobytes()
            return base64.b64encode(raw_bytes).decode('utf-8')
        except Exception as e:
            logging.error(f"Error generating embedding for text: {text[:80]}... - {e}")
            raise

    def get_model_name(self, company_short_name: str) -> str:
        """Return the model name held by the company's embedding client wrapper."""
        return self.client_factory.get_client(company_short_name).model
@@ -0,0 +1,156 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Product: IAToolkit
3
+ #
4
+ # IAToolkit is open source software.
5
+
6
+ from flask import current_app, jsonify
7
+ from iatoolkit.common.util import Utility
8
+ import pandas as pd
9
+ from uuid import uuid4
10
+ from pathlib import Path
11
+ from iatoolkit.common.exceptions import IAToolkitException
12
+ from iatoolkit.services.i18n_service import I18nService
13
+ from injector import inject
14
+ import os
15
+ import io
16
+ import logging
17
+ import json
18
+
19
+ EXCEL_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
20
+
21
+
22
class ExcelService:
    """Reads Excel/CSV content into JSON text and generates downloadable Excel files."""

    @inject
    def __init__(self,
                 util: Utility,
                 i18n_service: I18nService):
        self.util = util
        self.i18n_service = i18n_service

    def read_excel(self, file_content: bytes) -> str:
        """
        Reads an Excel file and converts its content to a JSON string.
        - If the Excel file has a single sheet, it returns the JSON of that sheet.
        - If it has multiple sheets, it returns a JSON object with sheet names as keys
          and the list of row records of each sheet as values.

        Raises:
            IAToolkitException: (FILE_FORMAT_ERROR) if the content cannot be read.
        """
        try:
            # BytesIO lets pandas read the in-memory bytes; sheet_name=None loads
            # every sheet into a dict of DataFrames.
            workbook = pd.read_excel(io.BytesIO(file_content), sheet_name=None)

            if len(workbook) == 1:
                only_sheet = next(iter(workbook.values()))
                return only_sheet.to_json(orient='records', indent=4)

            # Parse each sheet's JSON back into objects before the final dump.
            # Storing the to_json() strings directly produced double-encoded
            # JSON (every sheet became one escaped string).
            sheets = {
                sheet_name: json.loads(df.to_json(orient='records'))
                for sheet_name, df in workbook.items()
            }
            return json.dumps(sheets, indent=4)

        except Exception as e:
            raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
                                     self.i18n_service.t('errors.services.cannot_read_excel')) from e

    def read_csv(self, file_content: bytes) -> str:
        """
        Reads a CSV file and converts its content to a JSON string.

        Raises:
            IAToolkitException: (FILE_FORMAT_ERROR) if the content cannot be read.
        """
        try:
            df = pd.read_csv(io.BytesIO(file_content))
            return df.to_json(orient='records', indent=4)

        except Exception as e:
            raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
                                     self.i18n_service.t('errors.services.cannot_read_csv')) from e

    def excel_generator(self, company_short_name: str, **kwargs) -> 'dict | str':
        """
        Generates an Excel file from a list of dictionaries.

        Expected parameters in kwargs:
          - filename: str (display name, e.g. "reporte_clientes.xlsx") [required]
          - data: list[dict] (rows of the sheet) [required]
          - sheet_name: str = "hoja 1"

        Returns:
            On success, a dict such as:
            {
                "filename": "reporte.xlsx",
                "attachment_token": "8b7f8a66-...-c1c3.xlsx",
                "content_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                "download_link": "/download/8b7f8a66-...-c1c3.xlsx"
            }
            When a required parameter or the download directory is missing,
            a translated error message (str) is returned instead.

        Raises:
            IAToolkitException: (CALL_ERROR) if the file cannot be created.
        """
        try:
            # 1. validate the parameters
            fname = kwargs.get('filename')
            if not fname:
                return self.i18n_service.t('errors.services.no_output_file')

            data = kwargs.get('data')
            if not data or not isinstance(data, list):
                return self.i18n_service.t('errors.services.no_data_for_excel')

            sheet_name = kwargs.get('sheet_name', 'hoja 1')

            # 2. convert the rows to a dataframe
            df = pd.DataFrame(data)

            # 3. create a unique temporary name
            token = f"{uuid4()}.xlsx"

            # 4. check that the download directory is configured
            if 'IATOOLKIT_DOWNLOAD_DIR' not in current_app.config:
                return self.i18n_service.t('errors.services.no_download_directory')

            download_dir = current_app.config['IATOOLKIT_DOWNLOAD_DIR']
            filepath = Path(download_dir) / token
            filepath.parent.mkdir(parents=True, exist_ok=True)

            # 5. save the excel file in the download directory
            df.to_excel(filepath, index=False, sheet_name=sheet_name)

            # 6. return the link to the LLM
            return {
                "filename": fname,
                "attachment_token": token,
                "content_type": EXCEL_MIME,
                "download_link": f"/download/{token}"
            }

        except Exception as e:
            raise IAToolkitException(IAToolkitException.ErrorType.CALL_ERROR,
                                     self.i18n_service.t('errors.services.cannot_create_excel')) from e

    def validate_file_access(self, filename):
        """
        Checks that `filename` names an existing regular file under the
        application's `static/temp` directory.

        Returns:
            None when the file is accessible; otherwise a `jsonify` response
            with an "error" message.

        NOTE(review): this looks under `<root_path>/static/temp`, whereas
        `excel_generator` writes to IATOOLKIT_DOWNLOAD_DIR - confirm both
        refer to the same location.
        """
        try:
            if not filename:
                return jsonify({"error": self.i18n_service.t('errors.services.invalid_filename')})
            # Prevent path traversal attacks
            if '..' in filename or filename.startswith('/') or '\\' in filename:
                return jsonify({"error": self.i18n_service.t('errors.services.invalid_filename')})

            temp_dir = os.path.join(current_app.root_path, 'static', 'temp')
            file_path = os.path.join(temp_dir, filename)

            if not os.path.exists(file_path):
                return jsonify({"error": self.i18n_service.t('errors.services.file_not_exist')})

            if not os.path.isfile(file_path):
                return jsonify({"error": self.i18n_service.t('errors.services.path_is_not_a_file')})

            return None

        except Exception as e:
            error_msg = f"File validation error {filename}: {str(e)}"
            logging.error(error_msg)
            return jsonify({"error": self.i18n_service.t('errors.services.file_validation_error')})
@@ -1,58 +1,70 @@
1
1
  # Copyright (c) 2024 Fernando Libedinsky
2
- # Producto: IAToolkit
3
- # Todos los derechos reservados.
4
- # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
2
+ # Product: IAToolkit
3
+ #
4
+ # IAToolkit is open source software.
5
5
 
6
- from infra.connectors.file_connector import FileConnector
6
+ from iatoolkit.infra.connectors.file_connector import FileConnector
7
7
  import logging
8
8
  import os
9
9
  from typing import Optional, Callable, Dict
10
+ from iatoolkit.repositories.models import Company
10
11
 
11
12
 
12
13
class FileProcessorConfig:
    """Configuration class for the FileProcessor."""
    def __init__(
            self,
            filters: Dict,
            callback: Callable[[Company, str, bytes, dict], None],
            continue_on_error: bool = True,
            log_file: str = 'file_processor.log',
            echo: bool = False,
            context: Optional[dict] = None
    ):
        """
        Initializes the FileProcessor configuration.

        Args:
            filters (Dict): A dictionary of filters to apply to file names.
                            Example: {'filename_contains': '.pdf'}
            callback (Callable): The function to execute for each processed file.
                            It is called with the keyword arguments `company`
                            (taken from `context['company']`), `filename` (str),
                            `content` (bytes) and `context` (dict).
            continue_on_error (bool): If True, continues processing other files upon an error.
            log_file (str): The path to the log file (stored on the configuration).
            echo (bool): If True, prints progress to the console.
            context (dict): A context dictionary passed to the callback;
                            defaults to an empty dict.
        """
        self.filters = filters
        self.callback = callback
        self.continue_on_error = continue_on_error
        self.log_file = log_file
        self.echo = echo
        self.context = context or {}
28
43
 
29
44
  class FileProcessor:
45
+ """
46
+ A generic service to process files from a given data source (connector).
47
+ It lists files, applies filters, and executes a specific action for each one.
48
+ """
30
49
  def __init__(self,
31
50
  connector: FileConnector,
32
51
  config: FileProcessorConfig,
33
52
  logger: Optional[logging.Logger] = None):
34
53
  self.connector = connector
35
54
  self.config = config
36
- self.logger = logger or self._setup_logger()
37
55
  self.processed_files = 0
38
56
 
39
- def _setup_logger(self):
40
- logging.basicConfig(
41
- filename=self.config.log_file,
42
- level=logging.INFO,
43
- format='%(asctime)s - %(levelname)s - %(message)s'
44
- )
45
- return logging.getLogger(__name__)
46
57
 
47
58
  def process_files(self):
59
+ # Fetches files from the connector, filters them, and processes them.
48
60
  try:
49
61
  files = self.connector.list_files()
50
62
  except Exception as e:
51
- self.logger.error(f"Error fetching files: {e}")
63
+ logging.error(f"Error fetching files: {e}")
52
64
  return False
53
65
 
54
66
  if self.config.echo:
55
- print(f'cargando un total de {len(files)} archivos')
67
+ print(f'loading {len(files)} files')
56
68
 
57
69
  for file_info in files:
58
70
  file_path = file_info['path']
@@ -67,15 +79,18 @@ class FileProcessor:
67
79
 
68
80
  content = self.connector.get_file_content(file_path)
69
81
 
70
- # execute the action defined
82
+ # execute the callback function
71
83
  filename = os.path.basename(file_name)
72
- self.config.action(filename, content, self.config.context)
84
+ self.config.callback(company=self.config.context.get('company'),
85
+ filename=filename,
86
+ content=content,
87
+ context=self.config.context)
73
88
  self.processed_files += 1
74
89
 
75
- self.logger.info(f"Successfully processed file: {file_path}")
90
+ logging.info(f"Successfully processed file: {file_path}")
76
91
 
77
92
  except Exception as e:
78
- self.logger.error(f"Error processing {file_path}: {e}")
93
+ logging.error(f"Error processing {file_path}: {e}")
79
94
  if not self.config.continue_on_error:
80
95
  raise e
81
96