iatoolkit 0.11.0__py3-none-any.whl → 0.71.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. iatoolkit/__init__.py +2 -6
  2. iatoolkit/base_company.py +9 -29
  3. iatoolkit/cli_commands.py +1 -1
  4. iatoolkit/common/routes.py +96 -52
  5. iatoolkit/common/session_manager.py +2 -1
  6. iatoolkit/common/util.py +17 -27
  7. iatoolkit/company_registry.py +1 -2
  8. iatoolkit/iatoolkit.py +97 -53
  9. iatoolkit/infra/llm_client.py +15 -20
  10. iatoolkit/infra/llm_proxy.py +38 -10
  11. iatoolkit/infra/openai_adapter.py +1 -1
  12. iatoolkit/infra/redis_session_manager.py +48 -2
  13. iatoolkit/locales/en.yaml +167 -0
  14. iatoolkit/locales/es.yaml +163 -0
  15. iatoolkit/repositories/database_manager.py +23 -3
  16. iatoolkit/repositories/document_repo.py +1 -1
  17. iatoolkit/repositories/models.py +35 -10
  18. iatoolkit/repositories/profile_repo.py +3 -2
  19. iatoolkit/repositories/vs_repo.py +26 -20
  20. iatoolkit/services/auth_service.py +193 -0
  21. iatoolkit/services/branding_service.py +70 -25
  22. iatoolkit/services/company_context_service.py +155 -0
  23. iatoolkit/services/configuration_service.py +133 -0
  24. iatoolkit/services/dispatcher_service.py +80 -105
  25. iatoolkit/services/document_service.py +5 -2
  26. iatoolkit/services/embedding_service.py +146 -0
  27. iatoolkit/services/excel_service.py +30 -26
  28. iatoolkit/services/file_processor_service.py +4 -12
  29. iatoolkit/services/history_service.py +7 -16
  30. iatoolkit/services/i18n_service.py +104 -0
  31. iatoolkit/services/jwt_service.py +18 -29
  32. iatoolkit/services/language_service.py +83 -0
  33. iatoolkit/services/load_documents_service.py +100 -113
  34. iatoolkit/services/mail_service.py +9 -4
  35. iatoolkit/services/profile_service.py +152 -76
  36. iatoolkit/services/prompt_manager_service.py +20 -16
  37. iatoolkit/services/query_service.py +208 -96
  38. iatoolkit/services/search_service.py +11 -4
  39. iatoolkit/services/sql_service.py +57 -25
  40. iatoolkit/services/tasks_service.py +1 -1
  41. iatoolkit/services/user_feedback_service.py +72 -34
  42. iatoolkit/services/user_session_context_service.py +112 -54
  43. iatoolkit/static/images/fernando.jpeg +0 -0
  44. iatoolkit/static/js/chat_feedback_button.js +80 -0
  45. iatoolkit/static/js/chat_help_content.js +124 -0
  46. iatoolkit/static/js/chat_history_button.js +110 -0
  47. iatoolkit/static/js/chat_logout_button.js +36 -0
  48. iatoolkit/static/js/chat_main.js +135 -222
  49. iatoolkit/static/js/chat_onboarding_button.js +103 -0
  50. iatoolkit/static/js/chat_prompt_manager.js +94 -0
  51. iatoolkit/static/js/chat_reload_button.js +35 -0
  52. iatoolkit/static/styles/chat_iatoolkit.css +289 -210
  53. iatoolkit/static/styles/chat_modal.css +63 -77
  54. iatoolkit/static/styles/chat_public.css +107 -0
  55. iatoolkit/static/styles/landing_page.css +182 -0
  56. iatoolkit/static/styles/onboarding.css +176 -0
  57. iatoolkit/system_prompts/query_main.prompt +5 -22
  58. iatoolkit/templates/_company_header.html +20 -0
  59. iatoolkit/templates/_login_widget.html +42 -0
  60. iatoolkit/templates/base.html +40 -20
  61. iatoolkit/templates/change_password.html +57 -36
  62. iatoolkit/templates/chat.html +180 -86
  63. iatoolkit/templates/chat_modals.html +138 -68
  64. iatoolkit/templates/error.html +44 -8
  65. iatoolkit/templates/forgot_password.html +40 -23
  66. iatoolkit/templates/index.html +145 -0
  67. iatoolkit/templates/login_simulation.html +45 -0
  68. iatoolkit/templates/onboarding_shell.html +107 -0
  69. iatoolkit/templates/signup.html +63 -65
  70. iatoolkit/views/base_login_view.py +91 -0
  71. iatoolkit/views/change_password_view.py +56 -31
  72. iatoolkit/views/embedding_api_view.py +65 -0
  73. iatoolkit/views/external_login_view.py +61 -28
  74. iatoolkit/views/{file_store_view.py → file_store_api_view.py} +10 -3
  75. iatoolkit/views/forgot_password_view.py +27 -21
  76. iatoolkit/views/help_content_api_view.py +54 -0
  77. iatoolkit/views/history_api_view.py +56 -0
  78. iatoolkit/views/home_view.py +50 -23
  79. iatoolkit/views/index_view.py +14 -0
  80. iatoolkit/views/init_context_api_view.py +74 -0
  81. iatoolkit/views/llmquery_api_view.py +58 -0
  82. iatoolkit/views/login_simulation_view.py +93 -0
  83. iatoolkit/views/login_view.py +130 -37
  84. iatoolkit/views/logout_api_view.py +49 -0
  85. iatoolkit/views/profile_api_view.py +46 -0
  86. iatoolkit/views/{prompt_view.py → prompt_api_view.py} +10 -10
  87. iatoolkit/views/signup_view.py +41 -36
  88. iatoolkit/views/{tasks_view.py → tasks_api_view.py} +10 -36
  89. iatoolkit/views/tasks_review_api_view.py +55 -0
  90. iatoolkit/views/user_feedback_api_view.py +60 -0
  91. iatoolkit/views/verify_user_view.py +34 -29
  92. {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/METADATA +41 -23
  93. iatoolkit-0.71.2.dist-info/RECORD +122 -0
  94. iatoolkit-0.71.2.dist-info/licenses/LICENSE +21 -0
  95. iatoolkit/common/auth.py +0 -200
  96. iatoolkit/static/images/arrow_up.png +0 -0
  97. iatoolkit/static/images/diagrama_iatoolkit.jpg +0 -0
  98. iatoolkit/static/images/logo_clinica.png +0 -0
  99. iatoolkit/static/images/logo_iatoolkit.png +0 -0
  100. iatoolkit/static/images/logo_maxxa.png +0 -0
  101. iatoolkit/static/images/logo_notaria.png +0 -0
  102. iatoolkit/static/images/logo_tarjeta.png +0 -0
  103. iatoolkit/static/images/logo_umayor.png +0 -0
  104. iatoolkit/static/images/upload.png +0 -0
  105. iatoolkit/static/js/chat_feedback.js +0 -115
  106. iatoolkit/static/js/chat_history.js +0 -117
  107. iatoolkit/static/styles/chat_info.css +0 -53
  108. iatoolkit/templates/header.html +0 -31
  109. iatoolkit/templates/home.html +0 -199
  110. iatoolkit/templates/login.html +0 -43
  111. iatoolkit/templates/test.html +0 -9
  112. iatoolkit/views/chat_token_request_view.py +0 -98
  113. iatoolkit/views/chat_view.py +0 -58
  114. iatoolkit/views/download_file_view.py +0 -58
  115. iatoolkit/views/external_chat_login_view.py +0 -95
  116. iatoolkit/views/history_view.py +0 -57
  117. iatoolkit/views/llmquery_view.py +0 -65
  118. iatoolkit/views/tasks_review_view.py +0 -83
  119. iatoolkit/views/user_feedback_view.py +0 -74
  120. iatoolkit-0.11.0.dist-info/RECORD +0 -110
  121. {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/WHEEL +0 -0
  122. {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,104 @@
1
+ # iatoolkit/services/i18n_service.py
2
+ import os
3
+ import logging
4
+ from injector import inject, singleton
5
+ from iatoolkit.common.util import Utility
6
+ from iatoolkit.services.language_service import LanguageService
7
+
8
@singleton
class I18nService:
    """
    Centralized service for internationalization (i18n).

    Loads every translation file found in the package's 'locales' directory
    into memory when the service is constructed, and resolves dotted keys
    (e.g. 'services.mail_sent') against those in-memory dictionaries.
    """
    # Language consulted when a key is missing in the requested language.
    FALLBACK_LANGUAGE = 'es'

    @inject
    def __init__(self, util: Utility, language_service: LanguageService):
        self.util = util
        self.language_service = language_service

        # Maps a language code (file name up to its first dot) to the parsed YAML content.
        self.translations = {}
        self._load_translations()

    def _load_translations(self):
        """
        Load all .yaml files from the 'locales' directory into memory.

        A missing directory or a file that fails to load is logged and skipped,
        so one broken translation file does not prevent the others from loading.
        """
        locales_dir = os.path.join(os.path.dirname(__file__), '..', 'locales')
        # isdir (rather than exists) so a stray file named 'locales' is reported
        # here instead of failing inside os.listdir.
        if not os.path.isdir(locales_dir):
            logging.error("Directory 'locales' not found.")
            return

        # Sorted so the load order does not depend on the file system's listing order.
        for filename in sorted(os.listdir(locales_dir)):
            if not filename.endswith('.yaml'):
                continue

            lang_code = filename.split('.')[0]
            filepath = os.path.join(locales_dir, filename)
            try:
                self.translations[lang_code] = self.util.load_schema_from_yaml(filepath)
            except Exception as e:
                logging.error(f"Error while loading the translation file {filepath}: {e}")

    def _get_nested_key(self, lang: str, key: str):
        """
        Look up a value in the translations of 'lang' using a dot-separated key.

        Returns:
            The value found (a string, a nested dict, or any other YAML value),
            or None when the language is not loaded or a key segment is missing.
        """
        data = self.translations.get(lang, {})
        for k in key.split('.'):
            if isinstance(data, dict) and k in data:
                data = data[k]
            else:
                return None
        return data

    def get_translation_block(self, key: str, lang: str = None) -> dict:
        """
        Gets a whole dictionary block from the translations.
        Useful for passing a set of translations to JavaScript.

        Args:
            key: Dot-separated path of the block.
            lang: Language code; when None it is taken from the language service.

        Returns:
            The block in the requested language, otherwise the block in the
            fallback language, otherwise an empty dict.
        """
        if lang is None:
            lang = self.language_service.get_current_language()

        # 1. Try to get the block in the requested language
        block = self._get_nested_key(lang, key)

        # 2. If not found (or not a dict), try the fallback language
        if not isinstance(block, dict):
            block = self._get_nested_key(self.FALLBACK_LANGUAGE, key)

        return block if isinstance(block, dict) else {}

    def t(self, key: str, lang: str = None, **kwargs) -> str:
        """
        Gets the translation for a given key.
        If 'lang' is provided, it's used. Otherwise, it's determined automatically.

        Args:
            key: Dot-separated translation key.
            lang: Language code; when None it is taken from the language service.
            **kwargs: Values substituted into the message with str.format.

        Returns:
            The translated (and, when possible, formatted) message. The key
            itself is returned when no translation exists in either the
            requested or the fallback language. If formatting fails, the
            unformatted message is returned. A key that resolves to a
            non-string value is returned as-is, without formatting.
        """
        # If no specific language is requested, determine it from the current context.
        if lang is None:
            lang = self.language_service.get_current_language()

        # 1. Attempt to get the translation in the requested language
        message = self._get_nested_key(lang, key)

        # 2. If not found, try the fallback language
        if message is None and lang != self.FALLBACK_LANGUAGE:
            logging.warning(
                f"Translation key '{key}' not found for language '{lang}'. Attempting fallback to '{self.FALLBACK_LANGUAGE}'.")
            message = self._get_nested_key(self.FALLBACK_LANGUAGE, key)

        # 3. If still not found, return the key itself as a last resort
        if message is None:
            logging.error(
                f"Translation key '{key}' not found, even in fallback '{self.FALLBACK_LANGUAGE}'.")
            return key

        # 4. If variables are provided, format the message. Only strings can be
        #    formatted; a key pointing at a nested block has no .format method.
        if kwargs and isinstance(message, str):
            try:
                return message.format(**kwargs)
            except KeyError as e:
                logging.error(f"Error formatting key '{key}': missing variable {e} in arguments.")
            except (IndexError, ValueError) as e:
                # Positional '{}' placeholders or unbalanced braces in the template:
                # degrade to the raw message instead of failing the caller.
                logging.error(f"Error formatting key '{key}': invalid placeholder in message ({e}).")

        return message
@@ -20,20 +20,22 @@ class JWTService:
20
20
  self.secret_key = app.config['JWT_SECRET_KEY']
21
21
  self.algorithm = app.config['JWT_ALGORITHM']
22
22
  except KeyError as e:
23
- logging.error(f"Configuración JWT faltante en app.config: {e}. JWTService no funcionará correctamente.")
24
- raise RuntimeError(f"Configuración JWT esencial faltante: {e}")
23
+ logging.error(f"missing JWT configuration: {e}.")
24
+ raise RuntimeError(f"missing JWT configuration variables: {e}")
25
25
 
26
26
  def generate_chat_jwt(self,
27
- company_id: int,
28
27
  company_short_name: str,
29
- external_user_id: str,
28
+ user_identifier: str,
30
29
  expires_delta_seconds: int) -> Optional[str]:
31
30
  # generate a JWT for a chat session
32
31
  try:
32
+ if not company_short_name or not user_identifier:
33
+ logging.error(f"Missing token ID: {company_short_name}/{user_identifier}")
34
+ return None
35
+
33
36
  payload = {
34
- 'company_id': company_id,
35
37
  'company_short_name': company_short_name,
36
- 'external_user_id': external_user_id,
38
+ 'user_identifier': user_identifier,
37
39
  'exp': time.time() + expires_delta_seconds,
38
40
  'iat': time.time(),
39
41
  'type': 'chat_session' # Identificador del tipo de token
@@ -41,10 +43,10 @@ class JWTService:
41
43
  token = jwt.encode(payload, self.secret_key, algorithm=self.algorithm)
42
44
  return token
43
45
  except Exception as e:
44
- logging.error(f"Error al generar JWT para company {company_id}, user {external_user_id}: {e}")
46
+ logging.error(f"Error al generar JWT para {company_short_name}/{user_identifier}: {e}")
45
47
  return None
46
48
 
47
- def validate_chat_jwt(self, token: str, expected_company_short_name: str) -> Optional[Dict[str, Any]]:
49
+ def validate_chat_jwt(self, token: str) -> Optional[Dict[str, Any]]:
48
50
  """
49
51
  Valida un JWT de sesión de chat.
50
52
  Retorna el payload decodificado si es válido y coincide con la empresa, o None.
@@ -56,36 +58,23 @@ class JWTService:
56
58
 
57
59
  # Validaciones adicionales
58
60
  if payload.get('type') != 'chat_session':
59
- logging.warning(f"Validación JWT fallida: tipo incorrecto '{payload.get('type')}'")
61
+ logging.warning(f"Invalid JWT type '{payload.get('type')}'")
60
62
  return None
61
63
 
62
- if payload.get('company_short_name') != expected_company_short_name:
63
- logging.warning(
64
- f"Validación JWT fallida: company_short_name no coincide. "
65
- f"Esperado: {expected_company_short_name}, Obtenido: {payload.get('company_short_name')}"
66
- )
64
+ # user_identifier debe estar presente
65
+ if not payload.get('user_identifier'):
66
+ logging.warning(f"missing user_identifier in JWT payload.")
67
67
  return None
68
68
 
69
- # external_user_id debe estar presente
70
- if 'external_user_id' not in payload or not payload['external_user_id']:
71
- logging.warning(f"Validación JWT fallida: external_user_id ausente o vacío.")
69
+ if not payload.get('company_short_name'):
70
+ logging.warning(f"missing company_short_name in JWT payload.")
72
71
  return None
73
72
 
74
- # company_id debe estar presente
75
- if 'company_id' not in payload or not isinstance(payload['company_id'], int):
76
- logging.warning(f"Validación JWT fallida: company_id ausente o tipo incorrecto.")
77
- return None
78
-
79
- logging.debug(
80
- f"JWT validado exitosamente para company: {payload.get('company_short_name')}, user: {payload.get('external_user_id')}")
81
73
  return payload
82
74
 
83
- except jwt.ExpiredSignatureError:
84
- logging.info(f"Validación JWT fallida: token expirado para {expected_company_short_name}")
85
- return None
86
75
  except jwt.InvalidTokenError as e:
87
- logging.warning(f"Validación JWT fallida: token inválido para {expected_company_short_name}. Error: {e}")
76
+ logging.warning(f"Invalid JWT token:: {e}")
88
77
  return None
89
78
  except Exception as e:
90
- logging.error(f"Error inesperado durante validación de JWT para {expected_company_short_name}: {e}")
79
+ logging.error(f"unexpected error during JWT validation: {e}")
91
80
  return None
@@ -0,0 +1,83 @@
1
+ # iatoolkit/services/language_service.py
2
+
3
+ import logging
4
+ from injector import inject, singleton
5
+ from flask import g, request
6
+ from iatoolkit.repositories.profile_repo import ProfileRepo
7
+ from iatoolkit.services.configuration_service import ConfigurationService
8
+ from iatoolkit.common.session_manager import SessionManager
9
+
10
@singleton
class LanguageService:
    """
    Determines the correct language for the current request
    based on a defined priority order (user preference, company default,
    system fallback) and caches it in the Flask 'g' object for the
    request's lifecycle.
    """

    FALLBACK_LANGUAGE = 'es'

    @inject
    def __init__(self,
                 config_service: ConfigurationService,
                 profile_repo: ProfileRepo):
        self.config_service = config_service
        self.profile_repo = profile_repo

    def _get_company_short_name(self) -> str | None:
        """
        Gets the company_short_name from the current request context.
        This handles different scenarios like web sessions, public URLs, and API calls.

        Priority Order:
        1. Flask Session (for logged-in web users).
        2. URL rule variable (for public pages and API endpoints).

        Returns:
            The company short name, or None when neither source provides one.
        """
        # 1. Check session for logged-in users
        company_short_name = SessionManager.get('company_short_name')
        if company_short_name:
            return company_short_name

        # 2. Check URL arguments (e.g., /<company_short_name>/login)
        #    This covers public pages and most API calls.
        if request.view_args and 'company_short_name' in request.view_args:
            return request.view_args['company_short_name']

        return None

    def get_current_language(self) -> str:
        """
        Determines and caches the language for the current request using a priority order:
        1. User's preference (from their profile).
        2. Company's default language.
        3. System-wide fallback language ('es').

        Any error raised while resolving priorities 1 and 2 is logged and the
        fallback language is used instead.
        """
        # Already resolved earlier in this request.
        if 'lang' in g:
            return g.lang

        try:
            # Priority 1: User's preferred language
            user_identifier = SessionManager.get('user_identifier')
            if user_identifier:
                # NOTE(review): the identifier is looked up as an email address;
                # confirm this holds for every kind of user_identifier.
                user = self.profile_repo.get_user_by_email(user_identifier)
                if user and user.preferred_language:
                    logging.debug(f"Language determined by user preference: {user.preferred_language}")
                    g.lang = user.preferred_language
                    return g.lang

            # Priority 2: Company's default language
            company_short_name = self._get_company_short_name()
            if company_short_name:
                locale = self.config_service.get_configuration(company_short_name, 'locale')
                if locale:
                    # Keep only the primary language subtag. Both 'en_US' and the
                    # hyphenated form 'en-US' must yield 'en'.
                    company_language = locale.replace('-', '_').split('_')[0]
                    # An empty subtag (e.g. locale '_US') is not a usable language;
                    # fall through to the system fallback instead of caching ''.
                    if company_language:
                        g.lang = company_language
                        return g.lang
        except Exception as e:
            logging.info(f"Could not determine language, falling back to default. Reason: {e}")

        # Priority 3: System-wide fallback
        logging.info(f"Language determined by system fallback: {self.FALLBACK_LANGUAGE}")
        g.lang = self.FALLBACK_LANGUAGE
        return g.lang
@@ -1,50 +1,41 @@
1
1
  # Copyright (c) 2024 Fernando Libedinsky
2
2
  # Product: IAToolkit
3
- #
4
- # IAToolkit is open source software.
5
3
 
6
4
  from iatoolkit.repositories.vs_repo import VSRepo
7
5
  from iatoolkit.repositories.document_repo import DocumentRepo
8
- from iatoolkit.repositories.profile_repo import ProfileRepo
9
- from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
10
-
11
6
  from iatoolkit.repositories.models import Document, VSDoc, Company
12
7
  from iatoolkit.services.document_service import DocumentService
8
+ from iatoolkit.services.configuration_service import ConfigurationService
13
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
14
10
  from iatoolkit.infra.connectors.file_connector_factory import FileConnectorFactory
15
11
  from iatoolkit.services.file_processor_service import FileProcessorConfig, FileProcessor
16
- from iatoolkit.services.dispatcher_service import Dispatcher
17
12
  from iatoolkit.common.exceptions import IAToolkitException
18
13
  import logging
19
14
  import base64
20
- from injector import inject
21
- from typing import Dict
15
+ from injector import inject, singleton
16
+ import os
22
17
 
23
18
 
19
+ @singleton
24
20
  class LoadDocumentsService:
25
21
  """
26
22
  Orchestrates the process of loading, processing, and storing documents
27
- from various sources for different companies.
23
+ from various sources defined in the company's configuration.
28
24
  """
29
25
  @inject
30
26
  def __init__(self,
27
+ config_service: ConfigurationService,
31
28
  file_connector_factory: FileConnectorFactory,
32
29
  doc_service: DocumentService,
33
30
  doc_repo: DocumentRepo,
34
31
  vector_store: VSRepo,
35
- profile_repo: ProfileRepo,
36
- dispatcher: Dispatcher,
37
- llm_query_repo: LLMQueryRepo
38
32
  ):
33
+ self.config_service = config_service
39
34
  self.doc_service = doc_service
40
35
  self.doc_repo = doc_repo
41
- self.profile_repo = profile_repo
42
- self.llm_query_repo = llm_query_repo
43
36
  self.vector_store = vector_store
44
37
  self.file_connector_factory = file_connector_factory
45
- self.dispatcher = dispatcher
46
38
 
47
- # lower warnings
48
39
  logging.getLogger().setLevel(logging.ERROR)
49
40
 
50
41
  self.splitter = RecursiveCharacterTextSplitter(
@@ -53,135 +44,131 @@ class LoadDocumentsService:
53
44
  separators=["\n\n", "\n", "."]
54
45
  )
55
46
 
56
- def load_company_files(self,
57
- company: Company,
58
- connector_config: Dict,
59
- predefined_metadata: Dict = None,
60
- filters: Dict = None):
47
+ def load_sources(self,
48
+ company: Company,
49
+ sources_to_load: list[str] = None,
50
+ filters: dict = None) -> int:
61
51
  """
62
- Loads all the company files from a connector
52
+ Loads documents from one or more configured sources for a company.
63
53
 
64
54
  Args:
65
55
  company (Company): The company to load files for.
66
- connector_config (Dict): The configuration for the file connector.
67
- predefined_metadata (Dict, optional): Metadata to be added to all documents from this source.
68
- filters (Dict, optional): Filters to apply to the files.
56
+ sources_to_load (list[str], optional): A list of specific source names to load.
57
+ If None, all configured sources will be loaded.
58
+ filters (dict, optional): Filters to apply when listing files (e.g., file extension).
69
59
 
70
60
  Returns:
71
- int: The number of processed files.
61
+ int: The total number of processed files.
72
62
  """
73
- if not connector_config:
74
- raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
75
- f"Falta configurar conector")
63
+ knowledge_base_config = self.config_service.get_configuration(company.short_name, 'knowledge_base')
64
+ if not knowledge_base_config:
65
+ raise IAToolkitException(IAToolkitException.ErrorType.CONFIG_ERROR,
66
+ f"Missing 'knowledge_base' configuration for company '{company.short_name}'.")
67
+
68
+ if not sources_to_load:
69
+ raise IAToolkitException(IAToolkitException.ErrorType.PARAM_NOT_FILLED,
70
+ f"Missing sources to load for company '{company.short_name}'.")
71
+
72
+ base_connector_config = self._get_base_connector_config(knowledge_base_config)
73
+ all_sources = knowledge_base_config.get('document_sources', {})
74
+
75
+ total_processed_files = 0
76
+ for source_name in sources_to_load:
77
+ source_config = all_sources.get(source_name)
78
+ if not source_config:
79
+ logging.warning(f"Source '{source_name}' not found in configuration for company '{company.short_name}'. Skipping.")
80
+ continue
81
+
82
+ try:
83
+ logging.info(f"Processing source '{source_name}' for company '{company.short_name}'...")
84
+
85
+ # Combine the base connector configuration with the specific path from the source.
86
+ full_connector_config = base_connector_config.copy()
87
+ full_connector_config['path'] = source_config.get('path')
88
+
89
+ # Prepare the context for the callback function.
90
+ context = {
91
+ 'company': company,
92
+ 'metadata': source_config.get('metadata', {})
93
+ }
94
+
95
+ processor_config = FileProcessorConfig(
96
+ callback=self._file_processing_callback,
97
+ context=context,
98
+ filters=filters or {"filename_contains": ".pdf"},
99
+ continue_on_error=True,
100
+ echo=True
101
+ )
76
102
 
77
- try:
78
- if not filters:
79
- filters = {"filename_contains": ".pdf"}
80
-
81
- # Pasar metadata predefinida como parte del contexto al procesador
82
- # para que esté disponible en la función load_file_callback
83
- context = {
84
- 'company': company,
85
- 'metadata': {}
86
- }
87
-
88
- if predefined_metadata:
89
- context['metadata'] = predefined_metadata
90
-
91
- # config the processor
92
- processor_config = FileProcessorConfig(
93
- callback=self.load_file_callback,
94
- context=context,
95
- filters=filters,
96
- continue_on_error=True,
97
- echo=True
98
- )
103
+ connector = self.file_connector_factory.create(full_connector_config)
104
+ processor = FileProcessor(connector, processor_config)
105
+ processor.process_files()
99
106
 
100
- connector = self.file_connector_factory.create(connector_config)
101
- processor = FileProcessor(connector, processor_config)
107
+ total_processed_files += processor.processed_files
108
+ logging.info(f"Finished processing source '{source_name}'. Processed {processor.processed_files} files.")
102
109
 
103
- # process the files
104
- processor.process_files()
110
+ except Exception as e:
111
+ logging.exception(f"Failed to process source '{source_name}' for company '{company.short_name}': {e}")
105
112
 
106
- return processor.processed_files
107
- except Exception as e:
108
- logging.exception("Loading files error: %s", str(e))
109
- return {"error": str(e)}
113
+ return total_processed_files
110
114
 
111
- def load_file_callback(self, company: Company, filename: str, content: bytes, context: dict = {}):
112
- """
113
- Processes a single file: extracts text, generates metadata, and saves it
114
- to the relational database and the vector store.
115
- This method is intended to be used as the 'action' for FileProcessor.
115
+ def _get_base_connector_config(self, knowledge_base_config: dict) -> dict:
116
+ """Determines and returns the appropriate base connector configuration (dev vs prod)."""
117
+ connectors = knowledge_base_config.get('connectors', {})
118
+ env = os.getenv('FLASK_ENV', 'dev')
116
119
 
117
- Args:
118
- company (Company): The company associated with the file.
119
- filename (str): The name of the file.
120
- content (bytes): The binary content of the file.
121
- context (dict, optional): A context dictionary, may contain predefined metadata.
122
- """
120
+ if env == 'dev':
121
+ return connectors.get('development', {'type': 'local'})
122
+ else:
123
+ prod_config = connectors.get('production')
124
+ if not prod_config:
125
+ raise IAToolkitException(IAToolkitException.ErrorType.CONFIG_ERROR,
126
+ "Production connector configuration is missing.")
127
+ # The S3 connector itself is responsible for reading AWS environment variables.
128
+ # No need to pass credentials explicitly here.
129
+ return prod_config
123
130
 
131
+ def _file_processing_callback(self, company: Company, filename: str, content: bytes, context: dict = None):
132
+ """
133
+ Callback method to process a single file. It extracts text, merges metadata,
134
+ and saves the document to both relational and vector stores.
135
+ """
124
136
  if not company:
125
- raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
126
- f"Falta configurar empresa")
137
+ raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER, "Missing company object in callback.")
127
138
 
128
- # check if file exist in repositories
129
- if self.doc_repo.get(company_id=company.id,filename=filename):
139
+ if self.doc_repo.get(company_id=company.id, filename=filename):
140
+ logging.debug(f"File '{filename}' already exists for company '{company.id}'. Skipping.")
130
141
  return
131
142
 
132
143
  try:
133
- # extract text from the document
134
144
  document_content = self.doc_service.file_to_txt(filename, content)
135
- content_base64 = base64.b64encode(content).decode('utf-8')
136
145
 
137
- # generate metada based on the filename structure
138
- dynamic_metadata = self.dispatcher.get_metadata_from_filename(company_name=company.short_name, filename=filename)
146
+ # Get predefined metadata from the context passed by the processor.
147
+ predefined_metadata = context.get('metadata', {}) if context else {}
139
148
 
140
- # Obtener metadatos del contexto si existen
141
- context_metadata = context.get('metadata', {}).copy() if context else {}
142
-
143
- # Fusionar los metadatos. El orden de prioridad es:
144
- # 1. dynamic_metadata (tiene mayor prioridad)
145
- # 2. context_metadata (del parámetro context)
146
- # Los valores en dynamic_metadata tendrán precedencia sobre los de context_metadata
147
- final_meta = {**context_metadata, **dynamic_metadata}
148
-
149
- # save the file in the document repositories
149
+ # Save the document to the relational database.
150
+ session = self.doc_repo.session
150
151
  new_document = Document(
151
152
  company_id=company.id,
152
153
  filename=filename,
153
154
  content=document_content,
154
- content_b64=content_base64,
155
- meta=final_meta
155
+ content_b64=base64.b64encode(content).decode('utf-8'),
156
+ meta=predefined_metadata
156
157
  )
157
-
158
- # insert the document into the Database (without commit)
159
- session = self.doc_repo.session
160
158
  session.add(new_document)
161
- session.flush() # get the ID without commit
162
-
163
- # split the content, and create the chunk list
164
- splitted_content = self.splitter.split_text(document_content)
165
- chunk_list = [
166
- VSDoc(
167
- company_id=company.id,
168
- document_id=new_document.id,
169
- text=text
170
- )
171
- for text in splitted_content
172
- ]
159
+ session.flush() # Flush to get the new_document.id without committing.
173
160
 
174
- # save to vector store
175
- self.vector_store.add_document(chunk_list)
161
+ # Split into chunks and prepare for vector store.
162
+ chunks = self.splitter.split_text(document_content)
163
+ vs_docs = [VSDoc(company_id=company.id, document_id=new_document.id, text=text) for text in chunks]
176
164
 
177
- # confirm the transaction
178
- session.commit()
165
+ # Add document chunks to the vector store.
166
+ self.vector_store.add_document(company.short_name, vs_docs)
179
167
 
168
+ session.commit()
180
169
  return new_document
181
170
  except Exception as e:
182
171
  self.doc_repo.session.rollback()
183
-
184
- # if something fails, throw exception
185
- logging.exception("Error procesando el archivo %s: %s", filename, str(e))
172
+ logging.exception(f"Error processing file '{filename}': {e}")
186
173
  raise IAToolkitException(IAToolkitException.ErrorType.LOAD_DOCUMENT_ERROR,
187
- f"Error al procesar el archivo {filename}")
174
+ f"Error while processing file: {filename}")
@@ -4,6 +4,7 @@
4
4
  # IAToolkit is open source software.
5
5
 
6
6
  from iatoolkit.infra.mail_app import MailApp
7
+ from iatoolkit.services.i18n_service import I18nService
7
8
  from injector import inject
8
9
  from pathlib import Path
9
10
  from iatoolkit.common.exceptions import IAToolkitException
@@ -13,18 +14,22 @@ TEMP_DIR = Path("static/temp")
13
14
 
14
15
  class MailService:
15
16
  @inject
16
- def __init__(self, mail_app: MailApp):
17
+ def __init__(self,
18
+ mail_app: MailApp,
19
+ i18n_service: I18nService):
17
20
  self.mail_app = mail_app
21
+ self.i18n_service = i18n_service
22
+
18
23
 
19
24
  def _read_token_bytes(self, token: str) -> bytes:
20
25
  # Defensa simple contra path traversal
21
26
  if not token or "/" in token or "\\" in token or token.startswith("."):
22
27
  raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
23
- "attachment_token inválido")
28
+ "attachment_token invalid")
24
29
  path = TEMP_DIR / token
25
30
  if not path.is_file():
26
31
  raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
27
- f"Adjunto no encontrado: {token}")
32
+ f"attach file not found: {token}")
28
33
  return path.read_bytes()
29
34
 
30
35
  def send_mail(self, **kwargs):
@@ -59,4 +64,4 @@ class MailService:
59
64
  body=body,
60
65
  attachments=norm_attachments)
61
66
 
62
- return 'mail enviado'
67
+ return self.i18n_service.t('services.mail_sent')