iatoolkit 0.11.0__py3-none-any.whl → 0.71.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iatoolkit/__init__.py +2 -6
- iatoolkit/base_company.py +9 -29
- iatoolkit/cli_commands.py +1 -1
- iatoolkit/common/routes.py +96 -52
- iatoolkit/common/session_manager.py +2 -1
- iatoolkit/common/util.py +17 -27
- iatoolkit/company_registry.py +1 -2
- iatoolkit/iatoolkit.py +97 -53
- iatoolkit/infra/llm_client.py +15 -20
- iatoolkit/infra/llm_proxy.py +38 -10
- iatoolkit/infra/openai_adapter.py +1 -1
- iatoolkit/infra/redis_session_manager.py +48 -2
- iatoolkit/locales/en.yaml +167 -0
- iatoolkit/locales/es.yaml +163 -0
- iatoolkit/repositories/database_manager.py +23 -3
- iatoolkit/repositories/document_repo.py +1 -1
- iatoolkit/repositories/models.py +35 -10
- iatoolkit/repositories/profile_repo.py +3 -2
- iatoolkit/repositories/vs_repo.py +26 -20
- iatoolkit/services/auth_service.py +193 -0
- iatoolkit/services/branding_service.py +70 -25
- iatoolkit/services/company_context_service.py +155 -0
- iatoolkit/services/configuration_service.py +133 -0
- iatoolkit/services/dispatcher_service.py +80 -105
- iatoolkit/services/document_service.py +5 -2
- iatoolkit/services/embedding_service.py +146 -0
- iatoolkit/services/excel_service.py +30 -26
- iatoolkit/services/file_processor_service.py +4 -12
- iatoolkit/services/history_service.py +7 -16
- iatoolkit/services/i18n_service.py +104 -0
- iatoolkit/services/jwt_service.py +18 -29
- iatoolkit/services/language_service.py +83 -0
- iatoolkit/services/load_documents_service.py +100 -113
- iatoolkit/services/mail_service.py +9 -4
- iatoolkit/services/profile_service.py +152 -76
- iatoolkit/services/prompt_manager_service.py +20 -16
- iatoolkit/services/query_service.py +208 -96
- iatoolkit/services/search_service.py +11 -4
- iatoolkit/services/sql_service.py +57 -25
- iatoolkit/services/tasks_service.py +1 -1
- iatoolkit/services/user_feedback_service.py +72 -34
- iatoolkit/services/user_session_context_service.py +112 -54
- iatoolkit/static/images/fernando.jpeg +0 -0
- iatoolkit/static/js/chat_feedback_button.js +80 -0
- iatoolkit/static/js/chat_help_content.js +124 -0
- iatoolkit/static/js/chat_history_button.js +110 -0
- iatoolkit/static/js/chat_logout_button.js +36 -0
- iatoolkit/static/js/chat_main.js +135 -222
- iatoolkit/static/js/chat_onboarding_button.js +103 -0
- iatoolkit/static/js/chat_prompt_manager.js +94 -0
- iatoolkit/static/js/chat_reload_button.js +35 -0
- iatoolkit/static/styles/chat_iatoolkit.css +289 -210
- iatoolkit/static/styles/chat_modal.css +63 -77
- iatoolkit/static/styles/chat_public.css +107 -0
- iatoolkit/static/styles/landing_page.css +182 -0
- iatoolkit/static/styles/onboarding.css +176 -0
- iatoolkit/system_prompts/query_main.prompt +5 -22
- iatoolkit/templates/_company_header.html +20 -0
- iatoolkit/templates/_login_widget.html +42 -0
- iatoolkit/templates/base.html +40 -20
- iatoolkit/templates/change_password.html +57 -36
- iatoolkit/templates/chat.html +180 -86
- iatoolkit/templates/chat_modals.html +138 -68
- iatoolkit/templates/error.html +44 -8
- iatoolkit/templates/forgot_password.html +40 -23
- iatoolkit/templates/index.html +145 -0
- iatoolkit/templates/login_simulation.html +45 -0
- iatoolkit/templates/onboarding_shell.html +107 -0
- iatoolkit/templates/signup.html +63 -65
- iatoolkit/views/base_login_view.py +91 -0
- iatoolkit/views/change_password_view.py +56 -31
- iatoolkit/views/embedding_api_view.py +65 -0
- iatoolkit/views/external_login_view.py +61 -28
- iatoolkit/views/{file_store_view.py → file_store_api_view.py} +10 -3
- iatoolkit/views/forgot_password_view.py +27 -21
- iatoolkit/views/help_content_api_view.py +54 -0
- iatoolkit/views/history_api_view.py +56 -0
- iatoolkit/views/home_view.py +50 -23
- iatoolkit/views/index_view.py +14 -0
- iatoolkit/views/init_context_api_view.py +74 -0
- iatoolkit/views/llmquery_api_view.py +58 -0
- iatoolkit/views/login_simulation_view.py +93 -0
- iatoolkit/views/login_view.py +130 -37
- iatoolkit/views/logout_api_view.py +49 -0
- iatoolkit/views/profile_api_view.py +46 -0
- iatoolkit/views/{prompt_view.py → prompt_api_view.py} +10 -10
- iatoolkit/views/signup_view.py +41 -36
- iatoolkit/views/{tasks_view.py → tasks_api_view.py} +10 -36
- iatoolkit/views/tasks_review_api_view.py +55 -0
- iatoolkit/views/user_feedback_api_view.py +60 -0
- iatoolkit/views/verify_user_view.py +34 -29
- {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/METADATA +41 -23
- iatoolkit-0.71.2.dist-info/RECORD +122 -0
- iatoolkit-0.71.2.dist-info/licenses/LICENSE +21 -0
- iatoolkit/common/auth.py +0 -200
- iatoolkit/static/images/arrow_up.png +0 -0
- iatoolkit/static/images/diagrama_iatoolkit.jpg +0 -0
- iatoolkit/static/images/logo_clinica.png +0 -0
- iatoolkit/static/images/logo_iatoolkit.png +0 -0
- iatoolkit/static/images/logo_maxxa.png +0 -0
- iatoolkit/static/images/logo_notaria.png +0 -0
- iatoolkit/static/images/logo_tarjeta.png +0 -0
- iatoolkit/static/images/logo_umayor.png +0 -0
- iatoolkit/static/images/upload.png +0 -0
- iatoolkit/static/js/chat_feedback.js +0 -115
- iatoolkit/static/js/chat_history.js +0 -117
- iatoolkit/static/styles/chat_info.css +0 -53
- iatoolkit/templates/header.html +0 -31
- iatoolkit/templates/home.html +0 -199
- iatoolkit/templates/login.html +0 -43
- iatoolkit/templates/test.html +0 -9
- iatoolkit/views/chat_token_request_view.py +0 -98
- iatoolkit/views/chat_view.py +0 -58
- iatoolkit/views/download_file_view.py +0 -58
- iatoolkit/views/external_chat_login_view.py +0 -95
- iatoolkit/views/history_view.py +0 -57
- iatoolkit/views/llmquery_view.py +0 -65
- iatoolkit/views/tasks_review_view.py +0 -83
- iatoolkit/views/user_feedback_view.py +0 -74
- iatoolkit-0.11.0.dist-info/RECORD +0 -110
- {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/WHEEL +0 -0
- {iatoolkit-0.11.0.dist-info → iatoolkit-0.71.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# iatoolkit/services/i18n_service.py
|
|
2
|
+
import os
|
|
3
|
+
import logging
|
|
4
|
+
from injector import inject, singleton
|
|
5
|
+
from iatoolkit.common.util import Utility
|
|
6
|
+
from iatoolkit.services.language_service import LanguageService
|
|
7
|
+
|
|
8
|
+
@singleton
|
|
9
|
+
class I18nService:
|
|
10
|
+
"""
|
|
11
|
+
Servicio centralizado para manejar la internacionalización (i18n).
|
|
12
|
+
Carga todas las traducciones desde archivos YAML en memoria al iniciar.
|
|
13
|
+
"""
|
|
14
|
+
FALLBACK_LANGUAGE = 'es'
|
|
15
|
+
|
|
16
|
+
@inject
|
|
17
|
+
def __init__(self, util: Utility, language_service: LanguageService):
|
|
18
|
+
self.util = util
|
|
19
|
+
self.language_service = language_service
|
|
20
|
+
|
|
21
|
+
self.translations = {}
|
|
22
|
+
self._load_translations()
|
|
23
|
+
|
|
24
|
+
def _load_translations(self):
|
|
25
|
+
"""
|
|
26
|
+
Carga todos los archivos .yaml del directorio 'locales' en memoria.
|
|
27
|
+
"""
|
|
28
|
+
locales_dir = os.path.join(os.path.dirname(__file__), '..', 'locales')
|
|
29
|
+
if not os.path.exists(locales_dir):
|
|
30
|
+
logging.error("Directory 'locales' not found.")
|
|
31
|
+
return
|
|
32
|
+
|
|
33
|
+
for filename in os.listdir(locales_dir):
|
|
34
|
+
if filename.endswith('.yaml'):
|
|
35
|
+
lang_code = filename.split('.')[0]
|
|
36
|
+
filepath = os.path.join(locales_dir, filename)
|
|
37
|
+
try:
|
|
38
|
+
self.translations[lang_code] = self.util.load_schema_from_yaml(filepath)
|
|
39
|
+
except Exception as e:
|
|
40
|
+
logging.error(f"Error while loading the translation file {filepath}: {e}")
|
|
41
|
+
|
|
42
|
+
def _get_nested_key(self, lang: str, key: str):
|
|
43
|
+
"""
|
|
44
|
+
Obtiene un valor de un diccionario anidado usando una clave con puntos.
|
|
45
|
+
"""
|
|
46
|
+
data = self.translations.get(lang, {})
|
|
47
|
+
keys = key.split('.')
|
|
48
|
+
for k in keys:
|
|
49
|
+
if isinstance(data, dict) and k in data:
|
|
50
|
+
data = data[k]
|
|
51
|
+
else:
|
|
52
|
+
return None
|
|
53
|
+
return data
|
|
54
|
+
|
|
55
|
+
def get_translation_block(self, key: str, lang: str = None) -> dict:
|
|
56
|
+
"""
|
|
57
|
+
Gets a whole dictionary block from the translations.
|
|
58
|
+
Useful for passing a set of translations to JavaScript.
|
|
59
|
+
"""
|
|
60
|
+
if lang is None:
|
|
61
|
+
lang = self.language_service.get_current_language()
|
|
62
|
+
|
|
63
|
+
# 1. Try to get the block in the requested language
|
|
64
|
+
block = self._get_nested_key(lang, key)
|
|
65
|
+
|
|
66
|
+
# 2. If not found, try the fallback language
|
|
67
|
+
if not isinstance(block, dict):
|
|
68
|
+
block = self._get_nested_key(self.FALLBACK_LANGUAGE, key)
|
|
69
|
+
|
|
70
|
+
return block if isinstance(block, dict) else {}
|
|
71
|
+
|
|
72
|
+
def t(self, key: str, lang: str = None, **kwargs) -> str:
|
|
73
|
+
"""
|
|
74
|
+
Gets the translation for a given key.
|
|
75
|
+
If 'lang' is provided, it's used. Otherwise, it's determined automatically.
|
|
76
|
+
"""
|
|
77
|
+
# If no specific language is requested, determine it from the current context.
|
|
78
|
+
if lang is None:
|
|
79
|
+
lang = self.language_service.get_current_language()
|
|
80
|
+
|
|
81
|
+
# 1. Attempt to get the translation in the requested language
|
|
82
|
+
message = self._get_nested_key(lang, key)
|
|
83
|
+
|
|
84
|
+
# 2. If not found, try the fallback language
|
|
85
|
+
if message is None and lang != self.FALLBACK_LANGUAGE:
|
|
86
|
+
logging.warning(
|
|
87
|
+
f"Translation key '{key}' not found for language '{lang}'. Attempting fallback to '{self.FALLBACK_LANGUAGE}'.")
|
|
88
|
+
message = self._get_nested_key(self.FALLBACK_LANGUAGE, key)
|
|
89
|
+
|
|
90
|
+
# 3. If still not found, return the key itself as a last resort
|
|
91
|
+
if message is None:
|
|
92
|
+
logging.error(
|
|
93
|
+
f"Translation key '{key}' not found, even in fallback '{self.FALLBACK_LANGUAGE}'.")
|
|
94
|
+
return key
|
|
95
|
+
|
|
96
|
+
# 4. If variables are provided, format the message
|
|
97
|
+
if kwargs:
|
|
98
|
+
try:
|
|
99
|
+
return message.format(**kwargs)
|
|
100
|
+
except KeyError as e:
|
|
101
|
+
logging.error(f"Error formatting key '{key}': missing variable {e} in arguments.")
|
|
102
|
+
return message
|
|
103
|
+
|
|
104
|
+
return message
|
|
@@ -20,20 +20,22 @@ class JWTService:
|
|
|
20
20
|
self.secret_key = app.config['JWT_SECRET_KEY']
|
|
21
21
|
self.algorithm = app.config['JWT_ALGORITHM']
|
|
22
22
|
except KeyError as e:
|
|
23
|
-
logging.error(f"
|
|
24
|
-
raise RuntimeError(f"
|
|
23
|
+
logging.error(f"missing JWT configuration: {e}.")
|
|
24
|
+
raise RuntimeError(f"missing JWT configuration variables: {e}")
|
|
25
25
|
|
|
26
26
|
def generate_chat_jwt(self,
|
|
27
|
-
company_id: int,
|
|
28
27
|
company_short_name: str,
|
|
29
|
-
|
|
28
|
+
user_identifier: str,
|
|
30
29
|
expires_delta_seconds: int) -> Optional[str]:
|
|
31
30
|
# generate a JWT for a chat session
|
|
32
31
|
try:
|
|
32
|
+
if not company_short_name or not user_identifier:
|
|
33
|
+
logging.error(f"Missing token ID: {company_short_name}/{user_identifier}")
|
|
34
|
+
return None
|
|
35
|
+
|
|
33
36
|
payload = {
|
|
34
|
-
'company_id': company_id,
|
|
35
37
|
'company_short_name': company_short_name,
|
|
36
|
-
'
|
|
38
|
+
'user_identifier': user_identifier,
|
|
37
39
|
'exp': time.time() + expires_delta_seconds,
|
|
38
40
|
'iat': time.time(),
|
|
39
41
|
'type': 'chat_session' # Identificador del tipo de token
|
|
@@ -41,10 +43,10 @@ class JWTService:
|
|
|
41
43
|
token = jwt.encode(payload, self.secret_key, algorithm=self.algorithm)
|
|
42
44
|
return token
|
|
43
45
|
except Exception as e:
|
|
44
|
-
logging.error(f"Error al generar JWT para
|
|
46
|
+
logging.error(f"Error al generar JWT para {company_short_name}/{user_identifier}: {e}")
|
|
45
47
|
return None
|
|
46
48
|
|
|
47
|
-
def validate_chat_jwt(self, token: str
|
|
49
|
+
def validate_chat_jwt(self, token: str) -> Optional[Dict[str, Any]]:
|
|
48
50
|
"""
|
|
49
51
|
Valida un JWT de sesión de chat.
|
|
50
52
|
Retorna el payload decodificado si es válido y coincide con la empresa, o None.
|
|
@@ -56,36 +58,23 @@ class JWTService:
|
|
|
56
58
|
|
|
57
59
|
# Validaciones adicionales
|
|
58
60
|
if payload.get('type') != 'chat_session':
|
|
59
|
-
logging.warning(f"
|
|
61
|
+
logging.warning(f"Invalid JWT type '{payload.get('type')}'")
|
|
60
62
|
return None
|
|
61
63
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
f"Esperado: {expected_company_short_name}, Obtenido: {payload.get('company_short_name')}"
|
|
66
|
-
)
|
|
64
|
+
# user_identifier debe estar presente
|
|
65
|
+
if not payload.get('user_identifier'):
|
|
66
|
+
logging.warning(f"missing user_identifier in JWT payload.")
|
|
67
67
|
return None
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
logging.warning(f"Validación JWT fallida: external_user_id ausente o vacío.")
|
|
69
|
+
if not payload.get('company_short_name'):
|
|
70
|
+
logging.warning(f"missing company_short_name in JWT payload.")
|
|
72
71
|
return None
|
|
73
72
|
|
|
74
|
-
# company_id debe estar presente
|
|
75
|
-
if 'company_id' not in payload or not isinstance(payload['company_id'], int):
|
|
76
|
-
logging.warning(f"Validación JWT fallida: company_id ausente o tipo incorrecto.")
|
|
77
|
-
return None
|
|
78
|
-
|
|
79
|
-
logging.debug(
|
|
80
|
-
f"JWT validado exitosamente para company: {payload.get('company_short_name')}, user: {payload.get('external_user_id')}")
|
|
81
73
|
return payload
|
|
82
74
|
|
|
83
|
-
except jwt.ExpiredSignatureError:
|
|
84
|
-
logging.info(f"Validación JWT fallida: token expirado para {expected_company_short_name}")
|
|
85
|
-
return None
|
|
86
75
|
except jwt.InvalidTokenError as e:
|
|
87
|
-
logging.warning(f"
|
|
76
|
+
logging.warning(f"Invalid JWT token:: {e}")
|
|
88
77
|
return None
|
|
89
78
|
except Exception as e:
|
|
90
|
-
logging.error(f"
|
|
79
|
+
logging.error(f"unexpected error during JWT validation: {e}")
|
|
91
80
|
return None
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# iatoolkit/services/language_service.py
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from injector import inject, singleton
|
|
5
|
+
from flask import g, request
|
|
6
|
+
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
7
|
+
from iatoolkit.services.configuration_service import ConfigurationService
|
|
8
|
+
from iatoolkit.common.session_manager import SessionManager
|
|
9
|
+
|
|
10
|
+
@singleton
|
|
11
|
+
class LanguageService:
|
|
12
|
+
"""
|
|
13
|
+
Determines the correct language for the current request
|
|
14
|
+
based on a defined priority order (session, URL, etc.)
|
|
15
|
+
and caches it in the Flask 'g' object for the request's lifecycle.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
FALLBACK_LANGUAGE = 'es'
|
|
19
|
+
|
|
20
|
+
@inject
|
|
21
|
+
def __init__(self,
|
|
22
|
+
config_service: ConfigurationService,
|
|
23
|
+
profile_repo: ProfileRepo):
|
|
24
|
+
self.config_service = config_service
|
|
25
|
+
self.profile_repo = profile_repo
|
|
26
|
+
|
|
27
|
+
def _get_company_short_name(self) -> str | None:
|
|
28
|
+
"""
|
|
29
|
+
Gets the company_short_name from the current request context.
|
|
30
|
+
This handles different scenarios like web sessions, public URLs, and API calls.
|
|
31
|
+
|
|
32
|
+
Priority Order:
|
|
33
|
+
1. Flask Session (for logged-in web users).
|
|
34
|
+
2. URL rule variable (for public pages and API endpoints).
|
|
35
|
+
"""
|
|
36
|
+
# 1. Check session for logged-in users
|
|
37
|
+
company_short_name = SessionManager.get('company_short_name')
|
|
38
|
+
if company_short_name:
|
|
39
|
+
return company_short_name
|
|
40
|
+
|
|
41
|
+
# 2. Check URL arguments (e.g., /<company_short_name>/login)
|
|
42
|
+
# This covers public pages and most API calls.
|
|
43
|
+
if request.view_args and 'company_short_name' in request.view_args:
|
|
44
|
+
return request.view_args['company_short_name']
|
|
45
|
+
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
def get_current_language(self) -> str:
|
|
49
|
+
"""
|
|
50
|
+
Determines and caches the language for the current request using a priority order:
|
|
51
|
+
1. User's preference (from their profile).
|
|
52
|
+
2. Company's default language.
|
|
53
|
+
3. System-wide fallback language ('es').
|
|
54
|
+
"""
|
|
55
|
+
if 'lang' in g:
|
|
56
|
+
return g.lang
|
|
57
|
+
|
|
58
|
+
try:
|
|
59
|
+
# Priority 1: User's preferred language
|
|
60
|
+
user_identifier = SessionManager.get('user_identifier')
|
|
61
|
+
if user_identifier:
|
|
62
|
+
user = self.profile_repo.get_user_by_email(user_identifier)
|
|
63
|
+
if user and user.preferred_language:
|
|
64
|
+
logging.debug(f"Language determined by user preference: {user.preferred_language}")
|
|
65
|
+
g.lang = user.preferred_language
|
|
66
|
+
return g.lang
|
|
67
|
+
|
|
68
|
+
# Priority 2: Company's default language
|
|
69
|
+
company_short_name = self._get_company_short_name()
|
|
70
|
+
if company_short_name:
|
|
71
|
+
locale = self.config_service.get_configuration(company_short_name, 'locale')
|
|
72
|
+
if locale:
|
|
73
|
+
company_language = locale.split('_')[0]
|
|
74
|
+
g.lang = company_language
|
|
75
|
+
return g.lang
|
|
76
|
+
except Exception as e:
|
|
77
|
+
logging.info(f"Could not determine language, falling back to default. Reason: {e}")
|
|
78
|
+
pass
|
|
79
|
+
|
|
80
|
+
# Priority 3: System-wide fallback
|
|
81
|
+
logging.info(f"Language determined by system fallback: {self.FALLBACK_LANGUAGE}")
|
|
82
|
+
g.lang = self.FALLBACK_LANGUAGE
|
|
83
|
+
return g.lang
|
|
@@ -1,50 +1,41 @@
|
|
|
1
1
|
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
2
|
# Product: IAToolkit
|
|
3
|
-
#
|
|
4
|
-
# IAToolkit is open source software.
|
|
5
3
|
|
|
6
4
|
from iatoolkit.repositories.vs_repo import VSRepo
|
|
7
5
|
from iatoolkit.repositories.document_repo import DocumentRepo
|
|
8
|
-
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
9
|
-
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
10
|
-
|
|
11
6
|
from iatoolkit.repositories.models import Document, VSDoc, Company
|
|
12
7
|
from iatoolkit.services.document_service import DocumentService
|
|
8
|
+
from iatoolkit.services.configuration_service import ConfigurationService
|
|
13
9
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
14
10
|
from iatoolkit.infra.connectors.file_connector_factory import FileConnectorFactory
|
|
15
11
|
from iatoolkit.services.file_processor_service import FileProcessorConfig, FileProcessor
|
|
16
|
-
from iatoolkit.services.dispatcher_service import Dispatcher
|
|
17
12
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
18
13
|
import logging
|
|
19
14
|
import base64
|
|
20
|
-
from injector import inject
|
|
21
|
-
|
|
15
|
+
from injector import inject, singleton
|
|
16
|
+
import os
|
|
22
17
|
|
|
23
18
|
|
|
19
|
+
@singleton
|
|
24
20
|
class LoadDocumentsService:
|
|
25
21
|
"""
|
|
26
22
|
Orchestrates the process of loading, processing, and storing documents
|
|
27
|
-
from various sources
|
|
23
|
+
from various sources defined in the company's configuration.
|
|
28
24
|
"""
|
|
29
25
|
@inject
|
|
30
26
|
def __init__(self,
|
|
27
|
+
config_service: ConfigurationService,
|
|
31
28
|
file_connector_factory: FileConnectorFactory,
|
|
32
29
|
doc_service: DocumentService,
|
|
33
30
|
doc_repo: DocumentRepo,
|
|
34
31
|
vector_store: VSRepo,
|
|
35
|
-
profile_repo: ProfileRepo,
|
|
36
|
-
dispatcher: Dispatcher,
|
|
37
|
-
llm_query_repo: LLMQueryRepo
|
|
38
32
|
):
|
|
33
|
+
self.config_service = config_service
|
|
39
34
|
self.doc_service = doc_service
|
|
40
35
|
self.doc_repo = doc_repo
|
|
41
|
-
self.profile_repo = profile_repo
|
|
42
|
-
self.llm_query_repo = llm_query_repo
|
|
43
36
|
self.vector_store = vector_store
|
|
44
37
|
self.file_connector_factory = file_connector_factory
|
|
45
|
-
self.dispatcher = dispatcher
|
|
46
38
|
|
|
47
|
-
# lower warnings
|
|
48
39
|
logging.getLogger().setLevel(logging.ERROR)
|
|
49
40
|
|
|
50
41
|
self.splitter = RecursiveCharacterTextSplitter(
|
|
@@ -53,135 +44,131 @@ class LoadDocumentsService:
|
|
|
53
44
|
separators=["\n\n", "\n", "."]
|
|
54
45
|
)
|
|
55
46
|
|
|
56
|
-
def
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
filters: Dict = None):
|
|
47
|
+
def load_sources(self,
|
|
48
|
+
company: Company,
|
|
49
|
+
sources_to_load: list[str] = None,
|
|
50
|
+
filters: dict = None) -> int:
|
|
61
51
|
"""
|
|
62
|
-
Loads
|
|
52
|
+
Loads documents from one or more configured sources for a company.
|
|
63
53
|
|
|
64
54
|
Args:
|
|
65
55
|
company (Company): The company to load files for.
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
filters (
|
|
56
|
+
sources_to_load (list[str], optional): A list of specific source names to load.
|
|
57
|
+
If None, all configured sources will be loaded.
|
|
58
|
+
filters (dict, optional): Filters to apply when listing files (e.g., file extension).
|
|
69
59
|
|
|
70
60
|
Returns:
|
|
71
|
-
int: The number of processed files.
|
|
61
|
+
int: The total number of processed files.
|
|
72
62
|
"""
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
63
|
+
knowledge_base_config = self.config_service.get_configuration(company.short_name, 'knowledge_base')
|
|
64
|
+
if not knowledge_base_config:
|
|
65
|
+
raise IAToolkitException(IAToolkitException.ErrorType.CONFIG_ERROR,
|
|
66
|
+
f"Missing 'knowledge_base' configuration for company '{company.short_name}'.")
|
|
67
|
+
|
|
68
|
+
if not sources_to_load:
|
|
69
|
+
raise IAToolkitException(IAToolkitException.ErrorType.PARAM_NOT_FILLED,
|
|
70
|
+
f"Missing sources to load for company '{company.short_name}'.")
|
|
71
|
+
|
|
72
|
+
base_connector_config = self._get_base_connector_config(knowledge_base_config)
|
|
73
|
+
all_sources = knowledge_base_config.get('document_sources', {})
|
|
74
|
+
|
|
75
|
+
total_processed_files = 0
|
|
76
|
+
for source_name in sources_to_load:
|
|
77
|
+
source_config = all_sources.get(source_name)
|
|
78
|
+
if not source_config:
|
|
79
|
+
logging.warning(f"Source '{source_name}' not found in configuration for company '{company.short_name}'. Skipping.")
|
|
80
|
+
continue
|
|
81
|
+
|
|
82
|
+
try:
|
|
83
|
+
logging.info(f"Processing source '{source_name}' for company '{company.short_name}'...")
|
|
84
|
+
|
|
85
|
+
# Combine the base connector configuration with the specific path from the source.
|
|
86
|
+
full_connector_config = base_connector_config.copy()
|
|
87
|
+
full_connector_config['path'] = source_config.get('path')
|
|
88
|
+
|
|
89
|
+
# Prepare the context for the callback function.
|
|
90
|
+
context = {
|
|
91
|
+
'company': company,
|
|
92
|
+
'metadata': source_config.get('metadata', {})
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
processor_config = FileProcessorConfig(
|
|
96
|
+
callback=self._file_processing_callback,
|
|
97
|
+
context=context,
|
|
98
|
+
filters=filters or {"filename_contains": ".pdf"},
|
|
99
|
+
continue_on_error=True,
|
|
100
|
+
echo=True
|
|
101
|
+
)
|
|
76
102
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# Pasar metadata predefinida como parte del contexto al procesador
|
|
82
|
-
# para que esté disponible en la función load_file_callback
|
|
83
|
-
context = {
|
|
84
|
-
'company': company,
|
|
85
|
-
'metadata': {}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
if predefined_metadata:
|
|
89
|
-
context['metadata'] = predefined_metadata
|
|
90
|
-
|
|
91
|
-
# config the processor
|
|
92
|
-
processor_config = FileProcessorConfig(
|
|
93
|
-
callback=self.load_file_callback,
|
|
94
|
-
context=context,
|
|
95
|
-
filters=filters,
|
|
96
|
-
continue_on_error=True,
|
|
97
|
-
echo=True
|
|
98
|
-
)
|
|
103
|
+
connector = self.file_connector_factory.create(full_connector_config)
|
|
104
|
+
processor = FileProcessor(connector, processor_config)
|
|
105
|
+
processor.process_files()
|
|
99
106
|
|
|
100
|
-
|
|
101
|
-
|
|
107
|
+
total_processed_files += processor.processed_files
|
|
108
|
+
logging.info(f"Finished processing source '{source_name}'. Processed {processor.processed_files} files.")
|
|
102
109
|
|
|
103
|
-
|
|
104
|
-
|
|
110
|
+
except Exception as e:
|
|
111
|
+
logging.exception(f"Failed to process source '{source_name}' for company '{company.short_name}': {e}")
|
|
105
112
|
|
|
106
|
-
|
|
107
|
-
except Exception as e:
|
|
108
|
-
logging.exception("Loading files error: %s", str(e))
|
|
109
|
-
return {"error": str(e)}
|
|
113
|
+
return total_processed_files
|
|
110
114
|
|
|
111
|
-
def
|
|
112
|
-
"""
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
This method is intended to be used as the 'action' for FileProcessor.
|
|
115
|
+
def _get_base_connector_config(self, knowledge_base_config: dict) -> dict:
|
|
116
|
+
"""Determines and returns the appropriate base connector configuration (dev vs prod)."""
|
|
117
|
+
connectors = knowledge_base_config.get('connectors', {})
|
|
118
|
+
env = os.getenv('FLASK_ENV', 'dev')
|
|
116
119
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
120
|
+
if env == 'dev':
|
|
121
|
+
return connectors.get('development', {'type': 'local'})
|
|
122
|
+
else:
|
|
123
|
+
prod_config = connectors.get('production')
|
|
124
|
+
if not prod_config:
|
|
125
|
+
raise IAToolkitException(IAToolkitException.ErrorType.CONFIG_ERROR,
|
|
126
|
+
"Production connector configuration is missing.")
|
|
127
|
+
# The S3 connector itself is responsible for reading AWS environment variables.
|
|
128
|
+
# No need to pass credentials explicitly here.
|
|
129
|
+
return prod_config
|
|
123
130
|
|
|
131
|
+
def _file_processing_callback(self, company: Company, filename: str, content: bytes, context: dict = None):
|
|
132
|
+
"""
|
|
133
|
+
Callback method to process a single file. It extracts text, merges metadata,
|
|
134
|
+
and saves the document to both relational and vector stores.
|
|
135
|
+
"""
|
|
124
136
|
if not company:
|
|
125
|
-
raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
|
|
126
|
-
f"Falta configurar empresa")
|
|
137
|
+
raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER, "Missing company object in callback.")
|
|
127
138
|
|
|
128
|
-
|
|
129
|
-
|
|
139
|
+
if self.doc_repo.get(company_id=company.id, filename=filename):
|
|
140
|
+
logging.debug(f"File '{filename}' already exists for company '{company.id}'. Skipping.")
|
|
130
141
|
return
|
|
131
142
|
|
|
132
143
|
try:
|
|
133
|
-
# extract text from the document
|
|
134
144
|
document_content = self.doc_service.file_to_txt(filename, content)
|
|
135
|
-
content_base64 = base64.b64encode(content).decode('utf-8')
|
|
136
145
|
|
|
137
|
-
#
|
|
138
|
-
|
|
146
|
+
# Get predefined metadata from the context passed by the processor.
|
|
147
|
+
predefined_metadata = context.get('metadata', {}) if context else {}
|
|
139
148
|
|
|
140
|
-
#
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
# Fusionar los metadatos. El orden de prioridad es:
|
|
144
|
-
# 1. dynamic_metadata (tiene mayor prioridad)
|
|
145
|
-
# 2. context_metadata (del parámetro context)
|
|
146
|
-
# Los valores en dynamic_metadata tendrán precedencia sobre los de context_metadata
|
|
147
|
-
final_meta = {**context_metadata, **dynamic_metadata}
|
|
148
|
-
|
|
149
|
-
# save the file in the document repositories
|
|
149
|
+
# Save the document to the relational database.
|
|
150
|
+
session = self.doc_repo.session
|
|
150
151
|
new_document = Document(
|
|
151
152
|
company_id=company.id,
|
|
152
153
|
filename=filename,
|
|
153
154
|
content=document_content,
|
|
154
|
-
content_b64=
|
|
155
|
-
meta=
|
|
155
|
+
content_b64=base64.b64encode(content).decode('utf-8'),
|
|
156
|
+
meta=predefined_metadata
|
|
156
157
|
)
|
|
157
|
-
|
|
158
|
-
# insert the document into the Database (without commit)
|
|
159
|
-
session = self.doc_repo.session
|
|
160
158
|
session.add(new_document)
|
|
161
|
-
session.flush()
|
|
162
|
-
|
|
163
|
-
# split the content, and create the chunk list
|
|
164
|
-
splitted_content = self.splitter.split_text(document_content)
|
|
165
|
-
chunk_list = [
|
|
166
|
-
VSDoc(
|
|
167
|
-
company_id=company.id,
|
|
168
|
-
document_id=new_document.id,
|
|
169
|
-
text=text
|
|
170
|
-
)
|
|
171
|
-
for text in splitted_content
|
|
172
|
-
]
|
|
159
|
+
session.flush() # Flush to get the new_document.id without committing.
|
|
173
160
|
|
|
174
|
-
#
|
|
175
|
-
self.
|
|
161
|
+
# Split into chunks and prepare for vector store.
|
|
162
|
+
chunks = self.splitter.split_text(document_content)
|
|
163
|
+
vs_docs = [VSDoc(company_id=company.id, document_id=new_document.id, text=text) for text in chunks]
|
|
176
164
|
|
|
177
|
-
#
|
|
178
|
-
|
|
165
|
+
# Add document chunks to the vector store.
|
|
166
|
+
self.vector_store.add_document(company.short_name, vs_docs)
|
|
179
167
|
|
|
168
|
+
session.commit()
|
|
180
169
|
return new_document
|
|
181
170
|
except Exception as e:
|
|
182
171
|
self.doc_repo.session.rollback()
|
|
183
|
-
|
|
184
|
-
# if something fails, throw exception
|
|
185
|
-
logging.exception("Error procesando el archivo %s: %s", filename, str(e))
|
|
172
|
+
logging.exception(f"Error processing file '{filename}': {e}")
|
|
186
173
|
raise IAToolkitException(IAToolkitException.ErrorType.LOAD_DOCUMENT_ERROR,
|
|
187
|
-
|
|
174
|
+
f"Error while processing file: {filename}")
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
# IAToolkit is open source software.
|
|
5
5
|
|
|
6
6
|
from iatoolkit.infra.mail_app import MailApp
|
|
7
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
7
8
|
from injector import inject
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
@@ -13,18 +14,22 @@ TEMP_DIR = Path("static/temp")
|
|
|
13
14
|
|
|
14
15
|
class MailService:
|
|
15
16
|
@inject
|
|
16
|
-
def __init__(self,
|
|
17
|
+
def __init__(self,
|
|
18
|
+
mail_app: MailApp,
|
|
19
|
+
i18n_service: I18nService):
|
|
17
20
|
self.mail_app = mail_app
|
|
21
|
+
self.i18n_service = i18n_service
|
|
22
|
+
|
|
18
23
|
|
|
19
24
|
def _read_token_bytes(self, token: str) -> bytes:
|
|
20
25
|
# Defensa simple contra path traversal
|
|
21
26
|
if not token or "/" in token or "\\" in token or token.startswith("."):
|
|
22
27
|
raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
|
|
23
|
-
"attachment_token
|
|
28
|
+
"attachment_token invalid")
|
|
24
29
|
path = TEMP_DIR / token
|
|
25
30
|
if not path.is_file():
|
|
26
31
|
raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
|
|
27
|
-
f"
|
|
32
|
+
f"attach file not found: {token}")
|
|
28
33
|
return path.read_bytes()
|
|
29
34
|
|
|
30
35
|
def send_mail(self, **kwargs):
|
|
@@ -59,4 +64,4 @@ class MailService:
|
|
|
59
64
|
body=body,
|
|
60
65
|
attachments=norm_attachments)
|
|
61
66
|
|
|
62
|
-
return '
|
|
67
|
+
return self.i18n_service.t('services.mail_sent')
|