iatoolkit 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of iatoolkit might be problematic. Click here for more details.

@@ -0,0 +1,45 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Producto: IAToolkit
3
+ # Todos los derechos reservados.
4
+ # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
5
+
6
+ from injector import inject
7
+ from repositories.llm_query_repo import LLMQueryRepo
8
+ from repositories.profile_repo import ProfileRepo
9
+ from common.util import Utility
10
+
11
+
12
class HistoryService:
    """Retrieves a user's LLM-query history for a given company.

    All public methods return a dict: either ``{'error': ...}`` on failure
    or ``{'message': ..., 'history': [...]}`` on success — errors are
    reported as payloads, never raised to the caller.
    """

    @inject
    def __init__(self, llm_query_repo: LLMQueryRepo,
                 profile_repo: ProfileRepo,
                 util: Utility):
        self.llm_query_repo = llm_query_repo
        self.profile_repo = profile_repo
        self.util = util

    def get_history(self,
                    company_short_name: str,
                    external_user_id: str = None,
                    local_user_id: int = 0) -> dict:
        """Return the stored query history for one user of one company.

        Args:
            company_short_name: short name identifying the company.
            external_user_id: external identifier of the user (optional).
            local_user_id: internal user id, used when no external id applies.

        Returns:
            dict with 'message' and 'history' (list of query dicts) on
            success, or with 'error' describing the failure.
        """
        try:
            # Resolve which identifier (external vs. local) names the user.
            identifier = self.util.resolve_user_identifier(external_user_id, local_user_id)
            if not identifier:
                return {'error': "No se pudo resolver el identificador del usuario"}

            # The company must exist before we can look anything up.
            company_record = self.profile_repo.get_company_by_short_name(company_short_name)
            if not company_record:
                return {'error': f'No existe la empresa: {company_short_name}'}

            queries = self.llm_query_repo.get_history(company_record, identifier)
            if not queries:
                return {'error': 'No se pudo obtener el historial'}

            # Serialize each query record for the response payload.
            entries = []
            for entry in queries:
                entries.append(entry.to_dict())

            return {'message': 'Historial obtenido correctamente', 'history': entries}

        except Exception as e:
            # Deliberate catch-all: this service reports failures as payloads.
            return {'error': str(e)}
@@ -0,0 +1,91 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Producto: IAToolkit
3
+ # Todos los derechos reservados.
4
+ # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
5
+
6
+ import jwt
7
+ import time
8
+ import logging
9
+ from injector import singleton, inject
10
+ from typing import Optional, Dict, Any
11
+ from flask import Flask
12
+
13
+
14
@singleton
class JWTService:
    """Issues and validates the JWTs used for chat sessions.

    Reads the signing key and algorithm from Flask's ``app.config`` at
    construction time and fails fast if either is missing.
    """

    @inject
    def __init__(self, app: Flask):
        # Read configuration directly from app.config; missing keys are fatal
        # because the service cannot sign or verify anything without them.
        try:
            self.secret_key = app.config['JWT_SECRET_KEY']
            self.algorithm = app.config['JWT_ALGORITHM']
        except KeyError as e:
            logging.error("Configuración JWT faltante en app.config: %s. JWTService no funcionará correctamente.", e)
            raise RuntimeError(f"Configuración JWT esencial faltante: {e}")

    def generate_chat_jwt(self,
                          company_id: int,
                          company_short_name: str,
                          external_user_id: str,
                          expires_delta_seconds: int) -> Optional[str]:
        """Generate a signed JWT for a chat session.

        Returns the encoded token, or None if encoding fails.
        """
        try:
            # Capture the clock once so that exp == iat + expires_delta_seconds
            # exactly (two separate time.time() calls would drift).
            now = time.time()
            payload = {
                'company_id': company_id,
                'company_short_name': company_short_name,
                'external_user_id': external_user_id,
                'exp': now + expires_delta_seconds,
                'iat': now,
                'type': 'chat_session'  # token-type discriminator
            }
            token = jwt.encode(payload, self.secret_key, algorithm=self.algorithm)
            return token
        except Exception as e:
            logging.error("Error al generar JWT para company %s, user %s: %s", company_id, external_user_id, e)
            return None

    def validate_chat_jwt(self, token: str, expected_company_short_name: str) -> Optional[Dict[str, Any]]:
        """Validate a chat-session JWT.

        Returns the decoded payload when the signature, token type, company
        and required claims all check out; otherwise None. Never raises.
        """
        if not token:
            return None
        try:
            payload = jwt.decode(token, self.secret_key, algorithms=[self.algorithm])

            # Token must be of the chat-session type issued by generate_chat_jwt.
            if payload.get('type') != 'chat_session':
                logging.warning("Validación JWT fallida: tipo incorrecto '%s'", payload.get('type'))
                return None

            # Token must belong to the company the caller expects.
            if payload.get('company_short_name') != expected_company_short_name:
                logging.warning(
                    "Validación JWT fallida: company_short_name no coincide. "
                    "Esperado: %s, Obtenido: %s",
                    expected_company_short_name, payload.get('company_short_name')
                )
                return None

            # external_user_id must be present and non-empty.
            if 'external_user_id' not in payload or not payload['external_user_id']:
                logging.warning("Validación JWT fallida: external_user_id ausente o vacío.")
                return None

            # company_id must be present and an int.
            if 'company_id' not in payload or not isinstance(payload['company_id'], int):
                logging.warning("Validación JWT fallida: company_id ausente o tipo incorrecto.")
                return None

            logging.debug(
                "JWT validado exitosamente para company: %s, user: %s",
                payload.get('company_short_name'), payload.get('external_user_id'))
            return payload

        except jwt.ExpiredSignatureError:
            logging.info("Validación JWT fallida: token expirado para %s", expected_company_short_name)
            return None
        except jwt.InvalidTokenError as e:
            logging.warning("Validación JWT fallida: token inválido para %s. Error: %s", expected_company_short_name, e)
            return None
        except Exception as e:
            logging.error("Error inesperado durante validación de JWT para %s: %s", expected_company_short_name, e)
            return None
@@ -0,0 +1,212 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Producto: IAToolkit
3
+ # Todos los derechos reservados.
4
+ # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
5
+
6
+ from repositories.vs_repo import VSRepo
7
+ from repositories.document_repo import DocumentRepo
8
+ from repositories.profile_repo import ProfileRepo
9
+ from repositories.llm_query_repo import LLMQueryRepo
10
+ from repositories.models import Document, VSDoc, Company
11
+ from services.document_service import DocumentService
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from infra.connectors.file_connector_factory import FileConnectorFactory
14
+ from services.file_processor_service import FileProcessorConfig, FileProcessor
15
+ from services.dispatcher_service import Dispatcher
16
+ from common.exceptions import IAToolkitException
17
+ import logging
18
+ import base64
19
+ from injector import inject
20
+ from typing import Dict
21
+
22
+
23
class LoadDocumentsService:
    """Loads company documents from configured connectors, extracts their
    text, stores them in the document repository and indexes the chunked
    content in the vector store.
    """

    @inject
    def __init__(self,
                 file_connector_factory: FileConnectorFactory,
                 doc_service: DocumentService,
                 doc_repo: DocumentRepo,
                 vector_store: VSRepo,
                 profile_repo: ProfileRepo,
                 dispatcher: Dispatcher,
                 llm_query_repo: LLMQueryRepo
                 ):
        self.doc_service = doc_service
        self.doc_repo = doc_repo
        self.profile_repo = profile_repo
        self.llm_query_repo = llm_query_repo
        self.vector_store = vector_store
        self.file_connector_factory = file_connector_factory
        self.dispatcher = dispatcher
        # Set while load() iterates companies; fallback for load_file callbacks.
        self.company = None

        # NOTE(review): this mutates the ROOT logger level for the whole
        # process, not just this service — confirm that is intentional.
        logging.getLogger().setLevel(logging.ERROR)

        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100,
            separators=["\n\n", "\n", "."]
        )

    def load(self, doc_type: str = None):
        """Load files for every company that has a 'load' configuration.

        Args:
            doc_type: optional document type; when given, only that
                configured type is loaded for each company.

        Returns:
            dict with a summary message of how many files were processed.

        Raises:
            IAToolkitException: when a company config lacks a connector.
        """
        files_loaded = 0
        companies = self.profile_repo.get_companies()

        for company in companies:
            load_config = company.parameters.get('load', {})
            if not load_config:
                continue

            print(f"Cargando datos de ** {company.short_name} **")
            self.company = company

            # Per-document-type configurations, if any.
            doc_types_config = load_config.get('document_types', {})

            if doc_types_config and len(doc_types_config) > 0:
                # A specific type was requested: load only that one.
                if doc_type and doc_type in doc_types_config:
                    files_loaded += self._load_document_type(company, doc_type, doc_types_config[doc_type])
                # No type requested: load every configured type.
                elif not doc_type:
                    for type_name, type_config in doc_types_config.items():
                        files_loaded += self._load_document_type(company, type_name, type_config)
            else:
                # Legacy behavior: a single company-wide connector config.
                connector = load_config.get('connector', {})
                if not connector:
                    raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
                                             f"Falta configurar conector en empresa {company.short_name}")

                files_loaded += self.load_data_source(connector)

        return {'message': f'{files_loaded} files processed'}

    def _load_document_type(self, company: Company, doc_type_name: str, type_config: Dict) -> int:
        """Load one configured document type for a company.

        Raises:
            IAToolkitException: when the type config lacks a connector.
        """
        connector = type_config.get('connector')
        if not connector:
            logging.warning(f"Falta configurar conector para tipo {doc_type_name} en empresa {company.short_name}")
            raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
                                     f"Falta configurar conector para tipo {doc_type_name} en empresa {company.short_name}")

        # Metadata applied to every document loaded from this connector.
        predefined_metadata = type_config.get('metadata', {})

        # Type-specific file filters; default to PDF files only.
        filters = type_config.get('filters', {"filename_contains": ".pdf"})

        return self.load_data_source(connector, predefined_metadata, filters)

    def load_data_source(self, connector_config: Dict, predefined_metadata: Dict = None, filters: Dict = None):
        """Load files from one data source through its connector.

        Args:
            connector_config: connector configuration.
            predefined_metadata: metadata applied to every document from
                this source (merged with filename-derived metadata).
            filters: file filters for this load; defaults to PDFs only.

        Returns:
            int number of files processed, or an error dict on failure.
        """
        try:
            if not filters:
                filters = {"filename_contains": ".pdf"}

            # Pass predefined metadata through the processor context so it
            # reaches load_file for each file.
            extra_context = {}
            if predefined_metadata:
                extra_context['metadata'] = predefined_metadata

            processor_config = FileProcessorConfig(
                context=extra_context,
                filters=filters,
                action=self.load_file,
                continue_on_error=True,
                echo=True
            )

            connector = self.file_connector_factory.create(connector_config)
            processor = FileProcessor(connector, processor_config)

            processor.process_files()

            return processor.processed_files
        except Exception as e:
            logging.exception("Loading files error: %s", str(e))
            return {"error": str(e)}

    def load_file(self, filename: str, content: bytes, context: dict = None, company: Company = None):
        """Load a single file: extract text, persist it, chunk and index it.

        Invoked per file by FileProcessor (set as FileProcessorConfig.action).
        Already-loaded files (same company + filename) are skipped.

        Args:
            filename: name of the file being loaded.
            content: raw file bytes.
            context: optional processor context; may carry a 'metadata' dict.
                (Default changed from a mutable ``{}`` to None — a shared
                mutable default is a latent cross-call aliasing bug.)
            company: owning company; falls back to self.company when omitted.

        Returns:
            The new Document, or None when the file already exists.

        Raises:
            IAToolkitException: on any processing failure (after rollback).
        """
        if not company:
            company = self.company

        # Skip files already present in the document repository.
        if self.doc_repo.get(company=company, filename=filename):
            return

        try:
            # Extract plain text and keep the original bytes as base64.
            document_content = self.doc_service.file_to_txt(filename, content)
            content_base64 = base64.b64encode(content).decode('utf-8')

            # Metadata derived from the filename structure.
            dynamic_metadata = self.dispatcher.get_metadata_from_filename(company_name=company.short_name, filename=filename)

            # Metadata supplied through the processor context, if any.
            context_metadata = context.get('metadata', {}).copy() if context else {}

            # Merge: dynamic (filename-derived) metadata wins over context
            # metadata on key collisions.
            final_meta = {**context_metadata, **dynamic_metadata}

            new_document = Document(
                company_id=company.id,
                filename=filename,
                content=document_content,
                content_b64=content_base64,
                meta=final_meta
            )

            # Insert without committing yet; flush to obtain the new ID.
            session = self.doc_repo.session
            session.add(new_document)
            session.flush()

            # Chunk the text and build the vector-store records.
            splitted_content = self.splitter.split_text(document_content)
            chunk_list = [
                VSDoc(
                    company_id=company.id,
                    document_id=new_document.id,
                    text=text
                )
                for text in splitted_content
            ]

            self.vector_store.add_document(chunk_list)

            # Commit document + chunks as one transaction.
            session.commit()

            return new_document
        except Exception as e:
            self.doc_repo.session.rollback()

            logging.exception("Error procesando el archivo %s: %s", filename, str(e))
            # Include the actual filename in the error (the original message
            # was an f-string with no placeholder that said "(unknown)").
            raise IAToolkitException(IAToolkitException.ErrorType.LOAD_DOCUMENT_ERROR,
                                     f"Error al procesar el archivo {filename}")
@@ -0,0 +1,62 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Producto: IAToolkit
3
+ # Todos los derechos reservados.
4
+ # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
5
+
6
+ from infra.mail_app import MailApp
7
+ from injector import inject
8
+ from pathlib import Path
9
+ from common.exceptions import IAToolkitException
10
+ import base64
11
+
12
# Directory where attachment files referenced by token are staged.
TEMP_DIR = Path("static/temp")

class MailService:
    """Sends e-mail through MailApp, resolving token-referenced attachments
    from the local temp directory into base64 payloads.
    """

    @inject
    def __init__(self, mail_app: MailApp):
        self.mail_app = mail_app

    def _read_token_bytes(self, token: str) -> bytes:
        """Read the bytes of a staged attachment identified by token.

        Raises:
            IAToolkitException: for traversal-suspicious tokens or when the
                file does not exist under TEMP_DIR.
        """
        # Simple path-traversal defense: reject separators and dot-prefixed
        # names (covers ".." and hidden files).
        if not token or "/" in token or "\\" in token or token.startswith("."):
            raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
                                     "attachment_token inválido")
        path = TEMP_DIR / token
        if not path.is_file():
            raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
                                     f"Adjunto no encontrado: {token}")
        return path.read_bytes()

    def send_mail(self, **kwargs):
        """Send an e-mail with optional attachments.

        Keyword Args:
            from_email: sender address (defaults to iatoolkit@iatoolkit.com).
            recipient: destination address.
            subject: message subject.
            body: message body.
            attachments: list of dicts, each with 'filename' and either
                'attachment_token' (file staged in TEMP_DIR) or 'content'
                (already base64-encoded).

        Returns:
            The literal string 'mail enviado' on completion.
        """
        from_email = kwargs.get('from_email', 'iatoolkit@iatoolkit.com')
        recipient = kwargs.get('recipient')
        subject = kwargs.get('subject')
        body = kwargs.get('body')
        attachments = kwargs.get('attachments')

        # Normalize every attachment to the MailApp payload shape
        # (filename + base64 content).
        norm_attachments = []
        for a in attachments or []:
            if a.get("attachment_token"):
                raw = self._read_token_bytes(a["attachment_token"])
                norm_attachments.append({
                    "filename": a["filename"],
                    "content": base64.b64encode(raw).decode("utf-8"),
                })
            else:
                # Content is assumed to already be base64-encoded.
                norm_attachments.append({
                    "filename": a["filename"],
                    "content": a["content"]
                })

        self.sender = {"email": from_email, "name": "IAToolkit"}

        # Return value of send_email is not used (the original bound it to an
        # unused local); failures are expected to surface as exceptions.
        self.mail_app.send_email(
            sender=self.sender,
            to=recipient,
            subject=subject,
            body=body,
            attachments=norm_attachments)

        return 'mail enviado'