iatoolkit 0.71.4__py3-none-any.whl → 1.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iatoolkit/__init__.py +19 -7
- iatoolkit/base_company.py +1 -71
- iatoolkit/cli_commands.py +9 -21
- iatoolkit/common/exceptions.py +2 -0
- iatoolkit/common/interfaces/__init__.py +0 -0
- iatoolkit/common/interfaces/asset_storage.py +34 -0
- iatoolkit/common/interfaces/database_provider.py +38 -0
- iatoolkit/common/model_registry.py +159 -0
- iatoolkit/common/routes.py +53 -32
- iatoolkit/common/util.py +17 -12
- iatoolkit/company_registry.py +55 -14
- iatoolkit/{iatoolkit.py → core.py} +102 -72
- iatoolkit/infra/{mail_app.py → brevo_mail_app.py} +15 -37
- iatoolkit/infra/llm_providers/__init__.py +0 -0
- iatoolkit/infra/llm_providers/deepseek_adapter.py +278 -0
- iatoolkit/infra/{gemini_adapter.py → llm_providers/gemini_adapter.py} +11 -17
- iatoolkit/infra/{openai_adapter.py → llm_providers/openai_adapter.py} +41 -7
- iatoolkit/infra/llm_proxy.py +235 -134
- iatoolkit/infra/llm_response.py +5 -0
- iatoolkit/locales/en.yaml +134 -4
- iatoolkit/locales/es.yaml +293 -162
- iatoolkit/repositories/database_manager.py +92 -22
- iatoolkit/repositories/document_repo.py +7 -0
- iatoolkit/repositories/filesystem_asset_repository.py +36 -0
- iatoolkit/repositories/llm_query_repo.py +36 -22
- iatoolkit/repositories/models.py +86 -95
- iatoolkit/repositories/profile_repo.py +64 -13
- iatoolkit/repositories/vs_repo.py +31 -28
- iatoolkit/services/auth_service.py +1 -1
- iatoolkit/services/branding_service.py +1 -1
- iatoolkit/services/company_context_service.py +96 -39
- iatoolkit/services/configuration_service.py +329 -67
- iatoolkit/services/dispatcher_service.py +51 -227
- iatoolkit/services/document_service.py +10 -1
- iatoolkit/services/embedding_service.py +9 -6
- iatoolkit/services/excel_service.py +50 -2
- iatoolkit/services/file_processor_service.py +0 -5
- iatoolkit/services/history_manager_service.py +208 -0
- iatoolkit/services/jwt_service.py +1 -1
- iatoolkit/services/knowledge_base_service.py +412 -0
- iatoolkit/services/language_service.py +8 -2
- iatoolkit/services/license_service.py +82 -0
- iatoolkit/{infra/llm_client.py → services/llm_client_service.py} +42 -29
- iatoolkit/services/load_documents_service.py +18 -47
- iatoolkit/services/mail_service.py +171 -25
- iatoolkit/services/profile_service.py +69 -36
- iatoolkit/services/{prompt_manager_service.py → prompt_service.py} +136 -25
- iatoolkit/services/query_service.py +229 -203
- iatoolkit/services/sql_service.py +116 -34
- iatoolkit/services/tool_service.py +246 -0
- iatoolkit/services/user_feedback_service.py +18 -6
- iatoolkit/services/user_session_context_service.py +121 -51
- iatoolkit/static/images/iatoolkit_core.png +0 -0
- iatoolkit/static/images/iatoolkit_logo.png +0 -0
- iatoolkit/static/js/chat_feedback_button.js +1 -1
- iatoolkit/static/js/chat_help_content.js +4 -4
- iatoolkit/static/js/chat_main.js +61 -9
- iatoolkit/static/js/chat_model_selector.js +227 -0
- iatoolkit/static/js/chat_onboarding_button.js +1 -1
- iatoolkit/static/js/chat_reload_button.js +4 -1
- iatoolkit/static/styles/chat_iatoolkit.css +59 -3
- iatoolkit/static/styles/chat_public.css +28 -0
- iatoolkit/static/styles/documents.css +598 -0
- iatoolkit/static/styles/landing_page.css +223 -7
- iatoolkit/static/styles/llm_output.css +34 -1
- iatoolkit/system_prompts/__init__.py +0 -0
- iatoolkit/system_prompts/query_main.prompt +28 -3
- iatoolkit/system_prompts/sql_rules.prompt +47 -12
- iatoolkit/templates/_company_header.html +30 -5
- iatoolkit/templates/_login_widget.html +3 -3
- iatoolkit/templates/base.html +13 -0
- iatoolkit/templates/chat.html +45 -3
- iatoolkit/templates/forgot_password.html +3 -2
- iatoolkit/templates/onboarding_shell.html +1 -2
- iatoolkit/templates/signup.html +3 -0
- iatoolkit/views/base_login_view.py +8 -3
- iatoolkit/views/change_password_view.py +1 -1
- iatoolkit/views/chat_view.py +76 -0
- iatoolkit/views/forgot_password_view.py +9 -4
- iatoolkit/views/history_api_view.py +3 -3
- iatoolkit/views/home_view.py +4 -2
- iatoolkit/views/init_context_api_view.py +1 -1
- iatoolkit/views/llmquery_api_view.py +4 -3
- iatoolkit/views/load_company_configuration_api_view.py +49 -0
- iatoolkit/views/{file_store_api_view.py → load_document_api_view.py} +15 -11
- iatoolkit/views/login_view.py +25 -8
- iatoolkit/views/logout_api_view.py +10 -2
- iatoolkit/views/prompt_api_view.py +1 -1
- iatoolkit/views/rag_api_view.py +216 -0
- iatoolkit/views/root_redirect_view.py +22 -0
- iatoolkit/views/signup_view.py +12 -4
- iatoolkit/views/static_page_view.py +27 -0
- iatoolkit/views/users_api_view.py +33 -0
- iatoolkit/views/verify_user_view.py +1 -1
- iatoolkit-1.4.2.dist-info/METADATA +268 -0
- iatoolkit-1.4.2.dist-info/RECORD +133 -0
- iatoolkit-1.4.2.dist-info/licenses/LICENSE_COMMUNITY.md +15 -0
- iatoolkit/repositories/tasks_repo.py +0 -52
- iatoolkit/services/history_service.py +0 -37
- iatoolkit/services/search_service.py +0 -55
- iatoolkit/services/tasks_service.py +0 -188
- iatoolkit/templates/about.html +0 -13
- iatoolkit/templates/index.html +0 -145
- iatoolkit/templates/login_simulation.html +0 -45
- iatoolkit/views/external_login_view.py +0 -73
- iatoolkit/views/index_view.py +0 -14
- iatoolkit/views/login_simulation_view.py +0 -93
- iatoolkit/views/tasks_api_view.py +0 -72
- iatoolkit/views/tasks_review_api_view.py +0 -55
- iatoolkit-0.71.4.dist-info/METADATA +0 -276
- iatoolkit-0.71.4.dist-info/RECORD +0 -122
- {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/WHEEL +0 -0
- {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/licenses/LICENSE +0 -0
- {iatoolkit-0.71.4.dist-info → iatoolkit-1.4.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
import jwt
|
|
7
|
+
import os
|
|
8
|
+
import logging
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from iatoolkit.common.exceptions import IAToolkitException
|
|
11
|
+
from injector import inject, singleton
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@singleton
class LicenseService:
    """
    Exposes the edition limits and feature flags that apply to this installation.

    The limits are resolved once, at construction time, by ``_load_limits``.
    As written, ``_load_limits`` always returns the built-in Community Edition
    defaults; no license token is read or verified in this class.
    """

    @inject
    def __init__(self):
        self.limits = self._load_limits()

    def _load_limits(self):
        """Return the limits dictionary (the Community Edition defaults)."""
        return {
            "license_type": "Community Edition",
            "plan": "Open Source (Community Edition)",
            "max_companies": 1,
            "max_tools": 3,
            "features": {
                "multi_tenant": False,
                "rag_advanced": False,
            },
        }

    # --- Information Getters ---

    def get_license_type(self) -> str:
        """Return the license type, defaulting to "Community Edition"."""
        return self.limits.get("license_type", "Community Edition")

    def get_plan_name(self) -> str:
        """Return the plan name, defaulting to "Unknown"."""
        return self.limits.get("plan", "Unknown")

    def get_max_companies(self) -> int:
        """Return the maximum number of companies (default 1)."""
        return self.limits.get("max_companies", 1)

    def get_max_tools_per_company(self) -> int:
        """Return the maximum number of tools per company (default 3)."""
        return self.limits.get("max_tools", 3)

    def get_license_info(self) -> str:
        """Return a one-line, human-readable summary of the plan and its limits."""
        plan = self.get_plan_name()
        companies = self.get_max_companies()
        tools = self.get_max_tools_per_company()
        return f"Plan: {plan}, Companies: {companies}, Tools: {tools}"

    # --- Restriction Validators ---

    def validate_company_limit(self, current_count: int):
        """
        Check the number of companies against the plan limit.

        Raises:
            IAToolkitException: (PERMISSION) when ``current_count`` is greater
                than the limit. A limit of -1 disables the check.
        """
        max_allowed = self.get_max_companies()
        if max_allowed == -1:
            # -1 means unlimited
            return
        if current_count > max_allowed:
            raise IAToolkitException(
                IAToolkitException.ErrorType.PERMISSION,
                f"Company limit ({max_allowed}) reached for plan '{self.get_plan_name()}'."
            )

    def validate_tool_config_limit(self, tools_config: list):
        """
        Check a list of tool definitions against the per-company tool limit.

        Raises:
            IAToolkitException: (PERMISSION) when the list has more entries
                than the limit. A limit of -1 disables the check.
        """
        max_allowed = self.get_max_tools_per_company()
        if max_allowed == -1:
            # -1 means unlimited
            return
        defined = len(tools_config)
        if defined > max_allowed:
            raise IAToolkitException(
                IAToolkitException.ErrorType.PERMISSION,
                f"Configuration defines {defined} tools, but limit is {max_allowed}."
            )

    # --- Feature Gating Validators ---

    def has_feature(self, feature_key: str) -> bool:
        """Return the flag stored for ``feature_key`` under "features" (False when absent)."""
        return self.limits.get("features", {}).get(feature_key, False)
|
|
@@ -8,6 +8,7 @@ from iatoolkit.repositories.models import Company, LLMQuery
|
|
|
8
8
|
from iatoolkit.repositories.llm_query_repo import LLMQueryRepo
|
|
9
9
|
from sqlalchemy.exc import SQLAlchemyError, OperationalError
|
|
10
10
|
from iatoolkit.common.util import Utility
|
|
11
|
+
from iatoolkit.common.model_registry import ModelRegistry
|
|
11
12
|
from injector import inject
|
|
12
13
|
import time
|
|
13
14
|
import markdown2
|
|
@@ -30,11 +31,13 @@ class llmClient:
|
|
|
30
31
|
@inject
|
|
31
32
|
def __init__(self,
|
|
32
33
|
llmquery_repo: LLMQueryRepo,
|
|
33
|
-
|
|
34
|
+
llm_proxy: LLMProxy,
|
|
35
|
+
model_registry: ModelRegistry,
|
|
34
36
|
util: Utility
|
|
35
37
|
):
|
|
36
38
|
self.llmquery_repo = llmquery_repo
|
|
37
|
-
self.
|
|
39
|
+
self.llm_proxy = llm_proxy
|
|
40
|
+
self.model_registry = model_registry
|
|
38
41
|
self.util = util
|
|
39
42
|
self._dispatcher = None # Cache for the lazy-loaded dispatcher
|
|
40
43
|
|
|
@@ -73,33 +76,31 @@ class llmClient:
|
|
|
73
76
|
response = None
|
|
74
77
|
sql_retry_count = 0
|
|
75
78
|
force_tool_name = None
|
|
76
|
-
reasoning = {}
|
|
77
79
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
80
|
+
# Resolve per-model defaults and apply overrides (without mutating inputs).
|
|
81
|
+
request_params = self.model_registry.resolve_request_params(model=model, text=text)
|
|
82
|
+
text_payload = request_params["text"]
|
|
83
|
+
reasoning = request_params["reasoning"]
|
|
81
84
|
|
|
82
85
|
try:
|
|
83
86
|
start_time = time.time()
|
|
84
|
-
logging.info(f"calling llm model '{model}' with {self.count_tokens(context)} tokens...")
|
|
87
|
+
logging.info(f"calling llm model '{model}' with {self.count_tokens(context, context_history)} tokens...")
|
|
85
88
|
|
|
86
|
-
#
|
|
87
|
-
llm_proxy = self.llm_proxy_factory.create_for_company(company)
|
|
88
|
-
|
|
89
|
-
# here is the first call to the LLM
|
|
89
|
+
# this is the first call to the LLM on the iteration
|
|
90
90
|
try:
|
|
91
91
|
input_messages = [{
|
|
92
92
|
"role": "user",
|
|
93
93
|
"content": context
|
|
94
94
|
}]
|
|
95
95
|
|
|
96
|
-
response = llm_proxy.create_response(
|
|
96
|
+
response = self.llm_proxy.create_response(
|
|
97
|
+
company_short_name=company.short_name,
|
|
97
98
|
model=model,
|
|
99
|
+
input=input_messages,
|
|
98
100
|
previous_response_id=previous_response_id,
|
|
99
101
|
context_history=context_history,
|
|
100
|
-
input=input_messages,
|
|
101
102
|
tools=tools,
|
|
102
|
-
text=
|
|
103
|
+
text=text_payload,
|
|
103
104
|
reasoning=reasoning,
|
|
104
105
|
)
|
|
105
106
|
stats = self.get_stats(response)
|
|
@@ -127,12 +128,18 @@ class llmClient:
|
|
|
127
128
|
# execute the function call through the dispatcher
|
|
128
129
|
fcall_time = time.time()
|
|
129
130
|
function_name = tool_call.name
|
|
130
|
-
|
|
131
|
-
|
|
131
|
+
|
|
132
|
+
try:
|
|
133
|
+
args = json.loads(tool_call.arguments)
|
|
134
|
+
except Exception as e:
|
|
135
|
+
logging.error(f"[Dispatcher] json.loads failed: {e}")
|
|
136
|
+
raise
|
|
137
|
+
logging.debug(f"[Dispatcher] Parsed args = {args}")
|
|
138
|
+
|
|
132
139
|
try:
|
|
133
140
|
result = self.dispatcher.dispatch(
|
|
134
141
|
company_short_name=company.short_name,
|
|
135
|
-
|
|
142
|
+
function_name=function_name,
|
|
136
143
|
**args
|
|
137
144
|
)
|
|
138
145
|
force_tool_name = None
|
|
@@ -160,6 +167,7 @@ class llmClient:
|
|
|
160
167
|
input_messages.append({
|
|
161
168
|
"type": "function_call_output",
|
|
162
169
|
"call_id": tool_call.call_id,
|
|
170
|
+
"status": "completed",
|
|
163
171
|
"output": str(result)
|
|
164
172
|
})
|
|
165
173
|
function_calls = True
|
|
@@ -170,17 +178,18 @@ class llmClient:
|
|
|
170
178
|
f_calls.append(f_call_identity)
|
|
171
179
|
f_call_time += elapsed
|
|
172
180
|
|
|
173
|
-
logging.info(f"end execution {function_name} in {elapsed:.1f} secs.")
|
|
181
|
+
logging.info(f"[{company.short_name}] end execution of tool: {function_name} in {elapsed:.1f} secs.")
|
|
174
182
|
|
|
175
183
|
if not function_calls:
|
|
176
|
-
break # no function
|
|
184
|
+
break # no more function calls, the answer to send back to llm
|
|
177
185
|
|
|
178
186
|
# send results back to the LLM
|
|
179
187
|
tool_choice_value = "auto"
|
|
180
188
|
if force_tool_name:
|
|
181
189
|
tool_choice_value = "required"
|
|
182
190
|
|
|
183
|
-
response = llm_proxy.create_response(
|
|
191
|
+
response = self.llm_proxy.create_response(
|
|
192
|
+
company_short_name=company.short_name,
|
|
184
193
|
model=model,
|
|
185
194
|
input=input_messages,
|
|
186
195
|
previous_response_id=response.id,
|
|
@@ -188,7 +197,7 @@ class llmClient:
|
|
|
188
197
|
reasoning=reasoning,
|
|
189
198
|
tool_choice=tool_choice_value,
|
|
190
199
|
tools=tools,
|
|
191
|
-
text=
|
|
200
|
+
text=text_payload,
|
|
192
201
|
)
|
|
193
202
|
stats_fcall = self.add_stats(stats_fcall, self.get_stats(response))
|
|
194
203
|
|
|
@@ -200,9 +209,11 @@ class llmClient:
|
|
|
200
209
|
# decode the LLM response
|
|
201
210
|
decoded_response = self.decode_response(response)
|
|
202
211
|
|
|
212
|
+
# Extract reasoning from the final response object
|
|
213
|
+
final_reasoning = getattr(response, 'reasoning_content', '')
|
|
214
|
+
|
|
203
215
|
# save the query and response
|
|
204
216
|
query = LLMQuery(user_identifier=user_identifier,
|
|
205
|
-
task_id=0,
|
|
206
217
|
company_id=company.id,
|
|
207
218
|
query=question,
|
|
208
219
|
output=decoded_response.get('answer', ''),
|
|
@@ -226,6 +237,8 @@ class llmClient:
|
|
|
226
237
|
'aditional_data': decoded_response.get('aditional_data', {}),
|
|
227
238
|
'response_id': response.id,
|
|
228
239
|
'query_id': query.id,
|
|
240
|
+
'model': model,
|
|
241
|
+
'reasoning_content': final_reasoning,
|
|
229
242
|
}
|
|
230
243
|
except SQLAlchemyError as db_error:
|
|
231
244
|
# rollback
|
|
@@ -240,11 +253,10 @@ class llmClient:
|
|
|
240
253
|
|
|
241
254
|
# log the error in the llm_query table
|
|
242
255
|
query = LLMQuery(user_identifier=user_identifier,
|
|
243
|
-
task_id=0,
|
|
244
256
|
company_id=company.id,
|
|
245
257
|
query=question,
|
|
246
258
|
output=error_message,
|
|
247
|
-
response=
|
|
259
|
+
response={},
|
|
248
260
|
valid_response=False,
|
|
249
261
|
function_calls=f_calls,
|
|
250
262
|
)
|
|
@@ -265,14 +277,15 @@ class llmClient:
|
|
|
265
277
|
|
|
266
278
|
logging.info(f"initializing model '{model}' with company context: {self.count_tokens(company_base_context)} tokens...")
|
|
267
279
|
|
|
268
|
-
llm_proxy = self.llm_proxy_factory.create_for_company(company)
|
|
269
280
|
try:
|
|
270
|
-
response = llm_proxy.create_response(
|
|
281
|
+
response = self.llm_proxy.create_response(
|
|
282
|
+
company_short_name=company.short_name,
|
|
271
283
|
model=model,
|
|
272
284
|
input=[{
|
|
273
285
|
"role": "system",
|
|
274
286
|
"content": company_base_context
|
|
275
|
-
}]
|
|
287
|
+
}],
|
|
288
|
+
|
|
276
289
|
)
|
|
277
290
|
|
|
278
291
|
except Exception as e:
|
|
@@ -419,7 +432,7 @@ class llmClient:
|
|
|
419
432
|
html_answer = markdown2.markdown(answer).replace("\n", "")
|
|
420
433
|
return html_answer
|
|
421
434
|
|
|
422
|
-
def count_tokens(self, text):
|
|
435
|
+
def count_tokens(self, text, history=None):
    """
    Count the tokens of ``text`` followed by the JSON-serialized ``history``.

    Args:
        text: the text to measure.
        history: optional JSON-serializable conversation history. ``None``
            is counted exactly like an empty list.

    Returns:
        int: the length of the token sequence returned by
        ``self.encoding.encode``.
    """
    # A None default replaces the former ``[]`` default, which was a single
    # list object shared by every call of this method.
    serialized_history = json.dumps([] if history is None else history)

    # Encode the combined text and count the resulting tokens.
    tokens = self.encoding.encode(text + serialized_history)
    return len(tokens)
|
|
@@ -1,17 +1,13 @@
|
|
|
1
1
|
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
2
|
# Product: IAToolkit
|
|
3
3
|
|
|
4
|
-
from iatoolkit.repositories.
|
|
5
|
-
from iatoolkit.repositories.document_repo import DocumentRepo
|
|
6
|
-
from iatoolkit.repositories.models import Document, VSDoc, Company
|
|
7
|
-
from iatoolkit.services.document_service import DocumentService
|
|
4
|
+
from iatoolkit.repositories.models import Company
|
|
8
5
|
from iatoolkit.services.configuration_service import ConfigurationService
|
|
9
|
-
from
|
|
6
|
+
from iatoolkit.services.knowledge_base_service import KnowledgeBaseService
|
|
10
7
|
from iatoolkit.infra.connectors.file_connector_factory import FileConnectorFactory
|
|
11
8
|
from iatoolkit.services.file_processor_service import FileProcessorConfig, FileProcessor
|
|
12
9
|
from iatoolkit.common.exceptions import IAToolkitException
|
|
13
10
|
import logging
|
|
14
|
-
import base64
|
|
15
11
|
from injector import inject, singleton
|
|
16
12
|
import os
|
|
17
13
|
|
|
@@ -19,31 +15,21 @@ import os
|
|
|
19
15
|
@singleton
|
|
20
16
|
class LoadDocumentsService:
|
|
21
17
|
"""
|
|
22
|
-
Orchestrates the
|
|
23
|
-
|
|
18
|
+
Orchestrates the discovery and loading of documents from configured sources.
|
|
19
|
+
Delegates the processing and ingestion logic to KnowledgeBaseService.
|
|
24
20
|
"""
|
|
25
21
|
@inject
def __init__(self,
             config_service: ConfigurationService,
             file_connector_factory: FileConnectorFactory,
             knowledge_base_service: KnowledgeBaseService
             ):
    """
    Store the injected collaborators used to discover and ingest documents.

    Args:
        config_service: stored as ``self.config_service``.
        file_connector_factory: stored as ``self.file_connector_factory``.
        knowledge_base_service: stored as ``self.knowledge_base_service``;
            the file callback delegates ingestion to it.
    """
    self.config_service = config_service
    self.file_connector_factory = file_connector_factory
    self.knowledge_base_service = knowledge_base_service

    # NOTE(review): getLogger() with no name is the root logger, so this
    # changes the log level for the whole process, not only this service.
    logging.getLogger().setLevel(logging.ERROR)
|
|
40
32
|
|
|
41
|
-
self.splitter = RecursiveCharacterTextSplitter(
|
|
42
|
-
chunk_size=1000,
|
|
43
|
-
chunk_overlap=100,
|
|
44
|
-
separators=["\n\n", "\n", "."]
|
|
45
|
-
)
|
|
46
|
-
|
|
47
33
|
def load_sources(self,
|
|
48
34
|
company: Company,
|
|
49
35
|
sources_to_load: list[str] = None,
|
|
@@ -67,7 +53,7 @@ class LoadDocumentsService:
|
|
|
67
53
|
|
|
68
54
|
if not sources_to_load:
|
|
69
55
|
raise IAToolkitException(IAToolkitException.ErrorType.PARAM_NOT_FILLED,
|
|
70
|
-
|
|
56
|
+
f"Missing sources to load for company '{company.short_name}'.")
|
|
71
57
|
|
|
72
58
|
base_connector_config = self._get_base_connector_config(knowledge_base_config)
|
|
73
59
|
all_sources = knowledge_base_config.get('document_sources', {})
|
|
@@ -89,6 +75,7 @@ class LoadDocumentsService:
|
|
|
89
75
|
# Prepare the context for the callback function.
|
|
90
76
|
context = {
|
|
91
77
|
'company': company,
|
|
78
|
+
'collection': source_config.get('metadata', {}).get('collection'),
|
|
92
79
|
'metadata': source_config.get('metadata', {})
|
|
93
80
|
}
|
|
94
81
|
|
|
@@ -130,45 +117,29 @@ class LoadDocumentsService:
|
|
|
130
117
|
|
|
131
118
|
def _file_processing_callback(self, company: Company, filename: str, content: bytes, context: dict = None):
    """
    Callback that processes a single file.

    The actual ingestion is delegated to
    ``KnowledgeBaseService.ingest_document_sync``.

    Args:
        company: company that owns the file; must be truthy.
        filename: name of the file being processed.
        content: raw file content.
        context: optional dict; its 'metadata' entry (default ``{}``) is
            forwarded as the document metadata, and the metadata's
            'collection' entry is forwarded as the collection.

    Returns:
        Whatever ``ingest_document_sync`` returns.

    Raises:
        IAToolkitException: MISSING_PARAMETER when ``company`` is falsy;
            LOAD_DOCUMENT_ERROR when anything fails during ingestion.
    """
    if not company:
        raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER, "Missing company object in callback.")

    try:
        # Predefined metadata comes from the context passed by the processor.
        source_metadata = (context or {}).get('metadata', {})

        # Delegate the heavy lifting to KnowledgeBaseService.
        return self.knowledge_base_service.ingest_document_sync(
            company=company,
            filename=filename,
            content=content,
            collection=source_metadata.get('collection'),
            metadata=source_metadata
        )
    except Exception as e:
        # Log here, then re-raise so FileProcessor can handle the error
        # counting / continue logic.
        logging.exception(f"Error processing file '{filename}': {e}")
        raise IAToolkitException(IAToolkitException.ErrorType.LOAD_DOCUMENT_ERROR,
                                 f"Error while processing file: {filename}")
|
|
@@ -3,43 +3,40 @@
|
|
|
3
3
|
#
|
|
4
4
|
# IAToolkit is open source software.
|
|
5
5
|
|
|
6
|
-
from iatoolkit.
|
|
6
|
+
from iatoolkit.services.configuration_service import ConfigurationService
|
|
7
7
|
from iatoolkit.services.i18n_service import I18nService
|
|
8
|
+
from iatoolkit.infra.brevo_mail_app import BrevoMailApp
|
|
8
9
|
from injector import inject
|
|
9
10
|
from pathlib import Path
|
|
10
|
-
from iatoolkit.common.exceptions import IAToolkitException
|
|
11
11
|
import base64
|
|
12
|
+
import os
|
|
13
|
+
import smtplib
|
|
14
|
+
from email.message import EmailMessage
|
|
15
|
+
from iatoolkit.common.exceptions import IAToolkitException
|
|
16
|
+
|
|
12
17
|
|
|
13
18
|
TEMP_DIR = Path("static/temp")
|
|
14
19
|
|
|
15
20
|
class MailService:
|
|
16
21
|
@inject
def __init__(self,
             config_service: ConfigurationService,
             mail_app: BrevoMailApp,
             i18n_service: I18nService,
             brevo_mail_app: BrevoMailApp):
    """
    Store the injected collaborators used to send e-mail.

    Args:
        config_service: stored as ``self.config_service``.
        mail_app: stored as ``self.mail_app``.
        i18n_service: stored as ``self.i18n_service``.
        brevo_mail_app: stored as ``self.brevo_mail_app``.
    """
    # NOTE(review): mail_app and brevo_mail_app are annotated with the same
    # type; the visible part of send_mail only uses brevo_mail_app. Confirm
    # whether mail_app is still needed.
    self.mail_app = mail_app
    self.config_service = config_service
    self.i18n_service = i18n_service
    self.brevo_mail_app = brevo_mail_app
|
|
22
31
|
|
|
23
32
|
|
|
24
|
-
def
|
|
25
|
-
# Defensa simple contra path traversal
|
|
26
|
-
if not token or "/" in token or "\\" in token or token.startswith("."):
|
|
27
|
-
raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
|
|
28
|
-
"attachment_token invalid")
|
|
29
|
-
path = TEMP_DIR / token
|
|
30
|
-
if not path.is_file():
|
|
31
|
-
raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
|
|
32
|
-
f"attach file not found: {token}")
|
|
33
|
-
return path.read_bytes()
|
|
34
|
-
|
|
35
|
-
def send_mail(self, **kwargs):
|
|
36
|
-
from_email = kwargs.get('from_email', 'iatoolkit@iatoolkit.com')
|
|
33
|
+
def send_mail(self, company_short_name: str, **kwargs):
|
|
37
34
|
recipient = kwargs.get('recipient')
|
|
38
35
|
subject = kwargs.get('subject')
|
|
39
36
|
body = kwargs.get('body')
|
|
40
37
|
attachments = kwargs.get('attachments')
|
|
41
38
|
|
|
42
|
-
# Normalizar a payload de
|
|
39
|
+
# Normalizar a payload de BrevoMailApp (name + base64 content)
|
|
43
40
|
norm_attachments = []
|
|
44
41
|
for a in attachments or []:
|
|
45
42
|
if a.get("attachment_token"):
|
|
@@ -55,13 +52,162 @@ class MailService:
|
|
|
55
52
|
"content": a["content"]
|
|
56
53
|
})
|
|
57
54
|
|
|
58
|
-
|
|
55
|
+
# build provider configuration from company.yaml
|
|
56
|
+
provider, provider_config = self._build_provider_config(company_short_name)
|
|
57
|
+
|
|
58
|
+
# define the email sender
|
|
59
|
+
sender = {
|
|
60
|
+
"email": provider_config.get("sender_email"),
|
|
61
|
+
"name": provider_config.get("sender_name"),
|
|
62
|
+
}
|
|
59
63
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
# select provider and send the email through it
|
|
65
|
+
if provider == "brevo_mail":
|
|
66
|
+
response = self.brevo_mail_app.send_email(
|
|
67
|
+
provider_config=provider_config,
|
|
68
|
+
sender=sender,
|
|
69
|
+
to=recipient,
|
|
70
|
+
subject=subject,
|
|
71
|
+
body=body,
|
|
72
|
+
attachments=norm_attachments
|
|
73
|
+
)
|
|
74
|
+
elif provider == "smtplib":
|
|
75
|
+
response = self._send_with_smtplib(
|
|
76
|
+
provider_config=provider_config,
|
|
77
|
+
sender=sender,
|
|
78
|
+
recipient=recipient,
|
|
79
|
+
subject=subject,
|
|
80
|
+
body=body,
|
|
81
|
+
attachments=norm_attachments,
|
|
82
|
+
)
|
|
83
|
+
response = None
|
|
84
|
+
else:
|
|
85
|
+
raise IAToolkitException(
|
|
86
|
+
IAToolkitException.ErrorType.MAIL_ERROR,
|
|
87
|
+
f"Unknown mail provider '{provider}'"
|
|
88
|
+
)
|
|
66
89
|
|
|
67
90
|
return self.i18n_service.t('services.mail_sent')
|
|
91
|
+
|
|
92
|
+
def _build_provider_config(self, company_short_name: str) -> tuple[str, dict]:
    """
    Determine the active mail provider and build its configuration.

    The provider name ("brevo_mail" or "smtplib", default "brevo_mail") and
    the sender identity are read from the company's "mail_provider"
    configuration. Credentials and connection settings are read from
    environment variables whose names come from that configuration, with
    default names such as BREVO_API_KEY or SMTP_HOST.

    Args:
        company_short_name: company whose mail configuration is read.

    Returns:
        A ``(provider, provider_config)`` tuple. ``provider_config`` always
        carries "sender_name" and "sender_email", plus "api_key" for
        brevo_mail, or "host", "port", "username", "password", "use_tls"
        and "use_ssl" for smtplib.

    Raises:
        IAToolkitException: (MAIL_ERROR) when the provider is not
            recognized, or when the SMTP port variable is not an integer.
    """
    # Company mail configuration and provider.
    mail_config = self.config_service.get_configuration(company_short_name, "mail_provider")
    provider = mail_config.get("provider", "brevo_mail")

    # Parameters common to every provider.
    sender_fields = {
        "sender_name": mail_config.get("sender_name"),
        "sender_email": mail_config.get("sender_email"),
    }

    if provider == "brevo_mail":
        brevo_cfg = mail_config.get("brevo_mail", {})
        api_key_env = brevo_cfg.get("brevo_api", "BREVO_API_KEY")
        return provider, {"api_key": os.getenv(api_key_env), **sender_fields}

    if provider == "smtplib":
        smtp_cfg = mail_config.get("smtplib", {})

        def env_value(name_key: str, default_name: str):
            # The configuration stores the NAME of the variable, not its value.
            return os.getenv(smtp_cfg.get(name_key, default_name))

        port_text = env_value("port_env", "SMTP_PORT")
        try:
            port = int(port_text) if port_text is not None else None
        except ValueError as exc:
            # Report a malformed port as a mail error instead of letting a
            # bare ValueError escape.
            raise IAToolkitException(
                IAToolkitException.ErrorType.MAIL_ERROR,
                f"invalid SMTP port '{port_text}' in mail configuration for company '{company_short_name}'"
            ) from exc

        return provider, {
            "host": env_value("host_env", "SMTP_HOST"),
            "port": port,
            "username": env_value("username_env", "SMTP_USERNAME"),
            "password": env_value("password_env", "SMTP_PASSWORD"),
            # Only the literal text "true" (any case) enables these flags.
            "use_tls": str(env_value("use_tls_env", "SMTP_USE_TLS")).lower() == "true",
            "use_ssl": str(env_value("use_ssl_env", "SMTP_USE_SSL")).lower() == "true",
            **sender_fields,
        }

    # The provider is not one of the recognized names.
    raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
                             f"missing mail provider in mail configuration for company '{company_short_name}'")
|
|
139
|
+
|
|
140
|
+
def _send_with_smtplib(self,
                       provider_config: dict,
                       sender: dict,
                       recipient: str,
                       subject: str,
                       body: str,
                       attachments: list[dict] | None):
    """
    Send an HTML e-mail with smtplib, using the normalized provider_config.

    Args:
        provider_config: dict with "host", "port", "username", "password",
            "use_tls" and "use_ssl".
        sender: dict with "email" and, optionally, "name".
        recipient: value of the To header.
        subject: value of the Subject header.
        body: message body, sent with the "html" subtype.
        attachments: optional list of dicts with "filename" and a base64
            "content"; entries missing either one are skipped.

    Raises:
        IAToolkitException: (MAIL_ERROR) when host, port or the sender
            e-mail is missing, or when an attachment is not valid base64.
        Errors raised by smtplib while connecting or sending are not
        caught here.
    """
    # Local import: only this method needs it.
    from email.utils import formataddr

    host = provider_config.get("host")
    port = provider_config.get("port")
    username = provider_config.get("username")
    password = provider_config.get("password")
    use_tls = provider_config.get("use_tls")
    use_ssl = provider_config.get("use_ssl")

    if not host or not port:
        raise IAToolkitException(
            IAToolkitException.ErrorType.MAIL_ERROR,
            "smtplib configuration is incomplete (host/port missing)"
        )

    sender_email = sender.get("email")
    if not sender_email:
        # Without this check the From header would literally contain "None".
        raise IAToolkitException(
            IAToolkitException.ErrorType.MAIL_ERROR,
            "smtplib configuration is incomplete (sender_email missing)"
        )

    msg = EmailMessage()
    # The "name" key may be present with a None value, so fall back to ""
    # explicitly; formataddr omits the display name when it is empty.
    msg["From"] = formataddr((sender.get("name") or "", sender_email))
    msg["To"] = recipient
    msg["Subject"] = subject
    msg.set_content(body, subtype="html")

    # Attachments arrive as filename + base64 content.
    for attachment in attachments or []:
        filename = attachment.get("filename")
        content_b64 = attachment.get("content")
        if not filename or not content_b64:
            continue
        try:
            raw = base64.b64decode(content_b64, validate=True)
        except (ValueError, TypeError) as exc:
            # binascii.Error is a ValueError subclass.
            raise IAToolkitException(
                IAToolkitException.ErrorType.MAIL_ERROR,
                f"Invalid base64 for attachment '{filename}'"
            ) from exc
        msg.add_attachment(
            raw,
            maintype="application",
            subtype="octet-stream",
            filename=filename,
        )

    # An SSL connection is encrypted from the start; STARTTLS is only
    # attempted on a plain connection.
    connect = smtplib.SMTP_SSL if use_ssl else smtplib.SMTP
    with connect(host, port) as server:
        if use_tls and not use_ssl:
            server.starttls()
        if username and password:
            server.login(username, password)
        server.send_message(msg)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _read_token_bytes(self, token: str) -> bytes:
    """
    Return the bytes of the temporary file named by ``token``.

    Args:
        token: file name, looked up directly under ``TEMP_DIR``.

    Raises:
        IAToolkitException: (MAIL_ERROR) when the token is empty, contains
            a path separator or starts with a dot, or when no such file
            exists.
    """
    # Simple defence against path traversal.
    token_is_safe = (
        bool(token)
        and not any(separator in token for separator in ("/", "\\"))
        and not token.startswith(".")
    )
    if not token_is_safe:
        raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
                                 "attachment_token invalid")

    attachment_path = TEMP_DIR.joinpath(token)
    if attachment_path.is_file():
        return attachment_path.read_bytes()

    raise IAToolkitException(IAToolkitException.ErrorType.MAIL_ERROR,
                             f"attach file not found: {token}")
|