iatoolkit 0.3.9__py3-none-any.whl → 0.107.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of iatoolkit might be problematic. Click here for more details.
- iatoolkit/__init__.py +27 -35
- iatoolkit/base_company.py +3 -35
- iatoolkit/cli_commands.py +18 -47
- iatoolkit/common/__init__.py +0 -0
- iatoolkit/common/exceptions.py +48 -0
- iatoolkit/common/interfaces/__init__.py +0 -0
- iatoolkit/common/interfaces/asset_storage.py +34 -0
- iatoolkit/common/interfaces/database_provider.py +39 -0
- iatoolkit/common/model_registry.py +159 -0
- iatoolkit/common/routes.py +138 -0
- iatoolkit/common/session_manager.py +26 -0
- iatoolkit/common/util.py +353 -0
- iatoolkit/company_registry.py +66 -29
- iatoolkit/core.py +514 -0
- iatoolkit/infra/__init__.py +5 -0
- iatoolkit/infra/brevo_mail_app.py +123 -0
- iatoolkit/infra/call_service.py +140 -0
- iatoolkit/infra/connectors/__init__.py +5 -0
- iatoolkit/infra/connectors/file_connector.py +17 -0
- iatoolkit/infra/connectors/file_connector_factory.py +57 -0
- iatoolkit/infra/connectors/google_cloud_storage_connector.py +53 -0
- iatoolkit/infra/connectors/google_drive_connector.py +68 -0
- iatoolkit/infra/connectors/local_file_connector.py +46 -0
- iatoolkit/infra/connectors/s3_connector.py +33 -0
- iatoolkit/infra/google_chat_app.py +57 -0
- iatoolkit/infra/llm_providers/__init__.py +0 -0
- iatoolkit/infra/llm_providers/deepseek_adapter.py +278 -0
- iatoolkit/infra/llm_providers/gemini_adapter.py +350 -0
- iatoolkit/infra/llm_providers/openai_adapter.py +124 -0
- iatoolkit/infra/llm_proxy.py +268 -0
- iatoolkit/infra/llm_response.py +45 -0
- iatoolkit/infra/redis_session_manager.py +122 -0
- iatoolkit/locales/en.yaml +222 -0
- iatoolkit/locales/es.yaml +225 -0
- iatoolkit/repositories/__init__.py +5 -0
- iatoolkit/repositories/database_manager.py +187 -0
- iatoolkit/repositories/document_repo.py +33 -0
- iatoolkit/repositories/filesystem_asset_repository.py +36 -0
- iatoolkit/repositories/llm_query_repo.py +105 -0
- iatoolkit/repositories/models.py +279 -0
- iatoolkit/repositories/profile_repo.py +171 -0
- iatoolkit/repositories/vs_repo.py +150 -0
- iatoolkit/services/__init__.py +5 -0
- iatoolkit/services/auth_service.py +193 -0
- {services → iatoolkit/services}/benchmark_service.py +7 -7
- iatoolkit/services/branding_service.py +153 -0
- iatoolkit/services/company_context_service.py +214 -0
- iatoolkit/services/configuration_service.py +375 -0
- iatoolkit/services/dispatcher_service.py +134 -0
- {services → iatoolkit/services}/document_service.py +20 -8
- iatoolkit/services/embedding_service.py +148 -0
- iatoolkit/services/excel_service.py +156 -0
- {services → iatoolkit/services}/file_processor_service.py +36 -21
- iatoolkit/services/history_manager_service.py +208 -0
- iatoolkit/services/i18n_service.py +104 -0
- iatoolkit/services/jwt_service.py +80 -0
- iatoolkit/services/language_service.py +89 -0
- iatoolkit/services/license_service.py +82 -0
- iatoolkit/services/llm_client_service.py +438 -0
- iatoolkit/services/load_documents_service.py +174 -0
- iatoolkit/services/mail_service.py +213 -0
- {services → iatoolkit/services}/profile_service.py +200 -101
- iatoolkit/services/prompt_service.py +303 -0
- iatoolkit/services/query_service.py +467 -0
- iatoolkit/services/search_service.py +55 -0
- iatoolkit/services/sql_service.py +169 -0
- iatoolkit/services/tool_service.py +246 -0
- iatoolkit/services/user_feedback_service.py +117 -0
- iatoolkit/services/user_session_context_service.py +213 -0
- iatoolkit/static/images/fernando.jpeg +0 -0
- iatoolkit/static/images/iatoolkit_core.png +0 -0
- iatoolkit/static/images/iatoolkit_logo.png +0 -0
- iatoolkit/static/js/chat_feedback_button.js +80 -0
- iatoolkit/static/js/chat_filepond.js +85 -0
- iatoolkit/static/js/chat_help_content.js +124 -0
- iatoolkit/static/js/chat_history_button.js +110 -0
- iatoolkit/static/js/chat_logout_button.js +36 -0
- iatoolkit/static/js/chat_main.js +401 -0
- iatoolkit/static/js/chat_model_selector.js +227 -0
- iatoolkit/static/js/chat_onboarding_button.js +103 -0
- iatoolkit/static/js/chat_prompt_manager.js +94 -0
- iatoolkit/static/js/chat_reload_button.js +38 -0
- iatoolkit/static/styles/chat_iatoolkit.css +559 -0
- iatoolkit/static/styles/chat_modal.css +133 -0
- iatoolkit/static/styles/chat_public.css +135 -0
- iatoolkit/static/styles/documents.css +598 -0
- iatoolkit/static/styles/landing_page.css +398 -0
- iatoolkit/static/styles/llm_output.css +148 -0
- iatoolkit/static/styles/onboarding.css +176 -0
- iatoolkit/system_prompts/__init__.py +0 -0
- iatoolkit/system_prompts/query_main.prompt +30 -23
- iatoolkit/system_prompts/sql_rules.prompt +47 -12
- iatoolkit/templates/_company_header.html +45 -0
- iatoolkit/templates/_login_widget.html +42 -0
- iatoolkit/templates/base.html +78 -0
- iatoolkit/templates/change_password.html +66 -0
- iatoolkit/templates/chat.html +337 -0
- iatoolkit/templates/chat_modals.html +185 -0
- iatoolkit/templates/error.html +51 -0
- iatoolkit/templates/forgot_password.html +51 -0
- iatoolkit/templates/onboarding_shell.html +106 -0
- iatoolkit/templates/signup.html +79 -0
- iatoolkit/views/__init__.py +5 -0
- iatoolkit/views/base_login_view.py +96 -0
- iatoolkit/views/change_password_view.py +116 -0
- iatoolkit/views/chat_view.py +76 -0
- iatoolkit/views/embedding_api_view.py +65 -0
- iatoolkit/views/forgot_password_view.py +75 -0
- iatoolkit/views/help_content_api_view.py +54 -0
- iatoolkit/views/history_api_view.py +56 -0
- iatoolkit/views/home_view.py +63 -0
- iatoolkit/views/init_context_api_view.py +74 -0
- iatoolkit/views/llmquery_api_view.py +59 -0
- iatoolkit/views/load_company_configuration_api_view.py +49 -0
- iatoolkit/views/load_document_api_view.py +65 -0
- iatoolkit/views/login_view.py +170 -0
- iatoolkit/views/logout_api_view.py +57 -0
- iatoolkit/views/profile_api_view.py +46 -0
- iatoolkit/views/prompt_api_view.py +37 -0
- iatoolkit/views/root_redirect_view.py +22 -0
- iatoolkit/views/signup_view.py +100 -0
- iatoolkit/views/static_page_view.py +27 -0
- iatoolkit/views/user_feedback_api_view.py +60 -0
- iatoolkit/views/users_api_view.py +33 -0
- iatoolkit/views/verify_user_view.py +60 -0
- iatoolkit-0.107.4.dist-info/METADATA +268 -0
- iatoolkit-0.107.4.dist-info/RECORD +132 -0
- iatoolkit-0.107.4.dist-info/licenses/LICENSE +21 -0
- iatoolkit-0.107.4.dist-info/licenses/LICENSE_COMMUNITY.md +15 -0
- {iatoolkit-0.3.9.dist-info → iatoolkit-0.107.4.dist-info}/top_level.txt +0 -1
- iatoolkit/iatoolkit.py +0 -413
- iatoolkit/system_prompts/arquitectura.prompt +0 -32
- iatoolkit-0.3.9.dist-info/METADATA +0 -252
- iatoolkit-0.3.9.dist-info/RECORD +0 -32
- services/__init__.py +0 -5
- services/api_service.py +0 -75
- services/dispatcher_service.py +0 -351
- services/excel_service.py +0 -98
- services/history_service.py +0 -45
- services/jwt_service.py +0 -91
- services/load_documents_service.py +0 -212
- services/mail_service.py +0 -62
- services/prompt_manager_service.py +0 -172
- services/query_service.py +0 -334
- services/search_service.py +0 -32
- services/sql_service.py +0 -42
- services/tasks_service.py +0 -188
- services/user_feedback_service.py +0 -67
- services/user_session_context_service.py +0 -85
- {iatoolkit-0.3.9.dist-info → iatoolkit-0.107.4.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
from iatoolkit.services.llm_client_service import llmClient
|
|
7
|
+
from iatoolkit.services.profile_service import ProfileService
|
|
8
|
+
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
9
|
+
from iatoolkit.services.tool_service import ToolService
|
|
10
|
+
from iatoolkit.services.document_service import DocumentService
|
|
11
|
+
from iatoolkit.services.company_context_service import CompanyContextService
|
|
12
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
13
|
+
from iatoolkit.services.configuration_service import ConfigurationService
|
|
14
|
+
from iatoolkit.services.dispatcher_service import Dispatcher
|
|
15
|
+
from iatoolkit.services.prompt_service import PromptService
|
|
16
|
+
from iatoolkit.services.user_session_context_service import UserSessionContextService
|
|
17
|
+
from iatoolkit.services.history_manager_service import HistoryManagerService
|
|
18
|
+
from iatoolkit.common.model_registry import ModelRegistry
|
|
19
|
+
from iatoolkit.common.util import Utility
|
|
20
|
+
from injector import inject
|
|
21
|
+
import base64
|
|
22
|
+
import logging
|
|
23
|
+
from typing import Optional
|
|
24
|
+
import json
|
|
25
|
+
import time
|
|
26
|
+
import hashlib
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
|
|
31
|
+
class HistoryHandle:
|
|
32
|
+
"""Encapsulates the state needed to manage history for a single turn."""
|
|
33
|
+
company_short_name: str
|
|
34
|
+
user_identifier: str
|
|
35
|
+
type: str
|
|
36
|
+
model: str | None = None
|
|
37
|
+
request_params: dict = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class QueryService:
|
|
41
|
+
@inject
def __init__(self,
             dispatcher: Dispatcher,
             tool_service: ToolService,
             llm_client: llmClient,
             profile_service: ProfileService,
             company_context_service: CompanyContextService,
             document_service: DocumentService,
             profile_repo: ProfileRepo,
             prompt_service: PromptService,
             i18n_service: I18nService,
             session_context: UserSessionContextService,
             configuration_service: ConfigurationService,
             history_manager: HistoryManagerService,
             util: Utility,
             model_registry: ModelRegistry
             ):
    """Keep a reference to every injected collaborator on the instance."""
    # LLM invocation, tools and dispatching
    self.llm_client = llm_client
    self.tool_service = tool_service
    self.dispatcher = dispatcher
    self.model_registry = model_registry

    # Company / user data sources
    self.profile_service = profile_service
    self.profile_repo = profile_repo
    self.company_context_service = company_context_service
    self.configuration_service = configuration_service
    self.document_service = document_service

    # Prompt rendering and translations
    self.prompt_service = prompt_service
    self.i18n_service = i18n_service
    self.util = util

    # Per-user session state and conversation history
    self.session_context = session_context
    self.history_manager = history_manager
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _resolve_model(self, company_short_name: str, model: Optional[str]) -> str:
|
|
75
|
+
# Priority: 1. Explicit model -> 2. Company config
|
|
76
|
+
effective_model = model
|
|
77
|
+
if not effective_model:
|
|
78
|
+
llm_config = self.configuration_service.get_configuration(company_short_name, 'llm')
|
|
79
|
+
if llm_config and llm_config.get('model'):
|
|
80
|
+
effective_model = llm_config['model']
|
|
81
|
+
return effective_model
|
|
82
|
+
|
|
83
|
+
def _get_history_type(self, model: str) -> str:
    """Translate the registry's history type for *model* into a HistoryManagerService constant.

    "server_side" maps to TYPE_SERVER_SIDE; any other value maps to
    TYPE_CLIENT_SIDE.
    """
    is_server_side = self.model_registry.get_history_type(model) == "server_side"
    return (HistoryManagerService.TYPE_SERVER_SIDE
            if is_server_side
            else HistoryManagerService.TYPE_CLIENT_SIDE)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _build_user_facing_prompt(self, company, user_identifier: str,
|
|
92
|
+
client_data: dict, files: list,
|
|
93
|
+
prompt_name: Optional[str], question: str):
|
|
94
|
+
# get the user profile data from the session context
|
|
95
|
+
user_profile = self.profile_service.get_profile_by_identifier(company.short_name, user_identifier)
|
|
96
|
+
|
|
97
|
+
# combine client_data with user_profile
|
|
98
|
+
final_client_data = (user_profile or {}).copy()
|
|
99
|
+
final_client_data.update(client_data)
|
|
100
|
+
|
|
101
|
+
# Load attached files into the context
|
|
102
|
+
files_context = self.load_files_for_context(files)
|
|
103
|
+
|
|
104
|
+
# Initialize prompt_content. It will be an empty string for direct questions.
|
|
105
|
+
main_prompt = ""
|
|
106
|
+
# We use a local variable for the question to avoid modifying the argument reference if it were mutable,
|
|
107
|
+
# although strings are immutable, this keeps the logic clean regarding what 'question' means in each context.
|
|
108
|
+
effective_question = question
|
|
109
|
+
|
|
110
|
+
if prompt_name:
|
|
111
|
+
question_dict = {'prompt': prompt_name, 'data': final_client_data}
|
|
112
|
+
effective_question = json.dumps(question_dict)
|
|
113
|
+
prompt_content = self.prompt_service.get_prompt_content(company, prompt_name)
|
|
114
|
+
|
|
115
|
+
# Render the user requested prompt
|
|
116
|
+
main_prompt = self.util.render_prompt_from_string(
|
|
117
|
+
template_string=prompt_content,
|
|
118
|
+
question=effective_question,
|
|
119
|
+
client_data=final_client_data,
|
|
120
|
+
user_identifier=user_identifier,
|
|
121
|
+
company=company,
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# This is the final user-facing prompt for this specific turn
|
|
125
|
+
user_turn_prompt = f"{main_prompt}\n{files_context}"
|
|
126
|
+
if not prompt_name:
|
|
127
|
+
user_turn_prompt += f"\n### La pregunta que debes responder es: {effective_question}"
|
|
128
|
+
else:
|
|
129
|
+
user_turn_prompt += f'\n### Contexto Adicional: El usuario ha aportado este contexto puede ayudar: {effective_question}'
|
|
130
|
+
|
|
131
|
+
return user_turn_prompt, effective_question
|
|
132
|
+
|
|
133
|
+
def _ensure_valid_history(self, company,
                          user_identifier: str,
                          effective_model: str,
                          user_turn_prompt: str,
                          ignore_history: bool
                          ) -> tuple[Optional[HistoryHandle], Optional[dict]]:
    """Prepare the history handle for a turn, rebuilding the context once if required.

    The history manager is asked to populate the handle's request
    parameters. If it reports that a rebuild is needed, the context is
    prepared and sent to the LLM again and the population is retried a
    single time.

    Returns:
        ``(handle, None)`` on success, or ``(None, error_dict)`` when the
        history manager still asks for a rebuild after the retry.
    """
    short_name = company.short_name

    handle = HistoryHandle(
        company_short_name=short_name,
        user_identifier=user_identifier,
        type=self._get_history_type(effective_model),
        model=effective_model,
    )

    def _populate():
        # Truthy result means "no usable history, rebuild needed".
        return self.history_manager.populate_request_params(
            handle, user_turn_prompt, ignore_history
        )

    if not _populate():
        return handle, None

    logging.warning(f"No valid history for {short_name}/{user_identifier}. Rebuilding context...")

    # One rebuild attempt: regenerate the static context, then push it to the model.
    self.prepare_context(company_short_name=short_name, user_identifier=user_identifier)
    self.set_context_for_llm(company_short_name=short_name, user_identifier=user_identifier,
                             model=effective_model)

    # Second and last attempt, reusing the same handle.
    if not _populate():
        return handle, None

    failure_text = self.i18n_service.t('errors.services.context_rebuild_failed',
                                       company_short_name=short_name,
                                       user_identifier=user_identifier)
    return None, {'error': True, "error_message": failure_text}
|
|
178
|
+
|
|
179
|
+
def _build_context_and_profile(self, company_short_name: str, user_identifier: str) -> tuple:
|
|
180
|
+
# this method read the user/company context from the database and renders the system prompt
|
|
181
|
+
company = self.profile_repo.get_company_by_short_name(company_short_name)
|
|
182
|
+
if not company:
|
|
183
|
+
return None, None
|
|
184
|
+
|
|
185
|
+
# Get the user profile from the single source of truth.
|
|
186
|
+
user_profile = self.profile_service.get_profile_by_identifier(company_short_name, user_identifier)
|
|
187
|
+
|
|
188
|
+
# render the iatoolkit main system prompt with the company/user information
|
|
189
|
+
system_prompt_template = self.prompt_service.get_system_prompt()
|
|
190
|
+
rendered_system_prompt = self.util.render_prompt_from_string(
|
|
191
|
+
template_string=system_prompt_template,
|
|
192
|
+
question=None,
|
|
193
|
+
client_data=user_profile,
|
|
194
|
+
company=company,
|
|
195
|
+
service_list=self.tool_service.get_tools_for_llm(company)
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
# get the company context: schemas, database models, .md files
|
|
199
|
+
company_specific_context = self.company_context_service.get_company_context(company_short_name)
|
|
200
|
+
|
|
201
|
+
# merge context: company + user
|
|
202
|
+
final_system_context = f"{company_specific_context}\n{rendered_system_prompt}"
|
|
203
|
+
|
|
204
|
+
return final_system_context, user_profile
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def init_context(self, company_short_name: str,
                 user_identifier: str,
                 model: str = None) -> dict:
    """Force a context rebuild for a user and, optionally, a specific model.

    Steps, in order:
      1. resolve the effective model,
      2. call ``session_context.clear_all_context`` for that model,
      3. regenerate the static company/user context,
      4. send that context to the LLM for the resolved model.

    Returns:
        Whatever ``set_context_for_llm`` returns.
    """
    resolved_model = self._resolve_model(company_short_name, model)

    self.session_context.clear_all_context(company_short_name, user_identifier, model=resolved_model)
    logging.info(f"Context for {company_short_name}/{user_identifier} (model={resolved_model}) has been cleared.")

    # The prepared context itself does not depend on the model...
    self.prepare_context(company_short_name=company_short_name,
                         user_identifier=user_identifier)

    # ...but delivering it to the LLM does.
    return self.set_context_for_llm(company_short_name=company_short_name,
                                    user_identifier=user_identifier,
                                    model=resolved_model)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def prepare_context(self, company_short_name: str, user_identifier: str) -> dict:
    """Build the system context for a user and stage it in the session context.

    The generated context and its version hash are saved so that
    ``set_context_for_llm`` can pick them up later.

    Returns:
        ``{'rebuild_needed': bool}``, which is True when the freshly computed
        version differs from the one stored in the session. When the user
        identifier is empty or the company cannot be found, nothing is saved
        and the dict is ``{'rebuild_needed': True, 'error': <message>}``.
    """
    if not user_identifier:
        return {'rebuild_needed': True, 'error': 'Invalid user identifier'}

    final_system_context, user_profile = self._build_context_and_profile(
        company_short_name, user_identifier)

    # _build_context_and_profile returns (None, None) for an unknown company.
    # Bail out instead of persisting a None profile and a None context.
    if final_system_context is None:
        logging.error(f"Company not found: {company_short_name} in prepare_context")
        return {'rebuild_needed': True, 'error': f'Company not found: {company_short_name}'}

    # The profile is stored in the session because the jinja predefined
    # prompts need it (filtering).
    self.session_context.save_profile_data(company_short_name, user_identifier, user_profile)

    current_version = self._compute_context_version_from_string(final_system_context)

    # Best effort: a failing session lookup is treated as "no previous version".
    try:
        prev_version = self.session_context.get_context_version(company_short_name, user_identifier)
    except Exception as e:
        logging.warning(
            f"Could not read context version for {company_short_name}/{user_identifier}: {e}")
        prev_version = None

    rebuild_is_needed = (prev_version != current_version)

    # Always stage the prepared context and its version, whether or not
    # the version changed.
    self.session_context.save_prepared_context(company_short_name,
                                               user_identifier,
                                               final_system_context,
                                               current_version)
    return {'rebuild_needed': rebuild_is_needed}
|
|
278
|
+
|
|
279
|
+
def set_context_for_llm(self,
                        company_short_name: str,
                        user_identifier: str,
                        model: str = ''):
    """Send the previously prepared static context to the LLM for a model.

    The context staged by ``prepare_context`` is fetched (and cleared) from
    the session, handed to ``history_manager.initialize_context``, and the
    staged version is then stored as the current context version.

    Returns:
        The data returned by ``history_manager.initialize_context``.
        ``None`` when the company is unknown, when the lock for this
        (company, user, model) is already held, or when no prepared
        context is available.

    Raises:
        Any exception raised while initializing the context is logged and
        re-raised. The lock is released in every case.
    """
    company = self.profile_repo.get_company_by_short_name(company_short_name)
    if not company:
        logging.error(f"Company not found: {company_short_name} in set_context_for_llm")
        return

    effective_model = self._resolve_model(company_short_name, model)

    # One lock per (company, user, model) so the same context is not
    # pushed concurrently for the same model.
    lock_key = f"lock:context:{company_short_name}/{user_identifier}/{effective_model}"
    if not self.session_context.acquire_lock(lock_key, expire_seconds=60):
        logging.warning(
            f"try to rebuild context for user {user_identifier} while is still in process, ignored.")
        return

    try:
        start_time = time.time()

        # Fetching also clears the staged context from the session.
        prepared_context, version_to_save = self.session_context.get_and_clear_prepared_context(
            company_short_name, user_identifier)
        if not prepared_context:
            return

        logging.info(f"sending context to LLM model {effective_model} for: {company_short_name}/{user_identifier}...")

        # The history manager decides how to initialize the context
        # based on the model's history type.
        history_type = self._get_history_type(effective_model)
        response_data = self.history_manager.initialize_context(
            company_short_name, user_identifier, history_type, prepared_context, company, effective_model
        )

        if version_to_save:
            self.session_context.save_context_version(company_short_name, user_identifier, version_to_save)

        logging.info(
            f"Context for: {company_short_name}/{user_identifier} settled in {int(time.time() - start_time)} sec.")

        # Data (e.g. a response id) produced by the history manager, if any.
        return response_data

    except Exception as e:
        # The message names this method so the log points at the right place.
        logging.exception(f"Error in set_context_for_llm for {company_short_name}: {e}")
        # Bare raise keeps the original traceback untouched.
        raise
    finally:
        self.session_context.release_lock(lock_key)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def llm_query(self,
              company_short_name: str,
              user_identifier: str,
              model: Optional[str] = None,
              prompt_name: Optional[str] = None,
              question: str = '',
              client_data: Optional[dict] = None,
              ignore_history: bool = False,
              files: Optional[list] = None
              ) -> dict:
    """Run one user turn against the LLM and record it in the history.

    Args:
        company_short_name: Company the query belongs to.
        user_identifier: User issuing the query.
        model: Explicit model; when falsy the company configuration is used.
        prompt_name: Name of a predefined prompt, if any.
        question: Free-text question. Either this or *prompt_name* is required.
        client_data: Extra data merged over the user profile. Defaults to
            an empty dict.
        ignore_history: Forwarded to the history manager.
        files: Attached files. Defaults to an empty list.

    Returns:
        The dict returned by ``llm_client.invoke`` (with ``'error': True``
        added when it has no truthy ``'valid_response'``), or
        ``{'error': True, 'error_message': ...}`` on validation failures
        and on any exception, which is logged rather than propagated.
    """
    # Fresh containers per call instead of shared mutable default arguments.
    client_data = {} if client_data is None else client_data
    files = [] if files is None else files

    try:
        company = self.profile_repo.get_company_by_short_name(short_name=company_short_name)
        if not company:
            return {"error": True,
                    "error_message": self.i18n_service.t('errors.company_not_found', company_short_name=company_short_name)}

        if not prompt_name and not question:
            return {"error": True,
                    "error_message": self.i18n_service.t('services.start_query')}

        effective_model = self._resolve_model(company_short_name, model)

        user_turn_prompt, effective_question = self._build_user_facing_prompt(
            company=company,
            user_identifier=user_identifier,
            client_data=client_data,
            files=files,
            prompt_name=prompt_name,
            question=question
        )

        history_handle, error_response = self._ensure_valid_history(
            company=company,
            user_identifier=user_identifier,
            effective_model=effective_model,
            user_turn_prompt=user_turn_prompt,
            ignore_history=ignore_history
        )
        if error_response:
            return error_response

        # Tools available to this company.
        tools = self.tool_service.get_tools_for_llm(company)

        # Structured-output instructions; currently none.
        output_schema = {}

        # `or {}` covers a handle whose request_params was left as None.
        request_params = history_handle.request_params or {}
        previous_response_id = request_params.get('previous_response_id')
        context_history = request_params.get('context_history')

        response = self.llm_client.invoke(
            company=company,
            user_identifier=user_identifier,
            model=effective_model,
            previous_response_id=previous_response_id,
            context_history=context_history,
            question=effective_question,
            context=user_turn_prompt,
            tools=tools,
            text=output_schema
        )

        if not response.get('valid_response'):
            response['error'] = True

        # History is updated even when the response is flagged as an error.
        self.history_manager.update_history(
            history_handle, user_turn_prompt, response
        )

        return response
    except Exception as e:
        # Service boundary: log with traceback and report the failure as data.
        logging.exception(e)
        return {'error': True, "error_message": str(e)}
|
|
415
|
+
|
|
416
|
+
def _compute_context_version_from_string(self, final_system_context: str) -> str:
|
|
417
|
+
# returns a hash of the context string
|
|
418
|
+
try:
|
|
419
|
+
return hashlib.sha256(final_system_context.encode("utf-8")).hexdigest()
|
|
420
|
+
except Exception:
|
|
421
|
+
return "unknown"
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def load_files_for_context(self, files: list) -> str:
    """Turn the attached files into a text block for the LLM.

    Each entry is a dict. The name is read from 'file_id', 'filename' or
    'name' (first truthy one), the base64 payload from 'base64' or
    'content'. Entries without a name or payload, and entries that fail to
    decode or convert, contribute an ``<error>`` element instead of a
    ``<document>`` element.

    Returns:
        '' when *files* is empty or None, otherwise the header followed by
        one element per entry, in input order.
    """
    if not files:
        return ''

    # The header lines sit at column 0 on purpose: they are part of the string.
    header = f"""
A continuación encontraras una lista de documentos adjuntos
enviados por el usuario que hace la pregunta,
en total son: {len(files)} documentos adjuntos
"""
    sections = [header]

    for attachment in files:
        name = attachment.get('file_id') or attachment.get('filename') or attachment.get('name')
        if not name:
            sections.append("\n<error>Documento adjunto sin nombre ignorado.</error>\n")
            continue

        payload = attachment.get('base64') or attachment.get('content')
        if not payload:
            # The file is referenced but came without any content.
            sections.append(f"\n<error>El archivo '{name}' no fue encontrado y no pudo ser cargado.</error>\n")
            continue

        try:
            # b64decode gets bytes; str payloads are encoded first.
            raw = payload.encode('utf-8') if isinstance(payload, str) else payload
            text = self.document_service.file_to_txt(name, base64.b64decode(raw))
            sections.append(f"\n<document name='{name}'>\n{text}\n</document>\n")
        except Exception as e:
            # Decoding or text-extraction problems are reported inline
            # so the remaining files are still processed.
            logging.error(f"Failed to process file {name}: {e}")
            sections.append(f"\n<error>Error al procesar el archivo {name}: {str(e)}</error>\n")

    return "".join(sections)
|
|
467
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
from iatoolkit.repositories.vs_repo import VSRepo
|
|
7
|
+
from iatoolkit.repositories.document_repo import DocumentRepo
|
|
8
|
+
from iatoolkit.repositories.profile_repo import ProfileRepo
|
|
9
|
+
from iatoolkit.repositories.models import Company
|
|
10
|
+
from injector import inject
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SearchService:
    """Semantic document search over a company's vector store."""

    @inject
    def __init__(self,
                 profile_repo: ProfileRepo,
                 doc_repo: DocumentRepo,
                 vs_repo: VSRepo):
        """Store the injected repositories on the instance."""
        self.profile_repo = profile_repo
        self.vs_repo = vs_repo
        self.doc_repo = doc_repo

    def search(self, company_short_name: str, query: str, metadata_filter: dict | None = None) -> str:
        """
        Performs a semantic search for a given query within a company's documents.

        This method queries the vector store for relevant documents based on the
        provided query text. It then constructs a formatted string containing the
        content of the retrieved documents, which can be used as context for an LLM.

        Args:
            company_short_name: The company to search within.
            query: The text query to search for.
            metadata_filter: An optional dictionary to filter documents by their metadata.

        Returns:
            A string containing the concatenated content of the found documents,
            formatted to be used as a context. When the company does not exist,
            an "error: company ... not found" message is returned instead.
        """
        company = self.profile_repo.get_company_by_short_name(company_short_name)
        if not company:
            return f"error: company {company_short_name} not found"

        document_list = self.vs_repo.query(company_short_name=company_short_name,
                                           query_text=query,
                                           metadata_filter=metadata_filter)

        entries = []
        for doc in document_list:
            entry = f'documento "{doc.filename}"'
            if doc.meta and 'document_type' in doc.meta:
                # Double-quoted f-string: reusing the enclosing quote character
                # inside the braces is a SyntaxError before Python 3.12.
                entry += f" tipo: {doc.meta.get('document_type', '')}"
            entry += f': {doc.content}\n'
            entries.append(entry)

        return ''.join(entries)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
from iatoolkit.common.interfaces.database_provider import DatabaseProvider
|
|
7
|
+
from iatoolkit.repositories.database_manager import DatabaseManager
|
|
8
|
+
from iatoolkit.services.i18n_service import I18nService
|
|
9
|
+
from iatoolkit.common.exceptions import IAToolkitException
|
|
10
|
+
from iatoolkit.common.util import Utility
|
|
11
|
+
from injector import inject, singleton
|
|
12
|
+
from typing import Callable
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@singleton
class SqlService:
    """
    Manages database connections and executes SQL statements.

    Providers are built by factory functions registered per ``connection_type``
    and cached under the composite key ``(company_short_name, db_name)`` so a
    logical database is only instantiated once per company.
    """

    @inject
    def __init__(self,
                 util: Utility,
                 i18n_service: I18nService):
        """
        Args:
            util: Helper whose ``serialize`` method is used as the JSON
                ``default`` hook when serializing query results.
            i18n_service: Translation service used for user-facing error text.
        """
        self.util = util
        self.i18n_service = i18n_service

        # Cache for database providers. Key is tuple: (company_short_name, db_name)
        # Value is the abstract interface DatabaseProvider
        self._db_connections: dict[tuple[str, str], DatabaseProvider] = {}

        # Registry of factory functions.
        # Format: {'connection_type': function(config_dict) -> DatabaseProvider}
        self._provider_factories: dict[str, Callable[[dict], DatabaseProvider]] = {}

        # Register the default 'direct' strategy (SQLAlchemy)
        self.register_provider_factory('direct', self._create_direct_connection)

    def register_provider_factory(self, connection_type: str, factory: Callable[[dict], DatabaseProvider]):
        """
        Allows plugins (Enterprise) to register new connection types.

        Registering an already-known ``connection_type`` replaces the previous
        factory.

        Args:
            connection_type: Value matched against ``config['connection_type']``.
            factory: Callable receiving the database config dict and returning
                a DatabaseProvider.
        """
        self._provider_factories[connection_type] = factory

    def _create_direct_connection(self, config: dict) -> DatabaseProvider:
        """
        Default factory for standard SQLAlchemy connections.

        Reads the URI from ``config['db_uri']`` (falling back to
        ``config['DATABASE_URI']``) and the optional ``config['schema']``.

        Raises:
            IAToolkitException: DATABASE_ERROR when no URI is configured.
        """
        uri = config.get('db_uri') or config.get('DATABASE_URI')
        schema = config.get('schema')
        if not uri:
            raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR,
                                     "Missing db_uri for direct connection")
        return DatabaseManager(uri, schema=schema, register_pgvector=False)

    def register_database(self, company_short_name: str, db_name: str, config: dict):
        """
        Creates and caches a DatabaseProvider instance based on the configuration.

        The call is a no-op when the (company, db) pair is already cached.
        Unknown connection types and factory failures are logged and skipped
        rather than raised.

        Args:
            company_short_name: Company owning the database.
            db_name: Logical database name within the company.
            config: Provider configuration; ``connection_type`` selects the
                factory and defaults to ``'direct'``.
        """
        key = (company_short_name, db_name)
        if key in self._db_connections:
            return

        # Determine connection type (default to 'direct')
        conn_type = config.get('connection_type', 'direct')
        logging.info(f"Registering DB '{db_name}' ({conn_type}) for company '{company_short_name}'")

        factory = self._provider_factories.get(conn_type)
        if not factory:
            logging.error(f"Unknown connection type '{conn_type}' for DB '{db_name}'. Skipping.")
            return

        try:
            # Create the provider using the appropriate factory
            provider_instance = factory(config)
        except Exception as e:
            # We don't raise here to allow other DBs to load if one fails
            logging.error(f"Failed to register DB '{db_name}': {e}")
        else:
            self._db_connections[key] = provider_instance

    def get_db_names(self, company_short_name: str) -> list[str]:
        """
        Returns list of logical database names available ONLY for the specified company.
        """
        return [db for (co, db) in self._db_connections if co == company_short_name]

    def get_database_provider(self, company_short_name: str, db_name: str) -> DatabaseProvider:
        """
        Retrieves a registered DatabaseProvider instance using the composite key.
        Replaces the old 'get_database_manager'.

        Raises:
            IAToolkitException: DATABASE_ERROR when the (company, db) pair has
                not been registered.
        """
        key = (company_short_name, db_name)
        try:
            return self._db_connections[key]
        except KeyError:
            logging.error(
                f"Attempted to access unregistered database: '{db_name}' for company '{company_short_name}'"
            )
            raise IAToolkitException(
                IAToolkitException.ErrorType.DATABASE_ERROR,
                f"Database '{db_name}' is not registered for this company."
            )

    @staticmethod
    def _rollback_quietly(provider: DatabaseProvider, database_name: str) -> None:
        """
        Best-effort rollback used on error paths.

        A rollback failure is logged and suppressed so that it never masks the
        original error the caller is about to report.
        """
        try:
            provider.rollback()
        except Exception as rollback_error:
            logging.warning(f"Rollback failed for database '{database_name}': {rollback_error}")

    def exec_sql(self, company_short_name: str, **kwargs):
        """
        Executes a raw SQL statement against a registered database provider.
        Delegates the actual execution details to the provider implementation.

        Keyword Args:
            database_key: Logical name of the registered database (required).
            query: SQL text handed to ``provider.execute_query``.
            format: ``'dict'`` returns the provider result as-is; any other
                value (default ``'json'``) returns it serialized as JSON.
            commit: Forwarded unchanged to ``provider.execute_query``.

        Returns:
            The provider result, or its JSON string representation.

        Raises:
            IAToolkitException: DATABASE_ERROR when ``database_key`` is missing,
                the database is not registered, or execution fails.
        """
        database_name = kwargs.get('database_key')
        query = kwargs.get('query')
        output_format = kwargs.get('format', 'json')
        commit = kwargs.get('commit')

        if not database_name:
            raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR,
                                     'missing database_name in call to exec_sql')

        # Stays None until the lookup succeeds, so the error path knows
        # whether there is anything to roll back.
        provider = None
        try:
            # 1. Get the abstract provider (could be Direct or Bridge)
            provider = self.get_database_provider(company_short_name, database_name)

            # 2. Delegate execution
            # The provider returns a clean List[Dict] or Dict result
            result_data = provider.execute_query(query, commit=commit)

            # 3. Handle Formatting (Service layer responsibility)
            if output_format == 'dict':
                return result_data

            # Serialize the result
            return json.dumps(result_data, default=self.util.serialize)

        except IAToolkitException:
            raise
        except Exception as e:
            # Reuse the provider resolved above instead of looking it up again.
            if provider is not None:
                self._rollback_quietly(provider, database_name)

            error_message = str(e)
            if 'timed out' in error_message:
                error_message = self.i18n_service.t('errors.timeout')

            logging.error(f"Error executing SQL statement: {error_message}")
            raise IAToolkitException(IAToolkitException.ErrorType.DATABASE_ERROR,
                                     error_message) from e

    def commit(self, company_short_name: str, database_name: str):
        """
        Commits the current transaction for a registered database provider.

        On failure a rollback is attempted before the error is re-raised.

        Raises:
            IAToolkitException: DATABASE_ERROR when the database is not
                registered or the commit fails.
        """
        provider = self.get_database_provider(company_short_name, database_name)
        try:
            provider.commit()
        except Exception as e:
            self._rollback_quietly(provider, database_name)
            logging.error(f"Error while committing sql: '{str(e)}'")
            raise IAToolkitException(
                IAToolkitException.ErrorType.DATABASE_ERROR, str(e)
            ) from e
|