iatoolkit 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of iatoolkit might be problematic. Click here for more details.

iatoolkit/__init__.py CHANGED
@@ -21,13 +21,15 @@ from services.excel_service import ExcelService
21
21
  from services.dispatcher_service import Dispatcher
22
22
  from services.document_service import DocumentService
23
23
  from services.search_service import SearchService
24
+ from services.load_documents_service import LoadDocumentsService
24
25
  from repositories.profile_repo import ProfileRepo
25
26
  from repositories.llm_query_repo import LLMQueryRepo
26
27
  from services.query_service import QueryService
28
+ from services.prompt_manager_service import PromptService
27
29
  from repositories.database_manager import DatabaseManager
28
30
  from infra.call_service import CallServiceClient
29
31
  from common.util import Utility
30
- from repositories.models import Base, Company, Function, TaskType
32
+ from repositories.models import Base, Company, Function, TaskType, Prompt, PromptCategory
31
33
 
32
34
 
33
35
  __all__ = [
@@ -42,8 +44,10 @@ __all__ = [
42
44
  'DocumentService',
43
45
  'SearchService',
44
46
  'QueryService',
47
+ 'LoadDocumentsService',
45
48
  'ProfileRepo',
46
49
  'LLMQueryRepo',
50
+ 'PromptService',
47
51
  'DatabaseManager',
48
52
  'CallServiceClient',
49
53
  'Utility',
@@ -51,4 +55,6 @@ __all__ = [
51
55
  'Function',
52
56
  'TaskType',
53
57
  'Base',
58
+ 'Prompt',
59
+ 'PromptCategory'
54
60
  ]
iatoolkit/cli_commands.py CHANGED
@@ -24,6 +24,8 @@ def register_core_commands(app):
24
24
  def setup_company(company_short_name: str):
25
25
  """⚙️ Genera una nueva API key para una compañía ya registrada."""
26
26
  try:
27
+ dispatcher = IAToolkit.get_instance().get_injector().get(Dispatcher)
28
+ dispatcher.setup_all_companies()
27
29
  profile_service = IAToolkit.get_instance().get_injector().get(ProfileService)
28
30
  click.echo(f"🔑 Generando API key para '{company_short_name}'...")
29
31
  result = profile_service.new_api_key(company_short_name)
@@ -64,14 +66,4 @@ def register_core_commands(app):
64
66
  logging.exception(e)
65
67
  click.echo(f"Error: {str(e)}")
66
68
 
67
- @app.cli.command("load")
68
- def load_documents():
69
- from services.load_documents_service import LoadDocumentsService
70
69
 
71
- load_documents_service = IAToolkit.get_instance().get_injector().get(LoadDocumentsService)
72
- try:
73
- result = load_documents_service.load()
74
- click.echo(result['message'])
75
- except Exception as e:
76
- logging.exception(e)
77
- click.echo(f"Error: {str(e)}")
@@ -15,27 +15,21 @@ class CompanyRegistry:
15
15
  def __init__(self):
16
16
  self._company_classes: Dict[str, Type[BaseCompany]] = {}
17
17
  self._company_instances: Dict[str, BaseCompany] = {}
18
- self._injector = None
19
18
 
20
- def set_injector(self, injector) -> None:
21
- """Establece el injector para crear instancias con dependencias"""
22
- self._injector = injector
23
19
 
24
- def instantiate_companies(self) -> Dict[str, BaseCompany]:
20
+ def instantiate_companies(self, injector) -> Dict[str, BaseCompany]:
25
21
  """
26
22
  Instancia todas las empresas registradas con inyección de dependencias.
27
23
 
28
24
  Returns:
29
25
  Dict con instancias de empresas {name: instance}
30
26
  """
31
- if not self._injector:
32
- raise RuntimeError("Injector no configurado. Llame a set_injector() primero.")
33
27
 
34
28
  for company_key, company_class in self._company_classes.items():
35
29
  if company_key not in self._company_instances:
36
30
  try:
37
31
  # use the injector to create the instance
38
- company_instance = self._injector.get(company_class)
32
+ company_instance = injector.get(company_class)
39
33
  self._company_instances[company_key] = company_instance
40
34
  logging.info(f"company '{company_key}' created in dispatcher")
41
35
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: iatoolkit
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: IAToolkit
5
5
  Author: Fernando Libedinsky
6
6
  License-Expression: MIT
@@ -207,46 +207,52 @@ Requires-Dist: yarl==1.18.3
207
207
  Requires-Dist: zipp==3.21.0
208
208
  Requires-Dist: zstandard==0.23.0
209
209
 
210
- # iatoolkit
210
+
211
+ <div align="center">
212
+ <h1>IAToolkit</h1>
213
+ <p><strong>The Open-Source Framework for Building AI Chatbots on Your Private Data.</strong></p>
214
+ </div>
211
215
 
212
216
  IAToolkit is a comprehensive, open-source framework designed for building enterprise-grade
213
217
  AI chatbots and conversational applications.
214
- Built on Flask with dependency injection, it provides a robust foundation for scalable AI solutions.
218
+ With IAToolkit, you can build production-ready, context-aware chatbots and agents that
219
+ can query relational databases, perform semantic searches on documents,
220
+ and connect to your internal APIs in minutes.
221
+
222
+ IAToolkit bridges the gap between powerful LLMs and your company's data.
223
+
215
224
 
216
225
  ## 🚀 Key Features
217
- - **Universal LLM Integration**: OpenAI GPT, Google Gemini
218
- - **Template System**: Jinja2-powered prompt templates with variables
219
- - **Context Management**: Maintain conversation context across sessions
220
-
221
- ### 🔒 **Enterprise Security**
222
- - **JWT Authentication**: Secure token-based authentication
223
- - **Session Management**: Redis-backed secure sessions
224
- - **CORS Configuration**: Flexible cross-origin resource sharing
225
-
226
- ### 🛠 **Function Calling & Tools**
227
- - **Native Function Calls**: Direct integration with LLM function calling
228
- - **Custom Tools**: Build and register custom tools for your chatbot
229
- - **SQL Query Generation**: Natural language to SQL conversion
230
- - **API Integrations**: Connect to external services and APIs
231
-
232
- ### 🗄 **Database & Storage**
233
- - **Multi-Database Support**: PostgreSQL, MySQL, SQLite via SQLAlchemy
234
- - **Vector Store Integration**: Semantic search and retrieval
235
- - **Document Processing**: PDF, Word, Excel, and text file handling
236
-
237
- ### 📊 **Analytics & Monitoring**
238
- - **Query Logging**: Track all LLM interactions
239
- - **Performance Metrics**: Response times, token usage, costs
240
- - **Benchmarking**: Compare model performance
241
- - **Task Management**: Async task processing with status tracking
242
-
243
- ### 🔧 **Developer Experience**
244
- - **Dependency Injection**: Clean, testable architecture
245
- - **CLI Tools**: Command-line interface for common tasks
246
- - **Hot Reloading**: Development-friendly configuration
247
- - **Comprehensive Logging**: Debug and monitor easily
248
-
249
- ## License
250
- MIT License
251
226
 
227
+ * **🔗 Unified Data Connection**:
228
+ * **Natural Language to SQL**: Let your chatbot query relational databases (PostgreSQL, MySQL, SQLite) using everyday language.
229
+ * **Semantic Document Search**: Automatically chunk, embed, and search across your private documents (PDFs, Word, etc.) to provide contextually accurate answers.
230
+
231
+ * **🏢 Enterprise-Ready Multi-Tenancy**:
232
+ * Deploy isolated "Company" modules, each with its own data, tools, and context. Perfect for SaaS products or internal departmental agents.
233
+
234
+ * **🧠 LLM Agnostic**:
235
+ * Switch between **OpenAI (GPT-*)** and **Google (Gemini-*)** with a single line change in your configuration. No code refactoring needed.
236
+
237
+ * **🛠️ Developer-First Experience**:
238
+ * Built with a clean, **Dependency Injection** architecture.
239
+ * High-quality code base with **90%+ test coverage**.
240
+ * Powerful Flask-based **CLI** for database setup, API key generation, and more.
241
+
242
+ * **🔒 Security & Observability Built-In**:
243
+ * Comes with JWT-based authentication, user management, and secure session handling out of the box.
244
+ * Full traceability with detailed logging of all queries, function calls, token usage, and costs.
245
+
246
+ ## ⚡ Quick Start: Create a Custom Tool in 30 Seconds
247
+
248
+ See how easy it is to give your AI a new skill. Just define a method inside your Company class and describe it.
249
+ IAToolkit handles the rest.
250
+
251
+ ## 🤝 Contributing
252
+
253
+ We welcome contributions! Whether it's adding a new feature, improving documentation, or fixing a bug,
254
+ please feel free to open a pull request.
255
+
256
+ ## 📄 License
252
257
 
258
+ IAToolkit is open-source and licensed under the [MIT License](LICENSE).
@@ -1,7 +1,7 @@
1
- iatoolkit/__init__.py,sha256=WImnnjDcaWy8_CoIS4UA2PwqYcMb0dDNUhg-OlkZ2X8,1512
1
+ iatoolkit/__init__.py,sha256=GkFxAQHKPifz4Kd8M73Rc8TWRVIxjxkl1N0nsPvb_sU,1743
2
2
  iatoolkit/base_company.py,sha256=FlB-HFYH8FoTl4nbtsYgfKjkdZtizJbKwXqaosxmRqc,2009
3
- iatoolkit/cli_commands.py,sha256=oWd5kwDYd0W1Lcpuk3N2cEnusPAVefaCrYveMQ1zDvY,3223
4
- iatoolkit/company_registry.py,sha256=HnDpVyCc41OAn-exVF53b_HMES7GelWZcvxR39S_nI4,2900
3
+ iatoolkit/cli_commands.py,sha256=CyaabHA3HdKd-eIqrJD8IQFT7Tqn_BEdi4jb1utisMo,2909
4
+ iatoolkit/company_registry.py,sha256=KOUzJHLYgzMAV6BxkTiDPlN_ME4fktp7yRzKLlXZ5-w,2597
5
5
  iatoolkit/iatoolkit.py,sha256=OwlGujwtNLBYtfZuCpcX_yzrgB8BVo9Jfh72owM8FFc,15651
6
6
  iatoolkit/system_prompts/arquitectura.prompt,sha256=2W-7NWy6P6y1Gh5_-zD1iK-BWq1Siu8TuvGCouP67bQ,1267
7
7
  iatoolkit/system_prompts/format_styles.prompt,sha256=MSMe1qvR3cF_0IbFshn8R0z6Wx6VCHQq1p37rpu5wwk,3576
@@ -9,14 +9,14 @@ iatoolkit/system_prompts/query_main.prompt,sha256=Eu5VOQzUygJ45Ct1WKYGbi0JMltgI6
9
9
  iatoolkit/system_prompts/sql_rules.prompt,sha256=y4nURVnb9AyFwt-lrbMNBHHtZlhk6kC9grYoOhRnrJo,59174
10
10
  services/__init__.py,sha256=fSvSfIcPW1dHwTBY1hQ5dBEhaoorzk_GzR4G46gD8tY,173
11
11
  services/api_service.py,sha256=InIKTc64BWcp4U4tYKHz28x4ErPxIfvR9x3ZlxJZlXs,2911
12
- services/benchmark_service.py,sha256=g9JVrmAqIe_iI0D1DwdQ6DJ2_FJRCTndarESNSVfhbw,5907
13
- services/dispatcher_service.py,sha256=jGixvvQ4DTQGZye8aa05q56B6U-s6NaDb6he6UTXmQc,15534
14
- services/document_service.py,sha256=sm5QtbrKs2dF9hpLuSLMB-IMWYNBD7yWHv3rd80aD0o,5960
12
+ services/benchmark_service.py,sha256=0Vgsx_FaUZL7igoBYbe1AZkIWOiEUx1FSCV_0Ut0mtk,5921
13
+ services/dispatcher_service.py,sha256=j3Vm3vgDIgwMn9tF1BBHN3sY-V30XIkbHNcXVR0u-kY,15491
14
+ services/document_service.py,sha256=np8wjaFpS8kVgAeVr8JWzGHcdRl1S4vsOX-dxyaLP8E,5961
15
15
  services/excel_service.py,sha256=wE9Udbyb96kGRSnZZ6KM2mbE484rKjTEhta9GKKpy-8,3630
16
- services/file_processor_service.py,sha256=82UArWtwpr94CAMkkoRP0_nPtoqItymdKSIABS0Xkxw,2943
16
+ services/file_processor_service.py,sha256=0CM4CQu6KKfcLVGkxs4hYxgdz8kKRWfkV5rDH9UoccM,4173
17
17
  services/history_service.py,sha256=6fGSSWxy60nxtkwp_fodwDHoVKhpIUbHnzAzUSiNi-Y,1657
18
18
  services/jwt_service.py,sha256=dC45Sn6FyzdzRiQJnzgkjN3Hy21V1imRxB0hTyWRvlA,3979
19
- services/load_documents_service.py,sha256=_-OTUih8Zk0m4dHqAhkE7kAwU2mbz_QoMrOKnrq7ZWs,8821
19
+ services/load_documents_service.py,sha256=eDqi4Nr2K0BvHS4om07LL_wbFcyfJ4qIQiMULviZWsE,7098
20
20
  services/mail_service.py,sha256=ystFit1LuYUC4ekYYebyiy1rqYQmxeL6K8h58MxEkOY,2233
21
21
  services/profile_service.py,sha256=vZV0cregZQiPKYcNLaD7xjez2y6-3Mq97cDndC8NL8w,17922
22
22
  services/prompt_manager_service.py,sha256=bWG4SIgt0u45PVUfm0xRLbLfKC7bk6uozVHRdkdgCmc,7761
@@ -26,7 +26,7 @@ services/sql_service.py,sha256=H7CIPpXTcxLXLojD2fBFr_mIAD0PW1vEJhKHLfJi4Hk,1418
26
26
  services/tasks_service.py,sha256=hHJDlcsSOPtEleD6_Vv3pocfxWNmthIhmZSdnoWFpEM,6861
27
27
  services/user_feedback_service.py,sha256=YtCndRBekDEWYEbac431Ksn2gMO5iBrI3WqKK0xtShE,2513
28
28
  services/user_session_context_service.py,sha256=5qn7fqpuiU8KgMpU4M5-iRUsETumz1raBw-EeZLuE1A,3868
29
- iatoolkit-0.4.0.dist-info/METADATA,sha256=1muE8emXWndqmPwC3xhtpVYjrFiktvWdWdD_UckbW10,8801
30
- iatoolkit-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- iatoolkit-0.4.0.dist-info/top_level.txt,sha256=dqlBbmgo9okD9d_WMR9uYzdup7Rxgj26yFF85jRGeu4,19
32
- iatoolkit-0.4.0.dist-info/RECORD,,
29
+ iatoolkit-0.4.2.dist-info/METADATA,sha256=F2KitiTXfL4FN4nIp7ebGr36828ZVRO5QFT5Bvxmfg8,9300
30
+ iatoolkit-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
+ iatoolkit-0.4.2.dist-info/top_level.txt,sha256=dqlBbmgo9okD9d_WMR9uYzdup7Rxgj26yFF85jRGeu4,19
32
+ iatoolkit-0.4.2.dist-info/RECORD,,
@@ -65,7 +65,7 @@ class BenchmarkService:
65
65
 
66
66
  company = self.profile_repo.get_company_by_short_name(company_short_name)
67
67
  if not company:
68
- raise IAToolkitException(IAToolkitException.ErrorType.CONFIG_ERROR, "Compañía 'maxxa' no encontrada.")
68
+ raise IAToolkitException(IAToolkitException.ErrorType.CONFIG_ERROR, f"Compañía {company_short_name} no encontrada.")
69
69
 
70
70
  total_rows = len(df)
71
71
  logging.info(f"Iniciando benchmark para {total_rows} casos de prueba desde el archivo: {file_path}")
@@ -78,8 +78,7 @@ class Dispatcher:
78
78
 
79
79
  # ✅ NOW it is safe to get the injector and instantiate companies.
80
80
  injector = current_iatoolkit().get_injector()
81
- self.company_registry.set_injector(injector)
82
- self.company_registry.instantiate_companies()
81
+ self.company_registry.instantiate_companies(injector)
83
82
 
84
83
 
85
84
  def start_execution(self):
@@ -110,7 +109,7 @@ class Dispatcher:
110
109
  prompt_name=prompt['name'],
111
110
  description=prompt['description'],
112
111
  order=1,
113
- is_system_prompt=True
112
+ is_system_prompt=True,
114
113
  )
115
114
  i += 1
116
115
 
@@ -118,7 +117,7 @@ class Dispatcher:
118
117
  for company in self.company_instances.values():
119
118
  company.register_company()
120
119
 
121
- def dispatch(self, company_name: str, action: str, **kwargs) -> str:
120
+ def dispatch(self, company_name: str, action: str, **kwargs) -> dict:
122
121
  company_key = company_name.lower()
123
122
 
124
123
  if company_key not in self.company_instances:
@@ -16,7 +16,7 @@ class DocumentService:
16
16
  @inject
17
17
  def __init__(self):
18
18
  # max number of pages to load
19
- self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "10"))
19
+ self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "200"))
20
20
 
21
21
  def file_to_txt(self, filename, file_content):
22
22
  try:
@@ -7,26 +7,45 @@ from infra.connectors.file_connector import FileConnector
7
7
  import logging
8
8
  import os
9
9
  from typing import Optional, Callable, Dict
10
+ from repositories.models import Company
10
11
 
11
12
 
12
13
  class FileProcessorConfig:
14
+ """Configuration class for the FileProcessor."""
13
15
  def __init__(
14
16
  self,
15
17
  filters: Dict,
16
- action: Callable[[str, bytes], None],
18
+ callback: Callable[[Company, str, bytes, dict], None],
17
19
  continue_on_error: bool = True,
18
20
  log_file: str = 'file_processor.log',
19
21
  echo: bool = False,
20
- context: dict = None,
22
+ context: dict = None
21
23
  ):
24
+ """
25
+ Initializes the FileProcessor configuration.
26
+
27
+ Args:
28
+ filters (Dict): A dictionary of filters to apply to file names.
29
+ Example: {'filename_contains': '.pdf'}
30
+ callback (Callable): The function to execute for each processed file.
31
+ It receives company (Company), filename (str), content (bytes), and context (dict).
32
+ continue_on_error (bool): If True, continues processing other files upon an error.
33
+ log_file (str): The path to the log file.
34
+ echo (bool): If True, prints progress to the console.
35
+ context (dict): A context dictionary passed to the callback function.
36
+ """
22
37
  self.filters = filters
23
- self.action = action
38
+ self.callback = callback
24
39
  self.continue_on_error = continue_on_error
25
40
  self.log_file = log_file
26
41
  self.echo = echo
27
42
  self.context = context or {}
28
43
 
29
44
  class FileProcessor:
45
+ """
46
+ A generic service to process files from a given data source (connector).
47
+ It lists files, applies filters, and executes a specific action for each one.
48
+ """
30
49
  def __init__(self,
31
50
  connector: FileConnector,
32
51
  config: FileProcessorConfig,
@@ -45,6 +64,7 @@ class FileProcessor:
45
64
  return logging.getLogger(__name__)
46
65
 
47
66
  def process_files(self):
67
+ # Fetches files from the connector, filters them, and processes them.
48
68
  try:
49
69
  files = self.connector.list_files()
50
70
  except Exception as e:
@@ -67,9 +87,12 @@ class FileProcessor:
67
87
 
68
88
  content = self.connector.get_file_content(file_path)
69
89
 
70
- # execute the action defined
90
+ # execute the callback function
71
91
  filename = os.path.basename(file_name)
72
- self.config.action(filename, content, self.config.context)
92
+ self.config.callback(company=self.config.context.get('company'),
93
+ filename=filename,
94
+ content=content,
95
+ context=self.config.context)
73
96
  self.processed_files += 1
74
97
 
75
98
  self.logger.info(f"Successfully processed file: {file_path}")
@@ -21,6 +21,10 @@ from typing import Dict
21
21
 
22
22
 
23
23
  class LoadDocumentsService:
24
+ """
25
+ Orchestrates the process of loading, processing, and storing documents
26
+ from various sources for different companies.
27
+ """
24
28
  @inject
25
29
  def __init__(self,
26
30
  file_connector_factory: FileConnectorFactory,
@@ -38,7 +42,6 @@ class LoadDocumentsService:
38
42
  self.vector_store = vector_store
39
43
  self.file_connector_factory = file_connector_factory
40
44
  self.dispatcher = dispatcher
41
- self.company = None
42
45
 
43
46
  # lower warnings
44
47
  logging.getLogger().setLevel(logging.ERROR)
@@ -49,86 +52,46 @@ class LoadDocumentsService:
49
52
  separators=["\n\n", "\n", "."]
50
53
  )
51
54
 
52
- # load the files for all of the companies.
53
- def load(self, doc_type: str = None):
54
- # doc_type: an optional document_type for loading
55
- files_loaded = 0
56
- companies = self.profile_repo.get_companies()
57
-
58
- for company in companies:
59
- load_config = company.parameters.get('load', {})
60
- if not load_config:
61
- continue
62
-
63
- print(f"Cargando datos de ** {company.short_name} **")
64
- self.company = company
65
-
66
- # Si hay configuraciones de tipos de documento específicos
67
- doc_types_config = load_config.get('document_types', {})
68
-
69
- if doc_types_config and len(doc_types_config) > 0:
70
- # Si se especificó un tipo de documento, cargar solo ese tipo
71
- if doc_type and doc_type in doc_types_config:
72
- files_loaded += self._load_document_type(company, doc_type, doc_types_config[doc_type])
73
- # Si no se especificó, cargar todos los tipos configurados
74
- elif not doc_type:
75
- for type_name, type_config in doc_types_config.items():
76
- files_loaded += self._load_document_type(company, type_name, type_config)
77
- else:
78
- # Comportamiento anterior: usar la configuración general
79
- connector = load_config.get('connector', {})
80
- if not connector:
81
- raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
82
- f"Falta configurar conector en empresa {company.short_name}")
83
-
84
- files_loaded += self.load_data_source(connector)
85
-
86
- return {'message': f'{files_loaded} files processed'}
87
-
88
- def _load_document_type(self, company: Company, doc_type_name: str, type_config: Dict) -> int:
89
- # load specific document_types for a company
90
- connector = type_config.get('connector')
91
- if not connector:
92
- logging.warning(f"Falta configurar conector para tipo {doc_type_name} en empresa {company.short_name}")
93
- raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
94
- f"Falta configurar conector para tipo {doc_type_name} en empresa {company.short_name}")
95
-
96
- # get the metadata for this connector
97
- predefined_metadata = type_config.get('metadata', {})
98
-
99
- # config specific filters
100
- filters = type_config.get('filters', {"filename_contains": ".pdf"})
101
-
102
- return self.load_data_source(connector, predefined_metadata, filters)
103
-
104
- def load_data_source(self, connector_config: Dict, predefined_metadata: Dict = None, filters: Dict = None):
55
+ def load_company_files(self,
56
+ company: Company,
57
+ connector_config: Dict,
58
+ predefined_metadata: Dict = None,
59
+ filters: Dict = None):
105
60
  """
106
- Carga archivos desde una fuente de datos usando un conector.
61
+ Loads all the company files from a connector
107
62
 
108
63
  Args:
109
- connector_config: Configuración del conector
110
- predefined_metadata: Metadatos predefinidos para todos los documentos de esta fuente
111
- filters: Filtros específicos para esta carga
64
+ company (Company): The company to load files for.
65
+ connector_config (Dict): The configuration for the file connector.
66
+ predefined_metadata (Dict, optional): Metadata to be added to all documents from this source.
67
+ filters (Dict, optional): Filters to apply to the files.
112
68
 
113
69
  Returns:
114
- int o dict: Número de archivos procesados o diccionario de error
70
+ int: The number of processed files.
115
71
  """
72
+ if not connector_config:
73
+ raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
74
+ f"Falta configurar conector")
75
+
116
76
  try:
117
- # Si no se proporcionaron filtros, usar el predeterminado
118
77
  if not filters:
119
78
  filters = {"filename_contains": ".pdf"}
120
79
 
121
80
  # Pasar metadata predefinida como parte del contexto al procesador
122
- # para que esté disponible en la función load_file
123
- extra_context = {}
81
+ # para que esté disponible en la función load_file_callback
82
+ context = {
83
+ 'company': company,
84
+ 'metadata': {}
85
+ }
86
+
124
87
  if predefined_metadata:
125
- extra_context['metadata'] = predefined_metadata
88
+ context['metadata'] = predefined_metadata
126
89
 
127
90
  # config the processor
128
91
  processor_config = FileProcessorConfig(
129
- context=extra_context,
92
+ callback=self.load_file_callback,
93
+ context=context,
130
94
  filters=filters,
131
- action=self.load_file,
132
95
  continue_on_error=True,
133
96
  echo=True
134
97
  )
@@ -144,14 +107,21 @@ class LoadDocumentsService:
144
107
  logging.exception("Loading files error: %s", str(e))
145
108
  return {"error": str(e)}
146
109
 
147
- # load an individual filename
148
- # this method is set up on the FileProcessorConfig object
149
- def load_file(self, filename: str, content: bytes, context: dict = {}, company: Company = None):
150
- if not company:
151
- company = self.company
110
+ def load_file_callback(self, company: Company, filename: str, content: bytes, context: dict = {}):
111
+ """
112
+ Processes a single file: extracts text, generates metadata, and saves it
113
+ to the relational database and the vector store.
114
+ This method is intended to be used as the 'callback' for FileProcessor.
115
+
116
+ Args:
117
+ company (Company): The company associated with the file.
118
+ filename (str): The name of the file.
119
+ content (bytes): The binary content of the file.
120
+ context (dict, optional): A context dictionary, may contain predefined metadata.
121
+ """
152
122
 
153
123
  # check if file exist in repositories
154
- if self.doc_repo.get(company=company,filename=filename):
124
+ if self.doc_repo.get(company_id=company.id,filename=filename):
155
125
  return
156
126
 
157
127
  try: