iatoolkit 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of iatoolkit might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: iatoolkit
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: IAToolkit
5
5
  Author: Fernando Libedinsky
6
6
  License-Expression: MIT
@@ -10,13 +10,13 @@ iatoolkit/system_prompts/sql_rules.prompt,sha256=y4nURVnb9AyFwt-lrbMNBHHtZlhk6kC
10
10
  services/__init__.py,sha256=fSvSfIcPW1dHwTBY1hQ5dBEhaoorzk_GzR4G46gD8tY,173
11
11
  services/api_service.py,sha256=InIKTc64BWcp4U4tYKHz28x4ErPxIfvR9x3ZlxJZlXs,2911
12
12
  services/benchmark_service.py,sha256=0Vgsx_FaUZL7igoBYbe1AZkIWOiEUx1FSCV_0Ut0mtk,5921
13
- services/dispatcher_service.py,sha256=AOLsv9haFOxW5hV-6LOP8tryDdlfvjEGhNNGjgphH2c,15489
14
- services/document_service.py,sha256=sm5QtbrKs2dF9hpLuSLMB-IMWYNBD7yWHv3rd80aD0o,5960
13
+ services/dispatcher_service.py,sha256=j3Vm3vgDIgwMn9tF1BBHN3sY-V30XIkbHNcXVR0u-kY,15491
14
+ services/document_service.py,sha256=np8wjaFpS8kVgAeVr8JWzGHcdRl1S4vsOX-dxyaLP8E,5961
15
15
  services/excel_service.py,sha256=wE9Udbyb96kGRSnZZ6KM2mbE484rKjTEhta9GKKpy-8,3630
16
16
  services/file_processor_service.py,sha256=0CM4CQu6KKfcLVGkxs4hYxgdz8kKRWfkV5rDH9UoccM,4173
17
17
  services/history_service.py,sha256=6fGSSWxy60nxtkwp_fodwDHoVKhpIUbHnzAzUSiNi-Y,1657
18
18
  services/jwt_service.py,sha256=dC45Sn6FyzdzRiQJnzgkjN3Hy21V1imRxB0hTyWRvlA,3979
19
- services/load_documents_service.py,sha256=AtB5VnSjOEILG3wcQm6YiXkRrVvL87JWeRPgon81hf4,11064
19
+ services/load_documents_service.py,sha256=eDqi4Nr2K0BvHS4om07LL_wbFcyfJ4qIQiMULviZWsE,7098
20
20
  services/mail_service.py,sha256=ystFit1LuYUC4ekYYebyiy1rqYQmxeL6K8h58MxEkOY,2233
21
21
  services/profile_service.py,sha256=vZV0cregZQiPKYcNLaD7xjez2y6-3Mq97cDndC8NL8w,17922
22
22
  services/prompt_manager_service.py,sha256=bWG4SIgt0u45PVUfm0xRLbLfKC7bk6uozVHRdkdgCmc,7761
@@ -26,7 +26,7 @@ services/sql_service.py,sha256=H7CIPpXTcxLXLojD2fBFr_mIAD0PW1vEJhKHLfJi4Hk,1418
26
26
  services/tasks_service.py,sha256=hHJDlcsSOPtEleD6_Vv3pocfxWNmthIhmZSdnoWFpEM,6861
27
27
  services/user_feedback_service.py,sha256=YtCndRBekDEWYEbac431Ksn2gMO5iBrI3WqKK0xtShE,2513
28
28
  services/user_session_context_service.py,sha256=5qn7fqpuiU8KgMpU4M5-iRUsETumz1raBw-EeZLuE1A,3868
29
- iatoolkit-0.4.1.dist-info/METADATA,sha256=rzRfT16CeRWOOnJorFb5kNJ3W0U98yKb1FZiB_L6p8M,9300
30
- iatoolkit-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- iatoolkit-0.4.1.dist-info/top_level.txt,sha256=dqlBbmgo9okD9d_WMR9uYzdup7Rxgj26yFF85jRGeu4,19
32
- iatoolkit-0.4.1.dist-info/RECORD,,
29
+ iatoolkit-0.4.2.dist-info/METADATA,sha256=F2KitiTXfL4FN4nIp7ebGr36828ZVRO5QFT5Bvxmfg8,9300
30
+ iatoolkit-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
+ iatoolkit-0.4.2.dist-info/top_level.txt,sha256=dqlBbmgo9okD9d_WMR9uYzdup7Rxgj26yFF85jRGeu4,19
32
+ iatoolkit-0.4.2.dist-info/RECORD,,
@@ -109,7 +109,7 @@ class Dispatcher:
109
109
  prompt_name=prompt['name'],
110
110
  description=prompt['description'],
111
111
  order=1,
112
- is_system_prompt=True
112
+ is_system_prompt=True,
113
113
  )
114
114
  i += 1
115
115
 
@@ -117,7 +117,7 @@ class Dispatcher:
117
117
  for company in self.company_instances.values():
118
118
  company.register_company()
119
119
 
120
- def dispatch(self, company_name: str, action: str, **kwargs) -> str:
120
+ def dispatch(self, company_name: str, action: str, **kwargs) -> dict:
121
121
  company_key = company_name.lower()
122
122
 
123
123
  if company_key not in self.company_instances:
@@ -16,7 +16,7 @@ class DocumentService:
16
16
  @inject
17
17
  def __init__(self):
18
18
  # max number of pages to load
19
- self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "10"))
19
+ self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "200"))
20
20
 
21
21
  def file_to_txt(self, filename, file_content):
22
22
  try:
@@ -52,103 +52,16 @@ class LoadDocumentsService:
52
52
  separators=["\n\n", "\n", "."]
53
53
  )
54
54
 
55
- # load the files for all of the companies.
56
- def load(self, doc_type: str = None):
55
+ def load_company_files(self,
56
+ company: Company,
57
+ connector_config: Dict,
58
+ predefined_metadata: Dict = None,
59
+ filters: Dict = None):
57
60
  """
58
- Loads documents for all companies based on their configuration.
59
- It can load all document types or a specific one if provided.
60
-
61
- Args:
62
- doc_type (str, optional): A specific document type to load.
63
- If None, all configured types are loaded.
64
-
65
- Returns:
66
- Dict: A dictionary with a summary message.
67
- """
68
- # doc_type: an optional document_type for loading
69
- files_loaded = 0
70
- companies = self.profile_repo.get_companies()
71
-
72
- for company in companies:
73
- load_config = company.parameters.get('load', {})
74
- if not load_config:
75
- continue
76
-
77
- print(f"Cargando datos de ** {company.short_name} **")
78
-
79
- # Si hay configuraciones de tipos de documento específicos
80
- doc_types_config = load_config.get('document_types', {})
81
-
82
- if doc_types_config and len(doc_types_config) > 0:
83
- # Si se especificó un tipo de documento, cargar solo ese tipo
84
- if doc_type and doc_type in doc_types_config:
85
- files_loaded += self._load_document_type(company, doc_type, doc_types_config[doc_type])
86
- # Si no se especificó, cargar todos los tipos configurados
87
- elif not doc_type:
88
- for type_name, type_config in doc_types_config.items():
89
- files_loaded += self._load_document_type(company, type_name, type_config)
90
- else:
91
- # Comportamiento anterior: usar la configuración general
92
- connector = load_config.get('connector', {})
93
- if not connector:
94
- raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
95
- f"Falta configurar conector en empresa {company.short_name}")
96
-
97
- files_loaded += self.load_data_source(company=company,
98
- connector_config=connector)
99
-
100
- return {'message': f'{files_loaded} files processed'}
101
-
102
- def load_company_files(self, company: Company,
103
- connector: dict,
104
- predefined_metadata: Dict = None,
105
- filters: Dict = None):
106
- """
107
- Loads all files for a specific company using a given connector.
61
+ Loads all the company files from a connector
108
62
 
109
63
  Args:
110
64
  company (Company): The company to load files for.
111
- connector (dict): The connector configuration.
112
-
113
- Returns:
114
- Dict: A dictionary with a summary message.
115
- """
116
- if not connector:
117
- raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
118
- f"Falta configurar conector")
119
-
120
- files_loaded = self.load_data_source(
121
- company=company,
122
- connector_config=connector,
123
- predefined_metadata=predefined_metadata,
124
- filters=filters)
125
-
126
- return {'message': f'{files_loaded} files processed'}
127
-
128
- def _load_document_type(self, company: Company, doc_type_name: str, type_config: Dict) -> int:
129
- # load specific document_types for a company
130
- connector = type_config.get('connector')
131
- if not connector:
132
- logging.warning(f"Falta configurar conector para tipo {doc_type_name} en empresa {company.short_name}")
133
- raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
134
- f"Falta configurar conector para tipo {doc_type_name} en empresa {company.short_name}")
135
-
136
- # get the metadata for this connector
137
- predefined_metadata = type_config.get('metadata', {})
138
-
139
- # config specific filters
140
- filters = type_config.get('filters', {"filename_contains": ".pdf"})
141
-
142
- return self.load_data_source(company=company,
143
- connector_config=connector,
144
- predefined_metadata=predefined_metadata,
145
- filters=filters)
146
-
147
- def load_data_source(self, company: Company, connector_config: Dict, predefined_metadata: Dict = None, filters: Dict = None):
148
- """
149
- Loads files from a data source using a connector and a FileProcessor.
150
-
151
- Args:
152
65
  connector_config (Dict): The configuration for the file connector.
153
66
  predefined_metadata (Dict, optional): Metadata to be added to all documents from this source.
154
67
  filters (Dict, optional): Filters to apply to the files.
@@ -156,6 +69,10 @@ class LoadDocumentsService:
156
69
  Returns:
157
70
  int: The number of processed files.
158
71
  """
72
+ if not connector_config:
73
+ raise IAToolkitException(IAToolkitException.ErrorType.MISSING_PARAMETER,
74
+ f"Falta configurar conector")
75
+
159
76
  try:
160
77
  if not filters:
161
78
  filters = {"filename_contains": ".pdf"}