iatoolkit 0.7.5__py3-none-any.whl → 0.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of iatoolkit might be problematic. Click here for more details.
- {iatoolkit-0.7.5.dist-info → iatoolkit-0.7.6.dist-info}/METADATA +1 -1
- iatoolkit-0.7.6.dist-info/RECORD +80 -0
- {iatoolkit-0.7.5.dist-info → iatoolkit-0.7.6.dist-info}/top_level.txt +3 -0
- infra/__init__.py +5 -0
- infra/call_service.py +140 -0
- infra/connectors/__init__.py +5 -0
- infra/connectors/file_connector.py +17 -0
- infra/connectors/file_connector_factory.py +57 -0
- infra/connectors/google_cloud_storage_connector.py +53 -0
- infra/connectors/google_drive_connector.py +68 -0
- infra/connectors/local_file_connector.py +46 -0
- infra/connectors/s3_connector.py +33 -0
- infra/gemini_adapter.py +356 -0
- infra/google_chat_app.py +57 -0
- infra/llm_client.py +430 -0
- infra/llm_proxy.py +139 -0
- infra/llm_response.py +40 -0
- infra/mail_app.py +145 -0
- infra/openai_adapter.py +90 -0
- infra/redis_session_manager.py +76 -0
- repositories/__init__.py +5 -0
- repositories/database_manager.py +95 -0
- repositories/document_repo.py +33 -0
- repositories/llm_query_repo.py +91 -0
- repositories/models.py +309 -0
- repositories/profile_repo.py +118 -0
- repositories/tasks_repo.py +52 -0
- repositories/vs_repo.py +139 -0
- views/__init__.py +5 -0
- views/change_password_view.py +91 -0
- views/chat_token_request_view.py +98 -0
- views/chat_view.py +51 -0
- views/download_file_view.py +58 -0
- views/external_chat_login_view.py +88 -0
- views/external_login_view.py +40 -0
- views/file_store_view.py +58 -0
- views/forgot_password_view.py +64 -0
- views/history_view.py +57 -0
- views/home_view.py +34 -0
- views/llmquery_view.py +65 -0
- views/login_view.py +60 -0
- views/prompt_view.py +37 -0
- views/signup_view.py +87 -0
- views/tasks_review_view.py +83 -0
- views/tasks_view.py +98 -0
- views/user_feedback_view.py +74 -0
- views/verify_user_view.py +55 -0
- iatoolkit-0.7.5.dist-info/RECORD +0 -36
- {iatoolkit-0.7.5.dist-info → iatoolkit-0.7.6.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
common/auth.py,sha256=kKBvZoIm8RPpPFZ6KEm1oowHJfJwSO0mf10Yac8SG9k,8456
|
|
3
|
+
common/exceptions.py,sha256=EXx40n5htp7UiOM6P1xfJ9U6NMcADqm62dlFaKz7ICU,1154
|
|
4
|
+
common/routes.py,sha256=_1ywiXorwEeJ8IAamQwSxU9z8jlAAL_U64tMb01-zFg,4188
|
|
5
|
+
common/session_manager.py,sha256=7D_RuJs60w-1zDr3fOGEz9JW7IZlSXuUHgUT87CzaUo,472
|
|
6
|
+
common/util.py,sha256=5zw1wL_FdPBibGJbQAl1kozgTjUpsOZlPqT3tbWsHp4,15515
|
|
7
|
+
iatoolkit/__init__.py,sha256=GkFxAQHKPifz4Kd8M73Rc8TWRVIxjxkl1N0nsPvb_sU,1743
|
|
8
|
+
iatoolkit/base_company.py,sha256=WmD4o0qFC1n5DW5eRsRsuNfaGot9nxGFcJe3LmibSuE,4259
|
|
9
|
+
iatoolkit/cli_commands.py,sha256=BGuThg19eoSssrBJIqzBNaWpMmyy7u4yRUz0JA7d-vc,2270
|
|
10
|
+
iatoolkit/company_registry.py,sha256=tduqt3oV8iDX_IB1eA7KIgvIxE4edTcy-3qZIXh3Lzw,2549
|
|
11
|
+
iatoolkit/iatoolkit.py,sha256=bbwLkLS_xZGNS0Dki3zX73RhAYMS32IshXDIdr_RVXg,15776
|
|
12
|
+
iatoolkit/system_prompts/format_styles.prompt,sha256=MSMe1qvR3cF_0IbFshn8R0z6Wx6VCHQq1p37rpu5wwk,3576
|
|
13
|
+
iatoolkit/system_prompts/query_main.prompt,sha256=w_9ybgWgiQH4V_RbAXqsvz0M7oOuiyhxcwf-D0CgfA4,3017
|
|
14
|
+
iatoolkit/system_prompts/sql_rules.prompt,sha256=y4nURVnb9AyFwt-lrbMNBHHtZlhk6kC9grYoOhRnrJo,59174
|
|
15
|
+
infra/__init__.py,sha256=5JqK9sZ6jBuK83zDQokUhxQ0wuJJJ9DXB8pYCLkX7X4,102
|
|
16
|
+
infra/call_service.py,sha256=170qCNJb_9caUiEi8PprQaqSUS3Rj7XmU917J2-m62I,4919
|
|
17
|
+
infra/gemini_adapter.py,sha256=MLesq8Oyx_Jd6-cSA_41zC1UtDM6CMino054NEybOJM,15031
|
|
18
|
+
infra/google_chat_app.py,sha256=XHyFQyX92ztpJQLvNx4mKPjbR2x7_15t_dcpVqW25ZQ,1927
|
|
19
|
+
infra/llm_client.py,sha256=cN1uaZxOP7H8LkmFpSpQMesBH3Pg1LKJuent4r1yovg,18548
|
|
20
|
+
infra/llm_proxy.py,sha256=qBird4-k3Z8ABECvnWXbljXCQWx2q6RTmylJZQLDk4U,5684
|
|
21
|
+
infra/llm_response.py,sha256=YUUQPBHzmP3Ce6-t0kKMRIpowvh7de1odSoefEByIvI,904
|
|
22
|
+
infra/mail_app.py,sha256=eWSOz5uWf-KmZoUJbYFPad-0uWP2LekINAamDYbxYpg,6093
|
|
23
|
+
infra/openai_adapter.py,sha256=roPrp-DJWcHHhOXdUttsgwfhJpMhHNy1RpkdzVKQMFo,3510
|
|
24
|
+
infra/redis_session_manager.py,sha256=Dkwii8fns0_Vd9hm9GmotmrKtSCxVps868U5sqj7FAE,2278
|
|
25
|
+
infra/connectors/__init__.py,sha256=5JqK9sZ6jBuK83zDQokUhxQ0wuJJJ9DXB8pYCLkX7X4,102
|
|
26
|
+
infra/connectors/file_connector.py,sha256=HOjRTFd-WfDOcFyvHncAhnGNZuFgChIwC-P6osPo9ZM,352
|
|
27
|
+
infra/connectors/file_connector_factory.py,sha256=xa4hphbeNVt0l4nn_mNbBT31lgn-OMlIQ8TtXpgN42M,2068
|
|
28
|
+
infra/connectors/google_cloud_storage_connector.py,sha256=5KDZlT7ndgkqFOC3SebrmSQpP2zSb3KnSBVXWWt7Alg,2040
|
|
29
|
+
infra/connectors/google_drive_connector.py,sha256=MyRECNWvLf5j-YvNwokcwihRS1WhZnBDUjx8tSup2L0,2527
|
|
30
|
+
infra/connectors/local_file_connector.py,sha256=FO9_HFGKRE_GQqOjyWuIHyNFXYUgro39CxxqEcXUq1c,1719
|
|
31
|
+
infra/connectors/s3_connector.py,sha256=M91gWnOgyCYIsoB0xka2oSaOTL4gDgjfjHwZi20jgnw,1113
|
|
32
|
+
repositories/__init__.py,sha256=5JqK9sZ6jBuK83zDQokUhxQ0wuJJJ9DXB8pYCLkX7X4,102
|
|
33
|
+
repositories/database_manager.py,sha256=UWOZhbUjNQV0yoL9xevuAnxV2OuBkMaxu5nqaqMuMQE,3098
|
|
34
|
+
repositories/document_repo.py,sha256=dq41jj5hBOd6mRKdaSRMhyiHk12ukX43njVyLuuGl44,1094
|
|
35
|
+
repositories/llm_query_repo.py,sha256=NF8Ybye_sMVvzeQGa_5MGJup0AFvl_P1Z0oCsr2Wz0Y,3592
|
|
36
|
+
repositories/models.py,sha256=RJH0dtq2VIDJFaFDufV05NUnNW4WtNpdTjhONtfLyCw,13179
|
|
37
|
+
repositories/profile_repo.py,sha256=JWbClu9SM18koUceLQdeuT5DA37h6ij5aeK-nwoRgIk,4042
|
|
38
|
+
repositories/tasks_repo.py,sha256=cCg4uz8ou4tkx7yzmktEV4lfQjQBU-RylcqUof2dytY,1725
|
|
39
|
+
repositories/vs_repo.py,sha256=PP6iPf0rt64rMyvqg-ffJk__sLbT2wgxx3dta3o8Vuo,5270
|
|
40
|
+
services/__init__.py,sha256=5JqK9sZ6jBuK83zDQokUhxQ0wuJJJ9DXB8pYCLkX7X4,102
|
|
41
|
+
services/benchmark_service.py,sha256=_ruKh9YzrTLtR0ZKrRNxqJQW0HdbwWuFz1gfLzJ9owA,5850
|
|
42
|
+
services/dispatcher_service.py,sha256=y2J-TfrkoCcsOESr3E6E3q5bleJfRibT4ONxs9AxUSg,13959
|
|
43
|
+
services/document_service.py,sha256=8MsJz0pihM9E9Z92PrPqDgQnpMAmpFrbogXr5HscMWM,5926
|
|
44
|
+
services/excel_service.py,sha256=ATPaeAvkLwQAkPZ3AKIUpO73RVyRg0D8c6i37_mcql0,3559
|
|
45
|
+
services/file_processor_service.py,sha256=98yWYF7nIq1nm7nh6IzMmTKaOMTIeqCFWYwXVtV-ZJI,4102
|
|
46
|
+
services/history_service.py,sha256=j0QCqcIIyw7DBy3GrZrEZNk0I4m-uuRoG5g0Z2RCcOE,1586
|
|
47
|
+
services/jwt_service.py,sha256=YoZ9h7_o9xBko-arNQv4MbcwnxoSWVNj4VbZmMo_QGY,3908
|
|
48
|
+
services/load_documents_service.py,sha256=UGfomYz7seWFXawbDuk2t6CyoEr1vggR8vmrCUAeLBg,7190
|
|
49
|
+
services/mail_service.py,sha256=_67pctxZO46DHnWBip51ayuYtWd4bEoS1kg29ACO7_I,2162
|
|
50
|
+
services/profile_service.py,sha256=vTK9TvH_2AFdqgL3sGjhck9LyLGIbvdC2USoaRx82G8,17561
|
|
51
|
+
services/prompt_manager_service.py,sha256=7SMC6N_T4BP4m5-ZNYAL3Y2YWHwl1bytXSgnEqu5bWI,8301
|
|
52
|
+
services/query_service.py,sha256=gvUnq0Vpn4gv5ycQk8-fklh7NDFIXpO1Vt1lT2ugO6Q,15283
|
|
53
|
+
services/search_service.py,sha256=bB3FWFxJi1iYsOdBxyu9tzIO406nQxcyeQzEowpmpjY,1803
|
|
54
|
+
services/sql_service.py,sha256=s84K1ADlvMtum949wgMh8jsmqlOUeL-m_SWfAM4Wsv4,2141
|
|
55
|
+
services/tasks_service.py,sha256=1DdbERlAxIkCpGEylnHDKC-KAsXRJugbaRSzRbPfL58,6790
|
|
56
|
+
services/user_feedback_service.py,sha256=_LeNBYz4hHFapXfYTQVfkkD34gE8j2UeKnyOZ8H0nWo,2442
|
|
57
|
+
services/user_session_context_service.py,sha256=GluNSgqP6W_hFke4oslSnfGnU_b-ph28BHH6jf3EIm0,3797
|
|
58
|
+
views/__init__.py,sha256=5JqK9sZ6jBuK83zDQokUhxQ0wuJJJ9DXB8pYCLkX7X4,102
|
|
59
|
+
views/change_password_view.py,sha256=KY6mf25Vc-rJn8pTLuJlkIHB7j9_NI0UoxIWod32qpM,3930
|
|
60
|
+
views/chat_token_request_view.py,sha256=7Bgf7rOVdgo4u4nU4lZWk87wi_D9_-sYxB9zBuL8TcA,4388
|
|
61
|
+
views/chat_view.py,sha256=XvKJApeJPU9uoH0XpHusehNbge3dDwwr72iMcuBveBY,2048
|
|
62
|
+
views/download_file_view.py,sha256=AIHLURyPi7w0k_Rs_fqz0yOp1Hav8K73AC5Bxx4vzf4,1980
|
|
63
|
+
views/external_chat_login_view.py,sha256=pz8EPmTSrYHpr3WbfYYBbPvSKaDJ9taOro4bMqTAZZE,3732
|
|
64
|
+
views/external_login_view.py,sha256=2NPzdxIqw1zvRHcsKOVvL896mE9B4GCUI6gBW7LsZCU,1335
|
|
65
|
+
views/file_store_view.py,sha256=591Kc2QR7SdxMrcJcL1ciMEwRopCpqYbmODL4XZ2SU8,1913
|
|
66
|
+
views/forgot_password_view.py,sha256=7W2VP3nZlUdgGLv-zc7Pj5wa7OsT3K66t6fkntrSPzI,2705
|
|
67
|
+
views/history_view.py,sha256=nkvV6Dg6SrVWZsWQJaCpy7D9VygwXDQF5FVanhzHdeo,2044
|
|
68
|
+
views/home_view.py,sha256=lQvDYUJ1X-xu0GuJcZJwF9iW74d834_q8fCkLgqo6j4,1238
|
|
69
|
+
views/llmquery_view.py,sha256=_CsbtFQFRuGCe50TWJDtTSUyaM3kMMwkn54kVuxMEQw,2412
|
|
70
|
+
views/login_view.py,sha256=s2FQPo8EuJCWPoC2hs1Wyqh8rY6Dc_5HmiL9b9GLhho,2322
|
|
71
|
+
views/prompt_view.py,sha256=ARuZ64EYjBhMAClUvjeFcr7zcgZUt2rEktbUvqpiE2U,1237
|
|
72
|
+
views/signup_view.py,sha256=Jn2F0gGBDmyUYt4vo0fw2UMHoJblau-Be62zNAMas50,3795
|
|
73
|
+
views/tasks_review_view.py,sha256=nvxdUJowfpgrJk0m8quxYg4iw5BZOhZhpDpPt5-wsDI,3368
|
|
74
|
+
views/tasks_view.py,sha256=TWW48wzcJzqIPat1kSRx41Et92-wLr-wCkgypWYZ7dk,4090
|
|
75
|
+
views/user_feedback_view.py,sha256=inZoNl5YnEq9_2FXOVvzKymXuSPELLmi9zPNG3zQxEQ,2632
|
|
76
|
+
views/verify_user_view.py,sha256=0hRKIudox7KjxZDH973trUhw4fZtc7eiLz6R44QbIG8,2341
|
|
77
|
+
iatoolkit-0.7.6.dist-info/METADATA,sha256=ewI228_gn4LwbqX0Dmd39uey9MCnkMRxpsL6Dcs5ujg,9300
|
|
78
|
+
iatoolkit-0.7.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
79
|
+
iatoolkit-0.7.6.dist-info/top_level.txt,sha256=ZXCST2AUsVPIeMiz49ZFn7WEfIiO4d8BfWY9QzUqP0s,51
|
|
80
|
+
iatoolkit-0.7.6.dist-info/RECORD,,
|
infra/__init__.py
ADDED
infra/call_service.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
from common.exceptions import IAToolkitException
|
|
7
|
+
from injector import inject
|
|
8
|
+
# call_service.py
|
|
9
|
+
import requests
|
|
10
|
+
from typing import Optional, Dict, Any, Tuple, Union
|
|
11
|
+
from requests import RequestException
|
|
12
|
+
|
|
13
|
+
class CallServiceClient:
    """Thin JSON-oriented wrapper around the ``requests`` HTTP helpers.

    Every public method returns a ``(payload, status_code)`` tuple, where
    ``payload`` is the decoded JSON body when the response parses as JSON and
    the raw response text otherwise. Failures raised by ``requests`` as
    ``RequestException`` are re-raised as ``IAToolkitException`` with the
    ``REQUEST_ERROR`` error type, chained to the original exception.
    """

    @inject
    def __init__(self):
        # Default headers sent with every request unless overridden per call.
        self.headers = {'Content-Type': 'application/json'}

    def _merge_headers(self, extra: Optional[Dict[str, str]]) -> Dict[str, str]:
        """Return a new dict holding the default headers overlaid by ``extra``."""
        merged = dict(self.headers)
        if extra:
            merged.update(extra)
        return merged

    def _normalize_timeout(self, timeout: Union[int, float, Tuple[int, int], Tuple[float, float]]) -> Tuple[float, float]:
        """Convert ``timeout`` into a ``(connect, read)`` tuple.

        A single number is used as the read timeout and paired with a fixed
        connect timeout of 10; a two-item sequence is passed through with both
        items converted to ``float``.
        """
        if isinstance(timeout, (int, float)):
            return (10, float(timeout))
        return (float(timeout[0]), float(timeout[1]))

    def _deserialize_response(self, response) -> Tuple[Any, int]:
        """Return ``(body, status_code)``, decoding the body as JSON when possible."""
        try:
            return response.json(), response.status_code
        except ValueError:
            # Not JSON: fall back to the raw text.
            return response.text, response.status_code

    def _send(self, method, endpoint: str, **kwargs) -> Tuple[Any, int]:
        """Call a ``requests`` helper and translate its failures.

        :param method: the ``requests`` function to invoke (e.g. ``requests.get``).
        :param endpoint: URL passed as the first positional argument.
        :param kwargs: keyword arguments forwarded unchanged to ``method``.
        :raises IAToolkitException: when ``method`` raises ``RequestException``.
        """
        try:
            response = method(endpoint, **kwargs)
        except RequestException as e:
            # Chain the cause so the original transport error stays visible.
            raise IAToolkitException(IAToolkitException.ErrorType.REQUEST_ERROR, str(e)) from e
        return self._deserialize_response(response)

    def get(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        timeout: Union[int, float, Tuple[int, int]] = 10
    ):
        """Send a GET request and return ``(payload, status_code)``."""
        return self._send(
            requests.get,
            endpoint,
            params=params,
            headers=self._merge_headers(headers),
            timeout=self._normalize_timeout(timeout)
        )

    def post(
        self,
        endpoint: str,
        json_dict: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        timeout: Union[int, float, Tuple[int, int]] = 10
    ):
        """Send a POST request with a JSON body and return ``(payload, status_code)``."""
        return self._send(
            requests.post,
            endpoint,
            params=params,
            json=json_dict,
            headers=self._merge_headers(headers),
            timeout=self._normalize_timeout(timeout)
        )

    def put(
        self,
        endpoint: str,
        json_dict: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        timeout: Union[int, float, Tuple[int, int]] = 10
    ):
        """Send a PUT request with a JSON body and return ``(payload, status_code)``."""
        return self._send(
            requests.put,
            endpoint,
            params=params,
            json=json_dict,
            headers=self._merge_headers(headers),
            timeout=self._normalize_timeout(timeout)
        )

    def delete(
        self,
        endpoint: str,
        json_dict: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        timeout: Union[int, float, Tuple[int, int]] = 10
    ):
        """Send a DELETE request with a JSON body and return ``(payload, status_code)``."""
        return self._send(
            requests.delete,
            endpoint,
            params=params,
            json=json_dict,
            headers=self._merge_headers(headers),
            timeout=self._normalize_timeout(timeout)
        )

    def post_files(
        self,
        endpoint: str,
        data: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        timeout: Union[int, float, Tuple[int, int]] = 10
    ):
        """Send a POST request passing ``data`` as ``files`` and return ``(payload, status_code)``."""
        # For multipart/form-data the default Content-Type is not imposed;
        # a Content-Type supplied by the caller in ``headers`` still applies.
        merged_headers = dict(self.headers)
        merged_headers.pop('Content-Type', None)
        if headers:
            merged_headers.update(headers)

        return self._send(
            requests.post,
            endpoint,
            params=params,
            files=data,
            headers=merged_headers,
            timeout=self._normalize_timeout(timeout)
        )
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import List
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileConnector(ABC):
    """Abstract interface for a backend that can list and read files."""

    @abstractmethod
    def list_files(self) -> List[dict]:
        """Return one dict per available file.

        The connector implementations shipped alongside this class return
        dicts with the keys ``'path'``, ``'name'`` and ``'metadata'``; the
        ``'path'`` value is what they accept in ``get_file_content``.
        """

    @abstractmethod
    def get_file_content(self, file_path: str) -> bytes:
        """Return the raw bytes of the file identified by ``file_path``."""
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
from infra.connectors.file_connector import FileConnector
|
|
7
|
+
from infra.connectors.local_file_connector import LocalFileConnector
|
|
8
|
+
from infra.connectors.s3_connector import S3Connector
|
|
9
|
+
from infra.connectors.google_drive_connector import GoogleDriveConnector
|
|
10
|
+
from infra.connectors.google_cloud_storage_connector import GoogleCloudStorageConnector
|
|
11
|
+
from typing import Dict
|
|
12
|
+
import os
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FileConnectorFactory:
    """Builds the ``FileConnector`` implementation selected by a config dict."""

    @staticmethod
    def create(config: Dict) -> FileConnector:
        """Create the connector described by ``config``.

        Expected configuration::

            {
                "type": "local" | "s3" | "gdrive" | "gcs",
                "path": "/local/path",                 # local only
                "bucket": "my-bucket",                 # S3 and GCS
                "prefix": "data/",                     # S3 only (default '')
                "folder_id": "xxxxxxx",                # Google Drive only
                "service_account": "/path/sa.json"     # Google Drive and GCS
            }

        S3 credentials are not read from ``config``: they come from the
        ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and ``AWS_REGION``
        environment variables (region defaults to ``us-east-1``).
        ``service_account`` defaults to ``'service_account.json'``.

        :raises KeyError: if a key required by the selected type is missing.
        :raises ValueError: if ``config['type']`` is not a known connector type.
        """
        connector_type = config.get('type')

        if connector_type == 'local':
            return LocalFileConnector(config['path'])

        elif connector_type == 's3':
            auth = {
                'aws_access_key_id': os.getenv('AWS_ACCESS_KEY_ID'),
                'aws_secret_access_key': os.getenv('AWS_SECRET_ACCESS_KEY'),
                'region_name': os.getenv('AWS_REGION', 'us-east-1')
            }

            return S3Connector(
                bucket=config['bucket'],
                prefix=config.get('prefix', ''),
                auth=auth
            )

        elif connector_type == 'gdrive':
            # Honour a configured service-account path, as the GCS branch
            # does, instead of always using the hard-coded default.
            return GoogleDriveConnector(config['folder_id'],
                                        config.get('service_account', 'service_account.json'))

        elif connector_type == 'gcs':
            return GoogleCloudStorageConnector(
                bucket_name=config['bucket'],
                service_account_path=config.get('service_account', 'service_account.json')
            )

        else:
            raise ValueError(f"Unknown connector type: {connector_type}")
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
from infra.connectors.file_connector import FileConnector
|
|
7
|
+
from google.cloud import storage
|
|
8
|
+
from typing import List
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GoogleCloudStorageConnector(FileConnector):
    """File connector that reads from a Google Cloud Storage bucket."""

    def __init__(self, bucket_name: str, service_account_path: str = "service_account.json"):
        """
        Set up the storage client and the bucket handle.

        :param bucket_name: Name of the bucket in Google Cloud Storage.
        :param service_account_path: Path to the service-account JSON file.
        """
        self.bucket_name = bucket_name
        self.service_account_path = service_account_path
        self.storage_client = self._authenticate()
        self.bucket = self.storage_client.bucket(bucket_name)

    def _authenticate(self):
        """Build a storage client from the service-account JSON file."""
        return storage.Client.from_service_account_json(self.service_account_path)

    def list_files(self) -> List[dict]:
        """
        List every blob in the bucket as a dict with the keys
        'path', 'name' and 'metadata'.
        """
        entries = []
        for blob in self.bucket.list_blobs():
            blob_path = blob.name
            entries.append({
                "path": blob_path,                     # blob name, used as its path in the bucket
                "name": blob_path.split("/")[-1],      # last path segment
                "metadata": {"size": blob.size},       # size is the only metadata reported
            })
        return entries

    def get_file_content(self, file_path: str) -> bytes:
        """
        Download the content of the blob named ``file_path`` as bytes.
        """
        return self.bucket.blob(file_path).download_as_bytes()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
from infra.connectors.file_connector import FileConnector
|
|
7
|
+
from googleapiclient.discovery import build
|
|
8
|
+
from googleapiclient.http import MediaIoBaseDownload
|
|
9
|
+
from google.oauth2.service_account import Credentials
|
|
10
|
+
import io
|
|
11
|
+
from typing import List
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GoogleDriveConnector(FileConnector):
    """File connector that reads the files of one Google Drive folder."""

    def __init__(self, folder_id: str, service_account_path: str = "service_account.json"):
        """
        Set up the Drive API client.

        :param folder_id: ID of the folder in Google Drive.
        :param service_account_path: Path to the service-account JSON file.
        """
        self.folder_id = folder_id
        self.service_account_path = service_account_path
        self.drive_service = self._authenticate()

    def _authenticate(self):
        """
        Build a Drive v3 service object from the service-account file.
        """
        credentials = Credentials.from_service_account_file(
            self.service_account_path,
            scopes=["https://www.googleapis.com/auth/drive"]
        )
        return build('drive', 'v3', credentials=credentials)

    def list_files(self) -> List[dict]:
        """
        List the non-trashed files of the folder as dicts with the keys
        'path', 'name' and 'metadata'.

        Results are fetched page by page: the request is repeated with the
        returned ``nextPageToken`` until no token comes back, so folders
        larger than a single result page are listed completely.
        """
        query = f"'{self.folder_id}' in parents and trashed=false"
        request_args = {"q": query, "fields": "nextPageToken, files(id, name)"}

        entries = []
        while True:
            results = self.drive_service.files().list(**request_args).execute()
            entries.extend(
                {
                    "path": file['id'],      # Drive file ID, accepted by get_file_content
                    "name": file['name'],    # file name as shown in Drive
                    "metadata": {}           # no extra metadata is requested
                }
                for file in results.get('files', [])
            )

            page_token = results.get('nextPageToken')
            if not page_token:
                break
            request_args["pageToken"] = page_token

        return entries

    def get_file_content(self, file_path: str) -> bytes:
        """
        Download a Drive file by its ID (``file_path``) and return its bytes.
        """
        request = self.drive_service.files().get_media(fileId=file_path)
        file_buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(file_buffer, request)

        # next_chunk() returns (status, done); only the completion flag is needed.
        done = False
        while not done:
            _, done = downloader.next_chunk()

        return file_buffer.getvalue()
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
from infra.connectors.file_connector import FileConnector
|
|
8
|
+
from typing import List
|
|
9
|
+
from common.exceptions import IAToolkitException
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LocalFileConnector(FileConnector):
    """File connector that reads from a directory on the local filesystem."""

    def __init__(self, directory: str):
        """
        :param directory: Directory to read, joined onto the value of the
            ``ROOT_DIR_LOCAL_FILES`` environment variable (empty by default).
        """
        local_root = os.getenv("ROOT_DIR_LOCAL_FILES", '')
        self.directory = os.path.join(local_root, directory)

    def list_files(self) -> List[dict]:
        """
        List the regular files directly inside the directory as dicts with
        the keys 'path', 'name' and 'metadata'.

        :raises IAToolkitException: with ``FILE_IO_ERROR`` if anything fails
            while reading the directory; the original error is chained.
        """
        try:
            files = [
                os.path.join(self.directory, f)
                for f in os.listdir(self.directory)
                if os.path.isfile(os.path.join(self.directory, f))
            ]

            return [
                {
                    "path": file,                      # directory joined with the file name
                    "name": os.path.basename(file),    # file name only
                    "metadata": {"size": os.path.getsize(file), "last_modified": os.path.getmtime(file)}
                }
                for file in files
            ]
        except Exception as e:
            # Chain the cause so the underlying error is kept on the exception.
            raise IAToolkitException(IAToolkitException.ErrorType.FILE_IO_ERROR,
                                     f"Error procesando el directorio {self.directory}: {e}") from e

    def get_file_content(self, file_path: str) -> bytes:
        """
        Return the bytes of the file at ``file_path``.

        ``file_path`` is opened as given; it is not joined with the
        connector's directory.

        :raises IAToolkitException: with ``FILE_IO_ERROR`` if the file cannot
            be read; the original error is chained.
        """
        try:
            with open(file_path, 'rb') as f:
                return f.read()
        except Exception as e:
            raise IAToolkitException(IAToolkitException.ErrorType.FILE_IO_ERROR,
                                     f"Error leyendo el archivo {file_path}: {e}") from e
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Copyright (c) 2024 Fernando Libedinsky
|
|
2
|
+
# Product: IAToolkit
|
|
3
|
+
#
|
|
4
|
+
# IAToolkit is open source software.
|
|
5
|
+
|
|
6
|
+
import boto3
|
|
7
|
+
from infra.connectors.file_connector import FileConnector
|
|
8
|
+
from typing import List
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class S3Connector(FileConnector):
    """File connector that reads objects under a prefix of an S3 bucket."""

    def __init__(self, bucket: str, prefix: str, auth: dict):
        """
        :param bucket: Name of the S3 bucket.
        :param prefix: Key prefix used to filter the listing.
        :param auth: Keyword arguments forwarded to ``boto3.client('s3', ...)``.
        """
        self.bucket = bucket
        self.prefix = prefix
        self.s3 = boto3.client('s3', **auth)

    def list_files(self) -> List[dict]:
        """
        List the objects under the prefix as dicts with the keys
        'path', 'name' and 'metadata'.

        The listing is repeated with the returned ``NextContinuationToken``
        while the response reports ``IsTruncated``, so results that span
        more than one response page are returned completely.
        """
        request_args = {'Bucket': self.bucket, 'Prefix': self.prefix}

        entries = []
        while True:
            response = self.s3.list_objects_v2(**request_args)
            entries.extend(
                {
                    "path": obj['Key'],                    # s3 key
                    "name": obj['Key'].split('/')[-1],     # filename
                    "metadata": {"size": obj.get('Size'), "last_modified": obj.get('LastModified')}
                }
                for obj in response.get('Contents', [])
            )

            if not response.get('IsTruncated'):
                break
            request_args['ContinuationToken'] = response['NextContinuationToken']

        return entries

    def get_file_content(self, file_path: str) -> bytes:
        """Return the bytes of the object whose key is ``file_path``."""
        response = self.s3.get_object(Bucket=self.bucket, Key=file_path)
        return response['Body'].read()
|