sibi-dst 0.3.52__py3-none-any.whl → 0.3.53__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- sibi_dst/df_helper/_df_helper.py +2 -1
- sibi_dst/df_helper/_parquet_artifact.py +13 -1
- sibi_dst/utils/data_wrapper.py +6 -3
- sibi_dst/utils/parquet_saver.py +2 -7
- {sibi_dst-0.3.52.dist-info → sibi_dst-0.3.53.dist-info}/METADATA +6 -7
- {sibi_dst-0.3.52.dist-info → sibi_dst-0.3.53.dist-info}/RECORD +7 -7
- {sibi_dst-0.3.52.dist-info → sibi_dst-0.3.53.dist-info}/WHEEL +0 -0
sibi_dst/df_helper/_df_helper.py
CHANGED
@@ -74,7 +74,7 @@ class DfHelper:
|
|
74
74
|
logger: Logger
|
75
75
|
default_config: Dict = None
|
76
76
|
|
77
|
-
def __init__(self, backend='
|
77
|
+
def __init__(self, backend='sqlalchemy', **kwargs):
|
78
78
|
# Ensure default_config is not shared across instances
|
79
79
|
self.default_config = self.default_config or {}
|
80
80
|
kwargs = {**self.default_config.copy(), **kwargs}
|
@@ -209,6 +209,7 @@ class DfHelper:
|
|
209
209
|
else:
|
210
210
|
self.logger.debug("Regular asyncio run...")
|
211
211
|
return asyncio.run(self.__load_from_http(**options))
|
212
|
+
return dd.from_pandas(pd.DataFrame(), npartitions=1)
|
212
213
|
|
213
214
|
def __load_from_sqlalchemy(self, **options):
|
214
215
|
"""
|
@@ -151,9 +151,21 @@ class ParquetArtifact(DfHelper):
|
|
151
151
|
'parquet_end_date': datetime.date.today().strftime('%Y-%m-%d')
|
152
152
|
}
|
153
153
|
|
154
|
+
def custom_config():
|
155
|
+
try:
|
156
|
+
start_date = kwargs.pop('start_on')
|
157
|
+
end_date = kwargs.pop('end_on')
|
158
|
+
except KeyError:
|
159
|
+
raise ValueError("For period 'custom', you must provide 'start_on' in kwargs.")
|
160
|
+
return {
|
161
|
+
'parquet_start_date': start_date,
|
162
|
+
'parquet_end_date': end_date
|
163
|
+
}
|
164
|
+
|
154
165
|
config_map = {
|
155
166
|
'itd': itd_config,
|
156
|
-
'ytd': ytd_config
|
167
|
+
'ytd': ytd_config,
|
168
|
+
'custom': custom_config,
|
157
169
|
}
|
158
170
|
|
159
171
|
if period in config_map:
|
sibi_dst/utils/data_wrapper.py
CHANGED
@@ -47,7 +47,8 @@ class DataWrapper:
|
|
47
47
|
show_progress: bool = False,
|
48
48
|
timeout: float = 60,
|
49
49
|
reference_date: datetime.date = None,
|
50
|
-
custom_priority_map: Dict[str, int] = None
|
50
|
+
custom_priority_map: Dict[str, int] = None,
|
51
|
+
max_threads: int = 3):
|
51
52
|
self.dataclass = dataclass
|
52
53
|
self.date_field = date_field
|
53
54
|
self.data_path = self._ensure_forward_slash(data_path)
|
@@ -70,6 +71,7 @@ class DataWrapper:
|
|
70
71
|
self.timeout = timeout
|
71
72
|
self.reference_date = reference_date or datetime.date.today()
|
72
73
|
self.priority_map = custom_priority_map or self.DEFAULT_PRIORITY_MAP
|
74
|
+
self.max_threads = max_threads
|
73
75
|
|
74
76
|
self.start_date = self._convert_to_date(start_date)
|
75
77
|
self.end_date = self._convert_to_date(end_date)
|
@@ -138,8 +140,9 @@ class DataWrapper:
|
|
138
140
|
|
139
141
|
desc = f"Processing {self.dataclass.__name__}, task: {self._priority_label(priority)}"
|
140
142
|
self.logger.info(f"Starting {desc.lower()}")
|
141
|
-
|
142
|
-
|
143
|
+
max_threads = min(len(dates), self.max_threads)
|
144
|
+
self.logger.info(f"DataWrapper Max threads set at: {max_threads}")
|
145
|
+
with ThreadPoolExecutor(max_workers=max_threads) as executor:
|
143
146
|
futures = {
|
144
147
|
executor.submit(self._process_date_with_retry, date, max_retries): date
|
145
148
|
for date in dates
|
sibi_dst/utils/parquet_saver.py
CHANGED
@@ -1,15 +1,10 @@
|
|
1
1
|
import base64
|
2
|
+
import hashlib
|
2
3
|
import logging
|
3
|
-
|
4
|
+
import warnings
|
4
5
|
from typing import Optional
|
5
6
|
|
6
7
|
import pyarrow as pa
|
7
|
-
import fsspec
|
8
|
-
import warnings
|
9
|
-
import hashlib
|
10
|
-
from s3fs import S3FileSystem
|
11
|
-
|
12
|
-
from fsspec import filesystem
|
13
8
|
|
14
9
|
# Suppress the specific UserWarning message
|
15
10
|
warnings.filterwarnings("ignore")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sibi-dst
|
3
|
-
Version: 0.3.52
|
3
|
+
Version: 0.3.53
|
4
4
|
Summary: Data Science Toolkit
|
5
5
|
Author: Luis Valverde
|
6
6
|
Author-email: lvalverdeb@gmail.com
|
@@ -11,8 +11,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.13
|
12
12
|
Provides-Extra: complete
|
13
13
|
Provides-Extra: df-helper
|
14
|
-
Provides-Extra:
|
15
|
-
Provides-Extra: osmnx-helper
|
14
|
+
Provides-Extra: geospatial
|
16
15
|
Requires-Dist: apache-airflow-client (>=2.10.0,<3.0.0)
|
17
16
|
Requires-Dist: chardet (>=5.2.0,<6.0.0)
|
18
17
|
Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
|
@@ -21,9 +20,9 @@ Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
|
|
21
20
|
Requires-Dist: dask[complete] (>=2025.3.0,<2026.0.0)
|
22
21
|
Requires-Dist: django (>=5.1.4,<6.0.0) ; extra == "df-helper" or extra == "complete"
|
23
22
|
Requires-Dist: djangorestframework (>=3.15.2,<4.0.0) ; extra == "df-helper" or extra == "complete"
|
24
|
-
Requires-Dist: folium (>=0.19.4,<0.20.0) ; extra == "
|
25
|
-
Requires-Dist: geopandas (>=1.0.1,<2.0.0) ; extra == "
|
26
|
-
Requires-Dist: geopy (>=2.4.1,<3.0.0) ; extra == "
|
23
|
+
Requires-Dist: folium (>=0.19.4,<0.20.0) ; extra == "geospatial" or extra == "complete"
|
24
|
+
Requires-Dist: geopandas (>=1.0.1,<2.0.0) ; extra == "geospatial" or extra == "complete"
|
25
|
+
Requires-Dist: geopy (>=2.4.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
|
27
26
|
Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
|
28
27
|
Requires-Dist: httpx (>=0.27.2,<0.28.0)
|
29
28
|
Requires-Dist: ipython (>=8.29.0,<9.0.0)
|
@@ -31,7 +30,7 @@ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
31
30
|
Requires-Dist: mysqlclient (>=2.2.6,<3.0.0) ; extra == "df-helper" or extra == "complete"
|
32
31
|
Requires-Dist: nltk (>=3.9.1,<4.0.0)
|
33
32
|
Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
|
34
|
-
Requires-Dist: osmnx (>=2.0.1,<3.0.0) ; extra == "
|
33
|
+
Requires-Dist: osmnx (>=2.0.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
|
35
34
|
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
36
35
|
Requires-Dist: paramiko (>=3.5.0,<4.0.0)
|
37
36
|
Requires-Dist: psutil (>=6.1.0,<7.0.0)
|
@@ -1,8 +1,8 @@
|
|
1
1
|
sibi_dst/__init__.py,sha256=3pbriM7Ym5f9gew7n9cO4G_p9n-0bnxdmQ0hwBdJjr4,253
|
2
2
|
sibi_dst/df_helper/__init__.py,sha256=McYrw2N0MsMgtawLrONXTGdyHfQWVOBUvIDbklfjb54,342
|
3
3
|
sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=toH2QvNF-CQNJ4Bc8xreytuWr37G0EWz4ciWVdFMVqU,11646
|
4
|
-
sibi_dst/df_helper/_df_helper.py,sha256=
|
5
|
-
sibi_dst/df_helper/_parquet_artifact.py,sha256=
|
4
|
+
sibi_dst/df_helper/_df_helper.py,sha256=D85n4oUdu92IN2QaPc6k9uJJ_Vm197me1aoHojuWEYs,29833
|
5
|
+
sibi_dst/df_helper/_parquet_artifact.py,sha256=dDoR5Tq0EJViW52tD9XW_vno7hkOfA8WeAilR1mAb_g,10636
|
6
6
|
sibi_dst/df_helper/_parquet_reader.py,sha256=L6mr2FeKtTeIn37G9EGpvOx8PwMqXb6qnEECqBaiwxo,3954
|
7
7
|
sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
sibi_dst/df_helper/backends/django/__init__.py,sha256=uWHi-DtQX5re7b2HcqoXUH3_FZWOw1VTmDf552FAkNs,256
|
@@ -44,13 +44,13 @@ sibi_dst/utils/clickhouse_writer.py,sha256=iAUe4_Kn2WR1xZjpLW2FOWCWfOTw6fCGMTUcW
|
|
44
44
|
sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
|
45
45
|
sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
|
46
46
|
sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
|
47
|
-
sibi_dst/utils/data_wrapper.py,sha256=
|
47
|
+
sibi_dst/utils/data_wrapper.py,sha256=OMmdjnUCjxDsC1T0K1JbzxKXLTiDOtxPp1ujV66bMAY,12345
|
48
48
|
sibi_dst/utils/date_utils.py,sha256=OCJqkWl5e8fE7z11Ufz4206DUeuLMd_Gf_JGZu914Pg,18539
|
49
49
|
sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
|
50
50
|
sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
|
51
51
|
sibi_dst/utils/filepath_generator.py,sha256=-HHO0U-PR8fysDDFwnWdHRlgqksh_RkmgBZLWv9hM7s,6669
|
52
52
|
sibi_dst/utils/log_utils.py,sha256=eSAbi_jmMpJ8RpycakzT4S4zNkqVZDj3FY8WwnxpdXc,4623
|
53
|
-
sibi_dst/utils/parquet_saver.py,sha256=
|
53
|
+
sibi_dst/utils/parquet_saver.py,sha256=EBtd9blzk7Wb65aDBVVU0ZMHFtqjfWi_fCUt1LvyAC4,8069
|
54
54
|
sibi_dst/utils/phone_formatter.py,sha256=tsVTDamuthFYgy4-5UwmQkPQ-FGTGH7MjZyH8utAkIY,4945
|
55
55
|
sibi_dst/utils/storage_config.py,sha256=Cg8EOGLZ_5v9sunaQHZLYHdp5FDkgPrCVVNHF-ys5sQ,2181
|
56
56
|
sibi_dst/utils/storage_manager.py,sha256=btecX7ggNb7rfu5EK9Xuu2q_FZA7r_rB_tfhQ8V96qc,6567
|
@@ -75,6 +75,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
|
|
75
75
|
sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
|
76
76
|
sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
|
77
77
|
sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
|
78
|
-
sibi_dst-0.3.
|
79
|
-
sibi_dst-0.3.
|
80
|
-
sibi_dst-0.3.52.dist-info/RECORD,,
|
78
|
+
sibi_dst-0.3.53.dist-info/METADATA,sha256=rNy87i3acBUu27BdqZ-J73e9oy6pd8YEKCX-HrE48ig,7182
|
79
|
+
sibi_dst-0.3.53.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
80
|
+
sibi_dst-0.3.53.dist-info/RECORD,,
|
File without changes
|