sibi-dst 0.3.52__py3-none-any.whl → 0.3.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,7 +74,7 @@ class DfHelper:
74
74
  logger: Logger
75
75
  default_config: Dict = None
76
76
 
77
- def __init__(self, backend='django_db', **kwargs):
77
+ def __init__(self, backend='sqlalchemy', **kwargs):
78
78
  # Ensure default_config is not shared across instances
79
79
  self.default_config = self.default_config or {}
80
80
  kwargs = {**self.default_config.copy(), **kwargs}
@@ -209,6 +209,7 @@ class DfHelper:
209
209
  else:
210
210
  self.logger.debug("Regular asyncio run...")
211
211
  return asyncio.run(self.__load_from_http(**options))
212
+ return dd.from_pandas(pd.DataFrame(), npartitions=1)
212
213
 
213
214
  def __load_from_sqlalchemy(self, **options):
214
215
  """
@@ -151,9 +151,21 @@ class ParquetArtifact(DfHelper):
151
151
  'parquet_end_date': datetime.date.today().strftime('%Y-%m-%d')
152
152
  }
153
153
 
154
+ def custom_config():
155
+ try:
156
+ start_date = kwargs.pop('start_on')
157
+ end_date = kwargs.pop('end_on')
158
+ except KeyError:
159
+ raise ValueError("For period 'custom', you must provide 'start_on' in kwargs.")
160
+ return {
161
+ 'parquet_start_date': start_date,
162
+ 'parquet_end_date': end_date
163
+ }
164
+
154
165
  config_map = {
155
166
  'itd': itd_config,
156
- 'ytd': ytd_config
167
+ 'ytd': ytd_config,
168
+ 'custom': custom_config,
157
169
  }
158
170
 
159
171
  if period in config_map:
@@ -47,7 +47,8 @@ class DataWrapper:
47
47
  show_progress: bool = False,
48
48
  timeout: float = 60,
49
49
  reference_date: datetime.date = None,
50
- custom_priority_map: Dict[str, int] = None):
50
+ custom_priority_map: Dict[str, int] = None,
51
+ max_threads: int = 3):
51
52
  self.dataclass = dataclass
52
53
  self.date_field = date_field
53
54
  self.data_path = self._ensure_forward_slash(data_path)
@@ -70,6 +71,7 @@ class DataWrapper:
70
71
  self.timeout = timeout
71
72
  self.reference_date = reference_date or datetime.date.today()
72
73
  self.priority_map = custom_priority_map or self.DEFAULT_PRIORITY_MAP
74
+ self.max_threads = max_threads
73
75
 
74
76
  self.start_date = self._convert_to_date(start_date)
75
77
  self.end_date = self._convert_to_date(end_date)
@@ -138,8 +140,9 @@ class DataWrapper:
138
140
 
139
141
  desc = f"Processing {self.dataclass.__name__}, task: {self._priority_label(priority)}"
140
142
  self.logger.info(f"Starting {desc.lower()}")
141
-
142
- with ThreadPoolExecutor() as executor:
143
+ max_threads = min(len(dates), self.max_threads)
144
+ self.logger.info(f"DataWrapper Max threads set at: {max_threads}")
145
+ with ThreadPoolExecutor(max_workers=max_threads) as executor:
143
146
  futures = {
144
147
  executor.submit(self._process_date_with_retry, date, max_retries): date
145
148
  for date in dates
@@ -1,15 +1,10 @@
1
1
  import base64
2
+ import hashlib
2
3
  import logging
3
- from pathlib import Path
4
+ import warnings
4
5
  from typing import Optional
5
6
 
6
7
  import pyarrow as pa
7
- import fsspec
8
- import warnings
9
- import hashlib
10
- from s3fs import S3FileSystem
11
-
12
- from fsspec import filesystem
13
8
 
14
9
  # Suppress the specific UserWarning message
15
10
  warnings.filterwarnings("ignore")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 0.3.52
3
+ Version: 0.3.53
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -11,8 +11,7 @@ Classifier: Programming Language :: Python :: 3.12
11
11
  Classifier: Programming Language :: Python :: 3.13
12
12
  Provides-Extra: complete
13
13
  Provides-Extra: df-helper
14
- Provides-Extra: geopy-helper
15
- Provides-Extra: osmnx-helper
14
+ Provides-Extra: geospatial
16
15
  Requires-Dist: apache-airflow-client (>=2.10.0,<3.0.0)
17
16
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
18
17
  Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
@@ -21,9 +20,9 @@ Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
21
20
  Requires-Dist: dask[complete] (>=2025.3.0,<2026.0.0)
22
21
  Requires-Dist: django (>=5.1.4,<6.0.0) ; extra == "df-helper" or extra == "complete"
23
22
  Requires-Dist: djangorestframework (>=3.15.2,<4.0.0) ; extra == "df-helper" or extra == "complete"
24
- Requires-Dist: folium (>=0.19.4,<0.20.0) ; extra == "osmnx-helper" or extra == "complete"
25
- Requires-Dist: geopandas (>=1.0.1,<2.0.0) ; extra == "osmnx-helper" or extra == "complete"
26
- Requires-Dist: geopy (>=2.4.1,<3.0.0) ; extra == "geopy-helper" or extra == "complete"
23
+ Requires-Dist: folium (>=0.19.4,<0.20.0) ; extra == "geospatial" or extra == "complete"
24
+ Requires-Dist: geopandas (>=1.0.1,<2.0.0) ; extra == "geospatial" or extra == "complete"
25
+ Requires-Dist: geopy (>=2.4.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
27
26
  Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
28
27
  Requires-Dist: httpx (>=0.27.2,<0.28.0)
29
28
  Requires-Dist: ipython (>=8.29.0,<9.0.0)
@@ -31,7 +30,7 @@ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
31
30
  Requires-Dist: mysqlclient (>=2.2.6,<3.0.0) ; extra == "df-helper" or extra == "complete"
32
31
  Requires-Dist: nltk (>=3.9.1,<4.0.0)
33
32
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
34
- Requires-Dist: osmnx (>=2.0.1,<3.0.0) ; extra == "osmnx-helper" or extra == "complete"
33
+ Requires-Dist: osmnx (>=2.0.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
35
34
  Requires-Dist: pandas (>=2.2.3,<3.0.0)
36
35
  Requires-Dist: paramiko (>=3.5.0,<4.0.0)
37
36
  Requires-Dist: psutil (>=6.1.0,<7.0.0)
@@ -1,8 +1,8 @@
1
1
  sibi_dst/__init__.py,sha256=3pbriM7Ym5f9gew7n9cO4G_p9n-0bnxdmQ0hwBdJjr4,253
2
2
  sibi_dst/df_helper/__init__.py,sha256=McYrw2N0MsMgtawLrONXTGdyHfQWVOBUvIDbklfjb54,342
3
3
  sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=toH2QvNF-CQNJ4Bc8xreytuWr37G0EWz4ciWVdFMVqU,11646
4
- sibi_dst/df_helper/_df_helper.py,sha256=IS1m9r9U-eJ7EVMBqmITmre6S0JfIl6nJtPIwNI3xKY,29771
5
- sibi_dst/df_helper/_parquet_artifact.py,sha256=qt3WZXrE2EZs3KI0biGzm3znIZazzTh8fgiipCVr_Ic,10196
4
+ sibi_dst/df_helper/_df_helper.py,sha256=D85n4oUdu92IN2QaPc6k9uJJ_Vm197me1aoHojuWEYs,29833
5
+ sibi_dst/df_helper/_parquet_artifact.py,sha256=dDoR5Tq0EJViW52tD9XW_vno7hkOfA8WeAilR1mAb_g,10636
6
6
  sibi_dst/df_helper/_parquet_reader.py,sha256=L6mr2FeKtTeIn37G9EGpvOx8PwMqXb6qnEECqBaiwxo,3954
7
7
  sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  sibi_dst/df_helper/backends/django/__init__.py,sha256=uWHi-DtQX5re7b2HcqoXUH3_FZWOw1VTmDf552FAkNs,256
@@ -44,13 +44,13 @@ sibi_dst/utils/clickhouse_writer.py,sha256=iAUe4_Kn2WR1xZjpLW2FOWCWfOTw6fCGMTUcW
44
44
  sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
45
45
  sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
46
46
  sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
47
- sibi_dst/utils/data_wrapper.py,sha256=pIIQxeHknUeQd0YbISkAhL-xYBK4OdijoATBY-oBznw,12114
47
+ sibi_dst/utils/data_wrapper.py,sha256=OMmdjnUCjxDsC1T0K1JbzxKXLTiDOtxPp1ujV66bMAY,12345
48
48
  sibi_dst/utils/date_utils.py,sha256=OCJqkWl5e8fE7z11Ufz4206DUeuLMd_Gf_JGZu914Pg,18539
49
49
  sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
50
50
  sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
51
51
  sibi_dst/utils/filepath_generator.py,sha256=-HHO0U-PR8fysDDFwnWdHRlgqksh_RkmgBZLWv9hM7s,6669
52
52
  sibi_dst/utils/log_utils.py,sha256=eSAbi_jmMpJ8RpycakzT4S4zNkqVZDj3FY8WwnxpdXc,4623
53
- sibi_dst/utils/parquet_saver.py,sha256=Tucxv9jRX66VuLQZn0dPQBN7JOttBou6SF8FxqufeGE,8169
53
+ sibi_dst/utils/parquet_saver.py,sha256=EBtd9blzk7Wb65aDBVVU0ZMHFtqjfWi_fCUt1LvyAC4,8069
54
54
  sibi_dst/utils/phone_formatter.py,sha256=tsVTDamuthFYgy4-5UwmQkPQ-FGTGH7MjZyH8utAkIY,4945
55
55
  sibi_dst/utils/storage_config.py,sha256=Cg8EOGLZ_5v9sunaQHZLYHdp5FDkgPrCVVNHF-ys5sQ,2181
56
56
  sibi_dst/utils/storage_manager.py,sha256=btecX7ggNb7rfu5EK9Xuu2q_FZA7r_rB_tfhQ8V96qc,6567
@@ -75,6 +75,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
75
75
  sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
76
76
  sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
77
77
  sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
78
- sibi_dst-0.3.52.dist-info/METADATA,sha256=VIiTzkdO-FsYBkie1cfuwFKEwFFSE5TL5Ory-VzPa0g,7221
79
- sibi_dst-0.3.52.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
80
- sibi_dst-0.3.52.dist-info/RECORD,,
78
+ sibi_dst-0.3.53.dist-info/METADATA,sha256=rNy87i3acBUu27BdqZ-J73e9oy6pd8YEKCX-HrE48ig,7182
79
+ sibi_dst-0.3.53.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
80
+ sibi_dst-0.3.53.dist-info/RECORD,,