sibi-dst 0.3.52__tar.gz → 0.3.53__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/PKG-INFO +6 -7
  2. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/pyproject.toml +2 -3
  3. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/_df_helper.py +2 -1
  4. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/_parquet_artifact.py +13 -1
  5. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/data_wrapper.py +6 -3
  6. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/parquet_saver.py +2 -7
  7. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/README.md +0 -0
  8. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/__init__.py +0 -0
  9. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/__init__.py +0 -0
  10. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +0 -0
  11. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/_parquet_reader.py +0 -0
  12. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/__init__.py +0 -0
  13. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/django/__init__.py +0 -0
  14. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/django/_db_connection.py +0 -0
  15. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/django/_io_dask.py +0 -0
  16. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/django/_load_from_db.py +0 -0
  17. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/django/_sql_model_builder.py +0 -0
  18. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
  19. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
  20. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
  21. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/parquet/_filter_handler.py +0 -0
  22. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
  23. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
  24. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  25. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/sqlalchemy/_filter_handler.py +0 -0
  26. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  27. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  28. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  29. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/core/__init__.py +0 -0
  30. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/core/_defaults.py +0 -0
  31. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
  32. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/core/_params_config.py +0 -0
  33. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/core/_query_config.py +0 -0
  34. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/df_helper/data_cleaner.py +0 -0
  35. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/geopy_helper/__init__.py +0 -0
  36. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
  37. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/geopy_helper/utils.py +0 -0
  38. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/osmnx_helper/__init__.py +0 -0
  39. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
  40. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
  41. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
  42. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
  43. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/osmnx_helper/utils.py +0 -0
  44. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/tests/__init__.py +0 -0
  45. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
  46. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/__init__.py +0 -0
  47. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/airflow_manager.py +0 -0
  48. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/clickhouse_writer.py +0 -0
  49. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/credentials.py +0 -0
  50. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/data_from_http_source.py +0 -0
  51. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/data_utils.py +0 -0
  52. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/date_utils.py +0 -0
  53. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/df_utils.py +0 -0
  54. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/file_utils.py +0 -0
  55. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/filepath_generator.py +0 -0
  56. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/log_utils.py +0 -0
  57. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/phone_formatter.py +0 -0
  58. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/storage_config.py +0 -0
  59. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/storage_manager.py +0 -0
  60. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/utils/webdav_client.py +0 -0
  61. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/__init__.py +0 -0
  62. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/__init__.py +0 -0
  63. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
  64. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
  65. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
  66. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  67. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  68. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  69. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
  70. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
  71. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
  72. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
  73. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
  74. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
  75. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
  76. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
  77. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
  78. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
  79. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/utils/__init__.py +0 -0
  80. {sibi_dst-0.3.52 → sibi_dst-0.3.53}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 0.3.52
3
+ Version: 0.3.53
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -11,8 +11,7 @@ Classifier: Programming Language :: Python :: 3.12
11
11
  Classifier: Programming Language :: Python :: 3.13
12
12
  Provides-Extra: complete
13
13
  Provides-Extra: df-helper
14
- Provides-Extra: geopy-helper
15
- Provides-Extra: osmnx-helper
14
+ Provides-Extra: geospatial
16
15
  Requires-Dist: apache-airflow-client (>=2.10.0,<3.0.0)
17
16
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
18
17
  Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
@@ -21,9 +20,9 @@ Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
21
20
  Requires-Dist: dask[complete] (>=2025.3.0,<2026.0.0)
22
21
  Requires-Dist: django (>=5.1.4,<6.0.0) ; extra == "df-helper" or extra == "complete"
23
22
  Requires-Dist: djangorestframework (>=3.15.2,<4.0.0) ; extra == "df-helper" or extra == "complete"
24
- Requires-Dist: folium (>=0.19.4,<0.20.0) ; extra == "osmnx-helper" or extra == "complete"
25
- Requires-Dist: geopandas (>=1.0.1,<2.0.0) ; extra == "osmnx-helper" or extra == "complete"
26
- Requires-Dist: geopy (>=2.4.1,<3.0.0) ; extra == "geopy-helper" or extra == "complete"
23
+ Requires-Dist: folium (>=0.19.4,<0.20.0) ; extra == "geospatial" or extra == "complete"
24
+ Requires-Dist: geopandas (>=1.0.1,<2.0.0) ; extra == "geospatial" or extra == "complete"
25
+ Requires-Dist: geopy (>=2.4.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
27
26
  Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
28
27
  Requires-Dist: httpx (>=0.27.2,<0.28.0)
29
28
  Requires-Dist: ipython (>=8.29.0,<9.0.0)
@@ -31,7 +30,7 @@ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
31
30
  Requires-Dist: mysqlclient (>=2.2.6,<3.0.0) ; extra == "df-helper" or extra == "complete"
32
31
  Requires-Dist: nltk (>=3.9.1,<4.0.0)
33
32
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
34
- Requires-Dist: osmnx (>=2.0.1,<3.0.0) ; extra == "osmnx-helper" or extra == "complete"
33
+ Requires-Dist: osmnx (>=2.0.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
35
34
  Requires-Dist: pandas (>=2.2.3,<3.0.0)
36
35
  Requires-Dist: paramiko (>=3.5.0,<4.0.0)
37
36
  Requires-Dist: psutil (>=6.1.0,<7.0.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sibi-dst"
3
- version = "0.3.52"
3
+ version = "0.3.53"
4
4
  description = "Data Science Toolkit"
5
5
  authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
6
6
  readme = "README.md"
@@ -48,8 +48,7 @@ webdav4 = { extras = ["fsspec"], version = "^0.10.0" }
48
48
 
49
49
  [tool.poetry.extras]
50
50
  df_helper = ["django", "sqlalchemy", "sqlmodel", "djangorestframework", "pymysql", "psycopg2", "mysqlclient"]
51
- geopy_helper = ["geopy"]
52
- osmnx_helper = ["folium", "geopandas", "osmnx"]
51
+ geospatial = ["geopy","folium", "geopandas", "osmnx"]
53
52
  complete = ["django", "sqlalchemy", "sqlmodel", "djangorestframework", "pymysql", "psycopg2", "mysqlclient", "geopy", "folium", "geopandas", "osmnx"]
54
53
  [build-system]
55
54
  requires = ["poetry-core"]
@@ -74,7 +74,7 @@ class DfHelper:
74
74
  logger: Logger
75
75
  default_config: Dict = None
76
76
 
77
- def __init__(self, backend='django_db', **kwargs):
77
+ def __init__(self, backend='sqlalchemy', **kwargs):
78
78
  # Ensure default_config is not shared across instances
79
79
  self.default_config = self.default_config or {}
80
80
  kwargs = {**self.default_config.copy(), **kwargs}
@@ -209,6 +209,7 @@ class DfHelper:
209
209
  else:
210
210
  self.logger.debug("Regular asyncio run...")
211
211
  return asyncio.run(self.__load_from_http(**options))
212
+ return dd.from_pandas(pd.DataFrame(), npartitions=1)
212
213
 
213
214
  def __load_from_sqlalchemy(self, **options):
214
215
  """
@@ -151,9 +151,21 @@ class ParquetArtifact(DfHelper):
151
151
  'parquet_end_date': datetime.date.today().strftime('%Y-%m-%d')
152
152
  }
153
153
 
154
+ def custom_config():
155
+ try:
156
+ start_date = kwargs.pop('start_on')
157
+ end_date = kwargs.pop('end_on')
158
+ except KeyError:
159
+ raise ValueError("For period 'custom', you must provide 'start_on' in kwargs.")
160
+ return {
161
+ 'parquet_start_date': start_date,
162
+ 'parquet_end_date': end_date
163
+ }
164
+
154
165
  config_map = {
155
166
  'itd': itd_config,
156
- 'ytd': ytd_config
167
+ 'ytd': ytd_config,
168
+ 'custom': custom_config,
157
169
  }
158
170
 
159
171
  if period in config_map:
@@ -47,7 +47,8 @@ class DataWrapper:
47
47
  show_progress: bool = False,
48
48
  timeout: float = 60,
49
49
  reference_date: datetime.date = None,
50
- custom_priority_map: Dict[str, int] = None):
50
+ custom_priority_map: Dict[str, int] = None,
51
+ max_threads: int = 3):
51
52
  self.dataclass = dataclass
52
53
  self.date_field = date_field
53
54
  self.data_path = self._ensure_forward_slash(data_path)
@@ -70,6 +71,7 @@ class DataWrapper:
70
71
  self.timeout = timeout
71
72
  self.reference_date = reference_date or datetime.date.today()
72
73
  self.priority_map = custom_priority_map or self.DEFAULT_PRIORITY_MAP
74
+ self.max_threads = max_threads
73
75
 
74
76
  self.start_date = self._convert_to_date(start_date)
75
77
  self.end_date = self._convert_to_date(end_date)
@@ -138,8 +140,9 @@ class DataWrapper:
138
140
 
139
141
  desc = f"Processing {self.dataclass.__name__}, task: {self._priority_label(priority)}"
140
142
  self.logger.info(f"Starting {desc.lower()}")
141
-
142
- with ThreadPoolExecutor() as executor:
143
+ max_threads = min(len(dates), self.max_threads)
144
+ self.logger.info(f"DataWrapper Max threads set at: {max_threads}")
145
+ with ThreadPoolExecutor(max_workers=max_threads) as executor:
143
146
  futures = {
144
147
  executor.submit(self._process_date_with_retry, date, max_retries): date
145
148
  for date in dates
@@ -1,15 +1,10 @@
1
1
  import base64
2
+ import hashlib
2
3
  import logging
3
- from pathlib import Path
4
+ import warnings
4
5
  from typing import Optional
5
6
 
6
7
  import pyarrow as pa
7
- import fsspec
8
- import warnings
9
- import hashlib
10
- from s3fs import S3FileSystem
11
-
12
- from fsspec import filesystem
13
8
 
14
9
  # Suppress the specific UserWarning message
15
10
  warnings.filterwarnings("ignore")
File without changes