sibi-dst 0.3.64__py3-none-any.whl → 2025.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,6 +26,7 @@ class BaseBackend:
     def __init__(self, helper: DfHelper):
         self.helper = helper
         self.logger = helper.logger
+        self.debug = helper.debug

     def load(self, **options) -> dd.DataFrame | pd.DataFrame:
         """Synchronous data loading method. Must be implemented by sync backends."""
@@ -47,7 +48,8 @@ class SqlAlchemyBackend(BaseBackend):
                 plugin_sqlalchemy=self.helper.backend_db_connection,
                 plugin_query=self.helper._backend_query,
                 plugin_params=self.helper._backend_params,
-                logger=self.logger
+                logger=self.logger,
+                debug=self.debug
             )
             return db_loader.build_and_load()
         except Exception as e:
@@ -62,10 +64,10 @@ class ParquetBackend(BaseBackend):
         try:
             df = self.helper.backend_parquet.load_files()
            if options and df is not None:
-                df = FilterHandler('dask', self.logger).apply_filters(df, filters=options)
+                df = FilterHandler('dask', logger=self.logger, debug=False).apply_filters(df, filters=options)
             return df
         except Exception as e:
-            self.logger.error(f"Failed to load data from parquet: {e}", exc_info=self.debug)
+            self.logger.error(f"Failed to load data from parquet: {e}", exc_info=True)
             return dd.from_pandas(pd.DataFrame(), npartitions=1)

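The three hunks above serve one change: the helper's `debug` flag now travels with it into every backend instead of being re-derived (or missing) at each call site. A minimal sketch of that pattern, using stub classes rather than the package's real `DfHelper`/`BaseBackend`:

```python
# Stub classes only; they illustrate the flag propagation, not sibi_dst's API.
import logging

class HelperStub:
    def __init__(self, debug: bool = False):
        self.debug = debug
        self.logger = logging.getLogger("helper")

class BackendStub:
    def __init__(self, helper: HelperStub):
        self.helper = helper
        self.logger = helper.logger
        self.debug = helper.debug  # mirrors the `self.debug = helper.debug` line above

    def load(self):
        try:
            raise RuntimeError("simulated load failure")
        except RuntimeError as e:
            # tracebacks are attached only when the helper was built with debug=True
            self.logger.error(f"load failed: {e}", exc_info=self.debug)

BackendStub(HelperStub(debug=True)).load()
```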
 
@@ -1,3 +1,4 @@
+
 from typing import Type

 import dask
@@ -5,13 +6,12 @@ import dask.dataframe as dd
 import pandas as pd
 from sqlalchemy import (
     inspect,
-    select,
-    func,
+    select
 )
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import declarative_base
 import time
-from sqlalchemy.exc import TimeoutError
+from sqlalchemy.exc import TimeoutError as SASQLTimeoutError, OperationalError
 import sqlalchemy as sa
 from sibi_dst.df_helper.core import FilterHandler
 from sibi_dst.utils import Logger
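The import rename matters because `sqlalchemy.exc.TimeoutError` (raised when the connection pool cannot hand out a connection in time) is unrelated to Python's builtin `TimeoutError`, and importing it unaliased shadows the builtin. A quick check, assuming only that SQLAlchemy is installed:

```python
from sqlalchemy.exc import SQLAlchemyError, TimeoutError as SASQLTimeoutError

# The pool-timeout exception is a SQLAlchemyError, not the OSError-based builtin.
print(issubclass(SASQLTimeoutError, SQLAlchemyError))  # True
print(issubclass(SASQLTimeoutError, TimeoutError))     # False: the builtin stays untouched
```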
@@ -103,7 +103,8 @@ class SQLAlchemyDask:
            query = self.filter_handler_cls(
                backend="sqlalchemy", logger=self.logger, debug=self.debug
            ).apply_filters(query, model=self.model, filters=self.filters)
-
+        else:
+            query = query.limit(self.chunk_size)
        self.logger.debug(f"Base query for pagination: {query}")

        # 2. Get metadata for the Dask DataFrame structure
@@ -112,13 +113,7 @@ class SQLAlchemyDask:
        meta_df = pd.DataFrame(columns=ordered_columns).astype(meta_dtypes)

        # 3. Get the total record count to calculate the number of chunks
-        # try:
-        #     with self.engine.connect() as connection:
-        #         count_query = select(func.count()).select_from(query.alias())
-        #         total_records = connection.execute(count_query).scalar_one()
-        # except Exception as e:
-        #     self.logger.error(f"Failed to count records for pagination: {e}", exc_info=True)
-        #     return dd.from_pandas(meta_df, npartitions=1)
+
        retry_attempts = 3
        backoff_factor = 0.5  # start with a 0.5-second delay

@@ -131,7 +126,7 @@ class SQLAlchemyDask:
                    # If successful, break the loop
                    break

-            except TimeoutError:
+            except SASQLTimeoutError:
                if attempt < retry_attempts - 1:
                    self.logger.warning(
                        f"Connection pool limit reached. Retrying in {backoff_factor} seconds..."
@@ -144,7 +139,16 @@ class SQLAlchemyDask:
                        exc_info=True
                    )
                    return dd.from_pandas(meta_df, npartitions=1)
-
+            except OperationalError as oe:
+                # sometimes the DB driver wraps timeouts in OperationalError
+                if "timeout" in str(oe).lower():
+                    self.logger.warning("OperationalTimeout, retrying…", exc_info=True)
+                    time.sleep(backoff_factor)
+                    backoff_factor *= 2
+                    continue
+                else:
+                    self.logger.error("OperationalError", exc_info=True)
+                    return dd.from_pandas(meta_df, npartitions=1)
            except Exception as e:
                self.logger.error(f"An unexpected error occurred: {e}", exc_info=True)
                return dd.from_pandas(meta_df, npartitions=1)
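Taken together, the two `except` branches implement a retry loop with exponential backoff that treats pool exhaustion (`SASQLTimeoutError`) and driver-wrapped timeouts (an `OperationalError` whose message contains "timeout") as retryable and everything else as fatal. A compact, self-contained version of that shape follows; the `run_query` callable and the attempt count are illustrative, not sibi_dst's exact values:

```python
import time
from sqlalchemy.exc import OperationalError, TimeoutError as SASQLTimeoutError

def run_with_retries(run_query, attempts: int = 3, backoff: float = 0.5):
    """Retry `run_query` on retryable SQLAlchemy timeouts, doubling the delay each time."""
    for attempt in range(attempts):
        try:
            return run_query()
        except SASQLTimeoutError:
            # connection pool exhausted: retry until attempts run out
            if attempt == attempts - 1:
                raise
        except OperationalError as oe:
            # some drivers report server/lock timeouts as OperationalError
            if "timeout" not in str(oe).lower() or attempt == attempts - 1:
                raise
        time.sleep(backoff)
        backoff *= 2  # exponential backoff between attempts
```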
@@ -177,3 +181,149 @@ class SQLAlchemyDask:
        self.logger.debug(f"Successfully created a lazy Dask DataFrame with {ddf.npartitions} partitions.")

        return ddf
+
+## Dask-Only Solution to test in better hardware
+
+# from typing import Type, Dict, Any
+# import math
+# import time
+# import pandas as pd
+# import dask
+# import dask.dataframe as dd
+#
+# import sqlalchemy as sa
+# from sqlalchemy import select, func
+# from sqlalchemy.engine import Engine
+# from sqlalchemy.exc import TimeoutError as SASQLTimeoutError, OperationalError
+# from sqlalchemy.orm import declarative_base
+#
+# from sibi_dst.df_helper.core import FilterHandler
+# from sibi_dst.utils import Logger
+#
+#
+# class SQLAlchemyDask:
+#     """
+#     Loads data into a Dask DataFrame. If there’s exactly one integer PK,
+#     use dask.dataframe.read_sql_table; otherwise fall back to offset‐based
+#     pagination pushed into dask.delayed to keep memory use minimal.
+#     """
+#
+#     def __init__(
+#         self,
+#         model: Type[declarative_base()],
+#         filters: Dict[str, Any],
+#         engine: Engine,
+#         chunk_size: int = 1_000,
+#         logger=None,
+#         debug: bool = False,
+#     ):
+#         self.model = model
+#         self.filters = filters or {}
+#         self.engine = engine
+#         self.chunk_size = chunk_size
+#         self.logger = logger or Logger.default_logger(self.__class__.__name__)
+#         self.logger.set_level(Logger.DEBUG if debug else Logger.INFO)
+#         self.filter_handler_cls = FilterHandler
+#         self.debug = debug
+#
+#     def read_frame(self, fillna_value=None) -> dd.DataFrame:
+#         # 1) Build base query + filters
+#         base_q = select(self.model)
+#         if self.filters:
+#             base_q = self.filter_handler_cls(
+#                 backend="sqlalchemy",
+#                 logger=self.logger,
+#                 debug=self.debug,
+#             ).apply_filters(base_q, model=self.model, filters=self.filters)
+#
+#         # 2) Zero-row meta for dtype inference
+#         meta = pd.read_sql_query(base_q.limit(0), self.engine).iloc[:0]
+#         if meta.shape[1] == 0:
+#             self.logger.warning("No columns detected; returning empty DataFrame.")
+#             return dd.from_pandas(meta, npartitions=1)
+#
+#         # 3) Single‐PK parallel path?
+#         pk_cols = list(self.model.__table__.primary_key.columns)
+#         if (
+#             len(pk_cols) == 1
+#             and pd.api.types.is_integer_dtype(meta[pk_cols[0].name])
+#         ):
+#             try:
+#                 return self._ddf_via_read_sql_table(pk_cols[0], meta, fillna_value)
+#             except Exception:
+#                 self.logger.warning(
+#                     "read_sql_table path failed, falling back to offset pagination",
+#                     exc_info=True,
+#                 )
+#
+#         # 4) Composite PK or fallback → offset pagination in delayed tasks
+#         return self._offset_paginated_ddf(base_q, meta, fillna_value)
+#
+#     def _offset_paginated_ddf(self, base_q, meta, fillna):
+#         # 1) count total rows
+#         try:
+#             with self.engine.connect() as conn:
+#                 total = conn.execute(
+#                     select(func.count()).select_from(base_q.alias())
+#                 ).scalar_one()
+#         except Exception:
+#             self.logger.error("Failed to count records; returning empty DataFrame", exc_info=True)
+#             return dd.from_pandas(meta, npartitions=1)
+#
+#         if total == 0:
+#             self.logger.warning("Query returned 0 records.")
+#             return dd.from_pandas(meta, npartitions=1)
+#         self.logger.debug(f"Total records to fetch: {total}. Chunk size: {self.chunk_size}.")
+#         # 2) create delayed tasks per offset
+#         @dask.delayed
+#         def _fetch_chunk(offset: int) -> pd.DataFrame:
+#             q = base_q.limit(self.chunk_size).offset(offset)
+#             df = pd.read_sql_query(q, self.engine)
+#             if fillna is not None:
+#                 df = df.fillna(fillna)
+#             return df[meta.columns].astype(meta.dtypes.to_dict())
+#
+#         offsets = range(0, total, self.chunk_size)
+#         parts = [_fetch_chunk(off) for off in offsets]
+#
+#         ddf = dd.from_delayed(parts, meta=meta)
+#         self.logger.debug(f"Offset‐paginated read → {len(parts)} partitions")
+#         return ddf
+#
+#     def _ddf_via_read_sql_table(self, pk_col, meta, fillna) -> dd.DataFrame:
+#         # same as before: min/max + dd.read_sql_table
+#         backoff = 0.5
+#         for attempt in range(3):
+#             try:
+#                 with self.engine.connect() as conn:
+#                     min_id, max_id = conn.execute(
+#                         select(func.min(pk_col), func.max(pk_col))
+#                         .select_from(self.model.__table__)
+#                     ).one()
+#                 break
+#             except (SASQLTimeoutError, OperationalError) as e:
+#                 if "timeout" in str(e).lower() and attempt < 2:
+#                     self.logger.warning(f"Timeout fetching PK bounds; retrying in {backoff}s")
+#                     time.sleep(backoff)
+#                     backoff *= 2
+#                 else:
+#                     raise
+#
+#         if min_id is None or max_id is None:
+#             self.logger.warning("Table empty—no PK bounds.")
+#             return dd.from_pandas(meta, npartitions=1)
+#
+#         total = max_id - min_id + 1
+#         nparts = max(1, math.ceil(total / self.chunk_size))
+#         ddf = dd.read_sql_table(
+#             table=self.model.__table__.name,
+#             uri=str(self.engine.url),
+#             index_col=pk_col.name,
+#             limits=(min_id, max_id),
+#             npartitions=nparts,
+#             columns=list(meta.columns),
+#         )
+#         if fillna is not None:
+#             ddf = ddf.fillna(fillna)
+#         self.logger.debug(f"Parallel read via dask.read_sql_table → {nparts} partitions")
+#         return ddf
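The commented-out class above sketches two strategies: `dd.read_sql_table` keyed on a single integer primary key, and LIMIT/OFFSET pages wrapped in `dask.delayed` otherwise. The snippet below is a minimal, runnable illustration of the second strategy against a throwaway SQLite table; the table name, column names, and chunk size are invented for the example.

```python
import dask
import dask.dataframe as dd
import pandas as pd
import sqlalchemy as sa

# Throwaway in-memory table purely for demonstration.
engine = sa.create_engine("sqlite://")
with engine.begin() as conn:
    conn.execute(sa.text("CREATE TABLE events (id INTEGER PRIMARY KEY, value REAL)"))
    conn.execute(
        sa.text("INSERT INTO events (id, value) VALUES (:id, :value)"),
        [{"id": i, "value": i * 0.5} for i in range(1, 101)],
    )

chunk_size = 25
with engine.connect() as conn:
    total = conn.execute(sa.text("SELECT COUNT(*) FROM events")).scalar_one()

# Explicit zero-row meta tells dd.from_delayed the column names and dtypes.
meta = pd.DataFrame({"id": pd.Series(dtype="int64"), "value": pd.Series(dtype="float64")})

@dask.delayed
def fetch_chunk(offset: int) -> pd.DataFrame:
    sql = f"SELECT * FROM events ORDER BY id LIMIT {chunk_size} OFFSET {offset}"
    return pd.read_sql_query(sql, engine)

parts = [fetch_chunk(off) for off in range(0, total, chunk_size)]
ddf = dd.from_delayed(parts, meta=meta)

# The synchronous scheduler keeps everything on one thread, which the
# in-memory SQLite connection requires.
result = ddf.compute(scheduler="synchronous")
print(ddf.npartitions, len(result))  # 4 partitions, 100 rows
```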
@@ -1,8 +1,6 @@
 from __future__ import annotations

 from ._defaults import (
-    django_field_conversion_map_pandas,
-    django_field_conversion_map_dask,
     sqlalchemy_field_conversion_map_dask,
     normalize_sqlalchemy_type)
 from ._filter_handler import FilterHandler
@@ -12,8 +10,6 @@ from ._query_config import QueryConfig
 __all__ = [
     "ParamsConfig",
     "QueryConfig",
-    "django_field_conversion_map_pandas",
-    "django_field_conversion_map_dask",
     "sqlalchemy_field_conversion_map_dask",
     "normalize_sqlalchemy_type",
     "FilterHandler",
@@ -13,56 +13,7 @@ from sqlalchemy.dialects.mysql import TINYINT, MEDIUMTEXT
 # conversion_map is a dictionary that maps the field types to their corresponding data type conversion functions.
 # Each entry in the dictionary is a pair of a field type (as a string) and a callable function that performs the
 # conversion. This mapping is used to convert the values in a pandas DataFrame to the appropriate data types based on
-# the Django field type.
-
-django_field_conversion_map_pandas: Dict[str, callable] = {
-    "CharField": lambda x: x.astype(str),
-    "TextField": lambda x: x.astype(str),
-    "IntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "AutoField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BigAutoField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BigIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "SmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "PositiveIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "PositiveSmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "FloatField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "DecimalField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BooleanField": lambda x: x.astype(bool),
-    "NullBooleanField": lambda x: x.astype(bool),
-    "DateTimeField": lambda x: pd.to_datetime(x, errors="coerce"),
-    "DateField": lambda x: pd.to_datetime(x, errors="coerce").dt.date,
-    "TimeField": lambda x: pd.to_datetime(x, errors="coerce").dt.time,
-    "DurationField": lambda x: pd.to_timedelta(x, errors="coerce"),
-    # for JSONField, assuming JSON objects are represented as string in df
-    "JSONField": lambda x: x.apply(json.loads),
-    "ArrayField": lambda x: x.apply(eval),
-    "UUIDField": lambda x: x.astype(str),
-}
-
-django_field_conversion_map_dask: Dict[str, callable] = {
-    "CharField": lambda x: x.astype(str),
-    "TextField": lambda x: x.astype(str),
-    "IntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "AutoField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BigAutoField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BigIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "SmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "PositiveIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "PositiveSmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "FloatField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "DecimalField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BooleanField": lambda x: x.astype(bool),
-    "NullBooleanField": lambda x: x.astype(bool),
-    "DateTimeField": lambda x: pd.to_datetime(x, errors="coerce"),
-    "DateField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.date,
-                                                                             meta=("date", "object")),
-    "TimeField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.time,
-                                                                             meta=("time", "object")),
-    "DurationField": lambda x: pd.to_timedelta(x, errors="coerce"),
-    "JSONField": lambda x: x.map_partitions(lambda s: s.apply(json.loads), meta=("json", "object")),
-    "ArrayField": lambda x: x.map_partitions(lambda s: s.apply(eval), meta=("array", "object")),
-    "UUIDField": lambda x: x.astype(str),
-}
+# the db field type.

 sqlalchemy_field_conversion_map_dask: Dict[str, callable] = {
     String.__name__: lambda x: x.astype(str).fillna(""),
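What remains in this module is the SQLAlchemy-keyed map. For context, a map like this is used by looking up a converter under the column's type name and mapping it over the corresponding dask Series; the lookup loop below is illustrative only, not the package's own application code.

```python
import dask.dataframe as dd
import pandas as pd
from sqlalchemy import Integer, String

# Same shape as sqlalchemy_field_conversion_map_dask: type name -> converter callable.
conversion_map = {
    String.__name__: lambda s: s.astype(str).fillna(""),
    Integer.__name__: lambda s: dd.to_numeric(s, errors="coerce"),
}

ddf = dd.from_pandas(
    pd.DataFrame({"name": ["a", "b", "c"], "qty": ["1", "2", "oops"]}),
    npartitions=1,
)
# In the real helper the type names would come from the SQLAlchemy model's columns.
column_types = {"name": String.__name__, "qty": Integer.__name__}

for column, type_name in column_types.items():
    converter = conversion_map.get(type_name)
    if converter is not None:
        ddf[column] = converter(ddf[column])

print(ddf.compute())  # "oops" is coerced to NaN by the Integer converter
```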
@@ -10,7 +10,6 @@ from .df_utils import DfUtils
 from .storage_manager import StorageManager
 from .parquet_saver import ParquetSaver
 from .clickhouse_writer import ClickHouseWriter
-from .airflow_manager import AirflowDAGManager
 from .credentials import *
 from .update_planner import UpdatePlanner
 from .data_wrapper import DataWrapper
@@ -35,7 +34,6 @@ __all__ = [
     "StorageManager",
     "DfUtils",
     "ClickHouseWriter",
-    "AirflowDAGManager",
     "StorageConfig",
     "FsRegistry",
     "DataFromHttpSource",
@@ -38,7 +38,7 @@ class DataWrapper:
         logger: Logger = None,
         show_progress: bool = False,
         timeout: float = 30,
-        max_threads: int = 1,
+        max_threads: int = 3,
         **kwargs: Any,
     ):
         self.dataclass = dataclass
@@ -66,6 +66,7 @@ class DataWrapper:
         self.benchmarks: Dict[datetime.date, Dict[str, float]] = {}
         self.mmanifest = kwargs.get("mmanifest", None)
         self.update_planner=kwargs.get("update_planner", None)
+        self.datacls = self.dataclass(**self.class_params)

     def __enter__(self):
         """Context manager entry"""
@@ -164,28 +165,24 @@ class DataWrapper:
     def _process_single_date(self, date: datetime.date):
         """Core date processing logic with load/save timing and thread reporting"""
         path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
-        self.logger.info(f"Processing date {date.isoformat()} for {path}")
-        # self.logger.info(f"Path {path} in {self.skipped}: {path in self.skipped}")
+        self.logger.debug(f"Processing date {date.isoformat()} for {path}")
         if path in self.update_planner.skipped and self.update_planner.ignore_missing:
             self.logger.info(f"Skipping {date} as it exists in the skipped list")
             return
         full_path = f"{path}{self.parquet_filename}"

         thread_name = threading.current_thread().name
-        self.logger.info(f"[{thread_name}] Executing date: {date} -> saving to: {full_path}")
+        self.logger.debug(f"[{thread_name}] Executing date: {date} -> saving to: {full_path}")

         overall_start = time.perf_counter()
         try:
             load_start = time.perf_counter()
-            with self.dataclass(**self.class_params) as data:
-                df = data.load_period(
-                    dt_field=self.date_field,
-                    start=date,
-                    end=date,
-                    **self.load_params
-                )
+            date_filter = {f"{self.date_field}__date": {date.isoformat()}}
+            self.logger.debug(f"Loading data for {date} with filter: {date_filter}")
+            # Load data using the dataclass with the provided date filter
+            self.load_params.update(date_filter)
+            df = self.datacls.load(**self.load_params)
             load_time = time.perf_counter() - load_start
-
             if df.head(1, compute=True).empty:
                 if self.mmanifest:
                     schema = df._meta.dtypes.astype(str).to_dict()
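The replacement for `load_period` builds a Django-style `<field>__date` filter and merges it into `load_params` before calling `load()`. A tiny illustration of that key format follows; the helper function is hypothetical, and note that the diff itself wraps the value in a set literal (`{date.isoformat()}`), which the filter handler has to accept.

```python
import datetime

def build_date_filter(date_field: str, day: datetime.date) -> dict:
    # Hypothetical helper: one filter entry per day, keyed Django-style.
    return {f"{date_field}__date": day.isoformat()}

load_params = {"status": "active"}
load_params.update(build_date_filter("created_at", datetime.date(2025, 1, 2)))
print(load_params)  # {'status': 'active', 'created_at__date': '2025-01-02'}
```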
@@ -73,6 +73,8 @@ class UpdatePlanner:
         self.show_progress = show_progress
         self.logger = logger or Logger.default_logger(logger_name="update_planner")
         self.logger.set_level(Logger.DEBUG if debug else Logger.INFO)
+        self.debug = debug
+        self.verbose = verbose

         # Filesystem and age helper
         self.fs = fs or fsspec.filesystem(filesystem_type, **(filesystem_options or {}))
@@ -0,0 +1,55 @@
+Metadata-Version: 2.1
+Name: sibi-dst
+Version: 2025.1.2
+Summary: Data Science Toolkit
+Author: Luis Valverde
+Author-email: lvalverdeb@gmail.com
+Requires-Python: >=3.12,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: clickhouse-connect (>=0.8.18,<0.9.0)
+Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
+Requires-Dist: dask[complete] (>=2025.5.1,<2026.0.0)
+Requires-Dist: mysqlclient (>=2.2.7,<3.0.0)
+Requires-Dist: pandas (>=2.3.1,<3.0.0)
+Requires-Dist: psycopg2 (>=2.9.10,<3.0.0)
+Requires-Dist: pydantic (>=2.11.7,<3.0.0)
+Requires-Dist: pymysql (>=1.1.1,<2.0.0)
+Requires-Dist: s3fs (>=2025.5.1,<2026.0.0)
+Requires-Dist: sqlalchemy (>=2.0.41,<3.0.0)
+Requires-Dist: tqdm (>=4.67.1,<5.0.0)
+Requires-Dist: webdav4 (>=0.10.0,<0.11.0)
+Description-Content-Type: text/markdown
+
+### SIBI-DST
+
+Data Science Toolkit built with Python, Pandas, Dask, OpenStreetMaps, NetworkX, SQLAlchemy, GeoPandas, and Folium.
+
+## Example Use Cases
+
+1. **Build DataCubes, DataSets, and DataObjects** from diverse data sources, including **relational databases, Parquet files, Excel (`.xlsx`), delimited tables (`.csv`, `.tsv`), JSON, and RESTful APIs**.
+2. **Comprehensive DataFrame Management** utilities for efficient data handling, transformation, and optimization using **Pandas** and **Dask**.
+3. **Flexible Data Sharing** with client applications by writing to **Data Warehouses in Clickhouse, local filesystems, and cloud storage platforms** such as **S3**.
+4. **Microservices for Data Access** – Build scalable **API-driven services** using **RESTful APIs (`Django REST Framework`, `FastAPI`)** for high-performance data exchange.
+5. **Geospatial Analysis** – Utilize **OpenStreetMaps** and **GeoPandas** for advanced geospatial data processing and visualization.
+
+## Supported Technologies
+
+- **Data Processing**: Pandas, Dask
+- **Databases & Storage**: SQLAlchemy, Parquet, S3, Clickhouse
+- **Mapping & Geospatial Analysis**: OpenStreetMaps, OSMnx, Geopy
+- **API Development**: Django REST Framework, FastAPI
+
+## Installation
+
+```bash
+# with pip
+
+pip install sibi-dst                        # Install only the main package
+pip install sibi-dst[geospatial]            # Install with geospatial dependencies
+pip install sibi-dst[dev,test,geospatial]   # Install all optional dependencies
+
+
+```
+
@@ -1,15 +1,10 @@
 sibi_dst/__init__.py,sha256=3pbriM7Ym5f9gew7n9cO4G_p9n-0bnxdmQ0hwBdJjr4,253
 sibi_dst/df_helper/__init__.py,sha256=McYrw2N0MsMgtawLrONXTGdyHfQWVOBUvIDbklfjb54,342
 sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=-Y4i5KAxKY2BNkmoVeMEZxjTFD7zaM9oQ0aRsvUbQrs,9340
-sibi_dst/df_helper/_df_helper.py,sha256=tpSX5o7caTq5TnbIZ78OLJFBbT1J4Ukeld_ZTULg4yE,10856
+sibi_dst/df_helper/_df_helper.py,sha256=DJRQWTihnEtgBm3X0ar9nH-xcE1PCkWmh1JgID3WDsY,10939
 sibi_dst/df_helper/_parquet_artifact.py,sha256=Nio5GSD6rTYl52nf_TSpQhYIF0hKqRrB3H3A4zYnaG8,14987
 sibi_dst/df_helper/_parquet_reader.py,sha256=L6mr2FeKtTeIn37G9EGpvOx8PwMqXb6qnEECqBaiwxo,3954
 sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sibi_dst/df_helper/backends/django/__init__.py,sha256=uWHi-DtQX5re7b2HcqoXUH3_FZWOw1VTmDf552FAkNs,256
-sibi_dst/df_helper/backends/django/_db_connection.py,sha256=AGbqCnmiX4toMaFPE5ne5h7QCkImjnBKvzGtUD6Ge8Q,3698
-sibi_dst/df_helper/backends/django/_io_dask.py,sha256=NjvJg6y9qKKCRiNrJL4f_A03iKDKEcjCi7LGbr9DgtM,19555
-sibi_dst/df_helper/backends/django/_load_from_db.py,sha256=htG9ec4ix371ClEHQVpx4r3mhBdQaSykeHUCCRhN7L4,10637
-sibi_dst/df_helper/backends/django/_sql_model_builder.py,sha256=at9J7ecGkZbOOYba85uofe9C-ic4wwOqVgJcHpQNiYQ,21449
 sibi_dst/df_helper/backends/http/__init__.py,sha256=d1pfgYxbiYg7E0Iw8RbJ7xfqIfJShqqTBQQGU_S6OOo,105
 sibi_dst/df_helper/backends/http/_http_config.py,sha256=eGPFdqZ5M3Tscqx2P93B6XoBEEzlmdt7yNg7PXUQnNQ,4726
 sibi_dst/df_helper/backends/parquet/__init__.py,sha256=esWJ9aSuYC26d-T01z9dPrJ1uqJzvdaPNTYRb5qXTlQ,182
@@ -17,11 +12,11 @@ sibi_dst/df_helper/backends/parquet/_filter_handler.py,sha256=TvDf0RXta7mwJv11GN
 sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=TaU5_wG1Y3lQC8DVCItVvMnc6ZJmECLu3avssVEMbaM,10591
 sibi_dst/df_helper/backends/sqlalchemy/__init__.py,sha256=LjWm9B7CweTvlvFOgB90XjSe0lVLILAIYMWKPkFXFm8,265
 sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py,sha256=gppZrXLGK8U8xfkzRQPZCIFoWY-miP04nDNHpV8lXtU,10600
-sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py,sha256=UKLWY3s3l8kMmieg3XZtEol-pj2WMb3ZB3496KbmmiA,6805
+sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py,sha256=ph4w8Sd9eVr_jUIZuDhGyEwtDn0KQkb0lUkERrIXKGM,12852
 sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=NXVhtYF2mYsrW2fXBkL29VQ5gxAlOYPJkYa8HZKYUyM,2846
 sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=Q93O_xqK0SdrS3IrijVcqky_Zf6xKjtPHdI3qnf1g8E,7457
-sibi_dst/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
-sibi_dst/df_helper/core/_defaults.py,sha256=eNpHD2sZxir-2xO0b3_V16ryw8YP_5FfpIKK0HNuiN4,7011
+sibi_dst/df_helper/core/__init__.py,sha256=LfmTqFh6GUZup-g95bcXgAxX7J5Hkve7ftLE_CJg_AE,409
+sibi_dst/df_helper/core/_defaults.py,sha256=9UMEMu2wXznO5UzEhnQ82f_ZazZ20JRyRXIi3HP3gDw,4043
 sibi_dst/df_helper/core/_filter_handler.py,sha256=Pmbzygry2mpkNPVS7DBMulHpAb1yYZNFqUU0bJTWJF0,11214
 sibi_dst/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxRrQKE5FQRxcEWsac,6736
 sibi_dst/df_helper/core/_query_config.py,sha256=1ApqmuSGXTC3CdF-xMsSbCa3V2Z5hOP3Wq5huhzZwqY,439
@@ -37,13 +32,12 @@ sibi_dst/osmnx_helper/basemaps/router_plotter.py,sha256=UAiijn-J-jjX4YnL0_P9SFqT
 sibi_dst/osmnx_helper/utils.py,sha256=BzuY8CtYnBAAO8UAr_M7EOk6CP1zcifNLs8pkdFZEFg,20577
 sibi_dst/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/tests/test_data_wrapper_class.py,sha256=6uFmZR2DxnxQz49L5jT2ehlKvlLnpUHMLFB_PqqUq7k,3336
-sibi_dst/utils/__init__.py,sha256=w0_q4rl3yD7x1Q5yWxH-GN_3Ju1XlebIzm3nJdrUeGE,1234
-sibi_dst/utils/airflow_manager.py,sha256=-d44EKUZNYJyp4wuNwRvilRQktunArPOB5fZuWdQv10,7526
+sibi_dst/utils/__init__.py,sha256=H0Yr_Xo4dBTf03_Si_cggmPNSv6cf8_BBetoHJ86Tiw,1162
 sibi_dst/utils/clickhouse_writer.py,sha256=iAUe4_Kn2WR1xZjpLW2FOWCWfOTw6fCGMTUcWxIQJ60,9877
 sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
 sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
 sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
-sibi_dst/utils/data_wrapper.py,sha256=Tb9bHIHI6qVsdH791BOFN1VrPb-7GS4fHhhHV8hktec,9641
+sibi_dst/utils/data_wrapper.py,sha256=69aPQFP178-QTJ_joJYqymP--wNxa1qzri_KkvvUTIw,9688
 sibi_dst/utils/date_utils.py,sha256=T3ij-WOQu3cIfmNAweSVMWWr-hVtuBcTGjEY-cMJIvU,18627
 sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
 sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
@@ -54,7 +48,7 @@ sibi_dst/utils/parquet_saver.py,sha256=O62xwPfphOpKgEiHqnts20CPSU96pxs49Cg7PVetL
 sibi_dst/utils/phone_formatter.py,sha256=tsVTDamuthFYgy4-5UwmQkPQ-FGTGH7MjZyH8utAkIY,4945
 sibi_dst/utils/storage_config.py,sha256=TE15H-7d0mqwYPSUgrdidK9U7N7p87Z8JfUQH4-jdPs,4123
 sibi_dst/utils/storage_manager.py,sha256=btecX7ggNb7rfu5EK9Xuu2q_FZA7r_rB_tfhQ8V96qc,6567
-sibi_dst/utils/update_planner.py,sha256=dJXLC-KdbWrCs-MFe7Xa8F-ZhlNJq8P1szjLAzMJZk0,9684
+sibi_dst/utils/update_planner.py,sha256=t9A5DLE9cDiYNO8ctQIWVyVWnkMSV0PfbBJ43A0bQv4,9742
 sibi_dst/utils/webdav_client.py,sha256=pYF1UsGOuxYeGLq7aBfwZFvkvD4meOcbbaiZ4d6GW9I,7107
 sibi_dst/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v2/df_helper/__init__.py,sha256=XuH6jKYAPg2DdRbsxxBSxp9X3x-ARyaT0xe27uILrVo,99
@@ -80,6 +74,6 @@ sibi_dst/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v3/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v3/df_helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v3/df_helper/_df_helper.py,sha256=NKIQ4Y-Tn-e841sbZxzLh3Q071_Zo9Vu4y3OAXcsO98,3900
-sibi_dst-0.3.64.dist-info/METADATA,sha256=S-GXRa0njyB4k2RB51TboYGXeumRmGunMY9km0UJNUE,4292
-sibi_dst-0.3.64.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-sibi_dst-0.3.64.dist-info/RECORD,,
+sibi_dst-2025.1.2.dist-info/METADATA,sha256=H5j6tyATGtD5fANqo3OBNa5Mwy_qd3hgG0_Qfotu1RM,2366
+sibi_dst-2025.1.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+sibi_dst-2025.1.2.dist-info/RECORD,,
@@ -1,11 +0,0 @@
-from __future__ import annotations
-
-from ._io_dask import ReadFrameDask
-from ._db_connection import DjangoConnectionConfig
-from ._load_from_db import DjangoLoadFromDb
-
-__all__ = [
-    "DjangoConnectionConfig",
-    "ReadFrameDask",
-    "DjangoLoadFromDb"
-]
@@ -1,88 +0,0 @@
-from typing import Any
-
-from pydantic import BaseModel, model_validator
-
-from ._sql_model_builder import DjangoSqlModelBuilder
-
-
-class DjangoConnectionConfig(BaseModel):
-    """
-    Represents a configuration for establishing a Django database connection.
-
-    This class is used for defining the configurations necessary to establish a Django
-    database connection. It supports dynamic model generation if the model is not
-    provided explicitly. It also validates the connection configuration to ensure it
-    is properly set up before being used.
-
-    :ivar live: Indicates whether the connection is live. Automatically set to False if
-        a table is provided without a pre-built model.
-    :type live: bool
-    :ivar connection_name: The name of the database connection to use. This is a mandatory
-        parameter and must be provided.
-    :type connection_name: str
-    :ivar table: The name of the database table to use. Required for dynamic model
-        generation when no model is provided.
-    :type table: str
-    :ivar model: The Django model that represents the database table. If not provided,
-        this can be generated dynamically by using the table name.
-    :type model: Any
-    """
-    live: bool = False
-    connection_name: str = None
-    table: str = None
-    model: Any = None
-
-    @model_validator(mode="after")
-    def check_model(self):
-        """
-        Validates and modifies the instance based on the provided attributes and conditions.
-        This method ensures that all required parameters are populated and consistent, and it
-        dynamically builds a model if necessary. The method also ensures the connection is
-        validated after the model preparation process.
-
-        :raises ValueError: If `connection_name` is not provided.
-        :raises ValueError: If `table` name is not specified when building the model dynamically.
-        :raises ValueError: If there are errors during the dynamic model-building process.
-        :raises ValueError: If `validate_connection` fails due to invalid configuration.
-        :return: The validated and potentially mutated instance.
-        """
-        # connection_name is mandatory
-        if self.connection_name is None:
-            raise ValueError("Connection name must be specified")
-
-        # If table is provided, enforce live=False
-        if self.table:
-            self.live = False
-
-        # If model is not provided, build it dynamically
-        if not self.model:
-            if not self.table:
-                raise ValueError("Table name must be specified to build the model")
-            try:
-                self.model = DjangoSqlModelBuilder(
-                    connection_name=self.connection_name, table=self.table
-                ).build_model()
-            except Exception as e:
-                raise ValueError(f"Failed to build model: {e}")
-        else:
-            self.live = True
-        # Validate the connection after building the model
-        self.validate_connection()
-        return self
-
-    def validate_connection(self):
-        """
-        Ensures the database connection is valid by performing a simple
-        query. Raises a ValueError if the connection is broken or if any
-        other exception occurs during the query.
-
-        :raises ValueError: If the connection to the database cannot be
-            established or if the query fails.
-        """
-        try:
-            # Perform a simple query to test the connection
-            self.model.objects.using(self.connection_name).exists()
-        except Exception as e:
-            raise ValueError(
-                f"Failed to connect to the database '{self.connection_name}': {e}"
-            )