sibi-dst 0.3.64__py3-none-any.whl → 2025.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/df_helper/_df_helper.py +5 -3
- sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +163 -13
- sibi_dst/df_helper/core/__init__.py +0 -4
- sibi_dst/df_helper/core/_defaults.py +1 -50
- sibi_dst/utils/__init__.py +0 -2
- sibi_dst/utils/data_wrapper.py +9 -12
- sibi_dst/utils/update_planner.py +2 -0
- sibi_dst-2025.1.1.dist-info/METADATA +55 -0
- {sibi_dst-0.3.64.dist-info → sibi_dst-2025.1.1.dist-info}/RECORD +10 -16
- sibi_dst/df_helper/backends/django/__init__.py +0 -11
- sibi_dst/df_helper/backends/django/_db_connection.py +0 -88
- sibi_dst/df_helper/backends/django/_io_dask.py +0 -450
- sibi_dst/df_helper/backends/django/_load_from_db.py +0 -227
- sibi_dst/df_helper/backends/django/_sql_model_builder.py +0 -493
- sibi_dst/utils/airflow_manager.py +0 -212
- sibi_dst-0.3.64.dist-info/METADATA +0 -90
- {sibi_dst-0.3.64.dist-info → sibi_dst-2025.1.1.dist-info}/WHEEL +0 -0
sibi_dst/df_helper/_df_helper.py
CHANGED
@@ -26,6 +26,7 @@ class BaseBackend:
     def __init__(self, helper: DfHelper):
         self.helper = helper
         self.logger = helper.logger
+        self.debug = helper.debug
 
     def load(self, **options) -> dd.DataFrame | pd.DataFrame:
         """Synchronous data loading method. Must be implemented by sync backends."""
@@ -47,7 +48,8 @@ class SqlAlchemyBackend(BaseBackend):
                 plugin_sqlalchemy=self.helper.backend_db_connection,
                 plugin_query=self.helper._backend_query,
                 plugin_params=self.helper._backend_params,
-                logger=self.logger
+                logger=self.logger,
+                debug= self.debug
             )
             return db_loader.build_and_load()
         except Exception as e:
@@ -62,10 +64,10 @@ class ParquetBackend(BaseBackend):
         try:
             df = self.helper.backend_parquet.load_files()
             if options and df is not None:
-                df = FilterHandler('dask', self.logger).apply_filters(df, filters=options)
+                df = FilterHandler('dask', logger=self.logger, debug=False).apply_filters(df, filters=options)
             return df
         except Exception as e:
-            self.logger.error(f"Failed to load data from parquet: {e}", exc_info=
+            self.logger.error(f"Failed to load data from parquet: {e}", exc_info=True)
             return dd.from_pandas(pd.DataFrame(), npartitions=1)
 
 
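The net effect of these hunks is that a single `debug` flag set on `DfHelper` now travels with the helper's logger into each backend and on into the SQLAlchemy loader and `FilterHandler`. Below is a minimal, self-contained sketch of that propagation pattern; `_Helper` and `_Backend` are hypothetical stand-ins for `DfHelper` and `BaseBackend`, not the library's classes.

```python
import logging

logging.basicConfig(level=logging.DEBUG)

class _Helper:
    """Stand-in for DfHelper: owns the logger and a debug flag."""
    def __init__(self, debug: bool = False):
        self.debug = debug
        self.logger = logging.getLogger("df_helper")
        self.logger.setLevel(logging.DEBUG if debug else logging.INFO)

class _Backend:
    """Stand-in for BaseBackend: copies logger and debug from the helper once."""
    def __init__(self, helper: _Helper):
        self.helper = helper
        self.logger = helper.logger
        self.debug = helper.debug  # propagated once, reused by every loader call

    def load(self, **options):
        # downstream components receive the same flag instead of re-deriving it
        self.logger.debug("loading with options=%r (debug=%s)", options, self.debug)
        return []

backend = _Backend(_Helper(debug=True))
backend.load(status="active")
```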
sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py
CHANGED
@@ -1,3 +1,4 @@
+
 from typing import Type
 
 import dask
@@ -5,13 +6,12 @@ import dask.dataframe as dd
 import pandas as pd
 from sqlalchemy import (
     inspect,
-    select
-    func,
+    select
 )
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import declarative_base
 import time
-from sqlalchemy.exc import TimeoutError
+from sqlalchemy.exc import TimeoutError as SASQLTimeoutError, OperationalError
 import sqlalchemy as sa
 from sibi_dst.df_helper.core import FilterHandler
 from sibi_dst.utils import Logger
@@ -103,7 +103,8 @@ class SQLAlchemyDask:
             query = self.filter_handler_cls(
                 backend="sqlalchemy", logger=self.logger, debug=self.debug
             ).apply_filters(query, model=self.model, filters=self.filters)
-
+        else:
+            query = query.limit(self.chunk_size)
         self.logger.debug(f"Base query for pagination: {query}")
 
         # 2. Get metadata for the Dask DataFrame structure
@@ -112,13 +113,7 @@ class SQLAlchemyDask:
         meta_df = pd.DataFrame(columns=ordered_columns).astype(meta_dtypes)
 
         # 3. Get the total record count to calculate the number of chunks
-
-        # with self.engine.connect() as connection:
-        #     count_query = select(func.count()).select_from(query.alias())
-        #     total_records = connection.execute(count_query).scalar_one()
-        # except Exception as e:
-        #     self.logger.error(f"Failed to count records for pagination: {e}", exc_info=True)
-        #     return dd.from_pandas(meta_df, npartitions=1)
+
         retry_attempts = 3
         backoff_factor = 0.5  # start with a 0.5-second delay
 
@@ -131,7 +126,7 @@ class SQLAlchemyDask:
                 # If successful, break the loop
                 break
 
-            except
+            except SASQLTimeoutError:
                 if attempt < retry_attempts - 1:
                     self.logger.warning(
                         f"Connection pool limit reached. Retrying in {backoff_factor} seconds..."
@@ -144,7 +139,16 @@ class SQLAlchemyDask:
                         exc_info=True
                     )
                     return dd.from_pandas(meta_df, npartitions=1)
-
+            except OperationalError as oe:
+                # sometimes the DB driver wraps timeouts in OperationalError
+                if "timeout" in str(oe).lower():
+                    self.logger.warning("OperationalTimeout, retrying…", exc_info=True)
+                    time.sleep(backoff_factor)
+                    backoff_factor *= 2
+                    continue
+                else:
+                    self.logger.error("OperationalError", exc_info=True)
+                    return dd.from_pandas(meta_df, npartitions=1)
             except Exception as e:
                 self.logger.error(f"An unexpected error occurred: {e}", exc_info=True)
                 return dd.from_pandas(meta_df, npartitions=1)
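The reworked loop retries pool timeouts (`SASQLTimeoutError`) with an exponentially growing delay and gives driver-level timeouts wrapped in `OperationalError` the same treatment, falling back to an empty DataFrame only when retries are exhausted or the error is not a timeout. A minimal, self-contained sketch of the same retry-with-backoff pattern, assuming an in-memory SQLite engine and a placeholder query rather than the library's code:

```python
import time
from sqlalchemy import create_engine, text
from sqlalchemy.exc import TimeoutError as SASQLTimeoutError, OperationalError

engine = create_engine("sqlite:///:memory:")  # placeholder engine

def fetch_count(retries: int = 3, backoff: float = 0.5) -> int | None:
    """Run a cheap query, retrying timeouts with exponential backoff."""
    for attempt in range(retries):
        try:
            with engine.connect() as conn:
                return conn.execute(text("SELECT 1")).scalar_one()
        except SASQLTimeoutError:
            # connection pool exhausted: wait, double the delay, try again
            if attempt == retries - 1:
                raise
            time.sleep(backoff)
            backoff *= 2
        except OperationalError as oe:
            # some drivers report timeouts as OperationalError
            if "timeout" in str(oe).lower() and attempt < retries - 1:
                time.sleep(backoff)
                backoff *= 2
            else:
                raise
    return None

print(fetch_count())
```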
@@ -177,3 +181,149 @@ class SQLAlchemyDask:
         self.logger.debug(f"Successfully created a lazy Dask DataFrame with {ddf.npartitions} partitions.")
 
         return ddf
+
+## Dask-Only Solution to test in better hardware
+
+# from typing import Type, Dict, Any
+# import math
+# import time
+# import pandas as pd
+# import dask
+# import dask.dataframe as dd
+#
+# import sqlalchemy as sa
+# from sqlalchemy import select, func
+# from sqlalchemy.engine import Engine
+# from sqlalchemy.exc import TimeoutError as SASQLTimeoutError, OperationalError
+# from sqlalchemy.orm import declarative_base
+#
+# from sibi_dst.df_helper.core import FilterHandler
+# from sibi_dst.utils import Logger
+#
+#
+# class SQLAlchemyDask:
+#     """
+#     Loads data into a Dask DataFrame. If there’s exactly one integer PK,
+#     use dask.dataframe.read_sql_table; otherwise fall back to offset‐based
+#     pagination pushed into dask.delayed to keep memory use minimal.
+#     """
+#
+#     def __init__(
+#         self,
+#         model: Type[declarative_base()],
+#         filters: Dict[str, Any],
+#         engine: Engine,
+#         chunk_size: int = 1_000,
+#         logger=None,
+#         debug: bool = False,
+#     ):
+#         self.model = model
+#         self.filters = filters or {}
+#         self.engine = engine
+#         self.chunk_size = chunk_size
+#         self.logger = logger or Logger.default_logger(self.__class__.__name__)
+#         self.logger.set_level(Logger.DEBUG if debug else Logger.INFO)
+#         self.filter_handler_cls = FilterHandler
+#         self.debug = debug
+#
+#     def read_frame(self, fillna_value=None) -> dd.DataFrame:
+#         # 1) Build base query + filters
+#         base_q = select(self.model)
+#         if self.filters:
+#             base_q = self.filter_handler_cls(
+#                 backend="sqlalchemy",
+#                 logger=self.logger,
+#                 debug=self.debug,
+#             ).apply_filters(base_q, model=self.model, filters=self.filters)
+#
+#         # 2) Zero-row meta for dtype inference
+#         meta = pd.read_sql_query(base_q.limit(0), self.engine).iloc[:0]
+#         if meta.shape[1] == 0:
+#             self.logger.warning("No columns detected; returning empty DataFrame.")
+#             return dd.from_pandas(meta, npartitions=1)
+#
+#         # 3) Single‐PK parallel path?
+#         pk_cols = list(self.model.__table__.primary_key.columns)
+#         if (
+#             len(pk_cols) == 1
+#             and pd.api.types.is_integer_dtype(meta[pk_cols[0].name])
+#         ):
+#             try:
+#                 return self._ddf_via_read_sql_table(pk_cols[0], meta, fillna_value)
+#             except Exception:
+#                 self.logger.warning(
+#                     "read_sql_table path failed, falling back to offset pagination",
+#                     exc_info=True,
+#                 )
+#
+#         # 4) Composite PK or fallback → offset pagination in delayed tasks
+#         return self._offset_paginated_ddf(base_q, meta, fillna_value)
+#
+#     def _offset_paginated_ddf(self, base_q, meta, fillna):
+#         # 1) count total rows
+#         try:
+#             with self.engine.connect() as conn:
+#                 total = conn.execute(
+#                     select(func.count()).select_from(base_q.alias())
+#                 ).scalar_one()
+#         except Exception:
+#             self.logger.error("Failed to count records; returning empty DataFrame", exc_info=True)
+#             return dd.from_pandas(meta, npartitions=1)
+#
+#         if total == 0:
+#             self.logger.warning("Query returned 0 records.")
+#             return dd.from_pandas(meta, npartitions=1)
+#         self.logger.debug(f"Total records to fetch: {total}. Chunk size: {self.chunk_size}.")
+#         # 2) create delayed tasks per offset
+#         @dask.delayed
+#         def _fetch_chunk(offset: int) -> pd.DataFrame:
+#             q = base_q.limit(self.chunk_size).offset(offset)
+#             df = pd.read_sql_query(q, self.engine)
+#             if fillna is not None:
+#                 df = df.fillna(fillna)
+#             return df[meta.columns].astype(meta.dtypes.to_dict())
+#
+#         offsets = range(0, total, self.chunk_size)
+#         parts = [_fetch_chunk(off) for off in offsets]
+#
+#         ddf = dd.from_delayed(parts, meta=meta)
+#         self.logger.debug(f"Offset‐paginated read → {len(parts)} partitions")
+#         return ddf
+#
+#     def _ddf_via_read_sql_table(self, pk_col, meta, fillna) -> dd.DataFrame:
+#         # same as before: min/max + dd.read_sql_table
+#         backoff = 0.5
+#         for attempt in range(3):
+#             try:
+#                 with self.engine.connect() as conn:
+#                     min_id, max_id = conn.execute(
+#                         select(func.min(pk_col), func.max(pk_col))
+#                         .select_from(self.model.__table__)
+#                     ).one()
+#                 break
+#             except (SASQLTimeoutError, OperationalError) as e:
+#                 if "timeout" in str(e).lower() and attempt < 2:
+#                     self.logger.warning(f"Timeout fetching PK bounds; retrying in {backoff}s")
+#                     time.sleep(backoff)
+#                     backoff *= 2
+#                 else:
+#                     raise
+#
+#         if min_id is None or max_id is None:
+#             self.logger.warning("Table empty—no PK bounds.")
+#             return dd.from_pandas(meta, npartitions=1)
+#
+#         total = max_id - min_id + 1
+#         nparts = max(1, math.ceil(total / self.chunk_size))
+#         ddf = dd.read_sql_table(
+#             table=self.model.__table__.name,
+#             uri=str(self.engine.url),
+#             index_col=pk_col.name,
+#             limits=(min_id, max_id),
+#             npartitions=nparts,
+#             columns=list(meta.columns),
+#         )
+#         if fillna is not None:
+#             ddf = ddf.fillna(fillna)
+#         self.logger.debug(f"Parallel read via dask.read_sql_table → {nparts} partitions")
+#         return ddf
sibi_dst/df_helper/core/__init__.py
CHANGED
@@ -1,8 +1,6 @@
 from __future__ import annotations
 
 from ._defaults import (
-    django_field_conversion_map_pandas,
-    django_field_conversion_map_dask,
     sqlalchemy_field_conversion_map_dask,
     normalize_sqlalchemy_type)
 from ._filter_handler import FilterHandler
@@ -12,8 +10,6 @@ from ._query_config import QueryConfig
 __all__ = [
     "ParamsConfig",
     "QueryConfig",
-    "django_field_conversion_map_pandas",
-    "django_field_conversion_map_dask",
     "sqlalchemy_field_conversion_map_dask",
     "normalize_sqlalchemy_type",
     "FilterHandler",
sibi_dst/df_helper/core/_defaults.py
CHANGED
@@ -13,56 +13,7 @@ from sqlalchemy.dialects.mysql import TINYINT, MEDIUMTEXT
 # conversion_map is a dictionary that maps the field types to their corresponding data type conversion functions.
 # Each entry in the dictionary is a pair of a field type (as a string) and a callable function that performs the
 # conversion. This mapping is used to convert the values in a pandas DataFrame to the appropriate data types based on
-# the
-
-django_field_conversion_map_pandas: Dict[str, callable] = {
-    "CharField": lambda x: x.astype(str),
-    "TextField": lambda x: x.astype(str),
-    "IntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "AutoField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BigAutoField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BigIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "SmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "PositiveIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "PositiveSmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "FloatField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "DecimalField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BooleanField": lambda x: x.astype(bool),
-    "NullBooleanField": lambda x: x.astype(bool),
-    "DateTimeField": lambda x: pd.to_datetime(x, errors="coerce"),
-    "DateField": lambda x: pd.to_datetime(x, errors="coerce").dt.date,
-    "TimeField": lambda x: pd.to_datetime(x, errors="coerce").dt.time,
-    "DurationField": lambda x: pd.to_timedelta(x, errors="coerce"),
-    # for JSONField, assuming JSON objects are represented as string in df
-    "JSONField": lambda x: x.apply(json.loads),
-    "ArrayField": lambda x: x.apply(eval),
-    "UUIDField": lambda x: x.astype(str),
-}
-
-django_field_conversion_map_dask: Dict[str, callable] = {
-    "CharField": lambda x: x.astype(str),
-    "TextField": lambda x: x.astype(str),
-    "IntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "AutoField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BigAutoField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BigIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "SmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "PositiveIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "PositiveSmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "FloatField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "DecimalField": lambda x: pd.to_numeric(x, errors="coerce"),
-    "BooleanField": lambda x: x.astype(bool),
-    "NullBooleanField": lambda x: x.astype(bool),
-    "DateTimeField": lambda x: pd.to_datetime(x, errors="coerce"),
-    "DateField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.date,
-                                                                             meta=("date", "object")),
-    "TimeField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.time,
-                                                                             meta=("time", "object")),
-    "DurationField": lambda x: pd.to_timedelta(x, errors="coerce"),
-    "JSONField": lambda x: x.map_partitions(lambda s: s.apply(json.loads), meta=("json", "object")),
-    "ArrayField": lambda x: x.map_partitions(lambda s: s.apply(eval), meta=("array", "object")),
-    "UUIDField": lambda x: x.astype(str),
-}
+# the db field type.
 
 sqlalchemy_field_conversion_map_dask: Dict[str, callable] = {
     String.__name__: lambda x: x.astype(str).fillna(""),
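The conversion maps that remain are plain dictionaries keyed by field-type name, each value a callable applied to one column. A hedged sketch of how such a map is typically consumed; `apply_conversion`, the map entries, and the sample frame are illustrative rather than the package's API:

```python
import pandas as pd

# Illustrative map in the same shape as sqlalchemy_field_conversion_map_dask.
conversion_map = {
    "String": lambda s: s.astype(str).fillna(""),
    "Integer": lambda s: pd.to_numeric(s, errors="coerce"),
    "DateTime": lambda s: pd.to_datetime(s, errors="coerce"),
}

def apply_conversion(df: pd.DataFrame, field_types: dict) -> pd.DataFrame:
    """Convert each column using the callable registered for its field type."""
    for column, field_type in field_types.items():
        converter = conversion_map.get(field_type)
        if converter is not None:
            df[column] = converter(df[column])
    return df

df = pd.DataFrame({"name": [None, "a"], "qty": ["3", "x"], "ts": ["2025-01-01", None]})
df = apply_conversion(df, {"name": "String", "qty": "Integer", "ts": "DateTime"})
print(df.dtypes)
```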
sibi_dst/utils/__init__.py
CHANGED
@@ -10,7 +10,6 @@ from .df_utils import DfUtils
 from .storage_manager import StorageManager
 from .parquet_saver import ParquetSaver
 from .clickhouse_writer import ClickHouseWriter
-from .airflow_manager import AirflowDAGManager
 from .credentials import *
 from .update_planner import UpdatePlanner
 from .data_wrapper import DataWrapper
@@ -35,7 +34,6 @@ __all__ = [
     "StorageManager",
     "DfUtils",
     "ClickHouseWriter",
-    "AirflowDAGManager",
     "StorageConfig",
     "FsRegistry",
     "DataFromHttpSource",
sibi_dst/utils/data_wrapper.py
CHANGED
@@ -38,7 +38,7 @@ class DataWrapper:
             logger: Logger = None,
             show_progress: bool = False,
             timeout: float = 30,
-            max_threads: int =
+            max_threads: int = 3,
             **kwargs: Any,
     ):
         self.dataclass = dataclass
@@ -66,6 +66,7 @@ class DataWrapper:
         self.benchmarks: Dict[datetime.date, Dict[str, float]] = {}
         self.mmanifest = kwargs.get("mmanifest", None)
         self.update_planner=kwargs.get("update_planner", None)
+        self.datacls = self.dataclass(**self.class_params)
 
     def __enter__(self):
         """Context manager entry"""
@@ -164,28 +165,24 @@ class DataWrapper:
     def _process_single_date(self, date: datetime.date):
         """Core date processing logic with load/save timing and thread reporting"""
         path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
-        self.logger.
-        # self.logger.info(f"Path {path} in {self.skipped}: {path in self.skipped}")
+        self.logger.debug(f"Processing date {date.isoformat()} for {path}")
         if path in self.update_planner.skipped and self.update_planner.ignore_missing:
             self.logger.info(f"Skipping {date} as it exists in the skipped list")
             return
         full_path = f"{path}{self.parquet_filename}"
 
         thread_name = threading.current_thread().name
-        self.logger.
+        self.logger.debug(f"[{thread_name}] Executing date: {date} -> saving to: {full_path}")
 
         overall_start = time.perf_counter()
         try:
             load_start = time.perf_counter()
-
-
-
-
-
-                **self.load_params
-            )
+            date_filter = {f"{self.date_field}__date": {date.isoformat()}}
+            self.logger.debug(f"Loading data for {date} with filter: {date_filter}")
+            # Load data using the dataclass with the provided date filter
+            self.load_params.update(date_filter)
+            df = self.datacls.load(**self.load_params)
             load_time = time.perf_counter() - load_start
-
             if df.head(1, compute=True).empty:
                 if self.mmanifest:
                     schema = df._meta.dtypes.astype(str).to_dict()
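With these changes `DataWrapper` instantiates the dataclass once (`self.datacls`) and, for every date, builds a `{date_field}__date` filter that is merged into `load_params` before calling `load()`. A rough, self-contained sketch of that per-date loop; `SalesRecord`, `created_at`, and the thread-pool wiring are hypothetical stand-ins, not the package's implementation:

```python
import datetime
from concurrent.futures import ThreadPoolExecutor

class SalesRecord:
    """Hypothetical loader with a DfHelper-like load(**filters) API."""
    def __init__(self, **params):
        self.params = params

    def load(self, **filters):
        # a real implementation would query a backend; here we just echo the filter
        return {"filters": filters}

date_field = "created_at"
loader = SalesRecord(connection="warehouse")  # instantiated once, reused per date

def process_single_date(date: datetime.date):
    date_filter = {f"{date_field}__date": date.isoformat()}  # e.g. created_at__date=2025-01-01
    return loader.load(**date_filter)

dates = [datetime.date(2025, 1, d) for d in (1, 2, 3)]
with ThreadPoolExecutor(max_workers=3) as pool:  # max_threads now defaults to 3
    results = list(pool.map(process_single_date, dates))
print(results)
```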
sibi_dst/utils/update_planner.py
CHANGED
@@ -73,6 +73,8 @@ class UpdatePlanner:
         self.show_progress = show_progress
         self.logger = logger or Logger.default_logger(logger_name="update_planner")
         self.logger.set_level(Logger.DEBUG if debug else Logger.INFO)
+        self.debug = debug
+        self.verbose = verbose
 
         # Filesystem and age helper
         self.fs = fs or fsspec.filesystem(filesystem_type, **(filesystem_options or {}))
sibi_dst-2025.1.1.dist-info/METADATA
ADDED
@@ -0,0 +1,55 @@
+Metadata-Version: 2.1
+Name: sibi-dst
+Version: 2025.1.1
+Summary: Data Science Toolkit
+Author: Luis Valverde
+Author-email: lvalverdeb@gmail.com
+Requires-Python: >=3.12,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: clickhouse-connect (>=0.8.18,<0.9.0)
+Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
+Requires-Dist: dask[complete] (>=2025.5.1,<2026.0.0)
+Requires-Dist: mysqlclient (>=2.2.7,<3.0.0)
+Requires-Dist: pandas (>=2.3.1,<3.0.0)
+Requires-Dist: psycopg2 (>=2.9.10,<3.0.0)
+Requires-Dist: pydantic (>=2.11.7,<3.0.0)
+Requires-Dist: pymysql (>=1.1.1,<2.0.0)
+Requires-Dist: s3fs (>=2025.5.1,<2026.0.0)
+Requires-Dist: sqlalchemy (>=2.0.41,<3.0.0)
+Requires-Dist: tqdm (>=4.67.1,<5.0.0)
+Requires-Dist: webdav4 (>=0.10.0,<0.11.0)
+Description-Content-Type: text/markdown
+
+### SIBI-DST
+
+Data Science Toolkit built with Python, Pandas, Dask, OpenStreetMaps, NetworkX, SQLAlchemy, GeoPandas, and Folium.
+
+## Example Use Cases
+
+1. **Build DataCubes, DataSets, and DataObjects** from diverse data sources, including **relational databases, Parquet files, Excel (`.xlsx`), delimited tables (`.csv`, `.tsv`), JSON, and RESTful APIs**.
+2. **Comprehensive DataFrame Management** utilities for efficient data handling, transformation, and optimization using **Pandas** and **Dask**.
+3. **Flexible Data Sharing** with client applications by writing to **Data Warehouses in Clickhouse, local filesystems, and cloud storage platforms** such as **S3**.
+4. **Microservices for Data Access** – Build scalable **API-driven services** using **RESTful APIs (`Django REST Framework`, `FastAPI`)** for high-performance data exchange.
+5. **Geospatial Analysis** – Utilize **OpenStreetMaps** and **GeoPandas** for advanced geospatial data processing and visualization.
+
+## Supported Technologies
+
+- **Data Processing**: Pandas, Dask
+- **Databases & Storage**: SQLAlchemy, Parquet, S3, Clickhouse
+- **Mapping & Geospatial Analysis**: OpenStreetMaps, OSMnx, Geopy
+- **API Development**: Django REST Framework, FastAPI
+
+## Installation
+
+```bash
+# with pip
+
+pip install sibi-dst # Install only the main package
+pip install sibi-dst[geospatial] # Install with geospatial dependencies
+pip install sibi-dst[dev,test,geospatial] # Install all optional dependencies
+
+
+```
+
{sibi_dst-0.3.64.dist-info → sibi_dst-2025.1.1.dist-info}/RECORD
CHANGED
@@ -1,15 +1,10 @@
 sibi_dst/__init__.py,sha256=3pbriM7Ym5f9gew7n9cO4G_p9n-0bnxdmQ0hwBdJjr4,253
 sibi_dst/df_helper/__init__.py,sha256=McYrw2N0MsMgtawLrONXTGdyHfQWVOBUvIDbklfjb54,342
 sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=-Y4i5KAxKY2BNkmoVeMEZxjTFD7zaM9oQ0aRsvUbQrs,9340
-sibi_dst/df_helper/_df_helper.py,sha256=
+sibi_dst/df_helper/_df_helper.py,sha256=DJRQWTihnEtgBm3X0ar9nH-xcE1PCkWmh1JgID3WDsY,10939
 sibi_dst/df_helper/_parquet_artifact.py,sha256=Nio5GSD6rTYl52nf_TSpQhYIF0hKqRrB3H3A4zYnaG8,14987
 sibi_dst/df_helper/_parquet_reader.py,sha256=L6mr2FeKtTeIn37G9EGpvOx8PwMqXb6qnEECqBaiwxo,3954
 sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sibi_dst/df_helper/backends/django/__init__.py,sha256=uWHi-DtQX5re7b2HcqoXUH3_FZWOw1VTmDf552FAkNs,256
-sibi_dst/df_helper/backends/django/_db_connection.py,sha256=AGbqCnmiX4toMaFPE5ne5h7QCkImjnBKvzGtUD6Ge8Q,3698
-sibi_dst/df_helper/backends/django/_io_dask.py,sha256=NjvJg6y9qKKCRiNrJL4f_A03iKDKEcjCi7LGbr9DgtM,19555
-sibi_dst/df_helper/backends/django/_load_from_db.py,sha256=htG9ec4ix371ClEHQVpx4r3mhBdQaSykeHUCCRhN7L4,10637
-sibi_dst/df_helper/backends/django/_sql_model_builder.py,sha256=at9J7ecGkZbOOYba85uofe9C-ic4wwOqVgJcHpQNiYQ,21449
 sibi_dst/df_helper/backends/http/__init__.py,sha256=d1pfgYxbiYg7E0Iw8RbJ7xfqIfJShqqTBQQGU_S6OOo,105
 sibi_dst/df_helper/backends/http/_http_config.py,sha256=eGPFdqZ5M3Tscqx2P93B6XoBEEzlmdt7yNg7PXUQnNQ,4726
 sibi_dst/df_helper/backends/parquet/__init__.py,sha256=esWJ9aSuYC26d-T01z9dPrJ1uqJzvdaPNTYRb5qXTlQ,182
@@ -17,11 +12,11 @@ sibi_dst/df_helper/backends/parquet/_filter_handler.py,sha256=TvDf0RXta7mwJv11GN
 sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=TaU5_wG1Y3lQC8DVCItVvMnc6ZJmECLu3avssVEMbaM,10591
 sibi_dst/df_helper/backends/sqlalchemy/__init__.py,sha256=LjWm9B7CweTvlvFOgB90XjSe0lVLILAIYMWKPkFXFm8,265
 sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py,sha256=gppZrXLGK8U8xfkzRQPZCIFoWY-miP04nDNHpV8lXtU,10600
-sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py,sha256=
+sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py,sha256=ph4w8Sd9eVr_jUIZuDhGyEwtDn0KQkb0lUkERrIXKGM,12852
 sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=NXVhtYF2mYsrW2fXBkL29VQ5gxAlOYPJkYa8HZKYUyM,2846
 sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=Q93O_xqK0SdrS3IrijVcqky_Zf6xKjtPHdI3qnf1g8E,7457
-sibi_dst/df_helper/core/__init__.py,sha256=
-sibi_dst/df_helper/core/_defaults.py,sha256=
+sibi_dst/df_helper/core/__init__.py,sha256=LfmTqFh6GUZup-g95bcXgAxX7J5Hkve7ftLE_CJg_AE,409
+sibi_dst/df_helper/core/_defaults.py,sha256=9UMEMu2wXznO5UzEhnQ82f_ZazZ20JRyRXIi3HP3gDw,4043
 sibi_dst/df_helper/core/_filter_handler.py,sha256=Pmbzygry2mpkNPVS7DBMulHpAb1yYZNFqUU0bJTWJF0,11214
 sibi_dst/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxRrQKE5FQRxcEWsac,6736
 sibi_dst/df_helper/core/_query_config.py,sha256=1ApqmuSGXTC3CdF-xMsSbCa3V2Z5hOP3Wq5huhzZwqY,439
@@ -37,13 +32,12 @@ sibi_dst/osmnx_helper/basemaps/router_plotter.py,sha256=UAiijn-J-jjX4YnL0_P9SFqT
 sibi_dst/osmnx_helper/utils.py,sha256=BzuY8CtYnBAAO8UAr_M7EOk6CP1zcifNLs8pkdFZEFg,20577
 sibi_dst/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/tests/test_data_wrapper_class.py,sha256=6uFmZR2DxnxQz49L5jT2ehlKvlLnpUHMLFB_PqqUq7k,3336
-sibi_dst/utils/__init__.py,sha256=
-sibi_dst/utils/airflow_manager.py,sha256=-d44EKUZNYJyp4wuNwRvilRQktunArPOB5fZuWdQv10,7526
+sibi_dst/utils/__init__.py,sha256=H0Yr_Xo4dBTf03_Si_cggmPNSv6cf8_BBetoHJ86Tiw,1162
 sibi_dst/utils/clickhouse_writer.py,sha256=iAUe4_Kn2WR1xZjpLW2FOWCWfOTw6fCGMTUcWxIQJ60,9877
 sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
 sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
 sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
-sibi_dst/utils/data_wrapper.py,sha256=
+sibi_dst/utils/data_wrapper.py,sha256=69aPQFP178-QTJ_joJYqymP--wNxa1qzri_KkvvUTIw,9688
 sibi_dst/utils/date_utils.py,sha256=T3ij-WOQu3cIfmNAweSVMWWr-hVtuBcTGjEY-cMJIvU,18627
 sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
 sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
@@ -54,7 +48,7 @@ sibi_dst/utils/parquet_saver.py,sha256=O62xwPfphOpKgEiHqnts20CPSU96pxs49Cg7PVetL
 sibi_dst/utils/phone_formatter.py,sha256=tsVTDamuthFYgy4-5UwmQkPQ-FGTGH7MjZyH8utAkIY,4945
 sibi_dst/utils/storage_config.py,sha256=TE15H-7d0mqwYPSUgrdidK9U7N7p87Z8JfUQH4-jdPs,4123
 sibi_dst/utils/storage_manager.py,sha256=btecX7ggNb7rfu5EK9Xuu2q_FZA7r_rB_tfhQ8V96qc,6567
-sibi_dst/utils/update_planner.py,sha256=
+sibi_dst/utils/update_planner.py,sha256=t9A5DLE9cDiYNO8ctQIWVyVWnkMSV0PfbBJ43A0bQv4,9742
 sibi_dst/utils/webdav_client.py,sha256=pYF1UsGOuxYeGLq7aBfwZFvkvD4meOcbbaiZ4d6GW9I,7107
 sibi_dst/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v2/df_helper/__init__.py,sha256=XuH6jKYAPg2DdRbsxxBSxp9X3x-ARyaT0xe27uILrVo,99
@@ -80,6 +74,6 @@ sibi_dst/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v3/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v3/df_helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v3/df_helper/_df_helper.py,sha256=NKIQ4Y-Tn-e841sbZxzLh3Q071_Zo9Vu4y3OAXcsO98,3900
-sibi_dst-
-sibi_dst-
-sibi_dst-
+sibi_dst-2025.1.1.dist-info/METADATA,sha256=OBt3aCLjPtRPN-YxaKLvNL13_H5sRqjEo-NpDMK-nD0,2366
+sibi_dst-2025.1.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+sibi_dst-2025.1.1.dist-info/RECORD,,
sibi_dst/df_helper/backends/django/__init__.py
REMOVED
@@ -1,11 +0,0 @@
-from __future__ import annotations
-
-from ._io_dask import ReadFrameDask
-from ._db_connection import DjangoConnectionConfig
-from ._load_from_db import DjangoLoadFromDb
-
-__all__ = [
-    "DjangoConnectionConfig",
-    "ReadFrameDask",
-    "DjangoLoadFromDb"
-]
sibi_dst/df_helper/backends/django/_db_connection.py
REMOVED
@@ -1,88 +0,0 @@
-from typing import Any
-
-from pydantic import BaseModel, model_validator
-
-from ._sql_model_builder import DjangoSqlModelBuilder
-
-
-class DjangoConnectionConfig(BaseModel):
-    """
-    Represents a configuration for establishing a Django database connection.
-
-    This class is used for defining the configurations necessary to establish a Django
-    database connection. It supports dynamic model generation if the model is not
-    provided explicitly. It also validates the connection configuration to ensure it
-    is properly set up before being used.
-
-    :ivar live: Indicates whether the connection is live. Automatically set to False if
-        a table is provided without a pre-built model.
-    :type live: bool
-    :ivar connection_name: The name of the database connection to use. This is a mandatory
-        parameter and must be provided.
-    :type connection_name: str
-    :ivar table: The name of the database table to use. Required for dynamic model
-        generation when no model is provided.
-    :type table: str
-    :ivar model: The Django model that represents the database table. If not provided,
-        this can be generated dynamically by using the table name.
-    :type model: Any
-    """
-    live: bool = False
-    connection_name: str = None
-    table: str = None
-    model: Any = None
-
-    @model_validator(mode="after")
-    def check_model(self):
-        """
-        Validates and modifies the instance based on the provided attributes and conditions.
-        This method ensures that all required parameters are populated and consistent, and it
-        dynamically builds a model if necessary. The method also ensures the connection is
-        validated after the model preparation process.
-
-        :raises ValueError: If `connection_name` is not provided.
-        :raises ValueError: If `table` name is not specified when building the model dynamically.
-        :raises ValueError: If there are errors during the dynamic model-building process.
-        :raises ValueError: If `validate_connection` fails due to invalid configuration.
-        :return: The validated and potentially mutated instance.
-        """
-        # connection_name is mandatory
-        if self.connection_name is None:
-            raise ValueError("Connection name must be specified")
-
-        # If table is provided, enforce live=False
-        if self.table:
-            self.live = False
-
-        # If model is not provided, build it dynamically
-        if not self.model:
-            if not self.table:
-                raise ValueError("Table name must be specified to build the model")
-            try:
-                self.model = DjangoSqlModelBuilder(
-                    connection_name=self.connection_name, table=self.table
-                ).build_model()
-            except Exception as e:
-                raise ValueError(f"Failed to build model: {e}")
-        else:
-            self.live = True
-        # Validate the connection after building the model
-        self.validate_connection()
-        return self
-
-    def validate_connection(self):
-        """
-        Ensures the database connection is valid by performing a simple
-        query. Raises a ValueError if the connection is broken or if any
-        other exception occurs during the query.
-
-        :raises ValueError: If the connection to the database cannot be
-            established or if the query fails.
-        """
-        try:
-            # Perform a simple query to test the connection
-            self.model.objects.using(self.connection_name).exists()
-        except Exception as e:
-            raise ValueError(
-                f"Failed to connect to the database '{self.connection_name}': {e}"
-            )