sibi-dst 2025.8.6__tar.gz → 2025.8.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/PKG-INFO +1 -1
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/pyproject.toml +1 -1
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/_df_helper.py +38 -4
- sibi_dst-2025.8.7/sibi_dst/utils/async_utils.py +12 -0
- sibi_dst-2025.8.7/sibi_dst/utils/storage_hive.py +195 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/README.md +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/_artifact_updater_async.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/_artifact_updater_threaded.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/_parquet_reader.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/core/_defaults.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/data_cleaner.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/geopy_helper/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/geopy_helper/utils.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/osmnx_helper/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/osmnx_helper/route_path_builder.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/osmnx_helper/utils.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/tests/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/base.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/business_days.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/clickhouse_writer.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/credentials.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/data_from_http_source.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/data_utils.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/data_wrapper.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/date_utils.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/df_utils.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/file_age_checker.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/file_utils.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/filepath_generator.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/log_utils.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/manifest_manager.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/parquet_saver.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/periods.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/phone_formatter.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/storage_config.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/storage_manager.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/update_planner.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/utils/webdav_client.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/utils/__init__.py +0 -0
- {sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import asyncio
|
3
4
|
from typing import Any, Dict, Optional, TypeVar, Union
|
4
5
|
|
5
6
|
import dask.dataframe as dd
|
@@ -104,7 +105,6 @@ class HttpBackend(BaseBackend):
|
|
104
105
|
return self.total_records, result
|
105
106
|
|
106
107
|
|
107
|
-
# ---- Main DfHelper ----
|
108
108
|
class DfHelper(ManagedResource):
|
109
109
|
_BACKEND_STRATEGIES = {
|
110
110
|
"sqlalchemy": SqlAlchemyBackend,
|
@@ -198,6 +198,37 @@ class DfHelper(ManagedResource):
|
|
198
198
|
df = df.persist() if persist else df
|
199
199
|
return df.compute() if as_pandas else df
|
200
200
|
|
201
|
+
async def load_async(
    self,
    *,
    persist: bool = False,
    as_pandas: bool = False,
    prefer_native: bool = False,
    **options,
):
    """
    Asynchronous counterpart of `load()`.

    Two execution paths exist:

    * native: taken only when `prefer_native` is True *and* the class of
      `self.backend_strategy` defines its own `aload` (i.e. it is not the
      one found on `BaseBackend`); the call is delegated to `self.aload`.
    * threaded: in every other case the synchronous `load()` is run in a
      worker thread through `asyncio.to_thread`.

    Args:
        persist: forwarded unchanged to `load` / `aload`.
        as_pandas: forwarded unchanged to `load` / `aload`.
        prefer_native: opt in to the native path described above.
        **options: forwarded unchanged to `load` / `aload`.

    Returns:
        Whatever the selected `load` / `aload` call returns.
    """
    # Only inspect the backend when the caller asked for the native path,
    # so the default call never touches `backend_strategy`.
    use_native = False
    if prefer_native:
        backend_aload = type(self.backend_strategy).aload
        use_native = backend_aload is not BaseBackend.aload

    if not use_native:
        # Hop off the event loop: run the blocking load in a worker thread.
        return await asyncio.to_thread(
            self.load,
            persist=persist,
            as_pandas=as_pandas,
            **options,
        )

    return await self.aload(persist=persist, as_pandas=as_pandas, **options)
|
231
|
+
|
201
232
|
# ---------- dataframe post-processing ----------
|
202
233
|
def _post_process_df(self, df: dd.DataFrame) -> dd.DataFrame:
|
203
234
|
self.logger.debug("Post-processing DataFrame.")
|
@@ -240,9 +271,12 @@ class DfHelper(ManagedResource):
|
|
240
271
|
return df
|
241
272
|
|
242
273
|
# ---------- sinks ----------
|
243
|
-
def save_to_parquet(self, df: dd.DataFrame,
|
274
|
+
def save_to_parquet(self, df: dd.DataFrame, **kwargs):
|
244
275
|
fs: AbstractFileSystem = kwargs.pop("fs", self.fs)
|
245
|
-
path: str = kwargs.pop("parquet_storage_path")
|
276
|
+
path: str = kwargs.pop("parquet_storage_path", self.backend_parquet.parquet_storage_path if self.backend_parquet else None)
|
277
|
+
parquet_filename = kwargs.pop("parquet_filename" or self._backend_params.parquet_filename if self.backend_parquet else None)
|
278
|
+
if not parquet_filename:
|
279
|
+
raise ValueError("A 'parquet_filename' keyword argument must be provided.")
|
246
280
|
if not fs:
|
247
281
|
raise ValueError("A filesystem (fs) must be provided to save the parquet file.")
|
248
282
|
if not path:
|
@@ -272,7 +306,7 @@ class DfHelper(ManagedResource):
|
|
272
306
|
writer.save_to_clickhouse(df)
|
273
307
|
self.logger.debug("Save to ClickHouse completed.")
|
274
308
|
|
275
|
-
# ----------
|
309
|
+
# ---------- period loaders ----------
|
276
310
|
def load_period(self, dt_field: str, start: str, end: str, **kwargs):
    """
    Load data restricted to a period.

    The keyword arguments for `load()` are produced by
    `_prepare_period_filters(dt_field, start, end, **kwargs)` and passed
    straight through; the result of `load()` is returned as-is.
    """
    return self.load(**self._prepare_period_filters(dt_field, start, end, **kwargs))
|
@@ -0,0 +1,12 @@
|
|
1
|
+
import asyncio
|
2
|
+
import dask.dataframe as dd
|
3
|
+
|
4
|
+
|
5
|
+
def is_dask_dataframe(df):
    """Return True when `df` is an instance of `dask.dataframe.DataFrame`."""
    dask_frame_type = dd.DataFrame
    return isinstance(df, dask_frame_type)
|
8
|
+
|
9
|
+
async def to_thread(func, *args, **kwargs):
    """
    Run `func(*args, **kwargs)` in a worker thread and return its result.

    Thin, explicitly named wrapper around `asyncio.to_thread` so call sites
    make it obvious where execution leaves the event loop.
    """
    outcome = await asyncio.to_thread(func, *args, **kwargs)
    return outcome
|
12
|
+
|
@@ -0,0 +1,195 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
import pandas as pd
|
3
|
+
import dask.dataframe as dd
|
4
|
+
from typing import Iterable, Optional, List, Tuple, Union
|
5
|
+
import fsspec
|
6
|
+
|
7
|
+
DNFFilter = List[List[Tuple[str, str, Union[str, int]]]]
|
8
|
+
|
9
|
+
|
10
|
+
class HiveDatePartitionedStore:
    """
    Dask-only Parquet store with Hive-style yyyy=…/mm=…/dd=… partitions.

    - `write(...)` safely "overwrites" S3 prefixes via per-object deletes (no bulk DeleteObjects).
    - `read_range(...)` builds DNF filters and auto-matches partition types (string vs int).
    """

    def __init__(
        self,
        path: str,
        *,
        filesystem=None,  # fsspec filesystem or None to infer from path
        date_col: str = "tracking_dt",
        compression: str = "zstd",
        partition_values_as_strings: bool = True,  # keep mm=07, dd=01 folder names
        logger=None,
    ) -> None:
        """
        Args:
            path: root location of the dataset.
            filesystem: fsspec filesystem; when falsy it is inferred from
                `path` via `fsspec.open(path).fs`.
            date_col: column the yyyy/mm/dd partitions are derived from.
            compression: codec passed to `to_parquet`.
            partition_values_as_strings: when True partitions are written as
                strftime strings ("%Y", "%m", "%d"); otherwise as integers.
            logger: optional logger; every log call is skipped when falsy.
        """
        self.path = path
        self.fs = filesystem or fsspec.open(path).fs
        self.date_col = date_col
        self.compression = compression
        self.partition_values_as_strings = partition_values_as_strings
        self.log = logger

    # ----------------- public API -----------------

    def write(
        self,
        df: dd.DataFrame,
        *,
        repartition: Optional[int] = None,
        overwrite: bool = False,
    ) -> None:
        """
        Write a Dask DataFrame to Hive-style yyyy/mm/dd partitions.

        Args:
            df: frame to write; must contain `self.date_col`.
            repartition: when truthy, repartition to this many partitions
                before writing.
            overwrite: when True, every object under `self.path` is deleted
                (one by one) before the write.

        Raises:
            KeyError: if `self.date_col` is not a column of `df`.
        """
        self._require_col(df, self.date_col)
        # errors="coerce": values that cannot be parsed do not raise here.
        ser = dd.to_datetime(df[self.date_col], errors="coerce")

        if self.partition_values_as_strings:
            parts = {
                "yyyy": ser.dt.strftime("%Y"),
                "mm": ser.dt.strftime("%m"),
                "dd": ser.dt.strftime("%d"),
            }
        else:
            parts = {
                "yyyy": ser.dt.year.astype("int32"),
                "mm": ser.dt.month.astype("int8"),
                "dd": ser.dt.day.astype("int8"),
            }

        # Replace the date column with its parsed form and add partition columns.
        df = df.assign(**{self.date_col: ser}, **parts)

        if repartition:
            df = df.repartition(npartitions=repartition)

        if overwrite:
            self._safe_rm_prefix(self.path)

        if self.log:
            self.log.info(f"Writing parquet to {self.path} (hive yyyy/mm/dd)…")

        df.to_parquet(
            self.path,
            engine="pyarrow",
            write_index=False,
            filesystem=self.fs,
            partition_on=["yyyy", "mm", "dd"],
            compression=self.compression,
            overwrite=False,  # we pre-cleaned if overwrite=True
        )

    def read_range(
        self,
        start: Union[str, pd.Timestamp],
        end: Union[str, pd.Timestamp],
        *,
        columns: Optional[Iterable[str]] = None,
    ) -> dd.DataFrame:
        """
        Read a date window with partition pruning. Tries string filters first,
        falls back to integer filters if the first attempt raises.

        Args:
            start: inclusive start of the window (anything `pd.Timestamp` accepts).
            end: inclusive end of the window.
            columns: optional column subset; a falsy value reads all columns.

        Raises:
            ValueError: if `start` is after `end`.
        """
        str_filters = self._dnf_filters_for_range_str(start, end)
        # Materialize once: a one-shot iterable (e.g. a generator) would be
        # exhausted by the first attempt, leaving the fallback with no columns.
        column_list = list(columns) if columns else None
        try:
            return self._read_with_filters(str_filters, column_list)
        except Exception as exc:
            # Deliberate best-effort: any failure of the string-typed attempt
            # triggers the integer-typed retry, but the cause is not hidden.
            if self.log:
                self.log.debug(
                    f"String partition filters failed ({exc}); retrying with integer filters."
                )
            int_filters = self._dnf_filters_for_range_int(start, end)
            return self._read_with_filters(int_filters, column_list)

    # Convenience: full month / single day
    def read_month(self, year: int, month: int, *, columns=None) -> dd.DataFrame:
        """Read the whole calendar month `year`-`month`."""
        start = pd.Timestamp(year=year, month=month, day=1)
        # MonthEnd(0) rolls the first of the month forward to its last day.
        end = start + pd.offsets.MonthEnd(0)
        return self.read_range(start, end, columns=columns)

    def read_day(self, year: int, month: int, day: int, *, columns=None) -> dd.DataFrame:
        """Read the single day `year`-`month`-`day`."""
        ts = pd.Timestamp(year=year, month=month, day=day)
        return self.read_range(ts, ts, columns=columns)

    # ----------------- internals -----------------

    @staticmethod
    def _pad2(n: int) -> str:
        """Format `n` as a zero-padded, at-least-two-digit string."""
        return f"{n:02d}"

    def _read_with_filters(self, filters, columns):
        """Issue the `dd.read_parquet` call shared by both filter flavours."""
        return dd.read_parquet(
            self.path,
            engine="pyarrow",
            filesystem=self.fs,
            columns=columns,
            filters=filters,
        )

    def _safe_rm_prefix(self, path: str) -> None:
        """Per-object delete to avoid S3 bulk DeleteObjects (and Content-MD5 issues)."""
        if not self.fs.exists(path):
            return
        if self.log:
            self.log.info(f"Cleaning prefix (safe delete): {path}")
        for k in self.fs.find(path):
            try:
                if hasattr(self.fs, "rm_file"):
                    self.fs.rm_file(k)
                else:
                    self.fs.rm(k, recursive=False)
            except Exception as e:
                # Best-effort cleanup: a failed delete is reported, not fatal.
                if self.log:
                    self.log.warning(f"Could not delete {k}: {e}")

    @staticmethod
    def _require_col(df: dd.DataFrame, col: str) -> None:
        """Raise KeyError unless `col` is one of `df.columns`."""
        if col not in df.columns:
            raise KeyError(f"'{col}' not in DataFrame")

    # ---- DNF builders (string vs int) ----
    @staticmethod
    def _build_dnf(start, end, fmt_year, fmt_part) -> DNFFilter:
        """
        Build the DNF (OR of AND-clauses) partition filter for [start, end].

        `fmt_year` converts a year and `fmt_part` converts a month or day to
        the value type used in the filter tuples. Clause order: start month,
        end month, then the whole months / whole years in between.

        Raises:
            ValueError: if `start` is after `end`.
        """
        s, e = pd.Timestamp(start), pd.Timestamp(end)
        if s > e:
            raise ValueError("start > end")

        def year_is(y):
            return ("yyyy", "==", fmt_year(y))

        def month_is(m):
            return ("mm", "==", fmt_part(m))

        day_from = ("dd", ">=", fmt_part(s.day))
        day_until = ("dd", "<=", fmt_part(e.day))

        # Window inside a single month: one clause bounded on both sides.
        if s.year == e.year and s.month == e.month:
            return [[year_is(s.year), month_is(s.month), day_from, day_until]]

        clauses: DNFFilter = [
            [year_is(s.year), month_is(s.month), day_from],
            [year_is(e.year), month_is(e.month), day_until],
        ]
        if s.year == e.year:
            # Whole months strictly between the two edge months.
            clauses.extend(
                [year_is(s.year), month_is(m)] for m in range(s.month + 1, e.month)
            )
            return clauses

        # Remainder of the start year, whole years in between, head of the end year.
        clauses.extend([year_is(s.year), month_is(m)] for m in range(s.month + 1, 13))
        clauses.extend([year_is(y)] for y in range(s.year + 1, e.year))
        clauses.extend([year_is(e.year), month_is(m)] for m in range(1, e.month))
        return clauses

    def _dnf_filters_for_range_str(self, start, end) -> DNFFilter:
        """DNF filters with string values: year via `str`, month/day zero-padded."""
        return self._build_dnf(start, end, str, self._pad2)

    @staticmethod
    def _dnf_filters_for_range_int(start, end) -> DNFFilter:
        """DNF filters with integer year/month/day values."""
        return HiveDatePartitionedStore._build_dnf(start, end, int, int)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/parquet/_parquet_options.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py
RENAMED
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py
RENAMED
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py
RENAMED
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py
RENAMED
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py
RENAMED
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py
RENAMED
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py
RENAMED
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py
RENAMED
File without changes
|
{sibi_dst-2025.8.6 → sibi_dst-2025.8.7}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|