sibi-dst 2025.8.7__py3-none-any.whl → 2025.8.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/df_helper/_df_helper.py +105 -89
- sibi_dst/df_helper/_parquet_artifact.py +11 -10
- sibi_dst/df_helper/_parquet_reader.py +4 -0
- sibi_dst/df_helper/backends/parquet/_parquet_options.py +504 -214
- sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +11 -10
- sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +9 -8
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +4 -76
- sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -104
- sibi_dst/utils/boilerplate/__init__.py +6 -0
- sibi_dst/utils/boilerplate/base_data_artifact.py +110 -0
- sibi_dst/utils/boilerplate/base_data_cube.py +79 -0
- sibi_dst/utils/data_wrapper.py +22 -263
- sibi_dst/utils/iceberg_saver.py +126 -0
- sibi_dst/utils/log_utils.py +0 -346
- sibi_dst/utils/parquet_saver.py +110 -9
- sibi_dst/utils/progress/__init__.py +5 -0
- sibi_dst/utils/progress/jobs.py +82 -0
- sibi_dst/utils/progress/sse_runner.py +82 -0
- sibi_dst/utils/storage_hive.py +38 -1
- sibi_dst/utils/update_planner.py +617 -116
- {sibi_dst-2025.8.7.dist-info → sibi_dst-2025.8.8.dist-info}/METADATA +3 -2
- {sibi_dst-2025.8.7.dist-info → sibi_dst-2025.8.8.dist-info}/RECORD +23 -16
- {sibi_dst-2025.8.7.dist-info → sibi_dst-2025.8.8.dist-info}/WHEEL +0 -0
sibi_dst/utils/storage_hive.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
|
2
5
|
import pandas as pd
|
3
6
|
import dask.dataframe as dd
|
4
7
|
from typing import Iterable, Optional, List, Tuple, Union
|
@@ -192,4 +195,38 @@ class HiveDatePartitionedStore:
|
|
192
195
|
clauses.append([("yyyy","==",y)])
|
193
196
|
for m in range(1, eM):
|
194
197
|
clauses.append([("yyyy","==",eY),("mm","==",m)])
|
195
|
-
return clauses
|
198
|
+
return clauses
|
199
|
+
|
200
|
+
async def write_async(
    self,
    df: dd.DataFrame,
    *,
    repartition: int | None = None,
    overwrite: bool = False,
    timeout: float | None = None,
) -> None:
    """Await the blocking ``self.write`` without stalling the event loop.

    The call is handed to a worker thread through ``asyncio.to_thread``.

    Args:
        df: DataFrame passed through to ``self.write`` unchanged.
        repartition: Forwarded to ``self.write`` as ``repartition``.
        overwrite: Forwarded to ``self.write`` as ``overwrite``.
        timeout: Maximum number of seconds to wait. ``None`` waits without
            a limit; ``0`` is a real (immediately expiring) deadline, not a
            synonym for "no timeout".

    Returns:
        Whatever ``self.write`` returns.

    Raises:
        asyncio.TimeoutError: If ``timeout`` elapses first. Only the wait
            is abandoned; a thread that already started running
            ``self.write`` cannot be interrupted and may still complete.
    """
    pending = asyncio.to_thread(
        self.write, df, repartition=repartition, overwrite=overwrite
    )
    # wait_for already treats None as "no limit", so no truthiness branch
    # is needed -- and a branch on `if timeout` would silently drop 0.
    return await asyncio.wait_for(pending, timeout)
|
212
|
+
|
213
|
+
async def read_range_async(
    self,
    start, end, *, columns: Iterable[str] | None = None, timeout: float | None = None
) -> dd.DataFrame:
    """Await the blocking ``self.read_range`` from a worker thread.

    Args:
        start: Range start, passed positionally to ``self.read_range``.
        end: Range end, passed positionally to ``self.read_range``.
        columns: Forwarded to ``self.read_range`` as ``columns``.
        timeout: Maximum number of seconds to wait. ``None`` waits without
            a limit; ``0`` is a real (immediately expiring) deadline, not a
            synonym for "no timeout".

    Returns:
        Whatever ``self.read_range`` returns.

    Raises:
        asyncio.TimeoutError: If ``timeout`` elapses first. Only the wait
            is abandoned; a thread that already started running
            ``self.read_range`` cannot be interrupted.
    """
    pending = asyncio.to_thread(self.read_range, start, end, columns=columns)
    # wait_for handles None itself; branching on truthiness would make a
    # zero timeout behave like "wait forever".
    return await asyncio.wait_for(pending, timeout)
|
221
|
+
|
222
|
+
async def read_month_async(self, year: int, month: int, *, columns=None, timeout: float | None = None):
    """Await the blocking ``self.read_month`` from a worker thread.

    Args:
        year: Passed positionally to ``self.read_month``.
        month: Passed positionally to ``self.read_month``.
        columns: Forwarded to ``self.read_month`` as ``columns``.
        timeout: Maximum number of seconds to wait. ``None`` waits without
            a limit; ``0`` is a real (immediately expiring) deadline, not a
            synonym for "no timeout".

    Returns:
        Whatever ``self.read_month`` returns.

    Raises:
        asyncio.TimeoutError: If ``timeout`` elapses first. Only the wait
            is abandoned; a thread that already started running
            ``self.read_month`` cannot be interrupted.
    """
    pending = asyncio.to_thread(self.read_month, year, month, columns=columns)
    # wait_for handles None itself; branching on truthiness would make a
    # zero timeout behave like "wait forever".
    return await asyncio.wait_for(pending, timeout)
|
227
|
+
|
228
|
+
async def read_day_async(self, year: int, month: int, day: int, *, columns=None, timeout: float | None = None):
    """Await the blocking ``self.read_day`` from a worker thread.

    Args:
        year: Passed positionally to ``self.read_day``.
        month: Passed positionally to ``self.read_day``.
        day: Passed positionally to ``self.read_day``.
        columns: Forwarded to ``self.read_day`` as ``columns``.
        timeout: Maximum number of seconds to wait. ``None`` waits without
            a limit; ``0`` is a real (immediately expiring) deadline, not a
            synonym for "no timeout".

    Returns:
        Whatever ``self.read_day`` returns.

    Raises:
        asyncio.TimeoutError: If ``timeout`` elapses first. Only the wait
            is abandoned; a thread that already started running
            ``self.read_day`` cannot be interrupted.
    """
    pending = asyncio.to_thread(self.read_day, year, month, day, columns=columns)
    # wait_for handles None itself; branching on truthiness would make a
    # zero timeout behave like "wait forever".
    return await asyncio.wait_for(pending, timeout)
|