sibi-dst 2025.9.6__tar.gz → 2025.9.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/PKG-INFO +1 -1
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/pyproject.toml +1 -1
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/boilerplate/__init__.py +2 -0
- sibi_dst-2025.9.8/sibi_dst/utils/boilerplate/base_pipeline.py +178 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/clickhouse_writer.py +5 -6
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/README.md +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/_artifact_updater_async.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/_artifact_updater_threaded.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/_parquet_reader.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/core/_defaults.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/data_cleaner.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/geopy_helper/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/geopy_helper/utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/osmnx_helper/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/osmnx_helper/route_path_builder.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/osmnx_helper/utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/tests/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/tests/test_baseclass.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/async_utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/base.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/boilerplate/base_attacher.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/boilerplate/base_data_cube.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/boilerplate/base_parquet_artifact.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/boilerplate/base_parquet_reader.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/boilerplate/hybrid_data_loader.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/business_days.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/credentials.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/dask_utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/data_from_http_source.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/data_utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/data_wrapper.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/date_utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/df_utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/file_age_checker.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/file_utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/filepath_generator.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/iceberg_saver.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/log_utils.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/manifest_manager.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/parquet_saver.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/periods.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/phone_formatter.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/progress/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/progress/jobs.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/progress/sse_runner.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/storage_config.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/storage_hive.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/storage_manager.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/update_planner.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/webdav_client.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/utils/write_gatekeeper.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/utils/__init__.py +0 -0
- {sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -3,6 +3,7 @@ from .base_data_cube import BaseDataCube
|
|
3
3
|
from .base_attacher import make_attacher
|
4
4
|
from .base_parquet_reader import BaseParquetReader
|
5
5
|
from .hybrid_data_loader import HybridDataLoader
|
6
|
+
from .base_pipeline import BasePipeline
|
6
7
|
|
7
8
|
__all__ = [
|
8
9
|
"BaseDataCube",
|
@@ -10,5 +11,6 @@ __all__ = [
|
|
10
11
|
"make_attacher",
|
11
12
|
"BaseParquetReader",
|
12
13
|
"HybridDataLoader",
|
14
|
+
"BasePipeline",
|
13
15
|
]
|
14
16
|
|
@@ -0,0 +1,178 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
5
|
+
from typing import Type, Any, Callable, List
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
import dask.dataframe as dd
|
9
|
+
|
10
|
+
from sibi_dst.utils import ManagedResource, ParquetSaver
|
11
|
+
from sibi_dst.df_helper import ParquetReader
|
12
|
+
from sibi_dst.utils.dask_utils import dask_is_empty
|
13
|
+
|
14
|
+
|
15
|
+
class DateRangeHelper:
    """Expand a start/end date pair into daily or calendar-month ranges."""

    @staticmethod
    def generate_daily_ranges(start_date: str, end_date: str, date_format: str = "%Y-%m-%d") -> List[str]:
        """
        Return every calendar day from start_date through end_date, inclusive.

        Both bounds are normalized to midnight before the range is built, so a
        time-of-day on either bound (e.g. start at 10:00, end at 05:00) cannot
        cause the final day to be dropped.

        :param start_date: First day of the range (anything pd.to_datetime accepts).
        :param end_date: Last day of the range (inclusive).
        :param date_format: strftime format applied to each day.
        :return: Formatted day strings in ascending order; empty when
                 start_date is after end_date.
        """
        start = pd.to_datetime(start_date)
        end = pd.to_datetime(end_date)
        # normalize=True snaps both bounds to midnight before stepping by day.
        days = pd.date_range(start, end, freq="D", normalize=True)
        return [day.strftime(date_format) for day in days]

    @staticmethod
    def generate_monthly_ranges(start_date: str, end_date: str, date_format: str = "%Y-%m-%d") -> List[tuple[str, str]]:
        """
        Generate (start_date, end_date) tuples for each calendar month in range.
        Always includes the first and last month, even if partial.

        Each tuple starts on the 1st of its month (so the first tuple may begin
        before start_date) and ends on the month's last day, except the final
        tuple, which is clamped to end_date.

        :param start_date: Any date inside the first month to include.
        :param end_date: Last date to cover; clamps the final tuple.
        :param date_format: strftime format applied to both tuple members.
        :return: One (month_start, month_end) tuple per month, ascending;
                 empty when end_date precedes the first month's start.
        """
        start = pd.to_datetime(start_date)
        end = pd.to_datetime(end_date)
        ranges = []
        # Anchor at midnight on the 1st of the start month. Without normalize(),
        # a time-of-day carried over from start_date could make the anchor of the
        # final month compare greater than end_date and silently skip that month.
        current = start.normalize().replace(day=1)
        while current <= end:
            # MonthEnd(0) rolls forward to the last day of the current month.
            month_end = (current + pd.offsets.MonthEnd(0)).normalize()
            ranges.append((
                current.strftime(date_format),
                min(month_end, end).strftime(date_format),
            ))
            current += pd.DateOffset(months=1)
        return ranges
|
40
|
+
|
41
|
+
class BasePipeline(ManagedResource):
    """
    Load a dataset for a date range, write it to day-partitioned Parquet
    storage, read it back, and push it to ClickHouse.

    NOTE(review): ``self.debug``, ``self.logger`` and ``self.emit`` are used
    below but not defined here; presumably they come from ManagedResource —
    confirm against that class.
    """

    def __init__(
        self,
        start_date: str,
        end_date: str,
        dataset_cls: Type,
        parquet_storage_path: str,
        *,
        fs: Any,
        filename: str = "dataset",
        date_field: str = "date",
        max_workers: int = 4,
        dataset_kwargs: dict | None = None,
        **kwargs,
    ):
        """
        :param start_date: First date of the range handled by the pipeline.
        :param end_date: Last date of the range handled by the pipeline.
        :param dataset_cls: Class instantiated with start_date, end_date, debug,
            logger and ``dataset_kwargs``; its ``aload`` is awaited in ``aload``.
        :param parquet_storage_path: Root path for Parquet output; trailing
            slashes are stripped.
        :param fs: Filesystem object, forwarded to the base class and to the
            Parquet saver/reader.
        :param filename: Base name (without extension) of each written file.
        :param date_field: Column used to split the data by day.
        :param max_workers: Thread-pool size for concurrent writes.
        :param dataset_kwargs: Extra keyword arguments for ``dataset_cls``.
        :param kwargs: Forwarded to the base class constructor.
        """
        # fs is keyword-only here but the base class receives it through kwargs.
        kwargs["fs"] = fs
        super().__init__(**kwargs)

        self.start_date = start_date
        self.end_date = end_date
        self.fs = fs
        self.filename = filename
        self.date_field = date_field
        self.max_workers = max_workers
        self.storage_path = parquet_storage_path.rstrip("/")
        # Populated by aload(); None until the dataset has been loaded.
        self.df: dd.DataFrame | None = None

        self.ds = dataset_cls(
            start_date=self.start_date,
            end_date=self.end_date,
            debug=self.debug,
            logger=self.logger,
            **(dataset_kwargs or {}),
        )

    def _get_storage_path_for_date(self, date: pd.Timestamp) -> str:
        """Return ``<storage_path>/YYYY/MM/DD`` for the given date."""
        return f"{self.storage_path}/{date.year}/{date.month:02d}/{date.day:02d}"

    def _get_output_filename(self, fmt: str = "parquet") -> str:
        """Return the output file name, i.e. ``<filename>.<fmt>``."""
        return f"{self.filename}.{fmt}"

    async def aload(self, **kwargs) -> dd.DataFrame:
        """Load the dataset via ``self.ds.aload(**kwargs)``, cache it on ``self.df`` and return it."""
        await self.emit("status", message="Loading dataset...", progress=5)
        self.df = await self.ds.aload(**kwargs)
        return self.df

    async def to_parquet(self, **kwargs) -> None:
        """
        Load the dataset and write one Parquet file per day of the date range.

        Days with no rows are skipped. Writes run on a thread pool of
        ``max_workers`` threads and are awaited together before the
        "complete" event is emitted. ``kwargs`` are forwarded to ``aload``.
        """
        df = await self.aload(**kwargs)
        if dask_is_empty(df):
            self.logger.warning("No data to save.")
            return

        # Unparseable values become NaT and therefore match no day below.
        df[self.date_field] = dd.to_datetime(df[self.date_field], errors="coerce")
        dates = DateRangeHelper.generate_daily_ranges(self.start_date, self.end_date)

        # Loop-invariant lookups, hoisted out of the per-day loop.
        loop = asyncio.get_running_loop()
        output_filename = self._get_output_filename()

        tasks = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            for date_str in dates:
                date_obj = pd.to_datetime(date_str).date()
                df_day = df[df[self.date_field].dt.date == date_obj]
                if dask_is_empty(df_day):
                    self.logger.info(f"No data for {date_obj}, skipping.")
                    continue

                path = self._get_storage_path_for_date(pd.Timestamp(date_obj))
                await self.emit("status", message=f"Saving data for {date_obj}")

                saver = ParquetSaver(
                    df_result=df_day,
                    parquet_storage_path=path,
                    fs=self.fs,
                    debug=self.debug,
                    logger=self.logger,
                )

                tasks.append(
                    loop.run_in_executor(executor, saver.save_to_parquet, output_filename)
                )

            # Await inside the ``with`` block: leaving it first would call
            # executor.shutdown(wait=True) and block the event loop until
            # every worker thread had finished.
            await asyncio.gather(*tasks)

        await self.emit("complete", message="All partitions written.")

    async def from_parquet(self, **kwargs) -> dd.DataFrame:
        """
        Read the pipeline's Parquet output for the configured date range.

        Builds a ParquetReader over ``storage_path`` and the output file name,
        and returns the result of its ``aload(**kwargs)``.
        """
        reader = ParquetReader(
            parquet_start_date=self.start_date,
            parquet_end_date=self.end_date,
            parquet_storage_path=self.storage_path,
            parquet_filename=self._get_output_filename(),
            fs=self.fs,
            debug=self.debug,
            logger=self.logger,
        )
        return await reader.aload(**kwargs)

    async def to_clickhouse(self, clk_conf: dict, **kwargs):
        """
        Writes daily-partitioned data to ClickHouse using concurrent threads.

        The data is read back with ``from_parquet(**kwargs)``, split by the
        distinct dates found in ``date_field``, and each day is handed to
        ``ClickHouseWriter.save_to_clickhouse`` on a thread pool.

        :param clk_conf: Keyword arguments used to construct ClickHouseWriter.
        """
        # Imported lazily, as in the original; the reason is not visible here.
        from sibi_dst.utils import ClickHouseWriter

        df = await self.from_parquet(**kwargs)
        if dask_is_empty(df):
            self.logger.warning("No data to write to ClickHouse.")
            return

        df[self.date_field] = dd.to_datetime(df[self.date_field], errors="coerce")
        # Persist once so the per-day filters below reuse the computed data.
        df = df.persist()

        unique_dates = df[self.date_field].dt.date.dropna().unique().compute()
        # Explicit length check: the computed result is presumably a pandas
        # Series, which cannot be used in a plain truthiness test.
        if len(unique_dates) == 0:
            self.logger.warning("No valid dates found for partitioning.")
            return

        # One writer instance is shared by all worker threads.
        clk = ClickHouseWriter(**clk_conf)
        loop = asyncio.get_running_loop()
        tasks = []

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            for date in unique_dates:
                df_day = df[df[self.date_field].dt.date == date]
                if dask_is_empty(df_day):
                    self.logger.info(f"[ClickHouse] No data for {date}, skipping.")
                    continue

                self.logger.info(f"[ClickHouse] Writing {len(df_day)} rows for {date}")

                tasks.append(
                    loop.run_in_executor(executor, clk.save_to_clickhouse, df_day)
                )

            # Await before leaving the ``with`` block so the executor's
            # shutdown-on-exit never blocks the event loop.
            await asyncio.gather(*tasks)

        self.logger.info(f"ClickHouse write complete for {len(unique_dates)} daily partitions.")
|
176
|
+
|
177
|
+
|
178
|
+
# Names exported by a star-import of this module; only BasePipeline is listed.
__all__ = ["BasePipeline"]
|
@@ -90,10 +90,14 @@ class ClickHouseWriter(ManagedResource):
|
|
90
90
|
|
91
91
|
# one client per thread to avoid session contention
|
92
92
|
self._tlocal = threading.local()
|
93
|
+
ow = self.overwrite
|
94
|
+
if ow:
|
95
|
+
self._command(f"DROP TABLE IF EXISTS {self._ident(self.table)}")
|
96
|
+
self.logger.info(f"Dropped table {self.table} (overwrite=True)")
|
93
97
|
|
94
98
|
# ------------- public -------------
|
95
99
|
|
96
|
-
def save_to_clickhouse(self, df: dd.DataFrame
|
100
|
+
def save_to_clickhouse(self, df: dd.DataFrame) -> None:
|
97
101
|
"""
|
98
102
|
Persist a Dask DataFrame into ClickHouse.
|
99
103
|
|
@@ -118,15 +122,10 @@ class ClickHouseWriter(ManagedResource):
|
|
118
122
|
)
|
119
123
|
|
120
124
|
# (re)create table
|
121
|
-
ow = self.overwrite if overwrite is None else bool(overwrite)
|
122
125
|
dtypes = df._meta_nonempty.dtypes # metadata-only types (no compute)
|
123
126
|
schema_sql = self._generate_clickhouse_schema(dtypes)
|
124
127
|
engine_sql = self._default_engine_sql() if not self.engine else self.engine
|
125
128
|
|
126
|
-
if ow:
|
127
|
-
self._command(f"DROP TABLE IF EXISTS {self._ident(self.table)}")
|
128
|
-
self.logger.info(f"Dropped table {self.table} (overwrite=True)")
|
129
|
-
|
130
129
|
create_sql = f"CREATE TABLE IF NOT EXISTS {self._ident(self.table)} ({schema_sql}) {engine_sql}"
|
131
130
|
self._command(create_sql)
|
132
131
|
self.logger.info(f"Ensured table {self.table} exists")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/parquet/_parquet_options.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py
RENAMED
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py
RENAMED
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py
RENAMED
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py
RENAMED
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py
RENAMED
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py
RENAMED
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py
RENAMED
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py
RENAMED
File without changes
|
{sibi_dst-2025.9.6 → sibi_dst-2025.9.8}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|