sibi-dst 2025.9.8__py3-none-any.whl → 2025.9.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/utils/boilerplate/__init__.py +2 -0
- sibi_dst/utils/boilerplate/base_attacher.py +57 -12
- sibi_dst/utils/boilerplate/base_pipeline_template.py +54 -0
- {sibi_dst-2025.9.8.dist-info → sibi_dst-2025.9.9.dist-info}/METADATA +1 -1
- {sibi_dst-2025.9.8.dist-info → sibi_dst-2025.9.9.dist-info}/RECORD +6 -5
- {sibi_dst-2025.9.8.dist-info → sibi_dst-2025.9.9.dist-info}/WHEEL +0 -0
@@ -4,6 +4,7 @@ from .base_attacher import make_attacher
|
|
4
4
|
from .base_parquet_reader import BaseParquetReader
|
5
5
|
from .hybrid_data_loader import HybridDataLoader
|
6
6
|
from .base_pipeline import BasePipeline
|
7
|
+
from .base_pipeline_template import PipelineTemplate
|
7
8
|
|
8
9
|
__all__ = [
|
9
10
|
"BaseDataCube",
|
@@ -12,5 +13,6 @@ __all__ = [
|
|
12
13
|
"BaseParquetReader",
|
13
14
|
"HybridDataLoader",
|
14
15
|
"BasePipeline",
|
16
|
+
"PipelineTemplate",
|
15
17
|
]
|
16
18
|
|
@@ -1,25 +1,70 @@
|
|
1
1
|
from typing import Any, Awaitable, Callable, Sequence, Type
|
2
2
|
|
3
|
-
def make_attacher(
|
4
|
-
cube_cls: Type,
|
5
|
-
fieldnames: Sequence[str],
|
6
|
-
column_names: Sequence[str],
|
7
|
-
) -> Callable[..., Awaitable[Any]]:
|
3
|
+
|
4
|
+
class AttachmentMaker:
|
8
5
|
"""
|
9
|
-
Factory for async attachers.
|
6
|
+
Async attacher class.
|
10
7
|
Skips work if any param value is falsy ([], None, {}, etc.).
|
11
8
|
"""
|
12
9
|
|
13
|
-
async def attach(*, logger=None, debug: bool = False, **params: Any):
|
10
|
+
def __init__(
|
11
|
+
self,
|
12
|
+
cube_cls: Type,
|
13
|
+
fieldnames: Sequence[str],
|
14
|
+
column_names: Sequence[str],
|
15
|
+
):
|
16
|
+
self.cube_cls = cube_cls
|
17
|
+
self.fieldnames = tuple(fieldnames)
|
18
|
+
self.column_names = list(column_names)
|
19
|
+
|
20
|
+
async def attach(self, *, logger=None, debug: bool = False, **params: Any):
|
14
21
|
if any(not v for v in params.values()):
|
15
22
|
return None
|
16
23
|
call_params = {
|
17
|
-
"fieldnames": tuple(fieldnames),
|
18
|
-
"column_names": list(column_names),
|
24
|
+
"fieldnames": self.fieldnames,
|
25
|
+
"column_names": self.column_names,
|
19
26
|
**params,
|
20
27
|
}
|
21
|
-
return await cube_cls(logger=logger, debug=debug).aload(**call_params)
|
28
|
+
return await self.cube_cls(logger=logger, debug=debug).aload(**call_params)
|
29
|
+
|
30
|
+
|
31
|
+
# Factory function for backward compatibility
|
32
|
+
def make_attacher(
|
33
|
+
cube_cls: Type,
|
34
|
+
fieldnames: Sequence[str],
|
35
|
+
column_names: Sequence[str],
|
36
|
+
) -> Callable[..., Awaitable[Any]]:
|
37
|
+
"""
|
38
|
+
Factory for async attachers.
|
39
|
+
Skips work if any param value is falsy ([], None, {}, etc.).
|
40
|
+
"""
|
41
|
+
attacher = AttachmentMaker(cube_cls, fieldnames, column_names)
|
42
|
+
return attacher.attach
|
22
43
|
|
23
|
-
return attach
|
24
44
|
|
25
|
-
__all__ = ['make_attacher']
|
45
|
+
__all__ = ['AttachmentMaker', 'make_attacher']
|
46
|
+
# from typing import Any, Awaitable, Callable, Sequence, Type
|
47
|
+
#
|
48
|
+
# def make_attacher(
|
49
|
+
# cube_cls: Type,
|
50
|
+
# fieldnames: Sequence[str],
|
51
|
+
# column_names: Sequence[str],
|
52
|
+
# ) -> Callable[..., Awaitable[Any]]:
|
53
|
+
# """
|
54
|
+
# Factory for async attachers.
|
55
|
+
# Skips work if any param value is falsy ([], None, {}, etc.).
|
56
|
+
# """
|
57
|
+
#
|
58
|
+
# async def attach(*, logger=None, debug: bool = False, **params: Any):
|
59
|
+
# if any(not v for v in params.values()):
|
60
|
+
# return None
|
61
|
+
# call_params = {
|
62
|
+
# "fieldnames": tuple(fieldnames),
|
63
|
+
# "column_names": list(column_names),
|
64
|
+
# **params,
|
65
|
+
# }
|
66
|
+
# return await cube_cls(logger=logger, debug=debug).aload(**call_params)
|
67
|
+
#
|
68
|
+
# return attach
|
69
|
+
#
|
70
|
+
# __all__ = ['make_attacher']
|
@@ -0,0 +1,54 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from sibi_dst.utils.boilerplate import BasePipeline
|
6
|
+
|
7
|
+
|
8
|
+
class PipelineTemplate:
|
9
|
+
"""
|
10
|
+
A reusable base class for executing product-related pipelines end-to-end.
|
11
|
+
"""
|
12
|
+
|
13
|
+
def __init__(
|
14
|
+
self,
|
15
|
+
start_date: str,
|
16
|
+
end_date: str,
|
17
|
+
fs_instance,
|
18
|
+
storage_path: str,
|
19
|
+
dataset_cls,
|
20
|
+
filename: str,
|
21
|
+
date_field: str = "last_activity_dt",
|
22
|
+
**kwargs
|
23
|
+
):
|
24
|
+
self.start_date = start_date
|
25
|
+
self.end_date = end_date
|
26
|
+
self.max_workers = kwargs.pop('max_workers', 4)
|
27
|
+
self.fs = fs_instance
|
28
|
+
self.storage_path = storage_path
|
29
|
+
|
30
|
+
self.pipeline = BasePipeline(
|
31
|
+
start_date=self.start_date,
|
32
|
+
end_date=self.end_date,
|
33
|
+
dataset_cls=dataset_cls,
|
34
|
+
parquet_storage_path=self.storage_path,
|
35
|
+
fs=self.fs,
|
36
|
+
filename=filename,
|
37
|
+
date_field=date_field,
|
38
|
+
max_workers=self.max_workers,
|
39
|
+
)
|
40
|
+
|
41
|
+
async def to_parquet(self, **kwargs) -> pd.DataFrame:
|
42
|
+
await self.pipeline.to_parquet(**kwargs)
|
43
|
+
df = await self.pipeline.from_parquet(**kwargs)
|
44
|
+
return df
|
45
|
+
|
46
|
+
async def from_parquet(self, **kwargs) -> pd.DataFrame:
|
47
|
+
df = await self.pipeline.from_parquet(**kwargs)
|
48
|
+
return df
|
49
|
+
|
50
|
+
async def to_clickhouse(self, clickhouse_conf, **kwargs) -> None:
|
51
|
+
cnf = clickhouse_conf.copy()
|
52
|
+
cnf["table"] = self.pipeline.filename
|
53
|
+
cnf["overwrite"] = True
|
54
|
+
await self.pipeline.to_clickhouse(cnf, **kwargs)
|
@@ -40,12 +40,13 @@ sibi_dst/tests/test_data_wrapper_class.py,sha256=6uFmZR2DxnxQz49L5jT2ehlKvlLnpUH
|
|
40
40
|
sibi_dst/utils/__init__.py,sha256=vShNCOMPw8KKwlb4tq5XGrpjqakJ_OE8YDc_xDAWAxI,1302
|
41
41
|
sibi_dst/utils/async_utils.py,sha256=53aywfgq1Q6-0OVr9qR1Sf6g7Qv3I9qunAAR4fjFXBE,351
|
42
42
|
sibi_dst/utils/base.py,sha256=sFngliI7Ku8bZMz0YdVhppuaPNZ0dvqRwCsPe9XdF1A,16256
|
43
|
-
sibi_dst/utils/boilerplate/__init__.py,sha256=
|
44
|
-
sibi_dst/utils/boilerplate/base_attacher.py,sha256=
|
43
|
+
sibi_dst/utils/boilerplate/__init__.py,sha256=St0xyt-PMgzYKquKU9QTUyMRrJU2Nf-tpFLHuJj87H8,507
|
44
|
+
sibi_dst/utils/boilerplate/base_attacher.py,sha256=iZftWNUx8y370OJP_kGCs5v3t2RgPuARIK_jQeFfbAU,2089
|
45
45
|
sibi_dst/utils/boilerplate/base_data_cube.py,sha256=ErKTM2kT8LsSXADcyYvT436O_Mp0J2hm8xs1IUircb4,2760
|
46
46
|
sibi_dst/utils/boilerplate/base_parquet_artifact.py,sha256=oqPbjHFfChA9j1WL-eDAh7XLA3zmf-Rq7s_kzITVniA,3753
|
47
47
|
sibi_dst/utils/boilerplate/base_parquet_reader.py,sha256=3kN9_bbxyX-WuJLMBWejeApW2V_BDArSljhSUOAOhVU,692
|
48
48
|
sibi_dst/utils/boilerplate/base_pipeline.py,sha256=R9_mMEn8gCtfTS7c3DyzWMf_oQjCSL_O7CR8z_t3nmc,6323
|
49
|
+
sibi_dst/utils/boilerplate/base_pipeline_template.py,sha256=D5HFA4odsR2wlTY6iLg1tm57Tsh91QkoYjjX8eUgrjU,1574
|
49
50
|
sibi_dst/utils/boilerplate/hybrid_data_loader.py,sha256=Tazn7QL3FmWKVMXxzkvxPrG_2ucsPHvSotIW9dBLoNc,6018
|
50
51
|
sibi_dst/utils/business_days.py,sha256=dP0Xj4FhTBIvZZrZYLOHZl5zOpDAgWkD4p_1a7BOT7I,8461
|
51
52
|
sibi_dst/utils/clickhouse_writer.py,sha256=IQJ_rgd7VuF-g-aPbo9TfqZi0EB_3evCFTzcCNHSmpw,16969
|
@@ -94,6 +95,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
|
|
94
95
|
sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
|
95
96
|
sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
|
96
97
|
sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
|
97
|
-
sibi_dst-2025.9.
|
98
|
-
sibi_dst-2025.9.
|
99
|
-
sibi_dst-2025.9.
|
98
|
+
sibi_dst-2025.9.9.dist-info/METADATA,sha256=aAHhqJxfHiRrDTTAGrOQ3WbRDPMmLV0I4k2OTDRu5s4,2710
|
99
|
+
sibi_dst-2025.9.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
100
|
+
sibi_dst-2025.9.9.dist-info/RECORD,,
|
File without changes
|