sibi-dst 2025.9.8__py3-none-any.whl → 2025.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ from .base_attacher import make_attacher
4
4
  from .base_parquet_reader import BaseParquetReader
5
5
  from .hybrid_data_loader import HybridDataLoader
6
6
  from .base_pipeline import BasePipeline
7
+ from .base_pipeline_template import PipelineTemplate
7
8
 
8
9
  __all__ = [
9
10
  "BaseDataCube",
@@ -12,5 +13,6 @@ __all__ = [
12
13
  "BaseParquetReader",
13
14
  "HybridDataLoader",
14
15
  "BasePipeline",
16
+ "PipelineTemplate",
15
17
  ]
16
18
 
@@ -1,25 +1,70 @@
1
1
  from typing import Any, Awaitable, Callable, Sequence, Type
2
2
 
3
- def make_attacher(
4
- cube_cls: Type,
5
- fieldnames: Sequence[str],
6
- column_names: Sequence[str],
7
- ) -> Callable[..., Awaitable[Any]]:
3
+
4
class AttachmentMaker:
    """
    Async attacher bound to one cube class and a fixed field/column selection.

    ``attach`` skips work (returns ``None``) if any param value is falsy
    ([], None, {}, etc.). Otherwise it instantiates ``cube_cls`` and awaits
    its ``aload`` with the stored selection merged with the caller's params.
    """

    def __init__(
        self,
        cube_cls: Type,
        fieldnames: Sequence[str],
        column_names: Sequence[str],
    ):
        """
        Store the cube class and snapshot the field/column selection.

        Args:
            cube_cls: Class called as ``cube_cls(logger=..., debug=...)``; the
                resulting object must expose an awaitable ``aload(**kwargs)``.
            fieldnames: Field names, stored as a tuple.
            column_names: Column names, stored as a list.
        """
        self.cube_cls = cube_cls
        self.fieldnames = tuple(fieldnames)
        self.column_names = list(column_names)

    async def attach(self, *, logger=None, debug: bool = False, **params: Any):
        """
        Load data through a new ``cube_cls`` instance, or skip when a param is falsy.

        Args:
            logger: Passed through to the ``cube_cls`` constructor.
            debug: Passed through to the ``cube_cls`` constructor.
            **params: Extra keyword arguments for ``aload``. They are merged
                last, so a ``fieldnames`` or ``column_names`` key given here
                overrides the stored selection.

        Returns:
            ``None`` when any value in ``params`` is falsy; otherwise whatever
            ``aload`` returns.
        """
        if any(not v for v in params.values()):
            return None
        call_params = {
            "fieldnames": self.fieldnames,
            # Hand out a fresh list on every call so that an ``aload`` which
            # mutates its ``column_names`` argument cannot leak that change
            # into later calls made through this same attacher.
            "column_names": list(self.column_names),
            **params,
        }
        return await self.cube_cls(logger=logger, debug=debug).aload(**call_params)
29
+
30
+
31
+ # Factory function for backward compatibility
32
def make_attacher(
    cube_cls: Type,
    fieldnames: Sequence[str],
    column_names: Sequence[str],
) -> Callable[..., Awaitable[Any]]:
    """
    Build an async attacher callable for ``cube_cls``.

    The arguments are wrapped in an ``AttachmentMaker`` and its bound
    ``attach`` coroutine method is returned, so the result is called with
    keyword arguments only and must be awaited.
    Skips work if any param value is falsy ([], None, {}, etc.).
    """
    return AttachmentMaker(cube_cls, fieldnames, column_names).attach
22
43
 
23
- return attach
24
44
 
25
- __all__ = ['make_attacher']
45
+ __all__ = ['AttachmentMaker', 'make_attacher']
46
+ # from typing import Any, Awaitable, Callable, Sequence, Type
47
+ #
48
+ # def make_attacher(
49
+ # cube_cls: Type,
50
+ # fieldnames: Sequence[str],
51
+ # column_names: Sequence[str],
52
+ # ) -> Callable[..., Awaitable[Any]]:
53
+ # """
54
+ # Factory for async attachers.
55
+ # Skips work if any param value is falsy ([], None, {}, etc.).
56
+ # """
57
+ #
58
+ # async def attach(*, logger=None, debug: bool = False, **params: Any):
59
+ # if any(not v for v in params.values()):
60
+ # return None
61
+ # call_params = {
62
+ # "fieldnames": tuple(fieldnames),
63
+ # "column_names": list(column_names),
64
+ # **params,
65
+ # }
66
+ # return await cube_cls(logger=logger, debug=debug).aload(**call_params)
67
+ #
68
+ # return attach
69
+ #
70
+ # __all__ = ['make_attacher']
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+
5
+ from sibi_dst.utils.boilerplate import BasePipeline
6
+
7
+
8
class PipelineTemplate:
    """
    Thin wrapper that builds a ``BasePipeline`` and exposes its parquet and
    ClickHouse operations as coroutine methods.
    """

    def __init__(
        self,
        start_date: str,
        end_date: str,
        fs_instance,
        storage_path: str,
        dataset_cls,
        filename: str,
        date_field: str = "last_activity_dt",
        **kwargs
    ):
        """
        Store the run settings and construct the underlying pipeline.

        Args:
            start_date: Forwarded to ``BasePipeline`` as ``start_date``.
            end_date: Forwarded to ``BasePipeline`` as ``end_date``.
            fs_instance: Filesystem object, forwarded as ``fs``.
            storage_path: Forwarded as ``parquet_storage_path``.
            dataset_cls: Forwarded as ``dataset_cls``.
            filename: Forwarded as ``filename``.
            date_field: Forwarded as ``date_field``.
            **kwargs: Only ``max_workers`` (default 4) is read; any other
                keys are not used by this constructor.
        """
        workers = kwargs.pop('max_workers', 4)

        self.fs = fs_instance
        self.storage_path = storage_path
        self.start_date = start_date
        self.end_date = end_date
        self.max_workers = workers

        self.pipeline = BasePipeline(
            start_date=start_date,
            end_date=end_date,
            dataset_cls=dataset_cls,
            parquet_storage_path=storage_path,
            fs=fs_instance,
            filename=filename,
            date_field=date_field,
            max_workers=workers,
        )

    async def to_parquet(self, **kwargs) -> pd.DataFrame:
        """
        Run the pipeline's parquet write, then read the result back.

        The same ``kwargs`` are passed to both the write and the read.
        """
        await self.pipeline.to_parquet(**kwargs)
        return await self.pipeline.from_parquet(**kwargs)

    async def from_parquet(self, **kwargs) -> pd.DataFrame:
        """Return whatever the pipeline's ``from_parquet`` yields for ``kwargs``."""
        return await self.pipeline.from_parquet(**kwargs)

    async def to_clickhouse(self, clickhouse_conf, **kwargs) -> None:
        """
        Send the pipeline's data to ClickHouse.

        Works on a copy of ``clickhouse_conf`` (the caller's object is left
        untouched), with ``table`` set to the pipeline's filename and
        ``overwrite`` set to ``True``.
        """
        target_conf = clickhouse_conf.copy()
        target_conf.update(table=self.pipeline.filename, overwrite=True)
        await self.pipeline.to_clickhouse(target_conf, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 2025.9.8
3
+ Version: 2025.9.9
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -40,12 +40,13 @@ sibi_dst/tests/test_data_wrapper_class.py,sha256=6uFmZR2DxnxQz49L5jT2ehlKvlLnpUH
40
40
  sibi_dst/utils/__init__.py,sha256=vShNCOMPw8KKwlb4tq5XGrpjqakJ_OE8YDc_xDAWAxI,1302
41
41
  sibi_dst/utils/async_utils.py,sha256=53aywfgq1Q6-0OVr9qR1Sf6g7Qv3I9qunAAR4fjFXBE,351
42
42
  sibi_dst/utils/base.py,sha256=sFngliI7Ku8bZMz0YdVhppuaPNZ0dvqRwCsPe9XdF1A,16256
43
- sibi_dst/utils/boilerplate/__init__.py,sha256=Zi4jHfYm_fGsXwG6TVxUUPjWQMYgZS-HsGcva7QxosU,430
44
- sibi_dst/utils/boilerplate/base_attacher.py,sha256=JRAyvfljQjKVD5BJDDd09cBY9pGPIe8LQp0aUv_xJs0,736
43
+ sibi_dst/utils/boilerplate/__init__.py,sha256=St0xyt-PMgzYKquKU9QTUyMRrJU2Nf-tpFLHuJj87H8,507
44
+ sibi_dst/utils/boilerplate/base_attacher.py,sha256=iZftWNUx8y370OJP_kGCs5v3t2RgPuARIK_jQeFfbAU,2089
45
45
  sibi_dst/utils/boilerplate/base_data_cube.py,sha256=ErKTM2kT8LsSXADcyYvT436O_Mp0J2hm8xs1IUircb4,2760
46
46
  sibi_dst/utils/boilerplate/base_parquet_artifact.py,sha256=oqPbjHFfChA9j1WL-eDAh7XLA3zmf-Rq7s_kzITVniA,3753
47
47
  sibi_dst/utils/boilerplate/base_parquet_reader.py,sha256=3kN9_bbxyX-WuJLMBWejeApW2V_BDArSljhSUOAOhVU,692
48
48
  sibi_dst/utils/boilerplate/base_pipeline.py,sha256=R9_mMEn8gCtfTS7c3DyzWMf_oQjCSL_O7CR8z_t3nmc,6323
49
+ sibi_dst/utils/boilerplate/base_pipeline_template.py,sha256=D5HFA4odsR2wlTY6iLg1tm57Tsh91QkoYjjX8eUgrjU,1574
49
50
  sibi_dst/utils/boilerplate/hybrid_data_loader.py,sha256=Tazn7QL3FmWKVMXxzkvxPrG_2ucsPHvSotIW9dBLoNc,6018
50
51
  sibi_dst/utils/business_days.py,sha256=dP0Xj4FhTBIvZZrZYLOHZl5zOpDAgWkD4p_1a7BOT7I,8461
51
52
  sibi_dst/utils/clickhouse_writer.py,sha256=IQJ_rgd7VuF-g-aPbo9TfqZi0EB_3evCFTzcCNHSmpw,16969
@@ -94,6 +95,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
94
95
  sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
95
96
  sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
96
97
  sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
97
- sibi_dst-2025.9.8.dist-info/METADATA,sha256=rQ9QLcSm_bvFK2KOgi1ZmIgVZMwixMWvXT9SNmBU6fg,2710
98
- sibi_dst-2025.9.8.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
99
- sibi_dst-2025.9.8.dist-info/RECORD,,
98
+ sibi_dst-2025.9.9.dist-info/METADATA,sha256=aAHhqJxfHiRrDTTAGrOQ3WbRDPMmLV0I4k2OTDRu5s4,2710
99
+ sibi_dst-2025.9.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
100
+ sibi_dst-2025.9.9.dist-info/RECORD,,