sibi-dst 2025.9.8__tar.gz → 2025.9.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/PKG-INFO +1 -1
  2. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/pyproject.toml +1 -1
  3. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/boilerplate/__init__.py +2 -0
  4. sibi_dst-2025.9.9/sibi_dst/utils/boilerplate/base_attacher.py +70 -0
  5. sibi_dst-2025.9.9/sibi_dst/utils/boilerplate/base_pipeline_template.py +54 -0
  6. sibi_dst-2025.9.8/sibi_dst/utils/boilerplate/base_attacher.py +0 -25
  7. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/README.md +0 -0
  8. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/__init__.py +0 -0
  9. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/__init__.py +0 -0
  10. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/_artifact_updater_async.py +0 -0
  11. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/_artifact_updater_threaded.py +0 -0
  12. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/_df_helper.py +0 -0
  13. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
  14. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/_parquet_reader.py +0 -0
  15. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/__init__.py +0 -0
  16. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
  17. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
  18. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
  19. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
  20. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
  21. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  22. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
  23. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  24. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  25. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
  26. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  27. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/core/__init__.py +0 -0
  28. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/core/_defaults.py +0 -0
  29. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
  30. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/core/_params_config.py +0 -0
  31. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/core/_query_config.py +0 -0
  32. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/df_helper/data_cleaner.py +0 -0
  33. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/geopy_helper/__init__.py +0 -0
  34. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
  35. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/geopy_helper/utils.py +0 -0
  36. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/osmnx_helper/__init__.py +0 -0
  37. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
  38. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
  39. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
  40. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
  41. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
  42. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/osmnx_helper/route_path_builder.py +0 -0
  43. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/osmnx_helper/utils.py +0 -0
  44. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/tests/__init__.py +0 -0
  45. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/tests/test_baseclass.py +0 -0
  46. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
  47. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/__init__.py +0 -0
  48. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/async_utils.py +0 -0
  49. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/base.py +0 -0
  50. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/boilerplate/base_data_cube.py +0 -0
  51. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/boilerplate/base_parquet_artifact.py +0 -0
  52. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/boilerplate/base_parquet_reader.py +0 -0
  53. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/boilerplate/base_pipeline.py +0 -0
  54. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/boilerplate/hybrid_data_loader.py +0 -0
  55. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/business_days.py +0 -0
  56. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/clickhouse_writer.py +0 -0
  57. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/credentials.py +0 -0
  58. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/dask_utils.py +0 -0
  59. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/data_from_http_source.py +0 -0
  60. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/data_utils.py +0 -0
  61. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/data_wrapper.py +0 -0
  62. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/date_utils.py +0 -0
  63. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/df_utils.py +0 -0
  64. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/file_age_checker.py +0 -0
  65. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/file_utils.py +0 -0
  66. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/filepath_generator.py +0 -0
  67. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/iceberg_saver.py +0 -0
  68. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/log_utils.py +0 -0
  69. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/manifest_manager.py +0 -0
  70. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/parquet_saver.py +0 -0
  71. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/periods.py +0 -0
  72. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/phone_formatter.py +0 -0
  73. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/progress/__init__.py +0 -0
  74. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/progress/jobs.py +0 -0
  75. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/progress/sse_runner.py +0 -0
  76. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/storage_config.py +0 -0
  77. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/storage_hive.py +0 -0
  78. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/storage_manager.py +0 -0
  79. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/update_planner.py +0 -0
  80. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/webdav_client.py +0 -0
  81. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/utils/write_gatekeeper.py +0 -0
  82. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/__init__.py +0 -0
  83. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/__init__.py +0 -0
  84. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
  85. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
  86. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
  87. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  88. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  89. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  90. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
  91. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
  92. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
  93. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
  94. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
  95. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
  96. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
  97. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
  98. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
  99. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
  100. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/utils/__init__.py +0 -0
  101. {sibi_dst-2025.9.8 → sibi_dst-2025.9.9}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 2025.9.8
3
+ Version: 2025.9.9
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sibi-dst"
3
- version = "2025.9.8"
3
+ version = "2025.9.9"
4
4
  description = "Data Science Toolkit"
5
5
  authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
6
6
  readme = "README.md"
@@ -4,6 +4,7 @@ from .base_attacher import make_attacher
4
4
  from .base_parquet_reader import BaseParquetReader
5
5
  from .hybrid_data_loader import HybridDataLoader
6
6
  from .base_pipeline import BasePipeline
7
+ from .base_pipeline_template import PipelineTemplate
7
8
 
8
9
  __all__ = [
9
10
  "BaseDataCube",
@@ -12,5 +13,6 @@ __all__ = [
12
13
  "BaseParquetReader",
13
14
  "HybridDataLoader",
14
15
  "BasePipeline",
16
+ "PipelineTemplate",
15
17
  ]
16
18
 
@@ -0,0 +1,70 @@
1
+ from typing import Any, Awaitable, Callable, Sequence, Type
2
+
3
+
4
+ class AttachmentMaker:
5
+ """
6
+ Async attacher class.
7
+ Skips work if any param value is falsy ([], None, {}, etc.).
8
+ """
9
+
10
+ def __init__(
11
+ self,
12
+ cube_cls: Type,
13
+ fieldnames: Sequence[str],
14
+ column_names: Sequence[str],
15
+ ):
16
+ self.cube_cls = cube_cls
17
+ self.fieldnames = tuple(fieldnames)
18
+ self.column_names = list(column_names)
19
+
20
+ async def attach(self, *, logger=None, debug: bool = False, **params: Any):
21
+ if any(not v for v in params.values()):
22
+ return None
23
+ call_params = {
24
+ "fieldnames": self.fieldnames,
25
+ "column_names": self.column_names,
26
+ **params,
27
+ }
28
+ return await self.cube_cls(logger=logger, debug=debug).aload(**call_params)
29
+
30
+
31
+ # Factory function for backward compatibility
32
+ def make_attacher(
33
+ cube_cls: Type,
34
+ fieldnames: Sequence[str],
35
+ column_names: Sequence[str],
36
+ ) -> Callable[..., Awaitable[Any]]:
37
+ """
38
+ Factory for async attachers.
39
+ Skips work if any param value is falsy ([], None, {}, etc.).
40
+ """
41
+ attacher = AttachmentMaker(cube_cls, fieldnames, column_names)
42
+ return attacher.attach
43
+
44
+
45
+ __all__ = ['AttachmentMaker', 'make_attacher']
46
+ # from typing import Any, Awaitable, Callable, Sequence, Type
47
+ #
48
+ # def make_attacher(
49
+ # cube_cls: Type,
50
+ # fieldnames: Sequence[str],
51
+ # column_names: Sequence[str],
52
+ # ) -> Callable[..., Awaitable[Any]]:
53
+ # """
54
+ # Factory for async attachers.
55
+ # Skips work if any param value is falsy ([], None, {}, etc.).
56
+ # """
57
+ #
58
+ # async def attach(*, logger=None, debug: bool = False, **params: Any):
59
+ # if any(not v for v in params.values()):
60
+ # return None
61
+ # call_params = {
62
+ # "fieldnames": tuple(fieldnames),
63
+ # "column_names": list(column_names),
64
+ # **params,
65
+ # }
66
+ # return await cube_cls(logger=logger, debug=debug).aload(**call_params)
67
+ #
68
+ # return attach
69
+ #
70
+ # __all__ = ['make_attacher']
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+
5
+ from sibi_dst.utils.boilerplate import BasePipeline
6
+
7
+
8
+ class PipelineTemplate:
9
+ """
10
+ A reusable base class for executing product-related pipelines end-to-end.
11
+ """
12
+
13
+ def __init__(
14
+ self,
15
+ start_date: str,
16
+ end_date: str,
17
+ fs_instance,
18
+ storage_path: str,
19
+ dataset_cls,
20
+ filename: str,
21
+ date_field: str = "last_activity_dt",
22
+ **kwargs
23
+ ):
24
+ self.start_date = start_date
25
+ self.end_date = end_date
26
+ self.max_workers = kwargs.pop('max_workers', 4)
27
+ self.fs = fs_instance
28
+ self.storage_path = storage_path
29
+
30
+ self.pipeline = BasePipeline(
31
+ start_date=self.start_date,
32
+ end_date=self.end_date,
33
+ dataset_cls=dataset_cls,
34
+ parquet_storage_path=self.storage_path,
35
+ fs=self.fs,
36
+ filename=filename,
37
+ date_field=date_field,
38
+ max_workers=self.max_workers,
39
+ )
40
+
41
+ async def to_parquet(self, **kwargs) -> pd.DataFrame:
42
+ await self.pipeline.to_parquet(**kwargs)
43
+ df = await self.pipeline.from_parquet(**kwargs)
44
+ return df
45
+
46
+ async def from_parquet(self, **kwargs) -> pd.DataFrame:
47
+ df = await self.pipeline.from_parquet(**kwargs)
48
+ return df
49
+
50
+ async def to_clickhouse(self, clickhouse_conf, **kwargs) -> None:
51
+ cnf = clickhouse_conf.copy()
52
+ cnf["table"] = self.pipeline.filename
53
+ cnf["overwrite"] = True
54
+ await self.pipeline.to_clickhouse(cnf, **kwargs)
@@ -1,25 +0,0 @@
1
- from typing import Any, Awaitable, Callable, Sequence, Type
2
-
3
- def make_attacher(
4
- cube_cls: Type,
5
- fieldnames: Sequence[str],
6
- column_names: Sequence[str],
7
- ) -> Callable[..., Awaitable[Any]]:
8
- """
9
- Factory for async attachers.
10
- Skips work if any param value is falsy ([], None, {}, etc.).
11
- """
12
-
13
- async def attach(*, logger=None, debug: bool = False, **params: Any):
14
- if any(not v for v in params.values()):
15
- return None
16
- call_params = {
17
- "fieldnames": tuple(fieldnames),
18
- "column_names": list(column_names),
19
- **params,
20
- }
21
- return await cube_cls(logger=logger, debug=debug).aload(**call_params)
22
-
23
- return attach
24
-
25
- __all__ = ['make_attacher']
File without changes