sibi-dst 2025.8.9__py3-none-any.whl → 2025.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,11 @@
1
- from .base_data_artifact import BaseDataArtifact
1
+ from .base_parquet_artifact import BaseParquetArtifact
2
2
  from .base_data_cube import BaseDataCube
3
+ from .base_attacher import make_attacher
4
+ from .base_parquet_reader import BaseParquetReader
5
+ __all__ = [
6
+ "BaseDataCube",
7
+ "BaseParquetArtifact",
8
+ "make_attacher",
9
+ "BaseParquetReader"
10
+ ]
3
11
 
4
- __all__ = ["BaseDataCube",
5
- "BaseDataArtifact"
6
- ]
@@ -0,0 +1,25 @@
1
+ from typing import Any, Awaitable, Callable, Sequence, Type
2
+
3
def make_attacher(
    cube_cls: Type,
    fieldnames: Sequence[str],
    column_names: Sequence[str],
) -> Callable[..., Awaitable[Any]]:
    """
    Build an async ``attach`` coroutine function bound to ``cube_cls``.

    The returned coroutine accepts keyword-only ``logger`` and ``debug``
    plus arbitrary keyword ``params``. When every param value is truthy
    (or no params are given) it instantiates ``cube_cls(logger=..., debug=...)``
    and awaits its ``aload`` with ``fieldnames`` (as a tuple),
    ``column_names`` (as a list) and the params. If any param value is
    falsy ([], None, {}, 0, "", ...) it returns ``None`` without
    creating the cube.
    """

    async def attach(*, logger=None, debug: bool = False, **params: Any):
        # A single falsy filter value means there is nothing to attach.
        if not all(params.values()):
            return None

        load_kwargs = {}
        load_kwargs["fieldnames"] = tuple(fieldnames)
        load_kwargs["column_names"] = list(column_names)
        # Caller-supplied params are applied last, so they take precedence.
        load_kwargs.update(params)

        cube = cube_cls(logger=logger, debug=debug)
        return await cube.aload(**load_kwargs)

    return attach
24
+
25
+ __all__ = ['make_attacher']
@@ -30,7 +30,7 @@ def _validate_and_format_date(name: str, value: DateLike) -> Optional[str]:
30
30
  raise TypeError(f"{name} must be str, date, datetime, or None; got {type(value)}")
31
31
 
32
32
 
33
- class BaseDataArtifact(ParquetArtifact):
33
+ class BaseParquetArtifact(ParquetArtifact):
34
34
  """
35
35
  Base class for Parquet artifacts with optional date window.
36
36
 
@@ -0,0 +1,21 @@
1
+ from sibi_dst.df_helper import ParquetReader
2
+
3
class BaseParquetReader(ParquetReader):
    """
    Parquet reader that seeds its constructor options from the
    class-level ``config`` mapping.

    Keyword arguments passed to the constructor override entries of
    ``config`` that share the same key; everything is then forwarded to
    ``ParquetReader.__init__`` together with the two date bounds.
    """
    config = {
        'backend': 'parquet'
    }

    def __init__(self, parquet_start_date, parquet_end_date, **kwargs):
        # Start from the class defaults, then let caller-supplied options win.
        options = dict(self.config)
        options.update(kwargs)
        super().__init__(
            parquet_start_date=parquet_start_date,
            parquet_end_date=parquet_end_date,
            **options,
        )
20
+
21
+ __all__ = ['BaseParquetReader']
@@ -10,6 +10,14 @@ import clickhouse_connect
10
10
 
11
11
  from . import ManagedResource
12
12
 
13
+ def _to_bool(val: Any) -> bool:
14
+ if isinstance(val, bool):
15
+ return val
16
+ if isinstance(val, (int, float)):
17
+ return bool(val)
18
+ if isinstance(val, str):
19
+ return val.strip().lower() in ("1", "true", "yes", "on")
20
+ return False
13
21
 
14
22
  class ClickHouseWriter(ManagedResource):
15
23
  """
@@ -47,6 +55,11 @@ class ClickHouseWriter(ManagedResource):
47
55
  database: str = "sibi_data",
48
56
  user: str = "default",
49
57
  password: str = "",
58
+ secure: bool = False,
59
+ verify: bool = False,
60
+ ca_cert: str = "",
61
+ client_cert: str = "",
62
+ compression: str = "",
50
63
  table: str = "test_sibi_table",
51
64
  order_by: str = "id",
52
65
  engine: Optional[str] = None, # e.g. "ENGINE MergeTree ORDER BY (`id`)"
@@ -61,6 +74,11 @@ class ClickHouseWriter(ManagedResource):
61
74
  self.database = database
62
75
  self.user = user
63
76
  self.password = password
77
+ self.secure = _to_bool(secure)
78
+ self.verify = _to_bool(verify)
79
+ self.ca_cert = ca_cert
80
+ self.client_cert = client_cert
81
+ self.compression = compression # e.g. 'lz4', 'zstd',
64
82
  self.table = table
65
83
  self.order_by = order_by
66
84
  self.engine = engine # if None → default MergeTree ORDER BY
@@ -224,6 +242,7 @@ class ClickHouseWriter(ManagedResource):
224
242
  # ------------- low-level helpers -------------
225
243
 
226
244
  def _get_client(self):
245
+ print(self.secure, " ", self.verify)
227
246
  cli = getattr(self._tlocal, "client", None)
228
247
  if cli is not None:
229
248
  return cli
@@ -233,6 +252,11 @@ class ClickHouseWriter(ManagedResource):
233
252
  database=self.database,
234
253
  username=self.user, # clickhouse-connect uses 'username'
235
254
  password=self.password,
255
+ secure=self.secure,
256
+ verify=self.verify,
257
+ ca_cert=self.ca_cert or None,
258
+ client_cert=self.client_cert or None,
259
+ compression=self.compression or None,
236
260
  )
237
261
  self._tlocal.client = cli
238
262
  return cli
@@ -0,0 +1,61 @@
1
+ import asyncio
2
+ from typing import List, Any, Dict
3
+
4
+ import dask
5
+ import dask.dataframe as dd
6
+
7
+ def _to_int_safe(x) -> int:
8
+ """
9
+ Convert scalar-like to int safely.
10
+ Handles numpy scalars, pandas Series/DataFrame outputs.
11
+ """
12
+ if hasattr(x, "item"): # numpy scalar, pandas scalar
13
+ return int(x.item())
14
+ if hasattr(x, "iloc"): # Series-like
15
+ return int(x.iloc[0])
16
+ return int(x)
17
+
18
def dask_is_probably_empty(ddf: dd.DataFrame) -> bool:
    """
    Cheap, metadata-only emptiness heuristic (no ``compute`` call here).

    Returns True when ``ddf`` reports zero partitions (or has no
    ``npartitions`` attribute at all), or when its ``_meta`` has no columns.
    """
    if getattr(ddf, "npartitions", 0) == 0:
        return True
    return len(ddf._meta.columns) == 0
20
+
21
+
22
def dask_is_empty_truthful(ddf: dd.DataFrame) -> bool:
    """
    Exact emptiness check: sum ``len`` over every partition and compute it.

    Returns True only when the computed total row count is zero.
    """
    per_partition_rows = ddf.map_partitions(len)
    total_rows = per_partition_rows.sum().compute()
    return int(total_rows) == 0
25
+
26
+
27
def dask_is_empty(ddf: dd.DataFrame, *, sample: int = 4) -> bool:
    """
    Decide whether ``ddf`` has no rows, probing a few partitions first.

    Steps:
      1. Return True immediately if ``dask_is_probably_empty`` says so.
      2. Compute ``len`` for the first ``k`` partitions, where ``k`` is
         ``sample`` clamped to the range ``[1, npartitions]``.
      3. Any non-empty probe -> False. If every partition was probed and
         all were empty -> True.
      4. Otherwise fall back to ``dask_is_empty_truthful`` over the whole
         collection.
    """
    if dask_is_probably_empty(ddf):
        return True

    probe_count = min(max(sample, 1), ddf.npartitions)
    lazy_lengths = [
        ddf.get_partition(idx).map_partitions(len) for idx in range(probe_count)
    ]
    probes = dask.compute(*lazy_lengths)

    # Stop at the first partition that actually holds rows.
    for length in probes:
        if _to_int_safe(length) > 0:
            return False

    probed_everything = probe_count == ddf.npartitions
    if probed_everything and not any(_to_int_safe(length) != 0 for length in probes):
        return True

    return dask_is_empty_truthful(ddf)
42
+
43
class UniqueValuesExtractor:
    """Collect the distinct non-null values of dataframe columns as lists."""

    @staticmethod
    def _compute_to_list_sync(series) -> List[Any]:
        """
        Blocking conversion of ``series`` to a list.

        Objects exposing ``compute`` are computed first; the result's
        ``tolist()`` is returned either way.
        """
        materialized = series.compute() if hasattr(series, "compute") else series
        return materialized.tolist()

    async def compute_to_list(self, series) -> List[Any]:
        """Run ``_compute_to_list_sync`` in a worker thread and await it."""
        # A potential .compute() is kept off the event loop thread.
        to_list = self._compute_to_list_sync
        return await asyncio.to_thread(to_list, series)

    async def extract_unique_values(self, df, *columns: str) -> Dict[str, List[Any]]:
        """
        Return ``{column: unique non-null values}`` for each requested column.

        Each column is processed by its own coroutine and all of them are
        awaited together through ``asyncio.gather``.
        """

        async def _unique_for(name: str):
            distinct = df[name].dropna().unique()
            values = await self.compute_to_list(distinct)
            return name, values

        jobs = [_unique_for(name) for name in columns]
        results = await asyncio.gather(*jobs)
        return {name: values for name, values in results}
@@ -49,6 +49,8 @@ class SSERunner:
49
49
  async def handler(request: Request): # <-- only Request
50
50
  queue: asyncio.Queue = asyncio.Queue()
51
51
  task_id = str(asyncio.get_running_loop().time()).replace(".", "")
52
+ self.logger.debug(
53
+ f"SSE {task_id}: new request client={request.client} path={request.url.path} q={dict(request.query_params)}")
52
54
 
53
55
  ctx: Dict[str, Any] = {
54
56
  "path": dict(request.path_params), # <-- pull path params here
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 2025.8.9
3
+ Version: 2025.9.2
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -21,6 +21,7 @@ Requires-Dist: pyarrow (>=20.0.0,<21.0.0)
21
21
  Requires-Dist: pydantic (>=2.11.7,<3.0.0)
22
22
  Requires-Dist: pyiceberg[hive,s3fs] (>=0.9.1,<0.10.0)
23
23
  Requires-Dist: pymysql (>=1.1.1,<2.0.0)
24
+ Requires-Dist: pyrosm (>=0.6.2,<0.7.0)
24
25
  Requires-Dist: s3fs (>=2025.5.1,<2026.0.0)
25
26
  Requires-Dist: sqlalchemy (>=2.0.41,<3.0.0)
26
27
  Requires-Dist: sse-starlette (>=3.0.2,<4.0.0)
@@ -2,19 +2,19 @@ sibi_dst/__init__.py,sha256=D01Z2Ds4zES8uz5Zp7qOWD0EcfCllWgew7AWt2X1SQg,445
2
2
  sibi_dst/df_helper/__init__.py,sha256=CyDXtFhRnMrycktxNO8jGGkP0938QiScl56kMZS1Sf8,578
3
3
  sibi_dst/df_helper/_artifact_updater_async.py,sha256=0lUwel-IkmKewRnmMv9GtuT-P6SivkIKtgOHvKchHlc,8462
4
4
  sibi_dst/df_helper/_artifact_updater_threaded.py,sha256=M5GNZismOqMmBrcyfolP1DPv87VILQf_P18is_epn50,7238
5
- sibi_dst/df_helper/_df_helper.py,sha256=tDBpiDpYW9xn_AGPCJ_sXxuAwvfyeS5Wt79f5PCmy4w,15477
5
+ sibi_dst/df_helper/_df_helper.py,sha256=rgVP4ggiCW6tTHmUz2UqUvLznwOtY5IyoVS3WSlg73U,17005
6
6
  sibi_dst/df_helper/_parquet_artifact.py,sha256=Lse0wlgHMEnyOfQTGD2OeT8U1ZK9aP93_42JkDk46r4,12636
7
7
  sibi_dst/df_helper/_parquet_reader.py,sha256=SKLpCeZdBEO86IRGNEp5IegE6lZtmNoXzjpGBoO-AZo,3215
8
8
  sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  sibi_dst/df_helper/backends/http/__init__.py,sha256=d1pfgYxbiYg7E0Iw8RbJ7xfqIfJShqqTBQQGU_S6OOo,105
10
10
  sibi_dst/df_helper/backends/http/_http_config.py,sha256=eGPFdqZ5M3Tscqx2P93B6XoBEEzlmdt7yNg7PXUQnNQ,4726
11
11
  sibi_dst/df_helper/backends/parquet/__init__.py,sha256=0A6BGHZLwiLBmuBBaUvEHfeWTcInvy2NbymlrI_nuXE,104
12
- sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=BDJwBP8IJrtfccdt0v2ElQFUp8YSHv3lX8pRTdSNTKM,25231
12
+ sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=L0GBvPXRAL_2PpaqyGabva6B99uNYrSVPiwEYfZWsvk,25308
13
13
  sibi_dst/df_helper/backends/sqlalchemy/__init__.py,sha256=LjWm9B7CweTvlvFOgB90XjSe0lVLILAIYMWKPkFXFm8,265
14
14
  sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py,sha256=6705rABdh0RY0JisxD7sE62m6890hMCAv_cpyHOMSvM,8729
15
15
  sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py,sha256=GQwDy2JwPUx37vpwxPM5hg4ZydilPIP824y5C_clsl0,383
16
16
  sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py,sha256=Ur1V7J89nULdtvtFTr2nkKuCcIS-6tVBt5NWO87WyCc,7662
17
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=K_YRTxLqCSOPztG49X0w87tF8aRinB9b8Lnp4WmsHz0,2257
17
+ sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=urMT7f1WWieVdCYKjfzyhiEoNIgAlXcMx0rVnv2vMAk,2259
18
18
  sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py,sha256=MHk64f5WDOKHQ_L4mM8L-I-Uep_y1dczAodxA9fDJHs,6667
19
19
  sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=yc5ij1oLOZvMN_mFWFUwuOoLnOOCxSerYpbmrHlWxzE,1480
20
20
  sibi_dst/df_helper/core/__init__.py,sha256=LfmTqFh6GUZup-g95bcXgAxX7J5Hkve7ftLE_CJg_AE,409
@@ -33,18 +33,21 @@ sibi_dst/osmnx_helper/basemaps/calendar_html.py,sha256=UArt6FDgoCgoRte45Xo3IHqd-
33
33
  sibi_dst/osmnx_helper/basemaps/route_map_plotter.py,sha256=rsJidieojcqIoe0kBanZbrxcelrS6nWoAyWoQXWdPiQ,11849
34
34
  sibi_dst/osmnx_helper/basemaps/router_plotter.py,sha256=UAiijn-J-jjX4YnL0_P9SFqTadrxMx-YK4djYhqPqfQ,10941
35
35
  sibi_dst/osmnx_helper/route_path_builder.py,sha256=XJJyu4YXegAkCRjE-knyQncwXaxDVXZhalYacLcb7e0,3557
36
- sibi_dst/osmnx_helper/utils.py,sha256=HfxrmXVPq3akf68SiwncbAp7XI1ER-zp8YN_doh7YaY,20679
36
+ sibi_dst/osmnx_helper/utils.py,sha256=7-lFVhGn4rHjGz6FvpXtC2jY8UzGIVyKR3MVyEfB7nw,14407
37
37
  sibi_dst/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
38
  sibi_dst/tests/test_data_wrapper_class.py,sha256=6uFmZR2DxnxQz49L5jT2ehlKvlLnpUHMLFB_PqqUq7k,3336
39
39
  sibi_dst/utils/__init__.py,sha256=vShNCOMPw8KKwlb4tq5XGrpjqakJ_OE8YDc_xDAWAxI,1302
40
40
  sibi_dst/utils/async_utils.py,sha256=53aywfgq1Q6-0OVr9qR1Sf6g7Qv3I9qunAAR4fjFXBE,351
41
- sibi_dst/utils/base.py,sha256=IyObjZ7AaE-YjVU0RLIXNCnQKWwzi5NH2I6D1KfcIyk,8716
42
- sibi_dst/utils/boilerplate/__init__.py,sha256=RYERDvBTIoaTtKLyVAmfcB4HyImqd-S8wr605iiB-TQ,161
43
- sibi_dst/utils/boilerplate/base_data_artifact.py,sha256=rlUc3iOOhuiiqXczp3nEiXCVCxDn2CVzlmSPGPD0BzA,3750
41
+ sibi_dst/utils/base.py,sha256=W501bJFjpgElPBo9Xp7SkgFj-oGPXXfFE25Br0dZqxc,25470
42
+ sibi_dst/utils/boilerplate/__init__.py,sha256=998ptGqawJl79WZA-UEeTyBhvc-ClENzXrMaCSWsrL4,295
43
+ sibi_dst/utils/boilerplate/base_attacher.py,sha256=JRAyvfljQjKVD5BJDDd09cBY9pGPIe8LQp0aUv_xJs0,736
44
44
  sibi_dst/utils/boilerplate/base_data_cube.py,sha256=ErKTM2kT8LsSXADcyYvT436O_Mp0J2hm8xs1IUircb4,2760
45
+ sibi_dst/utils/boilerplate/base_parquet_artifact.py,sha256=oqPbjHFfChA9j1WL-eDAh7XLA3zmf-Rq7s_kzITVniA,3753
46
+ sibi_dst/utils/boilerplate/base_parquet_reader.py,sha256=3kN9_bbxyX-WuJLMBWejeApW2V_BDArSljhSUOAOhVU,692
45
47
  sibi_dst/utils/business_days.py,sha256=dP0Xj4FhTBIvZZrZYLOHZl5zOpDAgWkD4p_1a7BOT7I,8461
46
- sibi_dst/utils/clickhouse_writer.py,sha256=NngJyJpx2PjUQWsX0YmwCuGdeViK77Wi3HmYqHz3jTc,9544
48
+ sibi_dst/utils/clickhouse_writer.py,sha256=XjOxPirylcYkxT3U9wu4gleZLVR1Fmir75eeBWiXrsw,10409
47
49
  sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
50
+ sibi_dst/utils/dask_utils.py,sha256=FURwrNqij6ptxFhI4v7yaGkyOIIyW9lSPpMfE9-kxHY,1970
48
51
  sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
49
52
  sibi_dst/utils/data_utils.py,sha256=7bLidEjppieNoozDFb6OuRY0W995cxg4tiGAlkGfePI,7768
50
53
  sibi_dst/utils/data_wrapper.py,sha256=090s2odlgS77mSw150V6m8-pEpD4sJ7OvjeMKNjbXxg,11604
@@ -61,7 +64,7 @@ sibi_dst/utils/periods.py,sha256=8eTGi-bToa6_a8Vwyg4fkBPryyzft9Nzy-3ToxjqC8c,143
61
64
  sibi_dst/utils/phone_formatter.py,sha256=oeM22nLjhObENrpItCNeVpkYS4pXRm5hSxdk0M4nvwU,4580
62
65
  sibi_dst/utils/progress/__init__.py,sha256=VELVxzo2cePN_-LL0veel8-F3po6tokY5MOOpu6pz1A,92
63
66
  sibi_dst/utils/progress/jobs.py,sha256=nE58ng9GPCPZhnaCDltr1tQgu3AJVqBJ1dWbGcCH4xo,3089
64
- sibi_dst/utils/progress/sse_runner.py,sha256=wx6-wGcG5Lktr5NnT_u0vTs1yGvNWOqsWfYg2xeH-YM,3569
67
+ sibi_dst/utils/progress/sse_runner.py,sha256=PySHBXcpxd_eqLqZRBU1t8Ys7Df3SM-iz5R9P_vthfE,3726
65
68
  sibi_dst/utils/storage_config.py,sha256=DLtP5jKVM0mdFdgRw6LQfRqyavMjJcCVU7GhsUCRH78,4427
66
69
  sibi_dst/utils/storage_hive.py,sha256=eZ3nq2YWLUUG-06iJubSC15cwSHEbKKdKIwoVhD_I_E,8568
67
70
  sibi_dst/utils/storage_manager.py,sha256=La1NY79bhRAmHWXp7QcXJZtbHoRboJMgoXOSXbIl1SA,6643
@@ -87,6 +90,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
87
90
  sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
88
91
  sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
89
92
  sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
90
- sibi_dst-2025.8.9.dist-info/METADATA,sha256=euFCqxH_OCKd1_56ino8Dg9k9FfZszwrNf-8wnJSX4s,2671
91
- sibi_dst-2025.8.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
92
- sibi_dst-2025.8.9.dist-info/RECORD,,
93
+ sibi_dst-2025.9.2.dist-info/METADATA,sha256=YhNbMyjgWVHGl4gQiTs0QdKrV35tCdT2hcis0m76UwY,2710
94
+ sibi_dst-2025.9.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
95
+ sibi_dst-2025.9.2.dist-info/RECORD,,