sibi-dst 2025.8.9__py3-none-any.whl → 2025.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/df_helper/_df_helper.py +27 -3
- sibi_dst/df_helper/backends/parquet/_parquet_options.py +2 -0
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +1 -1
- sibi_dst/osmnx_helper/utils.py +82 -214
- sibi_dst/utils/base.py +567 -122
- sibi_dst/utils/boilerplate/__init__.py +9 -4
- sibi_dst/utils/boilerplate/base_attacher.py +25 -0
- sibi_dst/utils/boilerplate/{base_data_artifact.py → base_parquet_artifact.py} +1 -1
- sibi_dst/utils/boilerplate/base_parquet_reader.py +21 -0
- sibi_dst/utils/clickhouse_writer.py +24 -0
- sibi_dst/utils/dask_utils.py +61 -0
- sibi_dst/utils/progress/sse_runner.py +2 -0
- {sibi_dst-2025.8.9.dist-info → sibi_dst-2025.9.2.dist-info}/METADATA +2 -1
- {sibi_dst-2025.8.9.dist-info → sibi_dst-2025.9.2.dist-info}/RECORD +15 -12
- {sibi_dst-2025.8.9.dist-info → sibi_dst-2025.9.2.dist-info}/WHEEL +0 -0
@@ -1,6 +1,11 @@
|
|
1
|
-
from .
|
1
|
+
from .base_parquet_artifact import BaseParquetArtifact
|
2
2
|
from .base_data_cube import BaseDataCube
|
3
|
+
from .base_attacher import make_attacher
|
4
|
+
from .base_parquet_reader import BaseParquetReader
|
5
|
+
__all__ = [
|
6
|
+
"BaseDataCube",
|
7
|
+
"BaseParquetArtifact",
|
8
|
+
"make_attacher",
|
9
|
+
"BaseParquetReader"
|
10
|
+
]
|
3
11
|
|
4
|
-
__all__ = ["BaseDataCube",
|
5
|
-
"BaseDataArtifact"
|
6
|
-
]
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from typing import Any, Awaitable, Callable, Sequence, Type
|
2
|
+
|
3
|
+
def make_attacher(
    cube_cls: Type,
    fieldnames: Sequence[str],
    column_names: Sequence[str],
) -> Callable[..., Awaitable[Any]]:
    """
    Build an async ``attach`` callable bound to ``cube_cls``.

    The returned coroutine function takes keyword-only ``logger`` and
    ``debug`` plus arbitrary keyword params. When at least one param value
    is falsy ([], None, {}, etc.) it returns ``None`` without instantiating
    ``cube_cls``. Otherwise it creates ``cube_cls(logger=..., debug=...)``
    and awaits its ``aload`` with ``fieldnames`` (as a tuple),
    ``column_names`` (as a list) and the caller's params.
    """

    async def attach(*, logger=None, debug: bool = False, **params: Any):
        # A single falsy param value is enough to skip the load entirely.
        if not all(params.values()):
            return None

        # Defaults go in first; caller params are applied last, so a param
        # named "fieldnames" or "column_names" replaces the bound value.
        load_kwargs = {}
        load_kwargs["fieldnames"] = tuple(fieldnames)
        load_kwargs["column_names"] = list(column_names)
        load_kwargs.update(params)

        cube = cube_cls(logger=logger, debug=debug)
        return await cube.aload(**load_kwargs)

    return attach
|
24
|
+
|
25
|
+
__all__ = ['make_attacher']
|
@@ -30,7 +30,7 @@ def _validate_and_format_date(name: str, value: DateLike) -> Optional[str]:
|
|
30
30
|
raise TypeError(f"{name} must be str, date, datetime, or None; got {type(value)}")
|
31
31
|
|
32
32
|
|
33
|
-
class
|
33
|
+
class BaseParquetArtifact(ParquetArtifact):
|
34
34
|
"""
|
35
35
|
Base class for Parquet artifacts with optional date window.
|
36
36
|
|
@@ -0,0 +1,21 @@
|
|
1
|
+
from sibi_dst.df_helper import ParquetReader
|
2
|
+
|
3
|
+
class BaseParquetReader(ParquetReader):
    """
    Parquet reader whose constructor keyword arguments are layered on top of
    the class-level ``config`` defaults before being handed to
    ``ParquetReader``.
    """

    # Defaults merged into every constructor call; explicit kwargs win.
    config = {
        'backend': 'parquet'
    }

    def __init__(self, parquet_start_date, parquet_end_date, **kwargs):
        # Copy the class-level defaults first so the shared dict is never
        # mutated, then let caller-supplied kwargs override them.
        merged = dict(self.config)
        merged.update(kwargs)
        super().__init__(
            parquet_start_date=parquet_start_date,
            parquet_end_date=parquet_end_date,
            **merged,
        )
|
20
|
+
|
21
|
+
__all__ = ['BaseParquetReader']
|
@@ -10,6 +10,14 @@ import clickhouse_connect
|
|
10
10
|
|
11
11
|
from . import ManagedResource
|
12
12
|
|
13
|
+
def _to_bool(val: Any) -> bool:
|
14
|
+
if isinstance(val, bool):
|
15
|
+
return val
|
16
|
+
if isinstance(val, (int, float)):
|
17
|
+
return bool(val)
|
18
|
+
if isinstance(val, str):
|
19
|
+
return val.strip().lower() in ("1", "true", "yes", "on")
|
20
|
+
return False
|
13
21
|
|
14
22
|
class ClickHouseWriter(ManagedResource):
|
15
23
|
"""
|
@@ -47,6 +55,11 @@ class ClickHouseWriter(ManagedResource):
|
|
47
55
|
database: str = "sibi_data",
|
48
56
|
user: str = "default",
|
49
57
|
password: str = "",
|
58
|
+
secure: bool = False,
|
59
|
+
verify: bool = False,
|
60
|
+
ca_cert: str = "",
|
61
|
+
client_cert: str = "",
|
62
|
+
compression: str = "",
|
50
63
|
table: str = "test_sibi_table",
|
51
64
|
order_by: str = "id",
|
52
65
|
engine: Optional[str] = None, # e.g. "ENGINE MergeTree ORDER BY (`id`)"
|
@@ -61,6 +74,11 @@ class ClickHouseWriter(ManagedResource):
|
|
61
74
|
self.database = database
|
62
75
|
self.user = user
|
63
76
|
self.password = password
|
77
|
+
self.secure = _to_bool(secure)
|
78
|
+
self.verify = _to_bool(verify)
|
79
|
+
self.ca_cert = ca_cert
|
80
|
+
self.client_cert = client_cert
|
81
|
+
self.compression = compression # e.g. 'lz4', 'zstd',
|
64
82
|
self.table = table
|
65
83
|
self.order_by = order_by
|
66
84
|
self.engine = engine # if None → default MergeTree ORDER BY
|
@@ -224,6 +242,7 @@ class ClickHouseWriter(ManagedResource):
|
|
224
242
|
# ------------- low-level helpers -------------
|
225
243
|
|
226
244
|
def _get_client(self):
|
245
|
+
print(self.secure, " ", self.verify)
|
227
246
|
cli = getattr(self._tlocal, "client", None)
|
228
247
|
if cli is not None:
|
229
248
|
return cli
|
@@ -233,6 +252,11 @@ class ClickHouseWriter(ManagedResource):
|
|
233
252
|
database=self.database,
|
234
253
|
username=self.user, # clickhouse-connect uses 'username'
|
235
254
|
password=self.password,
|
255
|
+
secure=self.secure,
|
256
|
+
verify=self.verify,
|
257
|
+
ca_cert=self.ca_cert or None,
|
258
|
+
client_cert=self.client_cert or None,
|
259
|
+
compression=self.compression or None,
|
236
260
|
)
|
237
261
|
self._tlocal.client = cli
|
238
262
|
return cli
|
@@ -0,0 +1,61 @@
|
|
1
|
+
import asyncio
|
2
|
+
from typing import List, Any, Dict
|
3
|
+
|
4
|
+
import dask
|
5
|
+
import dask.dataframe as dd
|
6
|
+
|
7
|
+
def _to_int_safe(x) -> int:
|
8
|
+
"""
|
9
|
+
Convert scalar-like to int safely.
|
10
|
+
Handles numpy scalars, pandas Series/DataFrame outputs.
|
11
|
+
"""
|
12
|
+
if hasattr(x, "item"): # numpy scalar, pandas scalar
|
13
|
+
return int(x.item())
|
14
|
+
if hasattr(x, "iloc"): # Series-like
|
15
|
+
return int(x.iloc[0])
|
16
|
+
return int(x)
|
17
|
+
|
18
|
+
def dask_is_probably_empty(ddf: dd.DataFrame) -> bool:
    """
    Metadata-only emptiness heuristic.

    Returns True when ``ddf`` reports zero partitions (a missing
    ``npartitions`` attribute counts as zero) or when its ``_meta`` has no
    columns. Only attributes are read here; nothing is computed.
    """
    if getattr(ddf, "npartitions", 0) == 0:
        return True
    return len(ddf._meta.columns) == 0
|
20
|
+
|
21
|
+
|
22
|
+
def dask_is_empty_truthful(ddf: dd.DataFrame) -> bool:
    """
    Exact emptiness check: sum ``len`` over every partition and compare to 0.

    This calls ``.compute()``, so every partition is evaluated.
    """
    per_partition_lengths = ddf.map_partitions(len)
    total_rows = per_partition_lengths.sum().compute()
    return int(total_rows) == 0
|
25
|
+
|
26
|
+
|
27
|
+
def dask_is_empty(ddf: dd.DataFrame, *, sample: int = 4) -> bool:
    """
    Decide whether ``ddf`` has no rows, probing a few partitions first.

    Steps:
      1. Return True immediately if ``dask_is_probably_empty`` says so.
      2. Compute ``len`` for the first ``k`` partitions, where ``k`` is
         ``sample`` clamped to the range ``[1, ddf.npartitions]``.
      3. Any non-empty probe means the frame is not empty.
      4. If the probes covered every partition and all were zero, the frame
         is empty.
      5. Otherwise fall back to ``dask_is_empty_truthful`` for a full count.

    Args:
        ddf: The Dask DataFrame to inspect.
        sample: Maximum number of leading partitions to probe (at least 1).
    """
    if dask_is_probably_empty(ddf):
        return True

    probe_count = min(max(sample, 1), ddf.npartitions)
    lazy_lengths = [
        ddf.get_partition(idx).map_partitions(len) for idx in range(probe_count)
    ]
    probes = dask.compute(*lazy_lengths)

    # Stop at the first probe that proves rows exist.
    for probe in probes:
        if _to_int_safe(probe) > 0:
            return False

    probed_everything = probe_count == ddf.npartitions
    if probed_everything and all(_to_int_safe(probe) == 0 for probe in probes):
        return True

    # Only a prefix was sampled and it was empty: count everything.
    return dask_is_empty_truthful(ddf)
|
42
|
+
|
43
|
+
class UniqueValuesExtractor:
    """
    Async helper that collects the distinct non-null values of DataFrame
    columns as plain Python lists.
    """

    @staticmethod
    def _compute_to_list_sync(series) -> List[Any]:
        """
        Turn ``series`` into a list, calling ``.compute()`` first when the
        object provides it (Dask-backed). Intended to run in a worker thread.
        """
        materialized = series.compute() if hasattr(series, "compute") else series
        return materialized.tolist()

    async def compute_to_list(self, series) -> List[Any]:
        """Run ``_compute_to_list_sync`` in a thread so a potential Dask
        ``.compute()`` does not block the event loop."""
        values = await asyncio.to_thread(self._compute_to_list_sync, series)
        return values

    async def extract_unique_values(self, df, *columns: str) -> Dict[str, List[Any]]:
        """
        Return ``{column: unique non-null values}`` for each requested column.

        Columns are processed concurrently via ``asyncio.gather``.
        """

        async def _unique_for(name: str):
            distinct = df[name].dropna().unique()
            values = await self.compute_to_list(distinct)
            return name, values

        results = await asyncio.gather(*[_unique_for(name) for name in columns])
        return {name: values for name, values in results}
|
@@ -49,6 +49,8 @@ class SSERunner:
|
|
49
49
|
async def handler(request: Request): # <-- only Request
|
50
50
|
queue: asyncio.Queue = asyncio.Queue()
|
51
51
|
task_id = str(asyncio.get_running_loop().time()).replace(".", "")
|
52
|
+
self.logger.debug(
|
53
|
+
f"SSE {task_id}: new request client={request.client} path={request.url.path} q={dict(request.query_params)}")
|
52
54
|
|
53
55
|
ctx: Dict[str, Any] = {
|
54
56
|
"path": dict(request.path_params), # <-- pull path params here
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sibi-dst
|
3
|
-
Version: 2025.8.9
|
3
|
+
Version: 2025.9.2
|
4
4
|
Summary: Data Science Toolkit
|
5
5
|
Author: Luis Valverde
|
6
6
|
Author-email: lvalverdeb@gmail.com
|
@@ -21,6 +21,7 @@ Requires-Dist: pyarrow (>=20.0.0,<21.0.0)
|
|
21
21
|
Requires-Dist: pydantic (>=2.11.7,<3.0.0)
|
22
22
|
Requires-Dist: pyiceberg[hive,s3fs] (>=0.9.1,<0.10.0)
|
23
23
|
Requires-Dist: pymysql (>=1.1.1,<2.0.0)
|
24
|
+
Requires-Dist: pyrosm (>=0.6.2,<0.7.0)
|
24
25
|
Requires-Dist: s3fs (>=2025.5.1,<2026.0.0)
|
25
26
|
Requires-Dist: sqlalchemy (>=2.0.41,<3.0.0)
|
26
27
|
Requires-Dist: sse-starlette (>=3.0.2,<4.0.0)
|
@@ -2,19 +2,19 @@ sibi_dst/__init__.py,sha256=D01Z2Ds4zES8uz5Zp7qOWD0EcfCllWgew7AWt2X1SQg,445
|
|
2
2
|
sibi_dst/df_helper/__init__.py,sha256=CyDXtFhRnMrycktxNO8jGGkP0938QiScl56kMZS1Sf8,578
|
3
3
|
sibi_dst/df_helper/_artifact_updater_async.py,sha256=0lUwel-IkmKewRnmMv9GtuT-P6SivkIKtgOHvKchHlc,8462
|
4
4
|
sibi_dst/df_helper/_artifact_updater_threaded.py,sha256=M5GNZismOqMmBrcyfolP1DPv87VILQf_P18is_epn50,7238
|
5
|
-
sibi_dst/df_helper/_df_helper.py,sha256=
|
5
|
+
sibi_dst/df_helper/_df_helper.py,sha256=rgVP4ggiCW6tTHmUz2UqUvLznwOtY5IyoVS3WSlg73U,17005
|
6
6
|
sibi_dst/df_helper/_parquet_artifact.py,sha256=Lse0wlgHMEnyOfQTGD2OeT8U1ZK9aP93_42JkDk46r4,12636
|
7
7
|
sibi_dst/df_helper/_parquet_reader.py,sha256=SKLpCeZdBEO86IRGNEp5IegE6lZtmNoXzjpGBoO-AZo,3215
|
8
8
|
sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
sibi_dst/df_helper/backends/http/__init__.py,sha256=d1pfgYxbiYg7E0Iw8RbJ7xfqIfJShqqTBQQGU_S6OOo,105
|
10
10
|
sibi_dst/df_helper/backends/http/_http_config.py,sha256=eGPFdqZ5M3Tscqx2P93B6XoBEEzlmdt7yNg7PXUQnNQ,4726
|
11
11
|
sibi_dst/df_helper/backends/parquet/__init__.py,sha256=0A6BGHZLwiLBmuBBaUvEHfeWTcInvy2NbymlrI_nuXE,104
|
12
|
-
sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=
|
12
|
+
sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=L0GBvPXRAL_2PpaqyGabva6B99uNYrSVPiwEYfZWsvk,25308
|
13
13
|
sibi_dst/df_helper/backends/sqlalchemy/__init__.py,sha256=LjWm9B7CweTvlvFOgB90XjSe0lVLILAIYMWKPkFXFm8,265
|
14
14
|
sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py,sha256=6705rABdh0RY0JisxD7sE62m6890hMCAv_cpyHOMSvM,8729
|
15
15
|
sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py,sha256=GQwDy2JwPUx37vpwxPM5hg4ZydilPIP824y5C_clsl0,383
|
16
16
|
sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py,sha256=Ur1V7J89nULdtvtFTr2nkKuCcIS-6tVBt5NWO87WyCc,7662
|
17
|
-
sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=
|
17
|
+
sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=urMT7f1WWieVdCYKjfzyhiEoNIgAlXcMx0rVnv2vMAk,2259
|
18
18
|
sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py,sha256=MHk64f5WDOKHQ_L4mM8L-I-Uep_y1dczAodxA9fDJHs,6667
|
19
19
|
sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=yc5ij1oLOZvMN_mFWFUwuOoLnOOCxSerYpbmrHlWxzE,1480
|
20
20
|
sibi_dst/df_helper/core/__init__.py,sha256=LfmTqFh6GUZup-g95bcXgAxX7J5Hkve7ftLE_CJg_AE,409
|
@@ -33,18 +33,21 @@ sibi_dst/osmnx_helper/basemaps/calendar_html.py,sha256=UArt6FDgoCgoRte45Xo3IHqd-
|
|
33
33
|
sibi_dst/osmnx_helper/basemaps/route_map_plotter.py,sha256=rsJidieojcqIoe0kBanZbrxcelrS6nWoAyWoQXWdPiQ,11849
|
34
34
|
sibi_dst/osmnx_helper/basemaps/router_plotter.py,sha256=UAiijn-J-jjX4YnL0_P9SFqTadrxMx-YK4djYhqPqfQ,10941
|
35
35
|
sibi_dst/osmnx_helper/route_path_builder.py,sha256=XJJyu4YXegAkCRjE-knyQncwXaxDVXZhalYacLcb7e0,3557
|
36
|
-
sibi_dst/osmnx_helper/utils.py,sha256=
|
36
|
+
sibi_dst/osmnx_helper/utils.py,sha256=7-lFVhGn4rHjGz6FvpXtC2jY8UzGIVyKR3MVyEfB7nw,14407
|
37
37
|
sibi_dst/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
38
|
sibi_dst/tests/test_data_wrapper_class.py,sha256=6uFmZR2DxnxQz49L5jT2ehlKvlLnpUHMLFB_PqqUq7k,3336
|
39
39
|
sibi_dst/utils/__init__.py,sha256=vShNCOMPw8KKwlb4tq5XGrpjqakJ_OE8YDc_xDAWAxI,1302
|
40
40
|
sibi_dst/utils/async_utils.py,sha256=53aywfgq1Q6-0OVr9qR1Sf6g7Qv3I9qunAAR4fjFXBE,351
|
41
|
-
sibi_dst/utils/base.py,sha256=
|
42
|
-
sibi_dst/utils/boilerplate/__init__.py,sha256=
|
43
|
-
sibi_dst/utils/boilerplate/
|
41
|
+
sibi_dst/utils/base.py,sha256=W501bJFjpgElPBo9Xp7SkgFj-oGPXXfFE25Br0dZqxc,25470
|
42
|
+
sibi_dst/utils/boilerplate/__init__.py,sha256=998ptGqawJl79WZA-UEeTyBhvc-ClENzXrMaCSWsrL4,295
|
43
|
+
sibi_dst/utils/boilerplate/base_attacher.py,sha256=JRAyvfljQjKVD5BJDDd09cBY9pGPIe8LQp0aUv_xJs0,736
|
44
44
|
sibi_dst/utils/boilerplate/base_data_cube.py,sha256=ErKTM2kT8LsSXADcyYvT436O_Mp0J2hm8xs1IUircb4,2760
|
45
|
+
sibi_dst/utils/boilerplate/base_parquet_artifact.py,sha256=oqPbjHFfChA9j1WL-eDAh7XLA3zmf-Rq7s_kzITVniA,3753
|
46
|
+
sibi_dst/utils/boilerplate/base_parquet_reader.py,sha256=3kN9_bbxyX-WuJLMBWejeApW2V_BDArSljhSUOAOhVU,692
|
45
47
|
sibi_dst/utils/business_days.py,sha256=dP0Xj4FhTBIvZZrZYLOHZl5zOpDAgWkD4p_1a7BOT7I,8461
|
46
|
-
sibi_dst/utils/clickhouse_writer.py,sha256=
|
48
|
+
sibi_dst/utils/clickhouse_writer.py,sha256=XjOxPirylcYkxT3U9wu4gleZLVR1Fmir75eeBWiXrsw,10409
|
47
49
|
sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
|
50
|
+
sibi_dst/utils/dask_utils.py,sha256=FURwrNqij6ptxFhI4v7yaGkyOIIyW9lSPpMfE9-kxHY,1970
|
48
51
|
sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
|
49
52
|
sibi_dst/utils/data_utils.py,sha256=7bLidEjppieNoozDFb6OuRY0W995cxg4tiGAlkGfePI,7768
|
50
53
|
sibi_dst/utils/data_wrapper.py,sha256=090s2odlgS77mSw150V6m8-pEpD4sJ7OvjeMKNjbXxg,11604
|
@@ -61,7 +64,7 @@ sibi_dst/utils/periods.py,sha256=8eTGi-bToa6_a8Vwyg4fkBPryyzft9Nzy-3ToxjqC8c,143
|
|
61
64
|
sibi_dst/utils/phone_formatter.py,sha256=oeM22nLjhObENrpItCNeVpkYS4pXRm5hSxdk0M4nvwU,4580
|
62
65
|
sibi_dst/utils/progress/__init__.py,sha256=VELVxzo2cePN_-LL0veel8-F3po6tokY5MOOpu6pz1A,92
|
63
66
|
sibi_dst/utils/progress/jobs.py,sha256=nE58ng9GPCPZhnaCDltr1tQgu3AJVqBJ1dWbGcCH4xo,3089
|
64
|
-
sibi_dst/utils/progress/sse_runner.py,sha256=
|
67
|
+
sibi_dst/utils/progress/sse_runner.py,sha256=PySHBXcpxd_eqLqZRBU1t8Ys7Df3SM-iz5R9P_vthfE,3726
|
65
68
|
sibi_dst/utils/storage_config.py,sha256=DLtP5jKVM0mdFdgRw6LQfRqyavMjJcCVU7GhsUCRH78,4427
|
66
69
|
sibi_dst/utils/storage_hive.py,sha256=eZ3nq2YWLUUG-06iJubSC15cwSHEbKKdKIwoVhD_I_E,8568
|
67
70
|
sibi_dst/utils/storage_manager.py,sha256=La1NY79bhRAmHWXp7QcXJZtbHoRboJMgoXOSXbIl1SA,6643
|
@@ -87,6 +90,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
|
|
87
90
|
sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
|
88
91
|
sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
|
89
92
|
sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
|
90
|
-
sibi_dst-2025.
|
91
|
-
sibi_dst-2025.
|
92
|
-
sibi_dst-2025.
|
93
|
+
sibi_dst-2025.9.2.dist-info/METADATA,sha256=YhNbMyjgWVHGl4gQiTs0QdKrV35tCdT2hcis0m76UwY,2710
|
94
|
+
sibi_dst-2025.9.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
95
|
+
sibi_dst-2025.9.2.dist-info/RECORD,,
|
File without changes
|