sibi-dst 2025.9.9.tar.gz → 2025.9.10.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/PKG-INFO +2 -1
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/pyproject.toml +2 -1
- sibi_dst-2025.9.10/sibi_dst/df_helper/_artifact_updater_async.py +292 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/_parquet_artifact.py +6 -326
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/_parquet_reader.py +2 -1
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +26 -2
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/__init__.py +5 -3
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_pipeline.py +14 -29
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/clickhouse_writer.py +1 -1
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/data_wrapper.py +46 -312
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/parquet_saver.py +29 -16
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/progress/sse_runner.py +39 -11
- sibi_dst-2025.9.10/sibi_dst/utils/update_planner.py +391 -0
- sibi_dst-2025.9.9/sibi_dst/df_helper/_artifact_updater_async.py +0 -238
- sibi_dst-2025.9.9/sibi_dst/utils/update_planner.py +0 -1035
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/README.md +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/_artifact_updater_threaded.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/_defaults.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/df_helper/data_cleaner.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/geopy_helper/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/geopy_helper/utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/route_path_builder.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/tests/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/tests/test_baseclass.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/async_utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/base.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_attacher.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_data_cube.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_parquet_artifact.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_parquet_reader.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_pipeline_template.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/hybrid_data_loader.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/business_days.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/credentials.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/dask_utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/data_from_http_source.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/data_utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/date_utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/df_utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/file_age_checker.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/file_utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/filepath_generator.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/iceberg_saver.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/log_utils.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/manifest_manager.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/periods.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/phone_formatter.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/progress/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/progress/jobs.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/storage_config.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/storage_hive.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/storage_manager.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/webdav_client.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/utils/write_gatekeeper.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/utils/__init__.py +0 -0
- {sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/sibi_dst/v2/utils/log_utils.py +0 -0
{sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/PKG-INFO +2 -1

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sibi-dst
-Version: 2025.9.9
+Version: 2025.9.10
 Summary: Data Science Toolkit
 Author: Luis Valverde
 Author-email: lvalverdeb@gmail.com
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: clickhouse-connect (>=0.8.18,<0.9.0)
 Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
 Requires-Dist: dask[complete] (>=2025.9.0,<2026.0.0)
+Requires-Dist: distributed (>=2025.9.1,<2026.0.0)
 Requires-Dist: mysqlclient (>=2.2.7,<3.0.0)
 Requires-Dist: opentelemetry-exporter-otlp (>=1.35.0,<2.0.0)
 Requires-Dist: opentelemetry-sdk (>=1.35.0,<2.0.0)
```
{sibi_dst-2025.9.9 → sibi_dst-2025.9.10}/pyproject.toml +2 -1

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sibi-dst"
-version = "2025.9.9"
+version = "2025.9.10"
 description = "Data Science Toolkit"
 authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
 readme = "README.md"
@@ -26,6 +26,7 @@ opentelemetry-sdk = "^1.35.0"
 pyiceberg = {extras = ["hive", "s3fs"], version = "^0.9.1"}
 sse-starlette = "^3.0.2"
 pyrosm = "^0.6.2"
+distributed = "^2025.9.1"
 
 [tool.poetry.group.dev]
 optional = true
```
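The new `distributed` dependency appears in both files because Poetry expands caret constraints when it builds package metadata: `^2025.9.1` in pyproject.toml becomes the `>=2025.9.1,<2026.0.0` range recorded in PKG-INFO. A minimal check of that equivalence, assuming the third-party `packaging` library (not part of this package):

```python
# Sketch: confirm the range Poetry exports to PKG-INFO matches the caret intent.
from packaging.specifiers import SpecifierSet

exported = SpecifierSet(">=2025.9.1,<2026.0.0")  # as written in PKG-INFO
print("2025.9.1" in exported)   # True  (lower bound is inclusive)
print("2025.12.0" in exported)  # True  (anything below the next "major")
print("2026.0.0" in exported)   # False (upper bound is exclusive)
```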
sibi_dst-2025.9.10/sibi_dst/df_helper/_artifact_updater_async.py +292 -0 (new file)

```diff
@@ -0,0 +1,292 @@
+from __future__ import annotations
+
+import asyncio
+import datetime
+import random
+import time
+from contextlib import ExitStack
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, List, Optional, Sequence, Type
+
+from sibi_dst.utils import ManagedResource
+
+try:
+    from dask.distributed import Client, LocalCluster
+except ImportError:
+    Client = None
+    LocalCluster = None
+
+
+@dataclass(slots=True)
+class _RetryCfg:
+    attempts: int = 3
+    backoff_base: float = 2.0
+    backoff_max: float = 60.0
+    jitter: float = 0.15
+
+
+# ---------------- Worker (safe for Dask pickling) ----------------
+def run_artifact_update(
+    cls: Type,
+    artifact_class_kwargs: Dict[str, Any],
+    retry: _RetryCfg,
+    period: str,
+    artifact_kwargs: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Standalone worker — safe for Dask distributed execution."""
+    import logging
+
+    logger = logging.getLogger(cls.__name__)
+
+    start_wall = datetime.datetime.now()
+    attempt_count = 0
+    success = False
+    error_msg = None
+
+    for attempt in range(1, retry.attempts + 1):
+        attempt_count = attempt
+        try:
+            with ExitStack() as stack:
+                inst = cls(**artifact_class_kwargs)
+                inst = stack.enter_context(inst)
+                inst.update_parquet(period=period, **artifact_kwargs)
+            success = True
+            break
+        except Exception as e:
+            error_msg = str(e)
+            if attempt < retry.attempts:
+                delay = min(retry.backoff_base ** (attempt - 1), retry.backoff_max)
+                delay *= 1 + random.uniform(0, retry.jitter)
+                time.sleep(delay)
+
+    end_wall = datetime.datetime.now()
+    duration = (end_wall - start_wall).total_seconds()
+
+    return {
+        "artifact": cls.__name__,
+        "period": period,
+        "start": start_wall.isoformat(),
+        "end": end_wall.isoformat(),
+        "processing_time": duration,
+        "retries": attempt_count - 1 if success else attempt_count,
+        "success": success,
+        "error": error_msg,
+    }
+
+
+class ArtifactUpdaterMultiWrapperAsync(ManagedResource):
+    """
+    Async/Threaded orchestrator.
+    Dask-enabled if a Client is passed (or created automatically).
+    """
+
+    def __init__(
+        self,
+        wrapped_classes: Dict[str, Sequence[Type]],
+        *,
+        max_workers: int = 3,
+        retry_attempts: int = 3,
+        update_timeout_seconds: int = 600,
+        backoff_base: float = 2.0,
+        backoff_max: float = 60.0,
+        backoff_jitter: float = 0.15,
+        priority_fn: Optional[Callable[[Type], int]] = None,
+        artifact_class_kwargs: Optional[Dict[str, Any]] = None,
+        dask_client: Optional[Client] = None,
+        use_dask: bool = True,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.wrapped_classes = wrapped_classes
+        self.max_workers = int(max_workers)
+        self.update_timeout_seconds = int(update_timeout_seconds)
+        self.priority_fn = priority_fn
+        self.use_dask = use_dask
+        self.client: Optional[Client] = dask_client
+        self._owns_client = False
+
+        self._retry = _RetryCfg(
+            attempts=int(retry_attempts),
+            backoff_base=float(backoff_base),
+            backoff_max=float(backoff_max),
+            jitter=float(backoff_jitter),
+        )
+
+        # Safe kwargs for artifacts
+        if self.use_dask:
+            self.artifact_class_kwargs = {
+                "debug": self.debug,
+                "verbose": self.verbose,
+                **(artifact_class_kwargs or {}),
+            }
+        else:
+            self.artifact_class_kwargs = {
+                "logger": self.logger,
+                "fs": self.fs,
+                "debug": self.debug,
+                "verbose": self.verbose,
+                **(artifact_class_kwargs or {}),
+            }
+
+        self.completion_secs: Dict[str, float] = {}
+        self.failed: List[str] = []
+        self._stop = asyncio.Event()
+
+        if self.use_dask and Client is None:
+            raise RuntimeError("Dask is not installed, cannot use Dask mode")
+
+        # auto-start local client if requested
+        if self.use_dask and not self.client:
+            self.client = Client(
+                LocalCluster(
+                    n_workers=max_workers,
+                    threads_per_worker=1,
+                    dashboard_address=None,
+                )
+            )
+            self._owns_client = True
+
+    # ---- Internals ------------------------------------------------------------
+
+    def _classes_for(self, period: str) -> List[Type]:
+        try:
+            classes = list(self.wrapped_classes[period])
+        except KeyError:
+            raise ValueError(f"Unsupported period '{period}'.")
+        if not classes:
+            raise ValueError(f"No artifact classes configured for '{period}'.")
+        if self.priority_fn:
+            try:
+                classes.sort(key=self.priority_fn)
+            except Exception as e:
+                self.logger.warning(f"priority_fn failed; using listed order: {e}")
+        return classes
+
+    def _submit_one_dask(self, cls: Type, period: str, artifact_kwargs: Dict[str, Any]):
+        return self.client.submit(
+            run_artifact_update,
+            cls,
+            dict(self.artifact_class_kwargs),
+            self._retry,
+            period,
+            artifact_kwargs,
+            pure=False,
+        )
+
+    async def _run_one_async(
+        self,
+        cls: Type,
+        period: str,
+        sem: asyncio.Semaphore,
+        artifact_kwargs: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Async/threaded fallback execution."""
+        name = cls.__name__
+        self.logger.info(f"▶️ Starting {name} for period '{period}'")
+        start_wall = datetime.datetime.now()
+
+        attempt_count = 0
+        success = False
+        error_msg = None
+
+        try:
+            async with sem:
+                for attempt in range(1, self._retry.attempts + 1):
+                    attempt_count = attempt
+                    try:
+                        def _sync_block():
+                            with ExitStack() as stack:
+                                inst = cls(**self.artifact_class_kwargs)
+                                inst = stack.enter_context(inst)
+                                inst.update_parquet(period=period, **artifact_kwargs)
+
+                        await asyncio.wait_for(
+                            asyncio.to_thread(_sync_block),
+                            timeout=self.update_timeout_seconds,
+                        )
+                        success = True
+                        break
+                    except Exception as e:
+                        error_msg = str(e)
+                        if attempt < self._retry.attempts and not self._stop.is_set():
+                            delay = min(
+                                self._retry.backoff_base ** (attempt - 1),
+                                self._retry.backoff_max,
+                            )
+                            delay *= 1 + random.uniform(0, self._retry.jitter)
+                            await asyncio.sleep(delay)
+        finally:
+            end_wall = datetime.datetime.now()
+            duration = (end_wall - start_wall).total_seconds()
+
+        result = {
+            "artifact": name,
+            "period": period,
+            "start": start_wall.isoformat(),
+            "end": end_wall.isoformat(),
+            "processing_time": duration,
+            "retries": attempt_count - 1 if success else attempt_count,
+            "success": success,
+            "error": error_msg,
+        }
+
+        if success:
+            self.logger.info(f"✅ Artifact {name} succeeded", extra=result)
+            self.completion_secs[name] = duration
+        else:
+            self.logger.error(f"❌ Artifact {name} failed", extra=result)
+            self.failed.append(name)
+
+        return result
+
+    # ---- Public API -----------------------------------------------------------
+
+    async def update_data(self, period: str, **kwargs: Any) -> List[Dict[str, Any]]:
+        self.completion_secs.clear()
+        self.failed.clear()
+        classes = self._classes_for(period)
+
+        try:
+            if self.use_dask:
+                futures = [self._submit_one_dask(cls, period, kwargs) for cls in classes]
+                results = await asyncio.to_thread(lambda: self.client.gather(futures))
+            else:
+                sem = asyncio.Semaphore(self.max_workers)
+                tasks = [
+                    asyncio.create_task(self._run_one_async(cls, period, sem, kwargs))
+                    for cls in classes
+                ]
+                results = await asyncio.gather(*tasks)
+            return results
+        finally:
+            # only shut down if we own the client
+            if self._owns_client:
+                self.close()
+
+    def get_update_status(self) -> Dict[str, Any]:
+        done = set(self.completion_secs)
+        fail = set(self.failed)
+        all_names = {c.__name__ for v in self.wrapped_classes.values() for c in v}
+        return {
+            "total": len(all_names),
+            "completed": sorted(done),
+            "failed": sorted(fail),
+            "pending": sorted(all_names - done - fail),
+            "completion_times": dict(self.completion_secs),
+        }
+
+    # ---- Lifecycle ------------------------------------------------------------
+
+    def _cleanup(self) -> None:
+        """Release any resources created by this wrapper."""
+        if self._owns_client and self.client is not None:
+            try:
+                cluster = getattr(self.client, "cluster", None)
+                self.client.close()
+                if cluster is not None:
+                    cluster.close()
+            finally:
+                self.client = None
+                self._owns_client = False
+
```
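The retry loop in `run_artifact_update` (and its async counterpart in `_run_one_async`) uses capped exponential backoff with proportional jitter. A small sketch, written here for illustration, of the delay schedule the defaults produce (attempts=3, base=2.0, cap=60.0, jitter=0.15):

```python
import random

def backoff_delay(attempt: int, base: float = 2.0, cap: float = 60.0,
                  jitter: float = 0.15) -> float:
    # Mirrors the worker's computation: exponential in the attempt number,
    # capped at `cap`, then stretched by up to `jitter` (proportional).
    delay = min(base ** (attempt - 1), cap)
    return delay * (1 + random.uniform(0, jitter))

# With attempts=3, a delay is slept only after failed attempts 1 and 2:
# pre-jitter delays of 1.0s and 2.0s, each inflated by at most 15%.
for attempt in (1, 2):
    print(f"attempt {attempt}: {backoff_delay(attempt):.2f}s")
```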
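For orientation, a hypothetical usage sketch of the new orchestrator. The artifact classes, the `myproject` import, and the "daily" period key are invented for illustration; per the code above, any class that accepts the configured kwargs, supports the context-manager protocol, and implements `update_parquet(period=..., **kwargs)` qualifies:

```python
import asyncio

# Module path taken from the file list above; the example classes are hypothetical.
from sibi_dst.df_helper._artifact_updater_async import ArtifactUpdaterMultiWrapperAsync
from myproject.artifacts import DailySalesArtifact, DailyInventoryArtifact

async def main() -> None:
    wrapper = ArtifactUpdaterMultiWrapperAsync(
        wrapped_classes={"daily": [DailySalesArtifact, DailyInventoryArtifact]},
        max_workers=2,
        retry_attempts=3,
        use_dask=False,  # threaded fallback; True submits to a Dask LocalCluster
    )
    # Extra keyword arguments are forwarded to each artifact's update_parquet().
    results = await wrapper.update_data("daily")
    print(results)
    print(wrapper.get_update_status())

asyncio.run(main())
```

Note that in Dask mode `update_data` closes an auto-created client in its `finally` block, so a wrapper that owns its client is effectively single-use; pass an existing `dask_client` to keep the cluster alive across calls.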