sibi-dst 2025.1.13__tar.gz → 2025.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/PKG-INFO +1 -1
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/pyproject.toml +1 -1
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/__init__.py +7 -1
- sibi_dst-2025.8.1/sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +315 -0
- sibi_dst-2025.8.1/sibi_dst/df_helper/_df_helper.py +573 -0
- sibi_dst-2025.8.1/sibi_dst/df_helper/_parquet_artifact.py +300 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
- sibi_dst-2025.8.1/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +211 -0
- sibi_dst-2025.8.1/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
- sibi_dst-2025.8.1/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +186 -0
- sibi_dst-2025.8.1/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +128 -0
- sibi_dst-2025.8.1/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
- sibi_dst-2025.8.1/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +154 -0
- sibi_dst-2025.8.1/sibi_dst/osmnx_helper/route_path_builder.py +97 -0
- sibi_dst-2025.8.1/sibi_dst/utils/base.py +323 -0
- sibi_dst-2025.8.1/sibi_dst/utils/clickhouse_writer.py +501 -0
- sibi_dst-2025.8.1/sibi_dst/utils/data_utils.py +201 -0
- sibi_dst-2025.8.1/sibi_dst/utils/data_wrapper.py +458 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/date_utils.py +1 -0
- sibi_dst-2025.8.1/sibi_dst/utils/df_utils.py +264 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/file_utils.py +3 -2
- sibi_dst-2025.8.1/sibi_dst/utils/filepath_generator.py +349 -0
- sibi_dst-2025.8.1/sibi_dst/utils/log_utils.py +711 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/manifest_manager.py +60 -76
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/parquet_saver.py +33 -27
- sibi_dst-2025.8.1/sibi_dst/utils/phone_formatter.py +120 -0
- sibi_dst-2025.8.1/sibi_dst/utils/update_planner.py +300 -0
- sibi_dst-2025.8.1/sibi_dst/utils/webdav_client.py +170 -0
- sibi_dst-2025.1.13/sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +0 -422
- sibi_dst-2025.1.13/sibi_dst/df_helper/_df_helper.py +0 -273
- sibi_dst-2025.1.13/sibi_dst/df_helper/_parquet_artifact.py +0 -328
- sibi_dst-2025.1.13/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -250
- sibi_dst-2025.1.13/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -336
- sibi_dst-2025.1.13/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -80
- sibi_dst-2025.1.13/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -104
- sibi_dst-2025.1.13/sibi_dst/osmnx_helper/route_path_builder.py +0 -98
- sibi_dst-2025.1.13/sibi_dst/utils/base.py +0 -117
- sibi_dst-2025.1.13/sibi_dst/utils/clickhouse_writer.py +0 -235
- sibi_dst-2025.1.13/sibi_dst/utils/data_utils.py +0 -248
- sibi_dst-2025.1.13/sibi_dst/utils/data_wrapper.py +0 -214
- sibi_dst-2025.1.13/sibi_dst/utils/df_utils.py +0 -284
- sibi_dst-2025.1.13/sibi_dst/utils/filepath_generator.py +0 -187
- sibi_dst-2025.1.13/sibi_dst/utils/log_utils.py +0 -372
- sibi_dst-2025.1.13/sibi_dst/utils/phone_formatter.py +0 -127
- sibi_dst-2025.1.13/sibi_dst/utils/update_planner.py +0 -298
- sibi_dst-2025.1.13/sibi_dst/utils/webdav_client.py +0 -220
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/README.md +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/_parquet_reader.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/backends/parquet/_filter_handler.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/core/_defaults.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/df_helper/data_cleaner.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/geopy_helper/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/geopy_helper/utils.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/osmnx_helper/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/osmnx_helper/utils.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/tests/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/credentials.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/data_from_http_source.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/storage_config.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/utils/storage_manager.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/utils/__init__.py +0 -0
- {sibi_dst-2025.1.13 → sibi_dst-2025.8.1}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -10,4 +10,10 @@ except version_reader.PackageNotFoundError:
|
|
10
10
|
|
11
11
|
__all__ = [
|
12
12
|
"__version__",
|
13
|
-
]
|
13
|
+
]
|
14
|
+
|
15
|
+
from . import df_helper as df_helper
|
16
|
+
from . import osmnx_helper as osmnx_helper
|
17
|
+
from . import geopy_helper as geopy_helper
|
18
|
+
from . import utils as sibi_utils
|
19
|
+
|
@@ -0,0 +1,315 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import time
|
4
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from typing import Any, Dict
|
7
|
+
|
8
|
+
from sibi_dst.utils import ManagedResource
|
9
|
+
|
10
|
+
|
11
|
+
@dataclass(slots=True)
class _RetryCfg:
    """Retry/backoff knobs shared by the threaded and async orchestrators."""

    attempts: int = 3          # total tries per artifact (first attempt + retries)
    backoff_base: float = 2.0  # exponential base: delay = base ** (attempt - 1)
    backoff_max: float = 60.0  # cap (seconds) on the exponential delay
    jitter: float = 0.15       # delay is multiplied by 1 + uniform(0, jitter)
|
17
|
+
|
18
|
+
|
19
|
+
# Keyword names consumed by the orchestrator itself. `_split_kwargs` routes
# these out of `update_data(**kwargs)`; everything else is forwarded verbatim
# to each artifact's `update_parquet()`.
_ORCHESTRATOR_KEYS = {
    "retry_attempts",
    "backoff_base",
    "backoff_max",
    "backoff_jitter",
    "update_timeout_seconds",  # accepted but unused in pure-threads version
    "max_workers",
    "priority_fn",
    "artifact_class_kwargs",
}
|
29
|
+
|
30
|
+
|
31
|
+
def _default_artifact_kwargs(resource: ManagedResource) -> Dict[str, Any]:
|
32
|
+
return {
|
33
|
+
"logger": resource.logger,
|
34
|
+
"debug": resource.debug,
|
35
|
+
"fs": resource.fs,
|
36
|
+
"verbose": resource.verbose,
|
37
|
+
}
|
38
|
+
|
39
|
+
|
40
|
+
class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
    """
    Backward-compatible threaded orchestrator.

    For a given period, instantiates each configured artifact class and calls
    its ``update_parquet(period=..., **kwargs)`` inside a context manager,
    fanning work out over a ``ThreadPoolExecutor`` with per-artifact retry and
    exponential backoff (knobs collected in ``_RetryCfg``).
    """

    def __init__(
        self,
        wrapped_classes: Dict[str, Sequence[Type]],
        *,
        max_workers: int = 4,
        retry_attempts: int = 3,
        backoff_base: float = 2.0,
        backoff_max: float = 60.0,
        backoff_jitter: float = 0.15,
        priority_fn: Optional[Callable[[Type], int]] = None,
        artifact_class_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        # Remaining kwargs (logger, debug, fs, verbose, ...) are handled by
        # ManagedResource — presumably; confirm against sibi_dst.utils.base.
        super().__init__(**kwargs)
        self.wrapped_classes = wrapped_classes
        self.max_workers = int(max_workers)
        self.priority_fn = priority_fn
        self._retry = _RetryCfg(
            attempts=int(retry_attempts),
            backoff_base=float(backoff_base),
            backoff_max=float(backoff_max),
            jitter=float(backoff_jitter),
        )
        # Defaults derived from this resource; explicit caller kwargs win.
        self.artifact_class_kwargs = {
            **_default_artifact_kwargs(self),
            **(artifact_class_kwargs or {}),
        }
        # Per-run bookkeeping; reset at the top of update_data().
        self.completion_secs: Dict[str, float] = {}
        self.failed: List[str] = []

    def _classes_for(self, period: str) -> List[Type]:
        """Return the artifact classes registered for *period*, optionally priority-sorted.

        Raises ValueError for an unknown period or an empty class list.
        """
        try:
            classes = list(self.wrapped_classes[period])
        except KeyError:
            raise ValueError(f"Unsupported period '{period}'.")
        if not classes:
            raise ValueError(f"No artifact classes configured for period '{period}'.")
        if self.priority_fn:
            # Sorting is best-effort: a broken priority_fn must not abort the run.
            try:
                classes.sort(key=self.priority_fn)
            except Exception as e:
                self.logger.warning(f"priority_fn failed; using listed order: {e}")
        return classes

    @staticmethod
    def _split_kwargs(raw: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
        """Split *raw* into (orchestrator-only, artifact-forwarded) dicts."""
        orch: Dict[str, Any] = {}
        art: Dict[str, Any] = {}
        for k, v in raw.items():
            if k in _ORCHESTRATOR_KEYS:
                orch[k] = v
            else:
                art[k] = v
        return orch, art

    def _run_one(self, cls: Type, period: str, artifact_kwargs: Dict[str, Any]) -> str:
        """
        Instantiate *cls* and run one update with retry/backoff.

        Returns the class name on success; raises RuntimeError (chained to the
        last error) once all attempts are exhausted. Runs inside a worker
        thread — it records its own completion time in ``self.completion_secs``.
        """
        name = cls.__name__
        start = time.monotonic()
        for attempt in range(1, self._retry.attempts + 1):
            try:
                with ExitStack() as stack:
                    inst = cls(**self.artifact_class_kwargs)
                    # Enter the instance's context so cleanup runs even on failure.
                    inst = stack.enter_context(inst)
                    inst.update_parquet(period=period, **artifact_kwargs)
                self.completion_secs[name] = time.monotonic() - start
                return name
            except Exception as e:
                if attempt < self._retry.attempts:
                    # Exponential backoff, capped, with multiplicative jitter.
                    delay = min(self._retry.backoff_base ** (attempt - 1), self._retry.backoff_max)
                    delay *= 1 + random.uniform(0, self._retry.jitter)
                    time.sleep(delay)
                else:
                    raise RuntimeError(f"{name} failed after {self._retry.attempts} attempts: {e}") from e

    def update_data(self, period: str, **kwargs: Any) -> None:
        """Run every artifact configured for *period*.

        Per-artifact failures are collected in ``self.failed`` and logged, not
        raised — the run always completes for the remaining artifacts.
        """
        # Split kwargs to preserve backward compatibility
        _, artifact_kwargs = self._split_kwargs(kwargs)

        self.completion_secs.clear()
        self.failed.clear()

        classes = self._classes_for(period)
        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            # Each worker gets its own shallow copy of the artifact kwargs.
            fut2name = {pool.submit(self._run_one, cls, period, dict(artifact_kwargs)): cls.__name__ for cls in classes}
            for fut in as_completed(fut2name):
                name = fut2name[fut]
                try:
                    fut.result()
                    self.logger.info(f"✅ {name} ({period}) in {self.completion_secs[name]:.2f}s")
                except Exception as e:
                    self.failed.append(name)
                    self.logger.error(f"✖️ {name} permanently failed: {e}")

        self.logger.info(
            f"Artifacts processed: total={len(classes)}, "
            f"completed={len(self.completion_secs)}, failed={len(self.failed)}"
        )

    def get_update_status(self) -> Dict[str, Any]:
        """Summarize the last run.

        NOTE(review): 'total'/'pending' span classes across *all* periods, not
        just the period last passed to update_data() — confirm this is intended.
        """
        done = set(self.completion_secs)
        fail = set(self.failed)
        all_names = {c.__name__ for v in self.wrapped_classes.values() for c in v}
        return {
            "total": len(all_names),
            "completed": sorted(done),
            "failed": sorted(fail),
            "pending": sorted(all_names - done - fail),
            "completion_times": dict(self.completion_secs),
        }
|
154
|
+
|
155
|
+
# NOTE(review): these imports sit mid-module, after ArtifactUpdaterMultiWrapperThreaded,
# which already references `random`, `ExitStack`, and the typing names above.
# That works only because the whole module finishes importing before any method
# is called; PEP 8 places all imports at the top of the file — consider moving.
import asyncio
import random
from contextlib import ExitStack
from typing import Any, Callable, Dict, List, Optional, Sequence, Type
|
159
|
+
|
160
|
+
class ArtifactUpdaterMultiWrapperAsync(ManagedResource):
    """
    Backward-compatible async orchestrator.

    Public API preserved:
      • __init__(wrapped_classes, *, max_workers=..., retry_attempts=..., backoff_*=..., update_timeout_seconds=..., priority_fn=..., artifact_class_kwargs=..., **kwargs)
      • update_data(period, **kwargs) -> forwards only artifact-friendly kwargs to update_parquet

    Each artifact's synchronous ``update_parquet`` call is pushed onto a worker
    thread via ``asyncio.to_thread`` and bounded by ``update_timeout_seconds``;
    concurrency is limited with an ``asyncio.Semaphore(max_workers)``.
    """

    def __init__(
        self,
        wrapped_classes: Dict[str, Sequence[Type]],
        *,
        max_workers: int = 3,
        retry_attempts: int = 3,
        update_timeout_seconds: int = 600,
        backoff_base: float = 2.0,
        backoff_max: float = 60.0,
        backoff_jitter: float = 0.15,
        priority_fn: Optional[Callable[[Type], int]] = None,
        artifact_class_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        # Remaining kwargs (logger, debug, fs, verbose, ...) are handled by
        # ManagedResource — presumably; confirm against sibi_dst.utils.base.
        super().__init__(**kwargs)
        self.wrapped_classes = wrapped_classes
        self.max_workers = int(max_workers)
        self.update_timeout_seconds = int(update_timeout_seconds)
        self.priority_fn = priority_fn

        self._retry = _RetryCfg(
            attempts=int(retry_attempts),
            backoff_base=float(backoff_base),
            backoff_max=float(backoff_max),
            jitter=float(backoff_jitter),
        )

        # Defaults derived from this resource; explicit caller kwargs win.
        self.artifact_class_kwargs = {
            **_default_artifact_kwargs(self),
            **(artifact_class_kwargs or {}),
        }

        # Per-run bookkeeping; reset at the top of update_data().
        self.completion_secs: Dict[str, float] = {}
        self.failed: List[str] = []

    # ---- internals -----------------------------------------------------------

    def _classes_for(self, period: str) -> List[Type]:
        """Return the artifact classes registered for *period*, optionally priority-sorted.

        Raises ValueError for an unknown period or an empty class list.
        """
        try:
            classes = list(self.wrapped_classes[period])
        except KeyError:
            raise ValueError(f"Unsupported period '{period}'.")
        if not classes:
            raise ValueError(f"No artifact classes configured for period '{period}'.")
        if self.priority_fn:
            # Sorting is best-effort: a broken priority_fn must not abort the run.
            try:
                classes.sort(key=self.priority_fn)
            except Exception as e:
                self.logger.warning(f"priority_fn failed; using listed order: {e}")
        return classes

    @staticmethod
    def _split_kwargs(raw: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Split kwargs into (orchestrator-only, artifact-forwarded).
        Keeps backward compatibility: callers can pass all knobs in one dict.
        """
        orch: Dict[str, Any] = {}
        art: Dict[str, Any] = {}
        for k, v in raw.items():
            if k in _ORCHESTRATOR_KEYS:
                orch[k] = v
            else:
                art[k] = v
        return orch, art

    async def _run_one(self, cls: Type, period: str, sem: asyncio.Semaphore, artifact_kwargs: Dict[str, Any]) -> None:
        """Run one artifact update with retry/backoff under the shared semaphore.

        Never raises on artifact failure: after the final attempt the class
        name is appended to ``self.failed`` and an error is logged.
        """
        name = cls.__name__
        async with sem:
            start = asyncio.get_running_loop().time()
            for attempt in range(1, self._retry.attempts + 1):
                try:
                    # Run sync context + method in thread
                    def _sync_block() -> None:
                        with ExitStack() as stack:
                            inst = cls(**self.artifact_class_kwargs)
                            inst = stack.enter_context(inst)
                            inst.update_parquet(period=period, **artifact_kwargs)

                    # Bound each attempt; TimeoutError triggers a retry below.
                    await asyncio.wait_for(
                        asyncio.to_thread(_sync_block),
                        timeout=self.update_timeout_seconds,
                    )
                    dt_secs = asyncio.get_running_loop().time() - start
                    self.completion_secs[name] = dt_secs
                    self.logger.info(f"✅ {name} ({period}) in {dt_secs:.2f}s")
                    return

                except asyncio.TimeoutError:
                    self.logger.warning(f"Timeout in {name} attempt {attempt}/{self._retry.attempts}")
                except Exception as e:
                    self.logger.error(
                        f"{name} attempt {attempt}/{self._retry.attempts} failed: {e}",
                        exc_info=self.debug,
                    )

                if attempt < self._retry.attempts:
                    # Exponential backoff, capped, with multiplicative jitter.
                    delay = min(self._retry.backoff_base ** (attempt - 1), self._retry.backoff_max)
                    delay *= 1 + random.uniform(0, self._retry.jitter)
                    await asyncio.sleep(delay)

            # All attempts exhausted: record the failure instead of raising.
            self.failed.append(name)
            self.logger.error(f"✖️ {name} permanently failed")

    # ---- public API ----------------------------------------------------------

    async def update_data(self, period: str, **kwargs: Any) -> None:
        """
        Backward-compatible:
        - Accepts orchestrator knobs in kwargs (we consume them).
        - Forwards only artifact-friendly kwargs to update_parquet.
        """
        # split kwargs; ignore any runtime attempts to mutate orchestrator config mid-call
        _, artifact_kwargs = self._split_kwargs(kwargs)

        self.completion_secs.clear()
        self.failed.clear()

        classes = self._classes_for(period)
        sem = asyncio.Semaphore(self.max_workers)
        # Each task gets its own shallow copy of the artifact kwargs.
        tasks = [asyncio.create_task(self._run_one, cls, period, sem, dict(artifact_kwargs)) for cls in classes] if False else [asyncio.create_task(self._run_one(cls, period, sem, dict(artifact_kwargs))) for cls in classes]

        for t in asyncio.as_completed(tasks):
            try:
                await t
            except asyncio.CancelledError:
                # Propagate cancellation to every sibling task before re-raising.
                for rest in tasks:
                    rest.cancel()
                raise

        self.logger.info(
            f"Artifacts processed: total={len(classes)}, "
            f"completed={len(self.completion_secs)}, failed={len(self.failed)}"
        )

    # Optional helper
    def get_update_status(self) -> Dict[str, Any]:
        """Summarize the last run.

        NOTE(review): 'total'/'pending' span classes across *all* periods, not
        just the period last passed to update_data() — confirm this is intended.
        """
        done = set(self.completion_secs)
        fail = set(self.failed)
        all_names = {c.__name__ for v in self.wrapped_classes.values() for c in v}
        return {
            "total": len(all_names),
            "completed": sorted(done),
            "failed": sorted(fail),
            "pending": sorted(all_names - done - fail),
            "completion_times": dict(self.completion_secs),
        }