sibi-dst 2025.8.1__py3-none-any.whl → 2025.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,315 +0,0 @@
1
from __future__ import annotations

import asyncio
import random
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import ExitStack
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional, Sequence, Type

from sibi_dst.utils import ManagedResource
9
-
10
-
11
@dataclass(slots=True)
class _RetryCfg:
    """Retry/backoff knobs shared by both orchestrator classes."""
    attempts: int = 3  # total tries per artifact (first attempt included)
    backoff_base: float = 2.0  # exponential base: delay = base ** (attempt - 1)
    backoff_max: float = 60.0  # ceiling (seconds) applied to the exponential delay
    jitter: float = 0.15  # multiplicative jitter: delay *= 1 + uniform(0, jitter)
17
-
18
-
19
# Kwargs consumed by the orchestrators themselves. Anything else passed to
# update_data(**kwargs) is forwarded verbatim to each artifact's update_parquet.
_ORCHESTRATOR_KEYS = {
    "retry_attempts",
    "backoff_base",
    "backoff_max",
    "backoff_jitter",
    "update_timeout_seconds",  # accepted but unused in pure-threads version
    "max_workers",
    "priority_fn",
    "artifact_class_kwargs",
}
29
-
30
-
31
- def _default_artifact_kwargs(resource: ManagedResource) -> Dict[str, Any]:
32
- return {
33
- "logger": resource.logger,
34
- "debug": resource.debug,
35
- "fs": resource.fs,
36
- "verbose": resource.verbose,
37
- }
38
-
39
-
40
class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
    """
    Backward-compatible threaded orchestrator.

    Runs every artifact class registered for a period on a ThreadPoolExecutor,
    retrying each artifact with exponential backoff before declaring it failed.
    """

    def __init__(
        self,
        wrapped_classes: Dict[str, Sequence[Type]],
        *,
        max_workers: int = 4,
        retry_attempts: int = 3,
        backoff_base: float = 2.0,
        backoff_max: float = 60.0,
        backoff_jitter: float = 0.15,
        priority_fn: Optional[Callable[[Type], int]] = None,
        artifact_class_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.wrapped_classes = wrapped_classes
        self.max_workers = int(max_workers)
        self.priority_fn = priority_fn
        self._retry = _RetryCfg(
            attempts=int(retry_attempts),
            backoff_base=float(backoff_base),
            backoff_max=float(backoff_max),
            jitter=float(backoff_jitter),
        )
        # Defaults mirror this resource; caller-supplied overrides win.
        merged = dict(_default_artifact_kwargs(self))
        merged.update(artifact_class_kwargs or {})
        self.artifact_class_kwargs = merged
        self.completion_secs: Dict[str, float] = {}
        self.failed: List[str] = []

    def _classes_for(self, period: str) -> List[Type]:
        """Return the artifact classes configured for *period*, optionally priority-sorted."""
        try:
            classes = list(self.wrapped_classes[period])
        except KeyError:
            raise ValueError(f"Unsupported period '{period}'.")
        if not classes:
            raise ValueError(f"No artifact classes configured for period '{period}'.")
        if self.priority_fn:
            try:
                classes.sort(key=self.priority_fn)
            except Exception as e:
                # A broken priority_fn must not abort the whole run.
                self.logger.warning(f"priority_fn failed; using listed order: {e}")
        return classes

    @staticmethod
    def _split_kwargs(raw: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
        """Partition *raw* into (orchestrator-only, artifact-forwarded) kwargs."""
        orch = {k: v for k, v in raw.items() if k in _ORCHESTRATOR_KEYS}
        art = {k: v for k, v in raw.items() if k not in _ORCHESTRATOR_KEYS}
        return orch, art

    def _run_one(self, cls: Type, period: str, artifact_kwargs: Dict[str, Any]) -> str:
        """Instantiate *cls* and run update_parquet, retrying with backoff.

        Raises RuntimeError (chained to the last failure) once all attempts
        are exhausted; returns the class name on success.
        """
        name = cls.__name__
        started = time.monotonic()
        for attempt in range(1, self._retry.attempts + 1):
            try:
                with ExitStack() as stack:
                    artifact = stack.enter_context(cls(**self.artifact_class_kwargs))
                    artifact.update_parquet(period=period, **artifact_kwargs)
                    self.completion_secs[name] = time.monotonic() - started
                    return name
            except Exception as e:
                if attempt >= self._retry.attempts:
                    raise RuntimeError(
                        f"{name} failed after {self._retry.attempts} attempts: {e}"
                    ) from e
                # Exponential backoff with multiplicative jitter.
                pause = min(self._retry.backoff_base ** (attempt - 1), self._retry.backoff_max)
                pause *= 1 + random.uniform(0, self._retry.jitter)
                time.sleep(pause)

    def update_data(self, period: str, **kwargs: Any) -> None:
        """Run every artifact for *period* on the pool; orchestrator knobs in kwargs are consumed."""
        # Split kwargs to preserve backward compatibility
        _, artifact_kwargs = self._split_kwargs(kwargs)

        self.completion_secs.clear()
        self.failed.clear()

        classes = self._classes_for(period)
        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            futures = {
                pool.submit(self._run_one, cls, period, dict(artifact_kwargs)): cls.__name__
                for cls in classes
            }
            for future in as_completed(futures):
                name = futures[future]
                try:
                    future.result()
                    self.logger.info(f"✅ {name} ({period}) in {self.completion_secs[name]:.2f}s")
                except Exception as e:
                    self.failed.append(name)
                    self.logger.error(f"✖️ {name} permanently failed: {e}")

        self.logger.info(
            f"Artifacts processed: total={len(classes)}, "
            f"completed={len(self.completion_secs)}, failed={len(self.failed)}"
        )

    def get_update_status(self) -> Dict[str, Any]:
        """Snapshot of completed / failed / pending artifact names and timings."""
        completed = set(self.completion_secs)
        failures = set(self.failed)
        known = {cls.__name__ for group in self.wrapped_classes.values() for cls in group}
        return {
            "total": len(known),
            "completed": sorted(completed),
            "failed": sorted(failures),
            "pending": sorted(known - completed - failures),
            "completion_times": dict(self.completion_secs),
        }
154
-
155
- import asyncio
156
- import random
157
- from contextlib import ExitStack
158
- from typing import Any, Callable, Dict, List, Optional, Sequence, Type
159
-
160
class ArtifactUpdaterMultiWrapperAsync(ManagedResource):
    """
    Backward-compatible async orchestrator.

    Public API preserved:
      • __init__(wrapped_classes, *, max_workers=..., retry_attempts=..., backoff_*=..., update_timeout_seconds=..., priority_fn=..., artifact_class_kwargs=..., **kwargs)
      • update_data(period, **kwargs) -> forwards only artifact-friendly kwargs to update_parquet
    """

    def __init__(
        self,
        wrapped_classes: Dict[str, Sequence[Type]],
        *,
        max_workers: int = 3,
        retry_attempts: int = 3,
        update_timeout_seconds: int = 600,
        backoff_base: float = 2.0,
        backoff_max: float = 60.0,
        backoff_jitter: float = 0.15,
        priority_fn: Optional[Callable[[Type], int]] = None,
        artifact_class_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.wrapped_classes = wrapped_classes
        self.max_workers = int(max_workers)
        self.update_timeout_seconds = int(update_timeout_seconds)
        self.priority_fn = priority_fn

        self._retry = _RetryCfg(
            attempts=int(retry_attempts),
            backoff_base=float(backoff_base),
            backoff_max=float(backoff_max),
            jitter=float(backoff_jitter),
        )

        # Defaults mirror this resource; caller-supplied overrides win.
        defaults = _default_artifact_kwargs(self)
        overrides = artifact_class_kwargs or {}
        self.artifact_class_kwargs = {**defaults, **overrides}

        self.completion_secs: Dict[str, float] = {}
        self.failed: List[str] = []

    # ---- internals -----------------------------------------------------------

    def _classes_for(self, period: str) -> List[Type]:
        """Return the artifact classes configured for *period*, optionally priority-sorted."""
        try:
            classes = list(self.wrapped_classes[period])
        except KeyError:
            raise ValueError(f"Unsupported period '{period}'.")
        if not classes:
            raise ValueError(f"No artifact classes configured for period '{period}'.")
        if self.priority_fn:
            try:
                classes.sort(key=self.priority_fn)
            except Exception as e:
                # A broken priority_fn must not abort the whole run.
                self.logger.warning(f"priority_fn failed; using listed order: {e}")
        return classes

    @staticmethod
    def _split_kwargs(raw: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Split kwargs into (orchestrator-only, artifact-forwarded).
        Keeps backward compatibility: callers can pass all knobs in one dict.
        """
        orch = {k: v for k, v in raw.items() if k in _ORCHESTRATOR_KEYS}
        art = {k: v for k, v in raw.items() if k not in _ORCHESTRATOR_KEYS}
        return orch, art

    async def _run_one(self, cls: Type, period: str, sem: asyncio.Semaphore, artifact_kwargs: Dict[str, Any]) -> None:
        """Run one artifact under the concurrency semaphore, retrying with backoff.

        Never raises on artifact failure: exhausted attempts record the class
        name in self.failed instead.
        """
        name = cls.__name__
        async with sem:
            loop = asyncio.get_running_loop()
            started = loop.time()

            def _sync_block() -> None:
                # The artifact API is synchronous: construct, enter its
                # context, and update — all inside a worker thread.
                with ExitStack() as stack:
                    artifact = stack.enter_context(cls(**self.artifact_class_kwargs))
                    artifact.update_parquet(period=period, **artifact_kwargs)

            for attempt in range(1, self._retry.attempts + 1):
                try:
                    # NOTE(review): on timeout the worker thread keeps running
                    # (to_thread cannot be cancelled) — only the await gives up.
                    await asyncio.wait_for(
                        asyncio.to_thread(_sync_block),
                        timeout=self.update_timeout_seconds,
                    )
                except asyncio.TimeoutError:
                    self.logger.warning(f"Timeout in {name} attempt {attempt}/{self._retry.attempts}")
                except Exception as e:
                    self.logger.error(
                        f"{name} attempt {attempt}/{self._retry.attempts} failed: {e}",
                        exc_info=self.debug,
                    )
                else:
                    elapsed = loop.time() - started
                    self.completion_secs[name] = elapsed
                    self.logger.info(f"✅ {name} ({period}) in {elapsed:.2f}s")
                    return

                if attempt < self._retry.attempts:
                    # Exponential backoff with multiplicative jitter.
                    pause = min(self._retry.backoff_base ** (attempt - 1), self._retry.backoff_max)
                    pause *= 1 + random.uniform(0, self._retry.jitter)
                    await asyncio.sleep(pause)

            self.failed.append(name)
            self.logger.error(f"✖️ {name} permanently failed")

    # ---- public API ----------------------------------------------------------

    async def update_data(self, period: str, **kwargs: Any) -> None:
        """
        Backward-compatible:
          - Accepts orchestrator knobs in kwargs (we consume them).
          - Forwards only artifact-friendly kwargs to update_parquet.
        """
        # split kwargs; ignore any runtime attempts to mutate orchestrator config mid-call
        _, artifact_kwargs = self._split_kwargs(kwargs)

        self.completion_secs.clear()
        self.failed.clear()

        classes = self._classes_for(period)
        sem = asyncio.Semaphore(self.max_workers)
        tasks = [
            asyncio.create_task(self._run_one(cls, period, sem, dict(artifact_kwargs)))
            for cls in classes
        ]

        for pending in asyncio.as_completed(tasks):
            try:
                await pending
            except asyncio.CancelledError:
                # Stop the remaining workers before propagating cancellation.
                for task in tasks:
                    task.cancel()
                raise

        self.logger.info(
            f"Artifacts processed: total={len(classes)}, "
            f"completed={len(self.completion_secs)}, failed={len(self.failed)}"
        )

    # Optional helper
    def get_update_status(self) -> Dict[str, Any]:
        """Snapshot of completed / failed / pending artifact names and timings."""
        completed = set(self.completion_secs)
        failures = set(self.failed)
        known = {cls.__name__ for group in self.wrapped_classes.values() for cls in group}
        return {
            "total": len(known),
            "completed": sorted(completed),
            "failed": sorted(failures),
            "pending": sorted(known - completed - failures),
            "completion_times": dict(self.completion_secs),
        }