sibi-dst 2025.1.13.tar.gz → 2025.8.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/PKG-INFO +1 -1
  2. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/pyproject.toml +1 -1
  3. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/__init__.py +7 -1
  4. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/__init__.py +3 -2
  5. sibi_dst-2025.8.2/sibi_dst/df_helper/_artifact_updater_async.py +238 -0
  6. sibi_dst-2025.8.2/sibi_dst/df_helper/_artifact_updater_threaded.py +195 -0
  7. sibi_dst-2025.8.2/sibi_dst/df_helper/_df_helper.py +573 -0
  8. sibi_dst-2025.8.2/sibi_dst/df_helper/_parquet_artifact.py +320 -0
  9. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/_parquet_reader.py +9 -10
  10. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
  11. sibi_dst-2025.8.2/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +211 -0
  12. sibi_dst-2025.8.2/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
  13. sibi_dst-2025.8.2/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +186 -0
  14. sibi_dst-2025.8.2/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +128 -0
  15. sibi_dst-2025.8.2/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
  16. sibi_dst-2025.8.2/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +154 -0
  17. sibi_dst-2025.8.2/sibi_dst/osmnx_helper/route_path_builder.py +97 -0
  18. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/utils/__init__.py +2 -0
  19. sibi_dst-2025.8.2/sibi_dst/utils/base.py +252 -0
  20. sibi_dst-2025.8.2/sibi_dst/utils/business_days.py +248 -0
  21. sibi_dst-2025.8.2/sibi_dst/utils/clickhouse_writer.py +501 -0
  22. sibi_dst-2025.8.2/sibi_dst/utils/data_utils.py +201 -0
  23. sibi_dst-2025.8.2/sibi_dst/utils/data_wrapper.py +518 -0
  24. sibi_dst-2025.8.2/sibi_dst/utils/date_utils.py +778 -0
  25. sibi_dst-2025.8.2/sibi_dst/utils/df_utils.py +264 -0
  26. sibi_dst-2025.8.2/sibi_dst/utils/file_age_checker.py +301 -0
  27. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/utils/file_utils.py +3 -2
  28. sibi_dst-2025.8.2/sibi_dst/utils/filepath_generator.py +349 -0
  29. sibi_dst-2025.8.2/sibi_dst/utils/log_utils.py +711 -0
  30. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/utils/manifest_manager.py +60 -76
  31. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/utils/parquet_saver.py +33 -27
  32. sibi_dst-2025.8.2/sibi_dst/utils/periods.py +42 -0
  33. sibi_dst-2025.8.2/sibi_dst/utils/phone_formatter.py +120 -0
  34. sibi_dst-2025.8.2/sibi_dst/utils/update_planner.py +300 -0
  35. sibi_dst-2025.8.2/sibi_dst/utils/webdav_client.py +170 -0
  36. sibi_dst-2025.1.13/sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +0 -422
  37. sibi_dst-2025.1.13/sibi_dst/df_helper/_df_helper.py +0 -273
  38. sibi_dst-2025.1.13/sibi_dst/df_helper/_parquet_artifact.py +0 -328
  39. sibi_dst-2025.1.13/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -250
  40. sibi_dst-2025.1.13/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -336
  41. sibi_dst-2025.1.13/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -80
  42. sibi_dst-2025.1.13/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -104
  43. sibi_dst-2025.1.13/sibi_dst/osmnx_helper/route_path_builder.py +0 -98
  44. sibi_dst-2025.1.13/sibi_dst/utils/base.py +0 -117
  45. sibi_dst-2025.1.13/sibi_dst/utils/clickhouse_writer.py +0 -235
  46. sibi_dst-2025.1.13/sibi_dst/utils/data_utils.py +0 -248
  47. sibi_dst-2025.1.13/sibi_dst/utils/data_wrapper.py +0 -214
  48. sibi_dst-2025.1.13/sibi_dst/utils/date_utils.py +0 -460
  49. sibi_dst-2025.1.13/sibi_dst/utils/df_utils.py +0 -284
  50. sibi_dst-2025.1.13/sibi_dst/utils/filepath_generator.py +0 -187
  51. sibi_dst-2025.1.13/sibi_dst/utils/log_utils.py +0 -372
  52. sibi_dst-2025.1.13/sibi_dst/utils/phone_formatter.py +0 -127
  53. sibi_dst-2025.1.13/sibi_dst/utils/update_planner.py +0 -298
  54. sibi_dst-2025.1.13/sibi_dst/utils/webdav_client.py +0 -220
  55. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/README.md +0 -0
  56. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/backends/__init__.py +0 -0
  57. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
  58. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
  59. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
  60. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/backends/parquet/_filter_handler.py +0 -0
  61. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
  62. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/core/__init__.py +0 -0
  63. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/core/_defaults.py +0 -0
  64. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
  65. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/core/_params_config.py +0 -0
  66. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/core/_query_config.py +0 -0
  67. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/data_cleaner.py +0 -0
  68. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/geopy_helper/__init__.py +0 -0
  69. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
  70. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/geopy_helper/utils.py +0 -0
  71. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/osmnx_helper/__init__.py +0 -0
  72. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
  73. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
  74. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
  75. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
  76. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
  77. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/osmnx_helper/utils.py +0 -0
  78. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/tests/__init__.py +0 -0
  79. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
  80. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/utils/credentials.py +0 -0
  81. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/utils/data_from_http_source.py +0 -0
  82. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/utils/storage_config.py +0 -0
  83. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/utils/storage_manager.py +0 -0
  84. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/__init__.py +0 -0
  85. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/__init__.py +0 -0
  86. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
  87. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
  88. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
  89. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  90. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  91. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  92. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
  93. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
  94. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
  95. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
  96. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
  97. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
  98. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
  99. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
  100. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
  101. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
  102. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/utils/__init__.py +0 -0
  103. {sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/v2/utils/log_utils.py +0 -0
{sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sibi-dst
- Version: 2025.1.13
+ Version: 2025.8.2
  Summary: Data Science Toolkit
  Author: Luis Valverde
  Author-email: lvalverdeb@gmail.com
{sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "sibi-dst"
- version = "2025.1.13"
+ version = "2025.8.2"
  description = "Data Science Toolkit"
  authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
  readme = "README.md"
{sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/__init__.py
@@ -10,4 +10,10 @@ except version_reader.PackageNotFoundError:
 
  __all__ = [
      "__version__",
- ]
+ ]
+
+ from . import df_helper as df_helper
+ from . import osmnx_helper as osmnx_helper
+ from . import geopy_helper as geopy_helper
+ from . import utils as sibi_utils
+
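With these re-exports, the subpackages hang directly off the top-level sibi_dst namespace after a single import. A minimal sketch of what 2025.8.2 allows (names taken from the diff above; usage is illustrative, not from the package docs):

    import sibi_dst

    print(sibi_dst.__version__)               # e.g. "2025.8.2"
    DfHelper = sibi_dst.df_helper.DfHelper    # subpackage attached by sibi_dst/__init__.py
    utils = sibi_dst.sibi_utils               # note: utils is re-exported under the alias sibi_utils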
{sibi_dst-2025.1.13 → sibi_dst-2025.8.2}/sibi_dst/df_helper/__init__.py
@@ -3,8 +3,9 @@ from __future__ import annotations
  from ._df_helper import DfHelper
  from ._parquet_artifact import ParquetArtifact
  from ._parquet_reader import ParquetReader
- from ._artifact_updater_multi_wrapper import ArtifactUpdaterMultiWrapperThreaded, ArtifactUpdaterMultiWrapperAsync
-
+ #from ._artifact_updater_multi_wrapper import ArtifactUpdaterMultiWrapperThreaded, ArtifactUpdaterMultiWrapperAsync
+ from ._artifact_updater_async import ArtifactUpdaterMultiWrapperAsync
+ from ._artifact_updater_threaded import ArtifactUpdaterMultiWrapperThreaded
  __all__ = [
      'DfHelper',
      'ParquetArtifact',
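Note that _artifact_updater_multi_wrapper.py is removed (file 36 in the list above) and replaced by _artifact_updater_async.py and _artifact_updater_threaded.py, while both orchestrator classes are still imported into the sibi_dst.df_helper namespace. Assuming downstream code imported them from the package rather than from the private module, the import path is unchanged; a short sketch:

    # Same public names as in 2025.1.13, now defined in the two new modules
    from sibi_dst.df_helper import (
        ArtifactUpdaterMultiWrapperAsync,      # sibi_dst/df_helper/_artifact_updater_async.py
        ArtifactUpdaterMultiWrapperThreaded,   # sibi_dst/df_helper/_artifact_updater_threaded.py
    )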
sibi_dst-2025.8.2/sibi_dst/df_helper/_artifact_updater_async.py (new file)
@@ -0,0 +1,238 @@
+ from __future__ import annotations
+
+ import asyncio
+ import datetime
+ import random
+ import time
+ from contextlib import ExitStack
+ from dataclasses import dataclass
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type
+
+ from sibi_dst.utils import ManagedResource
+
+
+ @dataclass(slots=True)
+ class _RetryCfg:
+     attempts: int = 3
+     backoff_base: float = 2.0
+     backoff_max: float = 60.0
+     jitter: float = 0.15
+
+
+ _ORCHESTRATOR_KEYS = {
+     "retry_attempts",
+     "backoff_base",
+     "backoff_max",
+     "backoff_jitter",
+     "update_timeout_seconds",
+     "max_workers",
+     "priority_fn",
+     "artifact_class_kwargs",
+ }
+
+
+ def _default_artifact_kwargs(resource: ManagedResource) -> Dict[str, Any]:
+     return {
+         "logger": resource.logger,
+         "debug": resource.debug,
+         "fs": resource.fs,
+         "verbose": resource.verbose,
+     }
+
+
+ class ArtifactUpdaterMultiWrapperAsync(ManagedResource):
+     """
+     Backward-compatible async orchestrator with shutdown-aware scheduling.
+     """
+
+     def __init__(
+         self,
+         wrapped_classes: Dict[str, Sequence[Type]],
+         *,
+         max_workers: int = 3,
+         retry_attempts: int = 3,
+         update_timeout_seconds: int = 600,
+         backoff_base: float = 2.0,
+         backoff_max: float = 60.0,
+         backoff_jitter: float = 0.15,
+         priority_fn: Optional[Callable[[Type], int]] = None,
+         artifact_class_kwargs: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(**kwargs)
+         self.wrapped_classes = wrapped_classes
+         self.max_workers = int(max_workers)
+         self.update_timeout_seconds = int(update_timeout_seconds)
+         self.priority_fn = priority_fn
+
+         self._retry = _RetryCfg(
+             attempts=int(retry_attempts),
+             backoff_base=float(backoff_base),
+             backoff_max=float(backoff_max),
+             jitter=float(backoff_jitter),
+         )
+
+         self.artifact_class_kwargs = {
+             **_default_artifact_kwargs(self),
+             **(artifact_class_kwargs or {}),
+         }
+
+         self.completion_secs: Dict[str, float] = {}
+         self.failed: List[str] = []
+
+         # NEW: async stop gate — tripped on cleanup/cancel
+         self._stop = asyncio.Event()
+
+     # Trip stop gate on close paths
+     def _cleanup(self) -> None:
+         try:
+             loop = asyncio.get_running_loop()
+             loop.call_soon_threadsafe(self._stop.set)
+         except RuntimeError:
+             self._stop.set()
+
+     async def _acleanup(self) -> None:
+         self._stop.set()
+
+     # ---- internals -----------------------------------------------------------
+
+     def _classes_for(self, period: str) -> List[Type]:
+         try:
+             classes = list(self.wrapped_classes[period])
+         except KeyError:
+             raise ValueError(f"Unsupported period '{period}'.")
+         if not classes:
+             raise ValueError(f"No artifact classes configured for period '{period}'.")
+         if self.priority_fn:
+             try:
+                 classes.sort(key=self.priority_fn)
+             except Exception as e:
+                 self.logger.warning(f"priority_fn failed; using listed order: {e}")
+         return classes
+
+     @staticmethod
+     def _split_kwargs(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+         orch: Dict[str, Any] = {}
+         art: Dict[str, Any] = {}
+         for k, v in raw.items():
+             if k in _ORCHESTRATOR_KEYS:
+                 orch[k] = v
+             else:
+                 art[k] = v
+         return orch, art
+
+     async def _run_one(self, cls: Type, period: str, sem: asyncio.Semaphore, artifact_kwargs: Dict[str, Any]) -> None:
+         name = cls.__name__
+         if self._stop.is_set() or self.closed:
+             raise asyncio.CancelledError()
+
+         self.logger.info(f"Running {name} with period '{period}'", extra={"artifact": name, "period": period})
+         async with sem:
+             loop = asyncio.get_running_loop()
+             start = loop.time()
+             for attempt in range(1, self._retry.attempts + 1):
+                 if self._stop.is_set() or self.closed:
+                     raise asyncio.CancelledError()
+                 try:
+                     def _sync_block() -> None:
+                         with ExitStack() as stack:
+                             inst = cls(**self.artifact_class_kwargs)
+                             inst = stack.enter_context(inst)
+                             inst.update_parquet(period=period, **artifact_kwargs)
+
+                     await asyncio.wait_for(asyncio.to_thread(_sync_block), timeout=self.update_timeout_seconds)
+                     dt_secs = loop.time() - start
+                     self.completion_secs[name] = dt_secs
+                     self.logger.info(f"✅ {name} ({period}) in {dt_secs:.2f}s")
+                     return
+
+                 except asyncio.TimeoutError:
+                     self.logger.warning(f"Timeout in {name} attempt {attempt}/{self._retry.attempts}")
+                 except asyncio.CancelledError:
+                     raise
+                 except Exception as e:
+                     self.logger.error(
+                         f"{name} attempt {attempt}/{self._retry.attempts} failed: {e}",
+                         exc_info=self.debug,
+                     )
+
+                 if attempt < self._retry.attempts and not self._stop.is_set():
+                     delay = min(self._retry.backoff_base ** (attempt - 1), self._retry.backoff_max)
+                     delay *= 1 + random.uniform(0, self._retry.jitter)
+                     try:
+                         await asyncio.sleep(delay)
+                     except asyncio.CancelledError:
+                         raise
+
+         self.failed.append(name)
+         self.logger.error(f"✖️ {name} permanently failed")
+
+     # ---- public API ----------------------------------------------------------
+
+     async def update_data(self, period: str, **kwargs: Any) -> None:
+         """
+         Backward-compatible:
+           - Accepts orchestrator knobs in kwargs (we consume them).
+           - Forwards only artifact-friendly kwargs to update_parquet.
+         """
+         _, artifact_kwargs = self._split_kwargs(kwargs)
+
+         self.completion_secs.clear()
+         self.failed.clear()
+
+         classes = self._classes_for(period)
+         self.logger.info(
+             f"Starting update of {len(classes)} artifacts for period '{period}'",
+             extra={
+                 "action_module_name": self.__class__.__name__,
+                 "date_of_update": time.strftime('%Y-%m-%d'),
+                 "start_time": time.strftime('%H:%M:%S'),
+                 "period": period,
+             },
+         )
+
+         sem = asyncio.Semaphore(self.max_workers)
+         tasks = [asyncio.create_task(self._run_one(cls, period, sem, dict(artifact_kwargs))) for cls in classes]
+
+         try:
+             for t in asyncio.as_completed(tasks):
+                 if self._stop.is_set():
+                     break
+                 await t
+         except (asyncio.CancelledError, KeyboardInterrupt):
+             self._stop.set()
+             for t in tasks:
+                 t.cancel()
+             raise
+         finally:
+             # Drain/cancel everything deterministically
+             for t in tasks:
+                 if not t.done():
+                     t.cancel()
+             await asyncio.gather(*tasks, return_exceptions=True)
+
+         self.logger.info(
+             f"Update completed for period: {period}",
+             extra={
+                 "action_module_name": self.__class__.__name__,
+                 "date_of_update": datetime.date.today().strftime('%Y-%m-%d'),
+                 "end_time": datetime.datetime.now().strftime('%H:%M:%S'),
+                 "period": period,
+             },
+         )
+         self.logger.info(
+             f"Artifacts processed: total={len(classes)}, "
+             f"completed={len(self.completion_secs)}, failed={len(self.failed)}"
+         )
+
+     def get_update_status(self) -> Dict[str, Any]:
+         done = set(self.completion_secs)
+         fail = set(self.failed)
+         all_names = {c.__name__ for v in self.wrapped_classes.values() for c in v}
+         return {
+             "total": len(all_names),
+             "completed": sorted(done),
+             "failed": sorted(fail),
+             "pending": sorted(all_names - done - fail),
+             "completion_times": dict(self.completion_secs),
+         }
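For orientation, here is a hedged usage sketch of the async orchestrator added above. MyDailyArtifact and the "daily" period key are hypothetical placeholders; the sketch assumes each artifact class works as a context manager and exposes update_parquet(period=...), which is what _run_one relies on. Orchestrator knobs such as retry_attempts stay in the constructor, while unrecognized kwargs passed to update_data (overwrite below, also hypothetical) are forwarded to update_parquet. Retries back off by min(backoff_base ** (attempt - 1), backoff_max) scaled by up to 15% random jitter.

    import asyncio

    from sibi_dst.df_helper import ArtifactUpdaterMultiWrapperAsync
    from my_project.artifacts import MyDailyArtifact  # hypothetical artifact class

    async def main() -> None:
        wrapper = ArtifactUpdaterMultiWrapperAsync(
            wrapped_classes={"daily": [MyDailyArtifact]},  # period key -> artifact classes
            max_workers=2,                                 # semaphore size: concurrent artifacts
            retry_attempts=3,
            update_timeout_seconds=600,
        )
        # "overwrite" is not an orchestrator key, so it is forwarded to update_parquet()
        await wrapper.update_data("daily", overwrite=True)
        print(wrapper.get_update_status())

    asyncio.run(main())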
sibi_dst-2025.8.2/sibi_dst/df_helper/_artifact_updater_threaded.py (new file)
@@ -0,0 +1,195 @@
+ from __future__ import annotations
+
+ import datetime
+ import time
+ import random
+ import threading
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from contextlib import ExitStack
+ from dataclasses import dataclass
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Type, Tuple
+
+ from sibi_dst.utils import ManagedResource
+
+
+ @dataclass(slots=True)
+ class _RetryCfg:
+     attempts: int = 3
+     backoff_base: float = 2.0
+     backoff_max: float = 60.0
+     jitter: float = 0.15
+
+
+ _ORCHESTRATOR_KEYS = {
+     "retry_attempts",
+     "backoff_base",
+     "backoff_max",
+     "backoff_jitter",
+     "update_timeout_seconds",  # accepted but unused in pure-threads version
+     "max_workers",
+     "priority_fn",
+     "artifact_class_kwargs",
+ }
+
+
+ def _default_artifact_kwargs(resource: ManagedResource) -> Dict[str, Any]:
+     return {
+         "logger": resource.logger,
+         "debug": resource.debug,
+         "fs": resource.fs,
+         "verbose": resource.verbose,
+     }
+
+
+ class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
+     """
+     Backward-compatible threaded orchestrator with shutdown-aware scheduling.
+     """
+
+     def __init__(
+         self,
+         wrapped_classes: Dict[str, Sequence[Type]],
+         *,
+         max_workers: int = 4,
+         retry_attempts: int = 3,
+         backoff_base: float = 2.0,
+         backoff_max: float = 60.0,
+         backoff_jitter: float = 0.15,
+         priority_fn: Optional[Callable[[Type], int]] = None,
+         artifact_class_kwargs: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(**kwargs)
+         self.wrapped_classes = wrapped_classes
+         self.max_workers = int(max_workers)
+         self.priority_fn = priority_fn
+         self._retry = _RetryCfg(
+             attempts=int(retry_attempts),
+             backoff_base=float(backoff_base),
+             backoff_max=float(backoff_max),
+             jitter=float(backoff_jitter),
+         )
+         self.artifact_class_kwargs = {
+             **_default_artifact_kwargs(self),
+             **(artifact_class_kwargs or {}),
+         }
+         self.completion_secs: Dict[str, float] = {}
+         self.failed: List[str] = []
+
+         # NEW: stop gate — tripped on cleanup / Ctrl-C to stop scheduling & retries
+         self._stop_event = threading.Event()
+
+     # Trip the stop gate when this wrapper is closed
+     def _cleanup(self) -> None:
+         self._stop_event.set()
+
+     def _classes_for(self, period: str) -> List[Type]:
+         try:
+             classes = list(self.wrapped_classes[period])
+         except KeyError:
+             raise ValueError(f"Unsupported period '{period}'.")
+         if not classes:
+             raise ValueError(f"No artifact classes configured for period '{period}'.")
+         if self.priority_fn:
+             try:
+                 classes.sort(key=self.priority_fn)
+             except Exception as e:
+                 self.logger.warning(f"priority_fn failed; using listed order: {e}")
+         return classes
+
+     @staticmethod
+     def _split_kwargs(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+         orch: Dict[str, Any] = {}
+         art: Dict[str, Any] = {}
+         for k, v in raw.items():
+             if k in _ORCHESTRATOR_KEYS:
+                 orch[k] = v
+             else:
+                 art[k] = v
+         return orch, art
+
+     def _run_one(self, cls: Type, period: str, artifact_kwargs: Dict[str, Any]) -> str:
+         name = cls.__name__
+         start = time.monotonic()
+         for attempt in range(1, self._retry.attempts + 1):
+             if self._stop_event.is_set() or self.closed:
+                 raise RuntimeError("shutting_down")
+             try:
+                 with ExitStack() as stack:
+                     inst = cls(**self.artifact_class_kwargs)
+                     inst = stack.enter_context(inst)
+                     inst.update_parquet(period=period, **artifact_kwargs)
+                 self.completion_secs[name] = time.monotonic() - start
+                 return name
+             except Exception as e:
+                 if attempt < self._retry.attempts and not self._stop_event.is_set():
+                     # interruptible backoff sleep
+                     delay = min(self._retry.backoff_base ** (attempt - 1), self._retry.backoff_max)
+                     delay *= 1 + random.uniform(0, self._retry.jitter)
+                     end = time.monotonic() + delay
+                     while not self._stop_event.is_set() and time.monotonic() < end:
+                         time.sleep(min(0.1, end - time.monotonic()))
+                     continue
+                 raise RuntimeError(f"{name} failed after {self._retry.attempts} attempts: {e}") from e
+
+     def update_data(self, period: str, **kwargs: Any) -> None:
+         # Split kwargs to preserve backward compatibility
+         _, artifact_kwargs = self._split_kwargs(kwargs)
+
+         self.completion_secs.clear()
+         self.failed.clear()
+
+         classes = self._classes_for(period)
+
+         executor = ThreadPoolExecutor(max_workers=self.max_workers, thread_name_prefix="artifact-updater")
+         try:
+             fut2name: Dict[Any, str] = {}
+             for cls in classes:
+                 if self._stop_event.is_set() or self.closed:
+                     break
+                 try:
+                     fut = executor.submit(self._run_one, cls, period, dict(artifact_kwargs))
+                 except RuntimeError as e:
+                     if "cannot schedule new futures after shutdown" in str(e).lower():
+                         self.logger.warning("Executor shutting down; halting new submissions.")
+                         break
+                     raise
+                 fut2name[fut] = cls.__name__
+
+             for fut in as_completed(fut2name):
+                 name = fut2name[fut]
+                 try:
+                     fut.result()
+                     self.logger.info(f"✅ {name} ({period}) in {self.completion_secs[name]:.2f}s")
+                 except Exception as e:
+                     self.failed.append(name)
+                     self.logger.error(f"✖️ {name} permanently failed: {e}")
+         except KeyboardInterrupt:
+             self.logger.warning("KeyboardInterrupt — stopping scheduling and shutting down.")
+             self._stop_event.set()
+             raise
+         finally:
+             # Ensure queued-but-not-started tasks are canceled
+             executor.shutdown(wait=True, cancel_futures=True)
+
+         self.logger.info(
+             f"Artifacts processed: total={len(classes)}, "
+             f"completed={len(self.completion_secs)}, failed={len(self.failed)}",
+             extra={
+                 "date_of_update": time.strftime('%Y-%m-%d'),
+                 "start_time": time.strftime('%H:%M:%S'),
+                 "period": period,
+             },
+         )
+
+     def get_update_status(self) -> Dict[str, Any]:
+         done = set(self.completion_secs)
+         fail = set(self.failed)
+         all_names = {c.__name__ for v in self.wrapped_classes.values() for c in v}
+         return {
+             "total": len(all_names),
+             "completed": sorted(done),
+             "failed": sorted(fail),
+             "pending": sorted(all_names - done - fail),
+             "completion_times": dict(self.completion_secs),
+         }
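A matching sketch for the threaded orchestrator, under the same assumptions (MyDailyArtifact is a hypothetical, context-manager artifact class with update_parquet(period=...)). Unlike the async variant, it runs synchronously on a ThreadPoolExecutor, collects failures in get_update_status() instead of raising, and interrupts backoff sleeps once the stop gate is tripped.

    from sibi_dst.df_helper import ArtifactUpdaterMultiWrapperThreaded
    from my_project.artifacts import MyDailyArtifact  # hypothetical artifact class

    wrapper = ArtifactUpdaterMultiWrapperThreaded(
        wrapped_classes={"daily": [MyDailyArtifact]},  # period key -> artifact classes
        max_workers=4,
        retry_attempts=2,
    )
    wrapper.update_data("daily")           # blocks until all artifacts finish or fail
    status = wrapper.get_update_status()
    print(status["completed"], status["failed"], status["pending"])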