sibi-dst 2025.9.8.tar.gz → 2025.9.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/PKG-INFO +2 -1
  2. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/pyproject.toml +2 -1
  3. sibi_dst-2025.9.10/sibi_dst/df_helper/_artifact_updater_async.py +292 -0
  4. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/_parquet_artifact.py +6 -326
  5. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/_parquet_reader.py +2 -1
  6. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +26 -2
  7. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/__init__.py +7 -3
  8. sibi_dst-2025.9.10/sibi_dst/utils/boilerplate/base_attacher.py +70 -0
  9. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_pipeline.py +14 -29
  10. sibi_dst-2025.9.10/sibi_dst/utils/boilerplate/base_pipeline_template.py +54 -0
  11. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/clickhouse_writer.py +1 -1
  12. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/data_wrapper.py +46 -312
  13. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/parquet_saver.py +29 -16
  14. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/progress/sse_runner.py +39 -11
  15. sibi_dst-2025.9.10/sibi_dst/utils/update_planner.py +391 -0
  16. sibi_dst-2025.9.8/sibi_dst/df_helper/_artifact_updater_async.py +0 -238
  17. sibi_dst-2025.9.8/sibi_dst/utils/boilerplate/base_attacher.py +0 -25
  18. sibi_dst-2025.9.8/sibi_dst/utils/update_planner.py +0 -1035
  19. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/README.md +0 -0
  20. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/__init__.py +0 -0
  21. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/__init__.py +0 -0
  22. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/_artifact_updater_threaded.py +0 -0
  23. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/_df_helper.py +0 -0
  24. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/__init__.py +0 -0
  25. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
  26. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
  27. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
  28. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
  29. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  30. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
  31. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  32. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  33. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
  34. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  35. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/__init__.py +0 -0
  36. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/_defaults.py +0 -0
  37. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
  38. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/_params_config.py +0 -0
  39. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/core/_query_config.py +0 -0
  40. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/df_helper/data_cleaner.py +0 -0
  41. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/geopy_helper/__init__.py +0 -0
  42. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
  43. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/geopy_helper/utils.py +0 -0
  44. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/__init__.py +0 -0
  45. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
  46. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
  47. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
  48. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
  49. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
  50. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/route_path_builder.py +0 -0
  51. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/osmnx_helper/utils.py +0 -0
  52. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/tests/__init__.py +0 -0
  53. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/tests/test_baseclass.py +0 -0
  54. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
  55. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/__init__.py +0 -0
  56. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/async_utils.py +0 -0
  57. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/base.py +0 -0
  58. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_data_cube.py +0 -0
  59. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_parquet_artifact.py +0 -0
  60. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/base_parquet_reader.py +0 -0
  61. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/boilerplate/hybrid_data_loader.py +0 -0
  62. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/business_days.py +0 -0
  63. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/credentials.py +0 -0
  64. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/dask_utils.py +0 -0
  65. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/data_from_http_source.py +0 -0
  66. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/data_utils.py +0 -0
  67. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/date_utils.py +0 -0
  68. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/df_utils.py +0 -0
  69. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/file_age_checker.py +0 -0
  70. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/file_utils.py +0 -0
  71. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/filepath_generator.py +0 -0
  72. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/iceberg_saver.py +0 -0
  73. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/log_utils.py +0 -0
  74. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/manifest_manager.py +0 -0
  75. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/periods.py +0 -0
  76. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/phone_formatter.py +0 -0
  77. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/progress/__init__.py +0 -0
  78. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/progress/jobs.py +0 -0
  79. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/storage_config.py +0 -0
  80. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/storage_hive.py +0 -0
  81. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/storage_manager.py +0 -0
  82. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/webdav_client.py +0 -0
  83. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/utils/write_gatekeeper.py +0 -0
  84. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/__init__.py +0 -0
  85. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/__init__.py +0 -0
  86. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
  87. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
  88. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
  89. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  90. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  91. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  92. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
  93. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
  94. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
  95. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
  96. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
  97. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
  98. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
  99. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
  100. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
  101. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
  102. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/utils/__init__.py +0 -0
  103. {sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/sibi_dst/v2/utils/log_utils.py +0 -0
{sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sibi-dst
-Version: 2025.9.8
+Version: 2025.9.10
 Summary: Data Science Toolkit
 Author: Luis Valverde
 Author-email: lvalverdeb@gmail.com
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: clickhouse-connect (>=0.8.18,<0.9.0)
 Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
 Requires-Dist: dask[complete] (>=2025.9.0,<2026.0.0)
+Requires-Dist: distributed (>=2025.9.1,<2026.0.0)
 Requires-Dist: mysqlclient (>=2.2.7,<3.0.0)
 Requires-Dist: opentelemetry-exporter-otlp (>=1.35.0,<2.0.0)
 Requires-Dist: opentelemetry-sdk (>=1.35.0,<2.0.0)
{sibi_dst-2025.9.8 → sibi_dst-2025.9.10}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sibi-dst"
-version = "2025.9.8"
+version = "2025.9.10"
 description = "Data Science Toolkit"
 authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
 readme = "README.md"
@@ -26,6 +26,7 @@ opentelemetry-sdk = "^1.35.0"
 pyiceberg = {extras = ["hive", "s3fs"], version = "^0.9.1"}
 sse-starlette = "^3.0.2"
 pyrosm = "^0.6.2"
+distributed = "^2025.9.1"
 
 [tool.poetry.group.dev]
 optional = true
sibi_dst-2025.9.10/sibi_dst/df_helper/_artifact_updater_async.py (new file)
@@ -0,0 +1,292 @@
+from __future__ import annotations
+
+import asyncio
+import datetime
+import random
+import time
+from contextlib import ExitStack
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, List, Optional, Sequence, Type
+
+from sibi_dst.utils import ManagedResource
+
+try:
+    from dask.distributed import Client, LocalCluster
+except ImportError:
+    Client = None
+    LocalCluster = None
+
+
+@dataclass(slots=True)
+class _RetryCfg:
+    attempts: int = 3
+    backoff_base: float = 2.0
+    backoff_max: float = 60.0
+    jitter: float = 0.15
+
+
+# ---------------- Worker (safe for Dask pickling) ----------------
+def run_artifact_update(
+    cls: Type,
+    artifact_class_kwargs: Dict[str, Any],
+    retry: _RetryCfg,
+    period: str,
+    artifact_kwargs: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Standalone worker — safe for Dask distributed execution."""
+    import logging
+
+    logger = logging.getLogger(cls.__name__)
+
+    start_wall = datetime.datetime.now()
+    attempt_count = 0
+    success = False
+    error_msg = None
+
+    for attempt in range(1, retry.attempts + 1):
+        attempt_count = attempt
+        try:
+            with ExitStack() as stack:
+                inst = cls(**artifact_class_kwargs)
+                inst = stack.enter_context(inst)
+                inst.update_parquet(period=period, **artifact_kwargs)
+            success = True
+            break
+        except Exception as e:
+            error_msg = str(e)
+            if attempt < retry.attempts:
+                delay = min(retry.backoff_base ** (attempt - 1), retry.backoff_max)
+                delay *= 1 + random.uniform(0, retry.jitter)
+                time.sleep(delay)
+
+    end_wall = datetime.datetime.now()
+    duration = (end_wall - start_wall).total_seconds()
+
+    return {
+        "artifact": cls.__name__,
+        "period": period,
+        "start": start_wall.isoformat(),
+        "end": end_wall.isoformat(),
+        "processing_time": duration,
+        "retries": attempt_count - 1 if success else attempt_count,
+        "success": success,
+        "error": error_msg,
+    }
+
+
+class ArtifactUpdaterMultiWrapperAsync(ManagedResource):
+    """
+    Async/Threaded orchestrator.
+    Dask-enabled if a Client is passed (or created automatically).
+    """
+
+    def __init__(
+        self,
+        wrapped_classes: Dict[str, Sequence[Type]],
+        *,
+        max_workers: int = 3,
+        retry_attempts: int = 3,
+        update_timeout_seconds: int = 600,
+        backoff_base: float = 2.0,
+        backoff_max: float = 60.0,
+        backoff_jitter: float = 0.15,
+        priority_fn: Optional[Callable[[Type], int]] = None,
+        artifact_class_kwargs: Optional[Dict[str, Any]] = None,
+        dask_client: Optional[Client] = None,
+        use_dask: bool = True,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.wrapped_classes = wrapped_classes
+        self.max_workers = int(max_workers)
+        self.update_timeout_seconds = int(update_timeout_seconds)
+        self.priority_fn = priority_fn
+        self.use_dask = use_dask
+        self.client: Optional[Client] = dask_client
+        self._owns_client = False
+
+        self._retry = _RetryCfg(
+            attempts=int(retry_attempts),
+            backoff_base=float(backoff_base),
+            backoff_max=float(backoff_max),
+            jitter=float(backoff_jitter),
+        )
+
+        # Safe kwargs for artifacts
+        if self.use_dask:
+            self.artifact_class_kwargs = {
+                "debug": self.debug,
+                "verbose": self.verbose,
+                **(artifact_class_kwargs or {}),
+            }
+        else:
+            self.artifact_class_kwargs = {
+                "logger": self.logger,
+                "fs": self.fs,
+                "debug": self.debug,
+                "verbose": self.verbose,
+                **(artifact_class_kwargs or {}),
+            }
+
+        self.completion_secs: Dict[str, float] = {}
+        self.failed: List[str] = []
+        self._stop = asyncio.Event()
+
+        if self.use_dask and Client is None:
+            raise RuntimeError("Dask is not installed, cannot use Dask mode")
+
+        # auto-start local client if requested
+        if self.use_dask and not self.client:
+            self.client = Client(
+                LocalCluster(
+                    n_workers=max_workers,
+                    threads_per_worker=1,
+                    dashboard_address=None,
+                )
+            )
+            self._owns_client = True
+
+    # ---- Internals ------------------------------------------------------------
+
+    def _classes_for(self, period: str) -> List[Type]:
+        try:
+            classes = list(self.wrapped_classes[period])
+        except KeyError:
+            raise ValueError(f"Unsupported period '{period}'.")
+        if not classes:
+            raise ValueError(f"No artifact classes configured for '{period}'.")
+        if self.priority_fn:
+            try:
+                classes.sort(key=self.priority_fn)
+            except Exception as e:
+                self.logger.warning(f"priority_fn failed; using listed order: {e}")
+        return classes
+
+    def _submit_one_dask(self, cls: Type, period: str, artifact_kwargs: Dict[str, Any]):
+        return self.client.submit(
+            run_artifact_update,
+            cls,
+            dict(self.artifact_class_kwargs),
+            self._retry,
+            period,
+            artifact_kwargs,
+            pure=False,
+        )
+
+    async def _run_one_async(
+        self,
+        cls: Type,
+        period: str,
+        sem: asyncio.Semaphore,
+        artifact_kwargs: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Async/threaded fallback execution."""
+        name = cls.__name__
+        self.logger.info(f"▶️ Starting {name} for period '{period}'")
+        start_wall = datetime.datetime.now()
+
+        attempt_count = 0
+        success = False
+        error_msg = None
+
+        try:
+            async with sem:
+                for attempt in range(1, self._retry.attempts + 1):
+                    attempt_count = attempt
+                    try:
+                        def _sync_block():
+                            with ExitStack() as stack:
+                                inst = cls(**self.artifact_class_kwargs)
+                                inst = stack.enter_context(inst)
+                                inst.update_parquet(period=period, **artifact_kwargs)
+
+                        await asyncio.wait_for(
+                            asyncio.to_thread(_sync_block),
+                            timeout=self.update_timeout_seconds,
+                        )
+                        success = True
+                        break
+                    except Exception as e:
+                        error_msg = str(e)
+                        if attempt < self._retry.attempts and not self._stop.is_set():
+                            delay = min(
+                                self._retry.backoff_base ** (attempt - 1),
+                                self._retry.backoff_max,
+                            )
+                            delay *= 1 + random.uniform(0, self._retry.jitter)
+                            await asyncio.sleep(delay)
+        finally:
+            end_wall = datetime.datetime.now()
+            duration = (end_wall - start_wall).total_seconds()
+
+        result = {
+            "artifact": name,
+            "period": period,
+            "start": start_wall.isoformat(),
+            "end": end_wall.isoformat(),
+            "processing_time": duration,
+            "retries": attempt_count - 1 if success else attempt_count,
+            "success": success,
+            "error": error_msg,
+        }
+
+        if success:
+            self.logger.info(f"✅ Artifact {name} succeeded", extra=result)
+            self.completion_secs[name] = duration
+        else:
+            self.logger.error(f"❌ Artifact {name} failed", extra=result)
+            self.failed.append(name)
+
+        return result
+
+    # ---- Public API -----------------------------------------------------------
+
+    async def update_data(self, period: str, **kwargs: Any) -> List[Dict[str, Any]]:
+        self.completion_secs.clear()
+        self.failed.clear()
+        classes = self._classes_for(period)
+
+        try:
+            if self.use_dask:
+                futures = [self._submit_one_dask(cls, period, kwargs) for cls in classes]
+                results = await asyncio.to_thread(lambda: self.client.gather(futures))
+            else:
+                sem = asyncio.Semaphore(self.max_workers)
+                tasks = [
+                    asyncio.create_task(self._run_one_async(cls, period, sem, kwargs))
+                    for cls in classes
+                ]
+                results = await asyncio.gather(*tasks)
+            return results
+        finally:
+            # only shut down if we own the client
+            if self._owns_client:
+                self.close()
+
+    def get_update_status(self) -> Dict[str, Any]:
+        done = set(self.completion_secs)
+        fail = set(self.failed)
+        all_names = {c.__name__ for v in self.wrapped_classes.values() for c in v}
+        return {
+            "total": len(all_names),
+            "completed": sorted(done),
+            "failed": sorted(fail),
+            "pending": sorted(all_names - done - fail),
+            "completion_times": dict(self.completion_secs),
+        }
+
+    # ---- Lifecycle ------------------------------------------------------------
+
+    def _cleanup(self) -> None:
+        """Release any resources created by this wrapper."""
+        if self._owns_client and self.client is not None:
+            try:
+                cluster = getattr(self.client, "cluster", None)
+                self.client.close()
+                if cluster is not None:
+                    cluster.close()
+            finally:
+                self.client = None
+                self._owns_client = False
+
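For orientation, below is a minimal usage sketch of the orchestrator added in the new module above. Only ArtifactUpdaterMultiWrapperAsync, its constructor parameters, update_data(), and get_update_status() come from the diff; the artifact class MyArtifact and the period key "ytd" are hypothetical placeholders, and the import path assumes the module is used directly rather than re-exported elsewhere in the package.

import asyncio

from sibi_dst.df_helper._artifact_updater_async import ArtifactUpdaterMultiWrapperAsync

# Hypothetical artifact: any context-manager class exposing
# update_parquet(period=..., **kwargs), as the worker in the diff expects.
from myproject.artifacts import MyArtifact  # placeholder, not part of sibi-dst


async def main():
    # Map period names to the artifact classes that should be refreshed for each one.
    updater = ArtifactUpdaterMultiWrapperAsync(
        wrapped_classes={"ytd": [MyArtifact]},
        max_workers=2,
        retry_attempts=3,
        use_dask=False,  # asyncio/thread fallback; True requires dask.distributed
    )
    results = await updater.update_data("ytd")  # one result dict per artifact
    print(updater.get_update_status())          # total / completed / failed / pending
    return results


if __name__ == "__main__":
    asyncio.run(main())

With use_dask=True the same call routes each artifact through client.submit(run_artifact_update, ...) on a Dask cluster, which is why this release also adds the distributed dependency in pyproject.toml and PKG-INFO.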