sibi-dst 2025.1.12__py3-none-any.whl → 2025.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. sibi_dst/__init__.py +7 -1
  2. sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +235 -342
  3. sibi_dst/df_helper/_df_helper.py +417 -117
  4. sibi_dst/df_helper/_parquet_artifact.py +255 -283
  5. sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
  6. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
  7. sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
  8. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
  9. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
  10. sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
  11. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
  12. sibi_dst/osmnx_helper/__init__.py +1 -0
  13. sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +203 -0
  14. sibi_dst/osmnx_helper/route_path_builder.py +97 -0
  15. sibi_dst/osmnx_helper/utils.py +2 -0
  16. sibi_dst/utils/base.py +302 -96
  17. sibi_dst/utils/clickhouse_writer.py +472 -206
  18. sibi_dst/utils/data_utils.py +139 -186
  19. sibi_dst/utils/data_wrapper.py +317 -73
  20. sibi_dst/utils/date_utils.py +1 -0
  21. sibi_dst/utils/df_utils.py +193 -213
  22. sibi_dst/utils/file_utils.py +3 -2
  23. sibi_dst/utils/filepath_generator.py +314 -152
  24. sibi_dst/utils/log_utils.py +581 -242
  25. sibi_dst/utils/manifest_manager.py +60 -76
  26. sibi_dst/utils/parquet_saver.py +33 -27
  27. sibi_dst/utils/phone_formatter.py +88 -95
  28. sibi_dst/utils/update_planner.py +180 -178
  29. sibi_dst/utils/webdav_client.py +116 -166
  30. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/METADATA +1 -1
  31. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/RECORD +32 -28
  32. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/WHEEL +0 -0
sibi_dst/df_helper/_artifact_updater_multi_wrapper.py
@@ -1,422 +1,315 @@
+from __future__ import annotations
+
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Any, Callable, Dict, List, Optional, Type
+from dataclasses import dataclass
+from typing import Any, Dict

 from sibi_dst.utils import ManagedResource

+
+@dataclass(slots=True)
+class _RetryCfg:
+    attempts: int = 3
+    backoff_base: float = 2.0
+    backoff_max: float = 60.0
+    jitter: float = 0.15
+
+
+_ORCHESTRATOR_KEYS = {
+    "retry_attempts",
+    "backoff_base",
+    "backoff_max",
+    "backoff_jitter",
+    "update_timeout_seconds",  # accepted but unused in pure-threads version
+    "max_workers",
+    "priority_fn",
+    "artifact_class_kwargs",
+}
+
+
+def _default_artifact_kwargs(resource: ManagedResource) -> Dict[str, Any]:
+    return {
+        "logger": resource.logger,
+        "debug": resource.debug,
+        "fs": resource.fs,
+        "verbose": resource.verbose,
+    }
+
+
 class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
     """
-    Updates artifacts concurrently using a ThreadPoolExecutor.
-
-    This version is refactored for a pure multi-threaded environment, aligning
-    the orchestration model with the underlying threaded workers (DataWrapper).
+    Backward-compatible threaded orchestrator.
     """
-    wrapped_classes: Dict[str, List[Type]]
+
     def __init__(
-        self,
-        wrapped_classes: Dict[str, List[Type]],
-        *,
-        max_workers: int = 4,
-        retry_attempts: int = 3,
-        backoff_base: int = 2,
-        backoff_max: int = 60,
-        backoff_jitter: float = 0.1,
-        priority_fn: Optional[Callable[[Type], int]] = None,
-        artifact_class_kwargs: Optional[Dict[str, Any]] = None,
-        **kwargs: Dict[str, Any]
+        self,
+        wrapped_classes: Dict[str, Sequence[Type]],
+        *,
+        max_workers: int = 4,
+        retry_attempts: int = 3,
+        backoff_base: float = 2.0,
+        backoff_max: float = 60.0,
+        backoff_jitter: float = 0.15,
+        priority_fn: Optional[Callable[[Type], int]] = None,
+        artifact_class_kwargs: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
     ) -> None:
         super().__init__(**kwargs)
         self.wrapped_classes = wrapped_classes
-        self.max_workers = max_workers
-        self.retry_attempts = retry_attempts
-        self.backoff_base = backoff_base
-        self.backoff_max = backoff_max
-        self.backoff_jitter = backoff_jitter
+        self.max_workers = int(max_workers)
         self.priority_fn = priority_fn
-        # Default artifact init kwargs
-        today = datetime.datetime.today() + datetime.timedelta(days=1)
-        default_kwargs = {
-            'parquet_start_date': today.strftime('%Y-%m-%d'),
-            'parquet_end_date': today.strftime('%Y-%m-%d'),
-            'logger': self.logger,
-            'debug': self.debug,
-            'fs': self.fs,
-            'verbose': self.verbose,
+        self._retry = _RetryCfg(
+            attempts=int(retry_attempts),
+            backoff_base=float(backoff_base),
+            backoff_max=float(backoff_max),
+            jitter=float(backoff_jitter),
+        )
+        self.artifact_class_kwargs = {
+            **_default_artifact_kwargs(self),
+            **(artifact_class_kwargs or {}),
         }
-        self.artifact_class_kwargs = artifact_class_kwargs or default_kwargs.copy()
-
-        # State tracking
-        self.completion_times: Dict[str, float] = {}
+        self.completion_secs: Dict[str, float] = {}
         self.failed: List[str] = []
-        self.original_classes: List[Type] = []
-        self.logger.info("ArtifactUpdaterMultiWrapperThreaded initialized")

-    def get_artifact_classes(self, data_type: str) -> List[Type]:
-        """Retrieve artifact classes by data type."""
-        self.logger.info(f"Fetching artifact classes for '{data_type}'")
-        classes = self.wrapped_classes.get(data_type)
+    def _classes_for(self, period: str) -> List[Type]:
+        try:
+            classes = list(self.wrapped_classes[period])
+        except KeyError:
+            raise ValueError(f"Unsupported period '{period}'.")
         if not classes:
-            raise ValueError(f"Unsupported data type: {data_type}")
-        self.logger.info(f"Found {len(classes)} artifact classes for '{data_type}'")
-        return classes
-
-    def estimate_priority(self, artifact_cls: Type) -> int:
-        """
-        Determines task priority. Lower values run first.
-        Note: This is a blocking call and will run sequentially before updates start.
-        """
-        name = artifact_cls.__name__
-        # Custom priority function takes precedence
+            raise ValueError(f"No artifact classes configured for period '{period}'.")
         if self.priority_fn:
             try:
-                return self.priority_fn(artifact_cls)
+                classes.sort(key=self.priority_fn)
             except Exception as e:
-                self.logger.warning(f"priority_fn error for {name}: {e}")
-
-        # # Fallback to size estimate if available
-        # if hasattr(artifact_cls, 'get_size_estimate'):
-        #     try:
-        #         # This performs blocking I/O
-        #         return artifact_cls(**self.artifact_class_kwargs).get_size_estimate()
-        #
-        #     except Exception as e:
-        #         self.logger.warning(f"get_size_estimate failed for {name}: {e}")
-
-        # Default priority
-        return 999
-
-    def _update_artifact_with_retry(self, artifact_cls: Type, update_kwargs: Dict[str, Any]) -> str:
-        """
-        A blocking worker function that handles instantiation, update, and retries for a single artifact.
-        This function is designed to be run in a ThreadPoolExecutor.
-        """
-        name = artifact_cls.__name__
-        self.logger.debug(f"Worker thread starting update for {name}")
+                self.logger.warning(f"priority_fn failed; using listed order: {e}")
+        return classes

-        for attempt in range(1, self.retry_attempts + 1):
+    @staticmethod
+    def _split_kwargs(raw: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
+        orch: Dict[str, Any] = {}
+        art: Dict[str, Any] = {}
+        for k, v in raw.items():
+            if k in _ORCHESTRATOR_KEYS:
+                orch[k] = v
+            else:
+                art[k] = v
+        return orch, art
+
+    def _run_one(self, cls: Type, period: str, artifact_kwargs: Dict[str, Any]) -> str:
+        name = cls.__name__
+        start = time.monotonic()
+        for attempt in range(1, self._retry.attempts + 1):
             try:
-                # Instantiate and update directly within the worker thread
-                artifact_instance = artifact_cls(**self.artifact_class_kwargs)
-                artifact_instance.update_parquet(**update_kwargs)
-
-                self.logger.info(f"✅ {name} updated successfully on attempt {attempt}")
-                return name  # Return the name on success
-
+                with ExitStack() as stack:
+                    inst = cls(**self.artifact_class_kwargs)
+                    inst = stack.enter_context(inst)
+                    inst.update_parquet(period=period, **artifact_kwargs)
+                self.completion_secs[name] = time.monotonic() - start
+                return name
             except Exception as e:
-                self.logger.error(f"Error on {name} attempt {attempt}/{self.retry_attempts}: {e}", exc_info=self.debug)
-                if attempt < self.retry_attempts:
-                    delay = min(self.backoff_base ** (attempt - 1), self.backoff_max)
-                    delay *= 1 + random.uniform(0, self.backoff_jitter)
-                    self.logger.info(f"Sleeping {delay:.1f}s before retrying {name}")
+                if attempt < self._retry.attempts:
+                    delay = min(self._retry.backoff_base ** (attempt - 1), self._retry.backoff_max)
+                    delay *= 1 + random.uniform(0, self._retry.jitter)
                     time.sleep(delay)
+                else:
+                    raise RuntimeError(f"{name} failed after {self._retry.attempts} attempts: {e}") from e

-        # If all retries fail, raise an exception to be caught by the main loop
-        raise RuntimeError(f"{name} failed after {self.retry_attempts} attempts.")
+    def update_data(self, period: str, **kwargs: Any) -> None:
+        # Split kwargs to preserve backward compatibility
+        _, artifact_kwargs = self._split_kwargs(kwargs)

-    async def update_data(self, data_type: str, **kwargs: Any) -> None:
-        """
-        Entry point to update all artifacts of a given type using a ThreadPoolExecutor.
-        """
-        self.logger.debug(f"Starting multi-threaded update for '{data_type}' with kwargs={kwargs}")
-
-        # Reset state for this run
-        self.completion_times.clear()
+        self.completion_secs.clear()
         self.failed.clear()
-        self.original_classes = self.get_artifact_classes(data_type)
-
-        # Sequentially estimate priorities and sort classes before execution
-        self.logger.debug("Estimating priorities to order tasks...")
-        ordered_classes = sorted(self.original_classes, key=self.estimate_priority)
-        self.logger.debug("Priority estimation complete. Submitting tasks to thread pool.")
-
-        start_time = time.monotonic()

-        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
-            future_to_class_name = {
-                executor.submit(self._update_artifact_with_retry, cls, kwargs): cls.__name__
-                for cls in ordered_classes
-            }
-
-            for future in as_completed(future_to_class_name):
-                name = future_to_class_name[future]
+        classes = self._classes_for(period)
+        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
+            fut2name = {pool.submit(self._run_one, cls, period, dict(artifact_kwargs)): cls.__name__ for cls in classes}
+            for fut in as_completed(fut2name):
+                name = fut2name[fut]
                 try:
-                    # result() will re-raise the exception from the worker if one occurred
-                    future.result()
-                    # If no exception, the task succeeded
-                    self.completion_times[name] = time.monotonic() - start_time
+                    fut.result()
+                    self.logger.info(f"✅ {name} ({period}) in {self.completion_secs[name]:.2f}s")
                 except Exception as e:
-                    self.logger.error(f"✖️ {name} permanently failed. See error log above.")
                     self.failed.append(name)
+                    self.logger.error(f"✖️ {name} permanently failed: {e}")

-        # Log final status
-        total = len(self.original_classes)
-        completed = len(self.completion_times)
-        failed_count = len(self.failed)
-        self.logger.info(f"All artifacts processed: total={total}, completed={completed}, failed={failed_count}")
+        self.logger.info(
+            f"Artifacts processed: total={len(classes)}, "
+            f"completed={len(self.completion_secs)}, failed={len(self.failed)}"
+        )

     def get_update_status(self) -> Dict[str, Any]:
-        """Returns a summary status including completion times."""
-        completed_set = set(self.completion_times.keys())
-        failed_set = set(self.failed)
-        pending_set = {cls.__name__ for cls in self.original_classes} - completed_set - failed_set
-
+        done = set(self.completion_secs)
+        fail = set(self.failed)
+        all_names = {c.__name__ for v in self.wrapped_classes.values() for c in v}
         return {
-            'total': len(self.original_classes),
-            'completed': list(completed_set),
-            'failed': list(failed_set),
-            'pending': list(pending_set),
-            'completion_times': self.completion_times,
+            "total": len(all_names),
+            "completed": sorted(done),
+            "failed": sorted(fail),
+            "pending": sorted(all_names - done - fail),
+            "completion_times": dict(self.completion_secs),
         }

-    @staticmethod
-    def format_status_table(status: Dict[str, Any]) -> str:
-        """Formats the status dictionary into a readable table."""
-        lines = [
-            f"Total: {status['total']}",
-            f"Completed: {len(status['completed'])}",
-            f"Failed: {len(status['failed'])}",
-            f"Pending: {len(status['pending'])}",
-            "\nPer-artifact completion times (seconds):"
-        ]
-        sorted_times = sorted(status['completion_times'].items(), key=lambda item: item[1], reverse=True)
-        for name, duration in sorted_times:
-            lines.append(f" - {name:<30}: {duration:.2f}s")
-        if status['failed']:
-            lines.append("\nFailed artifacts:")
-            for name in status['failed']:
-                lines.append(f" - {name}")
-        return "\n".join(lines)
-
-
 import asyncio
-import datetime
 import random
-from typing import Any, Callable, Dict, List, Optional, Type
+from contextlib import ExitStack
+from typing import Any, Callable, Dict, List, Optional, Sequence, Type

 class ArtifactUpdaterMultiWrapperAsync(ManagedResource):
     """
-    Simplified wrapper that updates artifacts concurrently using an asyncio.Semaphore.
-
-    Features:
-      - Caps concurrency at max_workers via semaphore
-      - Optionally prioritises tasks via a priority function or static method on artifact classes
-      - Tracks per-artifact completion times
-      - Configurable retry/backoff strategy
-      - Optional metrics integration
-      - Thread-safe within a single asyncio loop
-
-    Usage:
-        wrapper = ArtifactUpdaterMultiWrapper(
-            wrapped_classes={
-                'mydata': [DataArtifactA, DataArtifactB],
-            },
-            max_workers=4,
-            retry_attempts=3,
-            update_timeout_seconds=600,
-            backoff_base=2,
-            backoff_max=60,
-            backoff_jitter=0.1,
-            priority_fn=None,  # or custom
-            metrics_client=None,
-            debug=True,
-            logger=None,
-            artifact_class_kwargs={
-                'fs': my_fs,
-                'parquet_storage_path': 's3://bucket/data',
-                'logger': my_logger,
-                'debug': True,
-            }
-        )
-        await wrapper.update_data('mydata', period='ytd', overwrite=True)
+    Backward-compatible async orchestrator.
+
+    Public API preserved:
+      __init__(wrapped_classes, *, max_workers=..., retry_attempts=..., backoff_*=..., update_timeout_seconds=..., priority_fn=..., artifact_class_kwargs=..., **kwargs)
+      update_data(period, **kwargs) -> forwards only artifact-friendly kwargs to update_parquet
     """
+
     def __init__(
         self,
-        wrapped_classes: Dict[str, List[Type]],
+        wrapped_classes: Dict[str, Sequence[Type]],
         *,
         max_workers: int = 3,
         retry_attempts: int = 3,
         update_timeout_seconds: int = 600,
-        backoff_base: int = 2,
-        backoff_max: Optional[int] = 60,
-        backoff_jitter: float = 0.1,
+        backoff_base: float = 2.0,
+        backoff_max: float = 60.0,
+        backoff_jitter: float = 0.15,
         priority_fn: Optional[Callable[[Type], int]] = None,
-        metrics_client: Any = None,
         artifact_class_kwargs: Optional[Dict[str, Any]] = None,
-        **kwargs: Dict[str, Any]
+        **kwargs: Any,
     ) -> None:
         super().__init__(**kwargs)
         self.wrapped_classes = wrapped_classes
-        self.max_workers = max_workers
-        self.retry_attempts = retry_attempts
-        self.update_timeout_seconds = update_timeout_seconds
-        self.backoff_base = backoff_base
-        self.backoff_max = backoff_max
-        self.backoff_jitter = backoff_jitter
+        self.max_workers = int(max_workers)
+        self.update_timeout_seconds = int(update_timeout_seconds)
         self.priority_fn = priority_fn
-        self.metrics_client = metrics_client
-
-        # Default artifact init kwargs
-        today = datetime.datetime.today() + datetime.timedelta(days=1)
-        default_kwargs = {
-            'parquet_start_date': today.strftime('%Y-%m-%d'),
-            'parquet_end_date': today.strftime('%Y-%m-%d'),
-            'logger': self.logger,
-            'debug': self.debug,
-            'fs': self.fs,
-            'verbose': self.verbose,
+
+        self._retry = _RetryCfg(
+            attempts=int(retry_attempts),
+            backoff_base=float(backoff_base),
+            backoff_max=float(backoff_max),
+            jitter=float(backoff_jitter),
+        )
+
+        self.artifact_class_kwargs = {
+            **_default_artifact_kwargs(self),
+            **(artifact_class_kwargs or {}),
         }
-        self.artifact_class_kwargs = artifact_class_kwargs or default_kwargs.copy()

-        # State
-        self.completion_times: Dict[str, float] = {}
+        self.completion_secs: Dict[str, float] = {}
         self.failed: List[str] = []
-        self.original_classes: List[Type] = []
-        self.logger.info("ArtifactUpdaterMultiWrapperAsync initialized")

-    def get_artifact_classes(self, data_type: str) -> List[Type]:
-        """
-        Retrieve artifact classes by data type.
-        """
-        self.logger.info(f"Fetching artifact classes for '{data_type}'")
-        if data_type not in self.wrapped_classes:
-            raise ValueError(f"Unsupported data type: {data_type}")
-        classes = self.wrapped_classes[data_type]
-        self.logger.info(f"Found {len(classes)} artifact classes for '{data_type}'")
-        return classes
+    # ---- internals -----------------------------------------------------------

-    def estimate_priority(self, artifact_cls: Type) -> int:
-        """
-        Determine task priority for ordering. Lower values run first.
-        """
-        name = artifact_cls.__name__
+    def _classes_for(self, period: str) -> List[Type]:
+        try:
+            classes = list(self.wrapped_classes[period])
+        except KeyError:
+            raise ValueError(f"Unsupported period '{period}'.")
+        if not classes:
+            raise ValueError(f"No artifact classes configured for period '{period}'.")
         if self.priority_fn:
             try:
-                pr = self.priority_fn(artifact_cls)
-                self.logger.debug(f"priority_fn for {name}: {pr}")
-                return pr
+                classes.sort(key=self.priority_fn)
             except Exception as e:
-                self.logger.warning(f"priority_fn error for {name}: {e}")
-        try:
-            fs = self.artifact_class_kwargs.get('fs')
-            path = self.artifact_class_kwargs.get('parquet_storage_path')
-            pr=1
-            if hasattr(artifact_cls, 'get_size_estimate'):
-                pr = artifact_cls.get_size_estimate(fs, path)
-            self.logger.debug(f"Estimated priority for {name}: {pr}")
-            return pr
-        except Exception:
-            return 1
-
-    async def _bounded_update(self, artifact_cls: Type, sem: asyncio.Semaphore, **update_kwargs) -> None:
+                self.logger.warning(f"priority_fn failed; using listed order: {e}")
+        return classes
+
+    @staticmethod
+    def _split_kwargs(raw: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
         """
-        Wrap update_artifact in a semaphore slot to limit concurrency.
+        Split kwargs into (orchestrator-only, artifact-forwarded).
+        Keeps backward compatibility: callers can pass all knobs in one dict.
         """
+        orch: Dict[str, Any] = {}
+        art: Dict[str, Any] = {}
+        for k, v in raw.items():
+            if k in _ORCHESTRATOR_KEYS:
+                orch[k] = v
+            else:
+                art[k] = v
+        return orch, art
+
+    async def _run_one(self, cls: Type, period: str, sem: asyncio.Semaphore, artifact_kwargs: Dict[str, Any]) -> None:
+        name = cls.__name__
         async with sem:
-            name = artifact_cls.__name__
-            start = asyncio.get_event_loop().time()
-            self.logger.info(f"Starting update for {name}")
-            try:
-                for attempt in range(1, self.retry_attempts + 1):
-                    try:
-                        artifact = await asyncio.to_thread(
-                            artifact_cls, **self.artifact_class_kwargs
-                        )
-                        await asyncio.wait_for(
-                            asyncio.to_thread(
-                                artifact.update_parquet, **update_kwargs
-                            ),
-                            timeout=self.update_timeout_seconds
-                        )
-                        duration = asyncio.get_event_loop().time() - start
-                        self.completion_times[name] = duration
-                        self.logger.info(f"✅ {name} updated in {duration:.2f}s (attempt {attempt})")
-                        if self.metrics_client:
-                            self.metrics_client.increment('task_succeeded')
-                        return
-                    except asyncio.TimeoutError:
-                        self.logger.warning(f"Timeout on {name}, attempt {attempt}")
-                    except Exception as e:
-                        self.logger.error(f"Error on {name} attempt {attempt}: {e}")
-
-                    delay = min(self.backoff_base ** (attempt - 1), self.backoff_max)
-                    delay *= 1 + random.uniform(0, self.backoff_jitter)
-                    self.logger.info(f"Sleeping {delay:.1f}s before retrying {name}")
+            start = asyncio.get_running_loop().time()
+            for attempt in range(1, self._retry.attempts + 1):
+                try:
+                    # Run sync context + method in thread
+                    def _sync_block() -> None:
+                        with ExitStack() as stack:
+                            inst = cls(**self.artifact_class_kwargs)
+                            inst = stack.enter_context(inst)
+                            inst.update_parquet(period=period, **artifact_kwargs)
+
+                    await asyncio.wait_for(
+                        asyncio.to_thread(_sync_block),
+                        timeout=self.update_timeout_seconds,
+                    )
+                    dt_secs = asyncio.get_running_loop().time() - start
+                    self.completion_secs[name] = dt_secs
+                    self.logger.info(f"✅ {name} ({period}) in {dt_secs:.2f}s")
+                    return
+
+                except asyncio.TimeoutError:
+                    self.logger.warning(f"Timeout in {name} attempt {attempt}/{self._retry.attempts}")
+                except Exception as e:
+                    self.logger.error(
+                        f"{name} attempt {attempt}/{self._retry.attempts} failed: {e}",
+                        exc_info=self.debug,
+                    )
+
+                if attempt < self._retry.attempts:
+                    delay = min(self._retry.backoff_base ** (attempt - 1), self._retry.backoff_max)
+                    delay *= 1 + random.uniform(0, self._retry.jitter)
                     await asyncio.sleep(delay)

-            except asyncio.CancelledError:
-                self.logger.warning(f"{name} update cancelled")
-                raise
-
-        # permanent failure
-        self.logger.error(f"✖️ {name} permanently failed after {self.retry_attempts} attempts")
-        if self.metrics_client:
-            self.metrics_client.increment('task_failed')
         self.failed.append(name)
+        self.logger.error(f"✖️ {name} permanently failed")
+
+    # ---- public API ----------------------------------------------------------

-    async def update_data(self, data_type: str, **kwargs: Any) -> None:
+    async def update_data(self, period: str, **kwargs: Any) -> None:
         """
-        Entry point to update all artifacts of a given type concurrently.
+        Backward-compatible:
+          - Accepts orchestrator knobs in kwargs (we consume them).
+          - Forwards only artifact-friendly kwargs to update_parquet.
         """
-        self.logger.info(f"Starting update_data for '{data_type}' with kwargs={kwargs}")
+        # split kwargs; ignore any runtime attempts to mutate orchestrator config mid-call
+        _, artifact_kwargs = self._split_kwargs(kwargs)

-        # RESET STATE
-        self.completion_times.clear()
+        self.completion_secs.clear()
         self.failed.clear()
-        self.original_classes = self.get_artifact_classes(data_type)
-
-        # NON-DESTRUCTIVE SORTING
-        ordered = sorted(self.original_classes, key=self.estimate_priority)

+        classes = self._classes_for(period)
         sem = asyncio.Semaphore(self.max_workers)
-        tasks = [
-            asyncio.create_task(self._bounded_update(cls, sem, **kwargs))
-            for cls in ordered
-        ]
+        tasks = [asyncio.create_task(self._run_one(cls, period, sem, dict(artifact_kwargs))) for cls in classes]

-        try:
-            for coro in asyncio.as_completed(tasks):
-                await coro
-        except asyncio.CancelledError:
-            self.logger.warning("update_data was cancelled—aborting remaining retries")
-            for t in tasks:
-                t.cancel()
-            raise
-        finally:
-            total = len(self.original_classes)
-            completed = len(self.completion_times)
-            failed = len(self.failed)
-            self.logger.info(f"All artifacts processed: total={total}, completed={completed}, failed={failed}")
+        for t in asyncio.as_completed(tasks):
+            try:
+                await t
+            except asyncio.CancelledError:
+                for rest in tasks:
+                    rest.cancel()
+                raise

-    def get_update_status(self) -> Dict[str, Any]:
-        """
-        Returns summary status including completion times.
-        """
-        total = len(self.original_classes)
-        completed = set(self.completion_times.keys())
-        failed = set(self.failed)
-        pending = {cls.__name__ for cls in self.original_classes} - completed - failed
+        self.logger.info(
+            f"Artifacts processed: total={len(classes)}, "
+            f"completed={len(self.completion_secs)}, failed={len(self.failed)}"
+        )

+    # Optional helper
+    def get_update_status(self) -> Dict[str, Any]:
+        done = set(self.completion_secs)
+        fail = set(self.failed)
+        all_names = {c.__name__ for v in self.wrapped_classes.values() for c in v}
         return {
-            'total': total,
-            'completed': list(completed),
-            'failed': list(failed),
-            'pending': list(pending),
-            'completion_times': self.completion_times,
-        }
-
-    @staticmethod
-    def format_status_table(status: Dict[str, Any]) -> str:
-        """
-        Formats the status dict into a readable table.
-        """
-        lines = [
-            f"Total: {status['total']}",
-            f"Completed: {len(status['completed'])} {status['completed']}",
-            f"Failed: {len(status['failed'])} {status['failed']}",
-            f"Pending: {len(status['pending'])} {status['pending']}",
-            "",
-            "Per-artifact timings:"
-        ]
-        for name, dur in status['completion_times'].items():
-            lines.append(f" {name}: {dur:.2f}s")
-        return "\n".join(lines)
+            "total": len(all_names),
+            "completed": sorted(done),
+            "failed": sorted(fail),
+            "pending": sorted(all_names - done - fail),
+            "completion_times": dict(self.completion_secs),
+        }
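
Below is a minimal usage sketch of the refactored orchestrator API, for orientation only; it is not part of the diff. It assumes the classes are imported directly from the module shown above (the package may also re-export them elsewhere), that ManagedResource supplies workable defaults for logger, debug, fs, and verbose when constructed without them, and it uses a hypothetical SalesArtifact class standing in for any artifact that accepts those kwargs, supports the context-manager protocol, and exposes update_parquet(period=..., **kwargs).

    from sibi_dst.df_helper._artifact_updater_multi_wrapper import (
        ArtifactUpdaterMultiWrapperThreaded,
    )


    class SalesArtifact:
        """Hypothetical artifact; any class with this interface fits the orchestrator."""

        def __init__(self, **kwargs):  # receives logger/debug/fs/verbose plus overrides
            self.kwargs = kwargs

        def __enter__(self):  # _run_one enters each instance through an ExitStack
            return self

        def __exit__(self, *exc):  # cleanup hook; returning False propagates errors
            return False

        def update_parquet(self, period, **kwargs):
            print(f"updating '{period}' with {kwargs}")


    wrapper = ArtifactUpdaterMultiWrapperThreaded(
        # In 2025.8.1 the wrapped_classes key doubles as the period name that
        # _classes_for() looks up and _run_one() passes to update_parquet().
        wrapped_classes={"ytd": [SalesArtifact]},
        max_workers=4,
        retry_attempts=3,
        backoff_base=2.0,  # retry delay: min(2.0 ** (attempt - 1), 60.0) * (1 + U(0, 0.15))
        backoff_max=60.0,  # i.e. roughly 1 s, then 2 s, between the three attempts
    )
    wrapper.update_data("ytd", overwrite=True)  # non-orchestrator kwargs reach update_parquet
    print(wrapper.get_update_status())

The async wrapper exposes the same surface as a coroutine, so the last two calls become await wrapper.update_data("ytd", overwrite=True) inside an event loop, with each attempt additionally bounded by update_timeout_seconds.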