sibi-dst 2025.9.9-py3-none-any.whl → 2025.9.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/df_helper/_artifact_updater_async.py +191 -137
- sibi_dst/df_helper/_parquet_artifact.py +6 -326
- sibi_dst/df_helper/_parquet_reader.py +2 -1
- sibi_dst/df_helper/backends/parquet/_parquet_options.py +24 -2
- sibi_dst/utils/boilerplate/__init__.py +5 -3
- sibi_dst/utils/boilerplate/base_pipeline.py +14 -29
- sibi_dst/utils/business_days.py +19 -51
- sibi_dst/utils/clickhouse_writer.py +1 -1
- sibi_dst/utils/data_wrapper.py +46 -312
- sibi_dst/utils/filepath_generator.py +1 -154
- sibi_dst/utils/parquet_saver.py +29 -16
- sibi_dst/utils/progress/sse_runner.py +39 -11
- sibi_dst/utils/update_planner.py +161 -805
- {sibi_dst-2025.9.9.dist-info → sibi_dst-2025.9.11.dist-info}/METADATA +2 -1
- {sibi_dst-2025.9.9.dist-info → sibi_dst-2025.9.11.dist-info}/RECORD +16 -16
- {sibi_dst-2025.9.9.dist-info → sibi_dst-2025.9.11.dist-info}/WHEEL +0 -0
sibi_dst/utils/business_days.py
CHANGED
@@ -1,19 +1,22 @@
+from __future__ import annotations
+
 import datetime as dt
 from typing import Any, Dict, Iterable, Optional
-
+
+import dask.dataframe as dd
 import numpy as np
 import pandas as pd
-
+
+from sibi_dst.utils import Logger
 
 
 # ---------------- Vectorized helpers (used by Dask map_partitions) ----------------
 
 def _to_np_days(series: pd.Series) -> np.ndarray:
     """Coerce to numpy datetime64[D] with NaT-safe conversion."""
-    # Use pandas for robust parsing, then cast to date-days
     s = pd.to_datetime(series, errors="coerce")
-    #
-    return s.
+    # Return day precision array directly
+    return s.dt.floor("D").to_numpy(dtype="datetime64[D]")
 
 
 def _vectorized_busday_count(
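
For reference, the new return path can be exercised on its own with plain pandas/NumPy. A minimal sketch of the same conversion (the wrapper name and sample values below are illustrative, not sibi_dst API):

```python
import numpy as np
import pandas as pd

def to_np_days(series: pd.Series) -> np.ndarray:
    # Parse leniently (invalid values become NaT), drop the time component,
    # and return a day-precision datetime64[D] array, as _to_np_days now does.
    s = pd.to_datetime(series, errors="coerce")
    return s.dt.floor("D").to_numpy(dtype="datetime64[D]")

print(to_np_days(pd.Series(["2025-09-10 13:45", None, "not a date"])))
# expected: ['2025-09-10' 'NaT' 'NaT']
```
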
@@ -24,8 +27,8 @@ def _vectorized_busday_count(
     weekmask: Optional[str],
     inclusive: bool,
 ) -> pd.Series:
-    start = _to_np_days(part[begin_col])
-    end = _to_np_days(part[end_col])
+    start = _to_np_days(part[begin_col])
+    end = _to_np_days(part[end_col])
 
     kwargs: Dict[str, Any] = {}
     if holidays:
@@ -38,7 +41,7 @@ def _vectorized_busday_count(
     with np.errstate(invalid="ignore"):
         end_adj = end + np.timedelta64(1, "D")
 
-    valid = (~pd.isna(start)) & (~pd.isna(end))
+    valid = (~pd.isna(start)) & (~pd.isna(end))
     result = np.full(part.shape[0], np.nan, dtype="float64")
     if valid.any():
         counts = np.busday_count(
@@ -59,8 +62,8 @@ def _vectorized_busday_offset(
     weekmask: Optional[str],
     roll: str,
 ) -> pd.Series:
-    start = _to_np_days(part[start_col])
-    n_days = pd.to_numeric(part[n_days_col], errors="coerce").to_numpy()
+    start = _to_np_days(part[start_col])
+    n_days = pd.to_numeric(part[n_days_col], errors="coerce").to_numpy()
 
     kwargs: Dict[str, Any] = {"roll": roll}
     if holidays:
@@ -68,7 +71,7 @@ def _vectorized_busday_offset(
     if weekmask:
         kwargs["weekmask"] = weekmask
 
-    valid = (~pd.isna(start)) & (~pd.isna(n_days))
+    valid = (~pd.isna(start)) & (~pd.isna(n_days))
     out = np.full(part.shape[0], np.datetime64("NaT", "ns"), dtype="datetime64[ns]")
     if valid.any():
         offs = np.busday_offset(
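
Both partition helpers ultimately delegate to NumPy's business-day routines, passing holidays and weekmask only when set and adding one day to the end date for the inclusive case. A standalone NumPy illustration of that call pattern (the dates and the holiday below are made up):

```python
import numpy as np

start = np.array(["2025-09-01", "2025-09-05"], dtype="datetime64[D]")
end = np.array(["2025-09-12", "2025-09-05"], dtype="datetime64[D]")
kwargs = {
    "holidays": np.array(["2025-09-08"], dtype="datetime64[D]"),
    "weekmask": "1111100",  # Mon-Fri
}

# Exclusive of the end date (the default branch in the helper).
print(np.busday_count(start, end, **kwargs))                           # expected: [8 0]

# Inclusive variant: shift the end date by one day, as the helper does.
print(np.busday_count(start, end + np.timedelta64(1, "D"), **kwargs))  # expected: [9 1]
```
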
@@ -86,26 +89,6 @@ def _vectorized_busday_offset(
 class BusinessDays:
     """
     Business day calculations with custom holidays and optional weekmask.
-
-    Features
-    - Scalar helpers:
-      - get_business_days_count(begin, end, inclusive=False) -> int
-      - add_business_days(start_date, n_days, roll='forward') -> np.datetime64
-    - Dask DataFrame helpers (vectorized via map_partitions):
-      - calc_business_days_from_df(df, begin_col, end_col, result_col='business_days', inclusive=False)
-      - calc_sla_end_date(df, start_date_col, n_days_col, result_col='sla_end_date', roll='forward')
-
-    Parameters
-    ----------
-    holiday_list : dict[str, list[str]] | Iterable[str]
-        Either a mapping of year -> [YYYY-MM-DD, ...] or a flat iterable of YYYY-MM-DD strings.
-    logger : Any
-        Logger with .debug/.info/.warning/.error.
-    weekmask : str | None
-        A numpy business day weekmask like '1111100' (Mon–Fri). None means default Mon–Fri.
-        Examples:
-        '1111100' -> Mon-Fri
-        '1111110' -> Mon-Sat
     """
 
     def __init__(
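
The docstring removed above was the only in-code description of the constructor contract: holiday_list as a year → dates mapping or a flat list of 'YYYY-MM-DD' strings, plus an optional NumPy weekmask such as '1111110' for Mon-Sat. A usage sketch based on that description; the import path is assumed from the module location and the keyword names are taken from the removed docstring:

```python
from sibi_dst.utils.business_days import BusinessDays  # import path assumed from the file location

holidays = {
    "2025": ["2025-01-01", "2025-12-25"],
    "2026": ["2026-01-01"],
}

# '1111110' treats Saturday as a working day (Mon-Sat), per the removed docstring.
bd = BusinessDays(holiday_list=holidays, logger=None, weekmask="1111110")

# Wed 2025-12-24 .. Mon 2025-12-29, counting the end date:
# 12-24, 12-26, Sat 12-27 and 12-29 count; 12-25 is a holiday, 12-28 is a Sunday.
print(bd.get_business_days_count("2025-12-24", "2025-12-29", inclusive=True))  # expected: 4
```
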
@@ -119,12 +102,11 @@ class BusinessDays:
         self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
         self.weekmask = weekmask
 
-        # Normalize holidays to a flat, sorted tuple of 'YYYY-MM-DD'
         if isinstance(holiday_list, dict):
             flat = [d for _, days in sorted(holiday_list.items()) for d in days]
         else:
             flat = list(holiday_list)
-
+
         seen = set()
         flat_unique = []
         for d in flat:
@@ -142,7 +124,6 @@ class BusinessDays:
         *,
         inclusive: bool = False,
     ) -> int:
-        """Business days between two dates. If inclusive=True, include the end date."""
         b = pd.to_datetime(begin_date).date()
         e = pd.to_datetime(end_date).date()
 
@@ -153,11 +134,11 @@ class BusinessDays:
             kwargs["weekmask"] = self.weekmask
 
         if inclusive:
-            e_np = np.datetime64(e) + np.timedelta64(1, "D")
+            e_np = np.datetime64(e, "D") + np.timedelta64(1, "D")
         else:
-            e_np = np.datetime64(e)
+            e_np = np.datetime64(e, "D")
 
-        val = int(np.busday_count(np.datetime64(b), e_np, **kwargs))
+        val = int(np.busday_count(np.datetime64(b, "D"), e_np, **kwargs))
         return val
 
     def add_business_days(
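
The only behavioural change in this hunk is that the NumPy scalars are now built with an explicit day unit before being handed to np.busday_count. The equivalent standalone pattern (plain NumPy/pandas; the dates are illustrative):

```python
import numpy as np
import pandas as pd

b = pd.to_datetime("2025-09-01").date()  # a Monday
e = pd.to_datetime("2025-09-05").date()  # the following Friday

begin = np.datetime64(b, "D")                          # day-unit scalar, as the method now builds it
end = np.datetime64(e, "D") + np.timedelta64(1, "D")   # +1 day for the inclusive branch

print(int(np.busday_count(begin, end)))  # expected: 5 (a full Mon-Fri week, no holidays)
```
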
@@ -167,11 +148,6 @@ class BusinessDays:
         *,
         roll: str = "forward",
     ) -> np.datetime64:
-        """
-        Add (or subtract) business days to a date. Returns numpy datetime64[D].
-        roll: {'forward','backward','following','preceding','modifiedfollowing',
-               'modifiedpreceding','nat'}
-        """
         s = pd.to_datetime(start_date).date()
         kwargs: Dict[str, Any] = {"roll": roll}
         if self.holidays:
@@ -179,7 +155,7 @@ class BusinessDays:
         if self.weekmask:
             kwargs["weekmask"] = self.weekmask
 
-        return np.busday_offset(np.datetime64(s), int(n_days), **kwargs)
+        return np.busday_offset(np.datetime64(s, "D"), int(n_days), **kwargs)
 
     # -------- Dask API --------
 
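
The docstring removed in the previous hunk listed the accepted roll values ('forward', 'backward', 'following', 'preceding', 'modifiedfollowing', 'modifiedpreceding', 'nat'); roll only matters when the start date itself is not a valid business day. A standalone NumPy illustration of the call the method now makes (the anchor date and holiday are made up):

```python
import numpy as np

anchor = np.datetime64("2025-09-06", "D")                   # a Saturday
holidays = np.array(["2025-09-08"], dtype="datetime64[D]")  # the following Monday

# roll="forward" first moves the anchor to the next valid business day, then applies the offset.
print(np.busday_offset(anchor, 0, roll="forward"))                     # expected: 2025-09-08
print(np.busday_offset(anchor, 3, roll="forward", holidays=holidays))  # expected: 2025-09-12
```
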
@@ -192,10 +168,6 @@ class BusinessDays:
         *,
         inclusive: bool = False,
     ) -> dd.DataFrame:
-        """
-        Vectorized business-day difference between two date columns.
-        Produces float64 (NaN where either side is missing).
-        """
         missing = {begin_date_col, end_date_col} - set(df.columns)
         if missing:
             self.logger.error(f"Missing columns: {missing}")
@@ -224,10 +196,6 @@ class BusinessDays:
         *,
         roll: str = "forward",
     ) -> dd.DataFrame:
-        """
-        Vectorized business-day offset for SLA end date.
-        Produces datetime64[ns] with NaT where invalid.
-        """
         missing = {start_date_col, n_days_col} - set(df.columns)
         if missing:
             self.logger.error(f"Missing columns: {missing}")
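
The two docstrings removed above were the in-code reference for the Dask helpers' signatures: calc_business_days_from_df(df, begin_col, end_col, result_col='business_days', inclusive=False), producing float64 with NaN for missing dates, and calc_sla_end_date(df, start_date_col, n_days_col, result_col='sla_end_date', roll='forward'), producing datetime64[ns] with NaT. A usage sketch under those signatures; the import path, column names, and keyword layout are assumptions carried over from that removed text:

```python
import dask.dataframe as dd
import pandas as pd

from sibi_dst.utils.business_days import BusinessDays  # import path assumed from the file location

pdf = pd.DataFrame({
    "opened": pd.to_datetime(["2025-09-01", "2025-09-03"]),
    "closed": pd.to_datetime(["2025-09-10", None]),  # missing end date -> NaN business_days
    "sla_days": [5, 10],
})
ddf = dd.from_pandas(pdf, npartitions=1)

bd = BusinessDays(holiday_list=["2025-09-08"], logger=None)

ddf = bd.calc_business_days_from_df(ddf, "opened", "closed", result_col="business_days")
ddf = bd.calc_sla_end_date(ddf, "opened", "sla_days", result_col="sla_end_date")
print(ddf.compute())
```
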

sibi_dst/utils/clickhouse_writer.py
CHANGED
@@ -224,7 +224,7 @@ class ClickHouseWriter(ManagedResource):
     def _default_engine_sql(self) -> str:
         # minimal MergeTree clause; quote order_by safely
         ob = self.order_by if self.order_by.startswith("(") else f"(`{self.order_by}`)"
-        return f"ENGINE = MergeTree ORDER BY {ob}"
+        return f"ENGINE = MergeTree ORDER BY {ob} SETTINGS allow_nullable_key = 1"
 
     # ------------- partition write -------------
 
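
For context, allow_nullable_key is the ClickHouse setting that permits Nullable columns inside a MergeTree ORDER BY (primary key), which would otherwise be rejected at CREATE TABLE time. A standalone sketch of the clause the writer now emits (the free function name and example column are illustrative, mirroring the one-line change above):

```python
def default_engine_sql(order_by: str) -> str:
    # Quote a single column; pass an explicit "(col_a, col_b)" tuple through untouched.
    ob = order_by if order_by.startswith("(") else f"(`{order_by}`)"
    return f"ENGINE = MergeTree ORDER BY {ob} SETTINGS allow_nullable_key = 1"

print(default_engine_sql("order_date"))
# ENGINE = MergeTree ORDER BY (`order_date`) SETTINGS allow_nullable_key = 1
```
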
sibi_dst/utils/data_wrapper.py
CHANGED
@@ -37,7 +37,6 @@ class DataWrapper(ManagedResource):
         dataclass: Type,
         date_field: str,
         data_path: str,
-        parquet_filename: str,
         class_params: Optional[Dict] = None,
         load_params: Optional[Dict] = None,
         show_progress: bool = False,
@@ -50,7 +49,7 @@ class DataWrapper(ManagedResource):
         self.dataclass: Type = dataclass
         self.date_field: str = date_field
         self.data_path: str = self._ensure_forward_slash(data_path)
-        self.
+        self.partition_on_date: bool = True  # Assume Hive-style date partitioning by default
 
         if self.fs is None:
             raise ValueError("DataWrapper requires a File system (fs) to be provided.")
@@ -282,16 +281,23 @@ class DataWrapper(ManagedResource):
     def _process_single_date(self, date: datetime.date):
         """Process a single date: load, save to Parquet."""
         # --- 1. Setup paths and logging ---
-        path =
-
-
-
+        path = self.data_path.rstrip("/")+"/"
+        if not self.partition_on_date:
+            # not a Hive-style partitioned path
+            path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
+            log_extra = self._log_extra(date_context=date.isoformat())
+            self.logger.debug(f"Processing date {date.isoformat()} for legacy {path}", extra=log_extra)
+        else :
+            # Hive-style partitioned path
+            log_extra = self._log_extra(date_context=date.isoformat(), partition_on=self.date_field)
+            self.logger.debug(f"Processing date {date.isoformat()} for partitioned {self.data_path} with hive-style partitions", extra=log_extra)
         # --- 2. Check if date/path should be skipped ---
         if (self.update_planner and path in self.update_planner.skipped and
                 getattr(self.update_planner, 'ignore_missing', False)):
             self.logger.debug(f"Skipping {date} as it exists in the skipped list", extra=log_extra)
             return
-
+
+        self.logger.debug(f"Processing date {date.isoformat()} for {path}", extra=log_extra)
 
         # --- 3. Timing ---
         overall_start = time.perf_counter()
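
The branch above only decides where a day's data lands. A small sketch of the two layouts produced for the same date (the storage root is made up; the Hive-style directory name comes from the partition_date column added in the next hunk):

```python
import datetime

data_path = "s3://warehouse/orders/"  # illustrative storage root
date = datetime.date(2025, 9, 10)

# partition_on_date=True (the new default): write to the dataset root; the partition
# column added at save time yields .../partition_date=2025-09-10/ subdirectories.
hive_path = data_path.rstrip("/") + "/"

# partition_on_date=False: the legacy year/month/day directory per date.
legacy_path = f"{data_path}{date.year}/{date.month:02d}/{date.day:02d}/"

print(hive_path)    # s3://warehouse/orders/
print(legacy_path)  # s3://warehouse/orders/2025/09/10/
```
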
@@ -326,39 +332,44 @@
                         self.mmanifest.record(full_path=path)
                     except Exception as e:
                         self.logger.error(f"Failed to record missing path {path}: {e}", extra=log_extra)
-                self.logger.info(f"No data found for {
+                self.logger.info(f"No data found for {path}. Logged to missing manifest.", extra=log_extra)
                 return # Done for this date
 
             if total_records < 0:
-                self.logger.warning(f"Negative record count ({total_records}) for {
+                self.logger.warning(f"Negative record count ({total_records}) for {path}. Proceeding.", extra=log_extra)
                 # Continue processing even with negative count
 
-            [… 26 removed lines, collapsed in the diff view: the previous Parquet-save, benchmarking and success-logging code that the added block below replaces …]
+            # --- 6. Save to Parquet ---
+            save_start = time.perf_counter()
+
+
+            parquet_params = {
+                "df_result": df,
+                "parquet_storage_path": path,
+                "fs": self.fs,
+                "logger": self.logger,
+                "debug": self.debug,
+                "verbose": self.verbose,
+            }
+            if self.partition_on_date:
+                df["partition_date"] = df[self.date_field].dt.date.astype(str)
+                parquet_params["partition_on"] = ["partition_date"]
+            self.logger.debug(f"{self.dataclass.__name__} saving to parquet started...", extra=log_extra)
+            with ParquetSaver(**parquet_params) as ps:
+                ps.save_to_parquet()
+            save_time = time.perf_counter() - save_start
+            self.logger.debug(f"Parquet saving for {date} completed in {save_time:.2f}s", extra=log_extra)
+
+            # --- 7. Benchmarking ---
+            total_time = time.perf_counter() - overall_start
+            self.benchmarks[date] = {
+                "load_duration": load_time,
+                "save_duration": save_time,
+                "total_duration": total_time,
+            }
+
+            # --- 8. Log Success ---
+            self._log_success(date, total_time, path)
 
         except Exception as e:
             # --- 9. Handle Errors ---
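
Because the frame is now written with partition_on=["partition_date"], downstream readers can prune on that column. A hedged sketch using plain Dask (standard dd.read_parquet, not sibi_dst API; the dataset root is illustrative):

```python
import dask.dataframe as dd

# Read only one day's partition from a dataset written by the new save path.
ddf = dd.read_parquet(
    "s3://warehouse/orders/",                          # illustrative dataset root
    filters=[("partition_date", "==", "2025-09-10")],  # prunes Hive-style partition directories
)
print(ddf.head())
```
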
@@ -397,280 +408,3 @@
         except Exception as e:
             self.logger.error(f"Error generating benchmark summary: {e}", extra=self.logger_extra)
 
-# import datetime
-# import random
-# import threading
-# import time
-# from concurrent.futures import ThreadPoolExecutor, as_completed
-# from typing import Type, Any, Dict, Optional, Union, List, ClassVar
-#
-# import pandas as pd
-# from tqdm import tqdm
-#
-# from . import ManagedResource
-# from .parquet_saver import ParquetSaver
-#
-#
-# class DataWrapper(ManagedResource):
-[… roughly 260 more removed lines: the rest of this commented-out legacy DataWrapper
-   implementation (DEFAULT_PRIORITY_MAP, __init__, process(), _execute_task_batch(),
-   _process_date_with_retry(), _process_single_date(), _log_success/_log_failure,
-   show_benchmark_summary), all deleted in 2025.9.11 …]