sibi-dst 2025.1.13-py3-none-any.whl → 2025.8.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. sibi_dst/__init__.py +7 -1
  2. sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +235 -342
  3. sibi_dst/df_helper/_df_helper.py +417 -117
  4. sibi_dst/df_helper/_parquet_artifact.py +255 -283
  5. sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
  6. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
  7. sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
  8. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
  9. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
  10. sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
  11. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
  12. sibi_dst/osmnx_helper/route_path_builder.py +45 -46
  13. sibi_dst/utils/base.py +302 -96
  14. sibi_dst/utils/clickhouse_writer.py +472 -206
  15. sibi_dst/utils/data_utils.py +139 -186
  16. sibi_dst/utils/data_wrapper.py +317 -73
  17. sibi_dst/utils/date_utils.py +1 -0
  18. sibi_dst/utils/df_utils.py +193 -213
  19. sibi_dst/utils/file_utils.py +3 -2
  20. sibi_dst/utils/filepath_generator.py +314 -152
  21. sibi_dst/utils/log_utils.py +581 -242
  22. sibi_dst/utils/manifest_manager.py +60 -76
  23. sibi_dst/utils/parquet_saver.py +33 -27
  24. sibi_dst/utils/phone_formatter.py +88 -95
  25. sibi_dst/utils/update_planner.py +180 -178
  26. sibi_dst/utils/webdav_client.py +116 -166
  27. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/METADATA +1 -1
  28. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/RECORD +29 -27
  29. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/WHEEL +0 -0
sibi_dst/utils/data_wrapper.py
@@ -1,9 +1,11 @@
  import datetime
  import threading
  import time
+ import random
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from typing import Type, Any, Dict, Optional, Union, List, ClassVar

+ import dask.dataframe as dd
  import pandas as pd
  from tqdm import tqdm

@@ -17,23 +19,23 @@ class DataWrapper(ManagedResource):
          "missing_in_history": 2,
          "existing_but_stale": 3,
          "missing_outside_history": 4,
-         "file_is_recent": 0
+         "file_is_recent": 0,
      }
      DEFAULT_MAX_AGE_MINUTES: int = 1440
      DEFAULT_HISTORY_DAYS_THRESHOLD: int = 30

      def __init__(
-         self,
-         dataclass: Type,
-         date_field: str,
-         data_path: str,
-         parquet_filename: str,
-         class_params: Optional[Dict] = None,
-         load_params: Optional[Dict] = None,
-         show_progress: bool = False,
-         timeout: float = 30,
-         max_threads: int = 3,
-         **kwargs: Any,
+         self,
+         dataclass: Type,
+         date_field: str,
+         data_path: str,
+         parquet_filename: str,
+         class_params: Optional[Dict] = None,
+         load_params: Optional[Dict] = None,
+         show_progress: bool = False,
+         timeout: float = 30,
+         max_threads: int = 3,
+         **kwargs: Any,
      ):
          super().__init__(**kwargs)
          self.dataclass = dataclass
@@ -41,15 +43,15 @@ class DataWrapper(ManagedResource):
          self.data_path = self._ensure_forward_slash(data_path)
          self.parquet_filename = parquet_filename
          if self.fs is None:
-             raise ValueError("Datawrapper requires a File system (fs) to be provided .")
+             raise ValueError("DataWrapper requires a File system (fs) to be provided.")
          self.show_progress = show_progress
          self.timeout = timeout
          self.max_threads = max_threads
          self.class_params = class_params or {
-             'debug': self.debug,
-             'logger': self.logger,
-             'fs': self.fs,
-             'verbose': self.verbose,
+             "debug": self.debug,
+             "logger": self.logger,
+             "fs": self.fs,
+             "verbose": self.verbose,
          }
          self.load_params = load_params or {}

@@ -60,7 +62,6 @@ class DataWrapper(ManagedResource):
          self.update_planner = kwargs.get("update_planner", None)

      def __exit__(self, exc_type, exc_val, exc_tb):
-         """Context manager exit"""
          if self.mmanifest:
              self.mmanifest.save()
          super().__exit__(exc_type, exc_val, exc_tb)
@@ -77,10 +78,24 @@ class DataWrapper(ManagedResource):

      @staticmethod
      def _ensure_forward_slash(path: str) -> str:
-         return path.rstrip('/') + '/'
+         return path.rstrip("/") + "/"

-     def process(self, max_retries: int = 3):
-         """Process updates with priority-based execution, retries, benchmarking and progress updates"""
+     def process(
+         self,
+         max_retries: int = 3,
+         backoff_base: float = 2.0,
+         backoff_jitter: float = 0.1,
+         backoff_max: float = 60.0,
+     ):
+         """
+         Execute the update plan with concurrency, retries and exponential backoff.
+
+         Args:
+             max_retries: attempts per date.
+             backoff_base: base for exponential backoff (delay = base**attempt).
+             backoff_jitter: multiplicative jitter factor in [0, backoff_jitter].
+             backoff_max: maximum backoff seconds per attempt (before jitter).
+         """
          overall_start = time.perf_counter()
          tasks = list(self.update_planner.get_tasks_by_priority())
          if not tasks:
@@ -91,7 +106,7 @@ class DataWrapper(ManagedResource):
              self.update_planner.show_update_plan()

          for priority, dates in tasks:
-             self._execute_task_batch(priority, dates, max_retries)
+             self._execute_task_batch(priority, dates, max_retries, backoff_base, backoff_jitter, backoff_max)

          total_time = time.perf_counter() - overall_start
          if self.processed_dates:
@@ -100,14 +115,26 @@ class DataWrapper(ManagedResource):
          if self.update_planner.show_progress:
              self.show_benchmark_summary()

-     def _execute_task_batch(self, priority: int, dates: List[datetime.date], max_retries: int):
-         """Executes a single batch of tasks (dates) using a thread pool."""
+     def _execute_task_batch(
+         self,
+         priority: int,
+         dates: List[datetime.date],
+         max_retries: int,
+         backoff_base: float,
+         backoff_jitter: float,
+         backoff_max: float,
+     ):
          desc = f"Processing {self.dataclass.__name__}, priority: {priority}"
          max_thr = min(len(dates), self.max_threads)
          self.logger.info(f"Executing {len(dates)} tasks with priority {priority} using {max_thr} threads.")

          with ThreadPoolExecutor(max_workers=max_thr) as executor:
-             futures = {executor.submit(self._process_date_with_retry, date, max_retries): date for date in dates}
+             futures = {
+                 executor.submit(
+                     self._process_date_with_retry, date, max_retries, backoff_base, backoff_jitter, backoff_max
+                 ): date
+                 for date in dates
+             }
              iterator = as_completed(futures)
              if self.show_progress:
                  iterator = tqdm(iterator, total=len(futures), desc=desc)
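The batch executor above is the standard future-per-date fan-out: one future per date, a dict mapping each future back to its date, and as_completed (optionally wrapped in tqdm) to drain results. A minimal, self-contained sketch of the same pattern, with a stand-in process_one in place of the package's _process_date_with_retry (the date list and timings are made up):

import random
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import date, timedelta

def process_one(d: date) -> str:
    # Stand-in for DataWrapper._process_date_with_retry.
    time.sleep(random.uniform(0.01, 0.05))
    return f"done {d.isoformat()}"

dates = [date(2025, 8, 1) + timedelta(days=i) for i in range(5)]
max_threads = 3

with ThreadPoolExecutor(max_workers=min(len(dates), max_threads)) as executor:
    # Map each future back to its date so failures can be attributed to a specific day.
    futures = {executor.submit(process_one, d): d for d in dates}
    for future in as_completed(futures):
        try:
            print(futures[future], future.result(timeout=30))
        except Exception as exc:
            print(f"Permanent failure for {futures[future]}: {exc}")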
@@ -118,22 +145,30 @@ class DataWrapper(ManagedResource):
                  except Exception as e:
                      self.logger.error(f"Permanent failure for {futures[future]}: {e}")

-     def _process_date_with_retry(self, date: datetime.date, max_retries: int):
-         """Wrapper to apply retry logic to single date processing."""
+     def _process_date_with_retry(
+         self,
+         date: datetime.date,
+         max_retries: int,
+         backoff_base: float,
+         backoff_jitter: float,
+         backoff_max: float,
+     ):
          for attempt in range(max_retries):
              try:
                  self._process_single_date(date)
                  return
              except Exception as e:
                  if attempt < max_retries - 1:
-                     self.logger.warning(f"Retry {attempt + 1}/{max_retries} for {date}: {e}")
-                     time.sleep(2 ** attempt)  # Exponential backoff
+                     base_delay = min(backoff_base ** attempt, backoff_max)
+                     delay = base_delay * (1 + random.uniform(0.0, max(0.0, backoff_jitter)))
+                     self.logger.warning(
+                         f"Retry {attempt + 1}/{max_retries} for {date}: {e} (sleep {delay:.2f}s)"
+                     )
+                     time.sleep(delay)
                  else:
                      self.logger.error(f"Failed processing {date} after {max_retries} attempts.")
-                     # raise

      def _process_single_date(self, date: datetime.date):
-         """Core date processing logic with load/save timing and thread reporting"""
          path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
          self.logger.debug(f"Processing date {date.isoformat()} for {path}")
          if path in self.update_planner.skipped and self.update_planner.ignore_missing:
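The hunk above is where the new backoff parameters actually take effect: the fixed time.sleep(2 ** attempt) is replaced by a capped, jittered delay. A small sketch of that arithmetic, useful for checking what a given configuration will sleep (defaults taken from the new process() signature):

import random

def backoff_delay(attempt: int, base: float = 2.0, jitter: float = 0.1, cap: float = 60.0) -> float:
    # Same formula as _process_date_with_retry: exponential, capped, then up to +jitter.
    base_delay = min(base ** attempt, cap)
    return base_delay * (1 + random.uniform(0.0, max(0.0, jitter)))

for attempt in range(5):
    print(attempt, round(backoff_delay(attempt), 2))
# Roughly 1, 2, 4, 8, 16 seconds, each inflated by up to 10%;
# the cap keeps later attempts at no more than 60 s before jitter.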
@@ -141,74 +176,283 @@ class DataWrapper(ManagedResource):
              return
          full_path = f"{path}{self.parquet_filename}"

-         # thread_name = threading.current_thread().name
-         # self.logger.debug(f"[{thread_name}] Executing date: {date} -> saving to: {full_path}")
-
          overall_start = time.perf_counter()
          try:
              load_start = time.perf_counter()
              date_filter = {f"{self.date_field}__date": {date.isoformat()}}
              self.logger.debug(f"Loading data for {date} with filter: {date_filter}")
-             # Load data using the dataclass with the provided date filter
-             # Create a copy to avoid mutating the shared instance dictionary
+
              local_load_params = self.load_params.copy()
              local_load_params.update(date_filter)
+
              with self.dataclass(**self.class_params) as local_class_instance:
-                 df = local_class_instance.load(**local_load_params)
+                 df = local_class_instance.load(**local_load_params)  # expected to be Dask
              load_time = time.perf_counter() - load_start

              if hasattr(local_class_instance, "total_records"):
-                 self.logger.debug(
-                     f"Total records loaded by {local_class_instance.__class__.__name__}: {local_class_instance.total_records}")
-                 if int(local_class_instance.total_records) == 0:  # If no records were loaded but not due to an error
+                 total_records = int(local_class_instance.total_records)
+                 self.logger.debug(f"Total records loaded: {total_records}")
+
+                 if total_records == 0:
                      if self.mmanifest:
-                         self.mmanifest.record(
-                             full_path=path
-                         )
+                         self.mmanifest.record(full_path=path)
                      self.logger.info(f"No data found for {full_path}. Logged to missing manifest.")
-                 elif int(local_class_instance.total_records) < 0:
-                     self.logger.warning(
-                         f"Negative record count ({local_class_instance.total_records}) for {full_path}. "
-                         "This may indicate an error in the data loading process."
-                     )
-                 else:
-                     save_start = time.perf_counter()
-                     parquet_params ={
-                         "df_result": df,
-                         "parquet_storage_path": path,
-                         "fs": self.fs,
-                         "logger": self.logger,
-                         "debug": self.debug,
-                     }
-                     with ParquetSaver(**parquet_params) as ps:
-                         ps.save_to_parquet(self.parquet_filename, overwrite=True)
-                     save_time = time.perf_counter() - save_start
-
-                     total_time = time.perf_counter() - overall_start
-                     self.benchmarks[date] = {
-                         "load_duration": load_time,
-                         "save_duration": save_time,
-                         "total_duration": total_time
-                     }
-                     self._log_success(date, total_time, full_path)
+                     return
+
+                 if total_records < 0:
+                     self.logger.warning(f"Negative record count ({total_records}) for {full_path}.")
+                     return
+
+             save_start = time.perf_counter()
+             parquet_params = {
+                 "df_result": df,
+                 "parquet_storage_path": path,
+                 "fs": self.fs,
+                 "logger": self.logger,
+                 "debug": self.debug,
+             }
+             with ParquetSaver(**parquet_params) as ps:
+                 ps.save_to_parquet(self.parquet_filename, overwrite=True)
+             save_time = time.perf_counter() - save_start
+
+             total_time = time.perf_counter() - overall_start
+             self.benchmarks[date] = {
+                 "load_duration": load_time,
+                 "save_duration": save_time,
+                 "total_duration": total_time,
+             }
+             self._log_success(date, total_time, full_path)
+
          except Exception as e:
              self._log_failure(date, e)
              raise

      def _log_success(self, date: datetime.date, duration: float, path: str):
-         msg = f"Completed {date} in {duration:.1f}s | Saved to {path}"
-         self.logger.info(msg)
+         self.logger.info(f"Completed {date} in {duration:.1f}s | Saved to {path}")
          self.processed_dates.append(date)

      def _log_failure(self, date: datetime.date, error: Exception):
-         msg = f"Failed processing {date}: {error}"
-         self.logger.error(msg)
+         self.logger.error(f"Failed processing {date}: {error}")

      def show_benchmark_summary(self):
-         """Display a summary of load/save timings per date"""
          if not self.benchmarks:
              self.logger.info("No benchmarking data to show")
              return
          df_bench = pd.DataFrame.from_records([{"date": d, **m} for d, m in self.benchmarks.items()])
          df_bench = df_bench.set_index("date").sort_index(ascending=not self.update_planner.reverse_order)
          self.logger.info(f"Benchmark Summary:\n {self.dataclass.__name__}\n" + df_bench.to_string())
+
+     # [new lines 245-458: the previous DataWrapper implementation, retained verbatim as a
+     #  commented-out block at the end of the module]
sibi_dst/utils/date_utils.py
@@ -145,6 +145,7 @@ class DateUtils:
              'current_month': lambda: cls.get_month_range(n=0),
              'last_month': lambda: cls.get_month_range(n=-1),
              'current_year': lambda: cls.get_year_timerange(today().year),
+             'last_year': lambda: cls.get_year_timerange(today().year - 1),
              'current_quarter': lambda: (
                  cls.get_first_day_of_the_quarter(today()), cls.get_last_day_of_the_quarter(today())),
              'ytd': lambda: (datetime.date(today().year, 1, 1), today()),
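The DateUtils change simply registers a 'last_year' option alongside the existing period keys. Assuming get_year_timerange(year) returns the first and last day of the given calendar year (consistent with how 'current_year' is used just above), the new key should resolve to a range like the one this sketch computes:

import datetime

def last_year_range(today: datetime.date | None = None) -> tuple[datetime.date, datetime.date]:
    # Expected resolution of 'last_year': Jan 1 .. Dec 31 of the previous calendar year.
    today = today or datetime.date.today()
    year = today.year - 1
    return datetime.date(year, 1, 1), datetime.date(year, 12, 31)

print(last_year_range(datetime.date(2025, 8, 1)))
# (datetime.date(2024, 1, 1), datetime.date(2024, 12, 31))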