sibi-dst 2025.1.5__py3-none-any.whl → 2025.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,12 @@ from __future__ import annotations
  from ._df_helper import DfHelper
  from ._parquet_artifact import ParquetArtifact
  from ._parquet_reader import ParquetReader
- from ._artifact_updater_multi_wrapper import ArtifactUpdaterMultiWrapperThreaded
+ from ._artifact_updater_multi_wrapper import ArtifactUpdaterMultiWrapperThreaded, ArtifactUpdaterMultiWrapperAsync

  __all__ = [
      'DfHelper',
      'ParquetArtifact',
      'ParquetReader',
      'ArtifactUpdaterMultiWrapperThreaded',
+     'ArtifactUpdaterMultiWrapperAsync',
  ]
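
Since ArtifactUpdaterMultiWrapperAsync is now re-exported alongside the threaded wrapper, downstream code can pull both from the package root. A minimal import sketch, purely illustrative:

    from sibi_dst.df_helper import (
        ArtifactUpdaterMultiWrapperThreaded,
        ArtifactUpdaterMultiWrapperAsync,  # new export in 2025.1.7
    )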
@@ -1,11 +1,8 @@
- import datetime
  import time
- import random
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from typing import Any, Callable, Dict, List, Optional, Type

- from sibi_dst.utils import ManagedResource
-
+ from sibi_dst.utils import ManagedResource

  class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
      """
@@ -14,7 +11,7 @@ class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
      This version is refactored for a pure multi-threaded environment, aligning
      the orchestration model with the underlying threaded workers (DataWrapper).
      """
-
+     wrapped_classes: Dict[str, List[Type]]
      def __init__(
          self,
          wrapped_classes: Dict[str, List[Type]],
@@ -26,7 +23,7 @@ class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
          backoff_jitter: float = 0.1,
          priority_fn: Optional[Callable[[Type], int]] = None,
          artifact_class_kwargs: Optional[Dict[str, Any]] = None,
-         **kwargs: Any,
+         **kwargs: Dict[str, Any]
      ) -> None:
          super().__init__(**kwargs)
          self.wrapped_classes = wrapped_classes
@@ -75,14 +72,14 @@ class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
              except Exception as e:
                  self.logger.warning(f"priority_fn error for {name}: {e}")

-         # Fallback to size estimate if available
-         if hasattr(artifact_cls, 'get_size_estimate'):
-             try:
-                 # This performs blocking I/O
-                 return artifact_cls(**self.artifact_class_kwargs).get_size_estimate()
-
-             except Exception as e:
-                 self.logger.warning(f"get_size_estimate failed for {name}: {e}")
+         # # Fallback to size estimate if available
+         # if hasattr(artifact_cls, 'get_size_estimate'):
+         #     try:
+         #         # This performs blocking I/O
+         #         return artifact_cls(**self.artifact_class_kwargs).get_size_estimate()
+         #
+         #     except Exception as e:
+         #         self.logger.warning(f"get_size_estimate failed for {name}: {e}")

          # Default priority
          return 999
@@ -115,7 +112,7 @@ class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
          # If all retries fail, raise an exception to be caught by the main loop
          raise RuntimeError(f"{name} failed after {self.retry_attempts} attempts.")

-     def update_data(self, data_type: str, **kwargs: Any) -> None:
+     async def update_data(self, data_type: str, **kwargs: Any) -> None:
          """
          Entry point to update all artifacts of a given type using a ThreadPoolExecutor.
          """
@@ -190,242 +187,234 @@ class ArtifactUpdaterMultiWrapperThreaded(ManagedResource):
          return "\n".join(lines)


- # import asyncio
- # import logging
- # import datetime
- # import random
- # from typing import Any, Callable, Dict, List, Optional, Type
- #
- # from sibi_dst.utils import Logger
- #
- #
- # class ArtifactUpdaterMultiWrapperAsync:
- #     """
- #     Simplified wrapper that updates artifacts concurrently using an asyncio.Semaphore.
- #
- #     Features:
- #       - Caps concurrency at max_workers via semaphore
- #       - Optionally prioritises tasks via a priority function or static method on artifact classes
- #       - Tracks per-artifact completion times
- #       - Configurable retry/backoff strategy
- #       - Optional metrics integration
- #       - Thread-safe within a single asyncio loop
- #
- #     Usage:
- #         wrapper = ArtifactUpdaterMultiWrapper(
- #             wrapped_classes={
- #                 'mydata': [DataArtifactA, DataArtifactB],
- #             },
- #             max_workers=4,
- #             retry_attempts=3,
- #             update_timeout_seconds=600,
- #             backoff_base=2,
- #             backoff_max=60,
- #             backoff_jitter=0.1,
- #             priority_fn=None, # or custom
- #             metrics_client=None,
- #             debug=True,
- #             logger=None,
- #             artifact_class_kwargs={
- #                 'fs': my_fs,
- #                 'parquet_storage_path': 's3://bucket/data',
- #                 'logger': my_logger,
- #                 'debug': True,
- #             }
- #         )
- #         await wrapper.update_data('mydata', period='ytd', overwrite=True)
- #     """
- #     def __init__(
- #         self,
- #         wrapped_classes: Dict[str, List[Type]],
- #         *,
- #         max_workers: int = 3,
- #         retry_attempts: int = 3,
- #         update_timeout_seconds: int = 600,
- #         backoff_base: int = 2,
- #         backoff_max: Optional[int] = 60,
- #         backoff_jitter: float = 0.1,
- #         priority_fn: Optional[Callable[[Type], int]] = None,
- #         metrics_client: Any = None,
- #         debug: bool = False,
- #         logger: Optional[logging.Logger] = None,
- #         artifact_class_kwargs: Optional[Dict[str, Any]] = None,
- #     ) -> None:
- #         self.wrapped_classes = wrapped_classes
- #         self.max_workers = max_workers
- #         self.retry_attempts = retry_attempts
- #         self.update_timeout_seconds = update_timeout_seconds
- #         self.backoff_base = backoff_base
- #         self.backoff_max = backoff_max
- #         self.backoff_jitter = backoff_jitter
- #         self.priority_fn = priority_fn
- #         self.metrics_client = metrics_client
- #
- #         self.debug = debug
- #         self.logger = logger or Logger.default_logger(
- #             logger_name=self.__class__.__name__,
- #             log_level=Logger.DEBUG if debug else Logger.INFO
- #         )
- #
- #         # Default artifact init kwargs
- #         today = datetime.datetime.today() + datetime.timedelta(days=1)
- #         default_kwargs = {
- #             'parquet_start_date': today.strftime('%Y-%m-%d'),
- #             'parquet_end_date': today.strftime('%Y-%m-%d'),
- #             'logger': self.logger,
- #             'debug': self.debug,
- #         }
- #         self.artifact_class_kwargs = artifact_class_kwargs or default_kwargs.copy()
- #
- #         # State
- #         self.completion_times: Dict[str, float] = {}
- #         self.failed: List[str] = []
- #         self.original_classes: List[Type] = []
- #
- #     def get_artifact_classes(self, data_type: str) -> List[Type]:
- #         """
- #         Retrieve artifact classes by data type.
- #         """
- #         self.logger.info(f"Fetching artifact classes for '{data_type}'")
- #         if data_type not in self.wrapped_classes:
- #             raise ValueError(f"Unsupported data type: {data_type}")
- #         classes = self.wrapped_classes[data_type]
- #         self.logger.info(f"Found {len(classes)} artifact classes for '{data_type}'")
- #         return classes
- #
- #     def estimate_priority(self, artifact_cls: Type) -> int:
- #         """
- #         Determine task priority for ordering. Lower values run first.
- #         """
- #         name = artifact_cls.__name__
- #         if self.priority_fn:
- #             try:
- #                 pr = self.priority_fn(artifact_cls)
- #                 self.logger.debug(f"priority_fn for {name}: {pr}")
- #                 return pr
- #             except Exception as e:
- #                 self.logger.warning(f"priority_fn error for {name}: {e}")
- #         try:
- #             fs = self.artifact_class_kwargs.get('fs')
- #             path = self.artifact_class_kwargs.get('parquet_storage_path')
- #             pr=1
- #             if hasattr(artifact_cls, 'get_size_estimate'):
- #                 pr = artifact_cls.get_size_estimate(fs, path)
- #             self.logger.debug(f"Estimated priority for {name}: {pr}")
- #             return pr
- #         except Exception:
- #             return 1
- #
- #     async def _bounded_update(self, artifact_cls: Type, sem: asyncio.Semaphore, **update_kwargs) -> None:
- #         """
- #         Wrap update_artifact in a semaphore slot to limit concurrency.
- #         """
- #         async with sem:
- #             name = artifact_cls.__name__
- #             start = asyncio.get_event_loop().time()
- #             self.logger.info(f"Starting update for {name}")
- #             try:
- #                 for attempt in range(1, self.retry_attempts + 1):
- #                     try:
- #                         artifact = await asyncio.to_thread(
- #                             artifact_cls, **self.artifact_class_kwargs
- #                         )
- #                         await asyncio.wait_for(
- #                             asyncio.to_thread(
- #                                 artifact.update_parquet, **update_kwargs
- #                             ),
- #                             timeout=self.update_timeout_seconds
- #                         )
- #                         duration = asyncio.get_event_loop().time() - start
- #                         self.completion_times[name] = duration
- #                         self.logger.info(f"✅ {name} updated in {duration:.2f}s (attempt {attempt})")
- #                         if self.metrics_client:
- #                             self.metrics_client.increment('task_succeeded')
- #                         return
- #                     except asyncio.TimeoutError:
- #                         self.logger.warning(f"Timeout on {name}, attempt {attempt}")
- #                     except Exception as e:
- #                         self.logger.error(f"Error on {name} attempt {attempt}: {e}")
- #
- #                     delay = min(self.backoff_base ** (attempt - 1), self.backoff_max)
- #                     delay *= 1 + random.uniform(0, self.backoff_jitter)
- #                     self.logger.info(f"Sleeping {delay:.1f}s before retrying {name}")
- #                     await asyncio.sleep(delay)
- #
- #             except asyncio.CancelledError:
- #                 self.logger.warning(f"{name} update cancelled")
- #                 raise
- #
- #             # permanent failure
- #             self.logger.error(f"✖️ {name} permanently failed after {self.retry_attempts} attempts")
- #             if self.metrics_client:
- #                 self.metrics_client.increment('task_failed')
- #             self.failed.append(name)
- #
- #     async def update_data(self, data_type: str, **kwargs: Any) -> None:
- #         """
- #         Entry point to update all artifacts of a given type concurrently.
- #         """
- #         self.logger.info(f"Starting update_data for '{data_type}' with kwargs={kwargs}")
- #
- #         # RESET STATE
- #         self.completion_times.clear()
- #         self.failed.clear()
- #         self.original_classes = self.get_artifact_classes(data_type)
- #
- #         # NON-DESTRUCTIVE SORTING
- #         ordered = sorted(self.original_classes, key=self.estimate_priority)
- #
- #         sem = asyncio.Semaphore(self.max_workers)
- #         tasks = [
- #             asyncio.create_task(self._bounded_update(cls, sem, **kwargs))
- #             for cls in ordered
- #         ]
- #
- #         try:
- #             for coro in asyncio.as_completed(tasks):
- #                 await coro
- #         except asyncio.CancelledError:
- #             self.logger.warning("update_data was cancelled—aborting remaining retries")
- #             for t in tasks:
- #                 t.cancel()
- #             raise
- #         finally:
- #             total = len(self.original_classes)
- #             completed = len(self.completion_times)
- #             failed = len(self.failed)
- #             self.logger.info(f"All artifacts processed: total={total}, completed={completed}, failed={failed}")
- #
- #     def get_update_status(self) -> Dict[str, Any]:
- #         """
- #         Returns summary status including completion times.
- #         """
- #         total = len(self.original_classes)
- #         completed = set(self.completion_times.keys())
- #         failed = set(self.failed)
- #         pending = {cls.__name__ for cls in self.original_classes} - completed - failed
- #
- #         return {
- #             'total': total,
- #             'completed': list(completed),
- #             'failed': list(failed),
- #             'pending': list(pending),
- #             'completion_times': self.completion_times,
- #         }
- #
- #     @staticmethod
- #     def format_status_table(status: Dict[str, Any]) -> str:
- #         """
- #         Formats the status dict into a readable table.
- #         """
- #         lines = [
- #             f"Total: {status['total']}",
- #             f"Completed: {len(status['completed'])} {status['completed']}",
- #             f"Failed: {len(status['failed'])} {status['failed']}",
- #             f"Pending: {len(status['pending'])} {status['pending']}",
- #             "",
- #             "Per-artifact timings:"
- #         ]
- #         for name, dur in status['completion_times'].items():
- #             lines.append(f" {name}: {dur:.2f}s")
- #         return "\n".join(lines)
+ import asyncio
+ import datetime
+ import random
+ from typing import Any, Callable, Dict, List, Optional, Type
+
+ class ArtifactUpdaterMultiWrapperAsync(ManagedResource):
+     """
+     Simplified wrapper that updates artifacts concurrently using an asyncio.Semaphore.
+
+     Features:
+       - Caps concurrency at max_workers via semaphore
+       - Optionally prioritises tasks via a priority function or static method on artifact classes
+       - Tracks per-artifact completion times
+       - Configurable retry/backoff strategy
+       - Optional metrics integration
+       - Thread-safe within a single asyncio loop
+
+     Usage:
+         wrapper = ArtifactUpdaterMultiWrapper(
+             wrapped_classes={
+                 'mydata': [DataArtifactA, DataArtifactB],
+             },
+             max_workers=4,
+             retry_attempts=3,
+             update_timeout_seconds=600,
+             backoff_base=2,
+             backoff_max=60,
+             backoff_jitter=0.1,
+             priority_fn=None, # or custom
+             metrics_client=None,
+             debug=True,
+             logger=None,
+             artifact_class_kwargs={
+                 'fs': my_fs,
+                 'parquet_storage_path': 's3://bucket/data',
+                 'logger': my_logger,
+                 'debug': True,
+             }
+         )
+         await wrapper.update_data('mydata', period='ytd', overwrite=True)
+     """
+     def __init__(
+         self,
+         wrapped_classes: Dict[str, List[Type]],
+         *,
+         max_workers: int = 3,
+         retry_attempts: int = 3,
+         update_timeout_seconds: int = 600,
+         backoff_base: int = 2,
+         backoff_max: Optional[int] = 60,
+         backoff_jitter: float = 0.1,
+         priority_fn: Optional[Callable[[Type], int]] = None,
+         metrics_client: Any = None,
+         artifact_class_kwargs: Optional[Dict[str, Any]] = None,
+         **kwargs: Dict[str, Any]
+     ) -> None:
+         super().__init__(**kwargs)
+         self.wrapped_classes = wrapped_classes
+         self.max_workers = max_workers
+         self.retry_attempts = retry_attempts
+         self.update_timeout_seconds = update_timeout_seconds
+         self.backoff_base = backoff_base
+         self.backoff_max = backoff_max
+         self.backoff_jitter = backoff_jitter
+         self.priority_fn = priority_fn
+         self.metrics_client = metrics_client
+
+         # Default artifact init kwargs
+         today = datetime.datetime.today() + datetime.timedelta(days=1)
+         default_kwargs = {
+             'parquet_start_date': today.strftime('%Y-%m-%d'),
+             'parquet_end_date': today.strftime('%Y-%m-%d'),
+             'logger': self.logger,
+             'debug': self.debug,
+             'fs': self.fs,
+             'verbose': self.verbose,
+         }
+         self.artifact_class_kwargs = artifact_class_kwargs or default_kwargs.copy()
+
+         # State
+         self.completion_times: Dict[str, float] = {}
+         self.failed: List[str] = []
+         self.original_classes: List[Type] = []
+
+     def get_artifact_classes(self, data_type: str) -> List[Type]:
+         """
+         Retrieve artifact classes by data type.
+         """
+         self.logger.info(f"Fetching artifact classes for '{data_type}'")
+         if data_type not in self.wrapped_classes:
+             raise ValueError(f"Unsupported data type: {data_type}")
+         classes = self.wrapped_classes[data_type]
+         self.logger.info(f"Found {len(classes)} artifact classes for '{data_type}'")
+         return classes
+
+     def estimate_priority(self, artifact_cls: Type) -> int:
+         """
+         Determine task priority for ordering. Lower values run first.
+         """
+         name = artifact_cls.__name__
+         if self.priority_fn:
+             try:
+                 pr = self.priority_fn(artifact_cls)
+                 self.logger.debug(f"priority_fn for {name}: {pr}")
+                 return pr
+             except Exception as e:
+                 self.logger.warning(f"priority_fn error for {name}: {e}")
+         try:
+             fs = self.artifact_class_kwargs.get('fs')
+             path = self.artifact_class_kwargs.get('parquet_storage_path')
+             pr=1
+             if hasattr(artifact_cls, 'get_size_estimate'):
+                 pr = artifact_cls.get_size_estimate(fs, path)
+             self.logger.debug(f"Estimated priority for {name}: {pr}")
+             return pr
+         except Exception:
+             return 1
+
+     async def _bounded_update(self, artifact_cls: Type, sem: asyncio.Semaphore, **update_kwargs) -> None:
+         """
+         Wrap update_artifact in a semaphore slot to limit concurrency.
+         """
+         async with sem:
+             name = artifact_cls.__name__
+             start = asyncio.get_event_loop().time()
+             self.logger.info(f"Starting update for {name}")
+             try:
+                 for attempt in range(1, self.retry_attempts + 1):
+                     try:
+                         artifact = await asyncio.to_thread(
+                             artifact_cls, **self.artifact_class_kwargs
+                         )
+                         await asyncio.wait_for(
+                             asyncio.to_thread(
+                                 artifact.update_parquet, **update_kwargs
+                             ),
+                             timeout=self.update_timeout_seconds
+                         )
+                         duration = asyncio.get_event_loop().time() - start
+                         self.completion_times[name] = duration
+                         self.logger.info(f"✅ {name} updated in {duration:.2f}s (attempt {attempt})")
+                         if self.metrics_client:
+                             self.metrics_client.increment('task_succeeded')
+                         return
+                     except asyncio.TimeoutError:
+                         self.logger.warning(f"Timeout on {name}, attempt {attempt}")
+                     except Exception as e:
+                         self.logger.error(f"Error on {name} attempt {attempt}: {e}")
+
+                     delay = min(self.backoff_base ** (attempt - 1), self.backoff_max)
+                     delay *= 1 + random.uniform(0, self.backoff_jitter)
+                     self.logger.info(f"Sleeping {delay:.1f}s before retrying {name}")
+                     await asyncio.sleep(delay)
+
+             except asyncio.CancelledError:
+                 self.logger.warning(f"{name} update cancelled")
+                 raise
+
+             # permanent failure
+             self.logger.error(f"✖️ {name} permanently failed after {self.retry_attempts} attempts")
+             if self.metrics_client:
+                 self.metrics_client.increment('task_failed')
+             self.failed.append(name)
+
+     async def update_data(self, data_type: str, **kwargs: Any) -> None:
+         """
+         Entry point to update all artifacts of a given type concurrently.
+         """
+         self.logger.info(f"Starting update_data for '{data_type}' with kwargs={kwargs}")
+
+         # RESET STATE
+         self.completion_times.clear()
+         self.failed.clear()
+         self.original_classes = self.get_artifact_classes(data_type)
+
+         # NON-DESTRUCTIVE SORTING
+         ordered = sorted(self.original_classes, key=self.estimate_priority)
+
+         sem = asyncio.Semaphore(self.max_workers)
+         tasks = [
+             asyncio.create_task(self._bounded_update(cls, sem, **kwargs))
+             for cls in ordered
+         ]
+
+         try:
+             for coro in asyncio.as_completed(tasks):
+                 await coro
+         except asyncio.CancelledError:
+             self.logger.warning("update_data was cancelled—aborting remaining retries")
+             for t in tasks:
+                 t.cancel()
+             raise
+         finally:
+             total = len(self.original_classes)
+             completed = len(self.completion_times)
+             failed = len(self.failed)
+             self.logger.info(f"All artifacts processed: total={total}, completed={completed}, failed={failed}")
+
+     def get_update_status(self) -> Dict[str, Any]:
+         """
+         Returns summary status including completion times.
+         """
+         total = len(self.original_classes)
+         completed = set(self.completion_times.keys())
+         failed = set(self.failed)
+         pending = {cls.__name__ for cls in self.original_classes} - completed - failed
+
+         return {
+             'total': total,
+             'completed': list(completed),
+             'failed': list(failed),
+             'pending': list(pending),
+             'completion_times': self.completion_times,
+         }
+
+     @staticmethod
+     def format_status_table(status: Dict[str, Any]) -> str:
+         """
+         Formats the status dict into a readable table.
+         """
+         lines = [
+             f"Total: {status['total']}",
+             f"Completed: {len(status['completed'])} {status['completed']}",
+             f"Failed: {len(status['failed'])} {status['failed']}",
+             f"Pending: {len(status['pending'])} {status['pending']}",
+             "",
+             "Per-artifact timings:"
+         ]
+         for name, dur in status['completion_times'].items():
+             lines.append(f" {name}: {dur:.2f}s")
+         return "\n".join(lines)
@@ -1,8 +1,7 @@
  from __future__ import annotations
  import datetime
- import logging
  import threading
- from typing import Optional, Any, Dict
+ from typing import Optional, Any, Dict, ClassVar

  import dask.dataframe as dd
  import fsspec
@@ -55,7 +54,7 @@ class ParquetArtifact(DfHelper):
      :ivar fs: Filesystem object used for storage operations.
      :type fs: fsspec.AbstractFileSystem
      """
-     DEFAULT_CONFIG = {
+     DEFAULT_CONFIG: ClassVar[Dict[str, str]] = {
          'backend': 'parquet'
      }

@@ -91,8 +90,6 @@ class ParquetArtifact(DfHelper):
          }
          self.df: Optional[dd.DataFrame] = None
          super().__init__(**self.config)
-         #self._own_logger = False
-         #self._setup_logging()
          self.data_wrapper_class = data_wrapper_class

          self.date_field = self._validate_required('date_field')
@@ -101,16 +98,6 @@ class ParquetArtifact(DfHelper):
          self.parquet_start_date = self._validate_required('parquet_start_date')
          self.parquet_end_date = self._validate_required('parquet_end_date')

-         # Filesystem setup
-         #self.filesystem_type = self.config.setdefault('filesystem_type', 'file')
-         #self.filesystem_options = self.config.setdefault('filesystem_options', {})
-         #self.fs = self.config.setdefault('fs', None)
-         #self._own_fs = self.fs is None
-         #if self.fs is None:
-         # self.fs = fsspec.filesystem(self.filesystem_type, **self.filesystem_options)
-         # self._own_fs = True
-         #self.config.setdefault('fs', self.fs)
-         ## Populate to parameters to pass to data_wrapper_class
          self.class_params = self.config.pop('class_params', {
              'debug': self.debug,
              'logger': self.logger,
@@ -125,15 +112,6 @@ class ParquetArtifact(DfHelper):
          self.update_planner_params = {}
          self.datawrapper_params = {}

-     #def _setup_logging(self):
-     #    """Initialize logger and debug settings."""
-     #    self.debug = self.config.get('debug', False)
-     #    logger = self.config.get('logger', None)
-     #    self._own_logger = logger is None
-     #    self.logger = logger or Logger.default_logger(
-     #        logger_name=f'Parquet_Artifact_InstanceOf_{self.__class__.__name__}')
-     #    self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
-
      def _validate_required(self, key: str) -> Any:
          """Validate required configuration fields."""
          value = self.config.setdefault(key, None)
@@ -211,28 +189,28 @@ class ParquetArtifact(DfHelper):
          """
          Synchronously estimates artifact size for use in multi-threaded environments.

-         This method uses the filesystem's own .sync() method to safely execute
-         asynchronous I/O operations from a synchronous context, preventing
-         event loop conflicts.
+         This method safely executes asynchronous I/O operations from a synchronous
+         context, handling variations in fsspec filesystem implementations.
          """

          async def _get_total_bytes_async():
              """A helper async coroutine to perform the I/O."""
              import asyncio

-             # Use the async versions of fsspec methods (e.g., _glob, _size)
              files = await self.fs._glob(f"{self.parquet_storage_path}/*.parquet")
              if not files:
                  return 0

-             # Concurrently gather the size of all files for performance
              size_tasks = [self.fs._size(f) for f in files]
              sizes = await asyncio.gather(*size_tasks)
              return sum(s for s in sizes if s is not None)

-         # Use the filesystem's own built-in sync method. This is the most
-         # reliable way to bridge the sync/async gap for fsspec.
-         total_bytes = self.fs.sync(_get_total_bytes_async())
+         try:
+             # Attempt the standard fsspec method first
+             total_bytes = self.fs.sync(_get_total_bytes_async())
+         except AttributeError:
+             # fallback for filesystems like s3fs that lack .sync()
+             total_bytes = self.fs.loop.run_until_complete(_get_total_bytes_async())

          # Convert to megabytes, ensuring a minimum of 1
          return max(1, int(total_bytes / (1024 ** 2)))
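
For comparison, fsspec also ships a module-level bridge that submits a coroutine function to an async filesystem's event loop. A rough alternative sketch, under the assumption that fs is an async-capable filesystem exposing _glob, _size, and loop; this is not what the package does above:

    from fsspec.asyn import sync as fsspec_sync

    def total_parquet_bytes(fs, storage_path):
        async def _total():
            files = await fs._glob(f"{storage_path}/*.parquet")
            sizes = [await fs._size(f) for f in files]
            return sum(s for s in sizes if s is not None)
        # Runs the coroutine function on the filesystem's dedicated loop thread.
        return fsspec_sync(fs.loop, _total)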
@@ -1,11 +1,9 @@
- import logging
- from typing import Optional
+ from typing import Optional, ClassVar, Dict

  import dask.dataframe as dd
  import fsspec

  from sibi_dst.df_helper import DfHelper
- from sibi_dst.utils import Logger

  class ParquetReader(DfHelper):
      """
@@ -44,19 +42,17 @@ class ParquetReader(DfHelper):
      Parquet storage.
      :type fs: fsspec.AbstractFileSystem
      """
-     DEFAULT_CONFIG = {
+     DEFAULT_CONFIG: ClassVar[Dict[str, int]] = {
          'backend': 'parquet'
      }

-     def __init__(self, filesystem_type="file", filesystem_options=None, **kwargs):
+     def __init__(self, **kwargs):
          self.config = {
              **self.DEFAULT_CONFIG,
              **kwargs,
          }
-         self.df: Optional[dd.DataFrame] = None
-         #self.debug = self.config.setdefault('debug', False)
-         #self.logger = self.config.setdefault('logger', Logger.default_logger(logger_name=self.__class__.__name__))
-         #self.logger.set_level(logging.DEBUG if self.debug else logging.INFO)
+         super().__init__(**self.config)
+
          self.parquet_storage_path = self.config.setdefault('parquet_storage_path', None)
          if self.parquet_storage_path is None:
              raise ValueError('parquet_storage_path must be set')
@@ -68,19 +64,9 @@ class ParquetReader(DfHelper):
          if self.parquet_end_date is None:
              raise ValueError('parquet_end_date must be set')

-         # Filesystem setup
-         #self.filesystem_type = filesystem_type
-         #self.filesystem_options = filesystem_options or {}
-         #self.fs = self.config.setdefault('fs', None)
-         #if self.fs is None:
-         # self.fs = fsspec.filesystem(self.filesystem_type, **self.filesystem_options)
-         #self.config.setdefault('fs', self.fs)
-
          if not self.directory_exists():
              raise ValueError(f"{self.parquet_storage_path} does not exist")

-         super().__init__(**self.config)
-
      def load(self, **kwargs):
          self.df = super().load(**kwargs)
          return self.df
@@ -90,5 +76,4 @@ class ParquetReader(DfHelper):
              info = self.fs.info(self.parquet_storage_path)
              return info['type'] == 'directory'
          except FileNotFoundError:
-             return False
-
+             return False
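
With the simplified constructor, ParquetReader no longer builds its own filesystem from filesystem_type/filesystem_options; callers now pass a ready fsspec filesystem along with the other config keys. A hypothetical before/after sketch; the path and dates are placeholders:

    import fsspec
    from sibi_dst.df_helper import ParquetReader

    # Before: ParquetReader(filesystem_type="file", filesystem_options={}, ...)
    reader = ParquetReader(
        fs=fsspec.filesystem('file'),            # assumed to be handled by DfHelper/ManagedResource
        parquet_storage_path='/data/warehouse',  # placeholder
        parquet_start_date='2025-01-01',
        parquet_end_date='2025-01-31',
    )
    df = reader.load()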
@@ -85,7 +85,8 @@ class ParquetConfig(BaseModel):
          if self.logger is None:
              self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
          self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
-
+         if self.fs is None:
+             raise ValueError('Parquet Options: File system (fs) must be specified')

          if self.parquet_storage_path is None:
              raise ValueError('Parquet storage path must be specified')
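
Since ParquetConfig now fails fast when fs is missing, configurations must be built with an explicit filesystem. A hypothetical construction; any field not shown in this hunk is an assumption:

    import fsspec

    config = ParquetConfig(
        fs=fsspec.filesystem('file'),            # now mandatory
        parquet_storage_path='/data/warehouse',  # placeholder
    )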
sibi_dst/utils/base.py CHANGED
@@ -1,3 +1,4 @@
+ import asyncio
  from .log_utils import Logger

  class ManagedResource:
@@ -61,7 +62,7 @@ class ManagedResource:

      async def __aexit__(self, exc_type, exc_val, exc_tb):
          """Exit the runtime context and trigger cleanup for 'async with' statements."""
-         self.cleanup()
+         await self.acleanup()
          return False # Propagate exceptions

      def __repr__(self) -> str:
@@ -80,7 +81,7 @@ class ManagedResource:

      def cleanup(self):
          """
-         Clean up resources managed by this instance.
+         Cleanup resources managed by this instance.
          """
          if self._own_fs and hasattr(self.fs, "clear_instance_cache"):
              if self.logger:
@@ -88,10 +89,29 @@ class ManagedResource:
              self.fs.clear_instance_cache()

          if self._own_logger and hasattr(self.logger, "shutdown"):
-             # Ensure logger exists before trying to use or shut it down
+             # Ensure the logger exists before trying to use or shut it down
              if self.logger:
                  self.logger.debug(f"'{self.__class__.__name__}' is shutting down its own logger.")
                  self.logger.shutdown()
              self.logger = None # Set to None after shutdown

          self._entered = False
+
+     async def acleanup(self):
+         """
+         Async Cleanup resources managed by this instance.
+         """
+         if self._own_fs and hasattr(self.fs, "clear_instance_cache"):
+             if self.logger:
+                 self.logger.debug(f"'{self.__class__.__name__}' is clearing its own filesystem cache.")
+             self.fs.clear_instance_cache()
+
+         if self._own_logger and hasattr(self.logger, "shutdown"):
+             # Ensure the logger exists before trying to use or shut it down
+             if self.logger:
+                 self.logger.debug(f"'{self.__class__.__name__}' is shutting down its own logger.")
+                 self.logger.shutdown()
+             self.logger = None # Set to None after shutdown
+
+         self._entered = False
+
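
With __aexit__ delegating to the new acleanup(), ManagedResource subclasses can be used under async with and get the same cache/logger teardown as the synchronous path. A minimal sketch, assuming the matching __aenter__ already present in base.py; the subclass and its kwargs are illustrative:

    import asyncio

    class MyArtifact(ManagedResource):
        pass  # illustrative subclass

    async def main():
        # __aexit__ awaits acleanup() on the way out, even if the body raises.
        async with MyArtifact(debug=True) as res:
            ...

    asyncio.run(main())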
@@ -1,4 +1,5 @@
  from concurrent.futures import ThreadPoolExecutor
+ from typing import ClassVar, Dict

  import clickhouse_connect
  import pandas as pd
@@ -36,7 +37,7 @@ class ClickHouseWriter(ManagedResource):
      :ivar order_by: Field or column name to use for table ordering.
      :type order_by: str
      """
-     dtype_to_clickhouse = {
+     dtype_to_clickhouse: ClassVar[Dict[str, str]] = {
          'int64': 'Int64',
          'int32': 'Int32',
          'float64': 'Float64',
@@ -3,7 +3,7 @@ import logging
  import threading
  import time
  from concurrent.futures import ThreadPoolExecutor, as_completed
- from typing import Type, Any, Dict, Optional, Union, List
+ from typing import Type, Any, Dict, Optional, Union, List, ClassVar

  import fsspec
  import pandas as pd
@@ -15,15 +15,15 @@ from .parquet_saver import ParquetSaver


  class DataWrapper(ManagedResource):
-     DEFAULT_PRIORITY_MAP = {
+     DEFAULT_PRIORITY_MAP: ClassVar[Dict[str, int]] = {
          "overwrite": 1,
          "missing_in_history": 2,
          "existing_but_stale": 3,
          "missing_outside_history": 4,
          "file_is_recent": 0
      }
-     DEFAULT_MAX_AGE_MINUTES = 1440
-     DEFAULT_HISTORY_DAYS_THRESHOLD = 30
+     DEFAULT_MAX_AGE_MINUTES: int = 1440
+     DEFAULT_HISTORY_DAYS_THRESHOLD: int = 30

      def __init__(
          self,
@@ -31,12 +31,8 @@ class DataWrapper(ManagedResource):
          date_field: str,
          data_path: str,
          parquet_filename: str,
-         #fs: Optional[fsspec.AbstractFileSystem] = None,
-         #debug: bool = False,
-         #verbose: bool = False,
          class_params: Optional[Dict] = None,
          load_params: Optional[Dict] = None,
-         #logger: Logger = None,
          show_progress: bool = False,
          timeout: float = 30,
          max_threads: int = 3,
@@ -47,14 +43,8 @@ class DataWrapper(ManagedResource):
          self.date_field = date_field
          self.data_path = self._ensure_forward_slash(data_path)
          self.parquet_filename = parquet_filename
-         #self.fs = fs or None
          if self.fs is None:
              raise ValueError("Datawrapper requires a File system (fs) to be provided .")
-         #self.debug = debug
-         #self.verbose = verbose
-         #self._own_logger = logger is None
-         #self.logger = logger or Logger.default_logger(logger_name=self.dataclass.__name__)
-         #self.logger.set_level(logging.DEBUG if debug else logging.INFO)
          self.show_progress = show_progress
          self.timeout = timeout
          self.max_threads = max_threads
@@ -71,7 +61,6 @@ class DataWrapper(ManagedResource):
          self.benchmarks: Dict[datetime.date, Dict[str, float]] = {}
          self.mmanifest = kwargs.get("mmanifest", None)
          self.update_planner=kwargs.get("update_planner", None)
-         # self.datacls = self.dataclass(**self.class_params)


      def __exit__(self, exc_type, exc_val, exc_tb):
@@ -174,7 +163,7 @@ class DataWrapper(ManagedResource):
              load_time = time.perf_counter() - load_start

              if hasattr(local_class_instance, "total_records"):
-                 self.logger.debug(f"Total records loaded by {local_class_instance}: {local_class_instance.total_records}")
+                 self.logger.debug(f"Total records loaded by {local_class_instance.__class__.__name__}: {local_class_instance.total_records}")
                  if int(local_class_instance.total_records) == 0: # If no records were loaded but not due to an error
                      if self.mmanifest:
                          self.mmanifest.record(
@@ -183,12 +172,13 @@ class DataWrapper(ManagedResource):
                          self.logger.info(f"No data found for {date}. Logged to missing manifest.")
                      return
          save_start = time.perf_counter()
-         ParquetSaver(
+         with ParquetSaver(
              df_result=df,
              parquet_storage_path=path,
              fs=self.fs,
              logger=self.logger
-         ).save_to_parquet(self.parquet_filename, overwrite=True)
+         ) as ps:
+             ps.save_to_parquet(self.parquet_filename, overwrite=True)
          save_time = time.perf_counter() - save_start

          total_time = time.perf_counter() - overall_start
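
Wrapping ParquetSaver in a with block ties the write to ManagedResource's cleanup even when save_to_parquet raises. The same pattern in isolation, with placeholder argument values:

    with ParquetSaver(
        df_result=df,                      # a dask DataFrame
        parquet_storage_path='/data/out',  # placeholder path
        fs=fs,
        logger=logger,
    ) as ps:
        ps.save_to_parquet('part.parquet', overwrite=True)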
@@ -218,4 +208,4 @@ class DataWrapper(ManagedResource):
              return
          df_bench = pd.DataFrame.from_records([{"date": d, **m} for d, m in self.benchmarks.items()])
          df_bench = df_bench.set_index("date").sort_index(ascending=not self.update_planner.reverse_order)
-         self.logger.info("Benchmark Summary:\n" + df_bench.to_string())
+         self.logger.info(f"Benchmark Summary:\n {self.dataclass.__name__}\n" + df_bench.to_string())
@@ -1,6 +1,6 @@
  import datetime
  from concurrent.futures import ThreadPoolExecutor, as_completed
- from typing import List, Optional, Dict, Union, Tuple, Set, Iterator
+ from typing import List, Optional, Dict, Union, Tuple, Set, Iterator, ClassVar
  import pandas as pd
  from .date_utils import FileAgeChecker
  from pydantic import BaseModel, Field
@@ -55,7 +55,7 @@ class UpdatePlanner(ManagedResource):
      generate_plan() will overwrite self.plan and self.df_req, and returns a DataFrame of required updates.
      """

-     DEFAULT_PRIORITY_MAP = {
+     DEFAULT_PRIORITY_MAP: ClassVar[Dict[str, int]]={
          "file_is_recent": 0,
          "missing_ignored": 0,
          "overwrite_forced": 1,
@@ -64,8 +64,8 @@ class UpdatePlanner(ManagedResource):
          "stale_in_history": 4,
      }

-     DEFAULT_MAX_AGE_MINUTES = 1440
-     DEFAULT_HISTORY_DAYS_THRESHOLD = 30
+     DEFAULT_MAX_AGE_MINUTES: int = 1440
+     DEFAULT_HISTORY_DAYS_THRESHOLD: int = 30

      def __init__(
          self,
@@ -217,7 +217,7 @@ class UpdatePlanner(ManagedResource):
          for priority in sorted(required_updates["update_priority"].unique()):
              dates_df = required_updates[required_updates["update_priority"] == priority]
              # Sort dates within the priority group
-             sorted_dates = dates_df.sort_values(by="date", ascending=not self.reverse_order)
+             sorted_dates = dates_df.sort_values(by=["date"], ascending=not self.reverse_order)
              dates = sorted_dates["date"].tolist()
              if dates:
                  yield priority, dates
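
The ClassVar annotations added across ParquetArtifact, ParquetReader, ClickHouseWriter, DataWrapper, and UpdatePlanner mark these mappings and thresholds as class-level constants, so annotation-driven tooling (dataclasses, pydantic, type checkers) does not treat them as per-instance fields. A small self-contained illustration of the distinction, not code from the package:

    from dataclasses import dataclass
    from typing import ClassVar, Dict

    @dataclass
    class Planner:
        # Class-level constant: ignored by the dataclass machinery.
        DEFAULT_PRIORITY_MAP: ClassVar[Dict[str, int]] = {"overwrite_forced": 1}
        # Instance field: becomes an __init__ parameter.
        reverse_order: bool = False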
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sibi-dst
- Version: 2025.1.5
+ Version: 2025.1.7
  Summary: Data Science Toolkit
  Author: Luis Valverde
  Author-email: lvalverdeb@gmail.com
@@ -13,6 +13,8 @@ Requires-Dist: clickhouse-connect (>=0.8.18,<0.9.0)
  Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
  Requires-Dist: dask[complete] (>=2025.5.1,<2026.0.0)
  Requires-Dist: mysqlclient (>=2.2.7,<3.0.0)
+ Requires-Dist: opentelemetry-exporter-otlp (>=1.35.0,<2.0.0)
+ Requires-Dist: opentelemetry-sdk (>=1.35.0,<2.0.0)
  Requires-Dist: pandas (>=2.3.1,<3.0.0)
  Requires-Dist: psycopg2 (>=2.9.10,<3.0.0)
  Requires-Dist: pyarrow (>=20.0.0,<21.0.0)
@@ -1,15 +1,15 @@
  sibi_dst/__init__.py,sha256=j8lZpGCJlxlLgEgeIMxZnWdqJ0g3MCs7-gsnbvPn_KY,285
- sibi_dst/df_helper/__init__.py,sha256=VJE1qvKO-7QsFADZxSY5s4LVoWnPKfz0rP3nYO2ljhA,358
- sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=LvFCNr4VKFV-b-NS_TeRkaoKsM4tdsPtAgSIwMvKgGA,18043
+ sibi_dst/df_helper/__init__.py,sha256=Jur_MO8RGPkVw0CS3XH5YIWv-d922DC_FwRDTvHHV6Y,432
+ sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=10EkCYEfoWwTQbS-ahYWo6TvbtNXM8p0UqqDu0gTuyI,17426
  sibi_dst/df_helper/_df_helper.py,sha256=iBoWz2iVgLzQ3hA1EwllL62dkraKamRx2sXseu30FVI,11914
- sibi_dst/df_helper/_parquet_artifact.py,sha256=vDZOtSugBuWuZ3W6l2Y7IBO6RohIrA_sVNuPHp8e8h0,15438
- sibi_dst/df_helper/_parquet_reader.py,sha256=o5ijxHtD1EMzUUD9e6PIoGMeuSLHDItvZIouGfVZhgA,3817
+ sibi_dst/df_helper/_parquet_artifact.py,sha256=dCvUA2bytv0wY0pFI8lxbcLwXlgGpHndS36iKfEmjLw,14310
+ sibi_dst/df_helper/_parquet_reader.py,sha256=m98C0TZRroOXvVc2LpEuElrJnquGlR81E1gjI7v1hi4,3102
  sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sibi_dst/df_helper/backends/http/__init__.py,sha256=d1pfgYxbiYg7E0Iw8RbJ7xfqIfJShqqTBQQGU_S6OOo,105
  sibi_dst/df_helper/backends/http/_http_config.py,sha256=eGPFdqZ5M3Tscqx2P93B6XoBEEzlmdt7yNg7PXUQnNQ,4726
  sibi_dst/df_helper/backends/parquet/__init__.py,sha256=esWJ9aSuYC26d-T01z9dPrJ1uqJzvdaPNTYRb5qXTlQ,182
  sibi_dst/df_helper/backends/parquet/_filter_handler.py,sha256=TvDf0RXta7mwJv11GNQttYJsXgFf2XDj4oLIjt4xTzA,5219
- sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=TaU5_wG1Y3lQC8DVCItVvMnc6ZJmECLu3avssVEMbaM,10591
+ sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=lrDn2-BbgxDor5g71LAu5LDg2g3ApGAPiQfbFTB2xNA,10702
  sibi_dst/df_helper/backends/sqlalchemy/__init__.py,sha256=LjWm9B7CweTvlvFOgB90XjSe0lVLILAIYMWKPkFXFm8,265
  sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py,sha256=Rsvh1nfVtqzfMhv968vNTYYIqVxYsEs4PB-O5CTSYdk,10935
  sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py,sha256=NqBSHqeYv_1vHt6J0tez0GdMwKrP_sIRcXYXu869ZkY,13313
@@ -33,12 +33,12 @@ sibi_dst/osmnx_helper/utils.py,sha256=BzuY8CtYnBAAO8UAr_M7EOk6CP1zcifNLs8pkdFZEF
  sibi_dst/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sibi_dst/tests/test_data_wrapper_class.py,sha256=6uFmZR2DxnxQz49L5jT2ehlKvlLnpUHMLFB_PqqUq7k,3336
  sibi_dst/utils/__init__.py,sha256=PQsG188_lnqgSFljkCc15Nyv933HnvmQ7XYs02m77Vc,1217
- sibi_dst/utils/base.py,sha256=MBshlQKg-WNeTeuE_aMLQjcBRaa4O_TFED7bVKDhJ-o,3783
- sibi_dst/utils/clickhouse_writer.py,sha256=5XDRjXU9d0Vhb9GFdCiRoNXrucJvTm6h8auCAQbEwW0,9917
+ sibi_dst/utils/base.py,sha256=RGLcCpGeWTKxsAl9wcxicaS6nfq3lFdsgE9XiOOD_a8,4568
+ sibi_dst/utils/clickhouse_writer.py,sha256=mNUJoYOreIdRrEFv2mQ6pdtLi1Iz_2rALDyO6ARTxhs,9978
  sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
  sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
  sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
- sibi_dst/utils/data_wrapper.py,sha256=Vx3At8SlAoMCTaXmVsTiTGynfjV2isB9W6yL0cWZ7g4,9346
+ sibi_dst/utils/data_wrapper.py,sha256=8lbKe85XJB0VhIR9GRo7-R_9DBhRZvNv8MsqSbsupeA,8945
  sibi_dst/utils/date_utils.py,sha256=8fwPpOYqSdM3nHeNykh7Ftk-uPdFa44cEAy5S8iUNw4,18667
  sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
  sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
@@ -49,7 +49,7 @@ sibi_dst/utils/parquet_saver.py,sha256=zau_s0Mn2ccz9ivVtRbTkBmCghUgCofI1LsCdy1df
  sibi_dst/utils/phone_formatter.py,sha256=tsVTDamuthFYgy4-5UwmQkPQ-FGTGH7MjZyH8utAkIY,4945
  sibi_dst/utils/storage_config.py,sha256=uaCBF8rgCeYkk-lxVSCjsic8O8HJKAu455MR-OBliCo,4325
  sibi_dst/utils/storage_manager.py,sha256=yyZqT8XjTf4MKFrfznCmxXxOYz_TiWgtQhzqPoXR9So,6569
- sibi_dst/utils/update_planner.py,sha256=2ZVsuZlghKDRv7IhqaraS-7GRIY6nGRpFnpBdXYo7Io,11538
+ sibi_dst/utils/update_planner.py,sha256=UH14HVFUtvWtCY9jzmYbQbe7lUHRrpnNDn2h0JT7OvI,11584
  sibi_dst/utils/webdav_client.py,sha256=pYF1UsGOuxYeGLq7aBfwZFvkvD4meOcbbaiZ4d6GW9I,7107
  sibi_dst/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sibi_dst/v2/df_helper/__init__.py,sha256=XuH6jKYAPg2DdRbsxxBSxp9X3x-ARyaT0xe27uILrVo,99
@@ -71,6 +71,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
  sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
  sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
  sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
- sibi_dst-2025.1.5.dist-info/METADATA,sha256=T0zFKtNSQ7if1S590EwTZ_CN96oiDe8t559-xFQ-XWM,2498
- sibi_dst-2025.1.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
- sibi_dst-2025.1.5.dist-info/RECORD,,
+ sibi_dst-2025.1.7.dist-info/METADATA,sha256=AaJunhF_PdvxT9KpA0mVzQhoY1sAZoe5HTA9ClabYPs,2610
+ sibi_dst-2025.1.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ sibi_dst-2025.1.7.dist-info/RECORD,,