sibi-dst 0.3.60__py3-none-any.whl → 0.3.62__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
sibi_dst/utils/data_wrapper.py

@@ -30,25 +30,14 @@ class DataWrapper:
             date_field: str,
             data_path: str,
             parquet_filename: str,
-            #start_date: Any,
-            #end_date: Any,
             fs: Optional[fsspec.AbstractFileSystem] = None,
-            #filesystem_type: str = "file",
-            #filesystem_options: Optional[Dict] = None,
             debug: bool = False,
             verbose: bool = False,
             class_params: Optional[Dict] = None,
             load_params: Optional[Dict] = None,
-            #reverse_order: bool = False,
-            #overwrite: bool = False,
-            #ignore_missing: bool = False,
             logger: Logger = None,
-            #max_age_minutes: int = DEFAULT_MAX_AGE_MINUTES,
-            #history_days_threshold: int = DEFAULT_HISTORY_DAYS_THRESHOLD,
             show_progress: bool = False,
             timeout: float = 60,
-            #reference_date: datetime.date = None,
-            #custom_priority_map: Dict[str, int] = None,
             max_threads: int = 3,
             **kwargs: Any,
     ):
@@ -56,22 +45,13 @@ class DataWrapper:
         self.date_field = date_field
         self.data_path = self._ensure_forward_slash(data_path)
         self.parquet_filename = parquet_filename
-        #self.filesystem_type = filesystem_type
-        #self.filesystem_options = filesystem_options or {}
         self.fs = fs or None
         self.debug = debug
         self.verbose = verbose
-        # self.reverse_order = reverse_order
-        # self.overwrite = overwrite
-        # self.ignore_missing = ignore_missing
         self.logger = logger or Logger.default_logger(logger_name=self.dataclass.__name__)
         self.logger.set_level(logging.DEBUG if debug else logging.INFO)
-        # self.max_age_minutes = max_age_minutes
-        # self.history_days_threshold = history_days_threshold
         self.show_progress = show_progress
         self.timeout = timeout
-        #self.reference_date = reference_date or datetime.date.today()
-        #self.priority_map = custom_priority_map or self.DEFAULT_PRIORITY_MAP
         self.max_threads = max_threads
         self.class_params = class_params or {
             'debug': self.debug,
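
The two hunks above are a single cleanup: the commented-out constructor parameters and their matching assignments (reverse_order, overwrite, ignore_missing, max_age_minutes, history_days_threshold, reference_date, custom_priority_map, and the filesystem_* pair) are deleted outright. The options are not gone from the feature set; the hunks below read reverse_order and ignore_missing from a self.update_planner collaborator instead of from DataWrapper itself.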
@@ -95,6 +75,7 @@ class DataWrapper:
         """Context manager exit"""
         if self.mmanifest and self.mmanifest._new_records:
             self.mmanifest.save()
+            self.mmanifest.cleanup_temp_manifests()
         if exc_type is not None:
             self.logger.error(f"Exception occurred: {exc_val}")
         return False
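
The one behavioral change in this hunk: when DataWrapper is used as a context manager, a manifest save on exit is now followed by cleanup_temp_manifests(), which reclaims the temp files that the new save() deliberately leaves behind (see the manifest_manager.py hunks below). A minimal usage sketch; the constructor arguments and the process() call are hypothetical placeholders, not the documented API:

    # Hypothetical sketch: leaving the `with` block runs __exit__, which
    # saves the missing-files manifest and then cleans up temp manifests.
    with DataWrapper(
        dataclass=MyDataset,                # placeholder dataclass
        date_field="created_at",
        data_path="s3://my-bucket/data/",   # placeholder path
        parquet_filename="part.parquet",
    ) as wrapper:
        wrapper.process()                   # placeholder work method
    # __exit__: mmanifest.save(), then mmanifest.cleanup_temp_manifests()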
@@ -185,9 +166,9 @@ class DataWrapper:
         path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
         self.logger.info(f"Processing date {date.isoformat()} for {path}")
         # self.logger.info(f"Path {path} in {self.skipped}: {path in self.skipped}")
-        #if path in self.skipped:
-        #   self.logger.info(f"Skipping {date} as it exists in the skipped list")
-        #   return
+        if path in self.update_planner.skipped and self.update_planner.ignore_missing:
+            self.logger.info(f"Skipping {date} as it exists in the skipped list")
+            return
         full_path = f"{path}{self.parquet_filename}"

         thread_name = threading.current_thread().name
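
The previously commented-out skip logic is revived, but gated on the update planner: a date is skipped only when its partition path is in update_planner.skipped and ignore_missing is set. For reference, the per-day partition path built at the top of this hunk expands as in this sketch (root path hypothetical):

    import datetime

    data_path = "s3://my-bucket/dataset/"  # hypothetical root
    date = datetime.date(2024, 7, 3)
    path = f"{data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
    assert path == "s3://my-bucket/dataset/2024/07/03/"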
@@ -254,5 +235,5 @@ class DataWrapper:
             self.logger.info("No benchmarking data to show")
             return
         df_bench = pd.DataFrame.from_records([{"date": d, **m} for d, m in self.benchmarks.items()])
-        df_bench = df_bench.set_index("date").sort_index(ascending=not self.reverse_order)
+        df_bench = df_bench.set_index("date").sort_index(ascending=not self.update_planner.reverse_order)
         self.logger.info("Benchmark Summary:\n" + df_bench.to_string())
sibi_dst/utils/manifest_manager.py

@@ -2,6 +2,9 @@ import pandas as pd
 import fsspec
 import threading
 import uuid
+import hashlib
+import base64
+import json
 from typing import List, Optional, Set, Dict, Any

 from sibi_dst.utils import Logger
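
The three new imports exist solely for the cleanup_temp_manifests() method added further down: json serializes the S3 DeleteObjects payload, and hashlib plus base64 produce its Content-MD5 checksum. Note also that several of the following hunks (the __init__ signature and the .dropna()/.loc chains in load_existing() and save()) are indentation-only reformats; their content is unchanged.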
@@ -13,11 +16,11 @@ class MissingManifestManager:
     """

     def __init__(
-        self,
-        fs: fsspec.AbstractFileSystem,
-        manifest_path: str,
-        clear_existing: bool = False,
-        **kwargs: Any,
+            self,
+            fs: fsspec.AbstractFileSystem,
+            manifest_path: str,
+            clear_existing: bool = False,
+            **kwargs: Any,
     ):
         self.fs = fs
         self.manifest_path = manifest_path.rstrip("/")
@@ -30,29 +33,18 @@ class MissingManifestManager:
         )
         self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)

-        # In-memory list for new paths
         self._new_records: List[Dict[str, str]] = []
-        # Cached set of existing paths
         self._loaded_paths: Optional[Set[str]] = None
-
-        # Use a reentrant lock so save() can call load_existing() safely
         self._lock = threading.RLock()

     def _safe_exists(self, path: str) -> bool:
         try:
             return self.fs.exists(path)
-        except PermissionError:
-            if self.debug:
-                self.logger.debug(f"Permission denied checking existence of '{path}'")
-            return False
         except Exception as e:
             self.logger.warning(f"Error checking existence of '{path}': {e}")
             return False

     def load_existing(self) -> Set[str]:
-        """
-        Load and cache existing manifest paths.
-        """
         with self._lock:
             if self._loaded_paths is not None:
                 return self._loaded_paths
@@ -65,8 +57,8 @@ class MissingManifestManager:
             df = pd.read_parquet(self.manifest_path, filesystem=self.fs)
             paths = (
                 df.get("path", pd.Series(dtype=str))
-                .dropna().astype(str)
-                .loc[lambda s: s.str.strip().astype(bool)]
+                    .dropna().astype(str)
+                    .loc[lambda s: s.str.strip().astype(bool)]
             )
             self._loaded_paths = set(paths.tolist())
         except Exception as e:
@@ -76,34 +68,25 @@ class MissingManifestManager:
         return self._loaded_paths

     def record(self, full_path: str) -> None:
-        """
-        Register a missing file path.
-        """
         if not full_path or not isinstance(full_path, str):
             return
         with self._lock:
             self._new_records.append({"path": full_path})

     def save(self) -> None:
-        """
-        Merge new records into the manifest and write it out atomically.
-        """
         with self._lock:
-            # Build DataFrame of new entries
             new_df = pd.DataFrame(self._new_records)
             should_overwrite = self.clear_existing or not self._safe_exists(self.manifest_path)
             if new_df.empty and not should_overwrite:
                 return

-            # Clean new_df
             new_df = (
                 new_df.get("path", pd.Series(dtype=str))
-                .dropna().astype(str)
-                .loc[lambda s: s.str.strip().astype(bool)]
-                .to_frame()
+                    .dropna().astype(str)
+                    .loc[lambda s: s.str.strip().astype(bool)]
+                    .to_frame()
             )

-            # Merge or overwrite
             if should_overwrite:
                 out_df = new_df
             else:
@@ -111,9 +94,9 @@ class MissingManifestManager:
                 old_df = pd.read_parquet(self.manifest_path, filesystem=self.fs)
                 old_paths = (
                     old_df.get("path", pd.Series(dtype=str))
-                    .dropna().astype(str)
-                    .loc[lambda s: s.str.strip().astype(bool)]
-                    .to_frame()
+                        .dropna().astype(str)
+                        .loc[lambda s: s.str.strip().astype(bool)]
+                        .to_frame()
                 )
                 out_df = pd.concat([old_paths, new_df], ignore_index=True)
             except Exception as e:
@@ -122,14 +105,12 @@
             out_df = out_df.drop_duplicates(subset=["path"]).reset_index(drop=True)


-            # Ensure parent dir
            parent = self.manifest_path.rsplit("/", 1)[0]
            try:
                self.fs.makedirs(parent, exist_ok=True)
            except Exception as e:
                self.logger.warning(f"Could not create manifest directory '{parent}': {e}")

-            # Write atomically: temp file + rename
            temp_path = f"{self.manifest_path}.tmp-{uuid.uuid4().hex}"
            try:
                out_df.to_parquet(
@@ -137,18 +118,369 @@
                    filesystem=self.fs,
                    index=False
                )
-                # rename into place (atomic in most filesystems)
-                self.fs.mv(temp_path, self.manifest_path, recursive=False)
+                self.fs.copy(temp_path, self.manifest_path)
+                self.logger.info(f"Copied manifest to {self.manifest_path} (temp: {temp_path})")
            except Exception as e:
-                self.logger.error(f"Failed to write or rename manifest: {e}")
-                # Clean up temp if it exists
-                try:
-                    if self.fs.exists(temp_path):
-                        self.fs.rm(temp_path, recursive=True)
-                except Exception:
-                    pass
+                self.logger.error(f"Failed to write or copy manifest: {e}")
                raise

-            # Reset memory & cache
+            self.logger.debug(f"Temp file left behind: {temp_path}")
            self._new_records.clear()
-            self._loaded_paths = set(out_df["path"].tolist())
+            self._loaded_paths = set(out_df["path"].tolist())
+
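
This is the substantive change to save(): the atomic fs.mv() rename is replaced by fs.copy(), and the error-path temp-file cleanup is dropped. The temp object is now intentionally left behind (logged at debug level) and reclaimed later by cleanup_temp_manifests() in the next hunk, sidestepping rename semantics that object stores such as S3 only emulate. The new flow, as a sketch (fs, manifest_path, and out_df assumed in scope):

    import uuid

    temp_path = f"{manifest_path}.tmp-{uuid.uuid4().hex}"
    out_df.to_parquet(temp_path, filesystem=fs, index=False)  # stage the data
    fs.copy(temp_path, manifest_path)                         # publish by copy
    # no mv()/rm() here: the temp object survives until
    # cleanup_temp_manifests() batch-deletes it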
+    def cleanup_temp_manifests(self) -> None:
+        if not hasattr(self.fs, "s3"):
+            self.logger.info("Filesystem is not s3fs; skipping temp cleanup.")
+            return
+
+        try:
+            bucket, prefix = self._parse_s3_path(self.manifest_path.rsplit("/", 1)[0])
+            files = self.fs.ls(f"s3://{bucket}/{prefix}", detail=True)
+            temp_files = [
+                f for f in files
+                if f["name"].endswith(".parquet") and ".tmp-" in f["name"]
+            ]
+            if not temp_files:
+                return
+
+            objects = [{"Key": f["name"].replace(f"{bucket}/", "", 1)} for f in temp_files]
+            delete_payload = {
+                "Objects": objects,
+                "Quiet": True
+            }
+
+            json_payload = json.dumps(delete_payload).encode("utf-8")
+            content_md5 = base64.b64encode(hashlib.md5(json_payload).digest()).decode("utf-8")
+
+            self.fs.s3.meta.client.delete_objects(
+                Bucket=bucket,
+                Delete=delete_payload,
+                ContentMD5=content_md5
+            )
+            self.logger.info(f"Deleted {len(objects)} temp manifest files in s3://{bucket}/{prefix}")
+        except Exception as e:
+            self.logger.error(f"Failed to cleanup temp manifest files: {e}")
+
+    @staticmethod
+    def _parse_s3_path(s3_path: str):
+        if not s3_path.startswith("s3://"):
+            raise ValueError("Invalid S3 path. Must start with 's3://'.")
+        path_parts = s3_path[5:].split("/", 1)
+        bucket_name = path_parts[0]
+        prefix = path_parts[1] if len(path_parts) > 1 else ""
+        return bucket_name, prefix
+
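
Two notes on the new methods. _parse_s3_path() is a plain prefix split, so "s3://my-bucket/a/b" yields ("my-bucket", "a/b"). The Content-MD5 value is the base64-encoded MD5 digest of the serialized delete payload, which S3 requires for DeleteObjects requests; a standalone sketch of the same computation (bucket and key invented):

    import base64, hashlib, json

    payload = {"Objects": [{"Key": "prefix/manifest.parquet.tmp-abc123"}], "Quiet": True}
    body = json.dumps(payload).encode("utf-8")
    content_md5 = base64.b64encode(hashlib.md5(body).digest()).decode("utf-8")

One caveat worth flagging: the method hashes the JSON rendering of the payload dict, while boto3 serializes DeleteObjects to XML on the wire (and normally injects the required Content-MD5 itself), so the hand-rolled checksum mirrors the package's own computation rather than the actual request body.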
[The remainder of this hunk adds roughly 250 comment lines: two commented-out legacy copies of the MissingManifestManager class (the pre-0.3.62 implementation, including the PermissionError handling, the removed docstrings, and the old atomic fs.mv rename and copy-then-rm write paths), left in the new file as dead code. Elided here.]
sibi_dst-0.3.62.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sibi-dst
-Version: 0.3.60
+Version: 0.3.62
 Summary: Data Science Toolkit
 Author: Luis Valverde
 Author-email: lvalverdeb@gmail.com
sibi_dst-0.3.62.dist-info/RECORD

@@ -44,13 +44,13 @@ sibi_dst/utils/clickhouse_writer.py,sha256=iAUe4_Kn2WR1xZjpLW2FOWCWfOTw6fCGMTUcW
 sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
 sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
 sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
-sibi_dst/utils/data_wrapper.py,sha256=DFkqi84DIGxcrf36FfbgmeF9Hu7PZjMO9otNerV8ZYk,10546
+sibi_dst/utils/data_wrapper.py,sha256=o7zAdiUl2ohXhISYi4cv7osxppdongNh-A_OXTs-CXU,9641
 sibi_dst/utils/date_utils.py,sha256=T3ij-WOQu3cIfmNAweSVMWWr-hVtuBcTGjEY-cMJIvU,18627
 sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
 sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
 sibi_dst/utils/filepath_generator.py,sha256=-HHO0U-PR8fysDDFwnWdHRlgqksh_RkmgBZLWv9hM7s,6669
 sibi_dst/utils/log_utils.py,sha256=77xACRagKU83H9vn7aVeBzkQjxWlbe4dg4KuxPRCgvw,4635
-sibi_dst/utils/manifest_manager.py,sha256=abm97TuWgJqNViPXMbpl5W7ttrg1BeiJkf2SMGc4hd8,5512
+sibi_dst/utils/manifest_manager.py,sha256=eyk6Dvrn86gUpAaAsnQvNnEJn5-Tno-sDDJsDMfHtTA,18161
 sibi_dst/utils/parquet_saver.py,sha256=O62xwPfphOpKgEiHqnts20CPSU96pxs49Cg7PVetLK0,8193
 sibi_dst/utils/phone_formatter.py,sha256=tsVTDamuthFYgy4-5UwmQkPQ-FGTGH7MjZyH8utAkIY,4945
 sibi_dst/utils/storage_config.py,sha256=TE15H-7d0mqwYPSUgrdidK9U7N7p87Z8JfUQH4-jdPs,4123
@@ -77,6 +77,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
 sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
 sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
 sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
-sibi_dst-0.3.60.dist-info/METADATA,sha256=WuBvzuHMuZBUpHF2-dAO65MI1e5EhuZ1-Hvil3oQY6o,4292
-sibi_dst-0.3.60.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-sibi_dst-0.3.60.dist-info/RECORD,,
+sibi_dst-0.3.62.dist-info/METADATA,sha256=wqtWlXfA57ZXflNzp6WYvnHWyZ0JkDu8P1gxmEqF3rs,4292
+sibi_dst-0.3.62.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+sibi_dst-0.3.62.dist-info/RECORD,,
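
The RECORD size columns corroborate the source changes: data_wrapper.py shrinks from 10546 to 9641 bytes (the deleted commented-out parameters), while manifest_manager.py grows from 5512 to 18161 bytes, almost entirely due to the two commented-out legacy copies noted above.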