sibi-dst 0.3.61__py3-none-any.whl → 0.3.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -158,6 +158,7 @@ class ParquetArtifact(DfHelper):
158
158
  if not manifest_exists:
159
159
  self.logger.info(f"Creating new manifest at {self.missing_manifest_path}")
160
160
  self.mmanifest.save()
161
+ self.mmanifest.cleanup_temp_manifests()
161
162
  else:
162
163
  self.logger.info(f"Manifest already exists at {self.missing_manifest_path}")
163
164
 
@@ -206,6 +207,7 @@ class ParquetArtifact(DfHelper):
206
207
  try:
207
208
  if self.mmanifest and self.mmanifest._new_records:
208
209
  self.mmanifest.save()
210
+ self.mmanifest.cleanup_temp_manifests()
209
211
  if getattr(self, "_entered", False) and self.fs and self._own_fs:
210
212
  self.fs.close()
211
213
  except Exception as e:
@@ -30,48 +30,28 @@ class DataWrapper:
30
30
  date_field: str,
31
31
  data_path: str,
32
32
  parquet_filename: str,
33
- #start_date: Any,
34
- #end_date: Any,
35
33
  fs: Optional[fsspec.AbstractFileSystem] = None,
36
- #filesystem_type: str = "file",
37
- #filesystem_options: Optional[Dict] = None,
38
34
  debug: bool = False,
39
35
  verbose: bool = False,
40
36
  class_params: Optional[Dict] = None,
41
37
  load_params: Optional[Dict] = None,
42
- #reverse_order: bool = False,
43
- #overwrite: bool = False,
44
- #ignore_missing: bool = False,
45
38
  logger: Logger = None,
46
- #max_age_minutes: int = DEFAULT_MAX_AGE_MINUTES,
47
- #history_days_threshold: int = DEFAULT_HISTORY_DAYS_THRESHOLD,
48
39
  show_progress: bool = False,
49
- timeout: float = 60,
50
- #reference_date: datetime.date = None,
51
- #custom_priority_map: Dict[str, int] = None,
52
- max_threads: int = 3,
40
+ timeout: float = 30,
41
+ max_threads: int = 1,
53
42
  **kwargs: Any,
54
43
  ):
55
44
  self.dataclass = dataclass
56
45
  self.date_field = date_field
57
46
  self.data_path = self._ensure_forward_slash(data_path)
58
47
  self.parquet_filename = parquet_filename
59
- #self.filesystem_type = filesystem_type
60
- #self.filesystem_options = filesystem_options or {}
61
48
  self.fs = fs or None
62
49
  self.debug = debug
63
50
  self.verbose = verbose
64
- # self.reverse_order = reverse_order
65
- # self.overwrite = overwrite
66
- # self.ignore_missing = ignore_missing
67
51
  self.logger = logger or Logger.default_logger(logger_name=self.dataclass.__name__)
68
52
  self.logger.set_level(logging.DEBUG if debug else logging.INFO)
69
- # self.max_age_minutes = max_age_minutes
70
- # self.history_days_threshold = history_days_threshold
71
53
  self.show_progress = show_progress
72
54
  self.timeout = timeout
73
- #self.reference_date = reference_date or datetime.date.today()
74
- #self.priority_map = custom_priority_map or self.DEFAULT_PRIORITY_MAP
75
55
  self.max_threads = max_threads
76
56
  self.class_params = class_params or {
77
57
  'debug': self.debug,
@@ -95,6 +75,7 @@ class DataWrapper:
95
75
  """Context manager exit"""
96
76
  if self.mmanifest and self.mmanifest._new_records:
97
77
  self.mmanifest.save()
78
+ self.mmanifest.cleanup_temp_manifests()
98
79
  if exc_type is not None:
99
80
  self.logger.error(f"Exception occurred: {exc_val}")
100
81
  return False
@@ -185,9 +166,9 @@ class DataWrapper:
185
166
  path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
186
167
  self.logger.info(f"Processing date {date.isoformat()} for {path}")
187
168
  # self.logger.info(f"Path {path} in {self.skipped}: {path in self.skipped}")
188
- #if path in self.skipped:
189
- # self.logger.info(f"Skipping {date} as it exists in the skipped list")
190
- # return
169
+ if path in self.update_planner.skipped and self.update_planner.ignore_missing:
170
+ self.logger.info(f"Skipping {date} as it exists in the skipped list")
171
+ return
191
172
  full_path = f"{path}{self.parquet_filename}"
192
173
 
193
174
  thread_name = threading.current_thread().name
@@ -254,5 +235,5 @@ class DataWrapper:
254
235
  self.logger.info("No benchmarking data to show")
255
236
  return
256
237
  df_bench = pd.DataFrame.from_records([{"date": d, **m} for d, m in self.benchmarks.items()])
257
- df_bench = df_bench.set_index("date").sort_index(ascending=not self.reverse_order)
238
+ df_bench = df_bench.set_index("date").sort_index(ascending=not self.update_planner.reverse_order)
258
239
  self.logger.info("Benchmark Summary:\n" + df_bench.to_string())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 0.3.61
3
+ Version: 0.3.63
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -2,7 +2,7 @@ sibi_dst/__init__.py,sha256=3pbriM7Ym5f9gew7n9cO4G_p9n-0bnxdmQ0hwBdJjr4,253
2
2
  sibi_dst/df_helper/__init__.py,sha256=McYrw2N0MsMgtawLrONXTGdyHfQWVOBUvIDbklfjb54,342
3
3
  sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=-Y4i5KAxKY2BNkmoVeMEZxjTFD7zaM9oQ0aRsvUbQrs,9340
4
4
  sibi_dst/df_helper/_df_helper.py,sha256=uKP6i-7dasZQ5zViD8-VJU0lNHumrdZG6IXvDFijZ18,31214
5
- sibi_dst/df_helper/_parquet_artifact.py,sha256=6y8nJ-HDAdmy3XNSvnEdA2zBXDhUIVoUeKgXLmVMGCo,14879
5
+ sibi_dst/df_helper/_parquet_artifact.py,sha256=Nio5GSD6rTYl52nf_TSpQhYIF0hKqRrB3H3A4zYnaG8,14987
6
6
  sibi_dst/df_helper/_parquet_reader.py,sha256=L6mr2FeKtTeIn37G9EGpvOx8PwMqXb6qnEECqBaiwxo,3954
7
7
  sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  sibi_dst/df_helper/backends/django/__init__.py,sha256=uWHi-DtQX5re7b2HcqoXUH3_FZWOw1VTmDf552FAkNs,256
@@ -44,7 +44,7 @@ sibi_dst/utils/clickhouse_writer.py,sha256=iAUe4_Kn2WR1xZjpLW2FOWCWfOTw6fCGMTUcW
44
44
  sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
45
45
  sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
46
46
  sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
47
- sibi_dst/utils/data_wrapper.py,sha256=DFkqi84DIGxcrf36FfbgmeF9Hu7PZjMO9otNerV8ZYk,10546
47
+ sibi_dst/utils/data_wrapper.py,sha256=Tb9bHIHI6qVsdH791BOFN1VrPb-7GS4fHhhHV8hktec,9641
48
48
  sibi_dst/utils/date_utils.py,sha256=T3ij-WOQu3cIfmNAweSVMWWr-hVtuBcTGjEY-cMJIvU,18627
49
49
  sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
50
50
  sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
@@ -77,6 +77,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
77
77
  sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
78
78
  sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
79
79
  sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
80
- sibi_dst-0.3.61.dist-info/METADATA,sha256=GZ-Yz9oiehgGgI2iJoCejdExgtclAlaz-N-sI5hGIi0,4292
81
- sibi_dst-0.3.61.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
82
- sibi_dst-0.3.61.dist-info/RECORD,,
80
+ sibi_dst-0.3.63.dist-info/METADATA,sha256=ZsVn8AeFIUeVrb0Ybxmjk393FdUyn2j2fOnGQ8MXM1k,4292
81
+ sibi_dst-0.3.63.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
82
+ sibi_dst-0.3.63.dist-info/RECORD,,