sibi-dst 0.3.61__py3-none-any.whl → 0.3.63__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -158,6 +158,7 @@ class ParquetArtifact(DfHelper):
|
|
158
158
|
if not manifest_exists:
|
159
159
|
self.logger.info(f"Creating new manifest at {self.missing_manifest_path}")
|
160
160
|
self.mmanifest.save()
|
161
|
+
self.mmanifest.cleanup_temp_manifests()
|
161
162
|
else:
|
162
163
|
self.logger.info(f"Manifest already exists at {self.missing_manifest_path}")
|
163
164
|
|
@@ -206,6 +207,7 @@ class ParquetArtifact(DfHelper):
|
|
206
207
|
try:
|
207
208
|
if self.mmanifest and self.mmanifest._new_records:
|
208
209
|
self.mmanifest.save()
|
210
|
+
self.mmanifest.cleanup_temp_manifests()
|
209
211
|
if getattr(self, "_entered", False) and self.fs and self._own_fs:
|
210
212
|
self.fs.close()
|
211
213
|
except Exception as e:
|
sibi_dst/utils/data_wrapper.py
CHANGED
@@ -30,48 +30,28 @@ class DataWrapper:
|
|
30
30
|
date_field: str,
|
31
31
|
data_path: str,
|
32
32
|
parquet_filename: str,
|
33
|
-
#start_date: Any,
|
34
|
-
#end_date: Any,
|
35
33
|
fs: Optional[fsspec.AbstractFileSystem] = None,
|
36
|
-
#filesystem_type: str = "file",
|
37
|
-
#filesystem_options: Optional[Dict] = None,
|
38
34
|
debug: bool = False,
|
39
35
|
verbose: bool = False,
|
40
36
|
class_params: Optional[Dict] = None,
|
41
37
|
load_params: Optional[Dict] = None,
|
42
|
-
#reverse_order: bool = False,
|
43
|
-
#overwrite: bool = False,
|
44
|
-
#ignore_missing: bool = False,
|
45
38
|
logger: Logger = None,
|
46
|
-
#max_age_minutes: int = DEFAULT_MAX_AGE_MINUTES,
|
47
|
-
#history_days_threshold: int = DEFAULT_HISTORY_DAYS_THRESHOLD,
|
48
39
|
show_progress: bool = False,
|
49
|
-
timeout: float =
|
50
|
-
|
51
|
-
#custom_priority_map: Dict[str, int] = None,
|
52
|
-
max_threads: int = 3,
|
40
|
+
timeout: float = 30,
|
41
|
+
max_threads: int = 1,
|
53
42
|
**kwargs: Any,
|
54
43
|
):
|
55
44
|
self.dataclass = dataclass
|
56
45
|
self.date_field = date_field
|
57
46
|
self.data_path = self._ensure_forward_slash(data_path)
|
58
47
|
self.parquet_filename = parquet_filename
|
59
|
-
#self.filesystem_type = filesystem_type
|
60
|
-
#self.filesystem_options = filesystem_options or {}
|
61
48
|
self.fs = fs or None
|
62
49
|
self.debug = debug
|
63
50
|
self.verbose = verbose
|
64
|
-
# self.reverse_order = reverse_order
|
65
|
-
# self.overwrite = overwrite
|
66
|
-
# self.ignore_missing = ignore_missing
|
67
51
|
self.logger = logger or Logger.default_logger(logger_name=self.dataclass.__name__)
|
68
52
|
self.logger.set_level(logging.DEBUG if debug else logging.INFO)
|
69
|
-
# self.max_age_minutes = max_age_minutes
|
70
|
-
# self.history_days_threshold = history_days_threshold
|
71
53
|
self.show_progress = show_progress
|
72
54
|
self.timeout = timeout
|
73
|
-
#self.reference_date = reference_date or datetime.date.today()
|
74
|
-
#self.priority_map = custom_priority_map or self.DEFAULT_PRIORITY_MAP
|
75
55
|
self.max_threads = max_threads
|
76
56
|
self.class_params = class_params or {
|
77
57
|
'debug': self.debug,
|
@@ -95,6 +75,7 @@ class DataWrapper:
|
|
95
75
|
"""Context manager exit"""
|
96
76
|
if self.mmanifest and self.mmanifest._new_records:
|
97
77
|
self.mmanifest.save()
|
78
|
+
self.mmanifest.cleanup_temp_manifests()
|
98
79
|
if exc_type is not None:
|
99
80
|
self.logger.error(f"Exception occurred: {exc_val}")
|
100
81
|
return False
|
@@ -185,9 +166,9 @@ class DataWrapper:
|
|
185
166
|
path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
|
186
167
|
self.logger.info(f"Processing date {date.isoformat()} for {path}")
|
187
168
|
# self.logger.info(f"Path {path} in {self.skipped}: {path in self.skipped}")
|
188
|
-
|
189
|
-
|
190
|
-
|
169
|
+
if path in self.update_planner.skipped and self.update_planner.ignore_missing:
|
170
|
+
self.logger.info(f"Skipping {date} as it exists in the skipped list")
|
171
|
+
return
|
191
172
|
full_path = f"{path}{self.parquet_filename}"
|
192
173
|
|
193
174
|
thread_name = threading.current_thread().name
|
@@ -254,5 +235,5 @@ class DataWrapper:
|
|
254
235
|
self.logger.info("No benchmarking data to show")
|
255
236
|
return
|
256
237
|
df_bench = pd.DataFrame.from_records([{"date": d, **m} for d, m in self.benchmarks.items()])
|
257
|
-
df_bench = df_bench.set_index("date").sort_index(ascending=not self.reverse_order)
|
238
|
+
df_bench = df_bench.set_index("date").sort_index(ascending=not self.update_planner.reverse_order)
|
258
239
|
self.logger.info("Benchmark Summary:\n" + df_bench.to_string())
|
@@ -2,7 +2,7 @@ sibi_dst/__init__.py,sha256=3pbriM7Ym5f9gew7n9cO4G_p9n-0bnxdmQ0hwBdJjr4,253
|
|
2
2
|
sibi_dst/df_helper/__init__.py,sha256=McYrw2N0MsMgtawLrONXTGdyHfQWVOBUvIDbklfjb54,342
|
3
3
|
sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=-Y4i5KAxKY2BNkmoVeMEZxjTFD7zaM9oQ0aRsvUbQrs,9340
|
4
4
|
sibi_dst/df_helper/_df_helper.py,sha256=uKP6i-7dasZQ5zViD8-VJU0lNHumrdZG6IXvDFijZ18,31214
|
5
|
-
sibi_dst/df_helper/_parquet_artifact.py,sha256=
|
5
|
+
sibi_dst/df_helper/_parquet_artifact.py,sha256=Nio5GSD6rTYl52nf_TSpQhYIF0hKqRrB3H3A4zYnaG8,14987
|
6
6
|
sibi_dst/df_helper/_parquet_reader.py,sha256=L6mr2FeKtTeIn37G9EGpvOx8PwMqXb6qnEECqBaiwxo,3954
|
7
7
|
sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
sibi_dst/df_helper/backends/django/__init__.py,sha256=uWHi-DtQX5re7b2HcqoXUH3_FZWOw1VTmDf552FAkNs,256
|
@@ -44,7 +44,7 @@ sibi_dst/utils/clickhouse_writer.py,sha256=iAUe4_Kn2WR1xZjpLW2FOWCWfOTw6fCGMTUcW
|
|
44
44
|
sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
|
45
45
|
sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
|
46
46
|
sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
|
47
|
-
sibi_dst/utils/data_wrapper.py,sha256=
|
47
|
+
sibi_dst/utils/data_wrapper.py,sha256=Tb9bHIHI6qVsdH791BOFN1VrPb-7GS4fHhhHV8hktec,9641
|
48
48
|
sibi_dst/utils/date_utils.py,sha256=T3ij-WOQu3cIfmNAweSVMWWr-hVtuBcTGjEY-cMJIvU,18627
|
49
49
|
sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
|
50
50
|
sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
|
@@ -77,6 +77,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
|
|
77
77
|
sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
|
78
78
|
sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
|
79
79
|
sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
|
80
|
-
sibi_dst-0.3.
|
81
|
-
sibi_dst-0.3.
|
82
|
-
sibi_dst-0.3.
|
80
|
+
sibi_dst-0.3.63.dist-info/METADATA,sha256=ZsVn8AeFIUeVrb0Ybxmjk393FdUyn2j2fOnGQ8MXM1k,4292
|
81
|
+
sibi_dst-0.3.63.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
82
|
+
sibi_dst-0.3.63.dist-info/RECORD,,
|
File without changes
|