deltacat 1.1.21__py3-none-any.whl → 1.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/s3u.py +2 -2
- deltacat/compute/compactor/utils/round_completion_file.py +1 -1
- deltacat/compute/compactor_v2/private/compaction_utils.py +12 -1
- deltacat/compute/compactor_v2/utils/content_type_params.py +6 -4
- deltacat/compute/resource_estimation/delta.py +16 -2
- deltacat/io/file_object_store.py +16 -1
- deltacat/io/memcached_object_store.py +45 -7
- deltacat/io/object_store.py +14 -0
- deltacat/io/redis_object_store.py +32 -4
- deltacat/io/s3_object_store.py +17 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +50 -0
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +92 -76
- deltacat/tests/compute/test_compact_partition_rebase.py +88 -73
- deltacat/tests/io/test_file_object_store.py +44 -14
- deltacat/tests/io/test_memcached_object_store.py +40 -0
- deltacat/tests/io/test_redis_object_store.py +20 -0
- deltacat/tests/io/test_s3_object_store.py +9 -0
- {deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/METADATA +2 -2
- {deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/RECORD +23 -23
- {deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/LICENSE +0 -0
- {deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/WHEEL +0 -0
- {deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
deltacat/aws/s3u.py
CHANGED
@@ -291,7 +291,7 @@ def read_file(
             f"Retry download for: {s3_url} after receiving {type(e).__name__}"
         ) from e
     except BaseException as e:
-        logger.error(
+        logger.warning(
             f"Read has failed for {s3_url} and content_type={content_type} "
             f"and encoding={content_encoding}. Error: {e}",
             exc_info=True,
@@ -416,7 +416,7 @@ def upload_table(
             f"Retry upload for: {s3_url} after receiving {type(e).__name__}",
         ) from e
     except BaseException as e:
-        logger.error(
+        logger.warning(
             f"Upload has failed for {s3_url} and content_type={content_type}. Error: {e}",
             exc_info=True,
         )
deltacat/compute/compactor/utils/round_completion_file.py
CHANGED
@@ -63,7 +63,7 @@ def read_round_completion_file(
         logger.info(f"Read round completion info: {round_completion_info}")
         break
     else:
-        logger.error(f"Round completion file not present at {rcf_uri}")
+        logger.warning(f"Round completion file not present at {rcf_uri}")

     return round_completion_info

deltacat/compute/compactor_v2/private/compaction_utils.py
CHANGED
@@ -227,6 +227,7 @@ def _run_hash_and_merge(
     previous_compacted_delta_manifest: Optional[Manifest],
     compacted_partition: Partition,
 ) -> List[MergeResult]:
+    created_obj_ids = set()
     telemetry_time_hb = 0
     total_input_records_count = np.int64(0)
     total_hb_record_count = np.int64(0)
@@ -288,6 +289,7 @@ def _run_hash_and_merge(
             hb_result.hash_bucket_group_to_obj_id_tuple
         ):
             if object_id_size_tuple:
+                created_obj_ids.add(object_id_size_tuple[0])
                 all_hash_group_idx_to_obj_id[hash_group_index].append(
                     object_id_size_tuple[0],
                 )
@@ -365,7 +367,16 @@ def _run_hash_and_merge(
     mutable_compaction_audit.set_telemetry_time_in_seconds(
         telemetry_this_round + previous_telemetry
     )
-    params.object_store.clear()
+    if params.num_rounds > 1:
+        logger.info(
+            f"Detected number of rounds to be {params.num_rounds}, "
+            f"preparing to delete {len(created_obj_ids)} objects from object store..."
+        )
+        params.object_store.delete_many(list(created_obj_ids))
+    else:
+        logger.info(
+            f"Detected number of rounds to be {params.num_rounds}, not cleaning up object store..."
+        )

     return merge_results

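Note on the change above: only multi-round compactions clean up, and they free exactly the refs this run created rather than clearing the shared store, which could drop objects owned by other components. A runnable toy illustration of that policy (all names here are hypothetical, not deltacat's):

    # Toy object store: two refs created by this round's hash bucketing, plus
    # one object owned by something else that must survive cleanup.
    class FakeObjectStore:
        def __init__(self):
            self.objects = {"hb-obj-1": b"...", "hb-obj-2": b"...", "other": b"keep"}

        def delete_many(self, refs):
            for ref in refs:
                self.objects.pop(ref, None)
            return True

    store = FakeObjectStore()
    created_obj_ids = {"hb-obj-1", "hb-obj-2"}  # refs tracked during hash bucketing
    num_rounds = 2

    if num_rounds > 1:
        # free only what this round created; "other" survives, which a
        # blanket store.clear() would not guarantee
        store.delete_many(list(created_obj_ids))

    assert set(store.objects) == {"other"}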
deltacat/compute/compactor_v2/utils/content_type_params.py
CHANGED
@@ -97,7 +97,7 @@ def append_content_type_params(
     max_parquet_meta_size_bytes: Optional[int] = MAX_PARQUET_METADATA_SIZE,
     deltacat_storage=unimplemented_deltacat_storage,
     deltacat_storage_kwargs: Optional[Dict[str, Any]] = {},
-) -> None:
+) -> bool:
     """
     This operation appends content type params into the delta entry. Note
     that this operation can be time consuming, hence we cache it in a Ray actor.
@@ -105,7 +105,7 @@ def append_content_type_params(

     if not delta.meta:
         logger.warning(f"Delta with locator {delta.locator} doesn't contain meta.")
-        return
+        return False

     entry_indices_to_download = []
     for entry_index, entry in enumerate(delta.manifest.entries):
@@ -120,7 +120,7 @@ def append_content_type_params(
         logger.info(
             f"No parquet type params to download for delta with locator {delta.locator}."
         )
-        return
+        return False

     ray_namespace = ray.get_runtime_context().namespace
     logger.info(
@@ -147,7 +147,7 @@ def append_content_type_params(
             f" {delta.locator} and digest {delta.locator.hexdigest()}."
         )
         delta.manifest = cached_value.manifest
-        return
+        return True
     logger.info(
         f"Cache doesn't contain parquet meta for delta with locator {delta.locator}."
     )
@@ -215,3 +215,5 @@ def append_content_type_params(
         )
         ray.get(cache.put.remote(delta.locator.hexdigest(), delta))
        assert ray.get(cache.get.remote(delta.locator.hexdigest())) is not None
+
+    return True
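The cache hunks above get and put entries on a shared Ray actor keyed by delta.locator.hexdigest(). A self-contained sketch of the named-actor pattern involved; ContentTypeParamsCache is a hypothetical stand-in for deltacat's actual actor class:

    import ray

    @ray.remote
    class ContentTypeParamsCache:
        # Hypothetical stand-in for the actor cache used above.
        def __init__(self):
            self._store = {}

        def put(self, key, value):
            self._store[key] = value

        def get(self, key):
            return self._store.get(key)

    # Named actors let every worker in the namespace share one cache instance.
    cache = ContentTypeParamsCache.options(
        name="content_type_params_cache", get_if_exists=True
    ).remote()

    ray.get(cache.put.remote("delta-digest", {"row_group_count": 3}))
    assert ray.get(cache.get.remote("delta-digest")) is not None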
deltacat/compute/resource_estimation/delta.py
CHANGED
@@ -42,7 +42,11 @@ def _estimate_resources_required_to_process_delta_using_previous_inflation(
     in_memory_size = (
         delta.meta.content_length * estimate_resources_params.previous_inflation
     )
-    num_rows =
+    num_rows = 0
+    if estimate_resources_params.average_record_size_bytes is not None:
+        num_rows = int(
+            in_memory_size / estimate_resources_params.average_record_size_bytes
+        )

     return EstimatedResources.of(
         memory_bytes=in_memory_size,
@@ -68,6 +72,10 @@ def _estimate_resources_required_to_process_delta_using_type_params(
     ), "Number of rows can only be estimated for PYARROW_DOWNLOAD operation"

     if estimate_resources_params.parquet_to_pyarrow_inflation is None:
+        logger.debug(
+            "Could not estimate using type params as "
+            f"parquet_to_pyarrow_inflation is None for {delta.locator}"
+        )
         return None

     if not delta.manifest:
@@ -86,12 +94,18 @@ def _estimate_resources_required_to_process_delta_using_type_params(
         ),
     )

-    append_content_type_params(
+    appended = append_content_type_params(
         delta=delta,
         deltacat_storage=deltacat_storage,
         deltacat_storage_kwargs=deltacat_storage_kwargs,
     )

+    if not appended:
+        logger.debug(
+            f"Could not append content type params for {delta.locator}, returning None"
+        )
+        return None
+
     in_memory_size = 0.0
     num_rows = 0

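Worked example of the guarded previous-inflation estimate above, with illustrative numbers only: a 1 GB delta at previous_inflation=7 is sized at 7 GB in memory, and a missing average_record_size_bytes now yields num_rows = 0 instead of raising a TypeError:

    content_length = 1_000_000_000                        # 1 GB of on-disk delta content
    previous_inflation = 7
    in_memory_size = content_length * previous_inflation  # 7 GB estimated in memory

    average_record_size_bytes = None                      # previously caused a TypeError
    num_rows = 0
    if average_record_size_bytes is not None:
        num_rows = int(in_memory_size / average_record_size_bytes)

    print(in_memory_size, num_rows)                       # 7000000000 0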
deltacat/io/file_object_store.py
CHANGED
@@ -41,8 +41,23 @@ class FileObjectStore(IObjectStore):
             serialized = f.read()
             loaded = cloudpickle.loads(serialized)
             result.append(loaded)
-            os.remove(ref)
         end = time.monotonic()

         logger.info(f"The total time taken to read all objects is: {end - start}")
         return result
+
+    def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+        start = time.monotonic()
+        num_deleted = 0
+        for ref in refs:
+            try:
+                os.remove(ref)
+                num_deleted += 1
+            except Exception:
+                logger.warning(f"Failed to delete ref {ref}!", exc_info=True)
+        end = time.monotonic()
+
+        logger.info(
+            f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
+        )
+        return num_deleted == len(refs)
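Usage sketch for the new method, assuming the behavior shown in the hunk above: refs returned by FileObjectStore are file paths, get_many no longer deletes on read, and delete_many returns True only when every path was removed:

    import tempfile

    from deltacat.io.file_object_store import FileObjectStore

    with tempfile.TemporaryDirectory() as tmp:
        store = FileObjectStore(dir_path=tmp)
        refs = store.put_many([{"a": 1}, {"b": 2}])
        assert len(store.get_many(refs)) == 2   # objects now survive the read
        assert store.delete_many(refs) is True  # True only if all refs were removed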
deltacat/io/memcached_object_store.py
CHANGED
@@ -100,16 +100,10 @@ class MemcachedObjectStore(IObjectStore):

     def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
         result = []
-        refs_per_ip = defaultdict(lambda: [])
+        refs_per_ip = self._get_refs_per_ip(refs)
         chunks_by_refs = defaultdict(lambda: [])

         start = time.monotonic()
-        for ref in refs:
-            uid, ip, chunk_count = ref.split(self.SEPARATOR)
-            chunk_count = int(chunk_count)
-            for chunk_index in range(chunk_count):
-                current_ref = self._create_ref(uid, ip, chunk_index)
-                refs_per_ip[ip].append(current_ref)

         total_ref_count = 0
         for (ip, current_refs) in refs_per_ip.items():
@@ -193,6 +187,39 @@ class MemcachedObjectStore(IObjectStore):

         return cloudpickle.loads(serialized)

+    def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+        refs_per_ip = self._get_refs_per_ip(refs)
+        all_deleted = True
+
+        start = time.monotonic()
+
+        total_refs = 0
+        fully_deleted_refs = 0
+        for (ip, current_refs) in refs_per_ip.items():
+            client = self._get_client_by_ip(ip)
+            total_refs += len(current_refs)
+            try:
+                # always returns true
+                client.delete_many(current_refs, no_reply=self.noreply)
+                fully_deleted_refs += len(current_refs)
+            except Exception:
+                # if an exception is raised then all, some, or none of the keys may have been deleted
+                logger.warning(
+                    f"Failed to fully delete refs: {current_refs}", exc_info=True
+                )
+                all_deleted = False
+
+        end = time.monotonic()
+
+        logger.info(
+            f"From {len(refs)} objects, found {total_refs} total chunk references, of which {fully_deleted_refs} were guaranteed to be successfully deleted."
+        )
+        logger.info(
+            f"The total time taken to attempt deleting {len(refs)} objects is: {end - start}"
+        )
+
+        return all_deleted
+
     def clear(self) -> bool:
         flushed = all(
             [
@@ -260,3 +287,14 @@ class MemcachedObjectStore(IObjectStore):
         self.current_ip = socket.gethostbyname(socket.gethostname())

         return self.current_ip
+
+    def _get_refs_per_ip(self, refs: List[Any]):
+        refs_per_ip = defaultdict(lambda: [])
+
+        for ref in refs:
+            uid, ip, chunk_count = ref.split(self.SEPARATOR)
+            chunk_count = int(chunk_count)
+            for chunk_index in range(chunk_count):
+                current_ref = self._create_ref(uid, ip, chunk_index)
+                refs_per_ip[ip].append(current_ref)
+        return refs_per_ip
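For context on _get_refs_per_ip: a top-level ref encodes a uid, the host IP, and a chunk count, and is expanded into one per-chunk ref grouped by host. A standalone sketch (the separator character here is illustrative; the real store defines its own delimiter):

    from collections import defaultdict

    SEPARATOR = "|"  # illustrative; not necessarily the store's actual separator

    def create_ref(uid, ip, chunk_index):
        return f"{uid}{SEPARATOR}{ip}{SEPARATOR}{chunk_index}"

    def get_refs_per_ip(refs):
        refs_per_ip = defaultdict(list)
        for ref in refs:
            uid, ip, chunk_count = ref.split(SEPARATOR)
            # a top-level ref names its host and how many chunks were written
            for chunk_index in range(int(chunk_count)):
                refs_per_ip[ip].append(create_ref(uid, ip, chunk_index))
        return refs_per_ip

    print(get_refs_per_ip(["obj1|10.0.0.5|2"]))
    # {'10.0.0.5': ['obj1|10.0.0.5|0', 'obj1|10.0.0.5|1']}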
deltacat/io/object_store.py
CHANGED
@@ -43,6 +43,19 @@ class IObjectStore:
         or may not return ordered results.
         """

+    def delete(self, ref: Any, *args, **kwargs) -> bool:
+        """
+        Delete a single object from the object store.
+        """
+        return self.delete_many([ref])
+
+    def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+        ...
+
+        """
+        Delete many objects from the object store.
+        """
+
     def clear(self, *args, **kwargs) -> bool:
         ...

@@ -52,6 +65,7 @@ class IObjectStore:

     def close(self, *args, **kwargs) -> None:
         ...
+
     """
     Closes all the active connections to object store without clearing
     the data in the object store.
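A minimal in-memory implementation of the extended contract, as a sketch rather than deltacat code: delete() delegates to delete_many([ref]) exactly like the interface default above, and delete_many reports whether every ref was found and removed:

    import uuid
    from typing import Any, Dict, List

    class InMemoryObjectStore:
        def __init__(self) -> None:
            self._store: Dict[Any, object] = {}

        def put(self, obj: object) -> Any:
            ref = str(uuid.uuid4())
            self._store[ref] = obj
            return ref

        def delete(self, ref: Any) -> bool:
            # mirrors the interface default: single delete delegates to delete_many
            return self.delete_many([ref])

        def delete_many(self, refs: List[Any]) -> bool:
            deleted = sum(1 for ref in refs if self._store.pop(ref, None) is not None)
            return deleted == len(refs)  # True only when every ref existed

    store = InMemoryObjectStore()
    ref = store.put("payload")
    assert store.delete(ref) is True   # removed
    assert store.delete(ref) is False  # already gone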
deltacat/io/redis_object_store.py
CHANGED
@@ -56,12 +56,9 @@ class RedisObjectStore(IObjectStore):

     def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
         result = []
-        uid_per_ip = defaultdict(lambda: [])
+        uid_per_ip = self._get_uids_per_ip(refs)

         start = time.monotonic()
-        for ref in refs:
-            uid, ip = ref.split(self.SEPARATOR)
-            uid_per_ip[ip].append(uid)

         for (ip, uids) in uid_per_ip.items():
             client = self._get_client_by_ip(ip)
@@ -95,6 +92,29 @@ class RedisObjectStore(IObjectStore):
         serialized = client.get(uid)
         return cloudpickle.loads(serialized)

+    def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+        uid_per_ip = self._get_uids_per_ip(refs)
+
+        start = time.monotonic()
+
+        num_deleted = 0
+        for (ip, uids) in uid_per_ip.items():
+            client = self._get_client_by_ip(ip)
+            num_keys_deleted = client.delete(*uids)
+            num_deleted += num_keys_deleted
+            if num_keys_deleted != len(uids):
+                logger.warning(
+                    f"Failed to delete {len(uids) - num_keys_deleted} out of {len(uids)} uids: {uids}"
+                )
+
+        end = time.monotonic()
+
+        logger.info(
+            f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
+        )
+
+        return num_deleted == len(refs)
+
     def _get_client_by_ip(self, ip_address: str):
         if ip_address in self.client_cache:
             return self.client_cache[ip_address]
@@ -112,3 +132,11 @@ class RedisObjectStore(IObjectStore):

     def _create_ref(self, uid, ip):
         return f"{uid}{self.SEPARATOR}{ip}"
+
+    def _get_uids_per_ip(self, refs: List[Any]):
+        uid_per_ip = defaultdict(lambda: [])
+
+        for ref in refs:
+            uid, ip = ref.split(self.SEPARATOR)
+            uid_per_ip[ip].append(uid)
+        return uid_per_ip
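The num_deleted == len(refs) check above is reliable because Redis DEL returns the number of keys that actually existed and were removed. A brief redis-py sketch (the server address is illustrative):

    import redis

    client = redis.Redis(host="10.0.0.5")  # illustrative address
    client.set("uid-1", b"x")
    client.set("uid-2", b"y")
    removed = client.delete("uid-1", "uid-2", "uid-3")  # "uid-3" was never set
    print(removed)  # 2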
deltacat/io/s3_object_store.py
CHANGED
@@ -42,3 +42,20 @@ class S3ObjectStore(IObjectStore):

         logger.info(f"The total time taken to read all objects is: {end - start}")
         return result
+
+    def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+        start = time.monotonic()
+        num_deleted = 0
+        for ref in refs:
+            try:
+                s3_utils.delete_files_by_prefix(self.bucket, str(ref))
+                num_deleted += 1
+            except Exception:
+                logger.warning(f"Failed to delete ref {ref}!", exc_info=True)
+        end = time.monotonic()
+
+        logger.info(
+            f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
+        )
+
+        return num_deleted == len(refs)
deltacat/tests/compute/resource_estimation/test_delta.py
CHANGED
@@ -526,6 +526,30 @@ class TestEstimateResourcesRequiredToProcessDelta:
             == parquet_delta_with_manifest.meta.content_length
         )

+    def test_parquet_delta_when_default_v2_without_avg_record_size_and_sampling(
+        self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
+    ):
+        params = EstimateResourcesParams.of(
+            resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
+            previous_inflation=7,
+            parquet_to_pyarrow_inflation=1,
+        )
+
+        result = estimate_resources_required_to_process_delta(
+            delta=parquet_delta_with_manifest,
+            operation_type=OperationType.PYARROW_DOWNLOAD,
+            deltacat_storage=ds,
+            deltacat_storage_kwargs=local_deltacat_storage_kwargs,
+            estimate_resources_params=params,
+        )
+
+        assert parquet_delta_with_manifest.manifest is not None
+        assert result.memory_bytes is not None
+        assert (
+            result.statistics.on_disk_size_bytes
+            == parquet_delta_with_manifest.meta.content_length
+        )
+
     def test_parquet_delta_when_default_v2_and_files_to_sample_zero(
         self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
     ):
@@ -578,6 +602,32 @@ class TestEstimateResourcesRequiredToProcessDelta:
             == utsv_delta_with_manifest.meta.content_length
         )

+    def test_utsv_delta_when_default_v2_without_avg_record_size(
+        self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
+    ):
+        params = EstimateResourcesParams.of(
+            resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
+            previous_inflation=7,
+            average_record_size_bytes=None,  # note
+            parquet_to_pyarrow_inflation=1,
+        )
+
+        result = estimate_resources_required_to_process_delta(
+            delta=utsv_delta_with_manifest,
+            operation_type=OperationType.PYARROW_DOWNLOAD,
+            deltacat_storage=ds,
+            deltacat_storage_kwargs=local_deltacat_storage_kwargs,
+            estimate_resources_params=params,
+        )
+
+        assert utsv_delta_with_manifest.manifest is not None
+        assert result.memory_bytes is not None
+        assert result.statistics.record_count == 0
+        assert (
+            result.statistics.on_disk_size_bytes
+            == utsv_delta_with_manifest.meta.content_length
+        )
+
     def test_parquet_delta_without_inflation_when_default_v2(
         self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
     ):
deltacat/tests/compute/test_compact_partition_multiple_rounds.py
CHANGED
@@ -5,8 +5,9 @@ import pytest
 import boto3
 from boto3.resources.base import ServiceResource
 import pyarrow as pa
-from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
+from deltacat.io.file_object_store import FileObjectStore
 from pytest_benchmark.fixture import BenchmarkFixture
+import tempfile

 from deltacat.tests.compute.test_util_constant import (
     TEST_S3_RCF_BUCKET_NAME,
@@ -247,84 +248,99 @@ def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
     pgm = PlacementGroupManager(
         1, total_cpus, DEFAULT_WORKER_INSTANCE_CPUS, memory_per_bundle=4000000
     ).pgs[0]
+    with tempfile.TemporaryDirectory() as test_dir:
+        compact_partition_params = CompactPartitionParams.of(
+            {
+                "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                "compacted_file_content_type": ContentType.PARQUET,
+                "dd_max_parallelism_ratio": 1.0,
+                "deltacat_storage": ds,
+                "deltacat_storage_kwargs": ds_mock_kwargs,
+                "destination_partition_locator": rebased_partition.locator,
+                "hash_bucket_count": hash_bucket_count_param,
+                "last_stream_position_to_compact": source_partition.stream_position,
+                "list_deltas_kwargs": {
+                    **ds_mock_kwargs,
+                    **{"equivalent_table_types": []},
+                },
+                "object_store": FileObjectStore(test_dir),
+                "pg_config": pgm,
+                "primary_keys": primary_keys,
+                "read_kwargs_provider": read_kwargs_provider_param,
+                "rebase_source_partition_locator": source_partition.locator,
+                "rebase_source_partition_high_watermark": rebased_partition.stream_position,
+                "records_per_compacted_file": records_per_compacted_file_param,
+                "s3_client_kwargs": {},
+                "source_partition_locator": rebased_partition.locator,
+                "sort_keys": sort_keys if sort_keys else None,
+                "num_rounds": num_rounds_param,
+                "drop_duplicates": drop_duplicates_param,
+                "min_delta_bytes": 560,
+            }
+        )
+        if expected_terminal_exception:
+            with pytest.raises(expected_terminal_exception) as exc_info:
+                benchmark(compact_partition_func, compact_partition_params)
+            assert expected_terminal_exception_message in str(exc_info.value)
+            return
+        from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
+            ExecutionCompactionResult,
+        )

+        execute_compaction_result_spy = mocker.spy(
+            ExecutionCompactionResult, "__init__"
+        )
+        object_store_delete_many_spy = mocker.spy(FileObjectStore, "delete_many")

+        # execute
+        rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)

+        round_completion_info: RoundCompletionInfo = get_rcf(
+            s3_resource, rcf_file_s3_uri
+        )
+        audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
+            round_completion_info.compaction_audit_url
+        )

+        compaction_audit_obj: Dict[str, Any] = read_s3_contents(
+            s3_resource, audit_bucket, audit_key
+        )
+        compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
+            **compaction_audit_obj
+        )

+        # Assert not in-place compacted
+        assert (
+            execute_compaction_result_spy.call_args.args[-1] is False
+        ), "Table version erroneously marked as in-place compacted!"
+        compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
+            s3_resource, rcf_file_s3_uri
+        )
+        tables = ds.download_delta(
+            compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
+        )
+        actual_rebase_compacted_table = pa.concat_tables(tables)
+        # if no primary key is specified then sort by sort_key for consistent assertion
+        sorting_cols: List[Any] = (
+            [(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
+        )
+        rebase_expected_compact_partition_result = (
+            rebase_expected_compact_partition_result.combine_chunks().sort_by(
+                sorting_cols
+            )
+        )
+        actual_rebase_compacted_table = (
+            actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
+        )
+        assert actual_rebase_compacted_table.equals(
+            rebase_expected_compact_partition_result
+        ), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"

+        if assert_compaction_audit:
+            if not assert_compaction_audit(compactor_version, compaction_audit):
+                assert False, "Compaction audit assertion failed"
+        assert os.listdir(test_dir) == []
+        assert (
+            object_store_delete_many_spy.call_count
+        ), "Object store was never cleaned up!"
+        return
deltacat/tests/compute/test_compact_partition_rebase.py
CHANGED
@@ -5,8 +5,9 @@ import pytest
 import boto3
 from boto3.resources.base import ServiceResource
 import pyarrow as pa
-from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
+from deltacat.io.file_object_store import FileObjectStore
 from pytest_benchmark.fixture import BenchmarkFixture
+import tempfile

 from deltacat.tests.compute.test_util_constant import (
     TEST_S3_RCF_BUCKET_NAME,
@@ -250,83 +251,97 @@ def test_compact_partition_rebase_same_source_and_destination(
         1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
     ).pgs[0]
     last_stream_position_to_compact = source_partition.stream_position
+    with tempfile.TemporaryDirectory() as test_dir:
+        compact_partition_params = CompactPartitionParams.of(
+            {
+                "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                "compacted_file_content_type": ContentType.PARQUET,
+                "dd_max_parallelism_ratio": 1.0,
+                "deltacat_storage": ds,
+                "deltacat_storage_kwargs": ds_mock_kwargs,
+                "destination_partition_locator": rebased_partition.locator,
+                "hash_bucket_count": hash_bucket_count_param,
+                "last_stream_position_to_compact": last_stream_position_to_compact,
+                "list_deltas_kwargs": {
+                    **ds_mock_kwargs,
+                    **{"equivalent_table_types": []},
+                },
+                "object_store": FileObjectStore(test_dir),
+                "pg_config": pgm,
+                "primary_keys": primary_keys,
+                "read_kwargs_provider": read_kwargs_provider_param,
+                "rebase_source_partition_locator": source_partition.locator,
+                "rebase_source_partition_high_watermark": rebased_partition.stream_position,
+                "records_per_compacted_file": records_per_compacted_file_param,
+                "s3_client_kwargs": {},
+                "source_partition_locator": rebased_partition.locator,
+                "sort_keys": sort_keys if sort_keys else None,
+                "drop_duplicates": drop_duplicates_param,
+            }
+        )

+        from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
+            ExecutionCompactionResult,
+        )

+        execute_compaction_result_spy = mocker.spy(
+            ExecutionCompactionResult, "__init__"
+        )
+        object_store_put_many_spy = mocker.spy(FileObjectStore, "put_many")

+        # execute
+        rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)

+        round_completion_info: RoundCompletionInfo = get_rcf(
+            s3_resource, rcf_file_s3_uri
+        )
+        audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
+            round_completion_info.compaction_audit_url
+        )

+        compaction_audit_obj: Dict[str, Any] = read_s3_contents(
+            s3_resource, audit_bucket, audit_key
+        )
+        compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
+            **compaction_audit_obj
+        )

+        # Assert not in-place compacted
+        assert (
+            execute_compaction_result_spy.call_args.args[-1] is False
+        ), "Table version erroneously marked as in-place compacted!"
+        compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
+            s3_resource, rcf_file_s3_uri
+        )
+        assert (
+            compacted_delta_locator.stream_position == last_stream_position_to_compact
+        ), "Compacted delta locator must be equal to last stream position"
+        tables = ds.download_delta(
+            compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
+        )
+        actual_rebase_compacted_table = pa.concat_tables(tables)
+        # if no primary key is specified then sort by sort_key for consistent assertion
+        sorting_cols: List[Any] = []
+        if primary_keys:
+            sorting_cols.extend([(val, "ascending") for val in primary_keys])
+        if sort_keys:
+            sorting_cols.extend(sort_keys)

+        rebase_expected_compact_partition_result = (
+            rebase_expected_compact_partition_result.combine_chunks().sort_by(
+                sorting_cols
+            )
+        )
+        actual_rebase_compacted_table = (
+            actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
+        )
+        assert actual_rebase_compacted_table.equals(
+            rebase_expected_compact_partition_result
+        ), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"

+        if assert_compaction_audit is not None:
+            if not assert_compaction_audit(compactor_version, compaction_audit):
+                assert False, "Compaction audit assertion failed"
+        # We do not expect object store to be cleaned up when there's only one round
+        if object_store_put_many_spy.call_count:
+            assert os.listdir(test_dir) != []
deltacat/tests/io/test_file_object_store.py
CHANGED
@@ -27,11 +27,12 @@ class TestFileObjectStore(unittest.TestCase):
         new_callable=mock.mock_open,
         read_data="data",
     )
-    def test_put_many_sanity(self, mock_file):
+    @mock.patch("deltacat.io.file_object_store.cloudpickle.dumps")
+    def test_put_many_sanity(self, mock_dumps, mock_file):
         from deltacat.io.file_object_store import FileObjectStore

         object_store = FileObjectStore(dir_path="")
-        self.ray_mock.cloudpickle.dumps.return_value = self.TEST_VALUE
+        mock_dumps.return_value = self.TEST_VALUE
         result = object_store.put_many(["a", "b"])

         self.assertEqual(2, len(result))
@@ -42,11 +43,29 @@ class TestFileObjectStore(unittest.TestCase):
         new_callable=mock.mock_open,
         read_data="data",
     )
-    def test_get_many_sanity(self, mock_file):
+    @mock.patch("deltacat.io.file_object_store.cloudpickle.dumps")
+    def test_put_sanity(self, mock_dumps, mock_file):
         from deltacat.io.file_object_store import FileObjectStore

         object_store = FileObjectStore(dir_path="")
-        self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
+        mock_dumps.return_value = self.TEST_VALUE
+
+        result = object_store.put("test")
+
+        self.assertIsNotNone(result)
+        self.assertEqual(1, mock_file.call_count)
+
+    @mock.patch(
+        "deltacat.io.file_object_store.open",
+        new_callable=mock.mock_open,
+        read_data="data",
+    )
+    @mock.patch("deltacat.io.file_object_store.cloudpickle.loads")
+    def test_get_many_sanity(self, mock_loads, mock_file):
+        from deltacat.io.file_object_store import FileObjectStore
+
+        object_store = FileObjectStore(dir_path="")
+        mock_loads.return_value = self.TEST_VALUE

         result = object_store.get_many(["test", "test"])

@@ -58,11 +77,12 @@ class TestFileObjectStore(unittest.TestCase):
         new_callable=mock.mock_open,
         read_data="data",
     )
-    def test_get_sanity(self, mock_file):
+    @mock.patch("deltacat.io.file_object_store.cloudpickle.loads")
+    def test_get_sanity(self, mock_loads, mock_file):
         from deltacat.io.file_object_store import FileObjectStore

         object_store = FileObjectStore(dir_path="")
-        self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
+        mock_loads.return_value = self.TEST_VALUE

         result = object_store.get("test")

@@ -70,17 +90,27 @@ class TestFileObjectStore(unittest.TestCase):
         self.assertEqual(1, mock_file.call_count)

     @mock.patch(
-        "deltacat.io.file_object_store.open",
-        new_callable=mock.mock_open,
-        read_data="data",
+        "deltacat.io.file_object_store.os.remove",
     )
-    def test_put_sanity(self, mock_file):
+    def test_delete_sanity(self, mock_remove):
         from deltacat.io.file_object_store import FileObjectStore

         object_store = FileObjectStore(dir_path="")
-        self.ray_mock.cloudpickle.dumps.return_value = self.TEST_VALUE

-        result = object_store.put("test")
+        delete_success = object_store.delete("test")

-        self.assertIsNotNone(result)
-        self.assertEqual(1, mock_file.call_count)
+        self.assertTrue(delete_success)
+        self.assertEqual(1, mock_remove.call_count)
+
+    @mock.patch(
+        "deltacat.io.file_object_store.os.remove",
+    )
+    def test_delete_many_sanity(self, mock_remove):
+        from deltacat.io.file_object_store import FileObjectStore
+
+        object_store = FileObjectStore(dir_path="")
+
+        delete_success = object_store.delete_many(["test", "test"])
+
+        self.assertTrue(delete_success)
+        self.assertEqual(2, mock_remove.call_count)
deltacat/tests/io/test_memcached_object_store.py
CHANGED
@@ -28,6 +28,15 @@ class MockPyMemcacheClient:
     def get(self, key, *args, **kwargs):
         return self.store.get(key)

+    def delete(self, key, *args, **kwargs):
+        self.store.pop(key, None)
+        return True
+
+    def delete_many(self, keys, *args, **kwargs):
+        for key in keys:
+            self.store.pop(key, None)
+        return True
+
     def flush_all(self, *args, **kwargs):
         for key, value in self.store.items():
             self.store[key] = None
@@ -200,6 +209,37 @@ class TestMemcachedObjectStore(unittest.TestCase):
         result = self.object_store.get(ref)
         self.assertEqual(result, self.TEST_VALUE_LARGE)

+    @mock.patch("deltacat.io.memcached_object_store.Client")
+    @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
+    def test_delete_sanity(self, mock_retrying_client, mock_client):
+        mock_client.return_value = MockPyMemcacheClient()
+        mock_retrying_client.return_value = mock_client.return_value
+
+        # setup
+        ref = self.object_store.put(np.arange(100))
+
+        # action
+        delete_success = self.object_store.delete(ref)
+
+        # assert
+        self.assertTrue(delete_success)
+
+    @mock.patch("deltacat.io.memcached_object_store.Client")
+    @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
+    def test_delete_many_sanity(self, mock_retrying_client, mock_client):
+        mock_client.return_value = MockPyMemcacheClient()
+        mock_retrying_client.return_value = mock_client.return_value
+
+        # setup
+        ref1 = self.object_store.put("a")
+        ref2 = self.object_store.put(np.arange(100))
+
+        # action
+        delete_success = self.object_store.delete_many([ref2, ref1])
+
+        # assert
+        self.assertTrue(delete_success)
+
     @mock.patch("deltacat.io.memcached_object_store.Client")
     @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
     def test_clear_sanity(self, mock_retrying_client, mock_client):
deltacat/tests/io/test_redis_object_store.py
CHANGED
@@ -101,3 +101,23 @@ class TestRedisObjectStore(unittest.TestCase):
         self.object_store.put("test_ip")

         self.assertEqual(1, mock_client.Redis.return_value.set.call_count)
+
+    @mock.patch("deltacat.io.redis_object_store.redis")
+    def test_delete_sanity(self, mock_client):
+        mock_client.Redis.return_value.delete.return_value = 1
+
+        delete_success = self.object_store.delete("test_ip")
+
+        self.assertTrue(delete_success)
+        self.assertEqual(1, mock_client.Redis.return_value.delete.call_count)
+
+    @mock.patch("deltacat.io.redis_object_store.redis")
+    def test_delete_many_sanity(self, mock_client):
+        mock_client.Redis.return_value.delete.side_effect = [2, 1]
+
+        delete_success = self.object_store.delete_many(
+            ["test_ip", "test_ip", "test_ip2"]
+        )
+
+        self.assertTrue(delete_success)
+        self.assertEqual(2, mock_client.Redis.return_value.delete.call_count)
deltacat/tests/io/test_s3_object_store.py
CHANGED
@@ -57,3 +57,12 @@ class TestS3ObjectStore(unittest.TestCase):

         self.assertIsNotNone(result)
         self.assertEqual(1, mock_upload.call_count)
+
+    @mock.patch("deltacat.io.s3_object_store.s3_utils.delete_files_by_prefix")
+    def test_delete_many_sanity(self, mock_delete):
+        self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
+
+        delete_success = self.object_store.delete_many(["test", "test"])
+
+        self.assertTrue(delete_success)
+        self.assertEqual(2, mock_delete.call_count)
{deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 1.1.21
+Version: 1.1.23
 Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
@@ -27,7 +27,7 @@ Requires-Dist: tenacity==8.1.0
 Requires-Dist: typing-extensions==4.4.0
 Requires-Dist: pymemcache==4.0.0
 Requires-Dist: redis==4.6.0
-Requires-Dist: getdaft==0.3.
+Requires-Dist: getdaft==0.3.6
 Requires-Dist: schedule==1.2.0

 # DeltaCAT
{deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
-deltacat/__init__.py,sha256=
+deltacat/__init__.py,sha256=J06L_sl7VtYV3kAUxq2ai3s2SIfFAvKt1S2mxU7kMfo,1778
 deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
 deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
 deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
 deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/aws/clients.py,sha256=4eQvpkV1PzFfxog7EriuglOGGwNFHR5hbGYpjsNNPxk,6949
 deltacat/aws/constants.py,sha256=hcYAUot4ahq9GXCMClQiuYCtiDs5XaOebdUoKg4V84k,1222
-deltacat/aws/s3u.py,sha256=
+deltacat/aws/s3u.py,sha256=GRmYwE9If-JQAazowUo5BCCu2yRa5EeOwwLfOPIGeCc,28584
 deltacat/aws/redshift/__init__.py,sha256=7SvjG-dqox8zZUhFicTsUvpG5vXYDl_QQ3ohlHOgTKc,342
 deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/aws/redshift/model/manifest.py,sha256=-ap44dxaG2bVNkVMzpJe-oIFHx0iBWCnA_sO-riQp0Y,13605
@@ -46,7 +46,7 @@ deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J
 deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor/utils/io.py,sha256=S-JZdjETP_tHblK4j860jLHyX9S6A87BPz3Rl0jGbRM,17303
 deltacat/compute/compactor/utils/primary_key_index.py,sha256=ay2-7t4mP9I_l5gKkrv5h5_r8Icts8mBcbH7OJBknrY,2435
-deltacat/compute/compactor/utils/round_completion_file.py,sha256=
+deltacat/compute/compactor/utils/round_completion_file.py,sha256=fFevhUuveCvrU3g_JhX_vPCuEv9Oku0ihbi-n9E6H74,3381
 deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
 deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
 deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -66,12 +66,12 @@ deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViV
 deltacat/compute/compactor_v2/model/merge_input.py,sha256=-SxTE0e67z2V7MiMEVz5aMu4E0k8h3-vqohvUUOC0do,5659
 deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
 deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=
+deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=AuzysedzCyapfNf1pfqsZe6mZw121lx6h6NTyLB-pyM,30930
 deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
 deltacat/compute/compactor_v2/steps/merge.py,sha256=LpktsDPfj7Of6RgUw9w1f3Y3OBkPDjvtyXjzFaIDoSo,21771
 deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=
+deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=1P9CDpuWErsFcTTlRCeuUQHDokVI92he_MsL82uRAdA,7424
 deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQl8C5lBEr8gbNfbsw,1932
 deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2OpHvUMqUL2ja3aw,3626
 deltacat/compute/compactor_v2/utils/io.py,sha256=3m4dorxj-WD6Yu9_3gRE6gz3C-eNJA7nn02sHKwo-J8,6018
@@ -85,7 +85,7 @@ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtL
 deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
 deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
-deltacat/compute/resource_estimation/delta.py,sha256=
+deltacat/compute/resource_estimation/delta.py,sha256=Ei4v9UYhtcT5P-wNEMAg0E4mYl0z5FpSkaTufVoGD18,9492
 deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
 deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXCacr0Swe5f0M7DdO4,5465
 deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
@@ -99,13 +99,13 @@ deltacat/compute/stats/models/manifest_entry_stats.py,sha256=NCDAe2nPDEI4kOkuwNk
 deltacat/compute/stats/models/stats_result.py,sha256=XQAlmzhUqRmg4jzEMUAOqcYn1HUOBTMryBH1CCVlet8,3820
 deltacat/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/io/dataset.py,sha256=pFU5UfK-fD9C4fIeffJtrA6yVQSgAx2UPbxzQ4GMFL8,3203
-deltacat/io/file_object_store.py,sha256=
-deltacat/io/memcached_object_store.py,sha256=
-deltacat/io/object_store.py,sha256=
+deltacat/io/file_object_store.py,sha256=YoNL3Qla8uLOHaWnyBmIgotjSGAy3Td3Tumah0kk73Y,1868
+deltacat/io/memcached_object_store.py,sha256=h1pyAHV_tYwxUqdsmOx-xWFJyPtaSWKl1A0FlblAz_M,10663
+deltacat/io/object_store.py,sha256=z3Crt8TLyLyoRunOuXAri373TQZKFoz66QHpxGOV82U,1910
 deltacat/io/ray_plasma_object_store.py,sha256=TyoUPWybE_cSISZ2SQa3YfD93QWMp0r82-6WnoVSmzk,905
 deltacat/io/read_api.py,sha256=BhkjL3xjY-fsa62AA9Yv20_88uTskn4_Bv2W6VmMXVA,7023
-deltacat/io/redis_object_store.py,sha256=
-deltacat/io/s3_object_store.py,sha256=
+deltacat/io/redis_object_store.py,sha256=ZXkJIrx7uHnnAayD-FG1BiB5xxDjMch9GO-YUkPVwqU,4410
+deltacat/io/s3_object_store.py,sha256=kwAEWvUJRbSmD3xywZBovfQvwr-EVDBKzWiB6T7Hr3I,1907
 deltacat/io/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/io/aws/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/storage/__init__.py,sha256=4sWa3oq89IC3YPclsnVc6ZhnlFM2MuSqshT2uW5cSEY,2158
@@ -138,9 +138,9 @@ deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=8HVr3EIFYFq
 deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=l_6-pAKOsRY3NbtfHsYmEaJEkq6IJueYuLsjyJxNgz4,81564
 deltacat/tests/compute/compact_partition_test_cases.py,sha256=R9eiKvxCLqcoHjAx3iOogdnXZEO9TvLbRf0wA7bcJN4,26170
 deltacat/tests/compute/test_compact_partition_incremental.py,sha256=Z0hyQGhMZjCaOn1Vk4qUbgDiS7HDhtdNeFQyG1PJhqA,14559
-deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=
+deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=XOkpB5r-6lyDbqSIens8loaj86HG29PoNrHAOlIMqTM,12587
 deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
-deltacat/tests/compute/test_compact_partition_rebase.py,sha256=
+deltacat/tests/compute/test_compact_partition_rebase.py,sha256=ztSiLgC2OpU4yz81vz-4xWzvZyrLGojtzomsW4q7Bl8,12626
 deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=CHHfNFEJW8S1We7NE1Gg6EaoKEWnaOMRxWrLyirrahc,14643
 deltacat/tests/compute/test_util_common.py,sha256=0mEHo38bgH64y0XZ_zgUL_aZgQMgJOSTlOYvIJxG_MM,11825
 deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
@@ -157,16 +157,16 @@ deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6ip
 deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=37DkR1u_XwhedV9cGed6FFuJTC0XmuiowHJIa_Op6uA,865
 deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/tests/compute/resource_estimation/test_delta.py,sha256=
+deltacat/tests/compute/resource_estimation/test_delta.py,sha256=LyzRitBrasQa35Bq7rHTQInaOelSWOSoC0_dyjgpNuE,24505
 deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
 deltacat/tests/compute/resource_estimation/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/io/test_cloudpickle_bug_fix.py,sha256=qnYJg_S-nsLai77a4_I3Qs2Jtr_KWQJOxyl96f9PgHA,1376
-deltacat/tests/io/test_file_object_store.py,sha256=
-deltacat/tests/io/test_memcached_object_store.py,sha256=
+deltacat/tests/io/test_file_object_store.py,sha256=bjORXnHe7Ea733XUUO0S2Su_oqSwGuO84TlIfoNO6qA,3587
+deltacat/tests/io/test_memcached_object_store.py,sha256=0EIaU5MHiEmIEkA4x5qUXFY9TE6TJ7V2RGH827cu3AU,9512
 deltacat/tests/io/test_ray_plasma_object_store.py,sha256=-wJZP6lRtEOogR25wjEiIBGz_lpvWVihwlZ5GqandZU,1911
-deltacat/tests/io/test_redis_object_store.py,sha256=
-deltacat/tests/io/test_s3_object_store.py,sha256=
+deltacat/tests/io/test_redis_object_store.py,sha256=YpMsMFT6ltmJHlpAdmlxLK91KjCN8YFMaQrpJ6dcR6E,4595
+deltacat/tests/io/test_s3_object_store.py,sha256=I8AbyrPfS32CAYvRHtn_OanL-XPpAnJeuCuhD-u9irQ,2270
 deltacat/tests/local_deltacat_storage/__init__.py,sha256=5T9ubNIS42-BotEH0yrUiWEU92feW7lkoSA1-wMeAnQ,40104
 deltacat/tests/local_deltacat_storage/exceptions.py,sha256=oxZ0psmrEO0M6P2r8gHQ2E8E-Y8UBfUCBUIwfuHcx38,251
 deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -210,8 +210,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
-deltacat-1.1.21.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deltacat-1.1.21.dist-info/METADATA,sha256=
-deltacat-1.1.21.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-deltacat-1.1.21.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
-deltacat-1.1.21.dist-info/RECORD,,
+deltacat-1.1.23.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-1.1.23.dist-info/METADATA,sha256=vVw3plixIREPOrFT8PN1M1X55lIDnno0kaXVAtff2Hk,1733
+deltacat-1.1.23.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+deltacat-1.1.23.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-1.1.23.dist-info/RECORD,,
{deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/LICENSE
File without changes
{deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/WHEEL
File without changes
{deltacat-1.1.21.dist-info → deltacat-1.1.23.dist-info}/top_level.txt
File without changes