deltacat 1.1.26__py3-none-any.whl → 1.1.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor_v2/private/compaction_utils.py +4 -1
- deltacat/compute/resource_estimation/delta.py +1 -1
- deltacat/tests/compute/resource_estimation/test_delta.py +37 -0
- deltacat/tests/compute/test_compact_partition_incremental.py +10 -0
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +10 -0
- deltacat/tests/compute/test_compact_partition_rebase.py +11 -0
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +10 -0
- {deltacat-1.1.26.dist-info → deltacat-1.1.27.dist-info}/METADATA +1 -1
- {deltacat-1.1.26.dist-info → deltacat-1.1.27.dist-info}/RECORD +13 -13
- {deltacat-1.1.26.dist-info → deltacat-1.1.27.dist-info}/LICENSE +0 -0
- {deltacat-1.1.26.dist-info → deltacat-1.1.27.dist-info}/WHEEL +0 -0
- {deltacat-1.1.26.dist-info → deltacat-1.1.27.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
@@ -584,8 +584,11 @@ def _process_merge_results(
|
|
584
584
|
f"Duplicate record count ({duplicate_hash_bucket_mat_results}) is as large "
|
585
585
|
f"as or greater than params.num_rounds, which is {params.num_rounds}"
|
586
586
|
)
|
587
|
+
# ensure start index is the first file index if task index is same
|
587
588
|
hb_id_to_entry_indices_range[str(mat_result.task_index)] = (
|
588
|
-
file_index
|
589
|
+
hb_id_to_entry_indices_range.get(str(mat_result.task_index), [file_index])[
|
590
|
+
0
|
591
|
+
],
|
589
592
|
file_index + mat_result.pyarrow_write_result.files,
|
590
593
|
)
|
591
594
|
|
@@ -188,7 +188,7 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
|
|
188
188
|
sampled_on_disk_size += delta.manifest.entries[entry_index].meta.content_length
|
189
189
|
sampled_num_rows += len(tbl)
|
190
190
|
|
191
|
-
if not sampled_on_disk_size:
|
191
|
+
if not sampled_on_disk_size or not sampled_in_memory_size:
|
192
192
|
return EstimatedResources.of(
|
193
193
|
memory_bytes=0,
|
194
194
|
statistics=Statistics.of(
|
@@ -437,6 +437,43 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
437
437
|
== parquet_delta_with_manifest.meta.content_length
|
438
438
|
)
|
439
439
|
|
440
|
+
def test_parquet_delta_when_file_sampling_and_arrow_size_zero(
|
441
|
+
self,
|
442
|
+
local_deltacat_storage_kwargs,
|
443
|
+
parquet_delta_with_manifest: Delta,
|
444
|
+
monkeypatch,
|
445
|
+
):
|
446
|
+
params = EstimateResourcesParams.of(
|
447
|
+
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
|
448
|
+
max_files_to_sample=2,
|
449
|
+
)
|
450
|
+
|
451
|
+
def mock_func(*args, **kwargs):
|
452
|
+
class MockedValue:
|
453
|
+
nbytes = 0
|
454
|
+
|
455
|
+
def __len__(self):
|
456
|
+
return 0
|
457
|
+
|
458
|
+
return MockedValue()
|
459
|
+
|
460
|
+
monkeypatch.setattr(ds, "download_delta_manifest_entry", mock_func)
|
461
|
+
|
462
|
+
result = estimate_resources_required_to_process_delta(
|
463
|
+
delta=parquet_delta_with_manifest,
|
464
|
+
operation_type=OperationType.PYARROW_DOWNLOAD,
|
465
|
+
deltacat_storage=ds,
|
466
|
+
deltacat_storage_kwargs=local_deltacat_storage_kwargs,
|
467
|
+
estimate_resources_params=params,
|
468
|
+
)
|
469
|
+
|
470
|
+
assert parquet_delta_with_manifest.manifest is not None
|
471
|
+
assert result.memory_bytes == 0
|
472
|
+
assert (
|
473
|
+
result.statistics.on_disk_size_bytes
|
474
|
+
== parquet_delta_with_manifest.meta.content_length
|
475
|
+
)
|
476
|
+
|
440
477
|
def test_delta_manifest_utsv_when_file_sampling(
|
441
478
|
self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
|
442
479
|
):
|
@@ -328,6 +328,16 @@ def test_compact_partition_incremental(
|
|
328
328
|
**compaction_audit_obj
|
329
329
|
)
|
330
330
|
|
331
|
+
# assert if RCF covers all files
|
332
|
+
if compactor_version != CompactorVersion.V1.value:
|
333
|
+
previous_end = None
|
334
|
+
for start, end in round_completion_info.hb_index_to_entry_range.values():
|
335
|
+
assert (previous_end is None and start == 0) or start == previous_end
|
336
|
+
previous_end = end
|
337
|
+
assert (
|
338
|
+
previous_end == round_completion_info.compacted_pyarrow_write_result.files
|
339
|
+
)
|
340
|
+
|
331
341
|
tables = ds.download_delta(
|
332
342
|
compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
|
333
343
|
)
|
@@ -309,6 +309,16 @@ def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
|
|
309
309
|
**compaction_audit_obj
|
310
310
|
)
|
311
311
|
|
312
|
+
# assert if RCF covers all files
|
313
|
+
# multiple rounds feature is only supported in V2 compactor
|
314
|
+
previous_end = None
|
315
|
+
for start, end in round_completion_info.hb_index_to_entry_range.values():
|
316
|
+
assert (previous_end is None and start == 0) or start == previous_end
|
317
|
+
previous_end = end
|
318
|
+
assert (
|
319
|
+
previous_end == round_completion_info.compacted_pyarrow_write_result.files
|
320
|
+
)
|
321
|
+
|
312
322
|
# Assert not in-place compacted
|
313
323
|
assert (
|
314
324
|
execute_compaction_result_spy.call_args.args[-1] is False
|
@@ -299,6 +299,17 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
299
299
|
round_completion_info.compaction_audit_url
|
300
300
|
)
|
301
301
|
|
302
|
+
# assert if RCF covers all files
|
303
|
+
if compactor_version != CompactorVersion.V1.value:
|
304
|
+
previous_end = None
|
305
|
+
for start, end in round_completion_info.hb_index_to_entry_range.values():
|
306
|
+
assert (previous_end is None and start == 0) or start == previous_end
|
307
|
+
previous_end = end
|
308
|
+
assert (
|
309
|
+
previous_end
|
310
|
+
== round_completion_info.compacted_pyarrow_write_result.files
|
311
|
+
)
|
312
|
+
|
302
313
|
compaction_audit_obj: Dict[str, Any] = read_s3_contents(
|
303
314
|
s3_resource, audit_bucket, audit_key
|
304
315
|
)
|
@@ -355,6 +355,16 @@ def test_compact_partition_rebase_then_incremental(
|
|
355
355
|
compacted_delta_locator_incremental: DeltaLocator = (
|
356
356
|
round_completion_info.compacted_delta_locator
|
357
357
|
)
|
358
|
+
# assert if RCF covers all files
|
359
|
+
if compactor_version != CompactorVersion.V1.value:
|
360
|
+
previous_end = None
|
361
|
+
for start, end in round_completion_info.hb_index_to_entry_range.values():
|
362
|
+
assert (previous_end is None and start == 0) or start == previous_end
|
363
|
+
previous_end = end
|
364
|
+
assert (
|
365
|
+
previous_end == round_completion_info.compacted_pyarrow_write_result.files
|
366
|
+
)
|
367
|
+
|
358
368
|
audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
|
359
369
|
"s3://", ""
|
360
370
|
).split("/", 1)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=NNgt1N6a4dwztCKl6C7klF3mQEn-S-sBHNZPKPqRHko,1778
|
2
2
|
deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
|
3
3
|
deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
|
4
4
|
deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
|
@@ -66,7 +66,7 @@ deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViV
|
|
66
66
|
deltacat/compute/compactor_v2/model/merge_input.py,sha256=-SxTE0e67z2V7MiMEVz5aMu4E0k8h3-vqohvUUOC0do,5659
|
67
67
|
deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
|
68
68
|
deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
69
|
-
deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=
|
69
|
+
deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=fMWXg1SCIIgjk9p_OFYrcm760dOKNbFO1Lj3_JI3GCY,30929
|
70
70
|
deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
71
|
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
|
72
72
|
deltacat/compute/compactor_v2/steps/merge.py,sha256=LpktsDPfj7Of6RgUw9w1f3Y3OBkPDjvtyXjzFaIDoSo,21771
|
@@ -85,7 +85,7 @@ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtL
|
|
85
85
|
deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
86
86
|
deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
|
87
87
|
deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
|
88
|
-
deltacat/compute/resource_estimation/delta.py,sha256=
|
88
|
+
deltacat/compute/resource_estimation/delta.py,sha256=8oRy1rgGUimwMqPB5At81AS-AsjPHdcvLHzJ9TW8RpM,9522
|
89
89
|
deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
|
90
90
|
deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXCacr0Swe5f0M7DdO4,5465
|
91
91
|
deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
|
@@ -137,11 +137,11 @@ deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py,sha256=kW
|
|
137
137
|
deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=8HVr3EIFYFqNaJoqeCuj9xIBjM4Ch2bx-mJcO4BRrLo,16839
|
138
138
|
deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=l_6-pAKOsRY3NbtfHsYmEaJEkq6IJueYuLsjyJxNgz4,81564
|
139
139
|
deltacat/tests/compute/compact_partition_test_cases.py,sha256=R9eiKvxCLqcoHjAx3iOogdnXZEO9TvLbRf0wA7bcJN4,26170
|
140
|
-
deltacat/tests/compute/test_compact_partition_incremental.py,sha256=
|
141
|
-
deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=
|
140
|
+
deltacat/tests/compute/test_compact_partition_incremental.py,sha256=lkfAraOJmEmieesf7b1BqlfTS26YjYM5xXOXoTMrsos,14989
|
141
|
+
deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=xXBA66TTfARR90m5KQs31nmiokuMy9iGQt7Z9evyG7M,12950
|
142
142
|
deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
|
143
|
-
deltacat/tests/compute/test_compact_partition_rebase.py,sha256=
|
144
|
-
deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=
|
143
|
+
deltacat/tests/compute/test_compact_partition_rebase.py,sha256=DNcpmnBo5QoZ23BiIhJCC3zaDK0xClZLUb2-ZEEp5s4,13108
|
144
|
+
deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=Rxen3QGIaxVPa8lcO7NDMRxQ0aBjrOKn46LK5ZsfQTo,15073
|
145
145
|
deltacat/tests/compute/test_util_common.py,sha256=0mEHo38bgH64y0XZ_zgUL_aZgQMgJOSTlOYvIJxG_MM,11825
|
146
146
|
deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
|
147
147
|
deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=Q3HJj1fjoe2JwRUOW8KEjbTqPIIoP2o_T3ZGH6SJnCM,13244
|
@@ -157,7 +157,7 @@ deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6ip
|
|
157
157
|
deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
158
158
|
deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=37DkR1u_XwhedV9cGed6FFuJTC0XmuiowHJIa_Op6uA,865
|
159
159
|
deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
160
|
-
deltacat/tests/compute/resource_estimation/test_delta.py,sha256=
|
160
|
+
deltacat/tests/compute/resource_estimation/test_delta.py,sha256=HCL2oUnCqm0E26T3HLJjMhoAsHTJIWPYGwIKRgM_H7E,25712
|
161
161
|
deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
|
162
162
|
deltacat/tests/compute/resource_estimation/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
163
163
|
deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -210,8 +210,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
|
|
210
210
|
deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
|
211
211
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
212
212
|
deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
|
213
|
-
deltacat-1.1.
|
214
|
-
deltacat-1.1.
|
215
|
-
deltacat-1.1.
|
216
|
-
deltacat-1.1.
|
217
|
-
deltacat-1.1.
|
213
|
+
deltacat-1.1.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
214
|
+
deltacat-1.1.27.dist-info/METADATA,sha256=VL7sWG3lO3cV3tzwTiCTgpm7h0K5Dh3GtKiqojgSgHI,1733
|
215
|
+
deltacat-1.1.27.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
216
|
+
deltacat-1.1.27.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
217
|
+
deltacat-1.1.27.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|