deltacat 1.1.26__py3-none-any.whl → 1.1.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.26"
47
+ __version__ = "1.1.27"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -584,8 +584,11 @@ def _process_merge_results(
584
584
  f"Duplicate record count ({duplicate_hash_bucket_mat_results}) is as large "
585
585
  f"as or greater than params.num_rounds, which is {params.num_rounds}"
586
586
  )
587
+ # ensure start index is the first file index if task index is same
587
588
  hb_id_to_entry_indices_range[str(mat_result.task_index)] = (
588
- file_index,
589
+ hb_id_to_entry_indices_range.get(str(mat_result.task_index), [file_index])[
590
+ 0
591
+ ],
589
592
  file_index + mat_result.pyarrow_write_result.files,
590
593
  )
591
594
 
@@ -188,7 +188,7 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
188
188
  sampled_on_disk_size += delta.manifest.entries[entry_index].meta.content_length
189
189
  sampled_num_rows += len(tbl)
190
190
 
191
- if not sampled_on_disk_size:
191
+ if not sampled_on_disk_size or not sampled_in_memory_size:
192
192
  return EstimatedResources.of(
193
193
  memory_bytes=0,
194
194
  statistics=Statistics.of(
@@ -437,6 +437,43 @@ class TestEstimateResourcesRequiredToProcessDelta:
437
437
  == parquet_delta_with_manifest.meta.content_length
438
438
  )
439
439
 
440
+ def test_parquet_delta_when_file_sampling_and_arrow_size_zero(
441
+ self,
442
+ local_deltacat_storage_kwargs,
443
+ parquet_delta_with_manifest: Delta,
444
+ monkeypatch,
445
+ ):
446
+ params = EstimateResourcesParams.of(
447
+ resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
448
+ max_files_to_sample=2,
449
+ )
450
+
451
+ def mock_func(*args, **kwargs):
452
+ class MockedValue:
453
+ nbytes = 0
454
+
455
+ def __len__(self):
456
+ return 0
457
+
458
+ return MockedValue()
459
+
460
+ monkeypatch.setattr(ds, "download_delta_manifest_entry", mock_func)
461
+
462
+ result = estimate_resources_required_to_process_delta(
463
+ delta=parquet_delta_with_manifest,
464
+ operation_type=OperationType.PYARROW_DOWNLOAD,
465
+ deltacat_storage=ds,
466
+ deltacat_storage_kwargs=local_deltacat_storage_kwargs,
467
+ estimate_resources_params=params,
468
+ )
469
+
470
+ assert parquet_delta_with_manifest.manifest is not None
471
+ assert result.memory_bytes == 0
472
+ assert (
473
+ result.statistics.on_disk_size_bytes
474
+ == parquet_delta_with_manifest.meta.content_length
475
+ )
476
+
440
477
  def test_delta_manifest_utsv_when_file_sampling(
441
478
  self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
442
479
  ):
@@ -328,6 +328,16 @@ def test_compact_partition_incremental(
328
328
  **compaction_audit_obj
329
329
  )
330
330
 
331
+ # assert if RCF covers all files
332
+ if compactor_version != CompactorVersion.V1.value:
333
+ previous_end = None
334
+ for start, end in round_completion_info.hb_index_to_entry_range.values():
335
+ assert (previous_end is None and start == 0) or start == previous_end
336
+ previous_end = end
337
+ assert (
338
+ previous_end == round_completion_info.compacted_pyarrow_write_result.files
339
+ )
340
+
331
341
  tables = ds.download_delta(
332
342
  compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
333
343
  )
@@ -309,6 +309,16 @@ def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
309
309
  **compaction_audit_obj
310
310
  )
311
311
 
312
+ # assert if RCF covers all files
313
+ # multiple rounds feature is only supported in V2 compactor
314
+ previous_end = None
315
+ for start, end in round_completion_info.hb_index_to_entry_range.values():
316
+ assert (previous_end is None and start == 0) or start == previous_end
317
+ previous_end = end
318
+ assert (
319
+ previous_end == round_completion_info.compacted_pyarrow_write_result.files
320
+ )
321
+
312
322
  # Assert not in-place compacted
313
323
  assert (
314
324
  execute_compaction_result_spy.call_args.args[-1] is False
@@ -299,6 +299,17 @@ def test_compact_partition_rebase_same_source_and_destination(
299
299
  round_completion_info.compaction_audit_url
300
300
  )
301
301
 
302
+ # assert if RCF covers all files
303
+ if compactor_version != CompactorVersion.V1.value:
304
+ previous_end = None
305
+ for start, end in round_completion_info.hb_index_to_entry_range.values():
306
+ assert (previous_end is None and start == 0) or start == previous_end
307
+ previous_end = end
308
+ assert (
309
+ previous_end
310
+ == round_completion_info.compacted_pyarrow_write_result.files
311
+ )
312
+
302
313
  compaction_audit_obj: Dict[str, Any] = read_s3_contents(
303
314
  s3_resource, audit_bucket, audit_key
304
315
  )
@@ -355,6 +355,16 @@ def test_compact_partition_rebase_then_incremental(
355
355
  compacted_delta_locator_incremental: DeltaLocator = (
356
356
  round_completion_info.compacted_delta_locator
357
357
  )
358
+ # assert if RCF covers all files
359
+ if compactor_version != CompactorVersion.V1.value:
360
+ previous_end = None
361
+ for start, end in round_completion_info.hb_index_to_entry_range.values():
362
+ assert (previous_end is None and start == 0) or start == previous_end
363
+ previous_end = end
364
+ assert (
365
+ previous_end == round_completion_info.compacted_pyarrow_write_result.files
366
+ )
367
+
358
368
  audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
359
369
  "s3://", ""
360
370
  ).split("/", 1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.26
3
+ Version: 1.1.27
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -1,4 +1,4 @@
1
- deltacat/__init__.py,sha256=N7LrDYFJUaYdchJUVZ8VN_9QUJzuETzkz-oT833iEr4,1778
1
+ deltacat/__init__.py,sha256=NNgt1N6a4dwztCKl6C7klF3mQEn-S-sBHNZPKPqRHko,1778
2
2
  deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
3
3
  deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
4
4
  deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -66,7 +66,7 @@ deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViV
66
66
  deltacat/compute/compactor_v2/model/merge_input.py,sha256=-SxTE0e67z2V7MiMEVz5aMu4E0k8h3-vqohvUUOC0do,5659
67
67
  deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
68
68
  deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=e8pZFobq6KBCy67ZRn2z1CAwNVjPIJnAiD4HHDmDbCk,30757
69
+ deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=fMWXg1SCIIgjk9p_OFYrcm760dOKNbFO1Lj3_JI3GCY,30929
70
70
  deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
72
72
  deltacat/compute/compactor_v2/steps/merge.py,sha256=LpktsDPfj7Of6RgUw9w1f3Y3OBkPDjvtyXjzFaIDoSo,21771
@@ -85,7 +85,7 @@ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtL
85
85
  deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
86
  deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
87
87
  deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
88
- deltacat/compute/resource_estimation/delta.py,sha256=Ei4v9UYhtcT5P-wNEMAg0E4mYl0z5FpSkaTufVoGD18,9492
88
+ deltacat/compute/resource_estimation/delta.py,sha256=8oRy1rgGUimwMqPB5At81AS-AsjPHdcvLHzJ9TW8RpM,9522
89
89
  deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
90
90
  deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXCacr0Swe5f0M7DdO4,5465
91
91
  deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
@@ -137,11 +137,11 @@ deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py,sha256=kW
137
137
  deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=8HVr3EIFYFqNaJoqeCuj9xIBjM4Ch2bx-mJcO4BRrLo,16839
138
138
  deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=l_6-pAKOsRY3NbtfHsYmEaJEkq6IJueYuLsjyJxNgz4,81564
139
139
  deltacat/tests/compute/compact_partition_test_cases.py,sha256=R9eiKvxCLqcoHjAx3iOogdnXZEO9TvLbRf0wA7bcJN4,26170
140
- deltacat/tests/compute/test_compact_partition_incremental.py,sha256=Z0hyQGhMZjCaOn1Vk4qUbgDiS7HDhtdNeFQyG1PJhqA,14559
141
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=Qw74ajnKf41C3MCMvf4bIPXA6-ucKlPj_IeEqDm8rCg,12503
140
+ deltacat/tests/compute/test_compact_partition_incremental.py,sha256=lkfAraOJmEmieesf7b1BqlfTS26YjYM5xXOXoTMrsos,14989
141
+ deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=xXBA66TTfARR90m5KQs31nmiokuMy9iGQt7Z9evyG7M,12950
142
142
  deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
143
- deltacat/tests/compute/test_compact_partition_rebase.py,sha256=ztSiLgC2OpU4yz81vz-4xWzvZyrLGojtzomsW4q7Bl8,12626
144
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=CHHfNFEJW8S1We7NE1Gg6EaoKEWnaOMRxWrLyirrahc,14643
143
+ deltacat/tests/compute/test_compact_partition_rebase.py,sha256=DNcpmnBo5QoZ23BiIhJCC3zaDK0xClZLUb2-ZEEp5s4,13108
144
+ deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=Rxen3QGIaxVPa8lcO7NDMRxQ0aBjrOKn46LK5ZsfQTo,15073
145
145
  deltacat/tests/compute/test_util_common.py,sha256=0mEHo38bgH64y0XZ_zgUL_aZgQMgJOSTlOYvIJxG_MM,11825
146
146
  deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
147
147
  deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=Q3HJj1fjoe2JwRUOW8KEjbTqPIIoP2o_T3ZGH6SJnCM,13244
@@ -157,7 +157,7 @@ deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6ip
157
157
  deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
158
  deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=37DkR1u_XwhedV9cGed6FFuJTC0XmuiowHJIa_Op6uA,865
159
159
  deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
160
- deltacat/tests/compute/resource_estimation/test_delta.py,sha256=LyzRitBrasQa35Bq7rHTQInaOelSWOSoC0_dyjgpNuE,24505
160
+ deltacat/tests/compute/resource_estimation/test_delta.py,sha256=HCL2oUnCqm0E26T3HLJjMhoAsHTJIWPYGwIKRgM_H7E,25712
161
161
  deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
162
162
  deltacat/tests/compute/resource_estimation/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
163
163
  deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -210,8 +210,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
210
210
  deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
211
211
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
212
212
  deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
213
- deltacat-1.1.26.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
214
- deltacat-1.1.26.dist-info/METADATA,sha256=5p2qZYAkOXBNT_rc9PyfGJ5Id3zKfbTp3KhiqZWNxas,1733
215
- deltacat-1.1.26.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
216
- deltacat-1.1.26.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
217
- deltacat-1.1.26.dist-info/RECORD,,
213
+ deltacat-1.1.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
214
+ deltacat-1.1.27.dist-info/METADATA,sha256=VL7sWG3lO3cV3tzwTiCTgpm7h0K5Dh3GtKiqojgSgHI,1733
215
+ deltacat-1.1.27.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
216
+ deltacat-1.1.27.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
217
+ deltacat-1.1.27.dist-info/RECORD,,