deltacat-1.1.26-py3-none-any.whl → deltacat-1.1.27-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.26"
47
+ __version__ = "1.1.27"
48
48
 
49
49
 
50
50
  __all__ = [
deltacat/compute/compactor_v2/private/compaction_utils.py CHANGED
@@ -584,8 +584,11 @@ def _process_merge_results(
584
584
  f"Duplicate record count ({duplicate_hash_bucket_mat_results}) is as large "
585
585
  f"as or greater than params.num_rounds, which is {params.num_rounds}"
586
586
  )
587
+ # ensure start index is the first file index if task index is same
587
588
  hb_id_to_entry_indices_range[str(mat_result.task_index)] = (
588
- file_index,
589
+ hb_id_to_entry_indices_range.get(str(mat_result.task_index), [file_index])[
590
+ 0
591
+ ],
589
592
  file_index + mat_result.pyarrow_write_result.files,
590
593
  )
591
594
 
deltacat/compute/resource_estimation/delta.py CHANGED
@@ -188,7 +188,7 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
188
188
  sampled_on_disk_size += delta.manifest.entries[entry_index].meta.content_length
189
189
  sampled_num_rows += len(tbl)
190
190
 
191
- if not sampled_on_disk_size:
191
+ if not sampled_on_disk_size or not sampled_in_memory_size:
192
192
  return EstimatedResources.of(
193
193
  memory_bytes=0,
194
194
  statistics=Statistics.of(
deltacat/tests/compute/resource_estimation/test_delta.py CHANGED
@@ -437,6 +437,43 @@ class TestEstimateResourcesRequiredToProcessDelta:
437
437
  == parquet_delta_with_manifest.meta.content_length
438
438
  )
439
439
 
440
+ def test_parquet_delta_when_file_sampling_and_arrow_size_zero(
441
+ self,
442
+ local_deltacat_storage_kwargs,
443
+ parquet_delta_with_manifest: Delta,
444
+ monkeypatch,
445
+ ):
446
+ params = EstimateResourcesParams.of(
447
+ resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
448
+ max_files_to_sample=2,
449
+ )
450
+
451
+ def mock_func(*args, **kwargs):
452
+ class MockedValue:
453
+ nbytes = 0
454
+
455
+ def __len__(self):
456
+ return 0
457
+
458
+ return MockedValue()
459
+
460
+ monkeypatch.setattr(ds, "download_delta_manifest_entry", mock_func)
461
+
462
+ result = estimate_resources_required_to_process_delta(
463
+ delta=parquet_delta_with_manifest,
464
+ operation_type=OperationType.PYARROW_DOWNLOAD,
465
+ deltacat_storage=ds,
466
+ deltacat_storage_kwargs=local_deltacat_storage_kwargs,
467
+ estimate_resources_params=params,
468
+ )
469
+
470
+ assert parquet_delta_with_manifest.manifest is not None
471
+ assert result.memory_bytes == 0
472
+ assert (
473
+ result.statistics.on_disk_size_bytes
474
+ == parquet_delta_with_manifest.meta.content_length
475
+ )
476
+
440
477
  def test_delta_manifest_utsv_when_file_sampling(
441
478
  self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
442
479
  ):
deltacat/tests/compute/test_compact_partition_incremental.py CHANGED
@@ -328,6 +328,16 @@ def test_compact_partition_incremental(
328
328
  **compaction_audit_obj
329
329
  )
330
330
 
331
+ # assert if RCF covers all files
332
+ if compactor_version != CompactorVersion.V1.value:
333
+ previous_end = None
334
+ for start, end in round_completion_info.hb_index_to_entry_range.values():
335
+ assert (previous_end is None and start == 0) or start == previous_end
336
+ previous_end = end
337
+ assert (
338
+ previous_end == round_completion_info.compacted_pyarrow_write_result.files
339
+ )
340
+
331
341
  tables = ds.download_delta(
332
342
  compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
333
343
  )
deltacat/tests/compute/test_compact_partition_multiple_rounds.py CHANGED
@@ -309,6 +309,16 @@ def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
309
309
  **compaction_audit_obj
310
310
  )
311
311
 
312
+ # assert if RCF covers all files
313
+ # multiple rounds feature is only supported in V2 compactor
314
+ previous_end = None
315
+ for start, end in round_completion_info.hb_index_to_entry_range.values():
316
+ assert (previous_end is None and start == 0) or start == previous_end
317
+ previous_end = end
318
+ assert (
319
+ previous_end == round_completion_info.compacted_pyarrow_write_result.files
320
+ )
321
+
312
322
  # Assert not in-place compacted
313
323
  assert (
314
324
  execute_compaction_result_spy.call_args.args[-1] is False
deltacat/tests/compute/test_compact_partition_rebase.py CHANGED
@@ -299,6 +299,17 @@ def test_compact_partition_rebase_same_source_and_destination(
299
299
  round_completion_info.compaction_audit_url
300
300
  )
301
301
 
302
+ # assert if RCF covers all files
303
+ if compactor_version != CompactorVersion.V1.value:
304
+ previous_end = None
305
+ for start, end in round_completion_info.hb_index_to_entry_range.values():
306
+ assert (previous_end is None and start == 0) or start == previous_end
307
+ previous_end = end
308
+ assert (
309
+ previous_end
310
+ == round_completion_info.compacted_pyarrow_write_result.files
311
+ )
312
+
302
313
  compaction_audit_obj: Dict[str, Any] = read_s3_contents(
303
314
  s3_resource, audit_bucket, audit_key
304
315
  )
deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py CHANGED
@@ -355,6 +355,16 @@ def test_compact_partition_rebase_then_incremental(
355
355
  compacted_delta_locator_incremental: DeltaLocator = (
356
356
  round_completion_info.compacted_delta_locator
357
357
  )
358
+ # assert if RCF covers all files
359
+ if compactor_version != CompactorVersion.V1.value:
360
+ previous_end = None
361
+ for start, end in round_completion_info.hb_index_to_entry_range.values():
362
+ assert (previous_end is None and start == 0) or start == previous_end
363
+ previous_end = end
364
+ assert (
365
+ previous_end == round_completion_info.compacted_pyarrow_write_result.files
366
+ )
367
+
358
368
  audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
359
369
  "s3://", ""
360
370
  ).split("/", 1)
deltacat-1.1.27.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.26
3
+ Version: 1.1.27
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
deltacat-1.1.27.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- deltacat/__init__.py,sha256=N7LrDYFJUaYdchJUVZ8VN_9QUJzuETzkz-oT833iEr4,1778
1
+ deltacat/__init__.py,sha256=NNgt1N6a4dwztCKl6C7klF3mQEn-S-sBHNZPKPqRHko,1778
2
2
  deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
3
3
  deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
4
4
  deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -66,7 +66,7 @@ deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViV
66
66
  deltacat/compute/compactor_v2/model/merge_input.py,sha256=-SxTE0e67z2V7MiMEVz5aMu4E0k8h3-vqohvUUOC0do,5659
67
67
  deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
68
68
  deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=e8pZFobq6KBCy67ZRn2z1CAwNVjPIJnAiD4HHDmDbCk,30757
69
+ deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=fMWXg1SCIIgjk9p_OFYrcm760dOKNbFO1Lj3_JI3GCY,30929
70
70
  deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
72
72
  deltacat/compute/compactor_v2/steps/merge.py,sha256=LpktsDPfj7Of6RgUw9w1f3Y3OBkPDjvtyXjzFaIDoSo,21771
@@ -85,7 +85,7 @@ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtL
85
85
  deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
86
  deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
87
87
  deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
88
- deltacat/compute/resource_estimation/delta.py,sha256=Ei4v9UYhtcT5P-wNEMAg0E4mYl0z5FpSkaTufVoGD18,9492
88
+ deltacat/compute/resource_estimation/delta.py,sha256=8oRy1rgGUimwMqPB5At81AS-AsjPHdcvLHzJ9TW8RpM,9522
89
89
  deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
90
90
  deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXCacr0Swe5f0M7DdO4,5465
91
91
  deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
@@ -137,11 +137,11 @@ deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py,sha256=kW
137
137
  deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=8HVr3EIFYFqNaJoqeCuj9xIBjM4Ch2bx-mJcO4BRrLo,16839
138
138
  deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=l_6-pAKOsRY3NbtfHsYmEaJEkq6IJueYuLsjyJxNgz4,81564
139
139
  deltacat/tests/compute/compact_partition_test_cases.py,sha256=R9eiKvxCLqcoHjAx3iOogdnXZEO9TvLbRf0wA7bcJN4,26170
140
- deltacat/tests/compute/test_compact_partition_incremental.py,sha256=Z0hyQGhMZjCaOn1Vk4qUbgDiS7HDhtdNeFQyG1PJhqA,14559
141
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=Qw74ajnKf41C3MCMvf4bIPXA6-ucKlPj_IeEqDm8rCg,12503
140
+ deltacat/tests/compute/test_compact_partition_incremental.py,sha256=lkfAraOJmEmieesf7b1BqlfTS26YjYM5xXOXoTMrsos,14989
141
+ deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=xXBA66TTfARR90m5KQs31nmiokuMy9iGQt7Z9evyG7M,12950
142
142
  deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
143
- deltacat/tests/compute/test_compact_partition_rebase.py,sha256=ztSiLgC2OpU4yz81vz-4xWzvZyrLGojtzomsW4q7Bl8,12626
144
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=CHHfNFEJW8S1We7NE1Gg6EaoKEWnaOMRxWrLyirrahc,14643
143
+ deltacat/tests/compute/test_compact_partition_rebase.py,sha256=DNcpmnBo5QoZ23BiIhJCC3zaDK0xClZLUb2-ZEEp5s4,13108
144
+ deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=Rxen3QGIaxVPa8lcO7NDMRxQ0aBjrOKn46LK5ZsfQTo,15073
145
145
  deltacat/tests/compute/test_util_common.py,sha256=0mEHo38bgH64y0XZ_zgUL_aZgQMgJOSTlOYvIJxG_MM,11825
146
146
  deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
147
147
  deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=Q3HJj1fjoe2JwRUOW8KEjbTqPIIoP2o_T3ZGH6SJnCM,13244
@@ -157,7 +157,7 @@ deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6ip
157
157
  deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
158
  deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=37DkR1u_XwhedV9cGed6FFuJTC0XmuiowHJIa_Op6uA,865
159
159
  deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
160
- deltacat/tests/compute/resource_estimation/test_delta.py,sha256=LyzRitBrasQa35Bq7rHTQInaOelSWOSoC0_dyjgpNuE,24505
160
+ deltacat/tests/compute/resource_estimation/test_delta.py,sha256=HCL2oUnCqm0E26T3HLJjMhoAsHTJIWPYGwIKRgM_H7E,25712
161
161
  deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
162
162
  deltacat/tests/compute/resource_estimation/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
163
163
  deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -210,8 +210,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
210
210
  deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
211
211
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
212
212
  deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
213
- deltacat-1.1.26.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
214
- deltacat-1.1.26.dist-info/METADATA,sha256=5p2qZYAkOXBNT_rc9PyfGJ5Id3zKfbTp3KhiqZWNxas,1733
215
- deltacat-1.1.26.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
216
- deltacat-1.1.26.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
217
- deltacat-1.1.26.dist-info/RECORD,,
213
+ deltacat-1.1.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
214
+ deltacat-1.1.27.dist-info/METADATA,sha256=VL7sWG3lO3cV3tzwTiCTgpm7h0K5Dh3GtKiqojgSgHI,1733
215
+ deltacat-1.1.27.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
216
+ deltacat-1.1.27.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
217
+ deltacat-1.1.27.dist-info/RECORD,,