deltacat 1.1.14__py3-none-any.whl → 1.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. deltacat/__init__.py +1 -1
  2. deltacat/compute/compactor/compaction_session.py +3 -2
  3. deltacat/compute/compactor/model/compact_partition_params.py +11 -1
  4. deltacat/compute/compactor/model/compaction_session_audit_info.py +2 -2
  5. deltacat/compute/compactor/model/delta_annotated.py +2 -4
  6. deltacat/compute/compactor/steps/hash_bucket.py +2 -3
  7. deltacat/compute/compactor_v2/compaction_session.py +27 -33
  8. deltacat/compute/compactor_v2/constants.py +4 -0
  9. deltacat/compute/compactor_v2/private/compaction_utils.py +112 -67
  10. deltacat/compute/compactor_v2/steps/merge.py +0 -3
  11. deltacat/compute/compactor_v2/utils/delta.py +2 -3
  12. deltacat/compute/compactor_v2/utils/io.py +0 -2
  13. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  14. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +855 -0
  15. deltacat/tests/compute/compactor_v2/test_compaction_session.py +147 -1
  16. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +330 -0
  17. deltacat/tests/compute/test_compact_partition_rebase.py +1 -1
  18. deltacat/tests/compute/test_util_create_table_deltas_repo.py +118 -0
  19. deltacat/tests/local_deltacat_storage/__init__.py +8 -5
  20. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/METADATA +1 -1
  21. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/RECORD +24 -22
  22. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/LICENSE +0 -0
  23. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/WHEEL +0 -0
  24. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/top_level.txt +0 -0
deltacat/tests/compute/compactor_v2/test_compaction_session.py
@@ -87,6 +87,7 @@ class TestCompactionSession:
     INCREMENTAL_FILE_PATH = (
         "deltacat/tests/compute/compactor_v2/data/incremental_source_date_pk.csv"
     )
+    ERROR_RATE = 0.05
 
     def test_compact_partition_when_no_input_deltas_to_compact(
         self, local_deltacat_storage_kwargs
@@ -178,7 +179,7 @@ class TestCompactionSession:
             },
             "primary_keys": [],
             "rebase_source_partition_locator": source_delta.partition_locator,
-            "rebase_source_partition_high_watermark": None,
+            "rebase_source_partition_high_watermark": source_delta.stream_position,
             "records_per_compacted_file": 4000,
             "s3_client_kwargs": {},
             "source_partition_locator": source_delta.partition_locator,
@@ -253,3 +254,148 @@ class TestCompactionSession:
         # as it should be running incremental
         assert compaction_audit.uniform_deltas_created == 1
         assert compaction_audit.input_records == 6
+
+    def test_compact_partition_when_incremental_then_rcf_stats_accurate(
+        self, s3_resource, local_deltacat_storage_kwargs
+    ):
+        """
+        A test case which asserts the RCF stats are correctly generated for
+        a rebase and incremental use-case.
+        """
+
+        # setup
+        staged_source = stage_partition_from_file_paths(
+            self.NAMESPACE, ["source"], **local_deltacat_storage_kwargs
+        )
+
+        source_delta = commit_delta_to_staged_partition(
+            staged_source, [self.BACKFILL_FILE_PATH], **local_deltacat_storage_kwargs
+        )
+
+        staged_dest = stage_partition_from_file_paths(
+            self.NAMESPACE, ["destination"], **local_deltacat_storage_kwargs
+        )
+        dest_partition = ds.commit_partition(
+            staged_dest, **local_deltacat_storage_kwargs
+        )
+
+        # action
+        rcf_url = compact_partition(
+            CompactPartitionParams.of(
+                {
+                    "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                    "compacted_file_content_type": ContentType.PARQUET,
+                    "dd_max_parallelism_ratio": 1.0,
+                    "deltacat_storage": ds,
+                    "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
+                    "destination_partition_locator": dest_partition.locator,
+                    "drop_duplicates": True,
+                    "hash_bucket_count": 2,
+                    "last_stream_position_to_compact": source_delta.stream_position,
+                    "list_deltas_kwargs": {
+                        **local_deltacat_storage_kwargs,
+                        **{"equivalent_table_types": []},
+                    },
+                    "primary_keys": ["pk"],
+                    "rebase_source_partition_locator": source_delta.partition_locator,
+                    "rebase_source_partition_high_watermark": source_delta.stream_position,
+                    "records_per_compacted_file": 4000,
+                    "s3_client_kwargs": {},
+                    "source_partition_locator": source_delta.partition_locator,
+                }
+            )
+        )
+
+        backfill_rcf = get_rcf(s3_resource, rcf_url)
+        _, compaction_audit_key = backfill_rcf.compaction_audit_url.strip(
+            "s3://"
+        ).split("/", 1)
+        compaction_audit = CompactionSessionAuditInfo(
+            **read_s3_contents(
+                s3_resource, TEST_S3_RCF_BUCKET_NAME, compaction_audit_key
+            )
+        )
+
+        assert abs(backfill_rcf.input_inflation - 0.05235042735042735) <= 1e-5
+        assert abs(backfill_rcf.input_average_record_size_bytes - 12.25) <= 1e-5
+
+        assert compaction_audit.input_records == 4
+        assert compaction_audit.records_deduped == 0
+        assert compaction_audit.records_deleted == 0
+        assert compaction_audit.untouched_file_count == 0
+        assert compaction_audit.untouched_record_count == 0
+        assert compaction_audit.untouched_size_bytes == 0
+        assert compaction_audit.untouched_file_ratio == 0
+        assert compaction_audit.uniform_deltas_created == 1
+        assert compaction_audit.hash_bucket_count == 2
+        assert compaction_audit.input_file_count == 1
+        assert compaction_audit.output_file_count == 2
+        assert abs(compaction_audit.output_size_bytes - 1832) / 1832 <= self.ERROR_RATE
+        assert abs(compaction_audit.input_size_bytes - 936) / 936 <= self.ERROR_RATE
+
+        # Now run an incremental compaction and verify if the previous RCF was read properly.
+        new_source_delta = commit_delta_to_partition(
+            source_delta.partition_locator,
+            [self.INCREMENTAL_FILE_PATH],
+            **local_deltacat_storage_kwargs,
+        )
+
+        new_destination_partition = ds.get_partition(
+            dest_partition.stream_locator, [], **local_deltacat_storage_kwargs
+        )
+
+        new_rcf_url = compact_partition(
+            CompactPartitionParams.of(
+                {
+                    "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                    "compacted_file_content_type": ContentType.PARQUET,
+                    "dd_max_parallelism_ratio": 1.0,
+                    "deltacat_storage": ds,
+                    "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
+                    "destination_partition_locator": new_destination_partition.locator,
+                    "drop_duplicates": True,
+                    "hash_bucket_count": 2,
+                    "last_stream_position_to_compact": new_source_delta.stream_position,
+                    "list_deltas_kwargs": {
+                        **local_deltacat_storage_kwargs,
+                        **{"equivalent_table_types": []},
+                    },
+                    "primary_keys": ["pk"],
+                    "rebase_source_partition_locator": None,
+                    "rebase_source_partition_high_watermark": None,
+                    "records_per_compacted_file": 4000,
+                    "s3_client_kwargs": {},
+                    "source_partition_locator": new_source_delta.partition_locator,
+                }
+            )
+        )
+
+        new_rcf = get_rcf(s3_resource, new_rcf_url)
+        _, compaction_audit_key = new_rcf.compaction_audit_url.strip("s3://").split(
+            "/", 1
+        )
+        compaction_audit = CompactionSessionAuditInfo(
+            **read_s3_contents(
+                s3_resource, TEST_S3_RCF_BUCKET_NAME, compaction_audit_key
+            )
+        )
+
+        # as it should be running incremental
+        assert abs(new_rcf.input_inflation - 0.027292576419213975) <= 1e-5
+        assert abs(new_rcf.input_average_record_size_bytes - 12.5) <= 1e-5
+
+        assert compaction_audit.input_records == 6
+        assert compaction_audit.records_deduped == 1
+        assert compaction_audit.records_deleted == 0
+        assert compaction_audit.untouched_file_count == 1
+        assert compaction_audit.untouched_record_count == 2
+        assert (
+            abs(compaction_audit.untouched_size_bytes - 916) / 916 <= self.ERROR_RATE
+        )  # 5% error
+        assert abs(compaction_audit.untouched_file_ratio - 50) <= 1e-5
+        assert compaction_audit.uniform_deltas_created == 1
+        assert compaction_audit.hash_bucket_count == 2
+        assert compaction_audit.input_file_count == 3
+        assert compaction_audit.output_file_count == 2
+        assert abs(compaction_audit.output_size_bytes - 1843) / 1843 <= self.ERROR_RATE
+        assert abs(compaction_audit.input_size_bytes - 2748) / 2748 <= self.ERROR_RATE
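The size assertions above use the new ERROR_RATE constant to allow a 5% relative error rather than exact byte counts, presumably because compacted Parquet sizes are not byte-stable across environments. As a minimal sketch (not part of the diff), the same bound can be expressed with pytest.approx; the byte values below are made up for illustration:

import pytest

ERROR_RATE = 0.05  # same 5% tolerance the test class defines

def check_size(actual_bytes: int, expected_bytes: int) -> None:
    # equivalent to: abs(actual - expected) / expected <= ERROR_RATE
    assert actual_bytes == pytest.approx(expected_bytes, rel=ERROR_RATE)

check_size(1832, 1800)  # hypothetical values: passes, ~1.8% relative error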
deltacat/tests/compute/test_compact_partition_multiple_rounds.py
@@ -0,0 +1,330 @@
+import ray
+import os
+from moto import mock_s3
+import pytest
+import boto3
+from boto3.resources.base import ServiceResource
+import pyarrow as pa
+from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
+from pytest_benchmark.fixture import BenchmarkFixture
+
+from deltacat.tests.compute.test_util_constant import (
+    TEST_S3_RCF_BUCKET_NAME,
+    DEFAULT_NUM_WORKERS,
+    DEFAULT_WORKER_INSTANCE_CPUS,
+)
+from deltacat.tests.compute.test_util_common import (
+    get_rcf,
+)
+from deltacat.tests.test_utils.utils import read_s3_contents
+from deltacat.compute.compactor.model.compactor_version import CompactorVersion
+from deltacat.tests.compute.test_util_common import (
+    get_compacted_delta_locator_from_rcf,
+)
+from deltacat.compute.compactor.model.compaction_session_audit_info import (
+    CompactionSessionAuditInfo,
+)
+from deltacat.tests.compute.test_util_create_table_deltas_repo import (
+    multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy,
+)
+from deltacat.tests.compute.compact_partition_multiple_rounds_test_cases import (
+    MULTIPLE_ROUNDS_TEST_CASES,
+)
+from typing import Any, Callable, Dict, List, Optional, Set
+from deltacat.types.media import StorageType
+from deltacat.storage import (
+    DeltaLocator,
+    Partition,
+)
+from deltacat.types.media import ContentType
+from deltacat.compute.compactor.model.compact_partition_params import (
+    CompactPartitionParams,
+)
+from deltacat.compute.compactor import (
+    RoundCompletionInfo,
+)
+from deltacat.utils.placement import (
+    PlacementGroupManager,
+)
+
+DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
+    "db_file_path",
+    "deltacat/tests/local_deltacat_storage/db_test.sqlite",
+)
+
+
+"""
+MODULE scoped fixtures
+"""
+
+
+@pytest.fixture(autouse=True, scope="module")
+def setup_ray_cluster():
+    ray.init(local_mode=True, ignore_reinit_error=True)
+    yield
+    ray.shutdown()
+
+
+@pytest.fixture(autouse=True, scope="module")
+def mock_aws_credential():
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_ID"] = "testing"
+    os.environ["AWS_SECURITY_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
+    yield
+
+
+@pytest.fixture(autouse=True, scope="module")
+def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
+    # make sure the database file is deleted after all the compactor package tests are completed
+    if os.path.exists(DATABASE_FILE_PATH_VALUE):
+        os.remove(DATABASE_FILE_PATH_VALUE)
+
+
+@pytest.fixture(scope="module")
+def s3_resource(mock_aws_credential):
+    with mock_s3():
+        yield boto3.resource("s3")
+
+
+@pytest.fixture(autouse=True, scope="module")
+def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
+    s3_resource.create_bucket(
+        ACL="authenticated-read",
+        Bucket=TEST_S3_RCF_BUCKET_NAME,
+    )
+    yield
+
+
+"""
+FUNCTION scoped fixtures
+"""
+
+
+@pytest.fixture(scope="function")
+def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
+    # see deltacat/tests/local_deltacat_storage/README.md for documentation
+    kwargs_for_local_deltacat_storage: Dict[str, Any] = {
+        DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
+    }
+    yield kwargs_for_local_deltacat_storage
+    if os.path.exists(DATABASE_FILE_PATH_VALUE):
+        os.remove(DATABASE_FILE_PATH_VALUE)
+
+
+@pytest.mark.parametrize(
+    [
+        "test_name",
+        "primary_keys",
+        "sort_keys",
+        "partition_keys_param",
+        "partition_values_param",
+        "input_deltas_param",
+        "expected_terminal_compact_partition_result",
+        "expected_terminal_exception",
+        "expected_terminal_exception_message",
+        "create_placement_group_param",
+        "records_per_compacted_file_param",
+        "hash_bucket_count_param",
+        "read_kwargs_provider_param",
+        "drop_duplicates_param",
+        "skip_enabled_compact_partition_drivers",
+        "assert_compaction_audit",
+        "rebase_expected_compact_partition_result",
+        "num_rounds_param",
+        "compact_partition_func",
+        "compactor_version",
+    ],
+    [
+        (
+            test_name,
+            primary_keys,
+            sort_keys,
+            partition_keys_param,
+            partition_values_param,
+            input_deltas,
+            expected_terminal_compact_partition_result,
+            expected_terminal_exception,
+            expected_terminal_exception_message,
+            create_placement_group_param,
+            records_per_compacted_file_param,
+            hash_bucket_count_param,
+            drop_duplicates_param,
+            read_kwargs_provider,
+            skip_enabled_compact_partition_drivers,
+            assert_compaction_audit,
+            rebase_expected_compact_partition_result,
+            num_rounds_param,
+            compact_partition_func,
+            compactor_version,
+        )
+        for test_name, (
+            primary_keys,
+            sort_keys,
+            partition_keys_param,
+            partition_values_param,
+            input_deltas,
+            expected_terminal_compact_partition_result,
+            expected_terminal_exception,
+            expected_terminal_exception_message,
+            create_placement_group_param,
+            records_per_compacted_file_param,
+            hash_bucket_count_param,
+            drop_duplicates_param,
+            read_kwargs_provider,
+            skip_enabled_compact_partition_drivers,
+            assert_compaction_audit,
+            rebase_expected_compact_partition_result,
+            num_rounds_param,
+            compact_partition_func,
+            compactor_version,
+        ) in MULTIPLE_ROUNDS_TEST_CASES.items()
+    ],
+    ids=[test_name for test_name in MULTIPLE_ROUNDS_TEST_CASES],
+)
+def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
+    mocker,
+    s3_resource: ServiceResource,
+    local_deltacat_storage_kwargs: Dict[str, Any],
+    test_name: str,
+    primary_keys: Set[str],
+    sort_keys: List[Optional[Any]],
+    partition_keys_param: Optional[List[Any]],
+    partition_values_param: List[Optional[str]],
+    input_deltas_param: List[pa.Array],
+    expected_terminal_compact_partition_result: pa.Table,
+    expected_terminal_exception: BaseException,
+    expected_terminal_exception_message: Optional[str],
+    create_placement_group_param: bool,
+    records_per_compacted_file_param: int,
+    hash_bucket_count_param: int,
+    drop_duplicates_param: bool,
+    read_kwargs_provider_param: Any,
+    rebase_expected_compact_partition_result: pa.Table,
+    skip_enabled_compact_partition_drivers: List[CompactorVersion],
+    assert_compaction_audit: Optional[Callable],
+    compactor_version: Optional[CompactorVersion],
+    compact_partition_func: Callable,
+    num_rounds_param: int,
+    benchmark: BenchmarkFixture,
+):
+    import deltacat.tests.local_deltacat_storage as ds
+
+    ds_mock_kwargs = local_deltacat_storage_kwargs
+    """
+    This test tests different multi-round compaction rebase configurations,
+    as specified in compact_partition_multiple_rounds_test_cases.py
+    These tests do not test multi-round compaction backfill, which is
+    currently unsupported.
+    """
+    (
+        source_table_stream,
+        _,
+        rebased_table_stream,
+        _,
+    ) = multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
+        primary_keys,
+        sort_keys,
+        partition_keys_param,
+        input_deltas_param,
+        partition_values_param,
+        ds_mock_kwargs,
+    )
+    source_partition: Partition = ds.get_partition(
+        source_table_stream.locator,
+        partition_values_param,
+        **ds_mock_kwargs,
+    )
+    rebased_partition: Partition = ds.get_partition(
+        rebased_table_stream.locator,
+        partition_values_param,
+        **ds_mock_kwargs,
+    )
+    total_cpus = DEFAULT_NUM_WORKERS * DEFAULT_WORKER_INSTANCE_CPUS
+    pgm = None
+    if create_placement_group_param:
+        pgm = PlacementGroupManager(
+            1, total_cpus, DEFAULT_WORKER_INSTANCE_CPUS, memory_per_bundle=4000000
+        ).pgs[0]
+    compact_partition_params = CompactPartitionParams.of(
+        {
+            "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+            "compacted_file_content_type": ContentType.PARQUET,
+            "dd_max_parallelism_ratio": 1.0,
+            "deltacat_storage": ds,
+            "deltacat_storage_kwargs": ds_mock_kwargs,
+            "destination_partition_locator": rebased_partition.locator,
+            "hash_bucket_count": hash_bucket_count_param,
+            "last_stream_position_to_compact": source_partition.stream_position,
+            "list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
+            "object_store": RayPlasmaObjectStore(),
+            "pg_config": pgm,
+            "primary_keys": primary_keys,
+            "read_kwargs_provider": read_kwargs_provider_param,
+            "rebase_source_partition_locator": source_partition.locator,
+            "rebase_source_partition_high_watermark": rebased_partition.stream_position,
+            "records_per_compacted_file": records_per_compacted_file_param,
+            "s3_client_kwargs": {},
+            "source_partition_locator": rebased_partition.locator,
+            "sort_keys": sort_keys if sort_keys else None,
+            "num_rounds": num_rounds_param,
+            "drop_duplicates": drop_duplicates_param,
+            "min_delta_bytes": 560,
+        }
+    )
+    if expected_terminal_exception:
+        with pytest.raises(expected_terminal_exception) as exc_info:
+            benchmark(compact_partition_func, compact_partition_params)
+        assert expected_terminal_exception_message in str(exc_info.value)
+        return
+    from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
+        ExecutionCompactionResult,
+    )
+
+    execute_compaction_result_spy = mocker.spy(ExecutionCompactionResult, "__init__")
+
+    # execute
+    rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
+
+    round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
+    audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
+        round_completion_info.compaction_audit_url
+    )
+
+    compaction_audit_obj: Dict[str, Any] = read_s3_contents(
+        s3_resource, audit_bucket, audit_key
+    )
+    compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
+        **compaction_audit_obj
+    )
+
+    # Assert not in-place compacted
+    assert (
+        execute_compaction_result_spy.call_args.args[-1] is False
+    ), "Table version erroneously marked as in-place compacted!"
+    compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
+        s3_resource, rcf_file_s3_uri
+    )
+    tables = ds.download_delta(
+        compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
+    )
+    actual_rebase_compacted_table = pa.concat_tables(tables)
+    # if no primary key is specified then sort by sort_key for consistent assertion
+    sorting_cols: List[Any] = (
+        [(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
+    )
+    rebase_expected_compact_partition_result = (
+        rebase_expected_compact_partition_result.combine_chunks().sort_by(sorting_cols)
+    )
+    actual_rebase_compacted_table = (
+        actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
+    )
+    assert actual_rebase_compacted_table.equals(
+        rebase_expected_compact_partition_result
+    ), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
+
+    if assert_compaction_audit:
+        if not assert_compaction_audit(compactor_version, compaction_audit):
+            assert False, "Compaction audit assertion failed"
+    return
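The new test above drives pytest.mark.parametrize from the MULTIPLE_ROUNDS_TEST_CASES dict, unpacking each value tuple into named parameters and reusing the dict keys as test IDs. A minimal standalone sketch of that pattern (the CASES mapping and values are hypothetical):

import pytest

CASES = {
    "two_rounds": (2, 4),   # hypothetical (num_rounds, expected_output_files)
    "four_rounds": (4, 2),
}

@pytest.mark.parametrize(
    ["test_name", "num_rounds", "expected_output_files"],
    [(name, *params) for name, params in CASES.items()],
    ids=list(CASES),  # dict keys double as readable test IDs
)
def test_rounds(test_name, num_rounds, expected_output_files):
    assert num_rounds * expected_output_files == 8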
deltacat/tests/compute/test_compact_partition_rebase.py
@@ -279,7 +279,7 @@ def test_compact_partition_rebase_same_source_and_destination(
     execute_compaction_result_spy = mocker.spy(ExecutionCompactionResult, "__init__")
 
     # execute
-    rcf_file_s3_uri = compact_partition_func(compact_partition_params)
+    rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
 
     round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
     audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
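This one-line change (and the equivalent call in the new multi-round test) routes compaction through pytest-benchmark's benchmark fixture, which times the callable and still forwards its return value, so the downstream RCF assertions keep working unchanged. A minimal sketch of that call-through behavior, with a hypothetical compact stand-in:

# Sketch: pytest-benchmark's fixture times a callable and returns its result.
def compact(params: dict) -> str:
    # hypothetical stand-in for compact_partition_func
    return f"s3://bucket/rcf/{params['partition']}.json"

def test_compact_is_benchmarked(benchmark):
    rcf_uri = benchmark(compact, {"partition": "p1"})
    assert rcf_uri.endswith("p1.json")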
deltacat/tests/compute/test_util_create_table_deltas_repo.py
@@ -269,3 +269,121 @@ def create_src_w_deltas_destination_rebase_w_deltas_strategy(
         destination_table_stream,
         rebased_stream_after_committed,
     )
+
+
+def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
+    primary_keys: Set[str],
+    sort_keys: Optional[List[Any]],
+    partition_keys: Optional[List[PartitionKey]],
+    input_deltas: List[pa.Table],
+    partition_values: Optional[List[Any]],
+    ds_mock_kwargs: Optional[Dict[str, Any]],
+) -> Tuple[Stream, Stream, Optional[Stream], bool]:
+    import deltacat.tests.local_deltacat_storage as ds
+    from deltacat.storage import Partition, Stream
+
+    source_namespace, source_table_name, source_table_version = create_src_table(
+        primary_keys, sort_keys, partition_keys, ds_mock_kwargs
+    )
+
+    source_table_stream: Stream = ds.get_stream(
+        namespace=source_namespace,
+        table_name=source_table_name,
+        table_version=source_table_version,
+        **ds_mock_kwargs,
+    )
+    staged_partition: Partition = ds.stage_partition(
+        source_table_stream, partition_values, **ds_mock_kwargs
+    )
+    is_delete = False
+    input_delta_length = 0
+    for (
+        input_delta,
+        input_delta_type,
+        input_delta_parameters,
+    ) in input_deltas:
+        if input_delta_type is DeltaType.DELETE:
+            is_delete = True
+        staged_delta = ds.stage_delta(
+            input_delta,
+            staged_partition,
+            input_delta_type,
+            delete_parameters=input_delta_parameters,
+            **ds_mock_kwargs,
+        )
+        ds.commit_delta(
+            staged_delta,
+            **ds_mock_kwargs,
+        )
+        input_delta_length += len(input_delta) if input_delta else 0
+    ds.commit_partition(staged_partition, **ds_mock_kwargs)
+    source_table_stream_after_committed: Stream = ds.get_stream(
+        namespace=source_namespace,
+        table_name=source_table_name,
+        table_version=source_table_version,
+        **ds_mock_kwargs,
+    )
+    # create the destination table
+    (
+        destination_table_namespace,
+        destination_table_name,
+        destination_table_version,
+    ) = create_destination_table(
+        primary_keys, sort_keys, partition_keys, ds_mock_kwargs
+    )
+    # create the rebase table
+    (
+        rebase_table_namespace,
+        rebase_table_name,
+        rebase_table_version,
+    ) = create_rebase_table(primary_keys, sort_keys, partition_keys, ds_mock_kwargs)
+    rebasing_table_stream: Stream = ds.get_stream(
+        namespace=rebase_table_namespace,
+        table_name=rebase_table_name,
+        table_version=rebase_table_version,
+        **ds_mock_kwargs,
+    )
+    staged_partition: Partition = ds.stage_partition(
+        rebasing_table_stream, partition_values, **ds_mock_kwargs
+    )
+    input_delta_length = 0
+    for (
+        input_delta,
+        input_delta_type,
+        input_delta_parameters,
+    ) in input_deltas:
+        if input_delta_type is DeltaType.DELETE:
+            is_delete = True
+        staged_delta = ds.stage_delta(
+            input_delta,
+            staged_partition,
+            input_delta_type,
+            delete_parameters=input_delta_parameters,
+            **ds_mock_kwargs,
+        )
+        ds.commit_delta(
+            staged_delta,
+            **ds_mock_kwargs,
+        )
+        input_delta_length += len(input_delta) if input_delta else 0
+    ds.commit_partition(staged_partition, **ds_mock_kwargs)
+
+    # get streams
+    destination_table_stream: Stream = ds.get_stream(
+        namespace=destination_table_namespace,
+        table_name=destination_table_name,
+        table_version=destination_table_version,
+        **ds_mock_kwargs,
+    )
+    rebased_stream_after_committed: Stream = ds.get_stream(
+        namespace=rebase_table_namespace,
+        table_name=rebase_table_name,
+        table_version=rebase_table_version,
+        **ds_mock_kwargs,
+    )
+    return (
+        source_table_stream_after_committed,
+        destination_table_stream,
+        rebased_stream_after_committed,
+        is_delete,
+    )
deltacat/tests/local_deltacat_storage/__init__.py
@@ -394,12 +394,13 @@ def download_delta(
     **kwargs,
 ) -> Union[LocalDataset, DistributedDataset]:  # type: ignore
     result = []
-    manifest = get_delta_manifest(delta_like, *args, **kwargs)
-
+    if isinstance(delta_like, Delta) and delta_like.manifest is not None:
+        manifest = Delta(delta_like).manifest
+    else:
+        manifest = get_delta_manifest(delta_like, *args, **kwargs)
     partition_values: PartitionValues = None
     if partition_filter is not None:
         partition_values = partition_filter.partition_values
-
     for entry_index in range(len(manifest.entries)):
         if (
             partition_values is not None
@@ -440,8 +441,10 @@ def download_delta_manifest_entry(
     **kwargs,
 ) -> LocalTable:
     cur, con = _get_sqlite3_cursor_con(kwargs)
-
-    manifest = get_delta_manifest(delta_like, *args, **kwargs)
+    if isinstance(delta_like, Delta) and delta_like.manifest is not None:
+        manifest = Delta(delta_like).manifest
+    else:
+        manifest = get_delta_manifest(delta_like, *args, **kwargs)
     if entry_index >= len(manifest.entries):
         raise IndexError(
             f"Manifest entry index {entry_index} does not exist. "
{deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 1.1.14
+Version: 1.1.16
 Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team