deltacat 1.1.6__py3-none-any.whl → 1.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. deltacat/__init__.py +1 -1
  2. deltacat/aws/constants.py +21 -4
  3. deltacat/aws/s3u.py +48 -21
  4. deltacat/compute/compactor/model/round_completion_info.py +4 -0
  5. deltacat/compute/compactor_v2/compaction_session.py +51 -25
  6. deltacat/compute/compactor_v2/constants.py +12 -0
  7. deltacat/compute/compactor_v2/model/compaction_session.py +21 -0
  8. deltacat/compute/compactor_v2/steps/hash_bucket.py +6 -0
  9. deltacat/compute/compactor_v2/steps/merge.py +6 -0
  10. deltacat/compute/compactor_v2/utils/task_options.py +4 -1
  11. deltacat/storage/interface.py +10 -3
  12. deltacat/tests/aws/test_s3u.py +84 -3
  13. deltacat/tests/catalog/test_default_catalog_impl.py +2 -0
  14. deltacat/tests/compute/compact_partition_test_cases.py +61 -0
  15. deltacat/tests/compute/compactor_v2/test_compaction_session.py +2 -0
  16. deltacat/tests/compute/test_compact_partition_incremental.py +89 -32
  17. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +21 -26
  18. deltacat/tests/compute/test_util_create_table_deltas_repo.py +45 -2
  19. deltacat/tests/local_deltacat_storage/__init__.py +38 -19
  20. deltacat/tests/utils/ray_utils/__init__.py +0 -0
  21. deltacat/tests/utils/ray_utils/test_concurrency.py +50 -0
  22. deltacat/tests/utils/test_resources.py +28 -0
  23. deltacat/utils/resources.py +45 -0
  24. {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/METADATA +5 -6
  25. {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/RECORD +28 -25
  26. {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/WHEEL +1 -1
  27. {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/LICENSE +0 -0
  28. {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,7 @@ from deltacat.storage import (
22
22
  from deltacat.compute.compactor_v2.compaction_session import (
23
23
  compact_partition as compact_partition_v2,
24
24
  )
25
+ from deltacat.storage import DeleteParameters
25
26
 
26
27
  from deltacat.compute.compactor.model.compactor_version import CompactorVersion
27
28
 
@@ -89,9 +90,13 @@ class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
89
90
  """
90
91
  Args:
91
92
  is_inplace: bool - argument to indicate whether to try compacting an in-place compacted table (the source table is the destination table). Also needed to control whether the destination table is created
93
+ add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]] - late deltas (table, delta type, optional delete parameters) to add to the source_partition after we've triggered compaction; None means no late deltas are added
92
94
  """
93
95
 
94
96
  is_inplace: bool
97
+ add_late_deltas: Optional[
98
+ List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]
99
+ ]
95
100
 
96
101
 
97
102
  @dataclass(frozen=True)
@@ -148,6 +153,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
148
153
  read_kwargs_provider=None,
149
154
  drop_duplicates=True,
150
155
  is_inplace=False,
156
+ add_late_deltas=None,
151
157
  skip_enabled_compact_partition_drivers=None,
152
158
  ),
153
159
  "2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
@@ -175,6 +181,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
175
181
  read_kwargs_provider=None,
176
182
  drop_duplicates=True,
177
183
  is_inplace=False,
184
+ add_late_deltas=None,
178
185
  skip_enabled_compact_partition_drivers=None,
179
186
  ),
180
187
  "3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
@@ -211,6 +218,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
211
218
  read_kwargs_provider=None,
212
219
  drop_duplicates=True,
213
220
  is_inplace=False,
221
+ add_late_deltas=None,
214
222
  skip_enabled_compact_partition_drivers=None,
215
223
  ),
216
224
  "4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
@@ -246,6 +254,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
246
254
  read_kwargs_provider=None,
247
255
  drop_duplicates=True,
248
256
  is_inplace=False,
257
+ add_late_deltas=None,
249
258
  skip_enabled_compact_partition_drivers=None,
250
259
  ),
251
260
  "5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
@@ -276,6 +285,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
276
285
  read_kwargs_provider=None,
277
286
  drop_duplicates=True,
278
287
  is_inplace=False,
288
+ add_late_deltas=None,
279
289
  skip_enabled_compact_partition_drivers=None,
280
290
  ),
281
291
  "6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
@@ -306,6 +316,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
306
316
  read_kwargs_provider=None,
307
317
  drop_duplicates=True,
308
318
  is_inplace=False,
319
+ add_late_deltas=None,
309
320
  skip_enabled_compact_partition_drivers=None,
310
321
  ),
311
322
  "7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
@@ -336,6 +347,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
336
347
  read_kwargs_provider=None,
337
348
  drop_duplicates=True,
338
349
  is_inplace=False,
350
+ add_late_deltas=None,
339
351
  skip_enabled_compact_partition_drivers=None,
340
352
  ),
341
353
  "8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
@@ -368,6 +380,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
368
380
  read_kwargs_provider=None,
369
381
  drop_duplicates=True,
370
382
  is_inplace=False,
383
+ add_late_deltas=None,
371
384
  skip_enabled_compact_partition_drivers=None,
372
385
  ),
373
386
  "9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
@@ -398,6 +411,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
398
411
  read_kwargs_provider=None,
399
412
  drop_duplicates=True,
400
413
  is_inplace=False,
414
+ add_late_deltas=None,
401
415
  skip_enabled_compact_partition_drivers=None,
402
416
  ),
403
417
  "10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
@@ -428,6 +442,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
428
442
  read_kwargs_provider=None,
429
443
  drop_duplicates=True,
430
444
  is_inplace=False,
445
+ add_late_deltas=None,
431
446
  skip_enabled_compact_partition_drivers=None,
432
447
  ),
433
448
  "11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
@@ -458,6 +473,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
458
473
  read_kwargs_provider=None,
459
474
  drop_duplicates=True,
460
475
  is_inplace=False,
476
+ add_late_deltas=None,
461
477
  skip_enabled_compact_partition_drivers=None,
462
478
  ),
463
479
  "12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
@@ -488,6 +504,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
488
504
  read_kwargs_provider=None,
489
505
  drop_duplicates=True,
490
506
  is_inplace=False,
507
+ add_late_deltas=None,
491
508
  skip_enabled_compact_partition_drivers=None,
492
509
  ),
493
510
  "13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
@@ -518,6 +535,50 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
518
535
  read_kwargs_provider=None,
519
536
  drop_duplicates=True,
520
537
  is_inplace=True,
538
+ add_late_deltas=[
539
+ (
540
+ pa.Table.from_arrays(
541
+ [
542
+ pa.array([str(i) for i in range(20)]),
543
+ pa.array([i for i in range(20)]),
544
+ ],
545
+ names=["pk_col_1", "sk_col_1"],
546
+ ),
547
+ DeltaType.UPSERT,
548
+ None,
549
+ )
550
+ ],
551
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
552
+ ),
553
+ "14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
554
+ primary_keys={"pk_col_1"},
555
+ sort_keys=[SortKey.of(key_name="sk_col_1")],
556
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
557
+ partition_values=["1"],
558
+ input_deltas=pa.Table.from_arrays(
559
+ [
560
+ pa.array([str(i) for i in range(10)]),
561
+ pa.array([i for i in range(10)]),
562
+ ],
563
+ names=["pk_col_1", "sk_col_1"],
564
+ ),
565
+ input_deltas_delta_type=DeltaType.UPSERT,
566
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
567
+ [
568
+ pa.array([str(i) for i in range(10)]),
569
+ pa.array([i for i in range(10)]),
570
+ ],
571
+ names=["pk_col_1", "sk_col_1"],
572
+ ),
573
+ expected_terminal_exception=AssertionError,
574
+ expected_terminal_exception_message="hash_bucket_count is a required arg for compactor v2",
575
+ do_create_placement_group=False,
576
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
577
+ hash_bucket_count=None,
578
+ read_kwargs_provider=None,
579
+ drop_duplicates=True,
580
+ is_inplace=False,
581
+ add_late_deltas=False,
521
582
  skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
522
583
  ),
523
584
  }
@@ -35,6 +35,8 @@ class TestCompactionSession(unittest.TestCase):
35
35
  @classmethod
36
36
  def doClassCleanups(cls) -> None:
37
37
  os.remove(cls.DB_FILE_PATH)
38
+ ray.shutdown()
39
+ super().tearDownClass()
38
40
 
39
41
  @patch("deltacat.compute.compactor_v2.compaction_session.rcf")
40
42
  @patch("deltacat.compute.compactor_v2.compaction_session.s3_utils")
@@ -2,8 +2,9 @@ import ray
2
2
  from moto import mock_s3
3
3
  import pytest
4
4
  import os
5
+ import logging
5
6
  import boto3
6
- from typing import Any, Callable, Dict, List, Optional, Set
7
+ from typing import Any, Callable, Dict, List, Optional, Set, Tuple
7
8
  from boto3.resources.base import ServiceResource
8
9
  import pyarrow as pa
9
10
  from pytest_benchmark.fixture import BenchmarkFixture
@@ -15,6 +16,7 @@ from deltacat.tests.compute.test_util_common import (
15
16
  from deltacat.tests.test_utils.utils import read_s3_contents
16
17
  from deltacat.tests.compute.test_util_create_table_deltas_repo import (
17
18
  create_src_w_deltas_destination_plus_destination,
19
+ add_late_deltas_to_partition,
18
20
  )
19
21
  from deltacat.tests.compute.compact_partition_test_cases import (
20
22
  INCREMENTAL_TEST_CASES,
@@ -27,12 +29,33 @@ from deltacat.tests.compute.test_util_constant import (
27
29
  from deltacat.compute.compactor import (
28
30
  RoundCompletionInfo,
29
31
  )
32
+ from deltacat.storage import (
33
+ CommitState,
34
+ DeltaType,
35
+ Delta,
36
+ DeltaLocator,
37
+ Partition,
38
+ PartitionLocator,
39
+ )
40
+ from deltacat.types.media import ContentType
41
+ from deltacat.compute.compactor.model.compaction_session_audit_info import (
42
+ CompactionSessionAuditInfo,
43
+ )
44
+ from deltacat.compute.compactor.model.compact_partition_params import (
45
+ CompactPartitionParams,
46
+ )
47
+ from deltacat.utils.placement import (
48
+ PlacementGroupManager,
49
+ )
50
+ from deltacat import logs
30
51
 
31
52
  DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
32
53
  "db_file_path",
33
54
  "deltacat/tests/local_deltacat_storage/db_test.sqlite",
34
55
  )
35
56
 
57
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
58
+
36
59
 
37
60
  """
38
61
  MODULE scoped fixtures
@@ -43,6 +66,7 @@ MODULE scoped fixtures
43
66
  def setup_ray_cluster():
44
67
  ray.init(local_mode=True, ignore_reinit_error=True)
45
68
  yield
69
+ ray.shutdown()
46
70
 
47
71
 
48
72
  @pytest.fixture(autouse=True, scope="module")
@@ -58,19 +82,20 @@ def mock_aws_credential():
58
82
  @pytest.fixture(autouse=True, scope="module")
59
83
  def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
60
84
  # make sure the database file is deleted after all the compactor package tests are completed
85
+ yield
61
86
  if os.path.exists(DATABASE_FILE_PATH_VALUE):
62
87
  os.remove(DATABASE_FILE_PATH_VALUE)
63
88
 
64
89
 
65
90
  @pytest.fixture(scope="module")
66
- def setup_s3_resource(mock_aws_credential):
91
+ def s3_resource():
67
92
  with mock_s3():
68
93
  yield boto3.resource("s3")
69
94
 
70
95
 
71
96
  @pytest.fixture(autouse=True, scope="module")
72
- def setup_compaction_artifacts_s3_bucket(setup_s3_resource: ServiceResource):
73
- setup_s3_resource.create_bucket(
97
+ def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
98
+ s3_resource.create_bucket(
74
99
  ACL="authenticated-read",
75
100
  Bucket=TEST_S3_RCF_BUCKET_NAME,
76
101
  )
@@ -112,6 +137,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
112
137
  "drop_duplicates_param",
113
138
  "skip_enabled_compact_partition_drivers",
114
139
  "is_inplace",
140
+ "add_late_deltas",
115
141
  "compact_partition_func",
116
142
  ],
117
143
  [
@@ -133,6 +159,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
133
159
  read_kwargs_provider,
134
160
  skip_enabled_compact_partition_drivers,
135
161
  is_inplace,
162
+ add_late_deltas,
136
163
  compact_partition_func,
137
164
  )
138
165
  for test_name, (
@@ -152,13 +179,14 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
152
179
  read_kwargs_provider,
153
180
  skip_enabled_compact_partition_drivers,
154
181
  is_inplace,
182
+ add_late_deltas,
155
183
  compact_partition_func,
156
184
  ) in INCREMENTAL_TEST_CASES.items()
157
185
  ],
158
186
  ids=[test_name for test_name in INCREMENTAL_TEST_CASES],
159
187
  )
160
188
  def test_compact_partition_incremental(
161
- setup_s3_resource: ServiceResource,
189
+ s3_resource: ServiceResource,
162
190
  offer_local_deltacat_storage_kwargs: Dict[str, Any],
163
191
  test_name: str,
164
192
  primary_keys: Set[str],
@@ -177,25 +205,11 @@ def test_compact_partition_incremental(
177
205
  read_kwargs_provider_param: Any,
178
206
  skip_enabled_compact_partition_drivers,
179
207
  is_inplace: bool,
208
+ add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType]]],
180
209
  compact_partition_func: Callable,
181
210
  benchmark: BenchmarkFixture,
182
211
  ):
183
212
  import deltacat.tests.local_deltacat_storage as ds
184
- from deltacat.types.media import ContentType
185
- from deltacat.storage import (
186
- DeltaLocator,
187
- Partition,
188
- PartitionLocator,
189
- )
190
- from deltacat.compute.compactor.model.compaction_session_audit_info import (
191
- CompactionSessionAuditInfo,
192
- )
193
- from deltacat.compute.compactor.model.compact_partition_params import (
194
- CompactPartitionParams,
195
- )
196
- from deltacat.utils.placement import (
197
- PlacementGroupManager,
198
- )
199
213
 
200
214
  ds_mock_kwargs: Dict[str, Any] = offer_local_deltacat_storage_kwargs
201
215
 
@@ -205,6 +219,9 @@ def test_compact_partition_incremental(
205
219
  source_table_stream,
206
220
  destination_table_stream,
207
221
  _,
222
+ source_table_namespace,
223
+ source_table_name,
224
+ source_table_version,
208
225
  ) = create_src_w_deltas_destination_plus_destination(
209
226
  primary_keys,
210
227
  sort_keys,
@@ -227,11 +244,13 @@ def test_compact_partition_incremental(
227
244
  )
228
245
  num_workers, worker_instance_cpu = DEFAULT_NUM_WORKERS, DEFAULT_WORKER_INSTANCE_CPUS
229
246
  total_cpus: int = num_workers * worker_instance_cpu
230
- pgm: Optional[PlacementGroupManager] = None
231
- if create_placement_group_param:
232
- pgm = PlacementGroupManager(
247
+ pgm: Optional[PlacementGroupManager] = (
248
+ PlacementGroupManager(
233
249
  1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
234
250
  ).pgs[0]
251
+ if create_placement_group_param
252
+ else None
253
+ )
235
254
  compact_partition_params = CompactPartitionParams.of(
236
255
  {
237
256
  "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
@@ -265,24 +284,36 @@ def test_compact_partition_incremental(
265
284
 
266
285
  Returns: args, kwargs
267
286
  """
268
- setup_s3_resource.Bucket(TEST_S3_RCF_BUCKET_NAME).objects.all().delete()
287
+ s3_resource.Bucket(TEST_S3_RCF_BUCKET_NAME).objects.all().delete()
269
288
  return (compact_partition_params,), {}
270
289
 
290
+ if add_late_deltas:
291
+ # NOTE: In the case of in-place compaction it is plausible that new deltas may be added to the source partition during compaction
292
+ # (so that the source_partition.stream_position > last_stream_position_to_compact).
293
+ # This parameter helps simulate the case to check that no late deltas are dropped even when the compacted partition is created.
294
+ latest_delta, _ = add_late_deltas_to_partition(
295
+ add_late_deltas, source_partition, ds_mock_kwargs
296
+ )
297
+ if expected_terminal_exception:
298
+ with pytest.raises(expected_terminal_exception) as exc_info:
299
+ compact_partition_func(compact_partition_params)
300
+ assert expected_terminal_exception_message in str(exc_info.value)
301
+ return
271
302
  rcf_file_s3_uri = benchmark.pedantic(
272
303
  compact_partition_func, setup=_incremental_compaction_setup
273
304
  )
305
+
274
306
  # validate
275
- round_completion_info: RoundCompletionInfo = get_rcf(
276
- setup_s3_resource, rcf_file_s3_uri
277
- )
307
+ round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
278
308
  compacted_delta_locator: DeltaLocator = (
279
309
  round_completion_info.compacted_delta_locator
280
310
  )
281
- audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
282
- "s3://", ""
283
- ).split("/", 1)
311
+ audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
312
+ round_completion_info.compaction_audit_url
313
+ )
314
+
284
315
  compaction_audit_obj: Dict[str, Any] = read_s3_contents(
285
- setup_s3_resource, audit_bucket, audit_key
316
+ s3_resource, audit_bucket, audit_key
286
317
  )
287
318
  compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
288
319
  **compaction_audit_obj
@@ -318,8 +349,34 @@ def test_compact_partition_incremental(
318
349
  == destination_partition_locator.partition_values
319
350
  and source_partition.locator.stream_id
320
351
  == destination_partition_locator.stream_id
321
- ), "The source partition should match the destination partition"
352
+ ), f"The source partition: {source_partition.locator.canonical_string} should match the destination partition: {destination_partition_locator.canonical_string}"
322
353
  assert (
323
354
  compacted_delta_locator.stream_id == source_partition.locator.stream_id
324
355
  ), "The compacted delta should be in the same stream as the source"
356
+ source_partition: Partition = ds.get_partition(
357
+ source_table_stream.locator,
358
+ partition_values_param,
359
+ **ds_mock_kwargs,
360
+ )
361
+ compacted_partition: Optional[Partition] = ds.get_partition(
362
+ compacted_delta_locator.stream_locator,
363
+ partition_values_param,
364
+ **ds_mock_kwargs,
365
+ )
366
+ assert (
367
+ compacted_partition.state == source_partition.state == CommitState.COMMITTED
368
+ ), f"The compacted/source table partition should be in {CommitState.COMMITTED} state and not {CommitState.DEPRECATED}"
369
+ if add_late_deltas:
370
+ compacted_partition_deltas: List[Delta] = ds.list_partition_deltas(
371
+ partition_like=compacted_partition,
372
+ ascending_order=False,
373
+ **ds_mock_kwargs,
374
+ ).all_items()
375
+ assert (
376
+ len(compacted_partition_deltas) == len(add_late_deltas) + 1
377
+ ), f"Expected the number of deltas within the newly promoted partition to equal 1 (the compacted delta) + the # of late deltas: {len(add_late_deltas)}"
378
+ assert (
379
+ compacted_partition_deltas[0].stream_position
380
+ == latest_delta.stream_position
381
+ ), f"Expected the latest delta in the compacted partition: {compacted_partition_deltas[0].stream_position} to have the same stream position as the latest delta: {latest_delta.stream_position}"
325
382
  return
@@ -37,6 +37,19 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple
37
37
  from deltacat.types.media import StorageType
38
38
  from deltacat.storage import (
39
39
  DeltaType,
40
+ DeltaLocator,
41
+ Partition,
42
+ PartitionLocator,
43
+ )
44
+ from deltacat.types.media import ContentType
45
+ from deltacat.compute.compactor.model.compact_partition_params import (
46
+ CompactPartitionParams,
47
+ )
48
+ from deltacat.utils.placement import (
49
+ PlacementGroupManager,
50
+ )
51
+ from deltacat.compute.compactor.model.compaction_session_audit_info import (
52
+ CompactionSessionAuditInfo,
40
53
  )
41
54
 
42
55
  DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
@@ -54,6 +67,7 @@ MODULE scoped fixtures
54
67
  def setup_ray_cluster():
55
68
  ray.init(local_mode=True, ignore_reinit_error=True)
56
69
  yield
70
+ ray.shutdown()
57
71
 
58
72
 
59
73
  @pytest.fixture(autouse=True, scope="module")
@@ -74,14 +88,14 @@ def cleanup_the_database_file_after_all_compaction_session_package_tests_complet
74
88
 
75
89
 
76
90
  @pytest.fixture(scope="module")
77
- def setup_s3_resource(mock_aws_credential):
91
+ def s3_resource(mock_aws_credential):
78
92
  with mock_s3():
79
93
  yield boto3.resource("s3")
80
94
 
81
95
 
82
96
  @pytest.fixture(autouse=True, scope="module")
83
- def setup_compaction_artifacts_s3_bucket(setup_s3_resource: ServiceResource):
84
- setup_s3_resource.create_bucket(
97
+ def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
98
+ s3_resource.create_bucket(
85
99
  ACL="authenticated-read",
86
100
  Bucket=TEST_S3_RCF_BUCKET_NAME,
87
101
  )
@@ -172,7 +186,7 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
172
186
  ids=[test_name for test_name in REBASE_THEN_INCREMENTAL_TEST_CASES],
173
187
  )
174
188
  def test_compact_partition_rebase_then_incremental(
175
- setup_s3_resource: ServiceResource,
189
+ s3_resource: ServiceResource,
176
190
  local_deltacat_storage_kwargs: Dict[str, Any],
177
191
  test_name: str,
178
192
  primary_keys: Set[str],
@@ -196,25 +210,8 @@ def test_compact_partition_rebase_then_incremental(
196
210
  benchmark: BenchmarkFixture,
197
211
  ):
198
212
  import deltacat.tests.local_deltacat_storage as ds
199
- from deltacat.types.media import ContentType
200
- from deltacat.storage import (
201
- DeltaLocator,
202
- Partition,
203
- PartitionLocator,
204
- )
205
- from deltacat.compute.compactor.model.compact_partition_params import (
206
- CompactPartitionParams,
207
- )
208
- from deltacat.utils.placement import (
209
- PlacementGroupManager,
210
- )
211
- from deltacat.compute.compactor.model.compaction_session_audit_info import (
212
- CompactionSessionAuditInfo,
213
- )
214
213
 
215
214
  ds_mock_kwargs = local_deltacat_storage_kwargs
216
- ray.shutdown()
217
- ray.init(local_mode=True, ignore_reinit_error=True)
218
215
  """
219
216
  REBASE
220
217
  """
@@ -280,7 +277,7 @@ def test_compact_partition_rebase_then_incremental(
280
277
  # execute
281
278
  rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
282
279
  compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
283
- setup_s3_resource, rcf_file_s3_uri
280
+ s3_resource, rcf_file_s3_uri
284
281
  )
285
282
  tables = ds.download_delta(
286
283
  compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
@@ -346,16 +343,14 @@ def test_compact_partition_rebase_then_incremental(
346
343
  assert expected_terminal_exception_message in str(exc_info.value)
347
344
  return
348
345
  rcf_file_s3_uri = compact_partition_func(compact_partition_params)
349
- round_completion_info = get_rcf(setup_s3_resource, rcf_file_s3_uri)
346
+ round_completion_info = get_rcf(s3_resource, rcf_file_s3_uri)
350
347
  compacted_delta_locator_incremental: DeltaLocator = (
351
348
  round_completion_info.compacted_delta_locator
352
349
  )
353
350
  audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
354
351
  "s3://", ""
355
352
  ).split("/", 1)
356
- compaction_audit_obj: dict = read_s3_contents(
357
- setup_s3_resource, audit_bucket, audit_key
358
- )
353
+ compaction_audit_obj: dict = read_s3_contents(s3_resource, audit_bucket, audit_key)
359
354
  compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
360
355
  **compaction_audit_obj
361
356
  )
@@ -19,6 +19,42 @@ from deltacat.tests.compute.test_util_common import (
19
19
  create_destination_table,
20
20
  create_rebase_table,
21
21
  )
22
+ import logging
23
+ from deltacat import logs
24
+
25
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
26
+
27
+
28
+ def _add_deltas_to_partition(
29
+ deltas_ingredients: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
30
+ partition: Optional[Partition],
31
+ ds_mock_kwargs: Optional[Dict[str, Any]],
32
+ ) -> List[Optional[Delta], int]:
33
+ import deltacat.tests.local_deltacat_storage as ds
34
+
35
+ all_deltas_length = 0
36
+ for (delta_data, delta_type, delete_parameters) in deltas_ingredients:
37
+ staged_delta: Delta = ds.stage_delta(
38
+ delta_data,
39
+ partition,
40
+ delta_type,
41
+ delete_parameters=delete_parameters,
42
+ **ds_mock_kwargs,
43
+ )
44
+ incremental_delta = ds.commit_delta(
45
+ staged_delta,
46
+ **ds_mock_kwargs,
47
+ )
48
+ all_deltas_length += len(delta_data) if delta_data else 0
49
+ return incremental_delta, all_deltas_length
50
+
51
+
52
+ def add_late_deltas_to_partition(
53
+ late_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
54
+ source_partition: Optional[Partition],
55
+ ds_mock_kwargs: Optional[Dict[str, Any]],
56
+ ) -> List[Optional[Delta], int]:
57
+ return _add_deltas_to_partition(late_deltas, source_partition, ds_mock_kwargs)
22
58
 
23
59
 
24
60
  def create_incremental_deltas_on_source_table(
@@ -85,7 +121,7 @@ def create_src_w_deltas_destination_plus_destination(
85
121
  partition_values: Optional[List[Any]],
86
122
  ds_mock_kwargs: Optional[Dict[str, Any]],
87
123
  simulate_is_inplace: bool = False,
88
- ) -> Tuple[Stream, Stream, Optional[Stream]]:
124
+ ) -> Tuple[Stream, Stream, Optional[Stream], str, str, str]:
89
125
  import deltacat.tests.local_deltacat_storage as ds
90
126
 
91
127
  source_namespace, source_table_name, source_table_version = create_src_table(
@@ -137,7 +173,14 @@ def create_src_w_deltas_destination_plus_destination(
137
173
  table_version=destination_table_version,
138
174
  **ds_mock_kwargs,
139
175
  )
140
- return source_table_stream_after_committed, destination_table_stream, None
176
+ return (
177
+ source_table_stream_after_committed,
178
+ destination_table_stream,
179
+ None,
180
+ source_namespace,
181
+ source_table_name,
182
+ source_table_version,
183
+ )
141
184
 
142
185
 
143
186
  def create_src_w_deltas_destination_rebase_w_deltas_strategy(