deltacat 1.1.6__py3-none-any.whl → 1.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/constants.py +21 -4
- deltacat/aws/s3u.py +48 -21
- deltacat/compute/compactor/model/round_completion_info.py +4 -0
- deltacat/compute/compactor_v2/compaction_session.py +51 -25
- deltacat/compute/compactor_v2/constants.py +12 -0
- deltacat/compute/compactor_v2/model/compaction_session.py +21 -0
- deltacat/compute/compactor_v2/steps/hash_bucket.py +6 -0
- deltacat/compute/compactor_v2/steps/merge.py +6 -0
- deltacat/compute/compactor_v2/utils/task_options.py +4 -1
- deltacat/storage/interface.py +10 -3
- deltacat/tests/aws/test_s3u.py +84 -3
- deltacat/tests/catalog/test_default_catalog_impl.py +2 -0
- deltacat/tests/compute/compact_partition_test_cases.py +61 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +2 -0
- deltacat/tests/compute/test_compact_partition_incremental.py +89 -32
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +21 -26
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +45 -2
- deltacat/tests/local_deltacat_storage/__init__.py +38 -19
- deltacat/tests/utils/ray_utils/__init__.py +0 -0
- deltacat/tests/utils/ray_utils/test_concurrency.py +50 -0
- deltacat/tests/utils/test_resources.py +28 -0
- deltacat/utils/resources.py +45 -0
- {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/METADATA +5 -6
- {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/RECORD +28 -25
- {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/WHEEL +1 -1
- {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/LICENSE +0 -0
- {deltacat-1.1.6.dist-info → deltacat-1.1.8.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,7 @@ from deltacat.storage import (
|
|
22
22
|
from deltacat.compute.compactor_v2.compaction_session import (
|
23
23
|
compact_partition as compact_partition_v2,
|
24
24
|
)
|
25
|
+
from deltacat.storage import DeleteParameters
|
25
26
|
|
26
27
|
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
27
28
|
|
@@ -89,9 +90,13 @@ class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
|
|
89
90
|
"""
|
90
91
|
Args:
|
91
92
|
is_inplace: bool - argument to indicate whether to try compacting an in-place compacted table (the source table is the destination table). Also needed to control whether the destination table is created
|
93
|
+
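add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]] - optional list of (table, delta type, delete parameters) entries describing deltas to add to the source_partition after we've triggered compaction; None when no late deltas should be added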
|
92
94
|
"""
|
93
95
|
|
94
96
|
is_inplace: bool
|
97
|
+
add_late_deltas: Optional[
|
98
|
+
List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]
|
99
|
+
]
|
95
100
|
|
96
101
|
|
97
102
|
@dataclass(frozen=True)
|
@@ -148,6 +153,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
148
153
|
read_kwargs_provider=None,
|
149
154
|
drop_duplicates=True,
|
150
155
|
is_inplace=False,
|
156
|
+
add_late_deltas=None,
|
151
157
|
skip_enabled_compact_partition_drivers=None,
|
152
158
|
),
|
153
159
|
"2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
|
@@ -175,6 +181,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
175
181
|
read_kwargs_provider=None,
|
176
182
|
drop_duplicates=True,
|
177
183
|
is_inplace=False,
|
184
|
+
add_late_deltas=None,
|
178
185
|
skip_enabled_compact_partition_drivers=None,
|
179
186
|
),
|
180
187
|
"3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
|
@@ -211,6 +218,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
211
218
|
read_kwargs_provider=None,
|
212
219
|
drop_duplicates=True,
|
213
220
|
is_inplace=False,
|
221
|
+
add_late_deltas=None,
|
214
222
|
skip_enabled_compact_partition_drivers=None,
|
215
223
|
),
|
216
224
|
"4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
|
@@ -246,6 +254,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
246
254
|
read_kwargs_provider=None,
|
247
255
|
drop_duplicates=True,
|
248
256
|
is_inplace=False,
|
257
|
+
add_late_deltas=None,
|
249
258
|
skip_enabled_compact_partition_drivers=None,
|
250
259
|
),
|
251
260
|
"5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -276,6 +285,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
276
285
|
read_kwargs_provider=None,
|
277
286
|
drop_duplicates=True,
|
278
287
|
is_inplace=False,
|
288
|
+
add_late_deltas=None,
|
279
289
|
skip_enabled_compact_partition_drivers=None,
|
280
290
|
),
|
281
291
|
"6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -306,6 +316,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
306
316
|
read_kwargs_provider=None,
|
307
317
|
drop_duplicates=True,
|
308
318
|
is_inplace=False,
|
319
|
+
add_late_deltas=None,
|
309
320
|
skip_enabled_compact_partition_drivers=None,
|
310
321
|
),
|
311
322
|
"7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -336,6 +347,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
336
347
|
read_kwargs_provider=None,
|
337
348
|
drop_duplicates=True,
|
338
349
|
is_inplace=False,
|
350
|
+
add_late_deltas=None,
|
339
351
|
skip_enabled_compact_partition_drivers=None,
|
340
352
|
),
|
341
353
|
"8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
|
@@ -368,6 +380,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
368
380
|
read_kwargs_provider=None,
|
369
381
|
drop_duplicates=True,
|
370
382
|
is_inplace=False,
|
383
|
+
add_late_deltas=None,
|
371
384
|
skip_enabled_compact_partition_drivers=None,
|
372
385
|
),
|
373
386
|
"9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
|
@@ -398,6 +411,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
398
411
|
read_kwargs_provider=None,
|
399
412
|
drop_duplicates=True,
|
400
413
|
is_inplace=False,
|
414
|
+
add_late_deltas=None,
|
401
415
|
skip_enabled_compact_partition_drivers=None,
|
402
416
|
),
|
403
417
|
"10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
|
@@ -428,6 +442,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
428
442
|
read_kwargs_provider=None,
|
429
443
|
drop_duplicates=True,
|
430
444
|
is_inplace=False,
|
445
|
+
add_late_deltas=None,
|
431
446
|
skip_enabled_compact_partition_drivers=None,
|
432
447
|
),
|
433
448
|
"11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
|
@@ -458,6 +473,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
458
473
|
read_kwargs_provider=None,
|
459
474
|
drop_duplicates=True,
|
460
475
|
is_inplace=False,
|
476
|
+
add_late_deltas=None,
|
461
477
|
skip_enabled_compact_partition_drivers=None,
|
462
478
|
),
|
463
479
|
"12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
@@ -488,6 +504,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
488
504
|
read_kwargs_provider=None,
|
489
505
|
drop_duplicates=True,
|
490
506
|
is_inplace=False,
|
507
|
+
add_late_deltas=None,
|
491
508
|
skip_enabled_compact_partition_drivers=None,
|
492
509
|
),
|
493
510
|
"13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
|
@@ -518,6 +535,50 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
518
535
|
read_kwargs_provider=None,
|
519
536
|
drop_duplicates=True,
|
520
537
|
is_inplace=True,
|
538
|
+
add_late_deltas=[
|
539
|
+
(
|
540
|
+
pa.Table.from_arrays(
|
541
|
+
[
|
542
|
+
pa.array([str(i) for i in range(20)]),
|
543
|
+
pa.array([i for i in range(20)]),
|
544
|
+
],
|
545
|
+
names=["pk_col_1", "sk_col_1"],
|
546
|
+
),
|
547
|
+
DeltaType.UPSERT,
|
548
|
+
None,
|
549
|
+
)
|
550
|
+
],
|
551
|
+
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
552
|
+
),
|
553
|
+
"14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
|
554
|
+
primary_keys={"pk_col_1"},
|
555
|
+
sort_keys=[SortKey.of(key_name="sk_col_1")],
|
556
|
+
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
557
|
+
partition_values=["1"],
|
558
|
+
input_deltas=pa.Table.from_arrays(
|
559
|
+
[
|
560
|
+
pa.array([str(i) for i in range(10)]),
|
561
|
+
pa.array([i for i in range(10)]),
|
562
|
+
],
|
563
|
+
names=["pk_col_1", "sk_col_1"],
|
564
|
+
),
|
565
|
+
input_deltas_delta_type=DeltaType.UPSERT,
|
566
|
+
expected_terminal_compact_partition_result=pa.Table.from_arrays(
|
567
|
+
[
|
568
|
+
pa.array([str(i) for i in range(10)]),
|
569
|
+
pa.array([i for i in range(10)]),
|
570
|
+
],
|
571
|
+
names=["pk_col_1", "sk_col_1"],
|
572
|
+
),
|
573
|
+
expected_terminal_exception=AssertionError,
|
574
|
+
expected_terminal_exception_message="hash_bucket_count is a required arg for compactor v2",
|
575
|
+
do_create_placement_group=False,
|
576
|
+
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
577
|
+
hash_bucket_count=None,
|
578
|
+
read_kwargs_provider=None,
|
579
|
+
drop_duplicates=True,
|
580
|
+
is_inplace=False,
|
581
|
+
add_late_deltas=False,
|
521
582
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
522
583
|
),
|
523
584
|
}
|
@@ -35,6 +35,8 @@ class TestCompactionSession(unittest.TestCase):
|
|
35
35
|
@classmethod
|
36
36
|
def doClassCleanups(cls) -> None:
|
37
37
|
os.remove(cls.DB_FILE_PATH)
|
38
|
+
ray.shutdown()
|
39
|
+
super().tearDownClass()
|
38
40
|
|
39
41
|
@patch("deltacat.compute.compactor_v2.compaction_session.rcf")
|
40
42
|
@patch("deltacat.compute.compactor_v2.compaction_session.s3_utils")
|
@@ -2,8 +2,9 @@ import ray
|
|
2
2
|
from moto import mock_s3
|
3
3
|
import pytest
|
4
4
|
import os
|
5
|
+
import logging
|
5
6
|
import boto3
|
6
|
-
from typing import Any, Callable, Dict, List, Optional, Set
|
7
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
|
7
8
|
from boto3.resources.base import ServiceResource
|
8
9
|
import pyarrow as pa
|
9
10
|
from pytest_benchmark.fixture import BenchmarkFixture
|
@@ -15,6 +16,7 @@ from deltacat.tests.compute.test_util_common import (
|
|
15
16
|
from deltacat.tests.test_utils.utils import read_s3_contents
|
16
17
|
from deltacat.tests.compute.test_util_create_table_deltas_repo import (
|
17
18
|
create_src_w_deltas_destination_plus_destination,
|
19
|
+
add_late_deltas_to_partition,
|
18
20
|
)
|
19
21
|
from deltacat.tests.compute.compact_partition_test_cases import (
|
20
22
|
INCREMENTAL_TEST_CASES,
|
@@ -27,12 +29,33 @@ from deltacat.tests.compute.test_util_constant import (
|
|
27
29
|
from deltacat.compute.compactor import (
|
28
30
|
RoundCompletionInfo,
|
29
31
|
)
|
32
|
+
from deltacat.storage import (
|
33
|
+
CommitState,
|
34
|
+
DeltaType,
|
35
|
+
Delta,
|
36
|
+
DeltaLocator,
|
37
|
+
Partition,
|
38
|
+
PartitionLocator,
|
39
|
+
)
|
40
|
+
from deltacat.types.media import ContentType
|
41
|
+
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
42
|
+
CompactionSessionAuditInfo,
|
43
|
+
)
|
44
|
+
from deltacat.compute.compactor.model.compact_partition_params import (
|
45
|
+
CompactPartitionParams,
|
46
|
+
)
|
47
|
+
from deltacat.utils.placement import (
|
48
|
+
PlacementGroupManager,
|
49
|
+
)
|
50
|
+
from deltacat import logs
|
30
51
|
|
31
52
|
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
32
53
|
"db_file_path",
|
33
54
|
"deltacat/tests/local_deltacat_storage/db_test.sqlite",
|
34
55
|
)
|
35
56
|
|
57
|
+
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
58
|
+
|
36
59
|
|
37
60
|
"""
|
38
61
|
MODULE scoped fixtures
|
@@ -43,6 +66,7 @@ MODULE scoped fixtures
|
|
43
66
|
def setup_ray_cluster():
|
44
67
|
ray.init(local_mode=True, ignore_reinit_error=True)
|
45
68
|
yield
|
69
|
+
ray.shutdown()
|
46
70
|
|
47
71
|
|
48
72
|
@pytest.fixture(autouse=True, scope="module")
|
@@ -58,19 +82,20 @@ def mock_aws_credential():
|
|
58
82
|
@pytest.fixture(autouse=True, scope="module")
|
59
83
|
def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
|
60
84
|
# make sure the database file is deleted after all the compactor package tests are completed
|
85
|
+
yield
|
61
86
|
if os.path.exists(DATABASE_FILE_PATH_VALUE):
|
62
87
|
os.remove(DATABASE_FILE_PATH_VALUE)
|
63
88
|
|
64
89
|
|
65
90
|
@pytest.fixture(scope="module")
|
66
|
-
def
|
91
|
+
def s3_resource():
|
67
92
|
with mock_s3():
|
68
93
|
yield boto3.resource("s3")
|
69
94
|
|
70
95
|
|
71
96
|
@pytest.fixture(autouse=True, scope="module")
|
72
|
-
def setup_compaction_artifacts_s3_bucket(
|
73
|
-
|
97
|
+
def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
|
98
|
+
s3_resource.create_bucket(
|
74
99
|
ACL="authenticated-read",
|
75
100
|
Bucket=TEST_S3_RCF_BUCKET_NAME,
|
76
101
|
)
|
@@ -112,6 +137,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
112
137
|
"drop_duplicates_param",
|
113
138
|
"skip_enabled_compact_partition_drivers",
|
114
139
|
"is_inplace",
|
140
|
+
"add_late_deltas",
|
115
141
|
"compact_partition_func",
|
116
142
|
],
|
117
143
|
[
|
@@ -133,6 +159,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
133
159
|
read_kwargs_provider,
|
134
160
|
skip_enabled_compact_partition_drivers,
|
135
161
|
is_inplace,
|
162
|
+
add_late_deltas,
|
136
163
|
compact_partition_func,
|
137
164
|
)
|
138
165
|
for test_name, (
|
@@ -152,13 +179,14 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
152
179
|
read_kwargs_provider,
|
153
180
|
skip_enabled_compact_partition_drivers,
|
154
181
|
is_inplace,
|
182
|
+
add_late_deltas,
|
155
183
|
compact_partition_func,
|
156
184
|
) in INCREMENTAL_TEST_CASES.items()
|
157
185
|
],
|
158
186
|
ids=[test_name for test_name in INCREMENTAL_TEST_CASES],
|
159
187
|
)
|
160
188
|
def test_compact_partition_incremental(
|
161
|
-
|
189
|
+
s3_resource: ServiceResource,
|
162
190
|
offer_local_deltacat_storage_kwargs: Dict[str, Any],
|
163
191
|
test_name: str,
|
164
192
|
primary_keys: Set[str],
|
@@ -177,25 +205,11 @@ def test_compact_partition_incremental(
|
|
177
205
|
read_kwargs_provider_param: Any,
|
178
206
|
skip_enabled_compact_partition_drivers,
|
179
207
|
is_inplace: bool,
|
208
|
+
add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType]]],
|
180
209
|
compact_partition_func: Callable,
|
181
210
|
benchmark: BenchmarkFixture,
|
182
211
|
):
|
183
212
|
import deltacat.tests.local_deltacat_storage as ds
|
184
|
-
from deltacat.types.media import ContentType
|
185
|
-
from deltacat.storage import (
|
186
|
-
DeltaLocator,
|
187
|
-
Partition,
|
188
|
-
PartitionLocator,
|
189
|
-
)
|
190
|
-
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
191
|
-
CompactionSessionAuditInfo,
|
192
|
-
)
|
193
|
-
from deltacat.compute.compactor.model.compact_partition_params import (
|
194
|
-
CompactPartitionParams,
|
195
|
-
)
|
196
|
-
from deltacat.utils.placement import (
|
197
|
-
PlacementGroupManager,
|
198
|
-
)
|
199
213
|
|
200
214
|
ds_mock_kwargs: Dict[str, Any] = offer_local_deltacat_storage_kwargs
|
201
215
|
|
@@ -205,6 +219,9 @@ def test_compact_partition_incremental(
|
|
205
219
|
source_table_stream,
|
206
220
|
destination_table_stream,
|
207
221
|
_,
|
222
|
+
source_table_namespace,
|
223
|
+
source_table_name,
|
224
|
+
source_table_version,
|
208
225
|
) = create_src_w_deltas_destination_plus_destination(
|
209
226
|
primary_keys,
|
210
227
|
sort_keys,
|
@@ -227,11 +244,13 @@ def test_compact_partition_incremental(
|
|
227
244
|
)
|
228
245
|
num_workers, worker_instance_cpu = DEFAULT_NUM_WORKERS, DEFAULT_WORKER_INSTANCE_CPUS
|
229
246
|
total_cpus: int = num_workers * worker_instance_cpu
|
230
|
-
pgm: Optional[PlacementGroupManager] =
|
231
|
-
|
232
|
-
pgm = PlacementGroupManager(
|
247
|
+
pgm: Optional[PlacementGroupManager] = (
|
248
|
+
PlacementGroupManager(
|
233
249
|
1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
|
234
250
|
).pgs[0]
|
251
|
+
if create_placement_group_param
|
252
|
+
else None
|
253
|
+
)
|
235
254
|
compact_partition_params = CompactPartitionParams.of(
|
236
255
|
{
|
237
256
|
"compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
|
@@ -265,24 +284,36 @@ def test_compact_partition_incremental(
|
|
265
284
|
|
266
285
|
Returns: args, kwargs
|
267
286
|
"""
|
268
|
-
|
287
|
+
s3_resource.Bucket(TEST_S3_RCF_BUCKET_NAME).objects.all().delete()
|
269
288
|
return (compact_partition_params,), {}
|
270
289
|
|
290
|
+
if add_late_deltas:
|
291
|
+
# NOTE: In the case of in-place compaction it is plausible that new deltas may be added to the source partition during compaction
|
292
|
+
# (so that the source_partition.stream_position > last_stream_position_to_compact).
|
293
|
+
# This parameter helps simulate the case to check that no late deltas are dropped even when the compacted partition is created.
|
294
|
+
latest_delta, _ = add_late_deltas_to_partition(
|
295
|
+
add_late_deltas, source_partition, ds_mock_kwargs
|
296
|
+
)
|
297
|
+
if expected_terminal_exception:
|
298
|
+
with pytest.raises(expected_terminal_exception) as exc_info:
|
299
|
+
compact_partition_func(compact_partition_params)
|
300
|
+
assert expected_terminal_exception_message in str(exc_info.value)
|
301
|
+
return
|
271
302
|
rcf_file_s3_uri = benchmark.pedantic(
|
272
303
|
compact_partition_func, setup=_incremental_compaction_setup
|
273
304
|
)
|
305
|
+
|
274
306
|
# validate
|
275
|
-
round_completion_info: RoundCompletionInfo = get_rcf(
|
276
|
-
setup_s3_resource, rcf_file_s3_uri
|
277
|
-
)
|
307
|
+
round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
|
278
308
|
compacted_delta_locator: DeltaLocator = (
|
279
309
|
round_completion_info.compacted_delta_locator
|
280
310
|
)
|
281
|
-
audit_bucket, audit_key =
|
282
|
-
|
283
|
-
)
|
311
|
+
audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
|
312
|
+
round_completion_info.compaction_audit_url
|
313
|
+
)
|
314
|
+
|
284
315
|
compaction_audit_obj: Dict[str, Any] = read_s3_contents(
|
285
|
-
|
316
|
+
s3_resource, audit_bucket, audit_key
|
286
317
|
)
|
287
318
|
compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
|
288
319
|
**compaction_audit_obj
|
@@ -318,8 +349,34 @@ def test_compact_partition_incremental(
|
|
318
349
|
== destination_partition_locator.partition_values
|
319
350
|
and source_partition.locator.stream_id
|
320
351
|
== destination_partition_locator.stream_id
|
321
|
-
), "The source partition should match the destination partition"
|
352
|
+
), f"The source partition: {source_partition.locator.canonical_string} should match the destination partition: {destination_partition_locator.canonical_string}"
|
322
353
|
assert (
|
323
354
|
compacted_delta_locator.stream_id == source_partition.locator.stream_id
|
324
355
|
), "The compacted delta should be in the same stream as the source"
|
356
|
+
source_partition: Partition = ds.get_partition(
|
357
|
+
source_table_stream.locator,
|
358
|
+
partition_values_param,
|
359
|
+
**ds_mock_kwargs,
|
360
|
+
)
|
361
|
+
compacted_partition: Optional[Partition] = ds.get_partition(
|
362
|
+
compacted_delta_locator.stream_locator,
|
363
|
+
partition_values_param,
|
364
|
+
**ds_mock_kwargs,
|
365
|
+
)
|
366
|
+
assert (
|
367
|
+
compacted_partition.state == source_partition.state == CommitState.COMMITTED
|
368
|
+
), f"The compacted/source table partition should be in {CommitState.COMMITTED} state and not {CommitState.DEPRECATED}"
|
369
|
+
if add_late_deltas:
|
370
|
+
compacted_partition_deltas: List[Delta] = ds.list_partition_deltas(
|
371
|
+
partition_like=compacted_partition,
|
372
|
+
ascending_order=False,
|
373
|
+
**ds_mock_kwargs,
|
374
|
+
).all_items()
|
375
|
+
assert (
|
376
|
+
len(compacted_partition_deltas) == len(add_late_deltas) + 1
|
377
|
+
), f"Expected the number of deltas within the newly promoted partition to equal 1 (the compacted delta) + the # of late deltas: {len(add_late_deltas)}"
|
378
|
+
assert (
|
379
|
+
compacted_partition_deltas[0].stream_position
|
380
|
+
== latest_delta.stream_position
|
381
|
+
), f"Expected the latest delta in the compacted partition: {compacted_partition_deltas[0].stream_position} to have the same stream position as the latest delta: {latest_delta.stream_position}"
|
325
382
|
return
|
@@ -37,6 +37,19 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple
|
|
37
37
|
from deltacat.types.media import StorageType
|
38
38
|
from deltacat.storage import (
|
39
39
|
DeltaType,
|
40
|
+
DeltaLocator,
|
41
|
+
Partition,
|
42
|
+
PartitionLocator,
|
43
|
+
)
|
44
|
+
from deltacat.types.media import ContentType
|
45
|
+
from deltacat.compute.compactor.model.compact_partition_params import (
|
46
|
+
CompactPartitionParams,
|
47
|
+
)
|
48
|
+
from deltacat.utils.placement import (
|
49
|
+
PlacementGroupManager,
|
50
|
+
)
|
51
|
+
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
52
|
+
CompactionSessionAuditInfo,
|
40
53
|
)
|
41
54
|
|
42
55
|
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
@@ -54,6 +67,7 @@ MODULE scoped fixtures
|
|
54
67
|
def setup_ray_cluster():
|
55
68
|
ray.init(local_mode=True, ignore_reinit_error=True)
|
56
69
|
yield
|
70
|
+
ray.shutdown()
|
57
71
|
|
58
72
|
|
59
73
|
@pytest.fixture(autouse=True, scope="module")
|
@@ -74,14 +88,14 @@ def cleanup_the_database_file_after_all_compaction_session_package_tests_complet
|
|
74
88
|
|
75
89
|
|
76
90
|
@pytest.fixture(scope="module")
|
77
|
-
def
|
91
|
+
def s3_resource(mock_aws_credential):
|
78
92
|
with mock_s3():
|
79
93
|
yield boto3.resource("s3")
|
80
94
|
|
81
95
|
|
82
96
|
@pytest.fixture(autouse=True, scope="module")
|
83
|
-
def setup_compaction_artifacts_s3_bucket(
|
84
|
-
|
97
|
+
def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
|
98
|
+
s3_resource.create_bucket(
|
85
99
|
ACL="authenticated-read",
|
86
100
|
Bucket=TEST_S3_RCF_BUCKET_NAME,
|
87
101
|
)
|
@@ -172,7 +186,7 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
172
186
|
ids=[test_name for test_name in REBASE_THEN_INCREMENTAL_TEST_CASES],
|
173
187
|
)
|
174
188
|
def test_compact_partition_rebase_then_incremental(
|
175
|
-
|
189
|
+
s3_resource: ServiceResource,
|
176
190
|
local_deltacat_storage_kwargs: Dict[str, Any],
|
177
191
|
test_name: str,
|
178
192
|
primary_keys: Set[str],
|
@@ -196,25 +210,8 @@ def test_compact_partition_rebase_then_incremental(
|
|
196
210
|
benchmark: BenchmarkFixture,
|
197
211
|
):
|
198
212
|
import deltacat.tests.local_deltacat_storage as ds
|
199
|
-
from deltacat.types.media import ContentType
|
200
|
-
from deltacat.storage import (
|
201
|
-
DeltaLocator,
|
202
|
-
Partition,
|
203
|
-
PartitionLocator,
|
204
|
-
)
|
205
|
-
from deltacat.compute.compactor.model.compact_partition_params import (
|
206
|
-
CompactPartitionParams,
|
207
|
-
)
|
208
|
-
from deltacat.utils.placement import (
|
209
|
-
PlacementGroupManager,
|
210
|
-
)
|
211
|
-
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
212
|
-
CompactionSessionAuditInfo,
|
213
|
-
)
|
214
213
|
|
215
214
|
ds_mock_kwargs = local_deltacat_storage_kwargs
|
216
|
-
ray.shutdown()
|
217
|
-
ray.init(local_mode=True, ignore_reinit_error=True)
|
218
215
|
"""
|
219
216
|
REBASE
|
220
217
|
"""
|
@@ -280,7 +277,7 @@ def test_compact_partition_rebase_then_incremental(
|
|
280
277
|
# execute
|
281
278
|
rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
|
282
279
|
compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
|
283
|
-
|
280
|
+
s3_resource, rcf_file_s3_uri
|
284
281
|
)
|
285
282
|
tables = ds.download_delta(
|
286
283
|
compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
|
@@ -346,16 +343,14 @@ def test_compact_partition_rebase_then_incremental(
|
|
346
343
|
assert expected_terminal_exception_message in str(exc_info.value)
|
347
344
|
return
|
348
345
|
rcf_file_s3_uri = compact_partition_func(compact_partition_params)
|
349
|
-
round_completion_info = get_rcf(
|
346
|
+
round_completion_info = get_rcf(s3_resource, rcf_file_s3_uri)
|
350
347
|
compacted_delta_locator_incremental: DeltaLocator = (
|
351
348
|
round_completion_info.compacted_delta_locator
|
352
349
|
)
|
353
350
|
audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
|
354
351
|
"s3://", ""
|
355
352
|
).split("/", 1)
|
356
|
-
compaction_audit_obj: dict = read_s3_contents(
|
357
|
-
setup_s3_resource, audit_bucket, audit_key
|
358
|
-
)
|
353
|
+
compaction_audit_obj: dict = read_s3_contents(s3_resource, audit_bucket, audit_key)
|
359
354
|
compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
|
360
355
|
**compaction_audit_obj
|
361
356
|
)
|
@@ -19,6 +19,42 @@ from deltacat.tests.compute.test_util_common import (
|
|
19
19
|
create_destination_table,
|
20
20
|
create_rebase_table,
|
21
21
|
)
|
22
|
+
import logging
|
23
|
+
from deltacat import logs
|
24
|
+
|
25
|
+
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
26
|
+
|
27
|
+
|
28
|
+
def _add_deltas_to_partition(
|
29
|
+
deltas_ingredients: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
|
30
|
+
partition: Optional[Partition],
|
31
|
+
ds_mock_kwargs: Optional[Dict[str, Any]],
|
32
|
+
) -> List[Optional[Delta], int]:
|
33
|
+
import deltacat.tests.local_deltacat_storage as ds
|
34
|
+
|
35
|
+
all_deltas_length = 0
|
36
|
+
for (delta_data, delta_type, delete_parameters) in deltas_ingredients:
|
37
|
+
staged_delta: Delta = ds.stage_delta(
|
38
|
+
delta_data,
|
39
|
+
partition,
|
40
|
+
delta_type,
|
41
|
+
delete_parameters=delete_parameters,
|
42
|
+
**ds_mock_kwargs,
|
43
|
+
)
|
44
|
+
incremental_delta = ds.commit_delta(
|
45
|
+
staged_delta,
|
46
|
+
**ds_mock_kwargs,
|
47
|
+
)
|
48
|
+
all_deltas_length += len(delta_data) if delta_data else 0
|
49
|
+
return incremental_delta, all_deltas_length
|
50
|
+
|
51
|
+
|
52
|
+
def add_late_deltas_to_partition(
|
53
|
+
late_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
|
54
|
+
source_partition: Optional[Partition],
|
55
|
+
ds_mock_kwargs: Optional[Dict[str, Any]],
|
56
|
+
) -> List[Optional[Delta], int]:
|
57
|
+
return _add_deltas_to_partition(late_deltas, source_partition, ds_mock_kwargs)
|
22
58
|
|
23
59
|
|
24
60
|
def create_incremental_deltas_on_source_table(
|
@@ -85,7 +121,7 @@ def create_src_w_deltas_destination_plus_destination(
|
|
85
121
|
partition_values: Optional[List[Any]],
|
86
122
|
ds_mock_kwargs: Optional[Dict[str, Any]],
|
87
123
|
simulate_is_inplace: bool = False,
|
88
|
-
) -> Tuple[Stream, Stream, Optional[Stream]]:
|
124
|
+
) -> Tuple[Stream, Stream, Optional[Stream], str, str, str]:
|
89
125
|
import deltacat.tests.local_deltacat_storage as ds
|
90
126
|
|
91
127
|
source_namespace, source_table_name, source_table_version = create_src_table(
|
@@ -137,7 +173,14 @@ def create_src_w_deltas_destination_plus_destination(
|
|
137
173
|
table_version=destination_table_version,
|
138
174
|
**ds_mock_kwargs,
|
139
175
|
)
|
140
|
-
return
|
176
|
+
return (
|
177
|
+
source_table_stream_after_committed,
|
178
|
+
destination_table_stream,
|
179
|
+
None,
|
180
|
+
source_namespace,
|
181
|
+
source_table_name,
|
182
|
+
source_table_version,
|
183
|
+
)
|
141
184
|
|
142
185
|
|
143
186
|
def create_src_w_deltas_destination_rebase_w_deltas_strategy(
|