deltacat 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/constants.py +21 -2
- deltacat/aws/s3u.py +107 -33
- deltacat/compute/compactor/model/round_completion_info.py +4 -0
- deltacat/compute/compactor_v2/compaction_session.py +51 -25
- deltacat/compute/compactor_v2/constants.py +12 -0
- deltacat/compute/compactor_v2/model/compaction_session.py +21 -0
- deltacat/compute/compactor_v2/steps/hash_bucket.py +6 -0
- deltacat/compute/compactor_v2/steps/merge.py +6 -0
- deltacat/compute/compactor_v2/utils/task_options.py +8 -2
- deltacat/storage/interface.py +10 -3
- deltacat/tests/aws/test_s3u.py +193 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +2 -0
- deltacat/tests/compute/compact_partition_test_cases.py +61 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +2 -0
- deltacat/tests/compute/test_compact_partition_incremental.py +89 -32
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +21 -26
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +45 -2
- deltacat/tests/local_deltacat_storage/__init__.py +38 -19
- deltacat/tests/utils/ray_utils/__init__.py +0 -0
- deltacat/tests/utils/ray_utils/test_concurrency.py +50 -0
- deltacat/tests/utils/test_resources.py +28 -0
- deltacat/utils/resources.py +45 -0
- {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/METADATA +1 -1
- {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/RECORD +28 -25
- {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/LICENSE +0 -0
- {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/WHEEL +0 -0
- {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/top_level.txt +0 -0
deltacat/tests/aws/test_s3u.py
CHANGED
@@ -1,7 +1,32 @@
|
|
1
1
|
import unittest
|
2
|
+
|
3
|
+
import botocore
|
4
|
+
|
5
|
+
from deltacat.aws.constants import RETRYABLE_TRANSIENT_ERRORS
|
2
6
|
from deltacat.aws.s3u import UuidBlockWritePathProvider, CapturedBlockWritePaths
|
3
7
|
|
4
8
|
|
9
|
+
import os
|
10
|
+
from unittest import mock
|
11
|
+
from unittest.mock import patch
|
12
|
+
|
13
|
+
import boto3
|
14
|
+
import pytest
|
15
|
+
from boto3.resources.base import ServiceResource
|
16
|
+
from botocore.exceptions import (
|
17
|
+
ClientError,
|
18
|
+
NoCredentialsError,
|
19
|
+
ReadTimeoutError,
|
20
|
+
ConnectTimeoutError,
|
21
|
+
HTTPClientError,
|
22
|
+
)
|
23
|
+
from deltacat.exceptions import NonRetryableError
|
24
|
+
from moto import mock_s3
|
25
|
+
from tenacity import RetryError
|
26
|
+
|
27
|
+
from deltacat.aws import s3u
|
28
|
+
|
29
|
+
|
5
30
|
class TestUuidBlockWritePathProvider(unittest.TestCase):
|
6
31
|
def test_uuid_block_write_provider_sanity(self):
|
7
32
|
capture_object = CapturedBlockWritePaths()
|
@@ -10,3 +35,171 @@ class TestUuidBlockWritePathProvider(unittest.TestCase):
|
|
10
35
|
result = provider("base_path")
|
11
36
|
|
12
37
|
self.assertRegex(result, r"^base_path/[\w-]{36}$")
|
38
|
+
|
39
|
+
|
40
|
+
class TestDownloadUpload(unittest.TestCase):
|
41
|
+
TEST_S3_BUCKET_NAME = "TEST_S3_BUCKET"
|
42
|
+
TEST_S3_KEY = "TEST_S3_KEY"
|
43
|
+
|
44
|
+
@pytest.fixture(autouse=True)
|
45
|
+
def mock_aws_credential(self):
|
46
|
+
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
|
47
|
+
os.environ["AWS_SECRET_ACCESS_ID"] = "testing"
|
48
|
+
os.environ["AWS_SECURITY_TOKEN"] = "testing"
|
49
|
+
os.environ["AWS_SESSION_TOKEN"] = "testing"
|
50
|
+
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
|
51
|
+
yield
|
52
|
+
|
53
|
+
@pytest.fixture(autouse=True)
|
54
|
+
def setup_s3_resource(self):
|
55
|
+
with mock_s3():
|
56
|
+
yield boto3.resource("s3")
|
57
|
+
|
58
|
+
@pytest.fixture(autouse=True)
|
59
|
+
def setup_test_s3_bucket(self, setup_s3_resource: ServiceResource):
|
60
|
+
setup_s3_resource.create_bucket(
|
61
|
+
ACL="authenticated-read",
|
62
|
+
Bucket=self.TEST_S3_BUCKET_NAME,
|
63
|
+
)
|
64
|
+
yield
|
65
|
+
|
66
|
+
def test_sanity(self):
|
67
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
68
|
+
body = "test-body"
|
69
|
+
uploaded_file = s3u.upload(uri, body)
|
70
|
+
assert uploaded_file is not None
|
71
|
+
assert uploaded_file["ResponseMetadata"]["HTTPStatusCode"] == 200
|
72
|
+
downloaded_file = s3u.download(uri)
|
73
|
+
downloaded_body = downloaded_file["Body"].read().decode("utf-8")
|
74
|
+
assert downloaded_file["ResponseMetadata"]["HTTPStatusCode"] == 200
|
75
|
+
assert downloaded_body == body
|
76
|
+
|
77
|
+
@patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
|
78
|
+
@patch("deltacat.aws.s3u.s3_client_cache")
|
79
|
+
def test_upload_throttled(self, mock_s3_client_cache):
|
80
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
81
|
+
body = "test-body"
|
82
|
+
throttling_err = ClientError({"Error": {"Code": "Throttling"}}, "put_object")
|
83
|
+
mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
|
84
|
+
mock_s3.put_object.side_effect = throttling_err
|
85
|
+
with pytest.raises(RetryError):
|
86
|
+
s3u.upload(uri, body)
|
87
|
+
|
88
|
+
slowdown_err = ClientError({"Error": {"Code": "SlowDown"}}, "put_object")
|
89
|
+
mock_s3.put_object.side_effect = slowdown_err
|
90
|
+
with pytest.raises(RetryError):
|
91
|
+
s3u.upload(uri, body)
|
92
|
+
|
93
|
+
no_credentials_err = NoCredentialsError()
|
94
|
+
mock_s3.put_object.side_effect = no_credentials_err
|
95
|
+
with pytest.raises(RetryError):
|
96
|
+
s3u.upload(uri, body)
|
97
|
+
|
98
|
+
assert mock_s3.put_object.call_count > 3
|
99
|
+
|
100
|
+
@patch("deltacat.aws.s3u.UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY", 1)
|
101
|
+
@patch("deltacat.aws.s3u.ManifestEntry")
|
102
|
+
@patch("deltacat.aws.s3u._get_metadata")
|
103
|
+
@patch("deltacat.aws.s3u.CapturedBlockWritePaths")
|
104
|
+
def test_upload_sliced_table_retry(
|
105
|
+
self,
|
106
|
+
mock_captured_block_write_paths,
|
107
|
+
mock_get_metadata,
|
108
|
+
mock_manifest_entry,
|
109
|
+
):
|
110
|
+
mock_manifest_entry.from_s3_obj_url.side_effect = OSError(
|
111
|
+
"Please reduce your request rate.."
|
112
|
+
)
|
113
|
+
mock_get_metadata.return_value = [mock.MagicMock()]
|
114
|
+
cbwp = CapturedBlockWritePaths()
|
115
|
+
cbwp._write_paths = ["s3_write_path"]
|
116
|
+
cbwp._block_refs = [mock.MagicMock()]
|
117
|
+
mock_captured_block_write_paths.return_value = cbwp
|
118
|
+
with pytest.raises(RetryError):
|
119
|
+
s3u.upload_sliced_table(
|
120
|
+
mock.MagicMock(),
|
121
|
+
"s3-prefix",
|
122
|
+
mock.MagicMock(),
|
123
|
+
mock.MagicMock(),
|
124
|
+
mock.MagicMock(),
|
125
|
+
mock.MagicMock(),
|
126
|
+
)
|
127
|
+
|
128
|
+
@patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
|
129
|
+
@patch("deltacat.aws.s3u.s3_client_cache")
|
130
|
+
def test_upload_transient_error_retry(self, mock_s3_client_cache):
|
131
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
132
|
+
body = "test-body"
|
133
|
+
transient_errors = [*RETRYABLE_TRANSIENT_ERRORS]
|
134
|
+
mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
|
135
|
+
|
136
|
+
while transient_errors:
|
137
|
+
err_cls = transient_errors.pop()
|
138
|
+
err_obj = self._populate_error_by_type(err_cls)
|
139
|
+
mock_s3.put_object.side_effect = err_obj
|
140
|
+
with pytest.raises(RetryError):
|
141
|
+
s3u.upload(uri, body)
|
142
|
+
|
143
|
+
assert mock_s3.put_object.call_count > len(RETRYABLE_TRANSIENT_ERRORS)
|
144
|
+
|
145
|
+
@patch("deltacat.aws.s3u.s3_client_cache")
|
146
|
+
def test_upload_unexpected_error_code(self, mock_s3_client_cache):
|
147
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
148
|
+
body = "test-body"
|
149
|
+
err = ClientError({"Error": {"Code": "UnexpectedError"}}, "put_object")
|
150
|
+
mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
|
151
|
+
mock_s3.put_object.side_effect = err
|
152
|
+
file = None
|
153
|
+
with pytest.raises(NonRetryableError):
|
154
|
+
s3u.upload(uri, body)
|
155
|
+
assert file is None
|
156
|
+
assert mock_s3.put_object.call_count == 1
|
157
|
+
|
158
|
+
@patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
|
159
|
+
@patch("deltacat.aws.s3u.s3_client_cache")
|
160
|
+
def test_download_throttled(self, mock_s3_client_cache):
|
161
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
162
|
+
no_credentials_err = NoCredentialsError()
|
163
|
+
mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
|
164
|
+
mock_s3.get_object.side_effect = no_credentials_err
|
165
|
+
file = None
|
166
|
+
with pytest.raises(RetryError):
|
167
|
+
file = s3u.download(uri)
|
168
|
+
assert file is None
|
169
|
+
assert mock_s3.get_object.call_count > 1
|
170
|
+
|
171
|
+
def test_download_not_exists(self):
|
172
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/key-not-exists"
|
173
|
+
file = None
|
174
|
+
with pytest.raises(NonRetryableError):
|
175
|
+
file = s3u.download(uri)
|
176
|
+
assert file is None
|
177
|
+
|
178
|
+
file = s3u.download(uri, fail_if_not_found=False)
|
179
|
+
assert file is None
|
180
|
+
|
181
|
+
@patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
|
182
|
+
@patch("deltacat.aws.s3u.s3_client_cache")
|
183
|
+
def test_download_transient_error_retry(self, mock_s3_client_cache):
|
184
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
185
|
+
transient_errors = [*RETRYABLE_TRANSIENT_ERRORS]
|
186
|
+
mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
|
187
|
+
|
188
|
+
while transient_errors:
|
189
|
+
err_cls = transient_errors.pop()
|
190
|
+
err_obj = self._populate_error_by_type(err_cls)
|
191
|
+
mock_s3.get_object.side_effect = err_obj
|
192
|
+
with pytest.raises(RetryError):
|
193
|
+
s3u.download(uri)
|
194
|
+
|
195
|
+
assert mock_s3.get_object.call_count > len(RETRYABLE_TRANSIENT_ERRORS)
|
196
|
+
|
197
|
+
@staticmethod
|
198
|
+
def _populate_error_by_type(err_cls):
|
199
|
+
if err_cls in (ReadTimeoutError, ConnectTimeoutError):
|
200
|
+
err_obj = err_cls(endpoint_url="127.0.0.1")
|
201
|
+
elif err_cls in (HTTPClientError, botocore.exceptions.ConnectionError):
|
202
|
+
err_obj = err_cls(endpoint_url="127.0.0.1", error=Exception)
|
203
|
+
else:
|
204
|
+
err_obj = err_cls()
|
205
|
+
return err_obj
|
@@ -22,6 +22,7 @@ from deltacat.storage import (
|
|
22
22
|
from deltacat.compute.compactor_v2.compaction_session import (
|
23
23
|
compact_partition as compact_partition_v2,
|
24
24
|
)
|
25
|
+
from deltacat.storage import DeleteParameters
|
25
26
|
|
26
27
|
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
27
28
|
|
@@ -89,9 +90,13 @@ class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
|
|
89
90
|
"""
|
90
91
|
Args:
|
91
92
|
is_inplace: bool - argument to indicate whether to try compacting an in-place compacted table (the source table is the destination table). Also needed to control whether the destination table is created
|
93
|
+
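add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]] - the deltas (table, delta type, optional delete parameters) to add to the source_partition after we've triggered compaction; None when no late deltas should be added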
|
92
94
|
"""
|
93
95
|
|
94
96
|
is_inplace: bool
|
97
|
+
add_late_deltas: Optional[
|
98
|
+
List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]
|
99
|
+
]
|
95
100
|
|
96
101
|
|
97
102
|
@dataclass(frozen=True)
|
@@ -148,6 +153,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
148
153
|
read_kwargs_provider=None,
|
149
154
|
drop_duplicates=True,
|
150
155
|
is_inplace=False,
|
156
|
+
add_late_deltas=None,
|
151
157
|
skip_enabled_compact_partition_drivers=None,
|
152
158
|
),
|
153
159
|
"2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
|
@@ -175,6 +181,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
175
181
|
read_kwargs_provider=None,
|
176
182
|
drop_duplicates=True,
|
177
183
|
is_inplace=False,
|
184
|
+
add_late_deltas=None,
|
178
185
|
skip_enabled_compact_partition_drivers=None,
|
179
186
|
),
|
180
187
|
"3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
|
@@ -211,6 +218,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
211
218
|
read_kwargs_provider=None,
|
212
219
|
drop_duplicates=True,
|
213
220
|
is_inplace=False,
|
221
|
+
add_late_deltas=None,
|
214
222
|
skip_enabled_compact_partition_drivers=None,
|
215
223
|
),
|
216
224
|
"4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
|
@@ -246,6 +254,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
246
254
|
read_kwargs_provider=None,
|
247
255
|
drop_duplicates=True,
|
248
256
|
is_inplace=False,
|
257
|
+
add_late_deltas=None,
|
249
258
|
skip_enabled_compact_partition_drivers=None,
|
250
259
|
),
|
251
260
|
"5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -276,6 +285,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
276
285
|
read_kwargs_provider=None,
|
277
286
|
drop_duplicates=True,
|
278
287
|
is_inplace=False,
|
288
|
+
add_late_deltas=None,
|
279
289
|
skip_enabled_compact_partition_drivers=None,
|
280
290
|
),
|
281
291
|
"6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -306,6 +316,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
306
316
|
read_kwargs_provider=None,
|
307
317
|
drop_duplicates=True,
|
308
318
|
is_inplace=False,
|
319
|
+
add_late_deltas=None,
|
309
320
|
skip_enabled_compact_partition_drivers=None,
|
310
321
|
),
|
311
322
|
"7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -336,6 +347,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
336
347
|
read_kwargs_provider=None,
|
337
348
|
drop_duplicates=True,
|
338
349
|
is_inplace=False,
|
350
|
+
add_late_deltas=None,
|
339
351
|
skip_enabled_compact_partition_drivers=None,
|
340
352
|
),
|
341
353
|
"8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
|
@@ -368,6 +380,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
368
380
|
read_kwargs_provider=None,
|
369
381
|
drop_duplicates=True,
|
370
382
|
is_inplace=False,
|
383
|
+
add_late_deltas=None,
|
371
384
|
skip_enabled_compact_partition_drivers=None,
|
372
385
|
),
|
373
386
|
"9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
|
@@ -398,6 +411,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
398
411
|
read_kwargs_provider=None,
|
399
412
|
drop_duplicates=True,
|
400
413
|
is_inplace=False,
|
414
|
+
add_late_deltas=None,
|
401
415
|
skip_enabled_compact_partition_drivers=None,
|
402
416
|
),
|
403
417
|
"10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
|
@@ -428,6 +442,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
428
442
|
read_kwargs_provider=None,
|
429
443
|
drop_duplicates=True,
|
430
444
|
is_inplace=False,
|
445
|
+
add_late_deltas=None,
|
431
446
|
skip_enabled_compact_partition_drivers=None,
|
432
447
|
),
|
433
448
|
"11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
|
@@ -458,6 +473,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
458
473
|
read_kwargs_provider=None,
|
459
474
|
drop_duplicates=True,
|
460
475
|
is_inplace=False,
|
476
|
+
add_late_deltas=None,
|
461
477
|
skip_enabled_compact_partition_drivers=None,
|
462
478
|
),
|
463
479
|
"12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
@@ -488,6 +504,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
488
504
|
read_kwargs_provider=None,
|
489
505
|
drop_duplicates=True,
|
490
506
|
is_inplace=False,
|
507
|
+
add_late_deltas=None,
|
491
508
|
skip_enabled_compact_partition_drivers=None,
|
492
509
|
),
|
493
510
|
"13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
|
@@ -518,6 +535,50 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
518
535
|
read_kwargs_provider=None,
|
519
536
|
drop_duplicates=True,
|
520
537
|
is_inplace=True,
|
538
|
+
add_late_deltas=[
|
539
|
+
(
|
540
|
+
pa.Table.from_arrays(
|
541
|
+
[
|
542
|
+
pa.array([str(i) for i in range(20)]),
|
543
|
+
pa.array([i for i in range(20)]),
|
544
|
+
],
|
545
|
+
names=["pk_col_1", "sk_col_1"],
|
546
|
+
),
|
547
|
+
DeltaType.UPSERT,
|
548
|
+
None,
|
549
|
+
)
|
550
|
+
],
|
551
|
+
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
552
|
+
),
|
553
|
+
"14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
|
554
|
+
primary_keys={"pk_col_1"},
|
555
|
+
sort_keys=[SortKey.of(key_name="sk_col_1")],
|
556
|
+
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
557
|
+
partition_values=["1"],
|
558
|
+
input_deltas=pa.Table.from_arrays(
|
559
|
+
[
|
560
|
+
pa.array([str(i) for i in range(10)]),
|
561
|
+
pa.array([i for i in range(10)]),
|
562
|
+
],
|
563
|
+
names=["pk_col_1", "sk_col_1"],
|
564
|
+
),
|
565
|
+
input_deltas_delta_type=DeltaType.UPSERT,
|
566
|
+
expected_terminal_compact_partition_result=pa.Table.from_arrays(
|
567
|
+
[
|
568
|
+
pa.array([str(i) for i in range(10)]),
|
569
|
+
pa.array([i for i in range(10)]),
|
570
|
+
],
|
571
|
+
names=["pk_col_1", "sk_col_1"],
|
572
|
+
),
|
573
|
+
expected_terminal_exception=AssertionError,
|
574
|
+
expected_terminal_exception_message="hash_bucket_count is a required arg for compactor v2",
|
575
|
+
do_create_placement_group=False,
|
576
|
+
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
577
|
+
hash_bucket_count=None,
|
578
|
+
read_kwargs_provider=None,
|
579
|
+
drop_duplicates=True,
|
580
|
+
is_inplace=False,
|
581
|
+
add_late_deltas=False,
|
521
582
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
522
583
|
),
|
523
584
|
}
|
@@ -35,6 +35,8 @@ class TestCompactionSession(unittest.TestCase):
|
|
35
35
|
@classmethod
|
36
36
|
def doClassCleanups(cls) -> None:
|
37
37
|
os.remove(cls.DB_FILE_PATH)
|
38
|
+
ray.shutdown()
|
39
|
+
super().tearDownClass()
|
38
40
|
|
39
41
|
@patch("deltacat.compute.compactor_v2.compaction_session.rcf")
|
40
42
|
@patch("deltacat.compute.compactor_v2.compaction_session.s3_utils")
|
@@ -2,8 +2,9 @@ import ray
|
|
2
2
|
from moto import mock_s3
|
3
3
|
import pytest
|
4
4
|
import os
|
5
|
+
import logging
|
5
6
|
import boto3
|
6
|
-
from typing import Any, Callable, Dict, List, Optional, Set
|
7
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
|
7
8
|
from boto3.resources.base import ServiceResource
|
8
9
|
import pyarrow as pa
|
9
10
|
from pytest_benchmark.fixture import BenchmarkFixture
|
@@ -15,6 +16,7 @@ from deltacat.tests.compute.test_util_common import (
|
|
15
16
|
from deltacat.tests.test_utils.utils import read_s3_contents
|
16
17
|
from deltacat.tests.compute.test_util_create_table_deltas_repo import (
|
17
18
|
create_src_w_deltas_destination_plus_destination,
|
19
|
+
add_late_deltas_to_partition,
|
18
20
|
)
|
19
21
|
from deltacat.tests.compute.compact_partition_test_cases import (
|
20
22
|
INCREMENTAL_TEST_CASES,
|
@@ -27,12 +29,33 @@ from deltacat.tests.compute.test_util_constant import (
|
|
27
29
|
from deltacat.compute.compactor import (
|
28
30
|
RoundCompletionInfo,
|
29
31
|
)
|
32
|
+
from deltacat.storage import (
|
33
|
+
CommitState,
|
34
|
+
DeltaType,
|
35
|
+
Delta,
|
36
|
+
DeltaLocator,
|
37
|
+
Partition,
|
38
|
+
PartitionLocator,
|
39
|
+
)
|
40
|
+
from deltacat.types.media import ContentType
|
41
|
+
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
42
|
+
CompactionSessionAuditInfo,
|
43
|
+
)
|
44
|
+
from deltacat.compute.compactor.model.compact_partition_params import (
|
45
|
+
CompactPartitionParams,
|
46
|
+
)
|
47
|
+
from deltacat.utils.placement import (
|
48
|
+
PlacementGroupManager,
|
49
|
+
)
|
50
|
+
from deltacat import logs
|
30
51
|
|
31
52
|
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
32
53
|
"db_file_path",
|
33
54
|
"deltacat/tests/local_deltacat_storage/db_test.sqlite",
|
34
55
|
)
|
35
56
|
|
57
|
+
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
58
|
+
|
36
59
|
|
37
60
|
"""
|
38
61
|
MODULE scoped fixtures
|
@@ -43,6 +66,7 @@ MODULE scoped fixtures
|
|
43
66
|
def setup_ray_cluster():
|
44
67
|
ray.init(local_mode=True, ignore_reinit_error=True)
|
45
68
|
yield
|
69
|
+
ray.shutdown()
|
46
70
|
|
47
71
|
|
48
72
|
@pytest.fixture(autouse=True, scope="module")
|
@@ -58,19 +82,20 @@ def mock_aws_credential():
|
|
58
82
|
@pytest.fixture(autouse=True, scope="module")
|
59
83
|
def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
|
60
84
|
# make sure the database file is deleted after all the compactor package tests are completed
|
85
|
+
yield
|
61
86
|
if os.path.exists(DATABASE_FILE_PATH_VALUE):
|
62
87
|
os.remove(DATABASE_FILE_PATH_VALUE)
|
63
88
|
|
64
89
|
|
65
90
|
@pytest.fixture(scope="module")
|
66
|
-
def
|
91
|
+
def s3_resource():
|
67
92
|
with mock_s3():
|
68
93
|
yield boto3.resource("s3")
|
69
94
|
|
70
95
|
|
71
96
|
@pytest.fixture(autouse=True, scope="module")
|
72
|
-
def setup_compaction_artifacts_s3_bucket(
|
73
|
-
|
97
|
+
def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
|
98
|
+
s3_resource.create_bucket(
|
74
99
|
ACL="authenticated-read",
|
75
100
|
Bucket=TEST_S3_RCF_BUCKET_NAME,
|
76
101
|
)
|
@@ -112,6 +137,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
112
137
|
"drop_duplicates_param",
|
113
138
|
"skip_enabled_compact_partition_drivers",
|
114
139
|
"is_inplace",
|
140
|
+
"add_late_deltas",
|
115
141
|
"compact_partition_func",
|
116
142
|
],
|
117
143
|
[
|
@@ -133,6 +159,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
133
159
|
read_kwargs_provider,
|
134
160
|
skip_enabled_compact_partition_drivers,
|
135
161
|
is_inplace,
|
162
|
+
add_late_deltas,
|
136
163
|
compact_partition_func,
|
137
164
|
)
|
138
165
|
for test_name, (
|
@@ -152,13 +179,14 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
152
179
|
read_kwargs_provider,
|
153
180
|
skip_enabled_compact_partition_drivers,
|
154
181
|
is_inplace,
|
182
|
+
add_late_deltas,
|
155
183
|
compact_partition_func,
|
156
184
|
) in INCREMENTAL_TEST_CASES.items()
|
157
185
|
],
|
158
186
|
ids=[test_name for test_name in INCREMENTAL_TEST_CASES],
|
159
187
|
)
|
160
188
|
def test_compact_partition_incremental(
|
161
|
-
|
189
|
+
s3_resource: ServiceResource,
|
162
190
|
offer_local_deltacat_storage_kwargs: Dict[str, Any],
|
163
191
|
test_name: str,
|
164
192
|
primary_keys: Set[str],
|
@@ -177,25 +205,11 @@ def test_compact_partition_incremental(
|
|
177
205
|
read_kwargs_provider_param: Any,
|
178
206
|
skip_enabled_compact_partition_drivers,
|
179
207
|
is_inplace: bool,
|
208
|
+
add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType]]],
|
180
209
|
compact_partition_func: Callable,
|
181
210
|
benchmark: BenchmarkFixture,
|
182
211
|
):
|
183
212
|
import deltacat.tests.local_deltacat_storage as ds
|
184
|
-
from deltacat.types.media import ContentType
|
185
|
-
from deltacat.storage import (
|
186
|
-
DeltaLocator,
|
187
|
-
Partition,
|
188
|
-
PartitionLocator,
|
189
|
-
)
|
190
|
-
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
191
|
-
CompactionSessionAuditInfo,
|
192
|
-
)
|
193
|
-
from deltacat.compute.compactor.model.compact_partition_params import (
|
194
|
-
CompactPartitionParams,
|
195
|
-
)
|
196
|
-
from deltacat.utils.placement import (
|
197
|
-
PlacementGroupManager,
|
198
|
-
)
|
199
213
|
|
200
214
|
ds_mock_kwargs: Dict[str, Any] = offer_local_deltacat_storage_kwargs
|
201
215
|
|
@@ -205,6 +219,9 @@ def test_compact_partition_incremental(
|
|
205
219
|
source_table_stream,
|
206
220
|
destination_table_stream,
|
207
221
|
_,
|
222
|
+
source_table_namespace,
|
223
|
+
source_table_name,
|
224
|
+
source_table_version,
|
208
225
|
) = create_src_w_deltas_destination_plus_destination(
|
209
226
|
primary_keys,
|
210
227
|
sort_keys,
|
@@ -227,11 +244,13 @@ def test_compact_partition_incremental(
|
|
227
244
|
)
|
228
245
|
num_workers, worker_instance_cpu = DEFAULT_NUM_WORKERS, DEFAULT_WORKER_INSTANCE_CPUS
|
229
246
|
total_cpus: int = num_workers * worker_instance_cpu
|
230
|
-
pgm: Optional[PlacementGroupManager] =
|
231
|
-
|
232
|
-
pgm = PlacementGroupManager(
|
247
|
+
pgm: Optional[PlacementGroupManager] = (
|
248
|
+
PlacementGroupManager(
|
233
249
|
1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
|
234
250
|
).pgs[0]
|
251
|
+
if create_placement_group_param
|
252
|
+
else None
|
253
|
+
)
|
235
254
|
compact_partition_params = CompactPartitionParams.of(
|
236
255
|
{
|
237
256
|
"compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
|
@@ -265,24 +284,36 @@ def test_compact_partition_incremental(
|
|
265
284
|
|
266
285
|
Returns: args, kwargs
|
267
286
|
"""
|
268
|
-
|
287
|
+
s3_resource.Bucket(TEST_S3_RCF_BUCKET_NAME).objects.all().delete()
|
269
288
|
return (compact_partition_params,), {}
|
270
289
|
|
290
|
+
if add_late_deltas:
|
291
|
+
# NOTE: In the case of in-place compaction it is plausible that new deltas may be added to the source partition during compaction
|
292
|
+
# (so that the source_partition.stream_position > last_stream_position_to_compact).
|
293
|
+
# This parameter helps simulate the case to check that no late deltas are dropped even when the compacted partition is created.
|
294
|
+
latest_delta, _ = add_late_deltas_to_partition(
|
295
|
+
add_late_deltas, source_partition, ds_mock_kwargs
|
296
|
+
)
|
297
|
+
if expected_terminal_exception:
|
298
|
+
with pytest.raises(expected_terminal_exception) as exc_info:
|
299
|
+
compact_partition_func(compact_partition_params)
|
300
|
+
assert expected_terminal_exception_message in str(exc_info.value)
|
301
|
+
return
|
271
302
|
rcf_file_s3_uri = benchmark.pedantic(
|
272
303
|
compact_partition_func, setup=_incremental_compaction_setup
|
273
304
|
)
|
305
|
+
|
274
306
|
# validate
|
275
|
-
round_completion_info: RoundCompletionInfo = get_rcf(
|
276
|
-
setup_s3_resource, rcf_file_s3_uri
|
277
|
-
)
|
307
|
+
round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
|
278
308
|
compacted_delta_locator: DeltaLocator = (
|
279
309
|
round_completion_info.compacted_delta_locator
|
280
310
|
)
|
281
|
-
audit_bucket, audit_key =
|
282
|
-
|
283
|
-
)
|
311
|
+
audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
|
312
|
+
round_completion_info.compaction_audit_url
|
313
|
+
)
|
314
|
+
|
284
315
|
compaction_audit_obj: Dict[str, Any] = read_s3_contents(
|
285
|
-
|
316
|
+
s3_resource, audit_bucket, audit_key
|
286
317
|
)
|
287
318
|
compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
|
288
319
|
**compaction_audit_obj
|
@@ -318,8 +349,34 @@ def test_compact_partition_incremental(
|
|
318
349
|
== destination_partition_locator.partition_values
|
319
350
|
and source_partition.locator.stream_id
|
320
351
|
== destination_partition_locator.stream_id
|
321
|
-
), "The source partition should match the destination partition"
|
352
|
+
), f"The source partition: {source_partition.locator.canonical_string} should match the destination partition: {destination_partition_locator.canonical_string}"
|
322
353
|
assert (
|
323
354
|
compacted_delta_locator.stream_id == source_partition.locator.stream_id
|
324
355
|
), "The compacted delta should be in the same stream as the source"
|
356
|
+
source_partition: Partition = ds.get_partition(
|
357
|
+
source_table_stream.locator,
|
358
|
+
partition_values_param,
|
359
|
+
**ds_mock_kwargs,
|
360
|
+
)
|
361
|
+
compacted_partition: Optional[Partition] = ds.get_partition(
|
362
|
+
compacted_delta_locator.stream_locator,
|
363
|
+
partition_values_param,
|
364
|
+
**ds_mock_kwargs,
|
365
|
+
)
|
366
|
+
assert (
|
367
|
+
compacted_partition.state == source_partition.state == CommitState.COMMITTED
|
368
|
+
), f"The compacted/source table partition should be in {CommitState.COMMITTED} state and not {CommitState.DEPRECATED}"
|
369
|
+
if add_late_deltas:
|
370
|
+
compacted_partition_deltas: List[Delta] = ds.list_partition_deltas(
|
371
|
+
partition_like=compacted_partition,
|
372
|
+
ascending_order=False,
|
373
|
+
**ds_mock_kwargs,
|
374
|
+
).all_items()
|
375
|
+
assert (
|
376
|
+
len(compacted_partition_deltas) == len(add_late_deltas) + 1
|
377
|
+
), f"Expected the number of deltas within the newly promoted partition to equal 1 (the compacted delta) + the # of late deltas: {len(add_late_deltas)}"
|
378
|
+
assert (
|
379
|
+
compacted_partition_deltas[0].stream_position
|
380
|
+
== latest_delta.stream_position
|
381
|
+
), f"Expected the latest delta in the compacted partition: {compacted_partition_deltas[0].stream_position} to have the same stream position as the latest delta: {latest_delta.stream_position}"
|
325
382
|
return
|