deltacat 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff compares publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions as published.
Files changed (28)
  1. deltacat/__init__.py +1 -1
  2. deltacat/aws/constants.py +21 -2
  3. deltacat/aws/s3u.py +107 -33
  4. deltacat/compute/compactor/model/round_completion_info.py +4 -0
  5. deltacat/compute/compactor_v2/compaction_session.py +51 -25
  6. deltacat/compute/compactor_v2/constants.py +12 -0
  7. deltacat/compute/compactor_v2/model/compaction_session.py +21 -0
  8. deltacat/compute/compactor_v2/steps/hash_bucket.py +6 -0
  9. deltacat/compute/compactor_v2/steps/merge.py +6 -0
  10. deltacat/compute/compactor_v2/utils/task_options.py +8 -2
  11. deltacat/storage/interface.py +10 -3
  12. deltacat/tests/aws/test_s3u.py +193 -0
  13. deltacat/tests/catalog/test_default_catalog_impl.py +2 -0
  14. deltacat/tests/compute/compact_partition_test_cases.py +61 -0
  15. deltacat/tests/compute/compactor_v2/test_compaction_session.py +2 -0
  16. deltacat/tests/compute/test_compact_partition_incremental.py +89 -32
  17. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +21 -26
  18. deltacat/tests/compute/test_util_create_table_deltas_repo.py +45 -2
  19. deltacat/tests/local_deltacat_storage/__init__.py +38 -19
  20. deltacat/tests/utils/ray_utils/__init__.py +0 -0
  21. deltacat/tests/utils/ray_utils/test_concurrency.py +50 -0
  22. deltacat/tests/utils/test_resources.py +28 -0
  23. deltacat/utils/resources.py +45 -0
  24. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/METADATA +1 -1
  25. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/RECORD +28 -25
  26. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/LICENSE +0 -0
  27. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/WHEEL +0 -0
  28. {deltacat-1.1.5.dist-info → deltacat-1.1.7.dist-info}/top_level.txt +0 -0
deltacat/tests/aws/test_s3u.py

@@ -1,7 +1,32 @@
 import unittest
+
+import botocore
+
+from deltacat.aws.constants import RETRYABLE_TRANSIENT_ERRORS
 from deltacat.aws.s3u import UuidBlockWritePathProvider, CapturedBlockWritePaths


+import os
+from unittest import mock
+from unittest.mock import patch
+
+import boto3
+import pytest
+from boto3.resources.base import ServiceResource
+from botocore.exceptions import (
+    ClientError,
+    NoCredentialsError,
+    ReadTimeoutError,
+    ConnectTimeoutError,
+    HTTPClientError,
+)
+from deltacat.exceptions import NonRetryableError
+from moto import mock_s3
+from tenacity import RetryError
+
+from deltacat.aws import s3u
+
+
 class TestUuidBlockWritePathProvider(unittest.TestCase):
     def test_uuid_block_write_provider_sanity(self):
         capture_object = CapturedBlockWritePaths()
@@ -10,3 +35,171 @@ class TestUuidBlockWritePathProvider(unittest.TestCase):
         result = provider("base_path")

         self.assertRegex(result, r"^base_path/[\w-]{36}$")
+
+
+class TestDownloadUpload(unittest.TestCase):
+    TEST_S3_BUCKET_NAME = "TEST_S3_BUCKET"
+    TEST_S3_KEY = "TEST_S3_KEY"
+
+    @pytest.fixture(autouse=True)
+    def mock_aws_credential(self):
+        os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+        os.environ["AWS_SECRET_ACCESS_ID"] = "testing"
+        os.environ["AWS_SECURITY_TOKEN"] = "testing"
+        os.environ["AWS_SESSION_TOKEN"] = "testing"
+        os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
+        yield
+
+    @pytest.fixture(autouse=True)
+    def setup_s3_resource(self):
+        with mock_s3():
+            yield boto3.resource("s3")
+
+    @pytest.fixture(autouse=True)
+    def setup_test_s3_bucket(self, setup_s3_resource: ServiceResource):
+        setup_s3_resource.create_bucket(
+            ACL="authenticated-read",
+            Bucket=self.TEST_S3_BUCKET_NAME,
+        )
+        yield
+
+    def test_sanity(self):
+        uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
+        body = "test-body"
+        uploaded_file = s3u.upload(uri, body)
+        assert uploaded_file is not None
+        assert uploaded_file["ResponseMetadata"]["HTTPStatusCode"] == 200
+        downloaded_file = s3u.download(uri)
+        downloaded_body = downloaded_file["Body"].read().decode("utf-8")
+        assert downloaded_file["ResponseMetadata"]["HTTPStatusCode"] == 200
+        assert downloaded_body == body
+
+    @patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
+    @patch("deltacat.aws.s3u.s3_client_cache")
+    def test_upload_throttled(self, mock_s3_client_cache):
+        uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
+        body = "test-body"
+        throttling_err = ClientError({"Error": {"Code": "Throttling"}}, "put_object")
+        mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
+        mock_s3.put_object.side_effect = throttling_err
+        with pytest.raises(RetryError):
+            s3u.upload(uri, body)
+
+        slowdown_err = ClientError({"Error": {"Code": "SlowDown"}}, "put_object")
+        mock_s3.put_object.side_effect = slowdown_err
+        with pytest.raises(RetryError):
+            s3u.upload(uri, body)
+
+        no_credentials_err = NoCredentialsError()
+        mock_s3.put_object.side_effect = no_credentials_err
+        with pytest.raises(RetryError):
+            s3u.upload(uri, body)
+
+        assert mock_s3.put_object.call_count > 3
+
+    @patch("deltacat.aws.s3u.UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY", 1)
+    @patch("deltacat.aws.s3u.ManifestEntry")
+    @patch("deltacat.aws.s3u._get_metadata")
+    @patch("deltacat.aws.s3u.CapturedBlockWritePaths")
+    def test_upload_sliced_table_retry(
+        self,
+        mock_captured_block_write_paths,
+        mock_get_metadata,
+        mock_manifest_entry,
+    ):
+        mock_manifest_entry.from_s3_obj_url.side_effect = OSError(
+            "Please reduce your request rate.."
+        )
+        mock_get_metadata.return_value = [mock.MagicMock()]
+        cbwp = CapturedBlockWritePaths()
+        cbwp._write_paths = ["s3_write_path"]
+        cbwp._block_refs = [mock.MagicMock()]
+        mock_captured_block_write_paths.return_value = cbwp
+        with pytest.raises(RetryError):
+            s3u.upload_sliced_table(
+                mock.MagicMock(),
+                "s3-prefix",
+                mock.MagicMock(),
+                mock.MagicMock(),
+                mock.MagicMock(),
+                mock.MagicMock(),
+            )
+
+    @patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
+    @patch("deltacat.aws.s3u.s3_client_cache")
+    def test_upload_transient_error_retry(self, mock_s3_client_cache):
+        uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
+        body = "test-body"
+        transient_errors = [*RETRYABLE_TRANSIENT_ERRORS]
+        mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
+
+        while transient_errors:
+            err_cls = transient_errors.pop()
+            err_obj = self._populate_error_by_type(err_cls)
+            mock_s3.put_object.side_effect = err_obj
+            with pytest.raises(RetryError):
+                s3u.upload(uri, body)
+
+        assert mock_s3.put_object.call_count > len(RETRYABLE_TRANSIENT_ERRORS)
+
+    @patch("deltacat.aws.s3u.s3_client_cache")
+    def test_upload_unexpected_error_code(self, mock_s3_client_cache):
+        uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
+        body = "test-body"
+        err = ClientError({"Error": {"Code": "UnexpectedError"}}, "put_object")
+        mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
+        mock_s3.put_object.side_effect = err
+        file = None
+        with pytest.raises(NonRetryableError):
+            s3u.upload(uri, body)
+        assert file is None
+        assert mock_s3.put_object.call_count == 1
+
+    @patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
+    @patch("deltacat.aws.s3u.s3_client_cache")
+    def test_download_throttled(self, mock_s3_client_cache):
+        uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
+        no_credentials_err = NoCredentialsError()
+        mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
+        mock_s3.get_object.side_effect = no_credentials_err
+        file = None
+        with pytest.raises(RetryError):
+            file = s3u.download(uri)
+        assert file is None
+        assert mock_s3.get_object.call_count > 1
+
+    def test_download_not_exists(self):
+        uri = f"s3://{self.TEST_S3_BUCKET_NAME}/key-not-exists"
+        file = None
+        with pytest.raises(NonRetryableError):
+            file = s3u.download(uri)
+        assert file is None
+
+        file = s3u.download(uri, fail_if_not_found=False)
+        assert file is None
+
+    @patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
+    @patch("deltacat.aws.s3u.s3_client_cache")
+    def test_download_transient_error_retry(self, mock_s3_client_cache):
+        uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
+        transient_errors = [*RETRYABLE_TRANSIENT_ERRORS]
+        mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
+
+        while transient_errors:
+            err_cls = transient_errors.pop()
+            err_obj = self._populate_error_by_type(err_cls)
+            mock_s3.get_object.side_effect = err_obj
+            with pytest.raises(RetryError):
+                s3u.download(uri)
+
+        assert mock_s3.get_object.call_count > len(RETRYABLE_TRANSIENT_ERRORS)
+
+    @staticmethod
+    def _populate_error_by_type(err_cls):
+        if err_cls in (ReadTimeoutError, ConnectTimeoutError):
+            err_obj = err_cls(endpoint_url="127.0.0.1")
+        elif err_cls in (HTTPClientError, botocore.exceptions.ConnectionError):
+            err_obj = err_cls(endpoint_url="127.0.0.1", error=Exception)
+        else:
+            err_obj = err_cls()
+        return err_obj
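
The new TestDownloadUpload suite above exercises the retry policy in deltacat.aws.s3u: throttling codes and the transient errors listed in deltacat.aws.constants.RETRYABLE_TRANSIENT_ERRORS are retried until UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY elapses (surfacing as tenacity's RetryError), while unexpected client error codes fail fast with NonRetryableError. The sketch below only illustrates that behavior with a tenacity-based wrapper; upload_with_retry, _classify, and the constant values are hypothetical and not the actual s3u implementation.

    # Illustrative sketch only -- not the actual deltacat.aws.s3u code.
    import botocore.exceptions as bce
    from botocore.exceptions import ClientError
    from tenacity import (
        Retrying,
        retry_if_exception_type,
        stop_after_delay,
        wait_random_exponential,
    )

    # Hypothetical stand-ins for the deltacat.aws.constants values the tests reference.
    RETRYABLE_TRANSIENT_ERRORS = (
        bce.ReadTimeoutError,
        bce.ConnectTimeoutError,
        bce.HTTPClientError,
        bce.NoCredentialsError,
    )
    UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY = 10  # seconds


    class RetryableError(Exception):
        pass


    class NonRetryableError(Exception):
        pass


    def _classify(err: Exception) -> Exception:
        # Throttling-style client errors and known transient errors are retried;
        # anything else is surfaced immediately as non-retryable.
        if isinstance(err, RETRYABLE_TRANSIENT_ERRORS):
            return RetryableError(str(err))
        if isinstance(err, ClientError):
            code = err.response.get("Error", {}).get("Code")
            if code in ("Throttling", "SlowDown"):
                return RetryableError(str(err))
        return NonRetryableError(str(err))


    def upload_with_retry(s3_client, bucket: str, key: str, body: str) -> dict:
        # Keep retrying retryable errors until the delay budget is exhausted, at
        # which point tenacity raises RetryError (the outcome the tests assert).
        retrying = Retrying(
            retry=retry_if_exception_type(RetryableError),
            stop=stop_after_delay(UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY),
            wait=wait_random_exponential(multiplier=1, max=4),
        )

        def _put() -> dict:
            try:
                return s3_client.put_object(Bucket=bucket, Key=key, Body=body)
            except Exception as err:
                raise _classify(err) from err

        return retrying(_put)

Patching the stop-after-delay constant to 1 second, as the tests do, keeps the retry loop short while still verifying that each error class is retried more than once.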

deltacat/tests/catalog/test_default_catalog_impl.py

@@ -36,6 +36,8 @@ class TestReadTable(unittest.TestCase):
     @classmethod
     def doClassCleanups(cls) -> None:
         os.remove(cls.DB_FILE_PATH)
+        ray.shutdown()
+        super().tearDownClass()

     def test_daft_distributed_read_sanity(self):
         # setup

deltacat/tests/compute/compact_partition_test_cases.py

@@ -22,6 +22,7 @@ from deltacat.storage import (
 from deltacat.compute.compactor_v2.compaction_session import (
     compact_partition as compact_partition_v2,
 )
+from deltacat.storage import DeleteParameters

 from deltacat.compute.compactor.model.compactor_version import CompactorVersion

@@ -89,9 +90,13 @@ class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
     """
     Args:
     is_inplace: bool - argument to indicate whether to try compacting an in-place compacted table (the source table is the destination table). Also needed to control whether the destination table is created
+    add_late_deltas: List[Tuple[pa.Table, DeltaType]] - argument to indicate whether to add deltas to the source_partition after we've triggered compaction
     """

     is_inplace: bool
+    add_late_deltas: Optional[
+        List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]
+    ]


 @dataclass(frozen=True)
@@ -148,6 +153,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
@@ -175,6 +181,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
@@ -211,6 +218,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
@@ -246,6 +254,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
@@ -276,6 +285,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
@@ -306,6 +316,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
@@ -336,6 +347,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
@@ -368,6 +380,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
@@ -398,6 +411,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
@@ -428,6 +442,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
@@ -458,6 +473,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
@@ -488,6 +504,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=False,
+        add_late_deltas=None,
         skip_enabled_compact_partition_drivers=None,
     ),
     "13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
@@ -518,6 +535,50 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
         read_kwargs_provider=None,
         drop_duplicates=True,
         is_inplace=True,
+        add_late_deltas=[
+            (
+                pa.Table.from_arrays(
+                    [
+                        pa.array([str(i) for i in range(20)]),
+                        pa.array([i for i in range(20)]),
+                    ],
+                    names=["pk_col_1", "sk_col_1"],
+                ),
+                DeltaType.UPSERT,
+                None,
+            )
+        ],
+        skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
+    ),
+    "14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
+        primary_keys={"pk_col_1"},
+        sort_keys=[SortKey.of(key_name="sk_col_1")],
+        partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
+        partition_values=["1"],
+        input_deltas=pa.Table.from_arrays(
+            [
+                pa.array([str(i) for i in range(10)]),
+                pa.array([i for i in range(10)]),
+            ],
+            names=["pk_col_1", "sk_col_1"],
+        ),
+        input_deltas_delta_type=DeltaType.UPSERT,
+        expected_terminal_compact_partition_result=pa.Table.from_arrays(
+            [
+                pa.array([str(i) for i in range(10)]),
+                pa.array([i for i in range(10)]),
+            ],
+            names=["pk_col_1", "sk_col_1"],
+        ),
+        expected_terminal_exception=AssertionError,
+        expected_terminal_exception_message="hash_bucket_count is a required arg for compactor v2",
+        do_create_placement_group=False,
+        records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
+        hash_bucket_count=None,
+        read_kwargs_provider=None,
+        drop_duplicates=True,
+        is_inplace=False,
+        add_late_deltas=False,
         skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
     ),
 }
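
For reference, the add_late_deltas entries documented above are plain (pyarrow.Table, DeltaType, Optional[DeleteParameters]) tuples. A standalone illustration of building one such entry, mirroring test case 13 (the late_delta name is chosen here for illustration only):

    # Illustrative late-delta entry; DeltaType comes from deltacat.storage as in the
    # diff above. The third element would hold deltacat.storage.DeleteParameters for
    # a delete delta; it is None for an upsert.
    import pyarrow as pa

    from deltacat.storage import DeltaType

    late_delta = (
        pa.Table.from_arrays(
            [
                pa.array([str(i) for i in range(20)]),  # pk_col_1 values
                pa.array(list(range(20))),              # sk_col_1 values
            ],
            names=["pk_col_1", "sk_col_1"],
        ),
        DeltaType.UPSERT,
        None,
    )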

deltacat/tests/compute/compactor_v2/test_compaction_session.py

@@ -35,6 +35,8 @@ class TestCompactionSession(unittest.TestCase):
     @classmethod
     def doClassCleanups(cls) -> None:
         os.remove(cls.DB_FILE_PATH)
+        ray.shutdown()
+        super().tearDownClass()

     @patch("deltacat.compute.compactor_v2.compaction_session.rcf")
     @patch("deltacat.compute.compactor_v2.compaction_session.s3_utils")

deltacat/tests/compute/test_compact_partition_incremental.py

@@ -2,8 +2,9 @@ import ray
 from moto import mock_s3
 import pytest
 import os
+import logging
 import boto3
-from typing import Any, Callable, Dict, List, Optional, Set
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple
 from boto3.resources.base import ServiceResource
 import pyarrow as pa
 from pytest_benchmark.fixture import BenchmarkFixture
@@ -15,6 +16,7 @@ from deltacat.tests.compute.test_util_common import (
 from deltacat.tests.test_utils.utils import read_s3_contents
 from deltacat.tests.compute.test_util_create_table_deltas_repo import (
     create_src_w_deltas_destination_plus_destination,
+    add_late_deltas_to_partition,
 )
 from deltacat.tests.compute.compact_partition_test_cases import (
     INCREMENTAL_TEST_CASES,
@@ -27,12 +29,33 @@ from deltacat.tests.compute.test_util_constant import (
 from deltacat.compute.compactor import (
     RoundCompletionInfo,
 )
+from deltacat.storage import (
+    CommitState,
+    DeltaType,
+    Delta,
+    DeltaLocator,
+    Partition,
+    PartitionLocator,
+)
+from deltacat.types.media import ContentType
+from deltacat.compute.compactor.model.compaction_session_audit_info import (
+    CompactionSessionAuditInfo,
+)
+from deltacat.compute.compactor.model.compact_partition_params import (
+    CompactPartitionParams,
+)
+from deltacat.utils.placement import (
+    PlacementGroupManager,
+)
+from deltacat import logs

 DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
     "db_file_path",
     "deltacat/tests/local_deltacat_storage/db_test.sqlite",
 )

+logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
+

 """
 MODULE scoped fixtures
@@ -43,6 +66,7 @@ MODULE scoped fixtures
 def setup_ray_cluster():
     ray.init(local_mode=True, ignore_reinit_error=True)
     yield
+    ray.shutdown()


 @pytest.fixture(autouse=True, scope="module")
@@ -58,19 +82,20 @@ def mock_aws_credential():
 @pytest.fixture(autouse=True, scope="module")
 def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
     # make sure the database file is deleted after all the compactor package tests are completed
+    yield
     if os.path.exists(DATABASE_FILE_PATH_VALUE):
         os.remove(DATABASE_FILE_PATH_VALUE)


 @pytest.fixture(scope="module")
-def setup_s3_resource(mock_aws_credential):
+def s3_resource():
     with mock_s3():
         yield boto3.resource("s3")


 @pytest.fixture(autouse=True, scope="module")
-def setup_compaction_artifacts_s3_bucket(setup_s3_resource: ServiceResource):
-    setup_s3_resource.create_bucket(
+def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
+    s3_resource.create_bucket(
         ACL="authenticated-read",
         Bucket=TEST_S3_RCF_BUCKET_NAME,
     )
@@ -112,6 +137,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
         "drop_duplicates_param",
         "skip_enabled_compact_partition_drivers",
         "is_inplace",
+        "add_late_deltas",
         "compact_partition_func",
     ],
     [
@@ -133,6 +159,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
             read_kwargs_provider,
             skip_enabled_compact_partition_drivers,
             is_inplace,
+            add_late_deltas,
             compact_partition_func,
         )
         for test_name, (
@@ -152,13 +179,14 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
             read_kwargs_provider,
             skip_enabled_compact_partition_drivers,
             is_inplace,
+            add_late_deltas,
             compact_partition_func,
         ) in INCREMENTAL_TEST_CASES.items()
     ],
     ids=[test_name for test_name in INCREMENTAL_TEST_CASES],
 )
 def test_compact_partition_incremental(
-    setup_s3_resource: ServiceResource,
+    s3_resource: ServiceResource,
     offer_local_deltacat_storage_kwargs: Dict[str, Any],
     test_name: str,
     primary_keys: Set[str],
@@ -177,25 +205,11 @@ def test_compact_partition_incremental(
     read_kwargs_provider_param: Any,
     skip_enabled_compact_partition_drivers,
     is_inplace: bool,
+    add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType]]],
     compact_partition_func: Callable,
     benchmark: BenchmarkFixture,
 ):
     import deltacat.tests.local_deltacat_storage as ds
-    from deltacat.types.media import ContentType
-    from deltacat.storage import (
-        DeltaLocator,
-        Partition,
-        PartitionLocator,
-    )
-    from deltacat.compute.compactor.model.compaction_session_audit_info import (
-        CompactionSessionAuditInfo,
-    )
-    from deltacat.compute.compactor.model.compact_partition_params import (
-        CompactPartitionParams,
-    )
-    from deltacat.utils.placement import (
-        PlacementGroupManager,
-    )

     ds_mock_kwargs: Dict[str, Any] = offer_local_deltacat_storage_kwargs

@@ -205,6 +219,9 @@ def test_compact_partition_incremental(
         source_table_stream,
         destination_table_stream,
         _,
+        source_table_namespace,
+        source_table_name,
+        source_table_version,
     ) = create_src_w_deltas_destination_plus_destination(
         primary_keys,
         sort_keys,
@@ -227,11 +244,13 @@ def test_compact_partition_incremental(
     )
     num_workers, worker_instance_cpu = DEFAULT_NUM_WORKERS, DEFAULT_WORKER_INSTANCE_CPUS
     total_cpus: int = num_workers * worker_instance_cpu
-    pgm: Optional[PlacementGroupManager] = None
-    if create_placement_group_param:
-        pgm = PlacementGroupManager(
+    pgm: Optional[PlacementGroupManager] = (
+        PlacementGroupManager(
            1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
        ).pgs[0]
+        if create_placement_group_param
+        else None
+    )
     compact_partition_params = CompactPartitionParams.of(
         {
             "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
@@ -265,24 +284,36 @@ def test_compact_partition_incremental(

         Returns: args, kwargs
         """
-        setup_s3_resource.Bucket(TEST_S3_RCF_BUCKET_NAME).objects.all().delete()
+        s3_resource.Bucket(TEST_S3_RCF_BUCKET_NAME).objects.all().delete()
         return (compact_partition_params,), {}

+    if add_late_deltas:
+        # NOTE: In the case of in-place compaction it is plausible that new deltas may be added to the source partition during compaction
+        # (so that the source_partitition.stream_position > last_stream_position_to_compact).
+        # This parameter helps simulate the case to check that no late deltas are dropped even when the compacted partition is created.
+        latest_delta, _ = add_late_deltas_to_partition(
+            add_late_deltas, source_partition, ds_mock_kwargs
+        )
+    if expected_terminal_exception:
+        with pytest.raises(expected_terminal_exception) as exc_info:
+            compact_partition_func(compact_partition_params)
+        assert expected_terminal_exception_message in str(exc_info.value)
+        return
     rcf_file_s3_uri = benchmark.pedantic(
         compact_partition_func, setup=_incremental_compaction_setup
     )
+
     # validate
-    round_completion_info: RoundCompletionInfo = get_rcf(
-        setup_s3_resource, rcf_file_s3_uri
-    )
+    round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
     compacted_delta_locator: DeltaLocator = (
         round_completion_info.compacted_delta_locator
     )
-    audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
-        "s3://", ""
-    ).split("/", 1)
+    audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
+        round_completion_info.compaction_audit_url
+    )
+
     compaction_audit_obj: Dict[str, Any] = read_s3_contents(
-        setup_s3_resource, audit_bucket, audit_key
+        s3_resource, audit_bucket, audit_key
     )
     compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
         **compaction_audit_obj
@@ -318,8 +349,34 @@ def test_compact_partition_incremental(
         == destination_partition_locator.partition_values
         and source_partition.locator.stream_id
         == destination_partition_locator.stream_id
-    ), "The source partition should match the destination partition"
+    ), f"The source partition: {source_partition.locator.canonical_string} should match the destination partition: {destination_partition_locator.canonical_string}"
     assert (
         compacted_delta_locator.stream_id == source_partition.locator.stream_id
     ), "The compacted delta should be in the same stream as the source"
+    source_partition: Partition = ds.get_partition(
+        source_table_stream.locator,
+        partition_values_param,
+        **ds_mock_kwargs,
+    )
+    compacted_partition: Optional[Partition] = ds.get_partition(
+        compacted_delta_locator.stream_locator,
+        partition_values_param,
+        **ds_mock_kwargs,
+    )
+    assert (
+        compacted_partition.state == source_partition.state == CommitState.COMMITTED
+    ), f"The compacted/source table partition should be in {CommitState.COMMITTED} state and not {CommitState.DEPRECATED}"
+    if add_late_deltas:
+        compacted_partition_deltas: List[Delta] = ds.list_partition_deltas(
+            partition_like=compacted_partition,
+            ascending_order=False,
+            **ds_mock_kwargs,
+        ).all_items()
+        assert (
+            len(compacted_partition_deltas) == len(add_late_deltas) + 1
+        ), f"Expected the number of deltas within the newly promoted partition to equal 1 (the compacted delta) + the # of late deltas: {len(add_late_deltas)}"
+        assert (
+            compacted_partition_deltas[0].stream_position
+            == latest_delta.stream_position
+        ), f"Expected the latest delta in the compacted partition: {compacted_partition_deltas[0].stream_position} to have the same stream position as the latest delta: {latest_delta.stream_position}"
     return
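
The validation above also swaps the inline audit-URL parsing (compaction_audit_url.replace("s3://", "").split("/", 1)) for RoundCompletionInfo.get_audit_bucket_name_and_key, whose implementation lives in round_completion_info.py (that file's diff is not shown here). A minimal sketch of the equivalent split, derived from the removed inline code; the real classmethod may differ in validation:

    from typing import Tuple


    def get_audit_bucket_name_and_key(compaction_audit_url: str) -> Tuple[str, str]:
        # Split an "s3://bucket/key/parts" audit URL into (bucket, key), mirroring
        # the inline parsing removed from the test above.
        bucket, key = compaction_audit_url.replace("s3://", "").split("/", 1)
        return bucket, key


    # Example: get_audit_bucket_name_and_key("s3://compaction-artifacts/audit/run.json")
    # returns ("compaction-artifacts", "audit/run.json").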