deltacat 1.1.4__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/constants.py +3 -1
- deltacat/aws/s3u.py +71 -24
- deltacat/compute/compactor_v2/compaction_session.py +24 -5
- deltacat/compute/compactor_v2/utils/task_options.py +8 -1
- deltacat/tests/aws/test_s3u.py +112 -0
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +30 -0
- deltacat/tests/compute/compact_partition_test_cases.py +62 -1
- deltacat/tests/compute/test_compact_partition_incremental.py +34 -9
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +6 -1
- deltacat/tests/compute/test_util_common.py +4 -4
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +18 -7
- deltacat/utils/daft.py +2 -0
- {deltacat-1.1.4.dist-info → deltacat-1.1.6.dist-info}/METADATA +2 -2
- {deltacat-1.1.4.dist-info → deltacat-1.1.6.dist-info}/RECORD +18 -18
- {deltacat-1.1.4.dist-info → deltacat-1.1.6.dist-info}/LICENSE +0 -0
- {deltacat-1.1.4.dist-info → deltacat-1.1.6.dist-info}/WHEEL +0 -0
- {deltacat-1.1.4.dist-info → deltacat-1.1.6.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
deltacat/aws/constants.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
-
from typing import List
|
1
|
+
from typing import List, Set
|
2
2
|
|
3
3
|
from deltacat.utils.common import env_integer, env_string
|
4
4
|
|
5
5
|
DAFT_MAX_S3_CONNECTIONS_PER_FILE = env_integer("DAFT_MAX_S3_CONNECTIONS_PER_FILE", 8)
|
6
6
|
BOTO_MAX_RETRIES = env_integer("BOTO_MAX_RETRIES", 5)
|
7
7
|
TIMEOUT_ERROR_CODES: List[str] = ["ReadTimeoutError", "ConnectTimeoutError"]
|
8
|
+
RETRYABLE_PUT_OBJECT_ERROR_CODES: Set[str] = {"Throttling", "SlowDown"}
|
8
9
|
AWS_REGION = env_string("AWS_REGION", "us-east-1")
|
10
|
+
RETRY_STOP_AFTER_DELAY = env_integer("RETRY_STOP_AFTER_DELAY", 10 * 60)
|
deltacat/aws/s3u.py
CHANGED
@@ -4,14 +4,18 @@ from functools import partial
|
|
4
4
|
from typing import Any, Callable, Dict, Generator, List, Optional, Union
|
5
5
|
from uuid import uuid4
|
6
6
|
from botocore.config import Config
|
7
|
-
from deltacat.aws.constants import
|
7
|
+
from deltacat.aws.constants import (
|
8
|
+
BOTO_MAX_RETRIES,
|
9
|
+
RETRY_STOP_AFTER_DELAY,
|
10
|
+
RETRYABLE_PUT_OBJECT_ERROR_CODES,
|
11
|
+
)
|
8
12
|
|
9
13
|
import pyarrow as pa
|
10
14
|
import ray
|
11
15
|
import s3fs
|
12
16
|
from boto3.resources.base import ServiceResource
|
13
17
|
from botocore.client import BaseClient
|
14
|
-
from botocore.exceptions import ClientError
|
18
|
+
from botocore.exceptions import ClientError, NoCredentialsError
|
15
19
|
from ray.data.block import Block, BlockAccessor, BlockMetadata
|
16
20
|
from ray.data.datasource import BlockWritePathProvider
|
17
21
|
from ray.types import ObjectRef
|
@@ -315,7 +319,6 @@ def upload_sliced_table(
|
|
315
319
|
**s3_client_kwargs,
|
316
320
|
)
|
317
321
|
manifest_entries.extend(slice_entries)
|
318
|
-
|
319
322
|
return manifest_entries
|
320
323
|
|
321
324
|
|
@@ -504,41 +507,85 @@ def download_manifest_entries_distributed(
|
|
504
507
|
|
505
508
|
def upload(s3_url: str, body, **s3_client_kwargs) -> Dict[str, Any]:
|
506
509
|
|
507
|
-
# TODO (pdames): add tenacity retrying
|
508
510
|
parsed_s3_url = parse_s3_url(s3_url)
|
509
511
|
s3 = s3_client_cache(None, **s3_client_kwargs)
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
512
|
+
retrying = Retrying(
|
513
|
+
wait=wait_random_exponential(multiplier=1, max=15),
|
514
|
+
stop=stop_after_delay(RETRY_STOP_AFTER_DELAY),
|
515
|
+
retry=retry_if_exception_type(RetryableError),
|
516
|
+
)
|
517
|
+
return retrying(
|
518
|
+
_put_object,
|
519
|
+
s3,
|
520
|
+
body,
|
521
|
+
parsed_s3_url.bucket,
|
522
|
+
parsed_s3_url.key,
|
514
523
|
)
|
515
524
|
|
516
525
|
|
526
|
+
def _put_object(
|
527
|
+
s3_client, body: Any, bucket: str, key: str, **s3_put_object_kwargs
|
528
|
+
) -> Dict[str, Any]:
|
529
|
+
try:
|
530
|
+
return s3_client.put_object(
|
531
|
+
Body=body, Bucket=bucket, Key=key, **s3_put_object_kwargs
|
532
|
+
)
|
533
|
+
except ClientError as e:
|
534
|
+
if e.response["Error"]["Code"] in RETRYABLE_PUT_OBJECT_ERROR_CODES:
|
535
|
+
raise RetryableError(
|
536
|
+
f"Retry upload for: {bucket}/{key} after receiving {e.response['Error']['Code']}"
|
537
|
+
) from e
|
538
|
+
raise NonRetryableError(f"Failed table upload to: {bucket}/{key}") from e
|
539
|
+
except NoCredentialsError as e:
|
540
|
+
raise RetryableError(
|
541
|
+
f"Failed to fetch credentials when putting object into: {bucket}/{key}"
|
542
|
+
) from e
|
543
|
+
except BaseException as e:
|
544
|
+
logger.error(
|
545
|
+
f"Upload has failed for {bucket}/{key}. Error: {e}",
|
546
|
+
exc_info=True,
|
547
|
+
)
|
548
|
+
raise NonRetryableError(f"Failed table upload to: {bucket}/{key}") from e
|
549
|
+
|
550
|
+
|
517
551
|
def download(
|
518
552
|
s3_url: str, fail_if_not_found: bool = True, **s3_client_kwargs
|
519
553
|
) -> Optional[Dict[str, Any]]:
|
520
554
|
|
521
|
-
# TODO (pdames): add tenacity retrying
|
522
555
|
parsed_s3_url = parse_s3_url(s3_url)
|
523
556
|
s3 = s3_client_cache(None, **s3_client_kwargs)
|
557
|
+
retrying = Retrying(
|
558
|
+
wait=wait_random_exponential(multiplier=1, max=15),
|
559
|
+
stop=stop_after_delay(RETRY_STOP_AFTER_DELAY),
|
560
|
+
retry=retry_if_exception_type(RetryableError),
|
561
|
+
)
|
562
|
+
return retrying(
|
563
|
+
_get_object,
|
564
|
+
s3,
|
565
|
+
parsed_s3_url.bucket,
|
566
|
+
parsed_s3_url.key,
|
567
|
+
fail_if_not_found=fail_if_not_found,
|
568
|
+
)
|
569
|
+
|
570
|
+
|
571
|
+
def _get_object(s3_client, bucket: str, key: str, fail_if_not_found: bool = True):
|
524
572
|
try:
|
525
|
-
return
|
526
|
-
Bucket=
|
527
|
-
Key=
|
573
|
+
return s3_client.get_object(
|
574
|
+
Bucket=bucket,
|
575
|
+
Key=key,
|
528
576
|
)
|
529
577
|
except ClientError as e:
|
530
|
-
if
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
logger.info(f"file not found: {s3_url}")
|
578
|
+
if e.response["Error"]["Code"] == "NoSuchKey":
|
579
|
+
if fail_if_not_found:
|
580
|
+
raise NonRetryableError(
|
581
|
+
f"Failed get object from: {bucket}/{key}"
|
582
|
+
) from e
|
583
|
+
logger.info(f"file not found: {bucket}/{key}")
|
584
|
+
except NoCredentialsError as e:
|
585
|
+
raise RetryableError(
|
586
|
+
f"Failed to fetch credentials when getting object from: {bucket}/{key}"
|
587
|
+
) from e
|
588
|
+
|
542
589
|
return None
|
543
590
|
|
544
591
|
|
@@ -17,7 +17,11 @@ from deltacat.compute.compactor_v2.model.merge_input import MergeInput
|
|
17
17
|
from deltacat.aws import s3u as s3_utils
|
18
18
|
import deltacat
|
19
19
|
from deltacat import logs
|
20
|
-
from deltacat.compute.compactor import
|
20
|
+
from deltacat.compute.compactor import (
|
21
|
+
HighWatermark,
|
22
|
+
PyArrowWriteResult,
|
23
|
+
RoundCompletionInfo,
|
24
|
+
)
|
21
25
|
from deltacat.compute.compactor_v2.model.merge_result import MergeResult
|
22
26
|
from deltacat.compute.compactor_v2.model.hash_bucket_result import HashBucketResult
|
23
27
|
from deltacat.compute.compactor.model.materialize_result import MaterializeResult
|
@@ -37,6 +41,7 @@ from deltacat.compute.compactor_v2.deletes.utils import prepare_deletes
|
|
37
41
|
from deltacat.storage import (
|
38
42
|
Delta,
|
39
43
|
DeltaLocator,
|
44
|
+
Manifest,
|
40
45
|
Partition,
|
41
46
|
)
|
42
47
|
from deltacat.compute.compactor.model.compact_partition_params import (
|
@@ -96,7 +101,7 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
|
|
96
101
|
round_completion_file_s3_url = None
|
97
102
|
if new_partition:
|
98
103
|
logger.info(f"Committing compacted partition to: {new_partition.locator}")
|
99
|
-
partition = params.deltacat_storage.commit_partition(
|
104
|
+
partition: Partition = params.deltacat_storage.commit_partition(
|
100
105
|
new_partition, **params.deltacat_storage_kwargs
|
101
106
|
)
|
102
107
|
logger.info(f"Committed compacted partition: {partition}")
|
@@ -150,9 +155,9 @@ def _execute_compaction(
|
|
150
155
|
compaction_audit.set_total_cluster_memory_bytes(cluster_memory)
|
151
156
|
|
152
157
|
# read the results from any previously completed compaction round
|
153
|
-
round_completion_info = None
|
154
|
-
high_watermark = None
|
155
|
-
previous_compacted_delta_manifest = None
|
158
|
+
round_completion_info: Optional[RoundCompletionInfo] = None
|
159
|
+
high_watermark: Optional[HighWatermark] = None
|
160
|
+
previous_compacted_delta_manifest: Optional[Manifest] = None
|
156
161
|
|
157
162
|
if not params.rebase_source_partition_locator:
|
158
163
|
round_completion_info = rcf.read_round_completion_file(
|
@@ -271,6 +276,7 @@ def _execute_compaction(
|
|
271
276
|
total_hb_record_count = np.int64(0)
|
272
277
|
telemetry_time_hb = 0
|
273
278
|
if params.hash_bucket_count == 1:
|
279
|
+
logger.info("Hash bucket count set to 1. Running local merge")
|
274
280
|
merge_start = time.monotonic()
|
275
281
|
local_merge_input = generate_local_merge_input(
|
276
282
|
params,
|
@@ -634,6 +640,19 @@ def _execute_compaction(
|
|
634
640
|
f"partition-{params.source_partition_locator.partition_values},"
|
635
641
|
f"compacted at: {params.last_stream_position_to_compact},"
|
636
642
|
)
|
643
|
+
is_inplace_compacted: bool = (
|
644
|
+
params.source_partition_locator.partition_values
|
645
|
+
== params.destination_partition_locator.partition_values
|
646
|
+
and params.source_partition_locator.stream_id
|
647
|
+
== params.destination_partition_locator.stream_id
|
648
|
+
)
|
649
|
+
if is_inplace_compacted:
|
650
|
+
logger.info(
|
651
|
+
"Overriding round completion file source partition locator as in-place compacted. "
|
652
|
+
+ f"Got compacted partition partition_id of {compacted_partition.locator.partition_id} "
|
653
|
+
f"and rcf source partition_id of {rcf_source_partition_locator.partition_id}."
|
654
|
+
)
|
655
|
+
rcf_source_partition_locator = compacted_partition.locator
|
637
656
|
return (
|
638
657
|
compacted_partition,
|
639
658
|
new_round_completion_info,
|
@@ -20,6 +20,8 @@ from deltacat.compute.compactor_v2.utils.primary_key_index import (
|
|
20
20
|
from deltacat.compute.compactor_v2.constants import (
|
21
21
|
PARQUET_TO_PYARROW_INFLATION,
|
22
22
|
)
|
23
|
+
from daft.exceptions import DaftTransientError
|
24
|
+
|
23
25
|
|
24
26
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
25
27
|
|
@@ -62,7 +64,11 @@ def get_task_options(
|
|
62
64
|
cpu: float, memory: float, ray_custom_resources: Optional[Dict] = None
|
63
65
|
) -> Dict:
|
64
66
|
|
65
|
-
|
67
|
+
# NOTE: With DEFAULT scheduling strategy in Ray 2.20.0, autoscaler does
|
68
|
+
# not spin up enough nodes fast and hence we see only approximately
|
69
|
+
# 20 tasks get scheduled out of 100 tasks in queue.
|
70
|
+
# https://docs.ray.io/en/latest/ray-core/scheduling/index.html
|
71
|
+
task_opts = {"num_cpus": cpu, "memory": memory, "scheduling_strategy": "SPREAD"}
|
66
72
|
|
67
73
|
if ray_custom_resources:
|
68
74
|
task_opts["resources"] = ray_custom_resources
|
@@ -76,6 +82,7 @@ def get_task_options(
|
|
76
82
|
botocore.exceptions.HTTPClientError,
|
77
83
|
ConnectionError,
|
78
84
|
TimeoutError,
|
85
|
+
DaftTransientError,
|
79
86
|
]
|
80
87
|
|
81
88
|
return task_opts
|
deltacat/tests/aws/test_s3u.py
CHANGED
@@ -2,6 +2,21 @@ import unittest
|
|
2
2
|
from deltacat.aws.s3u import UuidBlockWritePathProvider, CapturedBlockWritePaths
|
3
3
|
|
4
4
|
|
5
|
+
import os
|
6
|
+
from unittest import mock
|
7
|
+
from unittest.mock import patch
|
8
|
+
|
9
|
+
import boto3
|
10
|
+
import pytest
|
11
|
+
from boto3.resources.base import ServiceResource
|
12
|
+
from botocore.exceptions import ClientError, NoCredentialsError
|
13
|
+
from deltacat.exceptions import NonRetryableError
|
14
|
+
from moto import mock_s3
|
15
|
+
from tenacity import RetryError
|
16
|
+
|
17
|
+
from deltacat.aws import s3u
|
18
|
+
|
19
|
+
|
5
20
|
class TestUuidBlockWritePathProvider(unittest.TestCase):
|
6
21
|
def test_uuid_block_write_provider_sanity(self):
|
7
22
|
capture_object = CapturedBlockWritePaths()
|
@@ -10,3 +25,100 @@ class TestUuidBlockWritePathProvider(unittest.TestCase):
|
|
10
25
|
result = provider("base_path")
|
11
26
|
|
12
27
|
self.assertRegex(result, r"^base_path/[\w-]{36}$")
|
28
|
+
|
29
|
+
|
30
|
+
class TestDownloadUpload(unittest.TestCase):
|
31
|
+
TEST_S3_BUCKET_NAME = "TEST_S3_BUCKET"
|
32
|
+
TEST_S3_KEY = "TEST_S3_KEY"
|
33
|
+
|
34
|
+
@pytest.fixture(autouse=True)
|
35
|
+
def mock_aws_credential(self):
|
36
|
+
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
|
37
|
+
os.environ["AWS_SECRET_ACCESS_ID"] = "testing"
|
38
|
+
os.environ["AWS_SECURITY_TOKEN"] = "testing"
|
39
|
+
os.environ["AWS_SESSION_TOKEN"] = "testing"
|
40
|
+
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
|
41
|
+
yield
|
42
|
+
|
43
|
+
@pytest.fixture(autouse=True)
|
44
|
+
def setup_s3_resource(self):
|
45
|
+
with mock_s3():
|
46
|
+
yield boto3.resource("s3")
|
47
|
+
|
48
|
+
@pytest.fixture(autouse=True)
|
49
|
+
def setup_test_s3_bucket(self, setup_s3_resource: ServiceResource):
|
50
|
+
setup_s3_resource.create_bucket(
|
51
|
+
ACL="authenticated-read",
|
52
|
+
Bucket=self.TEST_S3_BUCKET_NAME,
|
53
|
+
)
|
54
|
+
yield
|
55
|
+
|
56
|
+
def test_sanity(self):
|
57
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
58
|
+
body = "test-body"
|
59
|
+
uploaded_file = s3u.upload(uri, body)
|
60
|
+
assert uploaded_file is not None
|
61
|
+
assert uploaded_file["ResponseMetadata"]["HTTPStatusCode"] == 200
|
62
|
+
downloaded_file = s3u.download(uri)
|
63
|
+
downloaded_body = downloaded_file["Body"].read().decode("utf-8")
|
64
|
+
assert downloaded_file["ResponseMetadata"]["HTTPStatusCode"] == 200
|
65
|
+
assert downloaded_body == body
|
66
|
+
|
67
|
+
@patch("deltacat.aws.s3u.RETRY_STOP_AFTER_DELAY", 1)
|
68
|
+
@patch("deltacat.aws.s3u.s3_client_cache")
|
69
|
+
def test_upload_throttled(self, mock_s3_client_cache):
|
70
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
71
|
+
body = "test-body"
|
72
|
+
throttling_err = ClientError({"Error": {"Code": "Throttling"}}, "put_object")
|
73
|
+
mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
|
74
|
+
mock_s3.put_object.side_effect = throttling_err
|
75
|
+
with pytest.raises(RetryError):
|
76
|
+
s3u.upload(uri, body)
|
77
|
+
|
78
|
+
slowdown_err = ClientError({"Error": {"Code": "SlowDown"}}, "put_object")
|
79
|
+
mock_s3.put_object.side_effect = slowdown_err
|
80
|
+
with pytest.raises(RetryError):
|
81
|
+
s3u.upload(uri, body)
|
82
|
+
|
83
|
+
no_credentials_err = NoCredentialsError()
|
84
|
+
mock_s3.put_object.side_effect = no_credentials_err
|
85
|
+
with pytest.raises(RetryError):
|
86
|
+
s3u.upload(uri, body)
|
87
|
+
|
88
|
+
assert mock_s3.put_object.call_count > 3
|
89
|
+
|
90
|
+
@patch("deltacat.aws.s3u.s3_client_cache")
|
91
|
+
def test_upload_unexpected_error_code(self, mock_s3_client_cache):
|
92
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
93
|
+
body = "test-body"
|
94
|
+
err = ClientError({"Error": {"Code": "UnexpectedError"}}, "put_object")
|
95
|
+
mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
|
96
|
+
mock_s3.put_object.side_effect = err
|
97
|
+
file = None
|
98
|
+
with pytest.raises(NonRetryableError):
|
99
|
+
s3u.upload(uri, body)
|
100
|
+
assert file is None
|
101
|
+
assert mock_s3.put_object.call_count == 1
|
102
|
+
|
103
|
+
@patch("deltacat.aws.s3u.RETRY_STOP_AFTER_DELAY", 1)
|
104
|
+
@patch("deltacat.aws.s3u.s3_client_cache")
|
105
|
+
def test_download_throttled(self, mock_s3_client_cache):
|
106
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/{self.TEST_S3_KEY}"
|
107
|
+
no_credentials_err = NoCredentialsError()
|
108
|
+
mock_s3_client_cache.return_value = mock_s3 = mock.MagicMock()
|
109
|
+
mock_s3.get_object.side_effect = no_credentials_err
|
110
|
+
file = None
|
111
|
+
with pytest.raises(RetryError):
|
112
|
+
file = s3u.download(uri)
|
113
|
+
assert file is None
|
114
|
+
assert mock_s3.get_object.call_count > 1
|
115
|
+
|
116
|
+
def test_download_not_exists(self):
|
117
|
+
uri = f"s3://{self.TEST_S3_BUCKET_NAME}/key-not-exists"
|
118
|
+
file = None
|
119
|
+
with pytest.raises(NonRetryableError):
|
120
|
+
file = s3u.download(uri)
|
121
|
+
assert file is None
|
122
|
+
|
123
|
+
file = s3u.download(uri, fail_if_not_found=False)
|
124
|
+
assert file is None
|
@@ -107,6 +107,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
107
107
|
names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
|
108
108
|
),
|
109
109
|
expected_terminal_exception=None,
|
110
|
+
expected_terminal_exception_message=None,
|
110
111
|
do_create_placement_group=False,
|
111
112
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
112
113
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -186,6 +187,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
186
187
|
names=["pk_col_1", "pk_col_2", "sk_col_1", "col_1"],
|
187
188
|
),
|
188
189
|
expected_terminal_exception=None,
|
190
|
+
expected_terminal_exception_message=None,
|
189
191
|
do_create_placement_group=False,
|
190
192
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
191
193
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -234,6 +236,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
234
236
|
names=["pk_col_1", "col_1"],
|
235
237
|
),
|
236
238
|
expected_terminal_exception=None,
|
239
|
+
expected_terminal_exception_message=None,
|
237
240
|
do_create_placement_group=False,
|
238
241
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
239
242
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -282,6 +285,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
282
285
|
names=["pk_col_1", "col_1"],
|
283
286
|
),
|
284
287
|
expected_terminal_exception=None,
|
288
|
+
expected_terminal_exception_message=None,
|
285
289
|
do_create_placement_group=False,
|
286
290
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
287
291
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -336,6 +340,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
336
340
|
names=["pk_col_1", "sk_col_1", "col_1"],
|
337
341
|
),
|
338
342
|
expected_terminal_exception=None,
|
343
|
+
expected_terminal_exception_message=None,
|
339
344
|
do_create_placement_group=False,
|
340
345
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
341
346
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -395,6 +400,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
395
400
|
names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
|
396
401
|
),
|
397
402
|
expected_terminal_exception=None,
|
403
|
+
expected_terminal_exception_message=None,
|
398
404
|
do_create_placement_group=False,
|
399
405
|
records_per_compacted_file=10,
|
400
406
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT + 10,
|
@@ -445,6 +451,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
445
451
|
names=["sk_col_1", "col_1"],
|
446
452
|
),
|
447
453
|
expected_terminal_exception=None,
|
454
|
+
expected_terminal_exception_message=None,
|
448
455
|
do_create_placement_group=False,
|
449
456
|
records_per_compacted_file=10,
|
450
457
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -500,6 +507,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
500
507
|
names=["pk_col_1", "col_1"],
|
501
508
|
),
|
502
509
|
expected_terminal_exception=None,
|
510
|
+
expected_terminal_exception_message=None,
|
503
511
|
do_create_placement_group=False,
|
504
512
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
505
513
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -559,6 +567,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
559
567
|
names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
|
560
568
|
),
|
561
569
|
expected_terminal_exception=None,
|
570
|
+
expected_terminal_exception_message=None,
|
562
571
|
do_create_placement_group=False,
|
563
572
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
564
573
|
hash_bucket_count=1,
|
@@ -613,6 +622,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
613
622
|
names=["pk_col_1", "sk_col_1", "col_1"],
|
614
623
|
),
|
615
624
|
expected_terminal_exception=None,
|
625
|
+
expected_terminal_exception_message=None,
|
616
626
|
do_create_placement_group=False,
|
617
627
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
618
628
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -658,6 +668,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
658
668
|
names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
|
659
669
|
),
|
660
670
|
expected_terminal_exception=None,
|
671
|
+
expected_terminal_exception_message=None,
|
661
672
|
do_create_placement_group=False,
|
662
673
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
663
674
|
hash_bucket_count=3,
|
@@ -717,6 +728,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
717
728
|
names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
|
718
729
|
),
|
719
730
|
expected_terminal_exception=None,
|
731
|
+
expected_terminal_exception_message=None,
|
720
732
|
do_create_placement_group=False,
|
721
733
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
722
734
|
hash_bucket_count=1,
|
@@ -762,6 +774,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
762
774
|
names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
|
763
775
|
),
|
764
776
|
expected_terminal_exception=None,
|
777
|
+
expected_terminal_exception_message=None,
|
765
778
|
do_create_placement_group=False,
|
766
779
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
767
780
|
hash_bucket_count=1,
|
@@ -861,6 +874,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
861
874
|
names=["pk_col_1", "col_1"],
|
862
875
|
),
|
863
876
|
expected_terminal_exception=None,
|
877
|
+
expected_terminal_exception_message=None,
|
864
878
|
do_create_placement_group=False,
|
865
879
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
866
880
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -911,6 +925,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
911
925
|
names=["pk_col_1", "pk_col_2", "col_1"],
|
912
926
|
),
|
913
927
|
expected_terminal_exception=None,
|
928
|
+
expected_terminal_exception_message=None,
|
914
929
|
do_create_placement_group=False,
|
915
930
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
916
931
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -967,6 +982,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
967
982
|
),
|
968
983
|
),
|
969
984
|
expected_terminal_exception=None,
|
985
|
+
expected_terminal_exception_message=None,
|
970
986
|
do_create_placement_group=False,
|
971
987
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
972
988
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1019,6 +1035,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1019
1035
|
),
|
1020
1036
|
),
|
1021
1037
|
expected_terminal_exception=None,
|
1038
|
+
expected_terminal_exception_message=None,
|
1022
1039
|
do_create_placement_group=True,
|
1023
1040
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1024
1041
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1099,6 +1116,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1099
1116
|
names=["pk_col_1", "col_1"],
|
1100
1117
|
),
|
1101
1118
|
expected_terminal_exception=None,
|
1119
|
+
expected_terminal_exception_message=None,
|
1102
1120
|
do_create_placement_group=True,
|
1103
1121
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1104
1122
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1168,6 +1186,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1168
1186
|
names=["pk_col_1", "col_1"],
|
1169
1187
|
),
|
1170
1188
|
expected_terminal_exception=None,
|
1189
|
+
expected_terminal_exception_message=None,
|
1171
1190
|
do_create_placement_group=True,
|
1172
1191
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1173
1192
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1231,6 +1250,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1231
1250
|
),
|
1232
1251
|
),
|
1233
1252
|
expected_terminal_exception=None,
|
1253
|
+
expected_terminal_exception_message=None,
|
1234
1254
|
do_create_placement_group=True,
|
1235
1255
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1236
1256
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1321,6 +1341,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1321
1341
|
names=["pk_col_1", "col_1"],
|
1322
1342
|
),
|
1323
1343
|
expected_terminal_exception=None,
|
1344
|
+
expected_terminal_exception_message=None,
|
1324
1345
|
do_create_placement_group=True,
|
1325
1346
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1326
1347
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1384,6 +1405,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1384
1405
|
),
|
1385
1406
|
),
|
1386
1407
|
expected_terminal_exception=None,
|
1408
|
+
expected_terminal_exception_message=None,
|
1387
1409
|
do_create_placement_group=True,
|
1388
1410
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1389
1411
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1453,6 +1475,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1453
1475
|
names=["pk_col_1", "col_1"],
|
1454
1476
|
),
|
1455
1477
|
expected_terminal_exception=None,
|
1478
|
+
expected_terminal_exception_message=None,
|
1456
1479
|
do_create_placement_group=True,
|
1457
1480
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1458
1481
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1516,6 +1539,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1516
1539
|
),
|
1517
1540
|
),
|
1518
1541
|
expected_terminal_exception=AssertionError,
|
1542
|
+
expected_terminal_exception_message="Delete type deltas are required to have delete parameters defined",
|
1519
1543
|
do_create_placement_group=True,
|
1520
1544
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1521
1545
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1580,6 +1604,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1580
1604
|
names=["pk_col_1", "col_1"],
|
1581
1605
|
),
|
1582
1606
|
expected_terminal_exception=None,
|
1607
|
+
expected_terminal_exception_message=None,
|
1583
1608
|
do_create_placement_group=True,
|
1584
1609
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1585
1610
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1638,6 +1663,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1638
1663
|
names=["pk_col_1", "col_1"],
|
1639
1664
|
),
|
1640
1665
|
expected_terminal_exception=None,
|
1666
|
+
expected_terminal_exception_message=None,
|
1641
1667
|
do_create_placement_group=True,
|
1642
1668
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1643
1669
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1706,6 +1732,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1706
1732
|
names=["pk_col_1", "col_1"],
|
1707
1733
|
),
|
1708
1734
|
expected_terminal_exception=None,
|
1735
|
+
expected_terminal_exception_message=None,
|
1709
1736
|
do_create_placement_group=True,
|
1710
1737
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1711
1738
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1795,6 +1822,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1795
1822
|
names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
|
1796
1823
|
),
|
1797
1824
|
expected_terminal_exception=None,
|
1825
|
+
expected_terminal_exception_message=None,
|
1798
1826
|
do_create_placement_group=False,
|
1799
1827
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1800
1828
|
hash_bucket_count=1,
|
@@ -1865,6 +1893,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1865
1893
|
names=["sk_col_1", "col_1"],
|
1866
1894
|
),
|
1867
1895
|
expected_terminal_exception=None,
|
1896
|
+
expected_terminal_exception_message=None,
|
1868
1897
|
do_create_placement_group=False,
|
1869
1898
|
records_per_compacted_file=10,
|
1870
1899
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -1913,6 +1942,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1913
1942
|
names=["pk_col_1", "col_1"],
|
1914
1943
|
),
|
1915
1944
|
expected_terminal_exception=None,
|
1945
|
+
expected_terminal_exception_message=None,
|
1916
1946
|
do_create_placement_group=False,
|
1917
1947
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1918
1948
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -54,6 +54,7 @@ class BaseCompactorTestCase:
|
|
54
54
|
input_deltas_delta_type: DeltaType - enumerated argument required for delta creation during compact_partition test setup. Available values are (DeltaType.APPEND, DeltaType.UPSERT, DeltaType.DELETE). DeltaType.APPEND is not supported by compactor v1 or v2
|
55
55
|
expected_terminal_compact_partition_result: pa.Table - expected PyArrow table after compaction (i.e,. the state of the table after applying all row UPDATES/DELETES/INSERTS)
|
56
56
|
expected_terminal_exception: BaseException - expected exception during compaction
|
57
|
+
expected_terminal_exception_message: Optional[str] - expected exception message if present.
|
57
58
|
do_create_placement_group: bool - toggles whether to create a placement group (https://docs.ray.io/en/latest/ray-core/scheduling/placement-group.html) or not
|
58
59
|
records_per_compacted_file: int - argument for the records_per_compacted_file parameter in compact_partition
|
59
60
|
hash_bucket_count_param: int - argument for the hash_bucket_count parameter in compact_partition
|
@@ -70,6 +71,7 @@ class BaseCompactorTestCase:
|
|
70
71
|
input_deltas_delta_type: DeltaType
|
71
72
|
expected_terminal_compact_partition_result: pa.Table
|
72
73
|
expected_terminal_exception: BaseException
|
74
|
+
expected_terminal_exception_message: str
|
73
75
|
do_create_placement_group: bool
|
74
76
|
records_per_compacted_file: int
|
75
77
|
hash_bucket_count: int
|
@@ -84,7 +86,12 @@ class BaseCompactorTestCase:
|
|
84
86
|
|
85
87
|
@dataclass(frozen=True)
|
86
88
|
class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
|
87
|
-
|
89
|
+
"""
|
90
|
+
Args:
|
91
|
+
is_inplace: bool - argument to indicate whether to try compacting an in-place compacted table (the source table is the destination table). Also needed to control whether the destination table is created
|
92
|
+
"""
|
93
|
+
|
94
|
+
is_inplace: bool
|
88
95
|
|
89
96
|
|
90
97
|
@dataclass(frozen=True)
|
@@ -134,11 +141,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
134
141
|
names=["pk_col_1"],
|
135
142
|
),
|
136
143
|
expected_terminal_exception=None,
|
144
|
+
expected_terminal_exception_message=None,
|
137
145
|
do_create_placement_group=False,
|
138
146
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
139
147
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
140
148
|
read_kwargs_provider=None,
|
141
149
|
drop_duplicates=True,
|
150
|
+
is_inplace=False,
|
142
151
|
skip_enabled_compact_partition_drivers=None,
|
143
152
|
),
|
144
153
|
"2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
|
@@ -159,11 +168,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
159
168
|
names=["pk_col_1", "sk_col_1"],
|
160
169
|
),
|
161
170
|
expected_terminal_exception=None,
|
171
|
+
expected_terminal_exception_message=None,
|
162
172
|
do_create_placement_group=False,
|
163
173
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
164
174
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
165
175
|
read_kwargs_provider=None,
|
166
176
|
drop_duplicates=True,
|
177
|
+
is_inplace=False,
|
167
178
|
skip_enabled_compact_partition_drivers=None,
|
168
179
|
),
|
169
180
|
"3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
|
@@ -193,11 +204,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
193
204
|
names=["pk_col_1", "sk_col_1", "sk_col_2"],
|
194
205
|
),
|
195
206
|
expected_terminal_exception=None,
|
207
|
+
expected_terminal_exception_message=None,
|
196
208
|
do_create_placement_group=False,
|
197
209
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
198
210
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
199
211
|
read_kwargs_provider=None,
|
200
212
|
drop_duplicates=True,
|
213
|
+
is_inplace=False,
|
201
214
|
skip_enabled_compact_partition_drivers=None,
|
202
215
|
),
|
203
216
|
"4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
|
@@ -226,11 +239,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
226
239
|
names=["pk_col_1", "sk_col_1", "sk_col_2"],
|
227
240
|
),
|
228
241
|
expected_terminal_exception=None,
|
242
|
+
expected_terminal_exception_message=None,
|
229
243
|
do_create_placement_group=False,
|
230
244
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
231
245
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
232
246
|
read_kwargs_provider=None,
|
233
247
|
drop_duplicates=True,
|
248
|
+
is_inplace=False,
|
234
249
|
skip_enabled_compact_partition_drivers=None,
|
235
250
|
),
|
236
251
|
"5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -254,11 +269,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
254
269
|
names=["pk_col_1", "sk_col_1"],
|
255
270
|
),
|
256
271
|
expected_terminal_exception=None,
|
272
|
+
expected_terminal_exception_message=None,
|
257
273
|
do_create_placement_group=False,
|
258
274
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
259
275
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
260
276
|
read_kwargs_provider=None,
|
261
277
|
drop_duplicates=True,
|
278
|
+
is_inplace=False,
|
262
279
|
skip_enabled_compact_partition_drivers=None,
|
263
280
|
),
|
264
281
|
"6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -282,11 +299,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
282
299
|
names=["pk_col_1", "sk_col_1"],
|
283
300
|
),
|
284
301
|
expected_terminal_exception=None,
|
302
|
+
expected_terminal_exception_message=None,
|
285
303
|
do_create_placement_group=False,
|
286
304
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
287
305
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
288
306
|
read_kwargs_provider=None,
|
289
307
|
drop_duplicates=True,
|
308
|
+
is_inplace=False,
|
290
309
|
skip_enabled_compact_partition_drivers=None,
|
291
310
|
),
|
292
311
|
"7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
|
@@ -310,11 +329,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
310
329
|
names=["pk_col_1", "sk_col_1"],
|
311
330
|
),
|
312
331
|
expected_terminal_exception=None,
|
332
|
+
expected_terminal_exception_message=None,
|
313
333
|
do_create_placement_group=False,
|
314
334
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
315
335
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
316
336
|
read_kwargs_provider=None,
|
317
337
|
drop_duplicates=True,
|
338
|
+
is_inplace=False,
|
318
339
|
skip_enabled_compact_partition_drivers=None,
|
319
340
|
),
|
320
341
|
"8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
|
@@ -340,11 +361,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
340
361
|
names=["pk_col_1", "pk_col_2", "sk_col_1"],
|
341
362
|
),
|
342
363
|
expected_terminal_exception=None,
|
364
|
+
expected_terminal_exception_message=None,
|
343
365
|
do_create_placement_group=False,
|
344
366
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
345
367
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
346
368
|
read_kwargs_provider=None,
|
347
369
|
drop_duplicates=True,
|
370
|
+
is_inplace=False,
|
348
371
|
skip_enabled_compact_partition_drivers=None,
|
349
372
|
),
|
350
373
|
"9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
|
@@ -368,11 +391,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
368
391
|
names=["pk_col_1", "sk_col_1"],
|
369
392
|
),
|
370
393
|
expected_terminal_exception=None,
|
394
|
+
expected_terminal_exception_message=None,
|
371
395
|
do_create_placement_group=False,
|
372
396
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
373
397
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
374
398
|
read_kwargs_provider=None,
|
375
399
|
drop_duplicates=True,
|
400
|
+
is_inplace=False,
|
376
401
|
skip_enabled_compact_partition_drivers=None,
|
377
402
|
),
|
378
403
|
"10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
|
@@ -396,11 +421,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
396
421
|
names=["pk_col_1", "sk_col_1"],
|
397
422
|
),
|
398
423
|
expected_terminal_exception=None,
|
424
|
+
expected_terminal_exception_message=None,
|
399
425
|
do_create_placement_group=False,
|
400
426
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
401
427
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
402
428
|
read_kwargs_provider=None,
|
403
429
|
drop_duplicates=True,
|
430
|
+
is_inplace=False,
|
404
431
|
skip_enabled_compact_partition_drivers=None,
|
405
432
|
),
|
406
433
|
"11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
|
@@ -424,11 +451,13 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
424
451
|
names=["pk_col_1", "sk_col_1"],
|
425
452
|
),
|
426
453
|
expected_terminal_exception=None,
|
454
|
+
expected_terminal_exception_message=None,
|
427
455
|
do_create_placement_group=False,
|
428
456
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
429
457
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
430
458
|
read_kwargs_provider=None,
|
431
459
|
drop_duplicates=True,
|
460
|
+
is_inplace=False,
|
432
461
|
skip_enabled_compact_partition_drivers=None,
|
433
462
|
),
|
434
463
|
"12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
@@ -452,13 +481,45 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
452
481
|
names=["pk_col_1", "sk_col_1"],
|
453
482
|
),
|
454
483
|
expected_terminal_exception=None,
|
484
|
+
expected_terminal_exception_message=None,
|
455
485
|
do_create_placement_group=False,
|
456
486
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
457
487
|
hash_bucket_count=1,
|
458
488
|
read_kwargs_provider=None,
|
459
489
|
drop_duplicates=True,
|
490
|
+
is_inplace=False,
|
460
491
|
skip_enabled_compact_partition_drivers=None,
|
461
492
|
),
|
493
|
+
"13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
|
494
|
+
primary_keys={"pk_col_1"},
|
495
|
+
sort_keys=[SortKey.of(key_name="sk_col_1")],
|
496
|
+
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
497
|
+
partition_values=["1"],
|
498
|
+
input_deltas=pa.Table.from_arrays(
|
499
|
+
[
|
500
|
+
pa.array([str(i) for i in range(10)]),
|
501
|
+
pa.array([i for i in range(10)]),
|
502
|
+
],
|
503
|
+
names=["pk_col_1", "sk_col_1"],
|
504
|
+
),
|
505
|
+
input_deltas_delta_type=DeltaType.UPSERT,
|
506
|
+
expected_terminal_compact_partition_result=pa.Table.from_arrays(
|
507
|
+
[
|
508
|
+
pa.array([str(i) for i in range(10)]),
|
509
|
+
pa.array([i for i in range(10)]),
|
510
|
+
],
|
511
|
+
names=["pk_col_1", "sk_col_1"],
|
512
|
+
),
|
513
|
+
expected_terminal_exception=None,
|
514
|
+
expected_terminal_exception_message=None,
|
515
|
+
do_create_placement_group=False,
|
516
|
+
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
517
|
+
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
518
|
+
read_kwargs_provider=None,
|
519
|
+
drop_duplicates=True,
|
520
|
+
is_inplace=True,
|
521
|
+
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
522
|
+
),
|
462
523
|
}
|
463
524
|
|
464
525
|
INCREMENTAL_TEST_CASES = with_compactor_version_func_test_param(INCREMENTAL_TEST_CASES)
|
@@ -24,6 +24,9 @@ from deltacat.tests.compute.test_util_constant import (
|
|
24
24
|
DEFAULT_NUM_WORKERS,
|
25
25
|
DEFAULT_WORKER_INSTANCE_CPUS,
|
26
26
|
)
|
27
|
+
from deltacat.compute.compactor import (
|
28
|
+
RoundCompletionInfo,
|
29
|
+
)
|
27
30
|
|
28
31
|
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
29
32
|
"db_file_path",
|
@@ -101,12 +104,14 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
101
104
|
"input_deltas_delta_type",
|
102
105
|
"expected_terminal_compact_partition_result",
|
103
106
|
"expected_terminal_exception",
|
107
|
+
"expected_terminal_exception_message",
|
104
108
|
"create_placement_group_param",
|
105
109
|
"records_per_compacted_file_param",
|
106
110
|
"hash_bucket_count_param",
|
107
111
|
"read_kwargs_provider_param",
|
108
112
|
"drop_duplicates_param",
|
109
113
|
"skip_enabled_compact_partition_drivers",
|
114
|
+
"is_inplace",
|
110
115
|
"compact_partition_func",
|
111
116
|
],
|
112
117
|
[
|
@@ -120,12 +125,14 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
120
125
|
input_deltas_delta_type,
|
121
126
|
expected_terminal_compact_partition_result,
|
122
127
|
expected_terminal_exception,
|
128
|
+
expected_terminal_exception_message,
|
123
129
|
create_placement_group_param,
|
124
130
|
records_per_compacted_file_param,
|
125
131
|
hash_bucket_count_param,
|
126
132
|
drop_duplicates_param,
|
127
133
|
read_kwargs_provider,
|
128
134
|
skip_enabled_compact_partition_drivers,
|
135
|
+
is_inplace,
|
129
136
|
compact_partition_func,
|
130
137
|
)
|
131
138
|
for test_name, (
|
@@ -137,17 +144,18 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
137
144
|
input_deltas_delta_type,
|
138
145
|
expected_terminal_compact_partition_result,
|
139
146
|
expected_terminal_exception,
|
147
|
+
expected_terminal_exception_message,
|
140
148
|
create_placement_group_param,
|
141
149
|
records_per_compacted_file_param,
|
142
150
|
hash_bucket_count_param,
|
143
151
|
drop_duplicates_param,
|
144
152
|
read_kwargs_provider,
|
145
153
|
skip_enabled_compact_partition_drivers,
|
154
|
+
is_inplace,
|
146
155
|
compact_partition_func,
|
147
156
|
) in INCREMENTAL_TEST_CASES.items()
|
148
157
|
],
|
149
158
|
ids=[test_name for test_name in INCREMENTAL_TEST_CASES],
|
150
|
-
indirect=[],
|
151
159
|
)
|
152
160
|
def test_compact_partition_incremental(
|
153
161
|
setup_s3_resource: ServiceResource,
|
@@ -161,12 +169,14 @@ def test_compact_partition_incremental(
|
|
161
169
|
input_deltas_delta_type: str,
|
162
170
|
expected_terminal_compact_partition_result: pa.Table,
|
163
171
|
expected_terminal_exception: BaseException,
|
172
|
+
expected_terminal_exception_message: Optional[str],
|
164
173
|
create_placement_group_param: bool,
|
165
174
|
records_per_compacted_file_param: int,
|
166
175
|
hash_bucket_count_param: int,
|
167
176
|
drop_duplicates_param: bool,
|
168
177
|
read_kwargs_provider_param: Any,
|
169
178
|
skip_enabled_compact_partition_drivers,
|
179
|
+
is_inplace: bool,
|
170
180
|
compact_partition_func: Callable,
|
171
181
|
benchmark: BenchmarkFixture,
|
172
182
|
):
|
@@ -174,6 +184,7 @@ def test_compact_partition_incremental(
|
|
174
184
|
from deltacat.types.media import ContentType
|
175
185
|
from deltacat.storage import (
|
176
186
|
DeltaLocator,
|
187
|
+
Partition,
|
177
188
|
PartitionLocator,
|
178
189
|
)
|
179
190
|
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
@@ -186,7 +197,7 @@ def test_compact_partition_incremental(
|
|
186
197
|
PlacementGroupManager,
|
187
198
|
)
|
188
199
|
|
189
|
-
ds_mock_kwargs = offer_local_deltacat_storage_kwargs
|
200
|
+
ds_mock_kwargs: Dict[str, Any] = offer_local_deltacat_storage_kwargs
|
190
201
|
|
191
202
|
# setup
|
192
203
|
partition_keys = partition_keys_param
|
@@ -202,20 +213,21 @@ def test_compact_partition_incremental(
|
|
202
213
|
input_deltas_delta_type,
|
203
214
|
partition_values_param,
|
204
215
|
ds_mock_kwargs,
|
216
|
+
is_inplace,
|
205
217
|
)
|
206
|
-
source_partition = ds.get_partition(
|
218
|
+
source_partition: Partition = ds.get_partition(
|
207
219
|
source_table_stream.locator,
|
208
220
|
partition_values_param,
|
209
221
|
**ds_mock_kwargs,
|
210
222
|
)
|
211
|
-
destination_partition_locator = PartitionLocator.of(
|
223
|
+
destination_partition_locator: PartitionLocator = PartitionLocator.of(
|
212
224
|
destination_table_stream.locator,
|
213
225
|
partition_values_param,
|
214
226
|
None,
|
215
227
|
)
|
216
228
|
num_workers, worker_instance_cpu = DEFAULT_NUM_WORKERS, DEFAULT_WORKER_INSTANCE_CPUS
|
217
|
-
total_cpus = num_workers * worker_instance_cpu
|
218
|
-
pgm = None
|
229
|
+
total_cpus: int = num_workers * worker_instance_cpu
|
230
|
+
pgm: Optional[PlacementGroupManager] = None
|
219
231
|
if create_placement_group_param:
|
220
232
|
pgm = PlacementGroupManager(
|
221
233
|
1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
|
@@ -260,14 +272,16 @@ def test_compact_partition_incremental(
|
|
260
272
|
compact_partition_func, setup=_incremental_compaction_setup
|
261
273
|
)
|
262
274
|
# validate
|
263
|
-
round_completion_info = get_rcf(
|
275
|
+
round_completion_info: RoundCompletionInfo = get_rcf(
|
276
|
+
setup_s3_resource, rcf_file_s3_uri
|
277
|
+
)
|
264
278
|
compacted_delta_locator: DeltaLocator = (
|
265
279
|
round_completion_info.compacted_delta_locator
|
266
280
|
)
|
267
281
|
audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
|
268
282
|
"s3://", ""
|
269
283
|
).split("/", 1)
|
270
|
-
compaction_audit_obj:
|
284
|
+
compaction_audit_obj: Dict[str, Any] = read_s3_contents(
|
271
285
|
setup_s3_resource, audit_bucket, audit_key
|
272
286
|
)
|
273
287
|
compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
|
@@ -281,7 +295,7 @@ def test_compact_partition_incremental(
|
|
281
295
|
sorting_cols: List[Any] = [(val, "ascending") for val in primary_keys]
|
282
296
|
# the compacted table may contain multiple files and chunks
|
283
297
|
# and order of records may be incorrect due to multiple files.
|
284
|
-
expected_terminal_compact_partition_result = (
|
298
|
+
expected_terminal_compact_partition_result: pa.Table = (
|
285
299
|
expected_terminal_compact_partition_result.combine_chunks().sort_by(
|
286
300
|
sorting_cols
|
287
301
|
)
|
@@ -297,4 +311,15 @@ def test_compact_partition_incremental(
|
|
297
311
|
assert actual_compacted_table.equals(
|
298
312
|
expected_terminal_compact_partition_result
|
299
313
|
), f"{actual_compacted_table} does not match {expected_terminal_compact_partition_result}"
|
314
|
+
|
315
|
+
if is_inplace:
|
316
|
+
assert (
|
317
|
+
source_partition.locator.partition_values
|
318
|
+
== destination_partition_locator.partition_values
|
319
|
+
and source_partition.locator.stream_id
|
320
|
+
== destination_partition_locator.stream_id
|
321
|
+
), "The source partition should match the destination partition"
|
322
|
+
assert (
|
323
|
+
compacted_delta_locator.stream_id == source_partition.locator.stream_id
|
324
|
+
), "The compacted delta should be in the same stream as the source"
|
300
325
|
return
|
@@ -115,6 +115,7 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
115
115
|
"input_deltas_delta_type",
|
116
116
|
"expected_terminal_compact_partition_result",
|
117
117
|
"expected_terminal_exception",
|
118
|
+
"expected_terminal_exception_message",
|
118
119
|
"create_placement_group_param",
|
119
120
|
"records_per_compacted_file_param",
|
120
121
|
"hash_bucket_count_param",
|
@@ -136,6 +137,7 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
136
137
|
input_deltas_delta_type,
|
137
138
|
expected_terminal_compact_partition_result,
|
138
139
|
expected_terminal_exception,
|
140
|
+
expected_terminal_exception_message,
|
139
141
|
create_placement_group_param,
|
140
142
|
records_per_compacted_file_param,
|
141
143
|
hash_bucket_count_param,
|
@@ -155,6 +157,7 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
155
157
|
input_deltas_delta_type,
|
156
158
|
expected_terminal_compact_partition_result,
|
157
159
|
expected_terminal_exception,
|
160
|
+
expected_terminal_exception_message,
|
158
161
|
create_placement_group_param,
|
159
162
|
records_per_compacted_file_param,
|
160
163
|
hash_bucket_count_param,
|
@@ -180,6 +183,7 @@ def test_compact_partition_rebase_then_incremental(
|
|
180
183
|
input_deltas_delta_type: str,
|
181
184
|
expected_terminal_compact_partition_result: pa.Table,
|
182
185
|
expected_terminal_exception: BaseException,
|
186
|
+
expected_terminal_exception_message: Optional[str],
|
183
187
|
create_placement_group_param: bool,
|
184
188
|
records_per_compacted_file_param: int,
|
185
189
|
hash_bucket_count_param: int,
|
@@ -337,8 +341,9 @@ def test_compact_partition_rebase_then_incremental(
|
|
337
341
|
}
|
338
342
|
)
|
339
343
|
if expected_terminal_exception:
|
340
|
-
with pytest.raises(expected_terminal_exception):
|
344
|
+
with pytest.raises(expected_terminal_exception) as exc_info:
|
341
345
|
compact_partition_func(compact_partition_params)
|
346
|
+
assert expected_terminal_exception_message in str(exc_info.value)
|
342
347
|
return
|
343
348
|
rcf_file_s3_uri = compact_partition_func(compact_partition_params)
|
344
349
|
round_completion_info = get_rcf(setup_s3_resource, rcf_file_s3_uri)
|
@@ -20,6 +20,9 @@ from deltacat.tests.compute.test_util_constant import (
|
|
20
20
|
REBASING_TABLE_NAME,
|
21
21
|
REBASING_TABLE_VERSION,
|
22
22
|
)
|
23
|
+
from deltacat.compute.compactor import (
|
24
|
+
RoundCompletionInfo,
|
25
|
+
)
|
23
26
|
|
24
27
|
|
25
28
|
class PartitionKeyType(str, Enum):
|
@@ -134,11 +137,8 @@ def create_rebase_table(
|
|
134
137
|
)
|
135
138
|
|
136
139
|
|
137
|
-
def get_rcf(s3_resource, rcf_file_s3_uri: str):
|
140
|
+
def get_rcf(s3_resource, rcf_file_s3_uri: str) -> RoundCompletionInfo:
|
138
141
|
from deltacat.tests.test_utils.utils import read_s3_contents
|
139
|
-
from deltacat.compute.compactor import (
|
140
|
-
RoundCompletionInfo,
|
141
|
-
)
|
142
142
|
|
143
143
|
_, rcf_object_key = rcf_file_s3_uri.rsplit("/", 1)
|
144
144
|
rcf_file_output: Dict[str, Any] = read_s3_contents(
|
@@ -84,6 +84,7 @@ def create_src_w_deltas_destination_plus_destination(
|
|
84
84
|
input_delta_type: DeltaType,
|
85
85
|
partition_values: Optional[List[Any]],
|
86
86
|
ds_mock_kwargs: Optional[Dict[str, Any]],
|
87
|
+
simulate_is_inplace: bool = False,
|
87
88
|
) -> Tuple[Stream, Stream, Optional[Stream]]:
|
88
89
|
import deltacat.tests.local_deltacat_storage as ds
|
89
90
|
|
@@ -113,13 +114,23 @@ def create_src_w_deltas_destination_plus_destination(
|
|
113
114
|
table_version=source_table_version,
|
114
115
|
**ds_mock_kwargs,
|
115
116
|
)
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
117
|
+
destination_table_namespace: Optional[str] = None
|
118
|
+
destination_table_name: Optional[str] = None
|
119
|
+
destination_table_version: Optional[str] = None
|
120
|
+
if not simulate_is_inplace:
|
121
|
+
(
|
122
|
+
destination_table_namespace,
|
123
|
+
destination_table_name,
|
124
|
+
destination_table_version,
|
125
|
+
) = create_destination_table(
|
126
|
+
primary_keys, sort_keys, partition_keys, ds_mock_kwargs
|
127
|
+
)
|
128
|
+
else:
|
129
|
+
# not creating a table as in-place
|
130
|
+
destination_table_namespace = source_namespace
|
131
|
+
destination_table_name = source_table_name
|
132
|
+
destination_table_version = source_table_version
|
133
|
+
|
123
134
|
destination_table_stream: Stream = ds.get_stream(
|
124
135
|
namespace=destination_table_namespace,
|
125
136
|
table_name=destination_table_name,
|
deltacat/utils/daft.py
CHANGED
@@ -163,5 +163,7 @@ def _get_s3_io_config(s3_client_kwargs) -> IOConfig:
|
|
163
163
|
retry_mode="adaptive",
|
164
164
|
num_tries=BOTO_MAX_RETRIES,
|
165
165
|
max_connections=DAFT_MAX_S3_CONNECTIONS_PER_FILE,
|
166
|
+
connect_timeout_ms=5_000, # Timeout to connect to server
|
167
|
+
read_timeout_ms=10_000, # Timeout for first byte from server
|
166
168
|
)
|
167
169
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: deltacat
|
3
|
-
Version: 1.1.4
|
3
|
+
Version: 1.1.6
|
4
4
|
Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
|
5
5
|
Home-page: https://github.com/ray-project/deltacat
|
6
6
|
Author: Ray Team
|
@@ -27,7 +27,7 @@ Requires-Dist: tenacity ==8.1.0
|
|
27
27
|
Requires-Dist: typing-extensions ==4.4.0
|
28
28
|
Requires-Dist: pymemcache ==4.0.0
|
29
29
|
Requires-Dist: redis ==4.6.0
|
30
|
-
Requires-Dist: getdaft ==0.2.
|
30
|
+
Requires-Dist: getdaft ==0.2.23
|
31
31
|
Requires-Dist: schedule ==1.2.0
|
32
32
|
|
33
33
|
# DeltaCAT
|
@@ -1,11 +1,11 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=01om7qgj3agAF60Q1qwZXAzsUtP7cabwc_1RXqRr0vw,1777
|
2
2
|
deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
|
3
3
|
deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
|
4
4
|
deltacat/logs.py,sha256=6g16VkEFidbaMjgenAjggE1r2l664drMVhreRs8B1IQ,8438
|
5
5
|
deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
deltacat/aws/clients.py,sha256=VgddlV3AEjlBGIFmhhHxokYzwJ-lXnmHAeprVyADduI,6948
|
7
|
-
deltacat/aws/constants.py,sha256=
|
8
|
-
deltacat/aws/s3u.py,sha256=
|
7
|
+
deltacat/aws/constants.py,sha256=OnRbtfFdu4buJEsl39Kg5cH-7A-dEL_ESeBSAlR_1Cs,501
|
8
|
+
deltacat/aws/s3u.py,sha256=qZL5Omz1onW79vB_KrPHQ2Mox4sNPrLXkNxpFl9HFHM,26525
|
9
9
|
deltacat/aws/redshift/__init__.py,sha256=7SvjG-dqox8zZUhFicTsUvpG5vXYDl_QQ3ohlHOgTKc,342
|
10
10
|
deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
deltacat/aws/redshift/model/manifest.py,sha256=ThgpdwzaWz493Zz9e8HSWwuxEheA1nDuypM3pe4vozk,12987
|
@@ -50,7 +50,7 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=-j6ZzhJBDrJ6Vz6
|
|
50
50
|
deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
|
51
51
|
deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
|
52
52
|
deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
53
|
-
deltacat/compute/compactor_v2/compaction_session.py,sha256=
|
53
|
+
deltacat/compute/compactor_v2/compaction_session.py,sha256=ovOTJUOtq6tIc7krya9IPtonI-_nHJL9NFl5D_R9i4Q,26087
|
54
54
|
deltacat/compute/compactor_v2/constants.py,sha256=jGLEK5uS7AcnoVjPGUDIO4ljDbBYZlqzQleKJRKvnZM,2118
|
55
55
|
deltacat/compute/compactor_v2/deletes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
56
|
deltacat/compute/compactor_v2/deletes/delete_file_envelope.py,sha256=AeuH9JRMwp6mvQf6P2cqL92hUEtResQq6qUTS0kIKac,3111
|
@@ -74,7 +74,7 @@ deltacat/compute/compactor_v2/utils/delta.py,sha256=8hjkDeIIkSX-gAQ2utQSp2sZcO2t
|
|
74
74
|
deltacat/compute/compactor_v2/utils/io.py,sha256=autXlE3uHICdCCuJoS7mfdeJbRRiz2_xlz-3izlccB4,5264
|
75
75
|
deltacat/compute/compactor_v2/utils/merge.py,sha256=hK4Y7acrtgfvWWTz-fAGznEg6qn6dBYu8blQUQVHhs0,5244
|
76
76
|
deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=MAscmL35WfwN7Is72aFlD_cGhxtZgjRwwR5kS9Yn2uU,11393
|
77
|
-
deltacat/compute/compactor_v2/utils/task_options.py,sha256=
|
77
|
+
deltacat/compute/compactor_v2/utils/task_options.py,sha256=n1zKOFmAg2cL7CDpT9y9h-J0aYzTMtOdUjkDm1svo9k,14160
|
78
78
|
deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
|
79
79
|
deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
|
80
80
|
deltacat/compute/merge_on_read/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -136,18 +136,18 @@ deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
136
136
|
deltacat/tests/test_logs.py,sha256=6BEMw8VApFg2msFwCAVosz8NWJYATtX5furHyz8UluM,3828
|
137
137
|
deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
138
138
|
deltacat/tests/aws/test_clients.py,sha256=23GMWfz27WWBDXSqphG9mfputsyS7j3I5P_HRk4YoKE,3790
|
139
|
-
deltacat/tests/aws/test_s3u.py,sha256=
|
139
|
+
deltacat/tests/aws/test_s3u.py,sha256=aFvUa9f63hFU8T4r_cuKYxcFg6jVUoJWygiPwDUd09s,4654
|
140
140
|
deltacat/tests/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
141
141
|
deltacat/tests/catalog/test_default_catalog_impl.py,sha256=9srCU5yQ159oZ9_PoJ_mWMzVUW5bKV0mnmPJc5zKCQQ,3125
|
142
142
|
deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
143
|
-
deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=
|
144
|
-
deltacat/tests/compute/compact_partition_test_cases.py,sha256=
|
145
|
-
deltacat/tests/compute/test_compact_partition_incremental.py,sha256=
|
143
|
+
deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=ctJTmL7JKtEc-2UX8dTVPRct0wCKelGf90Jwd0lgE14,73645
|
144
|
+
deltacat/tests/compute/compact_partition_test_cases.py,sha256=uoNRy5oCHXhKktdZABryJ3n_smbimcKINxxNtFfjWsE,22651
|
145
|
+
deltacat/tests/compute/test_compact_partition_incremental.py,sha256=NLhGPOa7Y-ymw3_SlFhqI16MppYLUKc7y5ST9QXUbR8,11301
|
146
146
|
deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
|
147
|
-
deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=
|
148
|
-
deltacat/tests/compute/test_util_common.py,sha256=
|
147
|
+
deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=nIO2r4tjO0kMhKKEWs2cGALP9-2NiluIGnFCOFmws90,14364
|
148
|
+
deltacat/tests/compute/test_util_common.py,sha256=jGc862Rv1gf51HN_Dl9v5gvhj4bnwLidurz9Z8wWJZ0,6066
|
149
149
|
deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
|
150
|
-
deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=
|
150
|
+
deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=4i4CrFTBK51uzCswUK7KZz7UjutD5r6ptBQJ0Kj3WvA,7613
|
151
151
|
deltacat/tests/compute/compactor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
152
152
|
deltacat/tests/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
153
153
|
deltacat/tests/compute/compactor/steps/test_repartition.py,sha256=0uRguPEKeLSYs746Jv8io-HZMWdyXNcOMBu8GO2mA0M,9305
|
@@ -190,7 +190,7 @@ deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
190
190
|
deltacat/utils/arguments.py,sha256=5y1Xz4HSAD8M8Jt83i6gOEKoYjy_fMQe1V43IhIE4hY,1191
|
191
191
|
deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
|
192
192
|
deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
|
193
|
-
deltacat/utils/daft.py,sha256=
|
193
|
+
deltacat/utils/daft.py,sha256=LGxxUtra7CqxcuZ9TuudrNvuyj_mygQfCpnYL9UIdP4,5650
|
194
194
|
deltacat/utils/metrics.py,sha256=HYKyZSrtVLu8gXezg_TMNUKJp4h1WWI0VEzn0Xlzf-I,10778
|
195
195
|
deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
|
196
196
|
deltacat/utils/pandas.py,sha256=GfwjYb8FUSEeoBdXZI1_NJkdjxPMbCCUhlyRfGbDkn8,9562
|
@@ -206,8 +206,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
|
|
206
206
|
deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
|
207
207
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
208
208
|
deltacat/utils/ray_utils/runtime.py,sha256=5eaBWTDm0IXVoc5Y6aacoVB-f0Mnv-K2ewyTSjHKHwM,5009
|
209
|
-
deltacat-1.1.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
210
|
-
deltacat-1.1.
|
211
|
-
deltacat-1.1.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
212
|
-
deltacat-1.1.4.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
213
|
-
deltacat-1.1.4.dist-info/RECORD,,
|
209
|
+
deltacat-1.1.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
210
|
+
deltacat-1.1.6.dist-info/METADATA,sha256=C5eD7a_S7Zxm5W6A5dBUGPKKBnwttmcu2qHELs6YImw,1780
|
211
|
+
deltacat-1.1.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
212
|
+
deltacat-1.1.6.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
213
|
+
deltacat-1.1.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|