deltacat 1.1.9__py3-none-any.whl → 1.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. deltacat/__init__.py +1 -1
  2. deltacat/aws/redshift/model/manifest.py +16 -0
  3. deltacat/aws/s3u.py +19 -13
  4. deltacat/compute/compactor/compaction_session.py +5 -1
  5. deltacat/compute/compactor/repartition_session.py +1 -0
  6. deltacat/compute/compactor/utils/round_completion_file.py +39 -9
  7. deltacat/compute/compactor_v2/compaction_session.py +15 -11
  8. deltacat/compute/compactor_v2/constants.py +3 -0
  9. deltacat/compute/compactor_v2/model/{compaction_session.py → evaluate_compaction_result.py} +1 -2
  10. deltacat/compute/compactor_v2/utils/primary_key_index.py +1 -1
  11. deltacat/exceptions.py +5 -2
  12. deltacat/io/dataset.py +5 -17
  13. deltacat/storage/__init__.py +24 -0
  14. deltacat/storage/interface.py +42 -6
  15. deltacat/storage/model/delta.py +23 -3
  16. deltacat/storage/model/partition.py +6 -7
  17. deltacat/storage/model/partition_spec.py +71 -0
  18. deltacat/storage/model/stream.py +38 -1
  19. deltacat/storage/model/transform.py +127 -0
  20. deltacat/tests/aws/test_s3u.py +2 -0
  21. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +231 -0
  22. deltacat/tests/compute/compactor_v2/test_compaction_session.py +201 -36
  23. deltacat/tests/compute/test_compact_partition_rebase.py +1 -1
  24. deltacat/tests/compute/test_util_common.py +19 -4
  25. deltacat/tests/local_deltacat_storage/__init__.py +83 -19
  26. deltacat/tests/test_utils/pyarrow.py +4 -1
  27. deltacat/tests/utils/ray_utils/test_dataset.py +66 -0
  28. deltacat/utils/numpy.py +3 -3
  29. deltacat/utils/pandas.py +3 -3
  30. deltacat/utils/pyarrow.py +3 -3
  31. deltacat/utils/ray_utils/dataset.py +7 -7
  32. {deltacat-1.1.9.dist-info → deltacat-1.1.11.dist-info}/METADATA +6 -5
  33. {deltacat-1.1.9.dist-info → deltacat-1.1.11.dist-info}/RECORD +36 -33
  34. deltacat/io/aws/redshift/redshift_datasource.py +0 -578
  35. {deltacat-1.1.9.dist-info → deltacat-1.1.11.dist-info}/LICENSE +0 -0
  36. {deltacat-1.1.9.dist-info → deltacat-1.1.11.dist-info}/WHEEL +0 -0
  37. {deltacat-1.1.9.dist-info → deltacat-1.1.11.dist-info}/top_level.txt +0 -0
deltacat/storage/model/delta.py
@@ -12,6 +12,7 @@ from deltacat.storage.model.stream import StreamLocator
 from deltacat.storage.model.table import TableLocator
 from deltacat.storage.model.table_version import TableVersionLocator
 from deltacat.storage.model.types import DeltaType
+from deltacat.storage.model.partition_spec import DeltaPartitionSpec, PartitionValues
 
 
 class Delta(dict):
@@ -24,6 +25,7 @@ class Delta(dict):
         manifest: Optional[Manifest],
         previous_stream_position: Optional[int] = None,
         delete_parameters: Optional[DeleteParameters] = None,
+        partition_spec: Optional[DeltaPartitionSpec] = None,
     ) -> Delta:
         """
         Creates a Delta metadata model with the given Delta Locator, Delta Type,
@@ -38,6 +40,7 @@ class Delta(dict):
         delta.manifest = manifest
         delta.previous_stream_position = previous_stream_position
         delta.delete_parameters = delete_parameters
+        delta.partition_spec = partition_spec
         return delta
 
     @staticmethod
@@ -90,6 +93,12 @@ class Delta(dict):
                 f"Deltas to merge must all share the same delta type "
                 f"(found {len(distinct_delta_types)} delta types)."
             )
+        distinct_partition_spec = set([d.partition_spec for d in deltas])
+        if len(distinct_partition_spec) > 1:
+            raise ValueError(
+                f"Deltas to merge must all share the same partition spec "
+                f"(found {len(distinct_partition_spec)} partition specs)."
+            )
         merged_manifest = Manifest.merge_manifests(
             manifests,
             manifest_author,
@@ -252,7 +261,7 @@ class Delta(dict):
         return None
 
     @property
-    def partition_values(self) -> Optional[List[Any]]:
+    def partition_values(self) -> Optional[PartitionValues]:
         delta_locator = self.locator
         if delta_locator:
             return delta_locator.partition_values
@@ -276,6 +285,17 @@ class Delta(dict):
     def delete_parameters(self, delete_parameters: Optional[DeleteParameters]) -> None:
         self["delete_parameters"] = delete_parameters
 
+    @property
+    def partition_spec(self) -> Optional[DeltaPartitionSpec]:
+        val: Dict[str, Any] = self.get("partitionSpec")
+        if val is not None and not isinstance(val, DeltaPartitionSpec):
+            self.partition_spec = val = DeltaPartitionSpec(val)
+        return val
+
+    @partition_spec.setter
+    def partition_spec(self, value: Optional[DeltaPartitionSpec]) -> None:
+        self["partitionSpec"] = value
+
 
 class DeltaLocator(Locator, dict):
     @staticmethod
@@ -299,7 +319,7 @@ class DeltaLocator(Locator, dict):
         table_version: Optional[str],
         stream_id: Optional[str],
         storage_type: Optional[str],
-        partition_values: Optional[List[Any]],
+        partition_values: Optional[PartitionValues],
         partition_id: Optional[str],
         stream_position: Optional[int],
     ) -> DeltaLocator:
@@ -365,7 +385,7 @@ class DeltaLocator(Locator, dict):
         return None
 
     @property
-    def partition_values(self) -> Optional[List[Any]]:
+    def partition_values(self) -> Optional[PartitionValues]:
         partition_locator = self.partition_locator
         if partition_locator:
             return partition_locator.partition_values
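The new Delta.partition_spec property follows the same lazy re-wrapping pattern deltacat uses elsewhere for its dict-backed models. A minimal sketch, assuming the deltacat 1.1.11 modules shown in this diff are importable:

from deltacat.storage.model.delta import Delta
from deltacat.storage.model.partition_spec import DeltaPartitionSpec

# A Delta deserialized from JSON carries a plain dict under "partitionSpec"...
delta = Delta({"partitionSpec": {"orderedTransforms": []}})

# ...and the property promotes it to a DeltaPartitionSpec on first access,
# caching the wrapped value back onto the underlying dict.
spec = delta.partition_spec
assert isinstance(spec, DeltaPartitionSpec)
assert delta["partitionSpec"] is spec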
deltacat/storage/model/partition.py
@@ -1,10 +1,9 @@
 # Allow classes to use self-referencing Type hints in Python 3.7.
 from __future__ import annotations
-
 from typing import Any, Dict, List, Optional, Union
 
 import pyarrow as pa
-
+from deltacat.storage.model.partition_spec import PartitionValues
 from deltacat.storage.model.locator import Locator
 from deltacat.storage.model.namespace import NamespaceLocator
 from deltacat.storage.model.stream import StreamLocator
@@ -127,7 +126,7 @@ class Partition(dict):
         return None
 
     @property
-    def partition_values(self) -> Optional[List[Any]]:
+    def partition_values(self) -> Optional[PartitionValues]:
         partition_locator = self.locator
         if partition_locator:
             return partition_locator.partition_values
@@ -199,7 +198,7 @@ class PartitionLocator(Locator, dict):
     @staticmethod
     def of(
         stream_locator: Optional[StreamLocator],
-        partition_values: Optional[List[Any]],
+        partition_values: Optional[PartitionValues],
         partition_id: Optional[str],
     ) -> PartitionLocator:
         """
@@ -225,7 +224,7 @@ class PartitionLocator(Locator, dict):
         table_version: Optional[str],
         stream_id: Optional[str],
         storage_type: Optional[str],
-        partition_values: Optional[List[Any]],
+        partition_values: Optional[PartitionValues],
         partition_id: Optional[str],
     ) -> PartitionLocator:
         stream_locator = StreamLocator.at(
@@ -253,11 +252,11 @@ class PartitionLocator(Locator, dict):
         self["streamLocator"] = stream_locator
 
     @property
-    def partition_values(self) -> Optional[List[Any]]:
+    def partition_values(self) -> Optional[PartitionValues]:
         return self.get("partitionValues")
 
     @partition_values.setter
-    def partition_values(self, partition_values: Optional[List[Any]]) -> None:
+    def partition_values(self, partition_values: Optional[PartitionValues]) -> None:
         self["partitionValues"] = partition_values
 
     @property
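Because PartitionValues is only a type alias for List[Any], these signature changes are backward compatible: call sites can keep passing ordinary lists. A minimal sketch, assuming deltacat 1.1.11, with the stream locator, partition id, and the example values left as illustrative placeholders:

from deltacat.storage.model.partition import PartitionLocator

locator = PartitionLocator.of(
    stream_locator=None,
    partition_values=["us-east-1", 3],
    partition_id=None,
)
assert locator.partition_values == ["us-east-1", 3]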
deltacat/storage/model/partition_spec.py (new file)
@@ -0,0 +1,71 @@
+from __future__ import annotations
+from typing import List, Optional, Any
+from deltacat.storage.model.transform import Transform
+
+"""
+An ordered list of partition values determining the values of
+ordered transforms specified in the partition spec.
+"""
+PartitionValues = List[Any]
+
+
+class PartitionFilter(dict):
+    """
+    This class represents a filter for partitions.
+    It is used to filter partitions based on certain criteria.
+    """
+
+    @staticmethod
+    def of(
+        partition_values: Optional[PartitionValues] = None,
+    ) -> PartitionFilter:
+        """
+        Creates a new PartitionFilter instance with the specified partition key and value.
+        """
+        partition_filter = PartitionFilter()
+        partition_filter["partitionValues"] = partition_values
+        return partition_filter
+
+    @property
+    def partition_values(self) -> Optional[PartitionValues]:
+        return self.get("partitionValues")
+
+
+class PartitionSpec(dict):
+    """
+    This class determines how the underlying entities in the
+    hierarchy are partitioned. Stream partitions deltas and
+    delta partitions files.
+    """
+
+    @staticmethod
+    def of(ordered_transforms: List[Transform] = None) -> PartitionSpec:
+        partition_spec = PartitionSpec()
+        partition_spec.ordered_transforms = ordered_transforms
+        return partition_spec
+
+    @property
+    def ordered_transforms(self) -> List[Transform]:
+        return self.get("orderedTransforms")
+
+    @ordered_transforms.setter
+    def ordered_transforms(self, value: List[Transform]) -> None:
+        self["orderedTransforms"] = value
+
+
+class StreamPartitionSpec(PartitionSpec):
+    """
+    A class representing a stream partition specification.
+    A stream partitions deltas into multiple different Partition
+    """
+
+    pass
+
+
+class DeltaPartitionSpec(PartitionSpec):
+    """
+    A class representing delta partition specification.
+    The manifest entries in delta are partitioned based on this spec.
+    """
+
+    pass
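A short usage sketch for the new module, assuming deltacat 1.1.11 and the transform model added later in this diff (the column name and partition value are illustrative): a spec is an ordered list of transforms, and a PartitionFilter carries the concrete partition values to match against.

from deltacat.storage.model.partition_spec import PartitionFilter, PartitionSpec
from deltacat.storage.model.transform import (
    IdentityTransformParameters,
    Transform,
    TransformName,
)

# Partition by the identity of a single column...
spec = PartitionSpec.of(
    ordered_transforms=[
        Transform.of(
            name=TransformName.IDENTITY,
            parameters=IdentityTransformParameters.of(column_name="region"),
        )
    ]
)

# ...and filter down to one concrete value per ordered transform.
partition_filter = PartitionFilter.of(partition_values=["us-east-1"])

assert spec.ordered_transforms[0].name == TransformName.IDENTITY
assert partition_filter.partition_values == ["us-east-1"]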
deltacat/storage/model/stream.py
@@ -8,6 +8,7 @@ from deltacat.storage.model.namespace import NamespaceLocator
 from deltacat.storage.model.table import TableLocator
 from deltacat.storage.model.table_version import TableVersionLocator
 from deltacat.storage.model.types import CommitState
+from deltacat.storage.model.partition_spec import StreamPartitionSpec, PartitionValues
 
 
 class Stream(dict):
@@ -17,12 +18,14 @@ class Stream(dict):
         partition_keys: Optional[List[Dict[str, Any]]],
         state: Optional[CommitState] = None,
         previous_stream_digest: Optional[bytes] = None,
+        partition_spec: Optional[StreamPartitionSpec] = None,
     ) -> Stream:
         stream = Stream()
         stream.locator = locator
         stream.partition_keys = partition_keys
         stream.state = state
         stream.previous_stream_digest = previous_stream_digest
+        stream.partition_spec = partition_spec
         return stream
 
     @property
@@ -38,6 +41,14 @@ class Stream(dict):
 
     @property
     def partition_keys(self) -> Optional[List[Dict[str, Any]]]:
+        """
+        Ordered list of unique column names in the table schema on
+        which the underlying data is partitioned. Either partition_spec
+        or partition_keys must be specified but not both.
+
+        (Deprecated): Partition keys will be deprecated in the favor
+        of partition_spec in future releases.
+        """
         return self.get("partitionKeys")
 
     @partition_keys.setter
@@ -46,6 +57,9 @@ class Stream(dict):
 
     @property
     def previous_stream_digest(self) -> Optional[str]:
+        """
+        Previous stream digest
+        """
         return self.get("previousStreamDigest")
 
     @previous_stream_digest.setter
@@ -54,6 +68,9 @@ class Stream(dict):
 
     @property
     def state(self) -> Optional[CommitState]:
+        """
+        The commit state of a stream.
+        """
         state = self.get("state")
         return None if state is None else CommitState(state)
 
@@ -61,6 +78,26 @@ class Stream(dict):
     def state(self, state: Optional[CommitState]) -> None:
         self["state"] = state
 
+    @property
+    def partition_spec(self) -> Optional[StreamPartitionSpec]:
+        """
+        If a table uses complex partitioning instead of identity,
+        partition spec can be specified to define that strategy.
+        For example, a partition spec can define a bucketing strategy
+        on composite column values or can define iceberg compliant
+        bucketing.
+
+        Either partition_spec or partition_keys must be specified but not both.
+        """
+        val: Dict[str, Any] = self.get("partitionSpec")
+        if val is not None and not isinstance(val, StreamPartitionSpec):
+            self.partition_spec = val = StreamPartitionSpec(val)
+        return val
+
+    @partition_spec.setter
+    def partition_spec(self, spec: StreamPartitionSpec) -> None:
+        self["partitionSpec"] = spec
+
     @property
     def namespace_locator(self) -> Optional[NamespaceLocator]:
         stream_locator = self.locator
@@ -110,7 +147,7 @@ class Stream(dict):
             return stream_locator.table_version
         return None
 
-    def validate_partition_values(self, partition_values: Optional[List[Any]]):
+    def validate_partition_values(self, partition_values: Optional[PartitionValues]):
         # TODO (pdames): ensure value data types match key data types
         partition_keys = self.partition_keys
         num_keys = len(partition_keys) if partition_keys else 0
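A minimal sketch of declaring a stream partitioned through a partition spec rather than identity partition_keys, assuming deltacat 1.1.11. The stream locator is omitted and the column name is illustrative; note that StreamPartitionSpec.of is inherited from PartitionSpec:

from deltacat.storage.model.stream import Stream
from deltacat.storage.model.partition_spec import StreamPartitionSpec
from deltacat.storage.model.transform import (
    IdentityTransformParameters,
    Transform,
    TransformName,
)

spec = StreamPartitionSpec.of(
    ordered_transforms=[
        Transform.of(
            name=TransformName.IDENTITY,
            parameters=IdentityTransformParameters.of(column_name="event_date"),
        )
    ]
)

# Either partition_keys or partition_spec should be supplied, not both.
stream = Stream.of(
    locator=None,
    partition_keys=None,
    partition_spec=spec,
)

assert stream.partition_spec.ordered_transforms[0].name == TransformName.IDENTITY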
deltacat/storage/model/transform.py (new file)
@@ -0,0 +1,127 @@
+from __future__ import annotations
+from typing import List
+from enum import Enum
+
+
+class TransformName(str, Enum):
+    IDENTITY = "identity"
+    BUCKET = "bucket"
+
+
+class TransformParameters(dict):
+    """
+    This is a parent class that contains properties
+    to be passed to the corresponding transform
+    """
+
+    pass
+
+
+class IdentityTransformParameters(TransformParameters):
+    """
+    This class is used to pass parameters to the identity transform
+    """
+
+    @staticmethod
+    def of(column_name: str) -> IdentityTransformParameters:
+        identify_transform_parameters = IdentityTransformParameters()
+        identify_transform_parameters["columnName"] = column_name
+        return identify_transform_parameters
+
+    @property
+    def column_name(self) -> str:
+        """
+        The name of the column to use for identity transform
+        """
+        return self["columnName"]
+
+    @column_name.setter
+    def column_name(self, value: str) -> None:
+        self["columnName"] = value
+
+
+class BucketingStrategy(str, Enum):
+    """
+    A bucketing strategy for the transform
+    """
+
+    # Uses default deltacat bucketing strategy.
+    # This strategy supports hashing on composite keys
+    # and uses SHA1 hashing for determining the bucket.
+    # If no columns passed, it will use a random UUID
+    # for determining the bucket.
+    DEFAULT = "default"
+
+    # Uses iceberg compliant bucketing strategy.
+    # As indicated in the iceberg spec, it does not support
+    # composite keys and uses murmur3 hash for determining
+    # the bucket.
+    # See https://iceberg.apache.org/spec/#partitioning
+    ICEBERG = "iceberg"
+
+
+class BucketTransformParameters(TransformParameters):
+    """
+    Encapsulates parameters for the bucket transform.
+    """
+
+    def of(
+        self,
+        num_buckets: int,
+        column_names: List[str],
+        bucketing_strategy: BucketingStrategy,
+    ) -> BucketTransformParameters:
+        bucket_transform_parameters = BucketTransformParameters()
+        bucket_transform_parameters["numBuckets"] = num_buckets
+        bucket_transform_parameters["columnNames"] = column_names
+        bucket_transform_parameters["bucketingStrategy"] = bucketing_strategy
+
+        return bucket_transform_parameters
+
+    @property
+    def num_buckets(self) -> int:
+        """
+        The total number of buckets to create for values of the column
+        """
+        return self["numBuckets"]
+
+    @property
+    def column_names(self) -> List[str]:
+        """
+        An ordered list of unique column names from the table schema
+        to use for bucketings.
+        """
+        return self["columnNames"]
+
+    @property
+    def bucketing_strategy(self) -> BucketingStrategy:
+        """
+        The bucketing strategy to used.
+        """
+        return self["bucketingStrategy"]
+
+
+class Transform(dict):
+    """
+    A transform is represents how a particular column value can be
+    transformed into a new value. This is mostly used in the context
+    of partitioning the data files in a table.
+    """
+
+    @staticmethod
+    def of(
+        name: TransformName,
+        parameters: TransformParameters,
+    ) -> Transform:
+        partition_transform = Transform()
+        partition_transform["name"] = name
+        partition_transform["parameters"] = parameters
+        return partition_transform
+
+    @property
+    def name(self) -> TransformName:
+        return self["name"]
+
+    @property
+    def parameters(self) -> TransformParameters:
+        return self["parameters"]
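A minimal sketch of building a bucket transform from the model above, assuming deltacat 1.1.11 (the bucket count and column names are illustrative). Note that BucketTransformParameters.of is declared as an instance method in this release, so it is invoked on an instance here:

from deltacat.storage.model.transform import (
    BucketingStrategy,
    BucketTransformParameters,
    Transform,
    TransformName,
)

# 16 hash buckets over a composite key, using the default (SHA1-based) strategy.
bucket_params = BucketTransformParameters().of(
    num_buckets=16,
    column_names=["customer_id", "order_id"],
    bucketing_strategy=BucketingStrategy.DEFAULT,
)

bucket_transform = Transform.of(
    name=TransformName.BUCKET,
    parameters=bucket_params,
)

assert bucket_transform.parameters.num_buckets == 16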
deltacat/tests/aws/test_s3u.py
@@ -20,6 +20,7 @@ from botocore.exceptions import (
     ConnectTimeoutError,
     HTTPClientError,
 )
+from ray.data.datasource import FilenameProvider
 from deltacat.exceptions import NonRetryableError
 from moto import mock_s3
 from tenacity import RetryError
@@ -34,6 +35,7 @@ class TestUuidBlockWritePathProvider(unittest.TestCase):
 
         result = provider("base_path")
 
+        self.assertTrue(isinstance(provider, FilenameProvider))
         self.assertRegex(result, r"^base_path/[\w-]{36}$")
 
 
deltacat/tests/compute/compactor/utils/test_round_completion_file.py (new file)
@@ -0,0 +1,231 @@
+import pytest
+import os
+from moto import mock_s3
+import boto3
+from boto3.resources.base import ServiceResource
+from deltacat.compute.compactor.utils.round_completion_file import (
+    read_round_completion_file,
+    write_round_completion_file,
+)
+from deltacat.tests.compute.test_util_common import get_test_partition_locator
+from deltacat.compute.compactor import RoundCompletionInfo
+
+RCF_BUCKET_NAME = "rcf-bucket"
+
+
+@pytest.fixture(autouse=True, scope="module")
+def mock_aws_credential():
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_ID"] = "testing"
+    os.environ["AWS_SECURITY_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
+    yield
+
+
+@pytest.fixture(autouse=True, scope="module")
+def s3_resource(mock_aws_credential):
+    with mock_s3():
+        yield boto3.resource("s3")
+
+
+@pytest.fixture(autouse=True, scope="function")
+def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
+    s3_resource.create_bucket(
+        ACL="authenticated-read",
+        Bucket=RCF_BUCKET_NAME,
+    )
+    yield
+    s3_resource.Bucket(RCF_BUCKET_NAME).objects.all().delete()
+
+
+class TestReadWriteRoundCompletionFile:
+    def test_read_when_rcf_written_without_destination(self):
+        """
+        This test case tests the backward compatibility by successfully
+        reading the previously written rcf.
+        """
+
+        source_locator = get_test_partition_locator("source")
+        destination_locator = get_test_partition_locator("destination")
+
+        expected_rcf = RoundCompletionInfo.of(
+            high_watermark=122,
+            compacted_delta_locator={},
+            compacted_pyarrow_write_result={},
+            sort_keys_bit_width=12,
+        )
+
+        rcf_url = write_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, None, expected_rcf
+        )
+
+        rcf = read_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, destination_locator
+        )
+
+        assert (
+            rcf_url == "s3://rcf-bucket/f9829af39770d904dbb811bd8f4e886dd307f507.json"
+        )
+        assert rcf == expected_rcf
+
+    def test_read_when_rcf_written_with_destination(self):
+        """
+        This test case tests the backward compatibility by successfully
+        reading the previously written rcf.
+        """
+
+        source_locator = get_test_partition_locator("source")
+        destination_locator = get_test_partition_locator("destination")
+
+        expected_rcf = RoundCompletionInfo.of(
+            high_watermark=122,
+            compacted_delta_locator={},
+            compacted_pyarrow_write_result={},
+            sort_keys_bit_width=12,
+        )
+
+        rcf_url = write_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, destination_locator, expected_rcf
+        )
+
+        rcf = read_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, destination_locator
+        )
+
+        assert (
+            rcf_url
+            == "s3://rcf-bucket/f9829af39770d904dbb811bd8f4e886dd307f507/e9939deadc091b3289a2eb0ca56b1ba86b9892f4.json"
+        )
+        assert rcf == expected_rcf
+
+    def test_read_without_destination_when_rcf_written_with_destination(self):
+        """
+        This test case tests the backward compatibility by successfully
+        reading the previously written rcf.
+        """
+
+        source_locator = get_test_partition_locator("source")
+        destination_locator = get_test_partition_locator("destination")
+
+        expected_rcf = RoundCompletionInfo.of(
+            high_watermark=122,
+            compacted_delta_locator={},
+            compacted_pyarrow_write_result={},
+            sort_keys_bit_width=12,
+        )
+
+        write_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, destination_locator, expected_rcf
+        )
+
+        rcf = read_round_completion_file(RCF_BUCKET_NAME, source_locator, None)
+
+        assert rcf is None
+
+    def test_read_without_destination_when_rcf_written_without_destination(self):
+        """
+        This test case tests the backward compatibility by successfully
+        reading the previously written rcf.
+        """
+
+        source_locator = get_test_partition_locator("source")
+
+        expected_rcf = RoundCompletionInfo.of(
+            high_watermark=122,
+            compacted_delta_locator={},
+            compacted_pyarrow_write_result={},
+            sort_keys_bit_width=12,
+        )
+
+        write_round_completion_file(RCF_BUCKET_NAME, source_locator, None, expected_rcf)
+
+        rcf = read_round_completion_file(RCF_BUCKET_NAME, source_locator, None)
+
+        assert rcf == expected_rcf
+
+    def test_read_when_rcf_written_both_with_and_without_destination(self):
+        """
+        This test case tests the backward compatibility by successfully
+        reading the previously written rcf.
+        """
+
+        source_locator = get_test_partition_locator("source")
+        destination_locator = get_test_partition_locator("destination")
+
+        expected_rcf = RoundCompletionInfo.of(
+            high_watermark=122,
+            compacted_delta_locator={},
+            compacted_pyarrow_write_result={},
+            sort_keys_bit_width=12,
+        )
+
+        expected_rcf_2 = RoundCompletionInfo.of(
+            high_watermark=1223,
+            compacted_delta_locator={},
+            compacted_pyarrow_write_result={},
+            sort_keys_bit_width=1233,
+        )
+
+        write_round_completion_file(RCF_BUCKET_NAME, source_locator, None, expected_rcf)
+
+        write_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, destination_locator, expected_rcf_2
+        )
+
+        rcf = read_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, destination_locator
+        )
+
+        assert rcf == expected_rcf_2
+
+    def test_read_when_none_destination_partition_id(self):
+
+        source_locator = get_test_partition_locator("source")
+        destination_locator = get_test_partition_locator(None)
+
+        expected_rcf = RoundCompletionInfo.of(
+            high_watermark=122,
+            compacted_delta_locator={},
+            compacted_pyarrow_write_result={},
+            sort_keys_bit_width=12,
+        )
+
+        write_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, destination_locator, expected_rcf
+        )
+
+        rcf = read_round_completion_file(
+            RCF_BUCKET_NAME, source_locator, destination_locator
+        )
+
+        assert rcf == expected_rcf
+
+    def test_write_when_custom_url_is_passed(self):
+        """
+        This test case tests the backward compatibility by successfully
+        reading the previously written rcf.
+        """
+
+        source_locator = get_test_partition_locator("source")
+
+        expected_rcf = RoundCompletionInfo.of(
+            high_watermark=122,
+            compacted_delta_locator={},
+            compacted_pyarrow_write_result={},
+            sort_keys_bit_width=12,
+        )
+
+        completion_file_s3_url = f"s3://{RCF_BUCKET_NAME}/test.json"
+        rcf_url = write_round_completion_file(
+            RCF_BUCKET_NAME,
+            source_locator,
+            None,
+            expected_rcf,
+            completion_file_s3_url=completion_file_s3_url,
+        )
+
+        rcf = read_round_completion_file(RCF_BUCKET_NAME, source_locator, None)
+
+        assert rcf_url == completion_file_s3_url
+        assert rcf is None