deltacat 0.1.10.dev0__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +41 -15
- deltacat/aws/clients.py +12 -31
- deltacat/aws/constants.py +1 -1
- deltacat/aws/redshift/__init__.py +7 -2
- deltacat/aws/redshift/model/manifest.py +54 -50
- deltacat/aws/s3u.py +176 -187
- deltacat/catalog/delegate.py +151 -185
- deltacat/catalog/interface.py +78 -97
- deltacat/catalog/model/catalog.py +21 -21
- deltacat/catalog/model/table_definition.py +11 -9
- deltacat/compute/compactor/__init__.py +12 -16
- deltacat/compute/compactor/compaction_session.py +237 -166
- deltacat/compute/compactor/model/delta_annotated.py +60 -44
- deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
- deltacat/compute/compactor/model/delta_file_locator.py +10 -8
- deltacat/compute/compactor/model/materialize_result.py +6 -7
- deltacat/compute/compactor/model/primary_key_index.py +38 -34
- deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
- deltacat/compute/compactor/model/round_completion_info.py +25 -19
- deltacat/compute/compactor/model/sort_key.py +18 -15
- deltacat/compute/compactor/steps/dedupe.py +119 -94
- deltacat/compute/compactor/steps/hash_bucket.py +48 -47
- deltacat/compute/compactor/steps/materialize.py +86 -92
- deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
- deltacat/compute/compactor/steps/rehash/rewrite_index.py +5 -5
- deltacat/compute/compactor/utils/io.py +59 -47
- deltacat/compute/compactor/utils/primary_key_index.py +91 -80
- deltacat/compute/compactor/utils/round_completion_file.py +22 -23
- deltacat/compute/compactor/utils/system_columns.py +33 -45
- deltacat/compute/metastats/meta_stats.py +235 -157
- deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
- deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
- deltacat/compute/metastats/stats.py +95 -64
- deltacat/compute/metastats/utils/io.py +100 -53
- deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
- deltacat/compute/metastats/utils/ray_utils.py +38 -33
- deltacat/compute/stats/basic.py +107 -69
- deltacat/compute/stats/models/delta_column_stats.py +11 -8
- deltacat/compute/stats/models/delta_stats.py +59 -32
- deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
- deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
- deltacat/compute/stats/models/stats_result.py +24 -14
- deltacat/compute/stats/utils/intervals.py +16 -9
- deltacat/compute/stats/utils/io.py +86 -51
- deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
- deltacat/constants.py +4 -13
- deltacat/io/__init__.py +2 -2
- deltacat/io/aws/redshift/redshift_datasource.py +157 -143
- deltacat/io/dataset.py +14 -17
- deltacat/io/read_api.py +36 -33
- deltacat/logs.py +94 -42
- deltacat/storage/__init__.py +18 -8
- deltacat/storage/interface.py +196 -213
- deltacat/storage/model/delta.py +45 -51
- deltacat/storage/model/list_result.py +12 -8
- deltacat/storage/model/namespace.py +4 -5
- deltacat/storage/model/partition.py +42 -42
- deltacat/storage/model/stream.py +29 -30
- deltacat/storage/model/table.py +14 -14
- deltacat/storage/model/table_version.py +32 -31
- deltacat/storage/model/types.py +1 -0
- deltacat/tests/stats/test_intervals.py +11 -24
- deltacat/tests/utils/__init__.py +0 -0
- deltacat/tests/utils/test_record_batch_tables.py +284 -0
- deltacat/types/media.py +3 -4
- deltacat/types/tables.py +31 -21
- deltacat/utils/common.py +5 -11
- deltacat/utils/numpy.py +20 -22
- deltacat/utils/pandas.py +73 -100
- deltacat/utils/performance.py +3 -9
- deltacat/utils/placement.py +259 -230
- deltacat/utils/pyarrow.py +302 -89
- deltacat/utils/ray_utils/collections.py +2 -1
- deltacat/utils/ray_utils/concurrency.py +27 -28
- deltacat/utils/ray_utils/dataset.py +28 -28
- deltacat/utils/ray_utils/performance.py +5 -9
- deltacat/utils/ray_utils/runtime.py +9 -10
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/METADATA +1 -1
- deltacat-0.1.12.dist-info/RECORD +110 -0
- deltacat-0.1.10.dev0.dist-info/RECORD +0 -108
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/LICENSE +0 -0
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/WHEEL +0 -0
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/top_level.txt +0 -0
deltacat/storage/model/delta.py
CHANGED
@@ -1,26 +1,28 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
from
|
4
|
+
from typing import Any, Dict, List, Optional
|
5
|
+
|
6
|
+
from deltacat.aws.redshift import Manifest, ManifestAuthor, ManifestMeta
|
7
|
+
from deltacat.storage.model.locator import Locator
|
5
8
|
from deltacat.storage.model.namespace import NamespaceLocator
|
6
9
|
from deltacat.storage.model.partition import PartitionLocator
|
7
10
|
from deltacat.storage.model.stream import StreamLocator
|
8
11
|
from deltacat.storage.model.table import TableLocator
|
9
12
|
from deltacat.storage.model.table_version import TableVersionLocator
|
10
|
-
from deltacat.storage.model.
|
11
|
-
from deltacat.aws.redshift import Manifest, ManifestMeta, ManifestAuthor
|
12
|
-
|
13
|
-
from typing import Any, Dict, List, Optional
|
13
|
+
from deltacat.storage.model.types import DeltaType
|
14
14
|
|
15
15
|
|
16
16
|
class Delta(dict):
|
17
17
|
@staticmethod
|
18
|
-
def of(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
def of(
|
19
|
+
locator: Optional[DeltaLocator],
|
20
|
+
delta_type: Optional[DeltaType],
|
21
|
+
meta: Optional[ManifestMeta],
|
22
|
+
properties: Optional[Dict[str, str]],
|
23
|
+
manifest: Optional[Manifest],
|
24
|
+
previous_stream_position: Optional[int] = None,
|
25
|
+
) -> Delta:
|
24
26
|
"""
|
25
27
|
Creates a Delta metadata model with the given Delta Locator, Delta Type,
|
26
28
|
manifest metadata, properties, manifest, and previous delta stream
|
@@ -37,10 +39,11 @@ class Delta(dict):
|
|
37
39
|
|
38
40
|
@staticmethod
|
39
41
|
def merge_deltas(
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
deltas: List[Delta],
|
43
|
+
manifest_author: Optional[ManifestAuthor] = None,
|
44
|
+
stream_position: Optional[int] = None,
|
45
|
+
properties: Optional[Dict[str, str]] = None,
|
46
|
+
) -> Delta:
|
44
47
|
"""
|
45
48
|
Merges the input list of deltas into a single delta. All input deltas to
|
46
49
|
merge must belong to the same partition, share the same delta type, and
|
@@ -70,18 +73,20 @@ class Delta(dict):
|
|
70
73
|
if len(distinct_storage_types) > 1:
|
71
74
|
raise NotImplementedError(
|
72
75
|
f"Deltas to merge must all share the same storage type "
|
73
|
-
f"(found {len(distinct_storage_types)} storage types."
|
74
|
-
|
75
|
-
|
76
|
+
f"(found {len(distinct_storage_types)} storage types."
|
77
|
+
)
|
78
|
+
pl_digest_set = set([d.partition_locator.digest() for d in deltas])
|
76
79
|
if len(pl_digest_set) > 1:
|
77
80
|
raise ValueError(
|
78
81
|
f"Deltas to merge must all belong to the same partition "
|
79
|
-
f"(found {len(pl_digest_set)} partitions)."
|
82
|
+
f"(found {len(pl_digest_set)} partitions)."
|
83
|
+
)
|
80
84
|
distinct_delta_types = set([d.type for d in deltas])
|
81
85
|
if len(distinct_delta_types) > 1:
|
82
86
|
raise ValueError(
|
83
87
|
f"Deltas to merge must all share the same delta type "
|
84
|
-
f"(found {len(distinct_delta_types)} delta types)."
|
88
|
+
f"(found {len(distinct_delta_types)} delta types)."
|
89
|
+
)
|
85
90
|
merged_manifest = Manifest.merge_manifests(
|
86
91
|
manifests,
|
87
92
|
manifest_author,
|
@@ -106,9 +111,7 @@ class Delta(dict):
|
|
106
111
|
return val
|
107
112
|
|
108
113
|
@manifest.setter
|
109
|
-
def manifest(
|
110
|
-
self,
|
111
|
-
manifest: Optional[Manifest]) -> None:
|
114
|
+
def manifest(self, manifest: Optional[Manifest]) -> None:
|
112
115
|
self["manifest"] = manifest
|
113
116
|
|
114
117
|
@property
|
@@ -119,9 +122,7 @@ class Delta(dict):
|
|
119
122
|
return val
|
120
123
|
|
121
124
|
@meta.setter
|
122
|
-
def meta(
|
123
|
-
self,
|
124
|
-
meta: Optional[ManifestMeta]) -> None:
|
125
|
+
def meta(self, meta: Optional[ManifestMeta]) -> None:
|
125
126
|
self["meta"] = meta
|
126
127
|
|
127
128
|
@property
|
@@ -129,9 +130,7 @@ class Delta(dict):
|
|
129
130
|
return self.get("properties")
|
130
131
|
|
131
132
|
@properties.setter
|
132
|
-
def properties(
|
133
|
-
self,
|
134
|
-
properties: Optional[Dict[str, str]]) -> None:
|
133
|
+
def properties(self, properties: Optional[Dict[str, str]]) -> None:
|
135
134
|
self["properties"] = properties
|
136
135
|
|
137
136
|
@property
|
@@ -140,9 +139,7 @@ class Delta(dict):
|
|
140
139
|
return None if delta_type is None else DeltaType(delta_type)
|
141
140
|
|
142
141
|
@type.setter
|
143
|
-
def type(
|
144
|
-
self,
|
145
|
-
delta_type: Optional[DeltaType]) -> None:
|
142
|
+
def type(self, delta_type: Optional[DeltaType]) -> None:
|
146
143
|
self["type"] = delta_type
|
147
144
|
|
148
145
|
@property
|
@@ -153,9 +150,7 @@ class Delta(dict):
|
|
153
150
|
return val
|
154
151
|
|
155
152
|
@locator.setter
|
156
|
-
def locator(
|
157
|
-
self,
|
158
|
-
delta_locator: Optional[DeltaLocator]) -> None:
|
153
|
+
def locator(self, delta_locator: Optional[DeltaLocator]) -> None:
|
159
154
|
self["deltaLocator"] = delta_locator
|
160
155
|
|
161
156
|
@property
|
@@ -163,9 +158,7 @@ class Delta(dict):
|
|
163
158
|
return self.get("previousStreamPosition")
|
164
159
|
|
165
160
|
@previous_stream_position.setter
|
166
|
-
def previous_stream_position(
|
167
|
-
self,
|
168
|
-
previous_stream_position: Optional[int]) -> None:
|
161
|
+
def previous_stream_position(self, previous_stream_position: Optional[int]) -> None:
|
169
162
|
self["previousStreamPosition"] = previous_stream_position
|
170
163
|
|
171
164
|
@property
|
@@ -262,8 +255,9 @@ class Delta(dict):
|
|
262
255
|
|
263
256
|
class DeltaLocator(Locator, dict):
|
264
257
|
@staticmethod
|
265
|
-
def of(
|
266
|
-
|
258
|
+
def of(
|
259
|
+
partition_locator: Optional[PartitionLocator], stream_position: Optional[int]
|
260
|
+
) -> DeltaLocator:
|
267
261
|
"""
|
268
262
|
Creates a partition delta locator. Stream Position, if provided, should
|
269
263
|
be greater than that of any prior delta in the partition.
|
@@ -274,14 +268,16 @@ class DeltaLocator(Locator, dict):
|
|
274
268
|
return delta_locator
|
275
269
|
|
276
270
|
@staticmethod
|
277
|
-
def at(
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
271
|
+
def at(
|
272
|
+
namespace: Optional[str],
|
273
|
+
table_name: Optional[str],
|
274
|
+
table_version: Optional[str],
|
275
|
+
stream_id: Optional[str],
|
276
|
+
storage_type: Optional[str],
|
277
|
+
partition_values: Optional[List[Any]],
|
278
|
+
partition_id: Optional[str],
|
279
|
+
stream_position: Optional[int],
|
280
|
+
) -> DeltaLocator:
|
285
281
|
partition_locator = PartitionLocator.at(
|
286
282
|
namespace,
|
287
283
|
table_name,
|
@@ -304,9 +300,7 @@ class DeltaLocator(Locator, dict):
|
|
304
300
|
return val
|
305
301
|
|
306
302
|
@partition_locator.setter
|
307
|
-
def partition_locator(
|
308
|
-
self,
|
309
|
-
partition_locator: Optional[PartitionLocator]) -> None:
|
303
|
+
def partition_locator(self, partition_locator: Optional[PartitionLocator]) -> None:
|
310
304
|
self["partitionLocator"] = partition_locator
|
311
305
|
|
312
306
|
@property
|
deltacat/storage/model/list_result.py
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
import ray
|
5
4
|
from typing import Callable, Generic, List, Optional, TypeVar
|
6
5
|
|
7
|
-
|
6
|
+
import ray
|
7
|
+
|
8
|
+
T = TypeVar("T")
|
8
9
|
|
9
10
|
|
10
11
|
class ListResult(dict, Generic[T]):
|
11
12
|
@staticmethod
|
12
|
-
def of(
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
def of(
|
14
|
+
items: Optional[List[T]],
|
15
|
+
pagination_key: Optional[str],
|
16
|
+
next_page_provider: Optional[Callable[..., ListResult[T]]],
|
17
|
+
) -> ListResult:
|
16
18
|
list_result = ListResult()
|
17
19
|
list_result["items"] = items
|
18
20
|
list_result["paginationKey"] = pagination_key
|
@@ -35,8 +37,10 @@ class ListResult(dict, Generic[T]):
|
|
35
37
|
if pagination_key:
|
36
38
|
next_page_provider = self.next_page_provider
|
37
39
|
if next_page_provider is None:
|
38
|
-
raise ValueError(
|
39
|
-
|
40
|
+
raise ValueError(
|
41
|
+
f"Pagination key ('{pagination_key}') "
|
42
|
+
f"specified without a next page provider!"
|
43
|
+
)
|
40
44
|
next_list_result = next_page_provider(pagination_key)
|
41
45
|
if next_list_result.next_page_provider is None:
|
42
46
|
next_list_result["nextPageProvider"] = next_page_provider
|
deltacat/storage/model/namespace.py
CHANGED
@@ -8,8 +8,9 @@ from deltacat.storage.model.locator import Locator
|
|
8
8
|
|
9
9
|
class Namespace(dict):
|
10
10
|
@staticmethod
|
11
|
-
def of(
|
12
|
-
|
11
|
+
def of(
|
12
|
+
locator: Optional[NamespaceLocator], permissions: Optional[Dict[str, Any]]
|
13
|
+
) -> Namespace:
|
13
14
|
namespace = Namespace()
|
14
15
|
namespace.locator = locator
|
15
16
|
namespace.permissions = permissions
|
@@ -23,9 +24,7 @@ class Namespace(dict):
|
|
23
24
|
return val
|
24
25
|
|
25
26
|
@locator.setter
|
26
|
-
def locator(
|
27
|
-
self,
|
28
|
-
namespace_locator: Optional[NamespaceLocator]) -> None:
|
27
|
+
def locator(self, namespace_locator: Optional[NamespaceLocator]) -> None:
|
29
28
|
self["namespaceLocator"] = namespace_locator
|
30
29
|
|
31
30
|
@property
|
deltacat/storage/model/partition.py
CHANGED
@@ -1,29 +1,31 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
5
|
+
|
4
6
|
import pyarrow as pa
|
5
7
|
|
8
|
+
from deltacat.storage.model.locator import Locator
|
6
9
|
from deltacat.storage.model.namespace import NamespaceLocator
|
7
10
|
from deltacat.storage.model.stream import StreamLocator
|
8
11
|
from deltacat.storage.model.table import TableLocator
|
9
12
|
from deltacat.storage.model.table_version import TableVersionLocator
|
10
13
|
from deltacat.storage.model.types import CommitState
|
11
|
-
from deltacat.storage.model.locator import Locator
|
12
14
|
from deltacat.types.media import ContentType
|
13
15
|
|
14
|
-
from typing import Any, Dict, List, Optional, Union
|
15
|
-
|
16
16
|
|
17
17
|
class Partition(dict):
|
18
18
|
@staticmethod
|
19
|
-
def of(
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
19
|
+
def of(
|
20
|
+
locator: Optional[PartitionLocator],
|
21
|
+
schema: Optional[Union[pa.Schema, str, bytes]],
|
22
|
+
content_types: Optional[List[ContentType]],
|
23
|
+
state: Optional[CommitState] = None,
|
24
|
+
previous_stream_position: Optional[int] = None,
|
25
|
+
previous_partition_id: Optional[str] = None,
|
26
|
+
stream_position: Optional[int] = None,
|
27
|
+
next_partition_id: Optional[str] = None,
|
28
|
+
) -> Partition:
|
27
29
|
partition = Partition()
|
28
30
|
partition.locator = locator
|
29
31
|
partition.schema = schema
|
@@ -43,9 +45,7 @@ class Partition(dict):
|
|
43
45
|
return val
|
44
46
|
|
45
47
|
@locator.setter
|
46
|
-
def locator(
|
47
|
-
self,
|
48
|
-
partition_locator: Optional[PartitionLocator]) -> None:
|
48
|
+
def locator(self, partition_locator: Optional[PartitionLocator]) -> None:
|
49
49
|
self["partitionLocator"] = partition_locator
|
50
50
|
|
51
51
|
@property
|
@@ -59,13 +59,16 @@ class Partition(dict):
|
|
59
59
|
@property
|
60
60
|
def content_types(self) -> Optional[List[ContentType]]:
|
61
61
|
content_types = self.get("contentTypes")
|
62
|
-
return
|
63
|
-
|
62
|
+
return (
|
63
|
+
None
|
64
|
+
if content_types is None
|
65
|
+
else [None if _ is None else ContentType(_) for _ in content_types]
|
66
|
+
)
|
64
67
|
|
65
68
|
@content_types.setter
|
66
69
|
def content_types(
|
67
|
-
|
68
|
-
|
70
|
+
self, supported_content_types: Optional[List[ContentType]]
|
71
|
+
) -> None:
|
69
72
|
self["contentTypes"] = supported_content_types
|
70
73
|
|
71
74
|
@property
|
@@ -82,9 +85,7 @@ class Partition(dict):
|
|
82
85
|
return self.get("previousStreamPosition")
|
83
86
|
|
84
87
|
@previous_stream_position.setter
|
85
|
-
def previous_stream_position(
|
86
|
-
self,
|
87
|
-
previous_stream_position: Optional[int]) -> None:
|
88
|
+
def previous_stream_position(self, previous_stream_position: Optional[int]) -> None:
|
88
89
|
self["previousStreamPosition"] = previous_stream_position
|
89
90
|
|
90
91
|
@property
|
@@ -92,9 +93,7 @@ class Partition(dict):
|
|
92
93
|
return self.get("previousPartitionId")
|
93
94
|
|
94
95
|
@previous_partition_id.setter
|
95
|
-
def previous_partition_id(
|
96
|
-
self,
|
97
|
-
previous_partition_id: Optional[str]) -> None:
|
96
|
+
def previous_partition_id(self, previous_partition_id: Optional[str]) -> None:
|
98
97
|
self["previousPartitionId"] = previous_partition_id
|
99
98
|
|
100
99
|
@property
|
@@ -191,15 +190,18 @@ class Partition(dict):
|
|
191
190
|
|
192
191
|
def is_supported_content_type(self, content_type: ContentType) -> bool:
|
193
192
|
supported_content_types = self.content_types
|
194
|
-
return (not supported_content_types) or
|
195
|
-
|
193
|
+
return (not supported_content_types) or (
|
194
|
+
content_type in supported_content_types
|
195
|
+
)
|
196
196
|
|
197
197
|
|
198
198
|
class PartitionLocator(Locator, dict):
|
199
199
|
@staticmethod
|
200
|
-
def of(
|
201
|
-
|
202
|
-
|
200
|
+
def of(
|
201
|
+
stream_locator: Optional[StreamLocator],
|
202
|
+
partition_values: Optional[List[Any]],
|
203
|
+
partition_id: Optional[str],
|
204
|
+
) -> PartitionLocator:
|
203
205
|
"""
|
204
206
|
Creates a stream partition locator. Partition ID is
|
205
207
|
case-sensitive.
|
@@ -217,13 +219,15 @@ class PartitionLocator(Locator, dict):
|
|
217
219
|
return partition_locator
|
218
220
|
|
219
221
|
@staticmethod
|
220
|
-
def at(
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
222
|
+
def at(
|
223
|
+
namespace: Optional[str],
|
224
|
+
table_name: Optional[str],
|
225
|
+
table_version: Optional[str],
|
226
|
+
stream_id: Optional[str],
|
227
|
+
storage_type: Optional[str],
|
228
|
+
partition_values: Optional[List[Any]],
|
229
|
+
partition_id: Optional[str],
|
230
|
+
) -> PartitionLocator:
|
227
231
|
stream_locator = StreamLocator.at(
|
228
232
|
namespace,
|
229
233
|
table_name,
|
@@ -245,9 +249,7 @@ class PartitionLocator(Locator, dict):
|
|
245
249
|
return val
|
246
250
|
|
247
251
|
@stream_locator.setter
|
248
|
-
def stream_locator(
|
249
|
-
self,
|
250
|
-
stream_locator: Optional[StreamLocator]) -> None:
|
252
|
+
def stream_locator(self, stream_locator: Optional[StreamLocator]) -> None:
|
251
253
|
self["streamLocator"] = stream_locator
|
252
254
|
|
253
255
|
@property
|
@@ -263,9 +265,7 @@ class PartitionLocator(Locator, dict):
|
|
263
265
|
return self.get("partitionId")
|
264
266
|
|
265
267
|
@partition_id.setter
|
266
|
-
def partition_id(
|
267
|
-
self,
|
268
|
-
partition_id: Optional[str]) -> None:
|
268
|
+
def partition_id(self, partition_id: Optional[str]) -> None:
|
269
269
|
self["partitionId"] = partition_id
|
270
270
|
|
271
271
|
@property
|
deltacat/storage/model/stream.py
CHANGED
@@ -1,21 +1,23 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
+
from typing import Any, Dict, List, Optional
|
5
|
+
|
6
|
+
from deltacat.storage.model.locator import Locator
|
4
7
|
from deltacat.storage.model.namespace import NamespaceLocator
|
5
8
|
from deltacat.storage.model.table import TableLocator
|
6
9
|
from deltacat.storage.model.table_version import TableVersionLocator
|
7
10
|
from deltacat.storage.model.types import CommitState
|
8
|
-
from deltacat.storage.model.locator import Locator
|
9
|
-
|
10
|
-
from typing import Any, Dict, List, Optional
|
11
11
|
|
12
12
|
|
13
13
|
class Stream(dict):
|
14
14
|
@staticmethod
|
15
|
-
def of(
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
def of(
|
16
|
+
locator: Optional[StreamLocator],
|
17
|
+
partition_keys: Optional[List[Dict[str, Any]]],
|
18
|
+
state: Optional[CommitState] = None,
|
19
|
+
previous_stream_digest: Optional[bytes] = None,
|
20
|
+
) -> Stream:
|
19
21
|
stream = Stream()
|
20
22
|
stream.locator = locator
|
21
23
|
stream.partition_keys = partition_keys
|
@@ -31,9 +33,7 @@ class Stream(dict):
|
|
31
33
|
return val
|
32
34
|
|
33
35
|
@locator.setter
|
34
|
-
def locator(
|
35
|
-
self,
|
36
|
-
stream_locator: Optional[StreamLocator]) -> None:
|
36
|
+
def locator(self, stream_locator: Optional[StreamLocator]) -> None:
|
37
37
|
self["streamLocator"] = stream_locator
|
38
38
|
|
39
39
|
@property
|
@@ -41,9 +41,7 @@ class Stream(dict):
|
|
41
41
|
return self.get("partitionKeys")
|
42
42
|
|
43
43
|
@partition_keys.setter
|
44
|
-
def partition_keys(
|
45
|
-
self,
|
46
|
-
partition_keys: Optional[List[Dict[str, Any]]]) -> None:
|
44
|
+
def partition_keys(self, partition_keys: Optional[List[Dict[str, Any]]]) -> None:
|
47
45
|
self["partitionKeys"] = partition_keys
|
48
46
|
|
49
47
|
@property
|
@@ -51,9 +49,7 @@ class Stream(dict):
|
|
51
49
|
return self.get("previousStreamDigest")
|
52
50
|
|
53
51
|
@previous_stream_digest.setter
|
54
|
-
def previous_stream_digest(
|
55
|
-
self,
|
56
|
-
previous_stream_digest: Optional[str]) -> None:
|
52
|
+
def previous_stream_digest(self, previous_stream_digest: Optional[str]) -> None:
|
57
53
|
self["previousStreamDigest"] = previous_stream_digest
|
58
54
|
|
59
55
|
@property
|
@@ -114,9 +110,7 @@ class Stream(dict):
|
|
114
110
|
return stream_locator.table_version
|
115
111
|
return None
|
116
112
|
|
117
|
-
def validate_partition_values(
|
118
|
-
self,
|
119
|
-
partition_values: Optional[List[Any]]):
|
113
|
+
def validate_partition_values(self, partition_values: Optional[List[Any]]):
|
120
114
|
# TODO (pdames): ensure value data types match key data types
|
121
115
|
partition_keys = self.partition_keys
|
122
116
|
num_keys = len(partition_keys) if partition_keys else 0
|
@@ -124,14 +118,17 @@ class Stream(dict):
|
|
124
118
|
if num_values != num_keys:
|
125
119
|
raise ValueError(
|
126
120
|
f"Found {num_values} partition values but "
|
127
|
-
f"{num_keys} partition keys: {self}"
|
121
|
+
f"{num_keys} partition keys: {self}"
|
122
|
+
)
|
128
123
|
|
129
124
|
|
130
125
|
class StreamLocator(Locator, dict):
|
131
126
|
@staticmethod
|
132
|
-
def of(
|
133
|
-
|
134
|
-
|
127
|
+
def of(
|
128
|
+
table_version_locator: Optional[TableVersionLocator],
|
129
|
+
stream_id: Optional[str],
|
130
|
+
storage_type: Optional[str],
|
131
|
+
) -> StreamLocator:
|
135
132
|
"""
|
136
133
|
Creates a table version Stream Locator. All input parameters are
|
137
134
|
case-sensitive.
|
@@ -143,11 +140,13 @@ class StreamLocator(Locator, dict):
|
|
143
140
|
return stream_locator
|
144
141
|
|
145
142
|
@staticmethod
|
146
|
-
def at(
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
143
|
+
def at(
|
144
|
+
namespace: Optional[str],
|
145
|
+
table_name: Optional[str],
|
146
|
+
table_version: Optional[str],
|
147
|
+
stream_id: Optional[str],
|
148
|
+
storage_type: Optional[str],
|
149
|
+
) -> StreamLocator:
|
151
150
|
table_version_locator = TableVersionLocator.at(
|
152
151
|
namespace,
|
153
152
|
table_name,
|
@@ -168,8 +167,8 @@ class StreamLocator(Locator, dict):
|
|
168
167
|
|
169
168
|
@table_version_locator.setter
|
170
169
|
def table_version_locator(
|
171
|
-
|
172
|
-
|
170
|
+
self, table_version_locator: Optional[TableVersionLocator]
|
171
|
+
) -> None:
|
173
172
|
self["tableVersionLocator"] = table_version_locator
|
174
173
|
|
175
174
|
@property
|
deltacat/storage/model/table.py
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
from deltacat.storage.model.namespace import NamespaceLocator
|
5
|
-
from deltacat.storage.model.locator import Locator
|
6
|
-
|
7
4
|
from typing import Any, Dict, Optional
|
8
5
|
|
6
|
+
from deltacat.storage.model.locator import Locator
|
7
|
+
from deltacat.storage.model.namespace import NamespaceLocator
|
8
|
+
|
9
9
|
|
10
10
|
class Table(dict):
|
11
11
|
@staticmethod
|
12
|
-
def of(
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
def of(
|
13
|
+
locator: Optional[TableLocator],
|
14
|
+
permissions: Optional[Dict[str, Any]] = None,
|
15
|
+
description: Optional[str] = None,
|
16
|
+
properties: Optional[Dict[str, str]] = None,
|
17
|
+
) -> Table:
|
16
18
|
table = Table()
|
17
19
|
table.locator = locator
|
18
20
|
table.permissions = permissions
|
@@ -79,16 +81,16 @@ class Table(dict):
|
|
79
81
|
|
80
82
|
class TableLocator(Locator, dict):
|
81
83
|
@staticmethod
|
82
|
-
def of(
|
83
|
-
|
84
|
+
def of(
|
85
|
+
namespace_locator: Optional[NamespaceLocator], table_name: Optional[str]
|
86
|
+
) -> TableLocator:
|
84
87
|
table_locator = TableLocator()
|
85
88
|
table_locator.namespace_locator = namespace_locator
|
86
89
|
table_locator.table_name = table_name
|
87
90
|
return table_locator
|
88
91
|
|
89
92
|
@staticmethod
|
90
|
-
def at(namespace: Optional[str],
|
91
|
-
table_name: Optional[str]) -> TableLocator:
|
93
|
+
def at(namespace: Optional[str], table_name: Optional[str]) -> TableLocator:
|
92
94
|
namespace_locator = NamespaceLocator.of(namespace)
|
93
95
|
return TableLocator.of(namespace_locator, table_name)
|
94
96
|
|
@@ -100,9 +102,7 @@ class TableLocator(Locator, dict):
|
|
100
102
|
return val
|
101
103
|
|
102
104
|
@namespace_locator.setter
|
103
|
-
def namespace_locator(
|
104
|
-
self,
|
105
|
-
namespace_locator: Optional[NamespaceLocator]) -> None:
|
105
|
+
def namespace_locator(self, namespace_locator: Optional[NamespaceLocator]) -> None:
|
106
106
|
self["namespaceLocator"] = namespace_locator
|
107
107
|
|
108
108
|
@property
|