deltacat 0.1.10.dev0__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +41 -15
- deltacat/aws/clients.py +12 -31
- deltacat/aws/constants.py +1 -1
- deltacat/aws/redshift/__init__.py +7 -2
- deltacat/aws/redshift/model/manifest.py +54 -50
- deltacat/aws/s3u.py +176 -187
- deltacat/catalog/delegate.py +151 -185
- deltacat/catalog/interface.py +78 -97
- deltacat/catalog/model/catalog.py +21 -21
- deltacat/catalog/model/table_definition.py +11 -9
- deltacat/compute/compactor/__init__.py +12 -16
- deltacat/compute/compactor/compaction_session.py +237 -166
- deltacat/compute/compactor/model/delta_annotated.py +60 -44
- deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
- deltacat/compute/compactor/model/delta_file_locator.py +10 -8
- deltacat/compute/compactor/model/materialize_result.py +6 -7
- deltacat/compute/compactor/model/primary_key_index.py +38 -34
- deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
- deltacat/compute/compactor/model/round_completion_info.py +25 -19
- deltacat/compute/compactor/model/sort_key.py +18 -15
- deltacat/compute/compactor/steps/dedupe.py +119 -94
- deltacat/compute/compactor/steps/hash_bucket.py +48 -47
- deltacat/compute/compactor/steps/materialize.py +86 -92
- deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
- deltacat/compute/compactor/steps/rehash/rewrite_index.py +5 -5
- deltacat/compute/compactor/utils/io.py +59 -47
- deltacat/compute/compactor/utils/primary_key_index.py +91 -80
- deltacat/compute/compactor/utils/round_completion_file.py +22 -23
- deltacat/compute/compactor/utils/system_columns.py +33 -45
- deltacat/compute/metastats/meta_stats.py +235 -157
- deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
- deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
- deltacat/compute/metastats/stats.py +95 -64
- deltacat/compute/metastats/utils/io.py +100 -53
- deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
- deltacat/compute/metastats/utils/ray_utils.py +38 -33
- deltacat/compute/stats/basic.py +107 -69
- deltacat/compute/stats/models/delta_column_stats.py +11 -8
- deltacat/compute/stats/models/delta_stats.py +59 -32
- deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
- deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
- deltacat/compute/stats/models/stats_result.py +24 -14
- deltacat/compute/stats/utils/intervals.py +16 -9
- deltacat/compute/stats/utils/io.py +86 -51
- deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
- deltacat/constants.py +4 -13
- deltacat/io/__init__.py +2 -2
- deltacat/io/aws/redshift/redshift_datasource.py +157 -143
- deltacat/io/dataset.py +14 -17
- deltacat/io/read_api.py +36 -33
- deltacat/logs.py +94 -42
- deltacat/storage/__init__.py +18 -8
- deltacat/storage/interface.py +196 -213
- deltacat/storage/model/delta.py +45 -51
- deltacat/storage/model/list_result.py +12 -8
- deltacat/storage/model/namespace.py +4 -5
- deltacat/storage/model/partition.py +42 -42
- deltacat/storage/model/stream.py +29 -30
- deltacat/storage/model/table.py +14 -14
- deltacat/storage/model/table_version.py +32 -31
- deltacat/storage/model/types.py +1 -0
- deltacat/tests/stats/test_intervals.py +11 -24
- deltacat/tests/utils/__init__.py +0 -0
- deltacat/tests/utils/test_record_batch_tables.py +284 -0
- deltacat/types/media.py +3 -4
- deltacat/types/tables.py +31 -21
- deltacat/utils/common.py +5 -11
- deltacat/utils/numpy.py +20 -22
- deltacat/utils/pandas.py +73 -100
- deltacat/utils/performance.py +3 -9
- deltacat/utils/placement.py +259 -230
- deltacat/utils/pyarrow.py +302 -89
- deltacat/utils/ray_utils/collections.py +2 -1
- deltacat/utils/ray_utils/concurrency.py +27 -28
- deltacat/utils/ray_utils/dataset.py +28 -28
- deltacat/utils/ray_utils/performance.py +5 -9
- deltacat/utils/ray_utils/runtime.py +9 -10
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/METADATA +1 -1
- deltacat-0.1.12.dist-info/RECORD +110 -0
- deltacat-0.1.10.dev0.dist-info/RECORD +0 -108
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/LICENSE +0 -0
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/WHEEL +0 -0
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/top_level.txt +0 -0
deltacat/storage/interface.py
CHANGED
@@ -1,19 +1,32 @@
|
|
1
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Union
|
2
|
+
|
1
3
|
import pyarrow as pa
|
2
4
|
|
3
5
|
from deltacat import SortKey
|
4
|
-
from deltacat.storage import
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
from deltacat.storage import (
|
7
|
+
Delta,
|
8
|
+
DeltaLocator,
|
9
|
+
DeltaType,
|
10
|
+
DistributedDataset,
|
11
|
+
LifecycleState,
|
12
|
+
ListResult,
|
13
|
+
LocalDataset,
|
14
|
+
LocalTable,
|
15
|
+
Manifest,
|
16
|
+
ManifestAuthor,
|
17
|
+
Namespace,
|
18
|
+
Partition,
|
19
|
+
SchemaConsistencyType,
|
20
|
+
Stream,
|
21
|
+
StreamLocator,
|
22
|
+
Table,
|
23
|
+
TableVersion,
|
24
|
+
)
|
25
|
+
from deltacat.types.media import ContentType, StorageType, TableType
|
9
26
|
from deltacat.utils.common import ReadKwargsProvider
|
10
27
|
|
11
|
-
from typing import Any, Callable, Dict, List, Optional, Set, Union
|
12
|
-
|
13
28
|
|
14
|
-
def list_namespaces(
|
15
|
-
*args,
|
16
|
-
**kwargs) -> ListResult[Namespace]:
|
29
|
+
def list_namespaces(*args, **kwargs) -> ListResult[Namespace]:
|
17
30
|
"""
|
18
31
|
Lists a page of table namespaces. Namespaces are returned as list result
|
19
32
|
items.
|
@@ -21,10 +34,7 @@ def list_namespaces(
|
|
21
34
|
raise NotImplementedError("list_namespaces not implemented")
|
22
35
|
|
23
36
|
|
24
|
-
def list_tables(
|
25
|
-
namespace: str,
|
26
|
-
*args,
|
27
|
-
**kwargs) -> ListResult[Table]:
|
37
|
+
def list_tables(namespace: str, *args, **kwargs) -> ListResult[Table]:
|
28
38
|
"""
|
29
39
|
Lists a page of tables for the given table namespace. Tables are returned as
|
30
40
|
list result items. Raises an error if the given namespace does not exist.
|
@@ -33,10 +43,8 @@ def list_tables(
|
|
33
43
|
|
34
44
|
|
35
45
|
def list_table_versions(
|
36
|
-
|
37
|
-
|
38
|
-
*args,
|
39
|
-
**kwargs) -> ListResult[TableVersion]:
|
46
|
+
namespace: str, table_name: str, *args, **kwargs
|
47
|
+
) -> ListResult[TableVersion]:
|
40
48
|
"""
|
41
49
|
Lists a page of table versions for the given table. Table versions are
|
42
50
|
returned as list result items. Raises an error if the given table does not
|
@@ -46,11 +54,12 @@ def list_table_versions(
|
|
46
54
|
|
47
55
|
|
48
56
|
def list_partitions(
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
57
|
+
namespace: str,
|
58
|
+
table_name: str,
|
59
|
+
table_version: Optional[str] = None,
|
60
|
+
*args,
|
61
|
+
**kwargs
|
62
|
+
) -> ListResult[Partition]:
|
54
63
|
"""
|
55
64
|
Lists a page of partitions for the given table version. Partitions are
|
56
65
|
returned as list result items. Table version resolves to the latest active
|
@@ -60,10 +69,7 @@ def list_partitions(
|
|
60
69
|
raise NotImplementedError("list_partitions not implemented")
|
61
70
|
|
62
71
|
|
63
|
-
def list_stream_partitions(
|
64
|
-
stream: Stream,
|
65
|
-
*args,
|
66
|
-
**kwargs) -> ListResult[Partition]:
|
72
|
+
def list_stream_partitions(stream: Stream, *args, **kwargs) -> ListResult[Partition]:
|
67
73
|
"""
|
68
74
|
Lists all partitions committed to the given stream.
|
69
75
|
"""
|
@@ -71,16 +77,17 @@ def list_stream_partitions(
|
|
71
77
|
|
72
78
|
|
73
79
|
def list_deltas(
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
80
|
+
namespace: str,
|
81
|
+
table_name: str,
|
82
|
+
partition_values: Optional[List[Any]] = None,
|
83
|
+
table_version: Optional[str] = None,
|
84
|
+
first_stream_position: Optional[int] = None,
|
85
|
+
last_stream_position: Optional[int] = None,
|
86
|
+
ascending_order: Optional[bool] = None,
|
87
|
+
include_manifest: bool = False,
|
88
|
+
*args,
|
89
|
+
**kwargs
|
90
|
+
) -> ListResult[Delta]:
|
84
91
|
"""
|
85
92
|
Lists a page of deltas for the given table version and committed partition.
|
86
93
|
Deltas are returned as list result items. Deltas returned can optionally be
|
@@ -98,10 +105,8 @@ def list_deltas(
|
|
98
105
|
|
99
106
|
|
100
107
|
def list_partition_deltas(
|
101
|
-
|
102
|
-
|
103
|
-
*args,
|
104
|
-
**kwargs) -> ListResult[Delta]:
|
108
|
+
partition: Partition, include_manifest: bool = False, *args, **kwargs
|
109
|
+
) -> ListResult[Delta]:
|
105
110
|
"""
|
106
111
|
Lists a page of deltas committed to the given partition.
|
107
112
|
|
@@ -113,14 +118,15 @@ def list_partition_deltas(
|
|
113
118
|
|
114
119
|
|
115
120
|
def get_delta(
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
121
|
+
namespace: str,
|
122
|
+
table_name: str,
|
123
|
+
stream_position: int,
|
124
|
+
partition_values: Optional[List[Any]] = None,
|
125
|
+
table_version: Optional[str] = None,
|
126
|
+
include_manifest: bool = False,
|
127
|
+
*args,
|
128
|
+
**kwargs
|
129
|
+
) -> Optional[Delta]:
|
124
130
|
"""
|
125
131
|
Gets the delta for the given table version, partition, and stream position.
|
126
132
|
Table version resolves to the latest active table version if not specified.
|
@@ -135,13 +141,14 @@ def get_delta(
|
|
135
141
|
|
136
142
|
|
137
143
|
def get_latest_delta(
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
144
|
+
namespace: str,
|
145
|
+
table_name: str,
|
146
|
+
partition_values: Optional[List[Any]] = None,
|
147
|
+
table_version: Optional[str] = None,
|
148
|
+
include_manifest: bool = False,
|
149
|
+
*args,
|
150
|
+
**kwargs
|
151
|
+
) -> Optional[Delta]:
|
145
152
|
"""
|
146
153
|
Gets the latest delta (i.e. the delta with the greatest stream position) for
|
147
154
|
the given table version and partition. Table version resolves to the latest
|
@@ -157,15 +164,16 @@ def get_latest_delta(
|
|
157
164
|
|
158
165
|
|
159
166
|
def download_delta(
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
167
|
+
delta_like: Union[Delta, DeltaLocator],
|
168
|
+
table_type: TableType = TableType.PYARROW,
|
169
|
+
storage_type: StorageType = StorageType.DISTRIBUTED,
|
170
|
+
max_parallelism: Optional[int] = None,
|
171
|
+
columns: Optional[List[str]] = None,
|
172
|
+
file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
|
173
|
+
ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
|
174
|
+
*args,
|
175
|
+
**kwargs
|
176
|
+
) -> Union[LocalDataset, DistributedDataset]:
|
169
177
|
"""
|
170
178
|
Download the given delta or delta locator into either a list of
|
171
179
|
tables resident in the local node's memory, or into a dataset distributed
|
@@ -177,13 +185,14 @@ def download_delta(
|
|
177
185
|
|
178
186
|
|
179
187
|
def download_delta_manifest_entry(
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
188
|
+
delta_like: Union[Delta, DeltaLocator],
|
189
|
+
entry_index: int,
|
190
|
+
table_type: TableType = TableType.PYARROW,
|
191
|
+
columns: Optional[List[str]] = None,
|
192
|
+
file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
|
193
|
+
*args,
|
194
|
+
**kwargs
|
195
|
+
) -> LocalTable:
|
187
196
|
"""
|
188
197
|
Downloads a single manifest entry into the specified table type for the
|
189
198
|
given delta or delta locator. If a delta is provided with a non-empty
|
@@ -194,9 +203,8 @@ def download_delta_manifest_entry(
|
|
194
203
|
|
195
204
|
|
196
205
|
def get_delta_manifest(
|
197
|
-
|
198
|
-
|
199
|
-
**kwargs) -> Manifest:
|
206
|
+
delta_like: Union[Delta, DeltaLocator], *args, **kwargs
|
207
|
+
) -> Manifest:
|
200
208
|
"""
|
201
209
|
Get the manifest associated with the given delta or delta locator. This
|
202
210
|
always retrieves the authoritative remote copy of the delta manifest, and
|
@@ -206,10 +214,8 @@ def get_delta_manifest(
|
|
206
214
|
|
207
215
|
|
208
216
|
def create_namespace(
|
209
|
-
|
210
|
-
|
211
|
-
*args,
|
212
|
-
**kwargs) -> Namespace:
|
217
|
+
namespace: str, permissions: Dict[str, Any], *args, **kwargs
|
218
|
+
) -> Namespace:
|
213
219
|
"""
|
214
220
|
Creates a table namespace with the given name and permissions. Returns
|
215
221
|
the created namespace.
|
@@ -218,11 +224,12 @@ def create_namespace(
|
|
218
224
|
|
219
225
|
|
220
226
|
def update_namespace(
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
227
|
+
namespace: str,
|
228
|
+
permissions: Optional[Dict[str, Any]] = None,
|
229
|
+
new_namespace: Optional[str] = None,
|
230
|
+
*args,
|
231
|
+
**kwargs
|
232
|
+
) -> None:
|
226
233
|
"""
|
227
234
|
Updates a table namespace's name and/or permissions. Raises an error if the
|
228
235
|
given namespace does not exist.
|
@@ -231,22 +238,23 @@ def update_namespace(
|
|
231
238
|
|
232
239
|
|
233
240
|
def create_table_version(
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
241
|
+
namespace: str,
|
242
|
+
table_name: str,
|
243
|
+
table_version: Optional[str] = None,
|
244
|
+
schema: Optional[Union[pa.Schema, str, bytes]] = None,
|
245
|
+
schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
|
246
|
+
partition_keys: Optional[List[Dict[str, Any]]] = None,
|
247
|
+
primary_key_column_names: Optional[Set[str]] = None,
|
248
|
+
sort_keys: Optional[List[SortKey]] = None,
|
249
|
+
table_version_description: Optional[str] = None,
|
250
|
+
table_version_properties: Optional[Dict[str, str]] = None,
|
251
|
+
table_permissions: Optional[Dict[str, Any]] = None,
|
252
|
+
table_description: Optional[str] = None,
|
253
|
+
table_properties: Optional[Dict[str, str]] = None,
|
254
|
+
supported_content_types: Optional[List[ContentType]] = None,
|
255
|
+
*args,
|
256
|
+
**kwargs
|
257
|
+
) -> Stream:
|
250
258
|
"""
|
251
259
|
Create a table version with an unreleased lifecycle state and an empty delta
|
252
260
|
stream. Table versions may be schemaless and unpartitioned, or partitioned
|
@@ -281,12 +289,13 @@ def create_table_version(
|
|
281
289
|
|
282
290
|
|
283
291
|
def update_table(
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
292
|
+
namespace: str,
|
293
|
+
table_name: str,
|
294
|
+
permissions: Optional[Dict[str, Any]] = None,
|
295
|
+
description: Optional[str] = None,
|
296
|
+
properties: Optional[Dict[str, str]] = None,
|
297
|
+
new_table_name: Optional[str] = None,
|
298
|
+
) -> None:
|
290
299
|
"""
|
291
300
|
Update table metadata describing the table versions it contains. By default,
|
292
301
|
a table's properties are empty, and its description and permissions are
|
@@ -297,16 +306,17 @@ def update_table(
|
|
297
306
|
|
298
307
|
|
299
308
|
def update_table_version(
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
309
|
+
namespace: str,
|
310
|
+
table_name: str,
|
311
|
+
table_version: str,
|
312
|
+
lifecycle_state: Optional[LifecycleState] = None,
|
313
|
+
schema: Optional[Union[pa.Schema, str, bytes]] = None,
|
314
|
+
schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
|
315
|
+
description: Optional[str] = None,
|
316
|
+
properties: Optional[Dict[str, str]] = None,
|
317
|
+
*args,
|
318
|
+
**kwargs
|
319
|
+
) -> None:
|
310
320
|
"""
|
311
321
|
Update a table version. Notably, updating an unreleased table version's
|
312
322
|
lifecycle state to 'active' telegraphs that it is ready for external
|
@@ -320,11 +330,12 @@ def update_table_version(
|
|
320
330
|
|
321
331
|
|
322
332
|
def stage_stream(
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
333
|
+
namespace: str,
|
334
|
+
table_name: str,
|
335
|
+
table_version: Optional[str] = None,
|
336
|
+
*args,
|
337
|
+
**kwargs
|
338
|
+
) -> Stream:
|
328
339
|
"""
|
329
340
|
Stages a new delta stream for the given table version. Resolves to the
|
330
341
|
latest active table version if no table version is given. Returns the
|
@@ -333,10 +344,7 @@ def stage_stream(
|
|
333
344
|
raise NotImplementedError("stage_stream not implemented")
|
334
345
|
|
335
346
|
|
336
|
-
def commit_stream(
|
337
|
-
stream: Stream,
|
338
|
-
*args,
|
339
|
-
**kwargs) -> Stream:
|
347
|
+
def commit_stream(stream: Stream, *args, **kwargs) -> Stream:
|
340
348
|
"""
|
341
349
|
Registers a delta stream with a target table version, replacing any
|
342
350
|
previous stream registered for the same table version. Returns the
|
@@ -346,11 +354,12 @@ def commit_stream(
|
|
346
354
|
|
347
355
|
|
348
356
|
def delete_stream(
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
357
|
+
namespace: str,
|
358
|
+
table_name: str,
|
359
|
+
table_version: Optional[str] = None,
|
360
|
+
*args,
|
361
|
+
**kwargs
|
362
|
+
) -> None:
|
354
363
|
"""
|
355
364
|
Deletes the delta stream currently registered with the given table version.
|
356
365
|
Resolves to the latest active table version if no table version is given.
|
@@ -360,11 +369,12 @@ def delete_stream(
|
|
360
369
|
|
361
370
|
|
362
371
|
def get_stream(
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
372
|
+
namespace: str,
|
373
|
+
table_name: str,
|
374
|
+
table_version: Optional[str] = None,
|
375
|
+
*args,
|
376
|
+
**kwargs
|
377
|
+
) -> Optional[Stream]:
|
368
378
|
"""
|
369
379
|
Gets the most recently committed stream for the given table version and
|
370
380
|
partition key values. Resolves to the latest active table version if no
|
@@ -374,10 +384,8 @@ def get_stream(
|
|
374
384
|
|
375
385
|
|
376
386
|
def stage_partition(
|
377
|
-
|
378
|
-
|
379
|
-
*args,
|
380
|
-
**kwargs) -> Partition:
|
387
|
+
stream: Stream, partition_values: Optional[List[Any]] = None, *args, **kwargs
|
388
|
+
) -> Partition:
|
381
389
|
"""
|
382
390
|
Stages a new partition for the given stream and partition values. Returns
|
383
391
|
the staged partition. If this partition will replace another partition
|
@@ -388,10 +396,7 @@ def stage_partition(
|
|
388
396
|
raise NotImplementedError("stage_partition not implemented")
|
389
397
|
|
390
398
|
|
391
|
-
def commit_partition(
|
392
|
-
partition: Partition,
|
393
|
-
*args,
|
394
|
-
**kwargs) -> Partition:
|
399
|
+
def commit_partition(partition: Partition, *args, **kwargs) -> Partition:
|
395
400
|
"""
|
396
401
|
Commits the given partition to its associated table version stream,
|
397
402
|
replacing any previous partition registered for the same stream and
|
@@ -406,12 +411,13 @@ def commit_partition(
|
|
406
411
|
|
407
412
|
|
408
413
|
def delete_partition(
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
414
|
+
namespace: str,
|
415
|
+
table_name: str,
|
416
|
+
table_version: Optional[str] = None,
|
417
|
+
partition_values: Optional[List[Any]] = None,
|
418
|
+
*args,
|
419
|
+
**kwargs
|
420
|
+
) -> None:
|
415
421
|
"""
|
416
422
|
Deletes the given partition from the specified table version. Resolves to
|
417
423
|
the latest active table version if no table version is given. Partition
|
@@ -422,10 +428,11 @@ def delete_partition(
|
|
422
428
|
|
423
429
|
|
424
430
|
def get_partition(
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
431
|
+
stream_locator: StreamLocator,
|
432
|
+
partition_values: Optional[List[Any]] = None,
|
433
|
+
*args,
|
434
|
+
**kwargs
|
435
|
+
) -> Optional[Partition]:
|
429
436
|
"""
|
430
437
|
Gets the most recently committed partition for the given stream locator and
|
431
438
|
partition key values. Returns None if no partition has been committed for
|
@@ -436,16 +443,17 @@ def get_partition(
|
|
436
443
|
|
437
444
|
|
438
445
|
def stage_delta(
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
446
|
+
data: Union[LocalTable, LocalDataset, DistributedDataset],
|
447
|
+
partition: Partition,
|
448
|
+
delta_type: DeltaType = DeltaType.UPSERT,
|
449
|
+
max_records_per_entry: Optional[int] = None,
|
450
|
+
author: Optional[ManifestAuthor] = None,
|
451
|
+
properties: Optional[Dict[str, str]] = None,
|
452
|
+
s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
|
453
|
+
content_type: ContentType = ContentType.PARQUET,
|
454
|
+
*args,
|
455
|
+
**kwargs
|
456
|
+
) -> Delta:
|
449
457
|
"""
|
450
458
|
Writes the given table to 1 or more S3 files. Returns an unregistered
|
451
459
|
delta whose manifest entries point to the uploaded files. Applies any
|
@@ -454,10 +462,7 @@ def stage_delta(
|
|
454
462
|
raise NotImplementedError("stage_delta not implemented")
|
455
463
|
|
456
464
|
|
457
|
-
def commit_delta(
|
458
|
-
delta: Delta,
|
459
|
-
*args,
|
460
|
-
**kwargs) -> Delta:
|
465
|
+
def commit_delta(delta: Delta, *args, **kwargs) -> Delta:
|
461
466
|
"""
|
462
467
|
Registers a new delta with its associated target table version and
|
463
468
|
partition. Returns the registered delta. If the delta's previous stream
|
@@ -469,10 +474,7 @@ def commit_delta(
|
|
469
474
|
raise NotImplementedError("commit_delta not implemented")
|
470
475
|
|
471
476
|
|
472
|
-
def get_namespace(
|
473
|
-
namespace: str,
|
474
|
-
*args,
|
475
|
-
**kwargs) -> Optional[Namespace]:
|
477
|
+
def get_namespace(namespace: str, *args, **kwargs) -> Optional[Namespace]:
|
476
478
|
"""
|
477
479
|
Gets table namespace metadata for the specified table namespace. Returns
|
478
480
|
None if the given namespace does not exist.
|
@@ -480,21 +482,14 @@ def get_namespace(
|
|
480
482
|
raise NotImplementedError("get_namespace not implemented")
|
481
483
|
|
482
484
|
|
483
|
-
def namespace_exists(
|
484
|
-
namespace: str,
|
485
|
-
*args,
|
486
|
-
**kwargs) -> bool:
|
485
|
+
def namespace_exists(namespace: str, *args, **kwargs) -> bool:
|
487
486
|
"""
|
488
487
|
Returns True if the given table namespace exists, False if not.
|
489
488
|
"""
|
490
489
|
raise NotImplementedError("namespace_exists not implemented")
|
491
490
|
|
492
491
|
|
493
|
-
def get_table(
|
494
|
-
namespace: str,
|
495
|
-
table_name: str,
|
496
|
-
*args,
|
497
|
-
**kwargs) -> Optional[Table]:
|
492
|
+
def get_table(namespace: str, table_name: str, *args, **kwargs) -> Optional[Table]:
|
498
493
|
"""
|
499
494
|
Gets table metadata for the specified table. Returns None if the given
|
500
495
|
table does not exist.
|
@@ -502,11 +497,7 @@ def get_table(
|
|
502
497
|
raise NotImplementedError("get_table not implemented")
|
503
498
|
|
504
499
|
|
505
|
-
def table_exists(
|
506
|
-
namespace: str,
|
507
|
-
table_name: str,
|
508
|
-
*args,
|
509
|
-
**kwargs) -> bool:
|
500
|
+
def table_exists(namespace: str, table_name: str, *args, **kwargs) -> bool:
|
510
501
|
"""
|
511
502
|
Returns True if the given table exists, False if not.
|
512
503
|
"""
|
@@ -514,11 +505,8 @@ def table_exists(
|
|
514
505
|
|
515
506
|
|
516
507
|
def get_table_version(
|
517
|
-
|
518
|
-
|
519
|
-
table_version: str,
|
520
|
-
*args,
|
521
|
-
**kwargs) -> Optional[TableVersion]:
|
508
|
+
namespace: str, table_name: str, table_version: str, *args, **kwargs
|
509
|
+
) -> Optional[TableVersion]:
|
522
510
|
"""
|
523
511
|
Gets table version metadata for the specified table version. Returns None
|
524
512
|
if the given table version does not exist.
|
@@ -527,10 +515,8 @@ def get_table_version(
|
|
527
515
|
|
528
516
|
|
529
517
|
def get_latest_table_version(
|
530
|
-
|
531
|
-
|
532
|
-
*args,
|
533
|
-
**kwargs) -> Optional[TableVersion]:
|
518
|
+
namespace: str, table_name: str, *args, **kwargs
|
519
|
+
) -> Optional[TableVersion]:
|
534
520
|
"""
|
535
521
|
Gets table version metadata for the latest version of the specified table.
|
536
522
|
Returns None if no table version exists for the given table.
|
@@ -539,10 +525,8 @@ def get_latest_table_version(
|
|
539
525
|
|
540
526
|
|
541
527
|
def get_latest_active_table_version(
|
542
|
-
|
543
|
-
|
544
|
-
*args,
|
545
|
-
**kwargs) -> Optional[TableVersion]:
|
528
|
+
namespace: str, table_name: str, *args, **kwargs
|
529
|
+
) -> Optional[TableVersion]:
|
546
530
|
"""
|
547
531
|
Gets table version metadata for the latest active version of the specified
|
548
532
|
table. Returns None if no active table version exists for the given table.
|
@@ -551,11 +535,12 @@ def get_latest_active_table_version(
|
|
551
535
|
|
552
536
|
|
553
537
|
def get_table_version_column_names(
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
538
|
+
namespace: str,
|
539
|
+
table_name: str,
|
540
|
+
table_version: Optional[str] = None,
|
541
|
+
*args,
|
542
|
+
**kwargs
|
543
|
+
) -> Optional[List[str]]:
|
559
544
|
"""
|
560
545
|
Gets a list of column names for the specified table version, or for the
|
561
546
|
latest active table version if none is specified. The index of each
|
@@ -568,11 +553,12 @@ def get_table_version_column_names(
|
|
568
553
|
|
569
554
|
|
570
555
|
def get_table_version_schema(
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
556
|
+
namespace: str,
|
557
|
+
table_name: str,
|
558
|
+
table_version: Optional[str] = None,
|
559
|
+
*args,
|
560
|
+
**kwargs
|
561
|
+
) -> Optional[Union[pa.Schema, str, bytes]]:
|
576
562
|
"""
|
577
563
|
Gets the schema for the specified table version, or for the latest active
|
578
564
|
table version if none is specified. Returns None if the table version is
|
@@ -582,11 +568,8 @@ def get_table_version_schema(
|
|
582
568
|
|
583
569
|
|
584
570
|
def table_version_exists(
|
585
|
-
|
586
|
-
|
587
|
-
table_version: str,
|
588
|
-
*args,
|
589
|
-
**kwargs) -> bool:
|
571
|
+
namespace: str, table_name: str, table_version: str, *args, **kwargs
|
572
|
+
) -> bool:
|
590
573
|
"""
|
591
574
|
Returns True if the given table version exists, False if not.
|
592
575
|
"""
|