deltacat 0.1.10.dev0__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +41 -15
- deltacat/aws/clients.py +12 -31
- deltacat/aws/constants.py +1 -1
- deltacat/aws/redshift/__init__.py +7 -2
- deltacat/aws/redshift/model/manifest.py +54 -50
- deltacat/aws/s3u.py +176 -187
- deltacat/catalog/delegate.py +151 -185
- deltacat/catalog/interface.py +78 -97
- deltacat/catalog/model/catalog.py +21 -21
- deltacat/catalog/model/table_definition.py +11 -9
- deltacat/compute/compactor/__init__.py +12 -16
- deltacat/compute/compactor/compaction_session.py +237 -166
- deltacat/compute/compactor/model/delta_annotated.py +60 -44
- deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
- deltacat/compute/compactor/model/delta_file_locator.py +10 -8
- deltacat/compute/compactor/model/materialize_result.py +6 -7
- deltacat/compute/compactor/model/primary_key_index.py +38 -34
- deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
- deltacat/compute/compactor/model/round_completion_info.py +25 -19
- deltacat/compute/compactor/model/sort_key.py +18 -15
- deltacat/compute/compactor/steps/dedupe.py +119 -94
- deltacat/compute/compactor/steps/hash_bucket.py +48 -47
- deltacat/compute/compactor/steps/materialize.py +86 -92
- deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
- deltacat/compute/compactor/steps/rehash/rewrite_index.py +5 -5
- deltacat/compute/compactor/utils/io.py +59 -47
- deltacat/compute/compactor/utils/primary_key_index.py +91 -80
- deltacat/compute/compactor/utils/round_completion_file.py +22 -23
- deltacat/compute/compactor/utils/system_columns.py +33 -45
- deltacat/compute/metastats/meta_stats.py +235 -157
- deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
- deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
- deltacat/compute/metastats/stats.py +95 -64
- deltacat/compute/metastats/utils/io.py +100 -53
- deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
- deltacat/compute/metastats/utils/ray_utils.py +38 -33
- deltacat/compute/stats/basic.py +107 -69
- deltacat/compute/stats/models/delta_column_stats.py +11 -8
- deltacat/compute/stats/models/delta_stats.py +59 -32
- deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
- deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
- deltacat/compute/stats/models/stats_result.py +24 -14
- deltacat/compute/stats/utils/intervals.py +16 -9
- deltacat/compute/stats/utils/io.py +86 -51
- deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
- deltacat/constants.py +4 -13
- deltacat/io/__init__.py +2 -2
- deltacat/io/aws/redshift/redshift_datasource.py +157 -143
- deltacat/io/dataset.py +14 -17
- deltacat/io/read_api.py +36 -33
- deltacat/logs.py +94 -42
- deltacat/storage/__init__.py +18 -8
- deltacat/storage/interface.py +196 -213
- deltacat/storage/model/delta.py +45 -51
- deltacat/storage/model/list_result.py +12 -8
- deltacat/storage/model/namespace.py +4 -5
- deltacat/storage/model/partition.py +42 -42
- deltacat/storage/model/stream.py +29 -30
- deltacat/storage/model/table.py +14 -14
- deltacat/storage/model/table_version.py +32 -31
- deltacat/storage/model/types.py +1 -0
- deltacat/tests/stats/test_intervals.py +11 -24
- deltacat/tests/utils/__init__.py +0 -0
- deltacat/tests/utils/test_record_batch_tables.py +284 -0
- deltacat/types/media.py +3 -4
- deltacat/types/tables.py +31 -21
- deltacat/utils/common.py +5 -11
- deltacat/utils/numpy.py +20 -22
- deltacat/utils/pandas.py +73 -100
- deltacat/utils/performance.py +3 -9
- deltacat/utils/placement.py +259 -230
- deltacat/utils/pyarrow.py +302 -89
- deltacat/utils/ray_utils/collections.py +2 -1
- deltacat/utils/ray_utils/concurrency.py +27 -28
- deltacat/utils/ray_utils/dataset.py +28 -28
- deltacat/utils/ray_utils/performance.py +5 -9
- deltacat/utils/ray_utils/runtime.py +9 -10
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/METADATA +1 -1
- deltacat-0.1.12.dist-info/RECORD +110 -0
- deltacat-0.1.10.dev0.dist-info/RECORD +0 -108
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/LICENSE +0 -0
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/WHEEL +0 -0
- {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/top_level.txt +0 -0
deltacat/catalog/delegate.py
CHANGED
@@ -1,11 +1,22 @@
|
|
1
|
-
import
|
1
|
+
from typing import Any, Dict, List, Optional, Set, Union
|
2
2
|
|
3
|
+
import pyarrow as pa
|
3
4
|
import ray
|
4
5
|
|
5
|
-
from deltacat import
|
6
|
-
|
7
|
-
|
8
|
-
from
|
6
|
+
from deltacat.catalog.model.catalog import Catalog, all_catalogs
|
7
|
+
from deltacat.catalog.model.table_definition import TableDefinition
|
8
|
+
from deltacat.compute.compactor.model.sort_key import SortKey
|
9
|
+
from deltacat.storage.model.list_result import ListResult
|
10
|
+
from deltacat.storage.model.namespace import Namespace
|
11
|
+
from deltacat.storage.model.types import (
|
12
|
+
DistributedDataset,
|
13
|
+
LifecycleState,
|
14
|
+
LocalDataset,
|
15
|
+
LocalTable,
|
16
|
+
SchemaConsistencyType,
|
17
|
+
)
|
18
|
+
from deltacat.types.media import ContentType
|
19
|
+
from deltacat.types.tables import TableWriteMode
|
9
20
|
|
10
21
|
|
11
22
|
def _get_catalog(name: Optional[str] = None) -> Catalog:
|
@@ -13,27 +24,32 @@ def _get_catalog(name: Optional[str] = None) -> Catalog:
|
|
13
24
|
raise ValueError(
|
14
25
|
"No catalogs available! Call "
|
15
26
|
"`deltacat.init(catalogs={...})` to register one or more "
|
16
|
-
"catalogs then retry."
|
17
|
-
|
27
|
+
"catalogs then retry."
|
28
|
+
)
|
29
|
+
catalog = (
|
30
|
+
ray.get(all_catalogs.get.remote(name))
|
31
|
+
if name
|
18
32
|
else ray.get(all_catalogs.default.remote())
|
33
|
+
)
|
19
34
|
if not catalog:
|
20
35
|
available_catalogs = ray.get(all_catalogs.all.remote()).values()
|
21
36
|
raise ValueError(
|
22
|
-
f"Catalog '{name}' not found. Available catalogs: "
|
23
|
-
|
37
|
+
f"Catalog '{name}' not found. Available catalogs: " f"{available_catalogs}."
|
38
|
+
)
|
24
39
|
return catalog
|
25
40
|
|
26
41
|
|
27
42
|
# table functions
|
28
43
|
def write_to_table(
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
44
|
+
data: Union[LocalTable, LocalDataset, DistributedDataset],
|
45
|
+
table: str,
|
46
|
+
namespace: Optional[str] = None,
|
47
|
+
catalog: Optional[str] = None,
|
48
|
+
mode: TableWriteMode = TableWriteMode.AUTO,
|
49
|
+
content_type: ContentType = ContentType.PARQUET,
|
50
|
+
*args,
|
51
|
+
**kwargs,
|
52
|
+
) -> None:
|
37
53
|
"""Write local or distributed data to a table. Raises an error if the
|
38
54
|
table does not exist and the table write mode is not CREATE or AUTO.
|
39
55
|
|
@@ -42,42 +58,35 @@ def write_to_table(
|
|
42
58
|
an existing table, all `alter_table` parameters may be optionally specified
|
43
59
|
as additional keyword arguments."""
|
44
60
|
_get_catalog(catalog).impl.write_to_table(
|
45
|
-
data,
|
46
|
-
|
47
|
-
namespace,
|
48
|
-
mode,
|
49
|
-
content_type,
|
50
|
-
*args,
|
51
|
-
**kwargs)
|
61
|
+
data, table, namespace, mode, content_type, *args, **kwargs
|
62
|
+
)
|
52
63
|
|
53
64
|
|
54
65
|
def read_table(
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
66
|
+
table: str,
|
67
|
+
namespace: Optional[str] = None,
|
68
|
+
catalog: Optional[str] = None,
|
69
|
+
*args,
|
70
|
+
**kwargs,
|
71
|
+
) -> DistributedDataset:
|
60
72
|
"""Read a table into a distributed dataset."""
|
61
|
-
return _get_catalog(catalog).impl.read_table(
|
62
|
-
table,
|
63
|
-
namespace,
|
64
|
-
*args,
|
65
|
-
**kwargs)
|
73
|
+
return _get_catalog(catalog).impl.read_table(table, namespace, *args, **kwargs)
|
66
74
|
|
67
75
|
|
68
76
|
def alter_table(
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
77
|
+
table: str,
|
78
|
+
namespace: Optional[str] = None,
|
79
|
+
catalog: Optional[str] = None,
|
80
|
+
lifecycle_state: Optional[LifecycleState] = None,
|
81
|
+
schema_updates: Optional[Dict[str, Any]] = None,
|
82
|
+
partition_updates: Optional[Dict[str, Any]] = None,
|
83
|
+
primary_keys: Optional[Set[str]] = None,
|
84
|
+
sort_keys: Optional[List[SortKey]] = None,
|
85
|
+
description: Optional[str] = None,
|
86
|
+
properties: Optional[Dict[str, str]] = None,
|
87
|
+
*args,
|
88
|
+
**kwargs,
|
89
|
+
) -> None:
|
81
90
|
"""Alter table definition."""
|
82
91
|
_get_catalog(catalog).impl.alter_table(
|
83
92
|
table,
|
@@ -90,26 +99,28 @@ def alter_table(
|
|
90
99
|
description,
|
91
100
|
properties,
|
92
101
|
*args,
|
93
|
-
**kwargs
|
102
|
+
**kwargs,
|
103
|
+
)
|
94
104
|
|
95
105
|
|
96
106
|
def create_table(
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
107
|
+
table: str,
|
108
|
+
namespace: Optional[str] = None,
|
109
|
+
catalog: Optional[str] = None,
|
110
|
+
lifecycle_state: Optional[LifecycleState] = None,
|
111
|
+
schema: Optional[Union[pa.Schema, str, bytes]] = None,
|
112
|
+
schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
|
113
|
+
partition_keys: Optional[List[Dict[str, Any]]] = None,
|
114
|
+
primary_keys: Optional[Set[str]] = None,
|
115
|
+
sort_keys: Optional[List[SortKey]] = None,
|
116
|
+
description: Optional[str] = None,
|
117
|
+
properties: Optional[Dict[str, str]] = None,
|
118
|
+
permissions: Optional[Dict[str, Any]] = None,
|
119
|
+
content_types: Optional[List[ContentType]] = None,
|
120
|
+
replace_existing_table: bool = False,
|
121
|
+
*args,
|
122
|
+
**kwargs,
|
123
|
+
) -> TableDefinition:
|
113
124
|
"""Create an empty table. Raises an error if the table already exists and
|
114
125
|
`replace_existing_table` is False."""
|
115
126
|
return _get_catalog(catalog).impl.create_table(
|
@@ -127,190 +138,145 @@ def create_table(
|
|
127
138
|
content_types,
|
128
139
|
replace_existing_table,
|
129
140
|
*args,
|
130
|
-
**kwargs
|
141
|
+
**kwargs,
|
142
|
+
)
|
131
143
|
|
132
144
|
|
133
145
|
def drop_table(
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
146
|
+
table: str,
|
147
|
+
namespace: Optional[str] = None,
|
148
|
+
catalog: Optional[str] = None,
|
149
|
+
purge: bool = False,
|
150
|
+
*args,
|
151
|
+
**kwargs,
|
152
|
+
) -> None:
|
140
153
|
"""Drop a table from the catalog and optionally purge it. Raises an error
|
141
154
|
if the table does not exist."""
|
142
|
-
_get_catalog(catalog).impl.drop_table(
|
143
|
-
table,
|
144
|
-
namespace,
|
145
|
-
purge,
|
146
|
-
*args,
|
147
|
-
**kwargs)
|
155
|
+
_get_catalog(catalog).impl.drop_table(table, namespace, purge, *args, **kwargs)
|
148
156
|
|
149
157
|
|
150
158
|
def refresh_table(
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
159
|
+
table: str,
|
160
|
+
namespace: Optional[str] = None,
|
161
|
+
catalog: Optional[str] = None,
|
162
|
+
*args,
|
163
|
+
**kwargs,
|
164
|
+
) -> None:
|
156
165
|
"""Refresh metadata cached on the Ray cluster for the given table."""
|
157
|
-
_get_catalog(catalog).impl.refresh_table(
|
158
|
-
table,
|
159
|
-
namespace,
|
160
|
-
*args,
|
161
|
-
**kwargs)
|
166
|
+
_get_catalog(catalog).impl.refresh_table(table, namespace, *args, **kwargs)
|
162
167
|
|
163
168
|
|
164
169
|
def list_tables(
|
165
|
-
|
166
|
-
|
167
|
-
*args,
|
168
|
-
**kwargs) -> ListResult[TableDefinition]:
|
170
|
+
namespace: Optional[str] = None, catalog: Optional[str] = None, *args, **kwargs
|
171
|
+
) -> ListResult[TableDefinition]:
|
169
172
|
"""List a page of table definitions. Raises an error if the given namespace
|
170
173
|
does not exist."""
|
171
|
-
return _get_catalog(catalog).impl.list_tables(
|
172
|
-
namespace,
|
173
|
-
*args,
|
174
|
-
**kwargs)
|
174
|
+
return _get_catalog(catalog).impl.list_tables(namespace, *args, **kwargs)
|
175
175
|
|
176
176
|
|
177
177
|
def get_table(
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
178
|
+
table: str,
|
179
|
+
namespace: Optional[str] = None,
|
180
|
+
catalog: Optional[str] = None,
|
181
|
+
*args,
|
182
|
+
**kwargs,
|
183
|
+
) -> Optional[TableDefinition]:
|
183
184
|
"""Get table definition metadata. Returns None if the given table does not
|
184
185
|
exist."""
|
185
|
-
return _get_catalog(catalog).impl.get_table(
|
186
|
-
table,
|
187
|
-
namespace,
|
188
|
-
*args,
|
189
|
-
**kwargs)
|
186
|
+
return _get_catalog(catalog).impl.get_table(table, namespace, *args, **kwargs)
|
190
187
|
|
191
188
|
|
192
189
|
def truncate_table(
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
190
|
+
table: str,
|
191
|
+
namespace: Optional[str] = None,
|
192
|
+
catalog: Optional[str] = None,
|
193
|
+
*args,
|
194
|
+
**kwargs,
|
195
|
+
) -> None:
|
198
196
|
"""Truncate table data. Raises an error if the table does not exist."""
|
199
|
-
_get_catalog(catalog).impl.truncate_table(
|
200
|
-
table,
|
201
|
-
namespace,
|
202
|
-
*args,
|
203
|
-
**kwargs)
|
197
|
+
_get_catalog(catalog).impl.truncate_table(table, namespace, *args, **kwargs)
|
204
198
|
|
205
199
|
|
206
200
|
def rename_table(
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
201
|
+
table: str,
|
202
|
+
new_name: str,
|
203
|
+
namespace: Optional[str] = None,
|
204
|
+
catalog: Optional[str] = None,
|
205
|
+
*args,
|
206
|
+
**kwargs,
|
207
|
+
) -> None:
|
213
208
|
"""Rename a table."""
|
214
|
-
_get_catalog(catalog).impl.rename_table(
|
215
|
-
table,
|
216
|
-
new_name,
|
217
|
-
namespace,
|
218
|
-
*args,
|
219
|
-
**kwargs)
|
209
|
+
_get_catalog(catalog).impl.rename_table(table, new_name, namespace, *args, **kwargs)
|
220
210
|
|
221
211
|
|
222
212
|
def table_exists(
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
213
|
+
table: str,
|
214
|
+
namespace: Optional[str] = None,
|
215
|
+
catalog: Optional[str] = None,
|
216
|
+
*args,
|
217
|
+
**kwargs,
|
218
|
+
) -> bool:
|
228
219
|
"""Returns True if the given table exists, False if not."""
|
229
|
-
return _get_catalog(catalog).impl.table_exists(
|
230
|
-
table,
|
231
|
-
namespace,
|
232
|
-
*args,
|
233
|
-
**kwargs)
|
220
|
+
return _get_catalog(catalog).impl.table_exists(table, namespace, *args, **kwargs)
|
234
221
|
|
235
222
|
|
236
223
|
# namespace functions
|
237
224
|
def list_namespaces(
|
238
|
-
|
239
|
-
|
240
|
-
**kwargs) -> ListResult[Namespace]:
|
225
|
+
catalog: Optional[str] = None, *args, **kwargs
|
226
|
+
) -> ListResult[Namespace]:
|
241
227
|
"""List a page of table namespaces."""
|
242
228
|
return _get_catalog(catalog).impl.list_namespaces(*args, **kwargs)
|
243
229
|
|
244
230
|
|
245
231
|
def get_namespace(
|
246
|
-
|
247
|
-
|
248
|
-
*args,
|
249
|
-
**kwargs) -> Optional[Namespace]:
|
232
|
+
namespace: str, catalog: Optional[str] = None, *args, **kwargs
|
233
|
+
) -> Optional[Namespace]:
|
250
234
|
"""Get table namespace metadata for the specified table namespace. Returns
|
251
235
|
None if the given namespace does not exist."""
|
252
|
-
return _get_catalog(catalog).impl.get_namespace(
|
253
|
-
namespace,
|
254
|
-
*args,
|
255
|
-
**kwargs)
|
236
|
+
return _get_catalog(catalog).impl.get_namespace(namespace, *args, **kwargs)
|
256
237
|
|
257
238
|
|
258
239
|
def namespace_exists(
|
259
|
-
|
260
|
-
|
261
|
-
*args,
|
262
|
-
**kwargs) -> bool:
|
240
|
+
namespace: str, catalog: Optional[str] = None, *args, **kwargs
|
241
|
+
) -> bool:
|
263
242
|
"""Returns True if the given table namespace exists, False if not."""
|
264
|
-
return _get_catalog(catalog).impl.namespace_exists(
|
265
|
-
namespace,
|
266
|
-
*args,
|
267
|
-
**kwargs)
|
243
|
+
return _get_catalog(catalog).impl.namespace_exists(namespace, *args, **kwargs)
|
268
244
|
|
269
245
|
|
270
246
|
def create_namespace(
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
247
|
+
namespace: str,
|
248
|
+
permissions: Dict[str, Any],
|
249
|
+
catalog: Optional[str] = None,
|
250
|
+
*args,
|
251
|
+
**kwargs,
|
252
|
+
) -> Namespace:
|
276
253
|
"""Creates a table namespace with the given name and permissions. Returns
|
277
254
|
the created namespace. Raises an error if the namespace already exists."""
|
278
255
|
return _get_catalog(catalog).impl.create_namespace(
|
279
|
-
namespace,
|
280
|
-
|
281
|
-
*args,
|
282
|
-
**kwargs)
|
256
|
+
namespace, permissions, *args, **kwargs
|
257
|
+
)
|
283
258
|
|
284
259
|
|
285
260
|
def alter_namespace(
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
261
|
+
namespace: str,
|
262
|
+
catalog: Optional[str] = None,
|
263
|
+
permissions: Optional[Dict[str, Any]] = None,
|
264
|
+
new_namespace: Optional[str] = None,
|
265
|
+
*args,
|
266
|
+
**kwargs,
|
267
|
+
) -> None:
|
292
268
|
"""Alter table namespace definition."""
|
293
269
|
_get_catalog(catalog).impl.alter_namespace(
|
294
|
-
namespace,
|
295
|
-
|
296
|
-
new_namespace,
|
297
|
-
*args,
|
298
|
-
**kwargs)
|
270
|
+
namespace, permissions, new_namespace, *args, **kwargs
|
271
|
+
)
|
299
272
|
|
300
273
|
|
301
274
|
def drop_namespace(
|
302
|
-
|
303
|
-
|
304
|
-
purge: bool = False,
|
305
|
-
*args,
|
306
|
-
**kwargs) -> None:
|
275
|
+
namespace: str, catalog: Optional[str] = None, purge: bool = False, *args, **kwargs
|
276
|
+
) -> None:
|
307
277
|
"""Drop the given namespace and all of its tables from the catalog,
|
308
278
|
optionally purging them."""
|
309
|
-
_get_catalog(catalog).impl.drop_namespace(
|
310
|
-
namespace,
|
311
|
-
purge,
|
312
|
-
*args,
|
313
|
-
**kwargs)
|
279
|
+
_get_catalog(catalog).impl.drop_namespace(namespace, purge, *args, **kwargs)
|
314
280
|
|
315
281
|
|
316
282
|
def default_namespace(catalog: Optional[str] = None) -> str:
|