deltacat 1.1.35__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +2 -3
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -1
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -5
- deltacat/compute/compactor_v2/steps/merge.py +11 -80
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.35.dist-info/METADATA +0 -64
- deltacat-1.1.35.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
deltacat/storage/model/table.py
CHANGED
@@ -1,25 +1,45 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
|
4
|
+
import posixpath
|
5
|
+
from typing import Any, Dict, Optional, List
|
5
6
|
|
6
|
-
|
7
|
-
from deltacat.storage.model.namespace import NamespaceLocator
|
7
|
+
import pyarrow
|
8
8
|
|
9
|
+
from deltacat.storage.model.locator import Locator, LocatorName
|
10
|
+
from deltacat.storage.model.namespace import (
|
11
|
+
NamespaceLocator,
|
12
|
+
Namespace,
|
13
|
+
)
|
14
|
+
from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
|
15
|
+
from deltacat.constants import TXN_DIR_NAME
|
16
|
+
|
17
|
+
TableProperties = dict[str, Any]
|
18
|
+
|
19
|
+
|
20
|
+
class Table(Metafile):
|
21
|
+
"""
|
22
|
+
Tables store properties common to every table version including the
|
23
|
+
table's name, a high-level description of all table versions, and
|
24
|
+
properties shared by all table versions.
|
25
|
+
"""
|
9
26
|
|
10
|
-
class Table(dict):
|
11
27
|
@staticmethod
|
12
28
|
def of(
|
13
29
|
locator: Optional[TableLocator],
|
14
|
-
permissions: Optional[Dict[str, Any]] = None,
|
15
30
|
description: Optional[str] = None,
|
16
|
-
properties: Optional[
|
31
|
+
properties: Optional[TableProperties] = None,
|
32
|
+
latest_active_table_version: Optional[str] = None,
|
33
|
+
latest_table_version: Optional[str] = None,
|
34
|
+
native_object: Optional[Any] = None,
|
17
35
|
) -> Table:
|
18
36
|
table = Table()
|
19
37
|
table.locator = locator
|
20
|
-
table.permissions = permissions
|
21
38
|
table.description = description
|
22
39
|
table.properties = properties
|
40
|
+
table.latest_active_table_version = latest_active_table_version
|
41
|
+
table.latest_table_version = latest_table_version
|
42
|
+
table.native_object = native_object
|
23
43
|
return table
|
24
44
|
|
25
45
|
@property
|
@@ -33,14 +53,6 @@ class Table(dict):
|
|
33
53
|
def locator(self, table_locator: Optional[TableLocator]) -> None:
|
34
54
|
self["tableLocator"] = table_locator
|
35
55
|
|
36
|
-
@property
|
37
|
-
def permissions(self) -> Optional[Dict[str, Any]]:
|
38
|
-
return self.get("permissions")
|
39
|
-
|
40
|
-
@permissions.setter
|
41
|
-
def permissions(self, permissions: Optional[Dict[str, Any]]) -> None:
|
42
|
-
self["permissions"] = permissions
|
43
|
-
|
44
56
|
@property
|
45
57
|
def description(self) -> Optional[str]:
|
46
58
|
return self.get("description")
|
@@ -50,13 +62,43 @@ class Table(dict):
|
|
50
62
|
self["description"] = description
|
51
63
|
|
52
64
|
@property
|
53
|
-
def properties(self) -> Optional[
|
65
|
+
def properties(self) -> Optional[TableProperties]:
|
54
66
|
return self.get("properties")
|
55
67
|
|
56
68
|
@properties.setter
|
57
|
-
def properties(self, properties: Optional[
|
69
|
+
def properties(self, properties: Optional[TableProperties]) -> None:
|
58
70
|
self["properties"] = properties
|
59
71
|
|
72
|
+
@property
|
73
|
+
def latest_active_table_version(self) -> Optional[str]:
|
74
|
+
return self.get("latest_active_table_version")
|
75
|
+
|
76
|
+
@latest_active_table_version.setter
|
77
|
+
def latest_active_table_version(
|
78
|
+
self,
|
79
|
+
latest_active_table_version: Optional[str],
|
80
|
+
) -> None:
|
81
|
+
self["latest_active_table_version"] = latest_active_table_version
|
82
|
+
|
83
|
+
@property
|
84
|
+
def latest_table_version(self) -> Optional[str]:
|
85
|
+
return self.get("latest_table_version")
|
86
|
+
|
87
|
+
@latest_table_version.setter
|
88
|
+
def latest_table_version(
|
89
|
+
self,
|
90
|
+
latest_table_version: Optional[str],
|
91
|
+
) -> None:
|
92
|
+
self["latest_table_version"] = latest_table_version
|
93
|
+
|
94
|
+
@property
|
95
|
+
def native_object(self) -> Optional[Any]:
|
96
|
+
return self.get("nativeObject")
|
97
|
+
|
98
|
+
@native_object.setter
|
99
|
+
def native_object(self, native_object: Optional[Any]) -> None:
|
100
|
+
self["nativeObject"] = native_object
|
101
|
+
|
60
102
|
@property
|
61
103
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
62
104
|
table_locator = self.locator
|
@@ -78,6 +120,60 @@ class Table(dict):
|
|
78
120
|
return table_locator.table_name
|
79
121
|
return None
|
80
122
|
|
123
|
+
@table_name.setter
|
124
|
+
def table_name(self, table_name: Optional[str]) -> None:
|
125
|
+
table_locator = self.locator
|
126
|
+
if table_locator:
|
127
|
+
table_locator.table_name = table_name
|
128
|
+
|
129
|
+
def to_serializable(self) -> Table:
|
130
|
+
serializable = self
|
131
|
+
if serializable.namespace_locator:
|
132
|
+
serializable: Table = Table.update_for(self)
|
133
|
+
# remove the mutable namespace locator
|
134
|
+
serializable.locator.namespace_locator = NamespaceLocator.of(self.id)
|
135
|
+
return serializable
|
136
|
+
|
137
|
+
def from_serializable(
|
138
|
+
self,
|
139
|
+
path: str,
|
140
|
+
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
141
|
+
) -> Table:
|
142
|
+
# restore the namespace locator from its mapped immutable metafile ID
|
143
|
+
if self.namespace_locator and self.namespace_locator.namespace == self.id:
|
144
|
+
parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
|
145
|
+
base_metafile_path=path,
|
146
|
+
parent_number=1,
|
147
|
+
)
|
148
|
+
txn_log_dir = posixpath.join(
|
149
|
+
posixpath.dirname(
|
150
|
+
posixpath.dirname(parent_rev_dir_path),
|
151
|
+
),
|
152
|
+
TXN_DIR_NAME,
|
153
|
+
)
|
154
|
+
namespace = Namespace.read(
|
155
|
+
MetafileRevisionInfo.latest_revision(
|
156
|
+
revision_dir_path=parent_rev_dir_path,
|
157
|
+
filesystem=filesystem,
|
158
|
+
success_txn_log_dir=txn_log_dir,
|
159
|
+
).path,
|
160
|
+
filesystem,
|
161
|
+
)
|
162
|
+
self.locator.namespace_locator = namespace.locator
|
163
|
+
return self
|
164
|
+
|
165
|
+
|
166
|
+
class TableLocatorName(LocatorName):
|
167
|
+
def __init__(self, locator: TableLocator):
|
168
|
+
self.locator = locator
|
169
|
+
|
170
|
+
@property
|
171
|
+
def immutable_id(self) -> Optional[str]:
|
172
|
+
return None
|
173
|
+
|
174
|
+
def parts(self) -> List[str]:
|
175
|
+
return [self.locator.table_name]
|
176
|
+
|
81
177
|
|
82
178
|
class TableLocator(Locator, dict):
|
83
179
|
@staticmethod
|
@@ -91,11 +187,19 @@ class TableLocator(Locator, dict):
|
|
91
187
|
|
92
188
|
@staticmethod
|
93
189
|
def at(namespace: Optional[str], table_name: Optional[str]) -> TableLocator:
|
94
|
-
namespace_locator = NamespaceLocator.of(namespace)
|
190
|
+
namespace_locator = NamespaceLocator.of(namespace) if namespace else None
|
95
191
|
return TableLocator.of(namespace_locator, table_name)
|
96
192
|
|
97
193
|
@property
|
98
|
-
def
|
194
|
+
def name(self) -> TableLocatorName:
|
195
|
+
return TableLocatorName(self)
|
196
|
+
|
197
|
+
@property
|
198
|
+
def parent(self) -> Optional[NamespaceLocator]:
|
199
|
+
return self.namespace_locator
|
200
|
+
|
201
|
+
@property
|
202
|
+
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
99
203
|
val: Dict[str, Any] = self.get("namespaceLocator")
|
100
204
|
if val is not None and not isinstance(val, NamespaceLocator):
|
101
205
|
self.namespace_locator = val = NamespaceLocator(val)
|
@@ -119,13 +223,3 @@ class TableLocator(Locator, dict):
|
|
119
223
|
if namespace_locator:
|
120
224
|
return namespace_locator.namespace
|
121
225
|
return None
|
122
|
-
|
123
|
-
def canonical_string(self) -> str:
|
124
|
-
"""
|
125
|
-
Returns a unique string for the given locator that can be used
|
126
|
-
for equality checks (i.e. two locators are equal if they have
|
127
|
-
the same canonical string).
|
128
|
-
"""
|
129
|
-
nl_hexdigest = self.namespace_locator.hexdigest()
|
130
|
-
table_name = self.table_name
|
131
|
-
return f"{nl_hexdigest}|{table_name}"
|
@@ -1,38 +1,76 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
|
4
|
+
import base64
|
5
|
+
import re
|
6
|
+
import posixpath
|
7
|
+
from typing import Any, Dict, List, Optional, Tuple
|
5
8
|
|
9
|
+
import pyarrow
|
6
10
|
import pyarrow as pa
|
7
11
|
|
8
|
-
|
12
|
+
import deltacat.storage.model.partition as partition
|
13
|
+
|
14
|
+
from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
|
15
|
+
from deltacat.constants import (
|
16
|
+
METAFILE_FORMAT,
|
17
|
+
METAFILE_FORMAT_JSON,
|
18
|
+
TXN_DIR_NAME,
|
19
|
+
BYTES_PER_KIBIBYTE,
|
20
|
+
)
|
21
|
+
from deltacat.storage.model.schema import (
|
22
|
+
Schema,
|
23
|
+
SchemaList,
|
24
|
+
)
|
25
|
+
from deltacat.storage.model.locator import (
|
26
|
+
Locator,
|
27
|
+
LocatorName,
|
28
|
+
)
|
9
29
|
from deltacat.storage.model.namespace import NamespaceLocator
|
10
|
-
from deltacat.storage.model.table import
|
30
|
+
from deltacat.storage.model.table import (
|
31
|
+
TableLocator,
|
32
|
+
Table,
|
33
|
+
)
|
11
34
|
from deltacat.types.media import ContentType
|
12
|
-
from deltacat.storage.model.sort_key import
|
35
|
+
from deltacat.storage.model.sort_key import SortScheme, SortSchemeList
|
36
|
+
from deltacat.storage.model.types import LifecycleState
|
13
37
|
|
38
|
+
TableVersionProperties = Dict[str, Any]
|
14
39
|
|
15
|
-
|
40
|
+
|
41
|
+
class TableVersion(Metafile):
|
16
42
|
@staticmethod
|
17
43
|
def of(
|
18
44
|
locator: Optional[TableVersionLocator],
|
19
|
-
schema: Optional[
|
20
|
-
|
21
|
-
primary_key_columns: Optional[List[str]] = None,
|
45
|
+
schema: Optional[Schema],
|
46
|
+
partition_scheme: Optional[partition.PartitionScheme] = None,
|
22
47
|
description: Optional[str] = None,
|
23
|
-
properties: Optional[
|
48
|
+
properties: Optional[TableVersionProperties] = None,
|
24
49
|
content_types: Optional[List[ContentType]] = None,
|
25
|
-
|
50
|
+
sort_scheme: Optional[SortScheme] = None,
|
51
|
+
watermark: Optional[int] = None,
|
52
|
+
lifecycle_state: Optional[LifecycleState] = None,
|
53
|
+
schemas: Optional[SchemaList] = None,
|
54
|
+
partition_schemes: Optional[partition.PartitionSchemeList] = None,
|
55
|
+
sort_schemes: Optional[SortSchemeList] = None,
|
56
|
+
previous_table_version: Optional[str] = None,
|
57
|
+
native_object: Optional[Any] = None,
|
26
58
|
) -> TableVersion:
|
27
59
|
table_version = TableVersion()
|
28
60
|
table_version.locator = locator
|
29
61
|
table_version.schema = schema
|
30
|
-
table_version.
|
31
|
-
table_version.primary_keys = primary_key_columns
|
62
|
+
table_version.partition_scheme = partition_scheme
|
32
63
|
table_version.description = description
|
33
64
|
table_version.properties = properties
|
34
65
|
table_version.content_types = content_types
|
35
|
-
table_version.
|
66
|
+
table_version.sort_scheme = sort_scheme
|
67
|
+
table_version.watermark = watermark
|
68
|
+
table_version.state = lifecycle_state
|
69
|
+
table_version.schemas = schemas
|
70
|
+
table_version.partition_schemes = partition_schemes
|
71
|
+
table_version.sort_schemes = sort_schemes
|
72
|
+
table_version.previous_table_version = previous_table_version
|
73
|
+
table_version.native_object = native_object
|
36
74
|
return table_version
|
37
75
|
|
38
76
|
@property
|
@@ -47,36 +85,91 @@ class TableVersion(dict):
|
|
47
85
|
self["tableVersionLocator"] = table_version_locator
|
48
86
|
|
49
87
|
@property
|
50
|
-
def schema(self) -> Optional[
|
51
|
-
|
88
|
+
def schema(self) -> Optional[Schema]:
|
89
|
+
val: Dict[str, Any] = self.get("schema")
|
90
|
+
if val is not None and not isinstance(val, Schema):
|
91
|
+
self.schema = val = Schema(val)
|
92
|
+
return val
|
52
93
|
|
53
94
|
@schema.setter
|
54
|
-
def schema(self, schema: Optional[
|
95
|
+
def schema(self, schema: Optional[Schema]) -> None:
|
55
96
|
self["schema"] = schema
|
56
97
|
|
57
98
|
@property
|
58
|
-
def
|
59
|
-
|
99
|
+
def schemas(self) -> Optional[SchemaList]:
|
100
|
+
val: Optional[SchemaList] = self.get("schemas")
|
101
|
+
if val is not None and not isinstance(val, SchemaList):
|
102
|
+
self["schemas"] = val = SchemaList.of(val)
|
103
|
+
return val
|
104
|
+
|
105
|
+
@schemas.setter
|
106
|
+
def schemas(self, schemas: Optional[SchemaList]) -> None:
|
107
|
+
self["schemas"] = schemas
|
108
|
+
|
109
|
+
@property
|
110
|
+
def sort_scheme(self) -> Optional[SortScheme]:
|
111
|
+
val: Dict[str, Any] = self.get("sortScheme")
|
112
|
+
if val is not None and not isinstance(val, SortScheme):
|
113
|
+
self["sortScheme"] = val = SortScheme(val)
|
114
|
+
return val
|
60
115
|
|
61
|
-
@
|
62
|
-
def
|
63
|
-
self["
|
116
|
+
@sort_scheme.setter
|
117
|
+
def sort_scheme(self, sort_scheme: Optional[SortScheme]) -> None:
|
118
|
+
self["sortScheme"] = sort_scheme
|
64
119
|
|
65
120
|
@property
|
66
|
-
def
|
67
|
-
|
121
|
+
def sort_schemes(self) -> Optional[SortSchemeList]:
|
122
|
+
val: Dict[str, Any] = self.get("sortSchemes")
|
123
|
+
if val is not None and not isinstance(val, SortSchemeList):
|
124
|
+
self["sortSchemes"] = val = SortSchemeList.of(val)
|
125
|
+
return val
|
68
126
|
|
69
|
-
@
|
70
|
-
def
|
71
|
-
self["
|
127
|
+
@sort_schemes.setter
|
128
|
+
def sort_schemes(self, sort_schemes: Optional[SortSchemeList]) -> None:
|
129
|
+
self["sortSchemes"] = sort_schemes
|
72
130
|
|
73
131
|
@property
|
74
|
-
def
|
75
|
-
return self.get("
|
132
|
+
def watermark(self) -> Optional[int]:
|
133
|
+
return self.get("watermark")
|
134
|
+
|
135
|
+
@watermark.setter
|
136
|
+
def watermark(self, watermark: Optional[int]) -> None:
|
137
|
+
self["watermark"] = watermark
|
138
|
+
|
139
|
+
@property
|
140
|
+
def state(self) -> Optional[LifecycleState]:
|
141
|
+
state = self.get("state")
|
142
|
+
return None if state is None else LifecycleState(state)
|
143
|
+
|
144
|
+
@state.setter
|
145
|
+
def state(self, state: Optional[LifecycleState]) -> None:
|
146
|
+
self["state"] = state
|
147
|
+
|
148
|
+
@property
|
149
|
+
def partition_scheme(self) -> Optional[partition.PartitionScheme]:
|
150
|
+
val: Dict[str, Any] = self.get("partitionScheme")
|
151
|
+
if val is not None and not isinstance(val, partition.PartitionScheme):
|
152
|
+
self["partitionScheme"] = val = partition.PartitionScheme(val)
|
153
|
+
return val
|
76
154
|
|
77
|
-
@
|
78
|
-
def
|
79
|
-
self[
|
155
|
+
@partition_scheme.setter
|
156
|
+
def partition_scheme(
|
157
|
+
self, partition_scheme: Optional[partition.PartitionScheme]
|
158
|
+
) -> None:
|
159
|
+
self["partitionScheme"] = partition_scheme
|
160
|
+
|
161
|
+
@property
|
162
|
+
def partition_schemes(self) -> Optional[partition.PartitionSchemeList]:
|
163
|
+
val: Dict[str, Any] = self.get("partitionSchemes")
|
164
|
+
if val is not None and not isinstance(val, partition.PartitionSchemeList):
|
165
|
+
self["partitionSchemes"] = val = partition.PartitionSchemeList.of(val)
|
166
|
+
return val
|
167
|
+
|
168
|
+
@partition_schemes.setter
|
169
|
+
def partition_schemes(
|
170
|
+
self, partition_schemes: Optional[partition.PartitionSchemeList]
|
171
|
+
) -> None:
|
172
|
+
self["partitionSchemes"] = partition_schemes
|
80
173
|
|
81
174
|
@property
|
82
175
|
def description(self) -> Optional[str]:
|
@@ -87,11 +180,19 @@ class TableVersion(dict):
|
|
87
180
|
self["description"] = description
|
88
181
|
|
89
182
|
@property
|
90
|
-
def
|
183
|
+
def previous_table_version(self) -> Optional[str]:
|
184
|
+
return self.get("previous_table_version")
|
185
|
+
|
186
|
+
@previous_table_version.setter
|
187
|
+
def previous_table_version(self, previous_table_version: Optional[str]) -> None:
|
188
|
+
self["previous_table_version"] = previous_table_version
|
189
|
+
|
190
|
+
@property
|
191
|
+
def properties(self) -> Optional[TableVersionProperties]:
|
91
192
|
return self.get("properties")
|
92
193
|
|
93
194
|
@properties.setter
|
94
|
-
def properties(self, properties: Optional[
|
195
|
+
def properties(self, properties: Optional[TableVersionProperties]) -> None:
|
95
196
|
self["properties"] = properties
|
96
197
|
|
97
198
|
@property
|
@@ -107,6 +208,14 @@ class TableVersion(dict):
|
|
107
208
|
def content_types(self, content_types: Optional[List[ContentType]]) -> None:
|
108
209
|
self["contentTypes"] = content_types
|
109
210
|
|
211
|
+
@property
|
212
|
+
def native_object(self) -> Optional[Any]:
|
213
|
+
return self.get("nativeObject")
|
214
|
+
|
215
|
+
@native_object.setter
|
216
|
+
def native_object(self, native_object: Optional[Any]) -> None:
|
217
|
+
self["nativeObject"] = native_object
|
218
|
+
|
110
219
|
@property
|
111
220
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
112
221
|
table_version_locator = self.locator
|
@@ -148,11 +257,174 @@ class TableVersion(dict):
|
|
148
257
|
content_type in supported_content_types
|
149
258
|
)
|
150
259
|
|
260
|
+
def to_serializable(self) -> TableVersion:
|
261
|
+
serializable: TableVersion = TableVersion.update_for(self)
|
262
|
+
if serializable.schema:
|
263
|
+
schema_bytes = serializable.schema.serialize().to_pybytes()
|
264
|
+
serializable.schema = (
|
265
|
+
base64.b64encode(schema_bytes).decode("utf-8")
|
266
|
+
if METAFILE_FORMAT == METAFILE_FORMAT_JSON
|
267
|
+
else schema_bytes
|
268
|
+
)
|
269
|
+
|
270
|
+
if serializable.schemas:
|
271
|
+
serializable.schemas = [
|
272
|
+
base64.b64encode(schema.serialize().to_pybytes()).decode("utf-8")
|
273
|
+
if METAFILE_FORMAT == METAFILE_FORMAT_JSON
|
274
|
+
else schema.serialize().to_pybytes()
|
275
|
+
for schema in serializable.schemas
|
276
|
+
]
|
277
|
+
if serializable.table_locator:
|
278
|
+
# remove the mutable table locator
|
279
|
+
serializable.locator.table_locator = TableLocator.at(
|
280
|
+
namespace=self.id,
|
281
|
+
table_name=self.id,
|
282
|
+
)
|
283
|
+
return serializable
|
284
|
+
|
285
|
+
def from_serializable(
|
286
|
+
self,
|
287
|
+
path: str,
|
288
|
+
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
289
|
+
) -> TableVersion:
|
290
|
+
if self.get("schema"):
|
291
|
+
schema_data = self["schema"]
|
292
|
+
schema_bytes = (
|
293
|
+
base64.b64decode(schema_data)
|
294
|
+
if METAFILE_FORMAT == "json"
|
295
|
+
else schema_data
|
296
|
+
)
|
297
|
+
self["schema"] = Schema.deserialize(pa.py_buffer(schema_bytes))
|
298
|
+
else:
|
299
|
+
self["schema"] = None
|
300
|
+
|
301
|
+
if self.get("schemas"):
|
302
|
+
self.schemas = [
|
303
|
+
Schema.deserialize(
|
304
|
+
pa.py_buffer(
|
305
|
+
base64.b64decode(schema)
|
306
|
+
if METAFILE_FORMAT == METAFILE_FORMAT_JSON
|
307
|
+
else schema
|
308
|
+
)
|
309
|
+
)
|
310
|
+
for schema in self["schemas"]
|
311
|
+
]
|
312
|
+
else:
|
313
|
+
self.schemas = None
|
314
|
+
|
315
|
+
if self.sort_scheme:
|
316
|
+
# force list-to-tuple conversion of sort keys via property invocation
|
317
|
+
self.sort_scheme.keys
|
318
|
+
[sort_scheme.keys for sort_scheme in self.sort_schemes]
|
319
|
+
# restore the table locator from its mapped immutable metafile ID
|
320
|
+
if self.table_locator and self.table_locator.table_name == self.id:
|
321
|
+
parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
|
322
|
+
base_metafile_path=path,
|
323
|
+
parent_number=1,
|
324
|
+
)
|
325
|
+
txn_log_dir = posixpath.join(
|
326
|
+
posixpath.dirname(
|
327
|
+
posixpath.dirname(
|
328
|
+
posixpath.dirname(parent_rev_dir_path),
|
329
|
+
)
|
330
|
+
),
|
331
|
+
TXN_DIR_NAME,
|
332
|
+
)
|
333
|
+
table = Table.read(
|
334
|
+
MetafileRevisionInfo.latest_revision(
|
335
|
+
revision_dir_path=parent_rev_dir_path,
|
336
|
+
filesystem=filesystem,
|
337
|
+
success_txn_log_dir=txn_log_dir,
|
338
|
+
).path,
|
339
|
+
filesystem,
|
340
|
+
)
|
341
|
+
self.locator.table_locator = table.locator
|
342
|
+
return self
|
343
|
+
|
344
|
+
def current_version_number(self) -> Optional[int]:
    """
    Returns the current table version number as an integer, or None if
    a table version has not yet been assigned.

    :return: Numeric component of `self.table_version` as parsed by
        TableVersion.parse_table_version, or None when the table version
        is None.
    :raises ValueError: If the assigned table version is not parseable.
    """
    table_version = self.table_version
    if table_version is None:
        return None
    # only the numeric component is needed here; the prefix is discarded.
    # parse_table_version already returns the number as an int.
    _, version_number = TableVersion.parse_table_version(table_version)
    return version_number
|
357
|
+
|
358
|
+
@staticmethod
|
359
|
+
def next_version(previous_version: Optional[str] = None) -> str:
|
360
|
+
"""
|
361
|
+
Assigns the next table version string given the previous table version
|
362
|
+
by incrementing the version number of the given previous table version
|
363
|
+
identifier. Returns "1" if the previous version is undefined.
|
364
|
+
"""
|
365
|
+
prefix, previous_version_number = (
|
366
|
+
TableVersion.parse_table_version(
|
367
|
+
previous_version,
|
368
|
+
)
|
369
|
+
if previous_version is not None
|
370
|
+
else (None, None)
|
371
|
+
)
|
372
|
+
new_version_number = (
|
373
|
+
int(previous_version_number) + 1
|
374
|
+
if previous_version_number is not None
|
375
|
+
else 1
|
376
|
+
)
|
377
|
+
new_prefix = prefix if prefix is not None else ""
|
378
|
+
return f"{new_prefix}{new_version_number}"
|
379
|
+
|
380
|
+
@staticmethod
|
381
|
+
def parse_table_version(table_version: str) -> Tuple[Optional[str], int]:
|
382
|
+
"""
|
383
|
+
Parses a table version string into its prefix and version number.
|
384
|
+
Returns a tuple of the prefix and version number.
|
385
|
+
"""
|
386
|
+
if not table_version:
|
387
|
+
raise ValueError(f"Table version to parse is undefined.")
|
388
|
+
if len(table_version) > BYTES_PER_KIBIBYTE:
|
389
|
+
raise ValueError(
|
390
|
+
f"Invalid table version {table_version}. Table version "
|
391
|
+
f"identifier cannot be greater than {BYTES_PER_KIBIBYTE} "
|
392
|
+
f"characters."
|
393
|
+
)
|
394
|
+
version_match = re.match(
|
395
|
+
rf"^(\w*\.)?(\d+)$",
|
396
|
+
table_version,
|
397
|
+
)
|
398
|
+
if version_match:
|
399
|
+
prefix, version_number = version_match.groups()
|
400
|
+
return prefix, int(version_number)
|
401
|
+
raise ValueError(
|
402
|
+
f"Invalid table version {table_version}. Valid table versions "
|
403
|
+
f"are of the form `TableVersionName.1` or simply `1`.",
|
404
|
+
)
|
405
|
+
|
406
|
+
|
407
|
+
class TableVersionLocatorName(LocatorName):
    """
    Locator name view over a TableVersionLocator.

    Both the immutable ID and the single name part are the wrapped
    locator's table version string.
    """

    def __init__(self, locator: TableVersionLocator):
        # keep a reference (not a copy) so writes reach the wrapped locator
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """Table version string of the wrapped locator."""
        wrapped = self.locator
        return wrapped.table_version

    @immutable_id.setter
    def immutable_id(self, immutable_id: Optional[str]):
        # assignment goes through the wrapped locator's table_version setter
        wrapped = self.locator
        wrapped.table_version = immutable_id

    def parts(self) -> List[str]:
        """Returns a one-element list holding the locator's table version."""
        table_version = self.locator.table_version
        return [table_version]
|
421
|
+
|
151
422
|
|
152
423
|
class TableVersionLocator(Locator, dict):
|
153
424
|
@staticmethod
|
154
425
|
def of(
|
155
|
-
table_locator: Optional[TableLocator],
|
426
|
+
table_locator: Optional[TableLocator],
|
427
|
+
table_version: Optional[str],
|
156
428
|
) -> TableVersionLocator:
|
157
429
|
table_version_locator = TableVersionLocator()
|
158
430
|
table_version_locator.table_locator = table_locator
|
@@ -165,9 +437,17 @@ class TableVersionLocator(Locator, dict):
|
|
165
437
|
table_name: Optional[str],
|
166
438
|
table_version: Optional[str],
|
167
439
|
) -> TableVersionLocator:
|
168
|
-
table_locator = TableLocator.at(namespace, table_name)
|
440
|
+
table_locator = TableLocator.at(namespace, table_name) if table_name else None
|
169
441
|
return TableVersionLocator.of(table_locator, table_version)
|
170
442
|
|
443
|
+
@property
def name(self):
    """Returns a TableVersionLocatorName that wraps this locator."""
    locator_name = TableVersionLocatorName(self)
    return locator_name
|
446
|
+
|
447
|
+
@property
def parent(self) -> Optional[TableLocator]:
    """Returns this locator's table locator as its parent locator."""
    parent_locator = self.table_locator
    return parent_locator
|
450
|
+
|
171
451
|
@property
|
172
452
|
def table_locator(self) -> Optional[TableLocator]:
|
173
453
|
val: Dict[str, Any] = self.get("tableLocator")
|
@@ -185,7 +465,13 @@ class TableVersionLocator(Locator, dict):
|
|
185
465
|
|
186
466
|
@table_version.setter
def table_version(self, table_version: Optional[str]) -> None:
    """
    Validates the given table version and stores it in canonical form.

    Note that although the parameter is annotated Optional, a None or
    empty value is rejected by TableVersion.parse_table_version.

    :param table_version: Table version string to assign.
    :raises ValueError: If the table version cannot be parsed.
    """
    # parsing doubles as validation of the table version
    prefix, version_number = TableVersion.parse_table_version(table_version)
    # restate the table version number in its canonical form
    # (e.g., ensure that "MyVersion.0001" is saved as "MyVersion.1")
    canonical_version = str(version_number)
    if prefix:
        canonical_version = f"{prefix}{version_number}"
    self["tableVersion"] = canonical_version
|
189
475
|
|
190
476
|
@property
|
191
477
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
@@ -207,13 +493,3 @@ class TableVersionLocator(Locator, dict):
|
|
207
493
|
if table_locator:
|
208
494
|
return table_locator.table_name
|
209
495
|
return None
|
210
|
-
|
211
|
-
def canonical_string(self) -> str:
|
212
|
-
"""
|
213
|
-
Returns a unique string for the given locator that can be used
|
214
|
-
for equality checks (i.e. two locators are equal if they have
|
215
|
-
the same canonical string).
|
216
|
-
"""
|
217
|
-
tl_hexdigest = self.table_locator.hexdigest()
|
218
|
-
table_version = self.table_version
|
219
|
-
return f"{tl_hexdigest}|{table_version}"
|