deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
deltacat/storage/model/delta.py
CHANGED
@@ -1,31 +1,50 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
+
import posixpath
|
4
5
|
from typing import Any, Dict, List, Optional
|
5
6
|
|
6
|
-
|
7
|
-
|
8
|
-
from deltacat.storage.model.
|
7
|
+
import pyarrow
|
8
|
+
|
9
|
+
from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
|
10
|
+
from deltacat.constants import TXN_DIR_NAME
|
11
|
+
from deltacat.storage.model.manifest import (
|
12
|
+
Manifest,
|
13
|
+
ManifestMeta,
|
14
|
+
ManifestAuthor,
|
15
|
+
)
|
16
|
+
from deltacat.storage.model.locator import (
|
17
|
+
Locator,
|
18
|
+
LocatorName,
|
19
|
+
)
|
9
20
|
from deltacat.storage.model.namespace import NamespaceLocator
|
10
|
-
from deltacat.storage.model.partition import
|
21
|
+
from deltacat.storage.model.partition import (
|
22
|
+
PartitionLocator,
|
23
|
+
PartitionValues,
|
24
|
+
)
|
11
25
|
from deltacat.storage.model.stream import StreamLocator
|
12
|
-
from deltacat.storage.model.table import
|
26
|
+
from deltacat.storage.model.table import (
|
27
|
+
TableLocator,
|
28
|
+
Table,
|
29
|
+
)
|
13
30
|
from deltacat.storage.model.table_version import TableVersionLocator
|
14
|
-
from deltacat.storage.model.types import
|
15
|
-
|
31
|
+
from deltacat.storage.model.types import (
|
32
|
+
DeltaType,
|
33
|
+
StreamFormat,
|
34
|
+
)
|
16
35
|
|
36
|
+
DeltaProperties = Dict[str, Any]
|
17
37
|
|
18
|
-
|
38
|
+
|
39
|
+
class Delta(Metafile):
|
19
40
|
@staticmethod
|
20
41
|
def of(
|
21
42
|
locator: Optional[DeltaLocator],
|
22
43
|
delta_type: Optional[DeltaType],
|
23
44
|
meta: Optional[ManifestMeta],
|
24
|
-
properties: Optional[
|
45
|
+
properties: Optional[DeltaProperties],
|
25
46
|
manifest: Optional[Manifest],
|
26
47
|
previous_stream_position: Optional[int] = None,
|
27
|
-
delete_parameters: Optional[DeleteParameters] = None,
|
28
|
-
partition_spec: Optional[DeltaPartitionSpec] = None,
|
29
48
|
) -> Delta:
|
30
49
|
"""
|
31
50
|
Creates a Delta metadata model with the given Delta Locator, Delta Type,
|
@@ -39,8 +58,6 @@ class Delta(dict):
|
|
39
58
|
delta.properties = properties
|
40
59
|
delta.manifest = manifest
|
41
60
|
delta.previous_stream_position = previous_stream_position
|
42
|
-
delta.delete_parameters = delete_parameters
|
43
|
-
delta.partition_spec = partition_spec
|
44
61
|
return delta
|
45
62
|
|
46
63
|
@staticmethod
|
@@ -48,7 +65,7 @@ class Delta(dict):
|
|
48
65
|
deltas: List[Delta],
|
49
66
|
manifest_author: Optional[ManifestAuthor] = None,
|
50
67
|
stream_position: Optional[int] = None,
|
51
|
-
properties: Optional[
|
68
|
+
properties: Optional[DeltaProperties] = None,
|
52
69
|
) -> Delta:
|
53
70
|
"""
|
54
71
|
Merges the input list of deltas into a single delta. All input deltas to
|
@@ -93,25 +110,10 @@ class Delta(dict):
|
|
93
110
|
f"Deltas to merge must all share the same delta type "
|
94
111
|
f"(found {len(distinct_delta_types)} delta types)."
|
95
112
|
)
|
96
|
-
distinct_partition_spec = set([d.partition_spec for d in deltas])
|
97
|
-
if len(distinct_partition_spec) > 1:
|
98
|
-
raise ValueError(
|
99
|
-
f"Deltas to merge must all share the same partition spec "
|
100
|
-
f"(found {len(distinct_partition_spec)} partition specs)."
|
101
|
-
)
|
102
113
|
merged_manifest = Manifest.merge_manifests(
|
103
114
|
manifests,
|
104
115
|
manifest_author,
|
105
116
|
)
|
106
|
-
distinct_delta_type = list(distinct_delta_types)[0]
|
107
|
-
merged_delete_parameters = None
|
108
|
-
if distinct_delta_type is DeltaType.DELETE:
|
109
|
-
delete_parameters: List[DeleteParameters] = [
|
110
|
-
d.delete_parameters for d in deltas if d.delete_parameters
|
111
|
-
]
|
112
|
-
merged_delete_parameters: Optional[
|
113
|
-
DeleteParameters
|
114
|
-
] = DeleteParameters.merge_delete_parameters(delete_parameters)
|
115
117
|
partition_locator = deltas[0].partition_locator
|
116
118
|
prev_positions = [d.previous_stream_position for d in deltas]
|
117
119
|
prev_position = None if None in prev_positions else max(prev_positions)
|
@@ -122,7 +124,6 @@ class Delta(dict):
|
|
122
124
|
properties,
|
123
125
|
merged_manifest,
|
124
126
|
prev_position,
|
125
|
-
merged_delete_parameters,
|
126
127
|
)
|
127
128
|
|
128
129
|
@property
|
@@ -148,11 +149,11 @@ class Delta(dict):
|
|
148
149
|
self["meta"] = meta
|
149
150
|
|
150
151
|
@property
|
151
|
-
def properties(self) -> Optional[
|
152
|
+
def properties(self) -> Optional[DeltaProperties]:
|
152
153
|
return self.get("properties")
|
153
154
|
|
154
155
|
@properties.setter
|
155
|
-
def properties(self, properties: Optional[
|
156
|
+
def properties(self, properties: Optional[DeltaProperties]) -> None:
|
156
157
|
self["properties"] = properties
|
157
158
|
|
158
159
|
@property
|
@@ -222,7 +223,7 @@ class Delta(dict):
|
|
222
223
|
def storage_type(self) -> Optional[str]:
|
223
224
|
delta_locator = self.locator
|
224
225
|
if delta_locator:
|
225
|
-
return delta_locator.
|
226
|
+
return delta_locator.stream_format
|
226
227
|
return None
|
227
228
|
|
228
229
|
@property
|
@@ -253,6 +254,13 @@ class Delta(dict):
|
|
253
254
|
return delta_locator.stream_id
|
254
255
|
return None
|
255
256
|
|
257
|
+
@property
|
258
|
+
def stream_format(self) -> Optional[str]:
|
259
|
+
delta_locator = self.locator
|
260
|
+
if delta_locator:
|
261
|
+
return delta_locator.stream_format
|
262
|
+
return None
|
263
|
+
|
256
264
|
@property
|
257
265
|
def partition_id(self) -> Optional[str]:
|
258
266
|
delta_locator = self.locator
|
@@ -274,27 +282,64 @@ class Delta(dict):
|
|
274
282
|
return delta_locator.stream_position
|
275
283
|
return None
|
276
284
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
285
|
+
def to_serializable(self) -> Delta:
|
286
|
+
serializable = self
|
287
|
+
if serializable.table_locator:
|
288
|
+
serializable: Delta = Delta.update_for(self)
|
289
|
+
# remove the mutable table locator
|
290
|
+
serializable.table_version_locator.table_locator = TableLocator.at(
|
291
|
+
namespace=self.id,
|
292
|
+
table_name=self.id,
|
293
|
+
)
|
294
|
+
return serializable
|
295
|
+
|
296
|
+
def from_serializable(
|
297
|
+
self,
|
298
|
+
path: str,
|
299
|
+
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
300
|
+
) -> Delta:
|
301
|
+
# TODO(pdames): Lazily restore table locator on 1st property get.
|
302
|
+
# Cache Metafile ID <-> Table/Namespace-Name map at Catalog Init, then
|
303
|
+
# swap only Metafile IDs with Names here.
|
304
|
+
if self.table_locator and self.table_locator.table_name == self.id:
|
305
|
+
parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
|
306
|
+
base_metafile_path=path,
|
307
|
+
parent_number=4,
|
308
|
+
)
|
309
|
+
txn_log_dir = posixpath.join(
|
310
|
+
posixpath.dirname(
|
311
|
+
posixpath.dirname(
|
312
|
+
posixpath.dirname(parent_rev_dir_path),
|
313
|
+
)
|
314
|
+
),
|
315
|
+
TXN_DIR_NAME,
|
316
|
+
)
|
317
|
+
table = Table.read(
|
318
|
+
MetafileRevisionInfo.latest_revision(
|
319
|
+
revision_dir_path=parent_rev_dir_path,
|
320
|
+
filesystem=filesystem,
|
321
|
+
success_txn_log_dir=txn_log_dir,
|
322
|
+
).path,
|
323
|
+
filesystem,
|
324
|
+
)
|
325
|
+
self.table_version_locator.table_locator = table.locator
|
326
|
+
return self
|
283
327
|
|
284
|
-
|
285
|
-
|
286
|
-
|
328
|
+
|
329
|
+
class DeltaLocatorName(LocatorName):
|
330
|
+
def __init__(self, locator: DeltaLocator):
|
331
|
+
self.locator = locator
|
287
332
|
|
288
333
|
@property
|
289
|
-
def
|
290
|
-
|
291
|
-
if val is not None and not isinstance(val, DeltaPartitionSpec):
|
292
|
-
self.partition_spec = val = DeltaPartitionSpec(val)
|
293
|
-
return val
|
334
|
+
def immutable_id(self) -> Optional[str]:
|
335
|
+
return str(self.locator.stream_position)
|
294
336
|
|
295
|
-
@
|
296
|
-
def
|
297
|
-
self
|
337
|
+
@immutable_id.setter
|
338
|
+
def immutable_id(self, immutable_id: Optional[str]):
|
339
|
+
self.locator.stream_position = int(immutable_id)
|
340
|
+
|
341
|
+
def parts(self) -> List[str]:
|
342
|
+
return [str(self.locator.stream_position)]
|
298
343
|
|
299
344
|
|
300
345
|
class DeltaLocator(Locator, dict):
|
@@ -318,25 +363,37 @@ class DeltaLocator(Locator, dict):
|
|
318
363
|
table_name: Optional[str],
|
319
364
|
table_version: Optional[str],
|
320
365
|
stream_id: Optional[str],
|
321
|
-
|
366
|
+
stream_format: Optional[StreamFormat],
|
322
367
|
partition_values: Optional[PartitionValues],
|
323
368
|
partition_id: Optional[str],
|
324
369
|
stream_position: Optional[int],
|
325
370
|
) -> DeltaLocator:
|
326
|
-
partition_locator =
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
371
|
+
partition_locator = (
|
372
|
+
PartitionLocator.at(
|
373
|
+
namespace,
|
374
|
+
table_name,
|
375
|
+
table_version,
|
376
|
+
stream_id,
|
377
|
+
stream_format,
|
378
|
+
partition_values,
|
379
|
+
partition_id,
|
380
|
+
)
|
381
|
+
if partition_values and partition_id
|
382
|
+
else None
|
334
383
|
)
|
335
384
|
return DeltaLocator.of(
|
336
385
|
partition_locator,
|
337
386
|
stream_position,
|
338
387
|
)
|
339
388
|
|
389
|
+
@property
|
390
|
+
def name(self):
|
391
|
+
return DeltaLocatorName(self)
|
392
|
+
|
393
|
+
@property
|
394
|
+
def parent(self) -> Optional[PartitionLocator]:
|
395
|
+
return self.partition_locator
|
396
|
+
|
340
397
|
@property
|
341
398
|
def partition_locator(self) -> Optional[PartitionLocator]:
|
342
399
|
val: Dict[str, Any] = self.get("partitionLocator")
|
@@ -406,10 +463,10 @@ class DeltaLocator(Locator, dict):
|
|
406
463
|
return None
|
407
464
|
|
408
465
|
@property
|
409
|
-
def
|
466
|
+
def stream_format(self) -> Optional[str]:
|
410
467
|
partition_locator = self.partition_locator
|
411
468
|
if partition_locator:
|
412
|
-
return partition_locator.
|
469
|
+
return partition_locator.stream_format
|
413
470
|
return None
|
414
471
|
|
415
472
|
@property
|
@@ -432,13 +489,3 @@ class DeltaLocator(Locator, dict):
|
|
432
489
|
if partition_locator:
|
433
490
|
return partition_locator.table_version
|
434
491
|
return None
|
435
|
-
|
436
|
-
def canonical_string(self) -> str:
|
437
|
-
"""
|
438
|
-
Returns a unique string for the given locator that can be used
|
439
|
-
for equality checks (i.e. two locators are equal if they have
|
440
|
-
the same canonical string).
|
441
|
-
"""
|
442
|
-
pl_hexdigest = self.partition_locator.hexdigest()
|
443
|
-
stream_position = self.stream_position
|
444
|
-
return f"{pl_hexdigest}|{stream_position}"
|
@@ -0,0 +1,24 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Generic, Optional, TypeVar
|
3
|
+
|
4
|
+
T = TypeVar("T")
|
5
|
+
U = TypeVar("U")
|
6
|
+
|
7
|
+
|
8
|
+
class ModelMapper(ABC, Generic[T, U]):
|
9
|
+
@staticmethod
|
10
|
+
@abstractmethod
|
11
|
+
def map(obj: Optional[T], *args, **kwargs) -> Optional[U]:
|
12
|
+
pass
|
13
|
+
|
14
|
+
@staticmethod
|
15
|
+
@abstractmethod
|
16
|
+
def unmap(obj: Optional[U], **kwargs) -> Optional[T]:
|
17
|
+
pass
|
18
|
+
|
19
|
+
|
20
|
+
class OneWayModelMapper(ABC, Generic[T, U]):
|
21
|
+
@staticmethod
|
22
|
+
@abstractmethod
|
23
|
+
def map(obj: Optional[T], **kwargs) -> Optional[U]:
|
24
|
+
pass
|
@@ -21,6 +21,14 @@ class ListResult(dict, Generic[T]):
|
|
21
21
|
list_result["nextPageProvider"] = next_page_provider
|
22
22
|
return list_result
|
23
23
|
|
24
|
+
@staticmethod
|
25
|
+
def empty() -> ListResult:
|
26
|
+
list_result = ListResult()
|
27
|
+
list_result["items"] = []
|
28
|
+
list_result["paginationKey"] = None
|
29
|
+
list_result["nextPageProvider"] = None
|
30
|
+
return list_result
|
31
|
+
|
24
32
|
def read_page(self) -> Optional[List[T]]:
|
25
33
|
return self.get("items")
|
26
34
|
|
@@ -1,14 +1,104 @@
|
|
1
|
+
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
|
+
from __future__ import annotations
|
3
|
+
|
4
|
+
from typing import Optional, List
|
5
|
+
|
1
6
|
from deltacat.utils.common import sha1_digest, sha1_hexdigest
|
2
7
|
|
8
|
+
DEFAULT_NAME_SEPARATOR = "|"
|
9
|
+
DEFAULT_PATH_SEPARATOR = "/"
|
10
|
+
|
11
|
+
|
12
|
+
class LocatorName:
|
13
|
+
"""
|
14
|
+
Assigns a name to a catalog object. All sibling catalog objects must be
|
15
|
+
assigned unique names (e.g., all namespaces in a catalog must be assigned
|
16
|
+
unique locator names, all tables under a namespace must be assigned unique
|
17
|
+
locator names, etc.). Names may be mutable (e.g., namespace and table names)
|
18
|
+
or immutable (e.g., partition/stream IDs and delta stream positions). Names
|
19
|
+
may be single or multi-part.
|
20
|
+
"""
|
21
|
+
|
22
|
+
@property
|
23
|
+
def immutable_id(self) -> Optional[str]:
|
24
|
+
"""
|
25
|
+
If this locator name is immutable (i.e., if the object it refers to
|
26
|
+
can't be renamed) then returns an immutable ID suitable for use in
|
27
|
+
URLS or filesystem paths. Returns None if this locator name is mutable
|
28
|
+
(i.e., if the object it refers to can be renamed).
|
29
|
+
"""
|
30
|
+
raise NotImplementedError()
|
31
|
+
|
32
|
+
@immutable_id.setter
|
33
|
+
def immutable_id(self, immutable_id: Optional[str]) -> None:
|
34
|
+
"""
|
35
|
+
If this locator name is immutable (i.e., if the object it refers to
|
36
|
+
can't be renamed), then sets an immutable ID for this
|
37
|
+
locator name suitable for use in URLS or filesystem paths. Note that
|
38
|
+
the ID is only considered immutable in durable catalog storage, and
|
39
|
+
remains mutable in transient memory (i.e., this setter remains
|
40
|
+
functional regardless of whether an ID is already assigned, but each
|
41
|
+
update causes it to refer to a new, distinct object in durable storage).
|
42
|
+
"""
|
43
|
+
raise NotImplementedError()
|
44
|
+
|
45
|
+
def parts(self) -> List[str]:
|
46
|
+
"""
|
47
|
+
Returns the ordered parts of this locator's name.
|
48
|
+
"""
|
49
|
+
raise NotImplementedError()
|
50
|
+
|
51
|
+
def join(self, separator: str = DEFAULT_NAME_SEPARATOR) -> str:
|
52
|
+
"""
|
53
|
+
Returns this locator name as a string by joining its parts with the
|
54
|
+
given separator.
|
55
|
+
"""
|
56
|
+
return separator.join(self.parts())
|
57
|
+
|
58
|
+
def exists(self) -> bool:
|
59
|
+
"""
|
60
|
+
Returns True if this locator name is defined, False otherwise.
|
61
|
+
"""
|
62
|
+
return self.immutable_id or all(self.parts())
|
63
|
+
|
3
64
|
|
4
65
|
class Locator:
|
5
|
-
|
66
|
+
"""
|
67
|
+
Creates a globally unique reference to any named catalog object. Locators
|
68
|
+
are composed of the name of the referenced catalog object and its parent
|
69
|
+
Locator (if any). Every Locator has a canonical string representation that
|
70
|
+
can be used for global equality checks. Cryptographic digests of this
|
71
|
+
canonical string can be used for uniform random hash distribution and
|
72
|
+
path-based references to the underlying catalog object in filesystems or
|
73
|
+
URLs.
|
74
|
+
"""
|
75
|
+
|
76
|
+
@property
|
77
|
+
def name(self) -> LocatorName:
|
78
|
+
"""
|
79
|
+
Returns the name of this locator.
|
80
|
+
"""
|
81
|
+
raise NotImplementedError()
|
82
|
+
|
83
|
+
@property
|
84
|
+
def parent(self) -> Optional[Locator]:
|
85
|
+
"""
|
86
|
+
Returns the parent of this locator, if any.
|
87
|
+
"""
|
88
|
+
raise NotImplementedError()
|
89
|
+
|
90
|
+
def canonical_string(self, separator: str = DEFAULT_NAME_SEPARATOR) -> str:
|
6
91
|
"""
|
7
92
|
Returns a unique string for the given locator that can be used
|
8
93
|
for equality checks (i.e. two locators are equal if they have
|
9
94
|
the same canonical string).
|
10
95
|
"""
|
11
|
-
|
96
|
+
parts = []
|
97
|
+
parent_hexdigest = self.parent.hexdigest() if self.parent else None
|
98
|
+
if parent_hexdigest:
|
99
|
+
parts.append(parent_hexdigest)
|
100
|
+
parts.extend(self.name.parts())
|
101
|
+
return separator.join([str(part) for part in parts])
|
12
102
|
|
13
103
|
def digest(self) -> bytes:
|
14
104
|
"""
|
@@ -26,7 +116,7 @@ class Locator:
|
|
26
116
|
"""
|
27
117
|
return sha1_hexdigest(self.canonical_string().encode("utf-8"))
|
28
118
|
|
29
|
-
def path(self, root: str, separator: str =
|
119
|
+
def path(self, root: str, separator: str = DEFAULT_PATH_SEPARATOR) -> str:
|
30
120
|
"""
|
31
121
|
Returns a path for the locator of the form: "{root}/{hexdigest}", where
|
32
122
|
the default path separator of "/" may optionally be overridden with
|