deltacat 2.0.0b11__py3-none-any.whl → 2.0.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +78 -3
- deltacat/api.py +122 -67
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/conftest.py +0 -18
- deltacat/catalog/__init__.py +2 -0
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2417 -271
- deltacat/catalog/model/catalog.py +49 -10
- deltacat/catalog/model/properties.py +38 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/converter_session.py +145 -32
- deltacat/compute/converter/model/convert_input.py +26 -19
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +35 -16
- deltacat/compute/converter/model/converter_session_params.py +24 -21
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +18 -9
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +157 -50
- deltacat/compute/converter/steps/dedupe.py +24 -11
- deltacat/compute/converter/utils/convert_task_options.py +27 -12
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +8 -8
- deltacat/compute/converter/utils/io.py +101 -12
- deltacat/compute/converter/utils/s3u.py +33 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +19 -8
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +44 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/impl.py +2 -2
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
- deltacat/experimental/storage/iceberg/impl.py +5 -3
- deltacat/experimental/storage/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/dataset.py +0 -3
- deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
- deltacat/io/datasource/deltacat_datasource.py +0 -1
- deltacat/storage/__init__.py +20 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +182 -6
- deltacat/storage/model/metafile.py +151 -78
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +48 -26
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
- deltacat/tests/catalog/test_catalogs.py +54 -11
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +675 -490
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +988 -4
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +221 -11
- deltacat/types/tables.py +2329 -59
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +411 -150
- deltacat/utils/filesystem.py +100 -0
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +658 -27
- deltacat/utils/pyarrow.py +1258 -213
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +56 -15
- deltacat-2.0.0.post1.dist-info/METADATA +1163 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/RECORD +183 -145
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/WHEEL +1 -1
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.0b11.dist-info/METADATA +0 -67
- /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
3
3
|
|
4
4
|
import copy
|
5
5
|
|
6
|
-
from typing import Optional, Tuple, List, Union
|
6
|
+
from typing import Optional, Tuple, List, Union, Set
|
7
7
|
|
8
8
|
import base64
|
9
9
|
import json
|
@@ -22,6 +22,12 @@ from deltacat.constants import (
|
|
22
22
|
TXN_PART_SEPARATOR,
|
23
23
|
SUCCESS_TXN_DIR_NAME,
|
24
24
|
)
|
25
|
+
from deltacat.exceptions import (
|
26
|
+
ObjectNotFoundError,
|
27
|
+
ObjectDeletedError,
|
28
|
+
ObjectAlreadyExistsError,
|
29
|
+
ConcurrentModificationError,
|
30
|
+
)
|
25
31
|
from deltacat.storage.model.list_result import ListResult
|
26
32
|
from deltacat.storage.model.locator import Locator
|
27
33
|
from deltacat.storage.model.types import TransactionOperationType
|
@@ -74,7 +80,7 @@ class MetafileRevisionInfo(dict):
|
|
74
80
|
) -> List[MetafileRevisionInfo]:
|
75
81
|
if not success_txn_log_dir:
|
76
82
|
err_msg = f"No transaction log found for: {revision_dir_path}."
|
77
|
-
raise
|
83
|
+
raise ObjectNotFoundError(err_msg)
|
78
84
|
# find the latest committed revision of the target metafile
|
79
85
|
sorted_metafile_paths = MetafileRevisionInfo._sorted_file_paths(
|
80
86
|
revision_dir_path=revision_dir_path,
|
@@ -123,7 +129,7 @@ class MetafileRevisionInfo(dict):
|
|
123
129
|
:param revision_dir_path: root path of directory for metafile
|
124
130
|
:param ignore_missing_revision: if True, will return
|
125
131
|
MetafileRevisionInfo.undefined() on no revisions
|
126
|
-
:raises
|
132
|
+
:raises ObjectNotFoundError if no revisions are found AND
|
127
133
|
ignore_missing_revision=False
|
128
134
|
"""
|
129
135
|
revisions = MetafileRevisionInfo.list_revisions(
|
@@ -136,7 +142,7 @@ class MetafileRevisionInfo(dict):
|
|
136
142
|
)
|
137
143
|
if not revisions and not ignore_missing_revision:
|
138
144
|
err_msg = f"No committed revision found at {revision_dir_path}."
|
139
|
-
raise
|
145
|
+
raise ObjectNotFoundError(err_msg)
|
140
146
|
return revisions[0] if revisions else MetafileRevisionInfo.undefined()
|
141
147
|
|
142
148
|
@staticmethod
|
@@ -197,20 +203,20 @@ class MetafileRevisionInfo(dict):
|
|
197
203
|
# update/delete fails if the last metafile was deleted
|
198
204
|
if mri.txn_op_type == TransactionOperationType.DELETE:
|
199
205
|
if current_txn_op_type != TransactionOperationType.CREATE:
|
200
|
-
raise
|
206
|
+
raise ObjectDeletedError(
|
201
207
|
f"Metafile {current_txn_op_type.value} failed "
|
202
208
|
f"for transaction ID {current_txn_id} failed. "
|
203
209
|
f"Metafile state at {mri.path} is deleted."
|
204
210
|
)
|
205
211
|
# create fails unless the last metafile was deleted
|
206
212
|
elif is_create_txn:
|
207
|
-
raise
|
213
|
+
raise ObjectAlreadyExistsError(
|
208
214
|
f"Metafile creation for transaction ID {current_txn_id} "
|
209
215
|
f"failed. Metafile commit at {mri.path} already exists."
|
210
216
|
)
|
211
217
|
elif not is_create_txn:
|
212
218
|
# update/delete fails if the last metafile doesn't exist
|
213
|
-
raise
|
219
|
+
raise ObjectNotFoundError(
|
214
220
|
f"Metafile {current_txn_op_type.value} failed for "
|
215
221
|
f"transaction ID {current_txn_id} failed. Metafile at "
|
216
222
|
f"{mri.path} does not exist."
|
@@ -237,7 +243,7 @@ class MetafileRevisionInfo(dict):
|
|
237
243
|
:param current_txn_revision_file_path: Path to a metafile revision
|
238
244
|
written by the current transaction to check for conflicts against.
|
239
245
|
:param filesystem: Filesystem that can read the metafile revision.
|
240
|
-
:raises
|
246
|
+
:raises ConcurrentModificationError: if a conflict is found with another transaction.
|
241
247
|
"""
|
242
248
|
revision_dir_path = posixpath.dirname(current_txn_revision_file_path)
|
243
249
|
cur_txn_mri = MetafileRevisionInfo.parse(current_txn_revision_file_path)
|
@@ -265,7 +271,7 @@ class MetafileRevisionInfo(dict):
|
|
265
271
|
# it 1-2 seconds per operation, and record known failed
|
266
272
|
# transaction IDs)
|
267
273
|
if mri.txn_id > cur_txn_mri.txn_id:
|
268
|
-
raise
|
274
|
+
raise ConcurrentModificationError(
|
269
275
|
f"Aborting transaction {cur_txn_mri.txn_id} due to "
|
270
276
|
f"concurrent conflict at "
|
271
277
|
f"{current_txn_revision_file_path} with transaction "
|
@@ -291,7 +297,7 @@ class MetafileRevisionInfo(dict):
|
|
291
297
|
# that tells future transactions to only consider this txn
|
292
298
|
# complete if the conflicting txn is not complete, etc.
|
293
299
|
if txn_end_time:
|
294
|
-
raise
|
300
|
+
raise ConcurrentModificationError(
|
295
301
|
f"Aborting transaction {cur_txn_mri.txn_id} due to "
|
296
302
|
f"concurrent conflict at {revision_dir_path} with "
|
297
303
|
f"previously completed transaction {mri.txn_id} at "
|
@@ -314,7 +320,7 @@ class MetafileRevisionInfo(dict):
|
|
314
320
|
f"Expected to find at least 1 Metafile at "
|
315
321
|
f"{revision_dir_path} but found none."
|
316
322
|
)
|
317
|
-
raise
|
323
|
+
raise ObjectNotFoundError(err_msg)
|
318
324
|
return list(list(zip(*file_paths_and_sizes))[0]) if file_paths_and_sizes else []
|
319
325
|
|
320
326
|
@property
|
@@ -610,7 +616,7 @@ class Metafile(dict):
|
|
610
616
|
current_txn_start_time: int,
|
611
617
|
current_txn_id: str,
|
612
618
|
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
613
|
-
) ->
|
619
|
+
) -> Tuple[List[str], List[str]]:
|
614
620
|
"""
|
615
621
|
Serialize and write this object to a metadata file within the context
|
616
622
|
of a transaction.
|
@@ -623,13 +629,15 @@ class Metafile(dict):
|
|
623
629
|
:param filesystem: File system to use for writing the metadata file. If
|
624
630
|
not given, a default filesystem will be automatically selected based on
|
625
631
|
the catalog root path.
|
632
|
+
:return: List of fully qualified paths to the metadata files written.
|
626
633
|
"""
|
627
634
|
if not filesystem:
|
628
635
|
catalog_root_dir, filesystem = resolve_path_and_filesystem(
|
629
636
|
path=catalog_root_dir,
|
630
637
|
filesystem=filesystem,
|
631
638
|
)
|
632
|
-
|
639
|
+
|
640
|
+
return self._write_metafile_revisions(
|
633
641
|
catalog_root=catalog_root_dir,
|
634
642
|
success_txn_log_dir=success_txn_log_dir,
|
635
643
|
current_txn_op=current_txn_op,
|
@@ -686,22 +694,58 @@ class Metafile(dict):
|
|
686
694
|
with filesystem.open_output_stream(path) as file:
|
687
695
|
file.write(serialized)
|
688
696
|
|
697
|
+
@staticmethod
|
698
|
+
def _equivalent_minus_exclusions(d1: dict, d2: dict, exclusions: Set[str]) -> bool:
|
699
|
+
if d1.get("streamLocator") and d2.get("streamLocator"):
|
700
|
+
# stream locators should be equivalent minus streamId
|
701
|
+
exclusions.add("streamId")
|
702
|
+
if not Metafile._equivalent_minus_exclusions(
|
703
|
+
d1["streamLocator"], d2["streamLocator"], exclusions
|
704
|
+
):
|
705
|
+
return False
|
706
|
+
if d1.get("partitionLocator") and d2.get("partitionLocator"):
|
707
|
+
# partition locators should be equivalent minus partitionId and parent stream locator streamId
|
708
|
+
exclusions.add("partitionId")
|
709
|
+
if not Metafile._equivalent_minus_exclusions(
|
710
|
+
d1["partitionLocator"], d2["partitionLocator"], exclusions
|
711
|
+
):
|
712
|
+
return False
|
713
|
+
if d1.get("deltaLocator") and d2.get("deltaLocator"):
|
714
|
+
# delta locators should be equivalent minus parent partition/stream locator partitionId and streamId
|
715
|
+
if not Metafile._equivalent_minus_exclusions(
|
716
|
+
d1["deltaLocator"], d2["deltaLocator"], exclusions
|
717
|
+
):
|
718
|
+
return False
|
719
|
+
for k, v in d1.items():
|
720
|
+
if k == "partitionValues" and not d2.get(k):
|
721
|
+
# consider [] and None equivalent unpartitioned values
|
722
|
+
v = v or d2.get(k)
|
723
|
+
if k not in exclusions and (k not in d2 or d2[k] != v):
|
724
|
+
return False
|
725
|
+
for k in d2.keys():
|
726
|
+
if k not in exclusions and k not in d1:
|
727
|
+
return False
|
728
|
+
return True
|
729
|
+
|
689
730
|
def equivalent_to(self, other: Metafile) -> bool:
|
690
731
|
"""
|
691
732
|
True if this Metafile is equivalent to the other Metafile minus its
|
692
|
-
unique ID and
|
733
|
+
unique ID, ancestor IDs, and other internal system properties.
|
693
734
|
|
694
735
|
:param other: Metafile to compare to.
|
695
736
|
:return: True if the other metafile is equivalent, false if not.
|
696
737
|
"""
|
697
|
-
identifiers = {
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
738
|
+
identifiers = {
|
739
|
+
"id",
|
740
|
+
"ancestor_ids",
|
741
|
+
"previousStreamId",
|
742
|
+
"previousPartitionId",
|
743
|
+
"streamLocator",
|
744
|
+
"partitionLocator",
|
745
|
+
"deltaLocator",
|
746
|
+
"compactionRoundCompletionInfo",
|
747
|
+
}
|
748
|
+
return Metafile._equivalent_minus_exclusions(self, other, identifiers)
|
705
749
|
|
706
750
|
@property
|
707
751
|
def named_immutable_id(self) -> Optional[str]:
|
@@ -746,6 +790,20 @@ class Metafile(dict):
|
|
746
790
|
_id = self["id"] = str(uuid.uuid4())
|
747
791
|
return _id
|
748
792
|
|
793
|
+
@property
|
794
|
+
def name(self) -> Optional[str]:
|
795
|
+
"""
|
796
|
+
Returns the common name of this metafile. Used as a human
|
797
|
+
readable name for this metafile that is unique amongst its
|
798
|
+
siblings (e.g., namespace/table name, table version, stream
|
799
|
+
format, partition values + scheme ID, delta stream position).
|
800
|
+
"""
|
801
|
+
return (
|
802
|
+
self.locator_alias.name.join()
|
803
|
+
if self.locator_alias
|
804
|
+
else self.locator.name.join()
|
805
|
+
)
|
806
|
+
|
749
807
|
@property
|
750
808
|
def locator(self) -> Optional[Locator]:
|
751
809
|
"""
|
@@ -857,10 +915,8 @@ class Metafile(dict):
|
|
857
915
|
current_txn_id=current_txn_id,
|
858
916
|
filesystem=filesystem,
|
859
917
|
)
|
860
|
-
except
|
918
|
+
except ObjectNotFoundError:
|
861
919
|
# one or more ancestor's don't exist - return an empty list result
|
862
|
-
# TODO(pdames): Raise and catch a more explicit AncestorNotFound
|
863
|
-
# error type here.
|
864
920
|
return ListResult.empty()
|
865
921
|
try:
|
866
922
|
locator = (
|
@@ -884,11 +940,11 @@ class Metafile(dict):
|
|
884
940
|
if locator
|
885
941
|
else None
|
886
942
|
)
|
887
|
-
except
|
888
|
-
# the metafile
|
943
|
+
except ObjectNotFoundError:
|
944
|
+
# the metafile does not exist
|
889
945
|
return ListResult.empty()
|
890
946
|
if not immutable_id:
|
891
|
-
# the metafile
|
947
|
+
# the metafile has been deleted
|
892
948
|
return ListResult.empty()
|
893
949
|
revision_dir_path = posixpath.join(
|
894
950
|
parent_root,
|
@@ -1032,7 +1088,7 @@ class Metafile(dict):
|
|
1032
1088
|
Resolves the immutable metafile ID for the given locator.
|
1033
1089
|
|
1034
1090
|
:return: Immutable ID read from mapping file. None if no mapping exists.
|
1035
|
-
:raises:
|
1091
|
+
:raises: ObjectNotFoundError if the id is not found.
|
1036
1092
|
"""
|
1037
1093
|
metafile_id = locator.name.immutable_id
|
1038
1094
|
if not metafile_id:
|
@@ -1055,12 +1111,10 @@ class Metafile(dict):
|
|
1055
1111
|
if not mri.exists():
|
1056
1112
|
return None
|
1057
1113
|
if mri.txn_op_type == TransactionOperationType.DELETE:
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
)
|
1063
|
-
raise ValueError(err_msg)
|
1114
|
+
# Return None for DELETE revisions to allow graceful handling
|
1115
|
+
# of renamed objects. The from_serializable mechanism can then
|
1116
|
+
# restore the correct locator from parent metadata.
|
1117
|
+
return None
|
1064
1118
|
metafile_id = posixpath.splitext(mri.path)[1][1:]
|
1065
1119
|
return metafile_id
|
1066
1120
|
|
@@ -1098,7 +1152,7 @@ class Metafile(dict):
|
|
1098
1152
|
)
|
1099
1153
|
if not ancestor_id:
|
1100
1154
|
err_msg = f"Ancestor does not exist: {parent_locator}."
|
1101
|
-
raise
|
1155
|
+
raise ObjectNotFoundError(err_msg)
|
1102
1156
|
metafile_root = posixpath.join(
|
1103
1157
|
metafile_root,
|
1104
1158
|
ancestor_id,
|
@@ -1109,7 +1163,7 @@ class Metafile(dict):
|
|
1109
1163
|
filesystem=filesystem,
|
1110
1164
|
)
|
1111
1165
|
except FileNotFoundError:
|
1112
|
-
raise
|
1166
|
+
raise ObjectNotFoundError(
|
1113
1167
|
f"Ancestor {parent_locator} does not exist at: " f"{metafile_root}"
|
1114
1168
|
)
|
1115
1169
|
ancestor_ids.append(ancestor_id)
|
@@ -1125,7 +1179,7 @@ class Metafile(dict):
|
|
1125
1179
|
current_txn_start_time: int,
|
1126
1180
|
current_txn_id: str,
|
1127
1181
|
filesystem: pyarrow.fs.FileSystem,
|
1128
|
-
) ->
|
1182
|
+
) -> str:
|
1129
1183
|
name_resolution_dir_path = locator.path(parent_obj_path)
|
1130
1184
|
# TODO(pdames): Don't write updated revisions with the same mapping as
|
1131
1185
|
# the latest revision.
|
@@ -1143,6 +1197,7 @@ class Metafile(dict):
|
|
1143
1197
|
with filesystem.open_output_stream(revision_file_path):
|
1144
1198
|
pass # Just create an empty ID file to map to the locator
|
1145
1199
|
current_txn_op.append_locator_write_path(revision_file_path)
|
1200
|
+
return revision_file_path
|
1146
1201
|
|
1147
1202
|
def _write_metafile_revision(
|
1148
1203
|
self,
|
@@ -1153,7 +1208,7 @@ class Metafile(dict):
|
|
1153
1208
|
current_txn_start_time: int,
|
1154
1209
|
current_txn_id: str,
|
1155
1210
|
filesystem: pyarrow.fs.FileSystem,
|
1156
|
-
) ->
|
1211
|
+
) -> str:
|
1157
1212
|
mri = MetafileRevisionInfo.new_revision(
|
1158
1213
|
revision_dir_path=revision_dir_path,
|
1159
1214
|
current_txn_op_type=current_txn_op_type,
|
@@ -1167,6 +1222,7 @@ class Metafile(dict):
|
|
1167
1222
|
filesystem=filesystem,
|
1168
1223
|
)
|
1169
1224
|
current_txn_op.append_metafile_write_path(mri.path)
|
1225
|
+
return mri.path
|
1170
1226
|
|
1171
1227
|
def _write_metafile_revisions(
|
1172
1228
|
self,
|
@@ -1176,12 +1232,14 @@ class Metafile(dict):
|
|
1176
1232
|
current_txn_start_time: int,
|
1177
1233
|
current_txn_id: str,
|
1178
1234
|
filesystem: pyarrow.fs.FileSystem,
|
1179
|
-
) ->
|
1235
|
+
) -> Tuple[List[str], List[str]]:
|
1180
1236
|
"""
|
1181
1237
|
Generates the fully qualified paths required to write this metafile as
|
1182
1238
|
part of the given transaction. All paths returned will be based in the
|
1183
1239
|
given root directory.
|
1184
1240
|
"""
|
1241
|
+
metafile_write_paths = []
|
1242
|
+
locator_write_paths = []
|
1185
1243
|
parent_obj_path = self.parent_root_path(
|
1186
1244
|
catalog_root=catalog_root,
|
1187
1245
|
current_txn_start_time=current_txn_start_time,
|
@@ -1209,36 +1267,47 @@ class Metafile(dict):
|
|
1209
1267
|
if mutable_dest_locator:
|
1210
1268
|
# the locator name is mutable, so we need to persist a mapping
|
1211
1269
|
# from the locator back to its immutable metafile ID
|
1212
|
-
if
|
1213
|
-
|
1214
|
-
|
1215
|
-
and
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1270
|
+
if current_txn_op.type == TransactionOperationType.UPDATE:
|
1271
|
+
# mutable locator updates are used to either transition
|
1272
|
+
# staged streams/partitions (which have no locator alias) to
|
1273
|
+
# committed (and create the locator alias) or to rename an
|
1274
|
+
# existing mutable locator
|
1275
|
+
if mutable_src_locator != mutable_dest_locator:
|
1276
|
+
if mutable_src_locator is not None:
|
1277
|
+
# this update includes a rename
|
1278
|
+
# mark the source metafile mapping as deleted
|
1279
|
+
locator_write_path = (
|
1280
|
+
current_txn_op.src_metafile._write_locator_to_id_map_file(
|
1281
|
+
locator=mutable_src_locator,
|
1282
|
+
success_txn_log_dir=success_txn_log_dir,
|
1283
|
+
parent_obj_path=parent_obj_path,
|
1284
|
+
current_txn_op=current_txn_op,
|
1285
|
+
current_txn_op_type=TransactionOperationType.DELETE,
|
1286
|
+
current_txn_start_time=current_txn_start_time,
|
1287
|
+
current_txn_id=current_txn_id,
|
1288
|
+
filesystem=filesystem,
|
1289
|
+
)
|
1290
|
+
)
|
1291
|
+
locator_write_paths.append(locator_write_path)
|
1292
|
+
# mark the dest metafile mapping as created
|
1293
|
+
locator_write_path = self._write_locator_to_id_map_file(
|
1294
|
+
locator=mutable_dest_locator,
|
1295
|
+
success_txn_log_dir=success_txn_log_dir,
|
1296
|
+
parent_obj_path=parent_obj_path,
|
1297
|
+
current_txn_op=current_txn_op,
|
1298
|
+
current_txn_op_type=TransactionOperationType.CREATE,
|
1299
|
+
current_txn_start_time=current_txn_start_time,
|
1300
|
+
current_txn_id=current_txn_id,
|
1301
|
+
filesystem=filesystem,
|
1302
|
+
)
|
1303
|
+
locator_write_paths.append(locator_write_path)
|
1304
|
+
# else this is a mutable locator no-op update - do nothing
|
1240
1305
|
else:
|
1241
|
-
|
1306
|
+
# this is either a create/delete operation or a
|
1307
|
+
# replace operation that is part of an overwrite/restate
|
1308
|
+
# transaction (e.g. committing a staged replacement for a
|
1309
|
+
# previously committed stream/partition).
|
1310
|
+
locator_write_path = self._write_locator_to_id_map_file(
|
1242
1311
|
locator=mutable_dest_locator,
|
1243
1312
|
success_txn_log_dir=success_txn_log_dir,
|
1244
1313
|
parent_obj_path=parent_obj_path,
|
@@ -1248,13 +1317,15 @@ class Metafile(dict):
|
|
1248
1317
|
current_txn_id=current_txn_id,
|
1249
1318
|
filesystem=filesystem,
|
1250
1319
|
)
|
1320
|
+
locator_write_paths.append(locator_write_path)
|
1251
1321
|
metafile_revision_dir_path = posixpath.join(
|
1252
1322
|
parent_obj_path,
|
1253
1323
|
self.id,
|
1254
1324
|
REVISION_DIR_NAME,
|
1255
1325
|
)
|
1256
1326
|
if (
|
1257
|
-
current_txn_op.type
|
1327
|
+
current_txn_op.type
|
1328
|
+
in [TransactionOperationType.UPDATE, TransactionOperationType.REPLACE]
|
1258
1329
|
and current_txn_op.src_metafile.id != current_txn_op.dest_metafile.id
|
1259
1330
|
):
|
1260
1331
|
# TODO(pdames): block operations including both a rename & replace?
|
@@ -1265,7 +1336,7 @@ class Metafile(dict):
|
|
1265
1336
|
current_txn_op.src_metafile.id,
|
1266
1337
|
REVISION_DIR_NAME,
|
1267
1338
|
)
|
1268
|
-
self._write_metafile_revision(
|
1339
|
+
metafile_write_path = self._write_metafile_revision(
|
1269
1340
|
success_txn_log_dir=success_txn_log_dir,
|
1270
1341
|
revision_dir_path=src_metafile_revision_dir_path,
|
1271
1342
|
current_txn_op=current_txn_op,
|
@@ -1274,9 +1345,10 @@ class Metafile(dict):
|
|
1274
1345
|
current_txn_id=current_txn_id,
|
1275
1346
|
filesystem=filesystem,
|
1276
1347
|
)
|
1348
|
+
metafile_write_paths.append(metafile_write_path)
|
1277
1349
|
try:
|
1278
1350
|
# mark the dest metafile as created
|
1279
|
-
self._write_metafile_revision(
|
1351
|
+
metafile_write_path = self._write_metafile_revision(
|
1280
1352
|
success_txn_log_dir=success_txn_log_dir,
|
1281
1353
|
revision_dir_path=metafile_revision_dir_path,
|
1282
1354
|
current_txn_op=current_txn_op,
|
@@ -1285,14 +1357,13 @@ class Metafile(dict):
|
|
1285
1357
|
current_txn_id=current_txn_id,
|
1286
1358
|
filesystem=filesystem,
|
1287
1359
|
)
|
1288
|
-
|
1289
|
-
|
1290
|
-
if "already exists" not in str(e):
|
1291
|
-
raise e
|
1360
|
+
metafile_write_paths.append(metafile_write_path)
|
1361
|
+
except ObjectAlreadyExistsError:
|
1292
1362
|
# src metafile is being replaced by an existing dest metafile
|
1363
|
+
pass
|
1293
1364
|
|
1294
1365
|
else:
|
1295
|
-
self._write_metafile_revision(
|
1366
|
+
metafile_write_path = self._write_metafile_revision(
|
1296
1367
|
success_txn_log_dir=success_txn_log_dir,
|
1297
1368
|
revision_dir_path=metafile_revision_dir_path,
|
1298
1369
|
current_txn_op=current_txn_op,
|
@@ -1301,6 +1372,8 @@ class Metafile(dict):
|
|
1301
1372
|
current_txn_id=current_txn_id,
|
1302
1373
|
filesystem=filesystem,
|
1303
1374
|
)
|
1375
|
+
metafile_write_paths.append(metafile_write_path)
|
1376
|
+
return metafile_write_paths, locator_write_paths
|
1304
1377
|
|
1305
1378
|
def _list_metafiles(
|
1306
1379
|
self,
|
@@ -1332,7 +1405,7 @@ class Metafile(dict):
|
|
1332
1405
|
current_txn_id=current_txn_id,
|
1333
1406
|
ignore_missing_revision=True,
|
1334
1407
|
)
|
1335
|
-
if mri.exists():
|
1408
|
+
if mri.exists() and mri.txn_op_type != TransactionOperationType.DELETE:
|
1336
1409
|
item = self.read(
|
1337
1410
|
path=mri.path,
|
1338
1411
|
filesystem=filesystem,
|
@@ -6,7 +6,7 @@ from typing import Any, Dict, Optional, List
|
|
6
6
|
from deltacat.storage.model.metafile import Metafile
|
7
7
|
from deltacat.storage.model.locator import Locator, LocatorName
|
8
8
|
|
9
|
-
NamespaceProperties =
|
9
|
+
NamespaceProperties = Dict[str, Any]
|
10
10
|
|
11
11
|
|
12
12
|
class Namespace(Metafile):
|
@@ -46,6 +46,13 @@ class Namespace(Metafile):
|
|
46
46
|
def properties(self, properties: Optional[NamespaceProperties]) -> None:
|
47
47
|
self["properties"] = properties
|
48
48
|
|
49
|
+
def url(self, catalog_name: Optional[str] = None) -> str:
|
50
|
+
return (
|
51
|
+
f"dc://{catalog_name}/{self.namespace}/"
|
52
|
+
if catalog_name
|
53
|
+
else f"namespace://{self.namespace}/"
|
54
|
+
)
|
55
|
+
|
49
56
|
|
50
57
|
class NamespaceLocatorName(LocatorName):
|
51
58
|
def __init__(self, locator: NamespaceLocator):
|