deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +117 -18
- deltacat/api.py +536 -126
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +4 -2
- deltacat/benchmarking/conftest.py +1 -19
- deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
- deltacat/catalog/__init__.py +64 -5
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2444 -282
- deltacat/catalog/model/catalog.py +208 -113
- deltacat/catalog/model/properties.py +63 -24
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/constants.py +5 -0
- deltacat/compute/converter/converter_session.py +207 -52
- deltacat/compute/converter/model/convert_input.py +43 -16
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +64 -19
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +193 -65
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +230 -75
- deltacat/compute/converter/steps/dedupe.py +46 -12
- deltacat/compute/converter/utils/convert_task_options.py +66 -22
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +13 -8
- deltacat/compute/converter/utils/io.py +173 -13
- deltacat/compute/converter/utils/s3u.py +42 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +49 -6
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +10 -0
- deltacat/examples/basic_logging.py +6 -6
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
- deltacat/examples/hello_world.py +4 -2
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
- deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
- deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
- deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
- deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
- deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
- deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
- deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/storage/__init__.py +22 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +231 -6
- deltacat/storage/model/metafile.py +224 -119
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/scan/push_down.py +32 -5
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/shard.py +6 -2
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +53 -29
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +103 -106
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +697 -349
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
- deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +3 -1
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1036 -11
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +345 -37
- deltacat/types/tables.py +2344 -46
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +3 -1
- deltacat/utils/filesystem.py +139 -9
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1373 -192
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/catalog/iceberg/__init__.py +0 -4
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/examples/common/fixtures.py +0 -15
- deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
- deltacat/storage/rivulet/__init__.py +0 -11
- deltacat/storage/rivulet/feather/__init__.py +0 -5
- deltacat/storage/rivulet/parquet/__init__.py +0 -5
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.dist-info/METADATA +0 -65
- deltacat-2.0.dist-info/RECORD +0 -347
- /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
- /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
- /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
- /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
- /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
- /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
- /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
- /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
- /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
- /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
- /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
3
3
|
|
4
4
|
import copy
|
5
5
|
|
6
|
-
from typing import Optional, Tuple, List
|
6
|
+
from typing import Optional, Tuple, List, Union, Set
|
7
7
|
|
8
8
|
import base64
|
9
9
|
import json
|
@@ -22,6 +22,12 @@ from deltacat.constants import (
|
|
22
22
|
TXN_PART_SEPARATOR,
|
23
23
|
SUCCESS_TXN_DIR_NAME,
|
24
24
|
)
|
25
|
+
from deltacat.exceptions import (
|
26
|
+
ObjectNotFoundError,
|
27
|
+
ObjectDeletedError,
|
28
|
+
ObjectAlreadyExistsError,
|
29
|
+
ConcurrentModificationError,
|
30
|
+
)
|
25
31
|
from deltacat.storage.model.list_result import ListResult
|
26
32
|
from deltacat.storage.model.locator import Locator
|
27
33
|
from deltacat.storage.model.types import TransactionOperationType
|
@@ -74,7 +80,7 @@ class MetafileRevisionInfo(dict):
|
|
74
80
|
) -> List[MetafileRevisionInfo]:
|
75
81
|
if not success_txn_log_dir:
|
76
82
|
err_msg = f"No transaction log found for: {revision_dir_path}."
|
77
|
-
raise
|
83
|
+
raise ObjectNotFoundError(err_msg)
|
78
84
|
# find the latest committed revision of the target metafile
|
79
85
|
sorted_metafile_paths = MetafileRevisionInfo._sorted_file_paths(
|
80
86
|
revision_dir_path=revision_dir_path,
|
@@ -123,7 +129,7 @@ class MetafileRevisionInfo(dict):
|
|
123
129
|
:param revision_dir_path: root path of directory for metafile
|
124
130
|
:param ignore_missing_revision: if True, will return
|
125
131
|
MetafileRevisionInfo.undefined() on no revisions
|
126
|
-
:raises
|
132
|
+
:raises ObjectNotFoundError if no revisions are found AND
|
127
133
|
ignore_missing_revision=False
|
128
134
|
"""
|
129
135
|
revisions = MetafileRevisionInfo.list_revisions(
|
@@ -136,7 +142,7 @@ class MetafileRevisionInfo(dict):
|
|
136
142
|
)
|
137
143
|
if not revisions and not ignore_missing_revision:
|
138
144
|
err_msg = f"No committed revision found at {revision_dir_path}."
|
139
|
-
raise
|
145
|
+
raise ObjectNotFoundError(err_msg)
|
140
146
|
return revisions[0] if revisions else MetafileRevisionInfo.undefined()
|
141
147
|
|
142
148
|
@staticmethod
|
@@ -197,20 +203,20 @@ class MetafileRevisionInfo(dict):
|
|
197
203
|
# update/delete fails if the last metafile was deleted
|
198
204
|
if mri.txn_op_type == TransactionOperationType.DELETE:
|
199
205
|
if current_txn_op_type != TransactionOperationType.CREATE:
|
200
|
-
raise
|
206
|
+
raise ObjectDeletedError(
|
201
207
|
f"Metafile {current_txn_op_type.value} failed "
|
202
208
|
f"for transaction ID {current_txn_id} failed. "
|
203
209
|
f"Metafile state at {mri.path} is deleted."
|
204
210
|
)
|
205
211
|
# create fails unless the last metafile was deleted
|
206
212
|
elif is_create_txn:
|
207
|
-
raise
|
213
|
+
raise ObjectAlreadyExistsError(
|
208
214
|
f"Metafile creation for transaction ID {current_txn_id} "
|
209
215
|
f"failed. Metafile commit at {mri.path} already exists."
|
210
216
|
)
|
211
217
|
elif not is_create_txn:
|
212
218
|
# update/delete fails if the last metafile doesn't exist
|
213
|
-
raise
|
219
|
+
raise ObjectNotFoundError(
|
214
220
|
f"Metafile {current_txn_op_type.value} failed for "
|
215
221
|
f"transaction ID {current_txn_id} failed. Metafile at "
|
216
222
|
f"{mri.path} does not exist."
|
@@ -237,7 +243,7 @@ class MetafileRevisionInfo(dict):
|
|
237
243
|
:param current_txn_revision_file_path: Path to a metafile revision
|
238
244
|
written by the current transaction to check for conflicts against.
|
239
245
|
:param filesystem: Filesystem that can read the metafile revision.
|
240
|
-
:raises
|
246
|
+
:raises ConcurrentModificationError: if a conflict is found with another transaction.
|
241
247
|
"""
|
242
248
|
revision_dir_path = posixpath.dirname(current_txn_revision_file_path)
|
243
249
|
cur_txn_mri = MetafileRevisionInfo.parse(current_txn_revision_file_path)
|
@@ -265,7 +271,7 @@ class MetafileRevisionInfo(dict):
|
|
265
271
|
# it 1-2 seconds per operation, and record known failed
|
266
272
|
# transaction IDs)
|
267
273
|
if mri.txn_id > cur_txn_mri.txn_id:
|
268
|
-
raise
|
274
|
+
raise ConcurrentModificationError(
|
269
275
|
f"Aborting transaction {cur_txn_mri.txn_id} due to "
|
270
276
|
f"concurrent conflict at "
|
271
277
|
f"{current_txn_revision_file_path} with transaction "
|
@@ -291,7 +297,7 @@ class MetafileRevisionInfo(dict):
|
|
291
297
|
# that tells future transactions to only consider this txn
|
292
298
|
# complete if the conflicting txn is not complete, etc.
|
293
299
|
if txn_end_time:
|
294
|
-
raise
|
300
|
+
raise ConcurrentModificationError(
|
295
301
|
f"Aborting transaction {cur_txn_mri.txn_id} due to "
|
296
302
|
f"concurrent conflict at {revision_dir_path} with "
|
297
303
|
f"previously completed transaction {mri.txn_id} at "
|
@@ -314,7 +320,7 @@ class MetafileRevisionInfo(dict):
|
|
314
320
|
f"Expected to find at least 1 Metafile at "
|
315
321
|
f"{revision_dir_path} but found none."
|
316
322
|
)
|
317
|
-
raise
|
323
|
+
raise ObjectNotFoundError(err_msg)
|
318
324
|
return list(list(zip(*file_paths_and_sizes))[0]) if file_paths_and_sizes else []
|
319
325
|
|
320
326
|
@property
|
@@ -412,7 +418,7 @@ class Metafile(dict):
|
|
412
418
|
@staticmethod
|
413
419
|
def based_on(
|
414
420
|
other: Optional[Metafile],
|
415
|
-
new_id: Optional[
|
421
|
+
new_id: Optional[str] = None,
|
416
422
|
) -> Optional[Metafile]:
|
417
423
|
"""
|
418
424
|
Returns a new metafile equivalent to the input metafile, but with a new
|
@@ -539,29 +545,31 @@ class Metafile(dict):
|
|
539
545
|
f"${serialized_dict}"
|
540
546
|
)
|
541
547
|
|
548
|
+
@staticmethod
|
549
|
+
def get_type_name(serialized_dict: dict):
|
550
|
+
"""
|
551
|
+
Given a serialized dictionary of Metafile data, gets the type name of
|
552
|
+
the metafile class.
|
553
|
+
"""
|
554
|
+
return Metafile.get_class(serialized_dict).__name__
|
555
|
+
|
542
556
|
@classmethod
|
543
|
-
def
|
557
|
+
def deserialize(
|
544
558
|
cls,
|
545
|
-
|
546
|
-
|
547
|
-
format: Optional[str] = METAFILE_FORMAT,
|
559
|
+
serialized: Union[bytes, str],
|
560
|
+
meta_format: Optional[str] = METAFILE_FORMAT,
|
548
561
|
) -> Metafile:
|
549
562
|
"""
|
550
|
-
|
551
|
-
:param
|
552
|
-
:param
|
553
|
-
:
|
554
|
-
:return: Deserialized object from the metadata file.
|
563
|
+
Deserialize a metadata file from the given bytes or string.
|
564
|
+
:param serialized: Serialized metadata file data.
|
565
|
+
:param meta_format: Format to use for deserializing the metadata file.
|
566
|
+
:return: Deserialized metadata file.
|
555
567
|
"""
|
556
|
-
if
|
568
|
+
if meta_format not in SUPPORTED_METAFILE_FORMATS:
|
557
569
|
raise ValueError(
|
558
|
-
f"Unsupported format '{
|
570
|
+
f"Unsupported format '{meta_format}'. "
|
571
|
+
f"Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
|
559
572
|
)
|
560
|
-
|
561
|
-
if not filesystem:
|
562
|
-
path, filesystem = resolve_path_and_filesystem(path, filesystem)
|
563
|
-
with filesystem.open_input_stream(path) as file:
|
564
|
-
binary = file.readall()
|
565
573
|
reader = {
|
566
574
|
"json": lambda b: json.loads(
|
567
575
|
b.decode("utf-8"),
|
@@ -573,12 +581,32 @@ class Metafile(dict):
|
|
573
581
|
},
|
574
582
|
),
|
575
583
|
"msgpack": msgpack.loads,
|
576
|
-
}[
|
577
|
-
data = reader(
|
584
|
+
}[meta_format]
|
585
|
+
data = reader(serialized)
|
578
586
|
# cast this Metafile into the appropriate child class type
|
579
587
|
clazz = Metafile.get_class(data)
|
580
|
-
|
581
|
-
|
588
|
+
return clazz(**data)
|
589
|
+
|
590
|
+
@classmethod
|
591
|
+
def read(
|
592
|
+
cls,
|
593
|
+
path: str,
|
594
|
+
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
595
|
+
meta_format: Optional[str] = METAFILE_FORMAT,
|
596
|
+
) -> Metafile:
|
597
|
+
"""
|
598
|
+
Read a metadata file and return the deserialized object.
|
599
|
+
:param path: Metadata file path to read.
|
600
|
+
:param filesystem: File system to use for reading the metadata file.
|
601
|
+
:param meta_format: Format to use for deserializing the metadata file.
|
602
|
+
:return: Deserialized object from the metadata file.
|
603
|
+
"""
|
604
|
+
if not filesystem:
|
605
|
+
path, filesystem = resolve_path_and_filesystem(path, filesystem)
|
606
|
+
with filesystem.open_input_stream(path) as file:
|
607
|
+
serialized = file.readall()
|
608
|
+
metafile = Metafile.deserialize(serialized, meta_format)
|
609
|
+
return metafile.from_serializable(path, filesystem)
|
582
610
|
|
583
611
|
def write_txn(
|
584
612
|
self,
|
@@ -588,7 +616,7 @@ class Metafile(dict):
|
|
588
616
|
current_txn_start_time: int,
|
589
617
|
current_txn_id: str,
|
590
618
|
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
591
|
-
) ->
|
619
|
+
) -> Tuple[List[str], List[str]]:
|
592
620
|
"""
|
593
621
|
Serialize and write this object to a metadata file within the context
|
594
622
|
of a transaction.
|
@@ -601,13 +629,15 @@ class Metafile(dict):
|
|
601
629
|
:param filesystem: File system to use for writing the metadata file. If
|
602
630
|
not given, a default filesystem will be automatically selected based on
|
603
631
|
the catalog root path.
|
632
|
+
:return: List of fully qualified paths to the metadata files written.
|
604
633
|
"""
|
605
634
|
if not filesystem:
|
606
635
|
catalog_root_dir, filesystem = resolve_path_and_filesystem(
|
607
636
|
path=catalog_root_dir,
|
608
637
|
filesystem=filesystem,
|
609
638
|
)
|
610
|
-
|
639
|
+
|
640
|
+
return self._write_metafile_revisions(
|
611
641
|
catalog_root=catalog_root_dir,
|
612
642
|
success_txn_log_dir=success_txn_log_dir,
|
613
643
|
current_txn_op=current_txn_op,
|
@@ -616,11 +646,37 @@ class Metafile(dict):
|
|
616
646
|
filesystem=filesystem,
|
617
647
|
)
|
618
648
|
|
649
|
+
def serialize(
|
650
|
+
self,
|
651
|
+
meta_format: Optional[str] = METAFILE_FORMAT,
|
652
|
+
) -> Union[bytes, str]:
|
653
|
+
"""
|
654
|
+
Serialize this object to the given metafile format.
|
655
|
+
:param meta_format: Format to use for serializing the metadata file.
|
656
|
+
:return: Serialized metadata file bytes or string (format dependent).
|
657
|
+
"""
|
658
|
+
if meta_format not in SUPPORTED_METAFILE_FORMATS:
|
659
|
+
raise ValueError(
|
660
|
+
f"Unsupported format '{meta_format}'. "
|
661
|
+
f"Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
|
662
|
+
)
|
663
|
+
serializer = {
|
664
|
+
"json": lambda data: json.dumps(
|
665
|
+
data,
|
666
|
+
indent=4,
|
667
|
+
default=lambda b: base64.b64encode(b).decode("utf-8")
|
668
|
+
if isinstance(b, bytes)
|
669
|
+
else b,
|
670
|
+
).encode("utf-8"),
|
671
|
+
"msgpack": msgpack.dumps,
|
672
|
+
}[meta_format]
|
673
|
+
return serializer(self.to_serializable())
|
674
|
+
|
619
675
|
def write(
|
620
676
|
self,
|
621
677
|
path: str,
|
622
678
|
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
623
|
-
|
679
|
+
meta_format: Optional[str] = METAFILE_FORMAT,
|
624
680
|
) -> None:
|
625
681
|
"""
|
626
682
|
Serialize and write this object to a metadata file.
|
@@ -628,48 +684,68 @@ class Metafile(dict):
|
|
628
684
|
:param filesystem: File system to use for writing the metadata file. If
|
629
685
|
not given, a default filesystem will be automatically selected based on
|
630
686
|
the catalog root path.
|
631
|
-
param
|
687
|
+
:param meta_format: Format to use for serializing the metadata file.
|
632
688
|
"""
|
633
|
-
|
634
|
-
raise ValueError(
|
635
|
-
f"Unsupported format '{format}'. Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
|
636
|
-
)
|
637
|
-
|
689
|
+
serialized = self.serialize(meta_format)
|
638
690
|
if not filesystem:
|
639
691
|
path, filesystem = resolve_path_and_filesystem(path, filesystem)
|
640
692
|
revision_dir_path = posixpath.dirname(path)
|
641
693
|
filesystem.create_dir(revision_dir_path, recursive=True)
|
642
|
-
|
643
|
-
writer = {
|
644
|
-
"json": lambda data: json.dumps(
|
645
|
-
data,
|
646
|
-
indent=4,
|
647
|
-
default=lambda b: base64.b64encode(b).decode("utf-8")
|
648
|
-
if isinstance(b, bytes)
|
649
|
-
else b,
|
650
|
-
).encode("utf-8"),
|
651
|
-
"msgpack": msgpack.dumps,
|
652
|
-
}[format]
|
653
|
-
|
654
694
|
with filesystem.open_output_stream(path) as file:
|
655
|
-
file.write(
|
695
|
+
file.write(serialized)
|
696
|
+
|
697
|
+
@staticmethod
|
698
|
+
def _equivalent_minus_exclusions(d1: dict, d2: dict, exclusions: Set[str]) -> bool:
|
699
|
+
if d1.get("streamLocator") and d2.get("streamLocator"):
|
700
|
+
# stream locators should be equivalent minus streamId
|
701
|
+
exclusions.add("streamId")
|
702
|
+
if not Metafile._equivalent_minus_exclusions(
|
703
|
+
d1["streamLocator"], d2["streamLocator"], exclusions
|
704
|
+
):
|
705
|
+
return False
|
706
|
+
if d1.get("partitionLocator") and d2.get("partitionLocator"):
|
707
|
+
# partition locators should be equivalent minus partitionId and parent stream locator streamId
|
708
|
+
exclusions.add("partitionId")
|
709
|
+
if not Metafile._equivalent_minus_exclusions(
|
710
|
+
d1["partitionLocator"], d2["partitionLocator"], exclusions
|
711
|
+
):
|
712
|
+
return False
|
713
|
+
if d1.get("deltaLocator") and d2.get("deltaLocator"):
|
714
|
+
# delta locators should be equivalent minus parent partition/stream locator partitionId and streamId
|
715
|
+
if not Metafile._equivalent_minus_exclusions(
|
716
|
+
d1["deltaLocator"], d2["deltaLocator"], exclusions
|
717
|
+
):
|
718
|
+
return False
|
719
|
+
for k, v in d1.items():
|
720
|
+
if k == "partitionValues" and not d2.get(k):
|
721
|
+
# consider [] and None equivalent unpartitioned values
|
722
|
+
v = v or d2.get(k)
|
723
|
+
if k not in exclusions and (k not in d2 or d2[k] != v):
|
724
|
+
return False
|
725
|
+
for k in d2.keys():
|
726
|
+
if k not in exclusions and k not in d1:
|
727
|
+
return False
|
728
|
+
return True
|
656
729
|
|
657
730
|
def equivalent_to(self, other: Metafile) -> bool:
|
658
731
|
"""
|
659
732
|
True if this Metafile is equivalent to the other Metafile minus its
|
660
|
-
unique ID and
|
733
|
+
unique ID, ancestor IDs, and other internal system properties.
|
661
734
|
|
662
735
|
:param other: Metafile to compare to.
|
663
736
|
:return: True if the other metafile is equivalent, false if not.
|
664
737
|
"""
|
665
|
-
identifiers = {
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
738
|
+
identifiers = {
|
739
|
+
"id",
|
740
|
+
"ancestor_ids",
|
741
|
+
"previousStreamId",
|
742
|
+
"previousPartitionId",
|
743
|
+
"streamLocator",
|
744
|
+
"partitionLocator",
|
745
|
+
"deltaLocator",
|
746
|
+
"compactionRoundCompletionInfo",
|
747
|
+
}
|
748
|
+
return Metafile._equivalent_minus_exclusions(self, other, identifiers)
|
673
749
|
|
674
750
|
@property
|
675
751
|
def named_immutable_id(self) -> Optional[str]:
|
@@ -714,6 +790,20 @@ class Metafile(dict):
|
|
714
790
|
_id = self["id"] = str(uuid.uuid4())
|
715
791
|
return _id
|
716
792
|
|
793
|
+
@property
|
794
|
+
def name(self) -> Optional[str]:
|
795
|
+
"""
|
796
|
+
Returns the common name of this metafile. Used as a human
|
797
|
+
readable name for this metafile that is unique amongst its
|
798
|
+
siblings (e.g., namespace/table name, table version, stream
|
799
|
+
format, partition values + scheme ID, delta stream position).
|
800
|
+
"""
|
801
|
+
return (
|
802
|
+
self.locator_alias.name.join()
|
803
|
+
if self.locator_alias
|
804
|
+
else self.locator.name.join()
|
805
|
+
)
|
806
|
+
|
717
807
|
@property
|
718
808
|
def locator(self) -> Optional[Locator]:
|
719
809
|
"""
|
@@ -825,10 +915,8 @@ class Metafile(dict):
|
|
825
915
|
current_txn_id=current_txn_id,
|
826
916
|
filesystem=filesystem,
|
827
917
|
)
|
828
|
-
except
|
918
|
+
except ObjectNotFoundError:
|
829
919
|
# one or more ancestor's don't exist - return an empty list result
|
830
|
-
# TODO(pdames): Raise and catch a more explicit AncestorNotFound
|
831
|
-
# error type here.
|
832
920
|
return ListResult.empty()
|
833
921
|
try:
|
834
922
|
locator = (
|
@@ -852,11 +940,11 @@ class Metafile(dict):
|
|
852
940
|
if locator
|
853
941
|
else None
|
854
942
|
)
|
855
|
-
except
|
856
|
-
# the metafile
|
943
|
+
except ObjectNotFoundError:
|
944
|
+
# the metafile does not exist
|
857
945
|
return ListResult.empty()
|
858
946
|
if not immutable_id:
|
859
|
-
# the metafile
|
947
|
+
# the metafile has been deleted
|
860
948
|
return ListResult.empty()
|
861
949
|
revision_dir_path = posixpath.join(
|
862
950
|
parent_root,
|
@@ -1000,7 +1088,7 @@ class Metafile(dict):
|
|
1000
1088
|
Resolves the immutable metafile ID for the given locator.
|
1001
1089
|
|
1002
1090
|
:return: Immutable ID read from mapping file. None if no mapping exists.
|
1003
|
-
:raises:
|
1091
|
+
:raises: ObjectNotFoundError if the id is not found.
|
1004
1092
|
"""
|
1005
1093
|
metafile_id = locator.name.immutable_id
|
1006
1094
|
if not metafile_id:
|
@@ -1023,12 +1111,10 @@ class Metafile(dict):
|
|
1023
1111
|
if not mri.exists():
|
1024
1112
|
return None
|
1025
1113
|
if mri.txn_op_type == TransactionOperationType.DELETE:
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1030
|
-
)
|
1031
|
-
raise ValueError(err_msg)
|
1114
|
+
# Return None for DELETE revisions to allow graceful handling
|
1115
|
+
# of renamed objects. The from_serializable mechanism can then
|
1116
|
+
# restore the correct locator from parent metadata.
|
1117
|
+
return None
|
1032
1118
|
metafile_id = posixpath.splitext(mri.path)[1][1:]
|
1033
1119
|
return metafile_id
|
1034
1120
|
|
@@ -1066,7 +1152,7 @@ class Metafile(dict):
|
|
1066
1152
|
)
|
1067
1153
|
if not ancestor_id:
|
1068
1154
|
err_msg = f"Ancestor does not exist: {parent_locator}."
|
1069
|
-
raise
|
1155
|
+
raise ObjectNotFoundError(err_msg)
|
1070
1156
|
metafile_root = posixpath.join(
|
1071
1157
|
metafile_root,
|
1072
1158
|
ancestor_id,
|
@@ -1077,7 +1163,7 @@ class Metafile(dict):
|
|
1077
1163
|
filesystem=filesystem,
|
1078
1164
|
)
|
1079
1165
|
except FileNotFoundError:
|
1080
|
-
raise
|
1166
|
+
raise ObjectNotFoundError(
|
1081
1167
|
f"Ancestor {parent_locator} does not exist at: " f"{metafile_root}"
|
1082
1168
|
)
|
1083
1169
|
ancestor_ids.append(ancestor_id)
|
@@ -1093,7 +1179,7 @@ class Metafile(dict):
|
|
1093
1179
|
current_txn_start_time: int,
|
1094
1180
|
current_txn_id: str,
|
1095
1181
|
filesystem: pyarrow.fs.FileSystem,
|
1096
|
-
) ->
|
1182
|
+
) -> str:
|
1097
1183
|
name_resolution_dir_path = locator.path(parent_obj_path)
|
1098
1184
|
# TODO(pdames): Don't write updated revisions with the same mapping as
|
1099
1185
|
# the latest revision.
|
@@ -1111,6 +1197,7 @@ class Metafile(dict):
|
|
1111
1197
|
with filesystem.open_output_stream(revision_file_path):
|
1112
1198
|
pass # Just create an empty ID file to map to the locator
|
1113
1199
|
current_txn_op.append_locator_write_path(revision_file_path)
|
1200
|
+
return revision_file_path
|
1114
1201
|
|
1115
1202
|
def _write_metafile_revision(
|
1116
1203
|
self,
|
@@ -1121,7 +1208,7 @@ class Metafile(dict):
|
|
1121
1208
|
current_txn_start_time: int,
|
1122
1209
|
current_txn_id: str,
|
1123
1210
|
filesystem: pyarrow.fs.FileSystem,
|
1124
|
-
) ->
|
1211
|
+
) -> str:
|
1125
1212
|
mri = MetafileRevisionInfo.new_revision(
|
1126
1213
|
revision_dir_path=revision_dir_path,
|
1127
1214
|
current_txn_op_type=current_txn_op_type,
|
@@ -1135,6 +1222,7 @@ class Metafile(dict):
|
|
1135
1222
|
filesystem=filesystem,
|
1136
1223
|
)
|
1137
1224
|
current_txn_op.append_metafile_write_path(mri.path)
|
1225
|
+
return mri.path
|
1138
1226
|
|
1139
1227
|
def _write_metafile_revisions(
|
1140
1228
|
self,
|
@@ -1144,12 +1232,14 @@ class Metafile(dict):
|
|
1144
1232
|
current_txn_start_time: int,
|
1145
1233
|
current_txn_id: str,
|
1146
1234
|
filesystem: pyarrow.fs.FileSystem,
|
1147
|
-
) ->
|
1235
|
+
) -> Tuple[List[str], List[str]]:
|
1148
1236
|
"""
|
1149
1237
|
Generates the fully qualified paths required to write this metafile as
|
1150
1238
|
part of the given transaction. All paths returned will be based in the
|
1151
1239
|
given root directory.
|
1152
1240
|
"""
|
1241
|
+
metafile_write_paths = []
|
1242
|
+
locator_write_paths = []
|
1153
1243
|
parent_obj_path = self.parent_root_path(
|
1154
1244
|
catalog_root=catalog_root,
|
1155
1245
|
current_txn_start_time=current_txn_start_time,
|
@@ -1177,36 +1267,47 @@ class Metafile(dict):
|
|
1177
1267
|
if mutable_dest_locator:
|
1178
1268
|
# the locator name is mutable, so we need to persist a mapping
|
1179
1269
|
# from the locator back to its immutable metafile ID
|
1180
|
-
if
|
1181
|
-
|
1182
|
-
|
1183
|
-
and
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
|
1198
|
-
|
1199
|
-
|
1200
|
-
|
1201
|
-
|
1202
|
-
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1270
|
+
if current_txn_op.type == TransactionOperationType.UPDATE:
|
1271
|
+
# mutable locator updates are used to either transition
|
1272
|
+
# staged streams/partitions (which have no locator alias) to
|
1273
|
+
# committed (and create the locator alias) or to rename an
|
1274
|
+
# existing mutable locator
|
1275
|
+
if mutable_src_locator != mutable_dest_locator:
|
1276
|
+
if mutable_src_locator is not None:
|
1277
|
+
# this update includes a rename
|
1278
|
+
# mark the source metafile mapping as deleted
|
1279
|
+
locator_write_path = (
|
1280
|
+
current_txn_op.src_metafile._write_locator_to_id_map_file(
|
1281
|
+
locator=mutable_src_locator,
|
1282
|
+
success_txn_log_dir=success_txn_log_dir,
|
1283
|
+
parent_obj_path=parent_obj_path,
|
1284
|
+
current_txn_op=current_txn_op,
|
1285
|
+
current_txn_op_type=TransactionOperationType.DELETE,
|
1286
|
+
current_txn_start_time=current_txn_start_time,
|
1287
|
+
current_txn_id=current_txn_id,
|
1288
|
+
filesystem=filesystem,
|
1289
|
+
)
|
1290
|
+
)
|
1291
|
+
locator_write_paths.append(locator_write_path)
|
1292
|
+
# mark the dest metafile mapping as created
|
1293
|
+
locator_write_path = self._write_locator_to_id_map_file(
|
1294
|
+
locator=mutable_dest_locator,
|
1295
|
+
success_txn_log_dir=success_txn_log_dir,
|
1296
|
+
parent_obj_path=parent_obj_path,
|
1297
|
+
current_txn_op=current_txn_op,
|
1298
|
+
current_txn_op_type=TransactionOperationType.CREATE,
|
1299
|
+
current_txn_start_time=current_txn_start_time,
|
1300
|
+
current_txn_id=current_txn_id,
|
1301
|
+
filesystem=filesystem,
|
1302
|
+
)
|
1303
|
+
locator_write_paths.append(locator_write_path)
|
1304
|
+
# else this is a mutable locator no-op update - do nothing
|
1208
1305
|
else:
|
1209
|
-
|
1306
|
+
# this is either a create/delete operation or a
|
1307
|
+
# replace operation that is part of an overwrite/restate
|
1308
|
+
# transaction (e.g. committing a staged replacement for a
|
1309
|
+
# previously committed stream/partition).
|
1310
|
+
locator_write_path = self._write_locator_to_id_map_file(
|
1210
1311
|
locator=mutable_dest_locator,
|
1211
1312
|
success_txn_log_dir=success_txn_log_dir,
|
1212
1313
|
parent_obj_path=parent_obj_path,
|
@@ -1216,13 +1317,15 @@ class Metafile(dict):
|
|
1216
1317
|
current_txn_id=current_txn_id,
|
1217
1318
|
filesystem=filesystem,
|
1218
1319
|
)
|
1320
|
+
locator_write_paths.append(locator_write_path)
|
1219
1321
|
metafile_revision_dir_path = posixpath.join(
|
1220
1322
|
parent_obj_path,
|
1221
1323
|
self.id,
|
1222
1324
|
REVISION_DIR_NAME,
|
1223
1325
|
)
|
1224
1326
|
if (
|
1225
|
-
current_txn_op.type
|
1327
|
+
current_txn_op.type
|
1328
|
+
in [TransactionOperationType.UPDATE, TransactionOperationType.REPLACE]
|
1226
1329
|
and current_txn_op.src_metafile.id != current_txn_op.dest_metafile.id
|
1227
1330
|
):
|
1228
1331
|
# TODO(pdames): block operations including both a rename & replace?
|
@@ -1233,7 +1336,7 @@ class Metafile(dict):
|
|
1233
1336
|
current_txn_op.src_metafile.id,
|
1234
1337
|
REVISION_DIR_NAME,
|
1235
1338
|
)
|
1236
|
-
self._write_metafile_revision(
|
1339
|
+
metafile_write_path = self._write_metafile_revision(
|
1237
1340
|
success_txn_log_dir=success_txn_log_dir,
|
1238
1341
|
revision_dir_path=src_metafile_revision_dir_path,
|
1239
1342
|
current_txn_op=current_txn_op,
|
@@ -1242,9 +1345,10 @@ class Metafile(dict):
|
|
1242
1345
|
current_txn_id=current_txn_id,
|
1243
1346
|
filesystem=filesystem,
|
1244
1347
|
)
|
1348
|
+
metafile_write_paths.append(metafile_write_path)
|
1245
1349
|
try:
|
1246
1350
|
# mark the dest metafile as created
|
1247
|
-
self._write_metafile_revision(
|
1351
|
+
metafile_write_path = self._write_metafile_revision(
|
1248
1352
|
success_txn_log_dir=success_txn_log_dir,
|
1249
1353
|
revision_dir_path=metafile_revision_dir_path,
|
1250
1354
|
current_txn_op=current_txn_op,
|
@@ -1253,14 +1357,13 @@ class Metafile(dict):
|
|
1253
1357
|
current_txn_id=current_txn_id,
|
1254
1358
|
filesystem=filesystem,
|
1255
1359
|
)
|
1256
|
-
|
1257
|
-
|
1258
|
-
if "already exists" not in str(e):
|
1259
|
-
raise e
|
1360
|
+
metafile_write_paths.append(metafile_write_path)
|
1361
|
+
except ObjectAlreadyExistsError:
|
1260
1362
|
# src metafile is being replaced by an existing dest metafile
|
1363
|
+
pass
|
1261
1364
|
|
1262
1365
|
else:
|
1263
|
-
self._write_metafile_revision(
|
1366
|
+
metafile_write_path = self._write_metafile_revision(
|
1264
1367
|
success_txn_log_dir=success_txn_log_dir,
|
1265
1368
|
revision_dir_path=metafile_revision_dir_path,
|
1266
1369
|
current_txn_op=current_txn_op,
|
@@ -1269,6 +1372,8 @@ class Metafile(dict):
|
|
1269
1372
|
current_txn_id=current_txn_id,
|
1270
1373
|
filesystem=filesystem,
|
1271
1374
|
)
|
1375
|
+
metafile_write_paths.append(metafile_write_path)
|
1376
|
+
return metafile_write_paths, locator_write_paths
|
1272
1377
|
|
1273
1378
|
def _list_metafiles(
|
1274
1379
|
self,
|
@@ -1300,7 +1405,7 @@ class Metafile(dict):
|
|
1300
1405
|
current_txn_id=current_txn_id,
|
1301
1406
|
ignore_missing_revision=True,
|
1302
1407
|
)
|
1303
|
-
if mri.exists():
|
1408
|
+
if mri.exists() and mri.txn_op_type != TransactionOperationType.DELETE:
|
1304
1409
|
item = self.read(
|
1305
1410
|
path=mri.path,
|
1306
1411
|
filesystem=filesystem,
|
@@ -6,7 +6,7 @@ from typing import Any, Dict, Optional, List
|
|
6
6
|
from deltacat.storage.model.metafile import Metafile
|
7
7
|
from deltacat.storage.model.locator import Locator, LocatorName
|
8
8
|
|
9
|
-
NamespaceProperties =
|
9
|
+
NamespaceProperties = Dict[str, Any]
|
10
10
|
|
11
11
|
|
12
12
|
class Namespace(Metafile):
|
@@ -46,6 +46,13 @@ class Namespace(Metafile):
|
|
46
46
|
def properties(self, properties: Optional[NamespaceProperties]) -> None:
|
47
47
|
self["properties"] = properties
|
48
48
|
|
49
|
+
def url(self, catalog_name: Optional[str] = None) -> str:
|
50
|
+
return (
|
51
|
+
f"dc://{catalog_name}/{self.namespace}/"
|
52
|
+
if catalog_name
|
53
|
+
else f"namespace://{self.namespace}/"
|
54
|
+
)
|
55
|
+
|
49
56
|
|
50
57
|
class NamespaceLocatorName(LocatorName):
|
51
58
|
def __init__(self, locator: NamespaceLocator):
|