deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +117 -18
- deltacat/api.py +536 -126
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +4 -2
- deltacat/benchmarking/conftest.py +1 -19
- deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
- deltacat/catalog/__init__.py +64 -5
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2444 -282
- deltacat/catalog/model/catalog.py +208 -113
- deltacat/catalog/model/properties.py +63 -24
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/constants.py +5 -0
- deltacat/compute/converter/converter_session.py +207 -52
- deltacat/compute/converter/model/convert_input.py +43 -16
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +64 -19
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +193 -65
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +230 -75
- deltacat/compute/converter/steps/dedupe.py +46 -12
- deltacat/compute/converter/utils/convert_task_options.py +66 -22
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +13 -8
- deltacat/compute/converter/utils/io.py +173 -13
- deltacat/compute/converter/utils/s3u.py +42 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +49 -6
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +10 -0
- deltacat/examples/basic_logging.py +6 -6
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
- deltacat/examples/hello_world.py +4 -2
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
- deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
- deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
- deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
- deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
- deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
- deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
- deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/storage/__init__.py +22 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +231 -6
- deltacat/storage/model/metafile.py +224 -119
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/scan/push_down.py +32 -5
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/shard.py +6 -2
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +53 -29
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +103 -106
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +697 -349
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
- deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +3 -1
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1036 -11
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +345 -37
- deltacat/types/tables.py +2344 -46
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +3 -1
- deltacat/utils/filesystem.py +139 -9
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1373 -192
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/catalog/iceberg/__init__.py +0 -4
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/examples/common/fixtures.py +0 -15
- deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
- deltacat/storage/rivulet/__init__.py +0 -11
- deltacat/storage/rivulet/feather/__init__.py +0 -5
- deltacat/storage/rivulet/parquet/__init__.py +0 -5
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.dist-info/METADATA +0 -65
- deltacat-2.0.dist-info/RECORD +0 -347
- /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
- /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
- /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
- /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
- /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
- /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
- /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
- /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
- /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
- /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
- /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
from typing import List, Optional, Union, Dict, Any
|
2
|
+
|
3
|
+
from ray.data import Dataset as RayDataset
|
4
|
+
from ray.data import read_datasource
|
5
|
+
|
6
|
+
from deltacat.io.datasource.deltacat_datasource import DeltaCatDatasource
|
7
|
+
from deltacat.io.dataset.deltacat_dataset import DeltaCatDataset
|
8
|
+
from deltacat.utils.common import ReadKwargsProvider
|
9
|
+
from deltacat.utils.url import DeltaCatUrl, DeltaCatUrlReader
|
10
|
+
from deltacat.io.datasource.deltacat_datasource import DeltacatReadType
|
11
|
+
|
12
|
+
|
13
|
+
class EmptyReadKwargsProvider(ReadKwargsProvider):
    """Read-kwargs provider that never contributes any keyword arguments.

    Regardless of the datasource type or the keyword arguments it is given,
    this provider resolves to an empty dictionary.
    """

    def _get_kwargs(
        self,
        datasource_type: str,
        kwargs: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Return a new empty dict; both inputs are intentionally ignored."""
        return dict()
|
20
|
+
|
21
|
+
|
22
|
+
def read_deltacat(
    urls: Union[DeltaCatUrl, List[DeltaCatUrl]],
    *,
    deltacat_read_type: DeltacatReadType = DeltacatReadType.DATA,
    timestamp_as_of: Optional[int] = None,
    merge_on_read: Optional[bool] = False,
    read_kwargs_provider: Optional[ReadKwargsProvider] = EmptyReadKwargsProvider(),
) -> DeltaCatDataset:
    """Reads the given DeltaCAT URLs into a Ray Dataset. DeltaCAT URLs can
    either reference objects registered in a DeltaCAT catalog, or unregistered
    external objects that are readable into a Ray Dataset.

    Unless `deltacat_read_type` is METADATA or METADATA_RECURSIVE, all reads
    of registered DeltaCAT catalog object data must resolve to a single table
    version.

    When reading unregistered external objects, the keyword arguments that
    `read_kwargs_provider` resolves for the URL's datastore type are passed
    to the reader resolved for that URL.

    Examples:
        >>> # NOTE(review): the examples assume `DeltaCatUrl` can be built
        >>> # directly from a URL string - confirm against `deltacat.utils.url`.
        >>> import deltacat as dc
        >>> from deltacat.utils.url import DeltaCatUrl
        >>> from deltacat.io.datasource.deltacat_datasource import DeltacatReadType

        >>> # Read the latest active DeltaCAT table version:
        >>> dc.io.read_deltacat(DeltaCatUrl("dc://my_catalog/my_namespace/my_table"))
        >>> # If `my_catalog` is the default catalog, this is equivalent to:
        >>> dc.io.read_deltacat(DeltaCatUrl("namespace://my_namespace/my_table"))
        >>> # If `my_namespace` is the default namespace, this is equivalent to:
        >>> dc.io.read_deltacat(DeltaCatUrl("table://my_table"))

        >>> # Read metadata from all partitions and deltas of the latest active
        >>> # DeltaCAT table version:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace/my_table"),
        ...     deltacat_read_type=DeltacatReadType.METADATA_RECURSIVE,
        ... )
        >>> # Since "default" always resolves to the latest active table version,
        >>> # this is equivalent to:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace/my_table/default"),
        ...     deltacat_read_type=DeltacatReadType.METADATA_RECURSIVE,
        ... )

        >>> # Read only the latest active table version's top-level metadata:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace/my_table/default"),
        ...     deltacat_read_type=DeltacatReadType.METADATA,
        ... )

        >>> # Read only top-level metadata from a DeltaCAT table:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace/my_table"),
        ...     deltacat_read_type=DeltacatReadType.METADATA,
        ... )

        >>> # Read top-level table metadata from all table versions:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace/my_table/*"),
        ...     deltacat_read_type=DeltacatReadType.METADATA,
        ... )

        >>> # Read metadata from all partitions and deltas of all table versions:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace/my_table/*"),
        ...     deltacat_read_type=DeltacatReadType.METADATA_RECURSIVE,
        ... )

        >>> # Read metadata from all tables and table versions of the namespace:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace/*"),
        ...     deltacat_read_type=DeltacatReadType.METADATA_RECURSIVE,
        ... )

        >>> # Read metadata from the latest active table version for each
        >>> # table in the namespace:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace"),
        ...     deltacat_read_type=DeltacatReadType.METADATA_RECURSIVE,
        ... )

        >>> # Read metadata from the latest active table version for each
        >>> # table in the catalog's default namespace:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog"),
        ...     deltacat_read_type=DeltacatReadType.METADATA_RECURSIVE,
        ... )

        >>> # Read metadata from all table versions for each table in each
        >>> # catalog namespace:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/*"),
        ...     deltacat_read_type=DeltacatReadType.METADATA_RECURSIVE,
        ... )

        >>> # Read the Iceberg stream of the latest active DeltaCAT table version:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("dc://my_catalog/my_namespace/my_table/default/iceberg")
        ... )
        >>> # Or, if `my_catalog` is the default catalog, this is equivalent to:
        >>> dc.io.read_deltacat(
        ...     DeltaCatUrl("namespace://my_namespace/my_table/default/iceberg")
        ... )
        >>> # Or, if `my_namespace` is the default namespace, this is equivalent to:
        >>> dc.io.read_deltacat(DeltaCatUrl("table://my_table/default/iceberg"))

        >>> # Read an external unregistered Iceberg table `my_db.my_table`:
        >>> dc.io.read_deltacat(DeltaCatUrl("iceberg://my_db.my_table"))

        >>> # Read an external unregistered audio file from /my/audio.mp4:
        >>> dc.io.read_deltacat(DeltaCatUrl("audio+file:///my/audio.mp4"))

        >>> # Read an external unregistered audio file from s3://my/audio.mp4:
        >>> dc.io.read_deltacat(DeltaCatUrl("audio+s3://my/audio.mp4"))

    Args:
        urls: The DeltaCAT URL, or list of DeltaCAT URLs, to read.
        deltacat_read_type: If METADATA, reads only DeltaCAT metadata for the
            given URL and skips both recursive metadata expansion and reads
            of the underlying data files. If METADATA_RECURSIVE then recursively
            expands child metadata but does not read underlying data files. If
            DATA then recursively expands child metadata to discover and read
            all underlying data files.
        timestamp_as_of: Reads a historic snapshot of the given paths as-of the
            given millisecond-precision epoch timestamp (only used when reading
            registered DeltaCAT catalog objects).
        merge_on_read: If True, merges all unmaterialized inserts, updates,
            and deletes in the registered DeltaCAT table version being read. Only
            applicable if `deltacat_read_type` is DATA.
        read_kwargs_provider: Resolves
            :class:`~deltacat.types.media.DatasourceType` string keys to
            kwarg dictionaries to pass to the resolved
            :class:`~ray.data.Datasource` implementation for each distinct
            DeltaCAT URL type. If None, external (unregistered) URLs are read
            with no additional keyword arguments.

    Returns:
        DeltaCatDataset holding the union of the records read from every
        specified URL.

    Raises:
        ValueError: If `urls` is an empty collection.
    """
    # TODO(pdames): The below implementation serializes reads of each URL and
    #  then unions their respective datasets together. While this was an easy
    #  starting point to implement, a more efficient implementation should push
    #  all URLs down into `DeltacatDatasource` to parallelize all reads
    #  (i.e., by returning the `ReadTask` for all datasources in
    #  `get_read_tasks()` and estimating the corresponding memory size across
    #  all datasources in `estimate_inmemory_data_size()`.

    # The signature permits a single URL, so normalize it to a list before
    # iterating instead of trying to iterate over the URL object itself.
    url_list = [urls] if isinstance(urls, DeltaCatUrl) else list(urls)
    if not url_list:
        raise ValueError("At least one DeltaCAT URL is required to read a dataset.")

    dataset: Optional[RayDataset] = None
    for url in url_list:
        if not url.is_deltacat_catalog_url():
            # this URL points to an external unregistered Ray Datasource
            # TODO(pdames): Honor metadata only reads of external datasources
            #  by registering only file paths & metadata in delta manifests.
            reader = DeltaCatUrlReader(url)
            # `read_kwargs_provider` is typed Optional, so tolerate None here
            # rather than failing with "NoneType is not callable".
            external_read_kwargs = (
                read_kwargs_provider(url.datastore_type, {})
                if read_kwargs_provider is not None
                else {}
            )
            next_ds = reader.read(external_read_kwargs)
        else:
            # this URL points to a registered DeltaCAT object
            next_ds = read_datasource(
                DeltaCatDatasource(
                    url=url,
                    deltacat_read_type=deltacat_read_type,
                    timestamp_as_of=timestamp_as_of,
                    merge_on_read=merge_on_read,
                    read_kwargs_provider=read_kwargs_provider,
                )
            )
        # Union the last dataset read into the result set. `Dataset.union`
        # returns a new dataset rather than mutating in place, so the result
        # must be rebound or every URL after the first is silently dropped.
        dataset = next_ds if dataset is None else dataset.union(next_ds)
    return DeltaCatDataset.from_dataset(dataset)
|
deltacat/storage/__init__.py
CHANGED
@@ -20,6 +20,9 @@ from deltacat.storage.model.metafile import (
|
|
20
20
|
from deltacat.storage.model.transaction import (
|
21
21
|
TransactionOperation,
|
22
22
|
Transaction,
|
23
|
+
read_transaction,
|
24
|
+
transactions,
|
25
|
+
transaction,
|
23
26
|
)
|
24
27
|
from deltacat.storage.model.namespace import (
|
25
28
|
Namespace,
|
@@ -31,6 +34,7 @@ from deltacat.storage.model.partition import (
|
|
31
34
|
PartitionLocator,
|
32
35
|
PartitionLocatorAlias,
|
33
36
|
PartitionKey,
|
37
|
+
PartitionKeyList,
|
34
38
|
PartitionScheme,
|
35
39
|
PartitionSchemeList,
|
36
40
|
PartitionValues,
|
@@ -43,6 +47,9 @@ from deltacat.storage.model.schema import (
|
|
43
47
|
NestedFieldName,
|
44
48
|
Schema,
|
45
49
|
SchemaList,
|
50
|
+
SchemaUpdate,
|
51
|
+
SchemaUpdateOperation,
|
52
|
+
SchemaUpdateOperations,
|
46
53
|
)
|
47
54
|
from deltacat.storage.model.stream import (
|
48
55
|
Stream,
|
@@ -75,9 +82,11 @@ from deltacat.storage.model.transform import (
|
|
75
82
|
MonthTransform,
|
76
83
|
YearTransform,
|
77
84
|
TruncateTransform,
|
85
|
+
TruncateStrategy,
|
78
86
|
)
|
79
87
|
from deltacat.storage.model.types import (
|
80
88
|
CommitState,
|
89
|
+
Dataset,
|
81
90
|
DeltaType,
|
82
91
|
DistributedDataset,
|
83
92
|
LifecycleState,
|
@@ -87,11 +96,12 @@ from deltacat.storage.model.types import (
|
|
87
96
|
SchemaConsistencyType,
|
88
97
|
StreamFormat,
|
89
98
|
SortOrder,
|
90
|
-
TransactionType,
|
91
99
|
TransactionOperationType,
|
100
|
+
TransactionStatus,
|
92
101
|
)
|
93
102
|
from deltacat.storage.model.sort_key import (
|
94
103
|
SortKey,
|
104
|
+
SortKeyList,
|
95
105
|
SortScheme,
|
96
106
|
SortSchemeList,
|
97
107
|
)
|
@@ -102,6 +112,7 @@ __all__ = [
|
|
102
112
|
"BucketTransform",
|
103
113
|
"BucketTransformParameters",
|
104
114
|
"CommitState",
|
115
|
+
"Dataset",
|
105
116
|
"DayTransform",
|
106
117
|
"Delta",
|
107
118
|
"DeltaLocator",
|
@@ -136,6 +147,7 @@ __all__ = [
|
|
136
147
|
"NullOrder",
|
137
148
|
"Partition",
|
138
149
|
"PartitionKey",
|
150
|
+
"PartitionKeyList",
|
139
151
|
"PartitionLocator",
|
140
152
|
"PartitionLocatorAlias",
|
141
153
|
"PartitionScheme",
|
@@ -143,8 +155,12 @@ __all__ = [
|
|
143
155
|
"PartitionValues",
|
144
156
|
"Schema",
|
145
157
|
"SchemaList",
|
158
|
+
"SchemaUpdate",
|
159
|
+
"SchemaUpdateOperation",
|
160
|
+
"SchemaUpdateOperations",
|
146
161
|
"SchemaConsistencyType",
|
147
162
|
"SortKey",
|
163
|
+
"SortKeyList",
|
148
164
|
"SortOrder",
|
149
165
|
"SortScheme",
|
150
166
|
"SortSchemeList",
|
@@ -161,13 +177,17 @@ __all__ = [
|
|
161
177
|
"Transaction",
|
162
178
|
"TransactionOperation",
|
163
179
|
"TransactionOperationType",
|
164
|
-
"TransactionType",
|
180
|
+
"TransactionStatus",
|
165
181
|
"Transform",
|
166
182
|
"TransformName",
|
167
183
|
"TransformParameters",
|
168
184
|
"TruncateTransform",
|
169
185
|
"TruncateTransformParameters",
|
186
|
+
"TruncateStrategy",
|
170
187
|
"UnknownTransform",
|
171
188
|
"VoidTransform",
|
172
189
|
"YearTransform",
|
190
|
+
"read_transaction",
|
191
|
+
"transactions",
|
192
|
+
"transaction",
|
173
193
|
]
|
deltacat/storage/interface.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Any, Callable, Dict, List, Optional, Union, Tuple
|
|
2
2
|
|
3
3
|
from deltacat.storage import (
|
4
4
|
EntryParams,
|
5
|
+
EntryType,
|
5
6
|
Delta,
|
6
7
|
DeltaLocator,
|
7
8
|
DeltaProperties,
|
@@ -30,11 +31,12 @@ from deltacat.storage import (
|
|
30
31
|
TableVersionProperties,
|
31
32
|
)
|
32
33
|
from deltacat.storage.model.manifest import Manifest
|
34
|
+
from deltacat.storage.model.partition import UNKNOWN_PARTITION_ID
|
33
35
|
from deltacat.types.media import (
|
34
36
|
ContentType,
|
35
37
|
DistributedDatasetType,
|
36
38
|
StorageType,
|
37
|
-
|
39
|
+
DatasetType,
|
38
40
|
)
|
39
41
|
from deltacat.utils.common import ReadKwargsProvider
|
40
42
|
|
@@ -205,7 +207,7 @@ def get_latest_delta(
|
|
205
207
|
|
206
208
|
def download_delta(
|
207
209
|
delta_like: Union[Delta, DeltaLocator],
|
208
|
-
table_type:
|
210
|
+
table_type: DatasetType = DatasetType.PYARROW,
|
209
211
|
storage_type: StorageType = StorageType.DISTRIBUTED,
|
210
212
|
max_parallelism: Optional[int] = None,
|
211
213
|
columns: Optional[List[str]] = None,
|
@@ -216,7 +218,7 @@ def download_delta(
|
|
216
218
|
**kwargs,
|
217
219
|
) -> Union[LocalDataset, DistributedDataset]: # type: ignore
|
218
220
|
"""
|
219
|
-
|
221
|
+
Reads the given delta or delta locator into either a list of
|
220
222
|
tables resident in the local node's memory, or into a dataset distributed
|
221
223
|
across this Ray cluster's object store memory. Ordered table N of a local
|
222
224
|
table list, or ordered block N of a distributed dataset, always contain
|
@@ -228,19 +230,19 @@ def download_delta(
|
|
228
230
|
def download_delta_manifest_entry(
|
229
231
|
delta_like: Union[Delta, DeltaLocator],
|
230
232
|
entry_index: int,
|
231
|
-
table_type:
|
233
|
+
table_type: DatasetType = DatasetType.PYARROW,
|
232
234
|
columns: Optional[List[str]] = None,
|
233
235
|
file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
|
234
236
|
*args,
|
235
237
|
**kwargs,
|
236
238
|
) -> LocalTable:
|
237
239
|
"""
|
238
|
-
|
240
|
+
Reads a single manifest entry into the specified table type for the
|
239
241
|
given delta or delta locator. If a delta is provided with a non-empty
|
240
|
-
manifest, then the entry is
|
241
|
-
manifest is first retrieved then the given entry index
|
242
|
+
manifest, then the entry is read from this manifest. Otherwise, the
|
243
|
+
manifest is first retrieved, then the given entry index is read.
|
242
244
|
|
243
|
-
NOTE: The entry will be
|
245
|
+
NOTE: The entry will be read in the current node's memory.
|
244
246
|
"""
|
245
247
|
raise NotImplementedError("download_delta_manifest_entry not implemented")
|
246
248
|
|
@@ -288,9 +290,9 @@ def create_table_version(
|
|
288
290
|
namespace: str,
|
289
291
|
table_name: str,
|
290
292
|
table_version: Optional[str] = None,
|
293
|
+
lifecycle_state: Optional[LifecycleState] = LifecycleState.CREATED,
|
291
294
|
schema: Optional[Schema] = None,
|
292
295
|
partition_scheme: Optional[PartitionScheme] = None,
|
293
|
-
# TODO(pdames): rename to `sort_scheme`
|
294
296
|
sort_keys: Optional[SortScheme] = None,
|
295
297
|
table_version_description: Optional[str] = None,
|
296
298
|
table_version_properties: Optional[TableVersionProperties] = None,
|
@@ -299,9 +301,9 @@ def create_table_version(
|
|
299
301
|
supported_content_types: Optional[List[ContentType]] = None,
|
300
302
|
*args,
|
301
303
|
**kwargs,
|
302
|
-
) -> Tuple[
|
304
|
+
) -> Tuple[Table, TableVersion, Stream]:
|
303
305
|
"""
|
304
|
-
Create a table version with
|
306
|
+
Create a table version with the given or CREATED lifecycle state and an empty delta
|
305
307
|
stream. Table versions may be schemaless and unpartitioned to improve write
|
306
308
|
performance, or have their writes governed by a schema and partition scheme
|
307
309
|
to improve data consistency and read performance.
|
@@ -314,6 +316,20 @@ def create_table_version(
|
|
314
316
|
raise NotImplementedError("create_table_version not implemented")
|
315
317
|
|
316
318
|
|
319
|
+
def create_table(
|
320
|
+
namespace: str,
|
321
|
+
table_name: str,
|
322
|
+
description: Optional[str] = None,
|
323
|
+
properties: Optional[TableProperties] = None,
|
324
|
+
*args,
|
325
|
+
**kwargs,
|
326
|
+
) -> Table:
|
327
|
+
"""
|
328
|
+
Create a new table. Raises an error if the given table already exists.
|
329
|
+
"""
|
330
|
+
raise NotImplementedError("create_table not implemented")
|
331
|
+
|
332
|
+
|
317
333
|
def update_table(
|
318
334
|
namespace: str,
|
319
335
|
table_name: str,
|
@@ -322,7 +338,7 @@ def update_table(
|
|
322
338
|
new_table_name: Optional[str] = None,
|
323
339
|
*args,
|
324
340
|
**kwargs,
|
325
|
-
) ->
|
341
|
+
) -> Table:
|
326
342
|
"""
|
327
343
|
Update table metadata describing the table versions it contains. By default,
|
328
344
|
a table's properties are empty, and its description is equal to that given
|
@@ -345,7 +361,7 @@ def update_table_version(
|
|
345
361
|
sort_keys: Optional[SortScheme] = None,
|
346
362
|
*args,
|
347
363
|
**kwargs,
|
348
|
-
) ->
|
364
|
+
) -> Tuple[Optional[Table], TableVersion, Optional[Stream]]:
|
349
365
|
"""
|
350
366
|
Update a table version. Notably, updating an unreleased table version's
|
351
367
|
lifecycle state to 'active' telegraphs that it is ready for external
|
@@ -410,15 +426,15 @@ def delete_stream(
|
|
410
426
|
|
411
427
|
def delete_table(
|
412
428
|
namespace: str,
|
413
|
-
|
429
|
+
table_name: str,
|
414
430
|
purge: bool = False,
|
415
431
|
*args,
|
416
432
|
**kwargs,
|
417
433
|
) -> None:
|
418
434
|
"""
|
419
|
-
Drops the given table
|
420
|
-
|
421
|
-
|
435
|
+
Drops the given table from the catalog. If purge is True, also removes
|
436
|
+
all data files associated with the table. Raises an error if the given table
|
437
|
+
does not exist.
|
422
438
|
"""
|
423
439
|
raise NotImplementedError("delete_table not implemented")
|
424
440
|
|
@@ -430,10 +446,9 @@ def delete_namespace(
|
|
430
446
|
**kwargs,
|
431
447
|
) -> None:
|
432
448
|
"""
|
433
|
-
Drops
|
434
|
-
|
435
|
-
|
436
|
-
does not exist.
|
449
|
+
Drops the given namespace from the catalog. If purge is True, also removes
|
450
|
+
all data files associated with the namespace. Raises an error if the given
|
451
|
+
namespace does not exist.
|
437
452
|
"""
|
438
453
|
raise NotImplementedError("drop_namespace not implemented")
|
439
454
|
|
@@ -509,6 +524,7 @@ def stage_partition(
|
|
509
524
|
def commit_partition(
|
510
525
|
partition: Partition,
|
511
526
|
previous_partition: Optional[Partition] = None,
|
527
|
+
expected_previous_partition_id: Optional[str] = UNKNOWN_PARTITION_ID,
|
512
528
|
*args,
|
513
529
|
**kwargs,
|
514
530
|
) -> Partition:
|
@@ -586,23 +602,19 @@ def stage_delta(
|
|
586
602
|
max_records_per_entry: Optional[int] = None,
|
587
603
|
author: Optional[ManifestAuthor] = None,
|
588
604
|
properties: Optional[DeltaProperties] = None,
|
589
|
-
|
605
|
+
table_writer_kwargs: Optional[Dict[str, Any]] = None,
|
590
606
|
content_type: ContentType = ContentType.PARQUET,
|
591
607
|
entry_params: Optional[EntryParams] = None,
|
608
|
+
entry_type: Optional[EntryType] = EntryType.DATA,
|
609
|
+
schema: Optional[Schema] = None,
|
610
|
+
sort_scheme_id: Optional[str] = None,
|
592
611
|
*args,
|
593
612
|
**kwargs,
|
594
613
|
) -> Delta:
|
595
614
|
"""
|
596
|
-
Writes the given
|
615
|
+
Writes the given dataset to 1 or more files. Returns an unregistered
|
597
616
|
delta whose manifest entries point to the uploaded files. Applies any
|
598
617
|
schema consistency policies configured for the parent table version.
|
599
|
-
|
600
|
-
The partition spec will be used to split the input table into
|
601
|
-
multiple files. Optionally, partition_values can be provided to avoid
|
602
|
-
this method to recompute partition_values from the provided data.
|
603
|
-
|
604
|
-
Raises an error if the provided data does not conform to a unique ordered
|
605
|
-
list of partition_values
|
606
618
|
"""
|
607
619
|
raise NotImplementedError("stage_delta not implemented")
|
608
620
|
|
@@ -723,13 +735,23 @@ def table_version_exists(
|
|
723
735
|
|
724
736
|
def can_categorize(e: BaseException, *args, **kwargs) -> bool:
|
725
737
|
"""
|
726
|
-
|
738
|
+
True if the input error originated from the storage
|
739
|
+
implementation layer and can be categorized under an
|
740
|
+
existing DeltaCatError. The "categorize_errors" decorator
|
741
|
+
uses this to determine if an unknown error from the storage
|
742
|
+
implementation can be categorized prior to casting it to
|
743
|
+
the equivalent DeltaCatError via `raise_categorized_error`.
|
727
744
|
"""
|
728
745
|
raise NotImplementedError
|
729
746
|
|
730
747
|
|
731
748
|
def raise_categorized_error(e: BaseException, *args, **kwargs):
|
732
749
|
"""
|
733
|
-
|
750
|
+
Casts a categorizable error that originated from the storage
|
751
|
+
implementation layer to its equivalent DeltaCatError
|
752
|
+
for uniform handling (e.g., determining whether an error
|
753
|
+
is retryable or not) via the "categorize_errors" decorator.
|
754
|
+
Raises an UnclassifiedDeltaCatError from the input exception
|
755
|
+
if the error cannot be categorized.
|
734
756
|
"""
|
735
757
|
raise NotImplementedError
|