deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,23 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
from typing import Any, Dict, Optional
|
4
|
+
from typing import Any, Dict, Optional, List
|
5
5
|
|
6
|
-
from deltacat.storage.model.
|
6
|
+
from deltacat.storage.model.metafile import Metafile
|
7
|
+
from deltacat.storage.model.locator import Locator, LocatorName
|
7
8
|
|
9
|
+
NamespaceProperties = Dict[str, Any]
|
8
10
|
|
9
|
-
|
11
|
+
|
12
|
+
class Namespace(Metafile):
|
10
13
|
@staticmethod
|
11
14
|
def of(
|
12
|
-
locator: Optional[NamespaceLocator],
|
15
|
+
locator: Optional[NamespaceLocator],
|
16
|
+
properties: Optional[NamespaceProperties] = None,
|
13
17
|
) -> Namespace:
|
14
18
|
namespace = Namespace()
|
15
19
|
namespace.locator = locator
|
16
|
-
namespace.
|
20
|
+
namespace.properties = properties
|
17
21
|
return namespace
|
18
22
|
|
19
23
|
@property
|
@@ -35,12 +39,31 @@ class Namespace(dict):
|
|
35
39
|
return None
|
36
40
|
|
37
41
|
@property
|
38
|
-
def
|
39
|
-
return self.get("
|
42
|
+
def properties(self) -> Optional[NamespaceProperties]:
|
43
|
+
return self.get("properties")
|
44
|
+
|
45
|
+
@properties.setter
|
46
|
+
def properties(self, properties: Optional[NamespaceProperties]) -> None:
|
47
|
+
self["properties"] = properties
|
48
|
+
|
49
|
+
def url(self, catalog_name: Optional[str] = None) -> str:
|
50
|
+
return (
|
51
|
+
f"dc://{catalog_name}/{self.namespace}/"
|
52
|
+
if catalog_name
|
53
|
+
else f"namespace://{self.namespace}/"
|
54
|
+
)
|
55
|
+
|
56
|
+
|
57
|
+
class NamespaceLocatorName(LocatorName):
|
58
|
+
def __init__(self, locator: NamespaceLocator):
|
59
|
+
self.locator = locator
|
60
|
+
|
61
|
+
@property
|
62
|
+
def immutable_id(self) -> Optional[str]:
|
63
|
+
return None
|
40
64
|
|
41
|
-
|
42
|
-
|
43
|
-
self["permissions"] = permissions
|
65
|
+
def parts(self) -> List[str]:
|
66
|
+
return [self.locator.namespace]
|
44
67
|
|
45
68
|
|
46
69
|
class NamespaceLocator(Locator, dict):
|
@@ -50,6 +73,14 @@ class NamespaceLocator(Locator, dict):
|
|
50
73
|
namespace_locator.namespace = namespace
|
51
74
|
return namespace_locator
|
52
75
|
|
76
|
+
@property
|
77
|
+
def name(self) -> NamespaceLocatorName:
|
78
|
+
return NamespaceLocatorName(self)
|
79
|
+
|
80
|
+
@property
|
81
|
+
def parent(self) -> Optional[Locator]:
|
82
|
+
return None
|
83
|
+
|
53
84
|
@property
|
54
85
|
def namespace(self) -> Optional[str]:
|
55
86
|
return self.get("namespace")
|
@@ -57,11 +88,3 @@ class NamespaceLocator(Locator, dict):
|
|
57
88
|
@namespace.setter
|
58
89
|
def namespace(self, namespace: Optional[str]) -> None:
|
59
90
|
self["namespace"] = namespace
|
60
|
-
|
61
|
-
def canonical_string(self) -> str:
|
62
|
-
"""
|
63
|
-
Returns a unique string for the given locator that can be used
|
64
|
-
for equality checks (i.e. two locators are equal if they have
|
65
|
-
the same canonical string).
|
66
|
-
"""
|
67
|
-
return self.namespace
|
@@ -1,39 +1,80 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
|
-
from typing import Any, Dict, List, Optional, Union
|
4
3
|
|
5
|
-
import
|
6
|
-
|
7
|
-
|
4
|
+
import json
|
5
|
+
import posixpath
|
6
|
+
|
7
|
+
import pyarrow
|
8
|
+
|
9
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
10
|
+
|
11
|
+
from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
|
12
|
+
from deltacat.constants import TXN_DIR_NAME
|
13
|
+
from deltacat.storage.model.schema import (
|
14
|
+
FieldLocator,
|
15
|
+
)
|
16
|
+
from deltacat.storage.model.locator import (
|
17
|
+
Locator,
|
18
|
+
LocatorName,
|
19
|
+
)
|
8
20
|
from deltacat.storage.model.namespace import NamespaceLocator
|
9
21
|
from deltacat.storage.model.stream import StreamLocator
|
10
|
-
from deltacat.storage.model.table import
|
22
|
+
from deltacat.storage.model.table import (
|
23
|
+
TableLocator,
|
24
|
+
Table,
|
25
|
+
)
|
11
26
|
from deltacat.storage.model.table_version import TableVersionLocator
|
12
|
-
from deltacat.storage.model.
|
27
|
+
from deltacat.storage.model.transform import Transform
|
28
|
+
from deltacat.storage.model.types import (
|
29
|
+
CommitState,
|
30
|
+
StreamFormat,
|
31
|
+
)
|
13
32
|
from deltacat.types.media import ContentType
|
14
33
|
|
34
|
+
if TYPE_CHECKING:
|
35
|
+
from deltacat.compute.compactor import RoundCompletionInfo
|
36
|
+
|
37
|
+
|
38
|
+
"""
|
39
|
+
An ordered list of partition values. Partition values are typically derived
|
40
|
+
by applying one or more transforms to a table's fields.
|
41
|
+
"""
|
42
|
+
PartitionValues = List[Any]
|
43
|
+
|
44
|
+
"""
|
45
|
+
Constants for special partition types.
|
46
|
+
"""
|
47
|
+
UNPARTITIONED_SCHEME_NAME = "unpartitioned_scheme"
|
48
|
+
UNPARTITIONED_SCHEME_ID = "deadbeef-7277-49a4-a195-fdc8ed235d42"
|
49
|
+
UNKNOWN_PARTITION_ID = "deadbeef-2fe7-4557-82c9-da53b1862003" # a partition ID that is assumed to exist but is not known
|
50
|
+
UNSPECIFIED_PARTITION_ID = "deadbeef-5bff-41ea-b82c-e531f445632b" # a partition ID that has been left intentionally unspecified
|
51
|
+
|
15
52
|
|
16
|
-
class Partition(
|
53
|
+
class Partition(Metafile):
|
17
54
|
@staticmethod
|
18
55
|
def of(
|
19
56
|
locator: Optional[PartitionLocator],
|
20
|
-
schema: Optional[Union[pa.Schema, str, bytes]],
|
21
57
|
content_types: Optional[List[ContentType]],
|
22
58
|
state: Optional[CommitState] = None,
|
23
59
|
previous_stream_position: Optional[int] = None,
|
24
60
|
previous_partition_id: Optional[str] = None,
|
25
61
|
stream_position: Optional[int] = None,
|
26
|
-
|
62
|
+
partition_scheme_id: Optional[str] = None,
|
63
|
+
compaction_round_completion_info: Optional[RoundCompletionInfo] = None,
|
27
64
|
) -> Partition:
|
28
65
|
partition = Partition()
|
29
66
|
partition.locator = locator
|
30
|
-
partition.schema = schema
|
31
67
|
partition.content_types = content_types
|
32
68
|
partition.state = state
|
33
69
|
partition.previous_stream_position = previous_stream_position
|
34
70
|
partition.previous_partition_id = previous_partition_id
|
35
71
|
partition.stream_position = stream_position
|
36
|
-
partition.
|
72
|
+
partition.partition_scheme_id = (
|
73
|
+
partition_scheme_id
|
74
|
+
if locator and locator.partition_values
|
75
|
+
else UNPARTITIONED_SCHEME_ID
|
76
|
+
)
|
77
|
+
partition.compaction_round_completion_info = compaction_round_completion_info
|
37
78
|
return partition
|
38
79
|
|
39
80
|
@property
|
@@ -48,12 +89,8 @@ class Partition(dict):
|
|
48
89
|
self["partitionLocator"] = partition_locator
|
49
90
|
|
50
91
|
@property
|
51
|
-
def
|
52
|
-
return
|
53
|
-
|
54
|
-
@schema.setter
|
55
|
-
def schema(self, schema: Optional[Union[pa.Schema, str, bytes]]) -> None:
|
56
|
-
self["schema"] = schema
|
92
|
+
def locator_alias(self) -> Optional[PartitionLocatorAlias]:
|
93
|
+
return PartitionLocatorAlias.of(self)
|
57
94
|
|
58
95
|
@property
|
59
96
|
def content_types(self) -> Optional[List[ContentType]]:
|
@@ -104,12 +141,33 @@ class Partition(dict):
|
|
104
141
|
self["streamPosition"] = stream_position
|
105
142
|
|
106
143
|
@property
|
107
|
-
def
|
108
|
-
return self.get("
|
144
|
+
def partition_scheme_id(self) -> Optional[str]:
|
145
|
+
return self.get("partitionSchemeId")
|
109
146
|
|
110
|
-
@
|
111
|
-
def
|
112
|
-
self["
|
147
|
+
@partition_scheme_id.setter
|
148
|
+
def partition_scheme_id(self, partition_scheme_id: Optional[str]) -> None:
|
149
|
+
self["partitionSchemeId"] = partition_scheme_id
|
150
|
+
|
151
|
+
@property
|
152
|
+
def compaction_round_completion_info(self) -> Optional[RoundCompletionInfo]:
|
153
|
+
"""
|
154
|
+
Round completion info for compaction operations.
|
155
|
+
This replaces the need for separate round completion files.
|
156
|
+
"""
|
157
|
+
val: Dict[str, Any] = self.get("compactionRoundCompletionInfo")
|
158
|
+
if val is not None:
|
159
|
+
# Import here to avoid circular imports
|
160
|
+
from deltacat.compute.compactor import RoundCompletionInfo
|
161
|
+
|
162
|
+
if not isinstance(val, RoundCompletionInfo):
|
163
|
+
self["compactionRoundCompletionInfo"] = val = RoundCompletionInfo(val)
|
164
|
+
return val
|
165
|
+
|
166
|
+
@compaction_round_completion_info.setter
|
167
|
+
def compaction_round_completion_info(
|
168
|
+
self, compaction_round_completion_info: Optional[RoundCompletionInfo]
|
169
|
+
) -> None:
|
170
|
+
self["compactionRoundCompletionInfo"] = compaction_round_completion_info
|
113
171
|
|
114
172
|
@property
|
115
173
|
def partition_id(self) -> Optional[str]:
|
@@ -125,11 +183,19 @@ class Partition(dict):
|
|
125
183
|
return partition_locator.stream_id
|
126
184
|
return None
|
127
185
|
|
186
|
+
@property
|
187
|
+
def stream_format(self) -> Optional[str]:
|
188
|
+
partition_locator = self.locator
|
189
|
+
if partition_locator:
|
190
|
+
return partition_locator.stream_format
|
191
|
+
return None
|
192
|
+
|
128
193
|
@property
|
129
194
|
def partition_values(self) -> Optional[PartitionValues]:
|
130
195
|
partition_locator = self.locator
|
131
196
|
if partition_locator:
|
132
197
|
return partition_locator.partition_values
|
198
|
+
return None
|
133
199
|
|
134
200
|
@property
|
135
201
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
@@ -163,7 +229,7 @@ class Partition(dict):
|
|
163
229
|
def storage_type(self) -> Optional[str]:
|
164
230
|
partition_locator = self.locator
|
165
231
|
if partition_locator:
|
166
|
-
return partition_locator.
|
232
|
+
return partition_locator.stream_format
|
167
233
|
return None
|
168
234
|
|
169
235
|
@property
|
@@ -187,12 +253,78 @@ class Partition(dict):
|
|
187
253
|
return partition_locator.table_version
|
188
254
|
return None
|
189
255
|
|
256
|
+
def url(self, catalog_name: Optional[str] = None) -> str:
|
257
|
+
return (
|
258
|
+
f"dc://{catalog_name}/{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{json.dumps(self.partition_values)}/"
|
259
|
+
if catalog_name
|
260
|
+
else f"table://{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{json.dumps(self.partition_values)}/"
|
261
|
+
)
|
262
|
+
|
190
263
|
def is_supported_content_type(self, content_type: ContentType) -> bool:
|
191
264
|
supported_content_types = self.content_types
|
192
265
|
return (not supported_content_types) or (
|
193
266
|
content_type in supported_content_types
|
194
267
|
)
|
195
268
|
|
269
|
+
def to_serializable(self) -> Partition:
    """Return a form of this partition intended for serialization.

    The result comes from ``Partition.update_for(self)`` (presumably a copy —
    confirm against ``update_for``). If it has a table locator, that locator
    is replaced with one whose namespace and table name are both this
    partition's ID.

    Returns:
        The partition produced by ``Partition.update_for``.
    """
    result: Partition = Partition.update_for(self)
    if not result.table_locator:
        return result
    # replace the mutable table locator
    placeholder_locator = TableLocator.at(
        namespace=self.id,
        table_name=self.id,
    )
    result.table_version_locator.table_locator = placeholder_locator
    return result
|
278
|
+
|
279
|
+
def from_serializable(
    self,
    path: str,
    filesystem: Optional[pyarrow.fs.FileSystem] = None,
) -> Partition:
    """Restore the table locator from its mapped immutable metafile ID.

    When this partition's table locator has a table name equal to the
    partition's own ID, the latest revision of the parent table metafile is
    read and its locator is written into the table version locator.

    Args:
        path: Base metafile path from which the parent revision directory is
            resolved.
        filesystem: Optional filesystem passed to the revision lookup and to
            the table read.

    Returns:
        This partition, mutated in place when a restore was needed.
    """
    # Nothing to restore unless the table name holds this partition's ID.
    if not self.table_locator or self.table_locator.table_name != self.id:
        return self

    table_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
        base_metafile_path=path,
        parent_number=3,
    )
    # The transaction log directory is resolved three directory levels above
    # the parent revision directory.
    txn_log_parent = table_rev_dir_path
    for _ in range(3):
        txn_log_parent = posixpath.dirname(txn_log_parent)
    txn_log_dir = posixpath.join(txn_log_parent, TXN_DIR_NAME)

    latest_revision = MetafileRevisionInfo.latest_revision(
        revision_dir_path=table_rev_dir_path,
        filesystem=filesystem,
        success_txn_log_dir=txn_log_dir,
    )
    table = Table.read(latest_revision.path, filesystem)
    self.table_version_locator.table_locator = table.locator
    return self
|
308
|
+
|
309
|
+
|
310
|
+
class PartitionLocatorName(LocatorName):
    """Locator name backed by a ``PartitionLocator``."""

    def __init__(self, locator: PartitionLocator):
        """Keep a reference to the partition locator this name reads from."""
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """Return the wrapped locator's partition ID."""
        return self.locator.partition_id

    @immutable_id.setter
    def immutable_id(self, immutable_id: Optional[str]):
        """Write ``immutable_id`` through to the wrapped locator's partition ID."""
        self.locator.partition_id = immutable_id

    def parts(self) -> List[str]:
        """Return the name parts: stringified partition values, then partition ID."""
        wrapped = self.locator
        values_part = str(wrapped.partition_values)
        return [values_part, wrapped.partition_id]
|
327
|
+
|
196
328
|
|
197
329
|
class PartitionLocator(Locator, dict):
|
198
330
|
@staticmethod
|
@@ -223,16 +355,20 @@ class PartitionLocator(Locator, dict):
|
|
223
355
|
table_name: Optional[str],
|
224
356
|
table_version: Optional[str],
|
225
357
|
stream_id: Optional[str],
|
226
|
-
|
358
|
+
stream_format: Optional[StreamFormat],
|
227
359
|
partition_values: Optional[PartitionValues],
|
228
360
|
partition_id: Optional[str],
|
229
361
|
) -> PartitionLocator:
|
230
|
-
stream_locator =
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
362
|
+
stream_locator = (
|
363
|
+
StreamLocator.at(
|
364
|
+
namespace,
|
365
|
+
table_name,
|
366
|
+
table_version,
|
367
|
+
stream_id,
|
368
|
+
stream_format,
|
369
|
+
)
|
370
|
+
if stream_format or stream_id
|
371
|
+
else None
|
236
372
|
)
|
237
373
|
return PartitionLocator.of(
|
238
374
|
stream_locator,
|
@@ -240,6 +376,14 @@ class PartitionLocator(Locator, dict):
|
|
240
376
|
partition_id,
|
241
377
|
)
|
242
378
|
|
379
|
+
@property
def name(self) -> PartitionLocatorName:
    """Return a new ``PartitionLocatorName`` wrapping this locator."""
    locator_name = PartitionLocatorName(self)
    return locator_name
|
382
|
+
|
383
|
+
@property
def parent(self) -> Optional[StreamLocator]:
    """Return this locator's parent, which is its stream locator."""
    parent_locator = self.stream_locator
    return parent_locator
|
386
|
+
|
243
387
|
@property
|
244
388
|
def stream_locator(self) -> Optional[StreamLocator]:
|
245
389
|
val: Dict[str, Any] = self.get("streamLocator")
|
@@ -257,7 +401,9 @@ class PartitionLocator(Locator, dict):
|
|
257
401
|
|
258
402
|
@partition_values.setter
def partition_values(self, partition_values: Optional[PartitionValues]) -> None:
    """Store the partition values under the ``partitionValues`` key.

    Args:
        partition_values: Values to store. Any falsy value (for example an
            empty list) is stored as ``None``.
    """
    # normalize empty partition values to None
    normalized = partition_values if partition_values else None
    self["partitionValues"] = normalized
|
261
407
|
|
262
408
|
@property
|
263
409
|
def partition_id(self) -> Optional[str]:
|
@@ -296,10 +442,10 @@ class PartitionLocator(Locator, dict):
|
|
296
442
|
return None
|
297
443
|
|
298
444
|
@property
def stream_format(self) -> Optional[str]:
    """Return the format of this partition locator's stream locator.

    Returns:
        The stream locator's ``format`` value, or ``None`` when there is no
        (truthy) stream locator.
    """
    parent_stream = self.stream_locator
    return parent_stream.format if parent_stream else None
|
304
450
|
|
305
451
|
@property
|
@@ -323,13 +469,267 @@ class PartitionLocator(Locator, dict):
|
|
323
469
|
return stream_locator.table_version
|
324
470
|
return None
|
325
471
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
472
|
+
|
473
|
+
class PartitionKey(dict):
    """Dict-backed description of a single partition key.

    The field locators, name, field ID, transform, and native object are
    stored under the dictionary keys ``key``, ``name``, ``fieldId``,
    ``transform`` and ``nativeObject`` respectively.
    """

    @staticmethod
    def of(
        key: List[FieldLocator],
        name: Optional[str] = None,
        field_id: Optional[int] = None,
        transform: Optional[Transform] = None,
        native_object: Optional[Any] = None,
    ) -> PartitionKey:
        """Create a partition key.

        Args:
            key: Field locators the partition key is computed from.
            name: Optional partition key name.
            field_id: Optional partition key field ID.
            transform: Optional transform applied to the key fields.
            native_object: Optional native object stored alongside the key.

        Returns:
            The new partition key.

        Raises:
            ValueError: If more than one field locator is given together with
                a transform that is not a multi-field transform.
        """
        if (
            len(key) > 1
            and transform is not None
            and not transform.is_multi_field_transform
        ):
            raise ValueError(f"{len(key)} keys given for 1-key transform.")
        return PartitionKey(
            {
                "key": key,
                "name": name,
                "fieldId": field_id,
                "transform": transform,
                "nativeObject": native_object,
            }
        )

    def equivalent_to(
        self,
        other: PartitionKey,
        check_identifiers: bool = False,
    ) -> bool:
        """Compare this partition key with another one.

        Args:
            other: Partition key (or plain dict holding one) to compare with.
            check_identifiers: When true, the name and ID must match in
                addition to the field locators and transform.

        Returns:
            ``True`` when the field locators and transforms are equal and,
            if ``check_identifiers`` is set, the names and IDs are equal too.
            ``False`` when ``other`` is ``None`` or not a dict.
        """
        if other is None:
            return False
        if not isinstance(other, dict):
            return False
        if not isinstance(other, PartitionKey):
            other = PartitionKey(other)
        # The fields and transform must always match; identifiers are only an
        # additional constraint and must never override a field mismatch.
        if not (self.key == other.key and self.transform == other.transform):
            return False
        if not check_identifiers:
            return True
        return self.name == other.name and self.id == other.id

    @property
    def key(self) -> List[FieldLocator]:
        """Return the field locators stored under ``key``."""
        return self.get("key")

    @property
    def name(self) -> Optional[str]:
        """Return the value stored under ``name``."""
        return self.get("name")

    @property
    def id(self) -> Optional[int]:
        """Return the value stored under ``fieldId``."""
        return self.get("fieldId")

    @property
    def transform(self) -> Optional[Transform]:
        """Return the transform, wrapping a raw stored value on first access.

        A stored value that is not already a ``Transform`` is converted with
        ``Transform(val)`` and the converted object is written back.
        """
        val: Dict[str, Any] = self.get("transform")
        if val is not None and not isinstance(val, Transform):
            self["transform"] = val = Transform(val)
        return val

    @property
    def native_object(self) -> Optional[Any]:
        """Return the value stored under ``nativeObject``."""
        return self.get("nativeObject")
|
538
|
+
|
539
|
+
|
540
|
+
class PartitionKeyList(List[PartitionKey]):
    """List of partition keys that wraps raw entries as ``PartitionKey``."""

    @staticmethod
    def of(items: List[PartitionKey]) -> PartitionKeyList:
        """Build a ``PartitionKeyList`` from ``items``.

        ``None`` entries and existing ``PartitionKey`` instances are kept
        as-is; every other entry is wrapped with ``PartitionKey(entry)``.
        """
        result = PartitionKeyList()
        result.extend(
            entry
            if entry is None or isinstance(entry, PartitionKey)
            else PartitionKey(entry)
            for entry in items
        )
        return result

    def __getitem__(self, item):
        """Return the element at ``item``, converting it in place if needed."""
        element = super().__getitem__(item)
        if element is None or isinstance(element, PartitionKey):
            return element
        converted = PartitionKey(element)
        # Store the converted object so later lookups return it directly.
        self[item] = converted
        return converted

    def __iter__(self):
        """Yield elements through ``__getitem__`` so each one is converted."""
        yield from map(self.__getitem__, range(len(self)))
|
559
|
+
|
560
|
+
|
561
|
+
class PartitionScheme(dict):
    """Dict-backed partition scheme: partition keys plus optional identifiers.

    Values are stored under the dictionary keys ``keys``, ``name``, ``id``
    and ``nativeObject``.
    """

    @staticmethod
    def of(
        keys: Optional[PartitionKeyList],
        name: Optional[str] = None,
        scheme_id: Optional[str] = None,
        native_object: Optional[Any] = None,
    ) -> PartitionScheme:
        """Create a partition scheme after validating its keys.

        Args:
            keys: Partition keys, or ``None`` for a scheme without keys.
            name: Optional scheme name.
            scheme_id: Optional scheme ID.
            native_object: Optional native object stored with the scheme.

        Returns:
            The new partition scheme.

        Raises:
            ValueError: If ``keys`` is an empty list, if two keys share the
                same field locators and transform type, or if two keys share
                the same non-``None`` name.
        """
        # Validate keys if provided
        if keys is not None:
            # Check for empty keys list
            if len(keys) == 0:
                raise ValueError("Partition scheme cannot have empty keys list")

            # Check for duplicate keys (by field locators and transform types) and names
            seen_key_transform_pairs = set()
            seen_names = set()
            for key in keys:
                # Check for duplicate field locators with identical transform types
                key_tuple = tuple(key.key) if key.key else ()
                transform_type = type(key.transform) if key.transform else None
                key_transform_pair = (key_tuple, transform_type)

                if key_transform_pair in seen_key_transform_pairs:
                    # Use the first field locator for the error message
                    key_name = key.key[0] if key.key else "unknown"
                    transform_name = (
                        transform_type.__name__ if transform_type else "None"
                    )
                    raise ValueError(
                        f"Duplicate partition key found: {key_name} with transform type {transform_name}"
                    )
                seen_key_transform_pairs.add(key_transform_pair)

                # Check for duplicate names (when specified)
                if key.name is not None:
                    if key.name in seen_names:
                        raise ValueError(
                            f"Duplicate partition key name found: {key.name}"
                        )
                    seen_names.add(key.name)

        return PartitionScheme(
            {
                "keys": keys,
                "name": name,
                "id": scheme_id,
                "nativeObject": native_object,
            }
        )

    def equivalent_to(
        self,
        other: PartitionScheme,
        check_identifiers: bool = False,
    ) -> bool:
        """Compare this partition scheme with another one.

        Args:
            other: Partition scheme (or plain dict holding one) to compare.
            check_identifiers: When true, scheme names and IDs (and the
                identifiers of each key) must match as well.

        Returns:
            ``True`` when both schemes have no keys, or have the same number
            of keys and each pair of keys at the same position is equivalent;
            with ``check_identifiers`` the names and IDs must also be equal.
            ``False`` when ``other`` is ``None`` or not a dict.
        """
        if other is None:
            return False
        if not isinstance(other, dict):
            return False
        if not isinstance(other, PartitionScheme):
            other = PartitionScheme(other)
        own_keys = self.keys
        other_keys = other.keys
        if own_keys is not None or other_keys is not None:
            # If only one has None keys, they are not equivalent
            if own_keys is None or other_keys is None:
                return False
            # Schemes with a different number of keys can never be
            # equivalent; checking first also avoids indexing past the end
            # of the shorter list.
            if len(own_keys) != len(other_keys):
                return False
            for index in range(len(own_keys)):
                if not own_keys[index].equivalent_to(
                    other_keys[index], check_identifiers
                ):
                    return False
        # Reached for two key-less schemes and for fully matching key lists.
        return not check_identifiers or (
            self.name == other.name and self.id == other.id
        )

    @property
    def keys(self) -> Optional[PartitionKeyList]:
        """Return the partition keys, wrapping a raw list on first access.

        A stored value that is not already a ``PartitionKeyList`` is
        converted with ``PartitionKeyList.of`` and written back.
        """
        val: List[PartitionKey] = self.get("keys")
        if val is not None and not isinstance(val, PartitionKeyList):
            self["keys"] = val = PartitionKeyList.of(val)
        return val

    @property
    def name(self) -> Optional[str]:
        """Return the value stored under ``name``."""
        return self.get("name")

    @property
    def id(self) -> Optional[str]:
        """Return the value stored under ``id``."""
        return self.get("id")

    @property
    def native_object(self) -> Optional[Any]:
        """Return the value stored under ``nativeObject``."""
        return self.get("nativeObject")
|
657
|
+
|
658
|
+
|
659
|
+
# Module-level scheme with no partition keys, using the reserved
# unpartitioned scheme name and ID.
UNPARTITIONED_SCHEME = PartitionScheme.of(
    None,
    UNPARTITIONED_SCHEME_NAME,
    UNPARTITIONED_SCHEME_ID,
)
|
664
|
+
|
665
|
+
|
666
|
+
class PartitionSchemeList(List[PartitionScheme]):
    """List of partition schemes that wraps raw entries as ``PartitionScheme``."""

    @staticmethod
    def of(items: List[PartitionScheme]) -> PartitionSchemeList:
        """Build a ``PartitionSchemeList`` from ``items``.

        ``None`` entries and existing ``PartitionScheme`` instances are kept
        as-is; every other entry is wrapped with ``PartitionScheme(entry)``.
        """
        result = PartitionSchemeList()
        result.extend(
            entry
            if entry is None or isinstance(entry, PartitionScheme)
            else PartitionScheme(entry)
            for entry in items
        )
        return result

    def __getitem__(self, item):
        """Return the element at ``item``, converting it in place if needed."""
        element = super().__getitem__(item)
        if element is None or isinstance(element, PartitionScheme):
            return element
        converted = PartitionScheme(element)
        # Store the converted object so later lookups return it directly.
        self[item] = converted
        return converted

    def __iter__(self):
        """Yield elements through ``__getitem__`` so each one is converted."""
        yield from map(self.__getitem__, range(len(self)))
|
685
|
+
|
686
|
+
|
687
|
+
class PartitionLocatorAliasName(LocatorName):
    """Locator name backed by a ``PartitionLocatorAlias``."""

    def __init__(self, locator: PartitionLocatorAlias):
        """Keep a reference to the alias locator this name reads from."""
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """Always return ``None``; this name exposes no immutable ID."""
        return None

    def parts(self) -> List[str]:
        """Return the name parts: stringified partition values, then scheme ID."""
        alias = self.locator
        values_part = str(alias.partition_values)
        return [values_part, alias.partition_scheme_id]
|
700
|
+
|
701
|
+
|
702
|
+
class PartitionLocatorAlias(Locator, dict):
    """Dict-backed alias locator built from a partition.

    Stores the partition values, partition scheme ID, and parent locator
    under the keys ``partition_values``, ``partition_scheme_id`` and
    ``parent``.
    """

    @staticmethod
    def of(parent_partition: Partition):
        """Create an alias locator for ``parent_partition``.

        Returns:
            A new ``PartitionLocatorAlias``, or ``None`` when the partition's
            state is ``CommitState.STAGED``.
        """
        if parent_partition.state != CommitState.STAGED:
            partition_locator = parent_partition.locator
            return PartitionLocatorAlias(
                {
                    "partition_values": parent_partition.partition_values,
                    "partition_scheme_id": parent_partition.partition_scheme_id,
                    "parent": (
                        partition_locator.parent if partition_locator else None
                    ),
                }
            )
        # staged partitions cannot be resolved by alias
        return None

    @property
    def partition_values(self) -> Optional[PartitionValues]:
        """Return the value stored under ``partition_values``."""
        return self.get("partition_values")

    @property
    def partition_scheme_id(self) -> Optional[str]:
        """Return the value stored under ``partition_scheme_id``."""
        return self.get("partition_scheme_id")

    @property
    def name(self) -> PartitionLocatorAliasName:
        """Return a new ``PartitionLocatorAliasName`` wrapping this alias."""
        return PartitionLocatorAliasName(self)

    @property
    def parent(self) -> Optional[Locator]:
        """Return the value stored under ``parent``."""
        return self.get("parent")
|
File without changes
|