deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
from abc import abstractmethod
|
2
|
+
from typing import Iterable, Optional, Protocol, TypeVar, Union
|
3
|
+
|
4
|
+
from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
|
5
|
+
DatasetMetastore,
|
6
|
+
)
|
7
|
+
|
8
|
+
# TODO: Add type validation in dataset/schema classes
|
9
|
+
T = TypeVar("T", bound=Union[int, str])
|
10
|
+
|
11
|
+
|
12
|
+
class Shard(Protocol[T]):
|
13
|
+
"""
|
14
|
+
Abstract base class representing a shard with defined inclusive boundaries.
|
15
|
+
|
16
|
+
A shard represents a logical partition of data, defined by its
|
17
|
+
minimum and maximum keys. These keys determine the range of data
|
18
|
+
within a dataset that the shard encompasses.
|
19
|
+
"""
|
20
|
+
|
21
|
+
min_key: Optional[T]
|
22
|
+
max_key: Optional[T]
|
23
|
+
|
24
|
+
|
25
|
+
class ShardingStrategy(Protocol):
|
26
|
+
"""
|
27
|
+
A sharding strategy determines how the dataset is divided into shards.
|
28
|
+
"""
|
29
|
+
|
30
|
+
@staticmethod
|
31
|
+
def from_string(strategy: str) -> "ShardingStrategy":
|
32
|
+
"""
|
33
|
+
Factory method to create the appropriate ShardingStrategy from a string.
|
34
|
+
|
35
|
+
param: strategy: The string representation of the sharding strategy.
|
36
|
+
return: ShardingStrategy class.
|
37
|
+
"""
|
38
|
+
if strategy == "range":
|
39
|
+
from deltacat.experimental.storage.rivulet.shard.range_shard import (
|
40
|
+
RangeShardingStrategy,
|
41
|
+
)
|
42
|
+
|
43
|
+
return RangeShardingStrategy()
|
44
|
+
else:
|
45
|
+
raise ValueError(f"Unsupported sharding strategy type: {strategy}")
|
46
|
+
|
47
|
+
@abstractmethod
|
48
|
+
def shards(self, num_shards: int, metastore: DatasetMetastore) -> Iterable[Shard]:
|
49
|
+
"""
|
50
|
+
Generate the shards based on the chosen strategy.
|
51
|
+
"""
|
@@ -1,33 +1,230 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
from
|
4
|
+
from typing import Optional, Any, List, Tuple, Dict
|
5
5
|
|
6
|
+
from pyarrow.compute import SortOptions
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
8
|
+
from deltacat.storage.model.types import (
|
9
|
+
SortOrder,
|
10
|
+
NullOrder,
|
11
|
+
)
|
12
|
+
from deltacat.storage.model.schema import FieldLocator
|
13
|
+
from deltacat.storage.model.transform import Transform
|
14
|
+
|
15
|
+
UNSORTED_SCHEME_NAME = "unsorted_scheme"
|
16
|
+
UNSORTED_SCHEME_ID = "deadbeef-7277-49a4-a195-fdc8ed235d42"
|
10
17
|
|
11
18
|
|
12
19
|
class SortKey(tuple):
|
13
20
|
@staticmethod
|
14
|
-
def of(
|
21
|
+
def of(
|
22
|
+
key: Optional[List[FieldLocator]],
|
23
|
+
sort_order: SortOrder = SortOrder.ASCENDING,
|
24
|
+
null_order: NullOrder = NullOrder.AT_END,
|
25
|
+
transform: Optional[Transform] = None,
|
26
|
+
native_object: Optional[Any] = None,
|
27
|
+
) -> SortKey:
|
15
28
|
"""
|
16
29
|
Create a sort key from a field name to use as the sort key, and
|
17
30
|
the sort order for this key. If no sort order is specified, then the
|
18
|
-
data will be sorted in ascending order by default.
|
19
|
-
always keeps the LAST occurrence of this key post-sort. For example, if
|
20
|
-
you used an integer column as your sort key which contained the values
|
21
|
-
[2, 1, 3] specifying SortOrder.ASCENDING would ensure that the
|
22
|
-
value [3] is kept over [2, 1], and specifying SortOrder.DESCENDING
|
23
|
-
would ensure that [1] is kept over [2, 3].
|
31
|
+
data will be sorted in ascending order by default.
|
24
32
|
"""
|
25
|
-
return SortKey(
|
33
|
+
return SortKey(
|
34
|
+
(
|
35
|
+
key,
|
36
|
+
sort_order.value if isinstance(sort_order, SortOrder) else sort_order,
|
37
|
+
null_order.value if isinstance(null_order, NullOrder) else null_order,
|
38
|
+
transform,
|
39
|
+
native_object,
|
40
|
+
)
|
41
|
+
)
|
42
|
+
|
43
|
+
def equivalent_to(
|
44
|
+
self,
|
45
|
+
other: SortKey,
|
46
|
+
):
|
47
|
+
if other is None:
|
48
|
+
return False
|
49
|
+
if not isinstance(other, tuple):
|
50
|
+
return False
|
51
|
+
if not isinstance(other, SortKey):
|
52
|
+
other = SortKey(other)
|
53
|
+
return (
|
54
|
+
self.key == other.key
|
55
|
+
and self.transform == other.transform
|
56
|
+
and self.sort_order == other.sort_order
|
57
|
+
and self.null_order == other.null_order
|
58
|
+
)
|
26
59
|
|
27
60
|
@property
|
28
|
-
def
|
61
|
+
def key(self) -> Optional[List[FieldLocator]]:
|
29
62
|
return self[0]
|
30
63
|
|
31
64
|
@property
|
32
65
|
def sort_order(self) -> SortOrder:
|
33
66
|
return SortOrder(self[1])
|
67
|
+
|
68
|
+
@property
|
69
|
+
def null_order(self) -> NullOrder:
|
70
|
+
return NullOrder(self[2])
|
71
|
+
|
72
|
+
@property
|
73
|
+
def transform(self) -> Optional[Transform]:
|
74
|
+
val: Dict[str, Any] = (
|
75
|
+
Transform(self[3]) if len(self) >= 4 and self[3] is not None else None
|
76
|
+
)
|
77
|
+
return val
|
78
|
+
|
79
|
+
@property
|
80
|
+
def arrow(self) -> List[Tuple[str, str]]:
|
81
|
+
# TODO(pdames): Convert unsupported field locators to arrow field names,
|
82
|
+
# and transforms/multi-key-sorts to pyarrow compute expressions. Add
|
83
|
+
# null order via SortOptions when supported per field by Arrow.
|
84
|
+
return (
|
85
|
+
[(field_locator, self[1]) for field_locator in self[0]] if self[0] else []
|
86
|
+
)
|
87
|
+
|
88
|
+
@property
|
89
|
+
def native_object(self) -> Optional[Any]:
|
90
|
+
return self[4] if len(self) >= 5 else None
|
91
|
+
|
92
|
+
|
93
|
+
class SortKeyList(List[SortKey]):
|
94
|
+
@staticmethod
|
95
|
+
def of(items: List[SortKey]) -> SortKeyList:
|
96
|
+
typed_items = SortKeyList()
|
97
|
+
for item in items:
|
98
|
+
if item is not None and not isinstance(item, SortKey):
|
99
|
+
item = SortKey(item)
|
100
|
+
typed_items.append(item)
|
101
|
+
return typed_items
|
102
|
+
|
103
|
+
def __getitem__(self, item):
|
104
|
+
val = super().__getitem__(item)
|
105
|
+
if val is not None and not isinstance(val, SortKey):
|
106
|
+
self[item] = val = SortKey(val)
|
107
|
+
return val
|
108
|
+
|
109
|
+
def __iter__(self):
|
110
|
+
for i in range(len(self)):
|
111
|
+
yield self[i] # This triggers __getitem__ conversion
|
112
|
+
|
113
|
+
|
114
|
+
class SortScheme(dict):
|
115
|
+
@staticmethod
|
116
|
+
def of(
|
117
|
+
keys: Optional[SortKeyList],
|
118
|
+
name: Optional[str] = None,
|
119
|
+
scheme_id: Optional[str] = None,
|
120
|
+
native_object: Optional[Any] = None,
|
121
|
+
) -> SortScheme:
|
122
|
+
# Validate keys if provided
|
123
|
+
if keys is not None:
|
124
|
+
# Check for empty keys list
|
125
|
+
if len(keys) == 0:
|
126
|
+
raise ValueError("Sort scheme cannot have empty keys list")
|
127
|
+
|
128
|
+
# Check for duplicate keys
|
129
|
+
key_names = []
|
130
|
+
for key in keys:
|
131
|
+
if key.key[0] in key_names:
|
132
|
+
raise ValueError(f"Duplicate sort key found: {key.key[0]}")
|
133
|
+
key_names.append(key.key[0])
|
134
|
+
|
135
|
+
return SortScheme(
|
136
|
+
{
|
137
|
+
"keys": keys,
|
138
|
+
"name": name,
|
139
|
+
"id": scheme_id,
|
140
|
+
"nativeObject": native_object,
|
141
|
+
}
|
142
|
+
)
|
143
|
+
|
144
|
+
def equivalent_to(
|
145
|
+
self,
|
146
|
+
other: SortScheme,
|
147
|
+
check_identifiers: bool = False,
|
148
|
+
) -> bool:
|
149
|
+
if other is None:
|
150
|
+
return False
|
151
|
+
if not isinstance(other, dict):
|
152
|
+
return False
|
153
|
+
if not isinstance(other, SortScheme):
|
154
|
+
other = SortScheme(other)
|
155
|
+
# If both have None keys, they are equivalent (for unsorted schemes)
|
156
|
+
if self.keys is None and other.keys is None:
|
157
|
+
return not check_identifiers or (
|
158
|
+
self.name == other.name and self.id == other.id
|
159
|
+
)
|
160
|
+
# If only one has None keys, they are not equivalent
|
161
|
+
if self.keys is None or other.keys is None:
|
162
|
+
return False
|
163
|
+
# Compare keys if both have them
|
164
|
+
for i in range(len(self.keys)):
|
165
|
+
if not self.keys[i].equivalent_to(other.keys[i]):
|
166
|
+
return False
|
167
|
+
return not check_identifiers or (
|
168
|
+
self.name == other.name and self.id == other.id
|
169
|
+
)
|
170
|
+
|
171
|
+
@property
|
172
|
+
def keys(self) -> Optional[SortKeyList]:
|
173
|
+
val: List[SortKey] = self.get("keys")
|
174
|
+
if val is not None and not isinstance(val, SortKeyList):
|
175
|
+
self["keys"] = val = SortKeyList.of(val)
|
176
|
+
return val
|
177
|
+
|
178
|
+
@property
|
179
|
+
def name(self) -> Optional[str]:
|
180
|
+
return self.get("name")
|
181
|
+
|
182
|
+
@property
|
183
|
+
def id(self) -> Optional[str]:
|
184
|
+
return self.get("id")
|
185
|
+
|
186
|
+
@property
|
187
|
+
def arrow(self) -> SortOptions:
|
188
|
+
# TODO(pdames): Remove homogenous null ordering when supported by Arrow.
|
189
|
+
if self.keys:
|
190
|
+
if len(set([key.null_order for key in self.keys])) == 1:
|
191
|
+
return SortOptions(
|
192
|
+
sort_keys=[pa_key for k in self.keys for pa_key in k.arrow],
|
193
|
+
null_placement=self.keys[0].null_order.value,
|
194
|
+
)
|
195
|
+
else:
|
196
|
+
err_msg = "All arrow sort keys must use the same null order."
|
197
|
+
raise ValueError(err_msg)
|
198
|
+
return SortOptions()
|
199
|
+
|
200
|
+
@property
|
201
|
+
def native_object(self) -> Optional[Any]:
|
202
|
+
return self.get("nativeObject")
|
203
|
+
|
204
|
+
|
205
|
+
UNSORTED_SCHEME = SortScheme.of(
|
206
|
+
keys=None,
|
207
|
+
name=UNSORTED_SCHEME_NAME,
|
208
|
+
scheme_id=UNSORTED_SCHEME_ID,
|
209
|
+
)
|
210
|
+
|
211
|
+
|
212
|
+
class SortSchemeList(List[SortScheme]):
|
213
|
+
@staticmethod
|
214
|
+
def of(items: List[SortScheme]) -> SortSchemeList:
|
215
|
+
typed_items = SortSchemeList()
|
216
|
+
for item in items:
|
217
|
+
if item is not None and not isinstance(item, SortScheme):
|
218
|
+
item = SortScheme(item)
|
219
|
+
typed_items.append(item)
|
220
|
+
return typed_items
|
221
|
+
|
222
|
+
def __getitem__(self, item):
|
223
|
+
val = super().__getitem__(item)
|
224
|
+
if val is not None and not isinstance(val, SortScheme):
|
225
|
+
self[item] = val = SortScheme(val)
|
226
|
+
return val
|
227
|
+
|
228
|
+
def __iter__(self):
|
229
|
+
for i in range(len(self)):
|
230
|
+
yield self[i] # This triggers __getitem__ conversion
|
deltacat/storage/model/stream.py
CHANGED
@@ -1,31 +1,54 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
|
4
|
+
import posixpath
|
5
5
|
|
6
|
-
|
6
|
+
import pyarrow
|
7
|
+
|
8
|
+
import deltacat.storage.model.partition as partition
|
9
|
+
|
10
|
+
from typing import Any, Dict, Optional, List
|
11
|
+
|
12
|
+
from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
|
13
|
+
from deltacat.constants import TXN_DIR_NAME
|
14
|
+
from deltacat.storage.model.locator import (
|
15
|
+
Locator,
|
16
|
+
LocatorName,
|
17
|
+
)
|
7
18
|
from deltacat.storage.model.namespace import NamespaceLocator
|
8
|
-
from deltacat.storage.model.table import
|
19
|
+
from deltacat.storage.model.table import (
|
20
|
+
TableLocator,
|
21
|
+
Table,
|
22
|
+
)
|
9
23
|
from deltacat.storage.model.table_version import TableVersionLocator
|
10
|
-
from deltacat.storage.model.types import
|
11
|
-
|
24
|
+
from deltacat.storage.model.types import (
|
25
|
+
CommitState,
|
26
|
+
StreamFormat,
|
27
|
+
)
|
28
|
+
|
12
29
|
|
30
|
+
class Stream(Metafile):
|
31
|
+
"""
|
32
|
+
An unbounded stream of Deltas, where each delta's records are optionally
|
33
|
+
partitioned according to the given partition scheme.
|
34
|
+
"""
|
13
35
|
|
14
|
-
class Stream(dict):
|
15
36
|
@staticmethod
|
16
37
|
def of(
    locator: Optional[StreamLocator],
    partition_scheme: Optional[partition.PartitionScheme],
    state: Optional[CommitState] = None,
    previous_stream_id: Optional[str] = None,
    watermark: Optional[int] = None,
    native_object: Optional[Any] = None,
) -> Stream:
    """
    Create a Stream and assign each argument to the property of the same name.

    The properties are assigned in the order locator, partition_scheme,
    state, previous_stream_id, watermark, native_object.
    """
    new_stream = Stream()
    new_stream.locator = locator
    new_stream.partition_scheme = partition_scheme
    new_stream.state = state
    new_stream.previous_stream_id = previous_stream_id
    new_stream.watermark = watermark
    new_stream.native_object = native_object
    return new_stream
|
30
53
|
|
31
54
|
@property
|
@@ -40,31 +63,44 @@ class Stream(dict):
|
|
40
63
|
self["streamLocator"] = stream_locator
|
41
64
|
|
42
65
|
@property
|
43
|
-
def
|
44
|
-
|
45
|
-
Ordered list of unique column names in the table schema on
|
46
|
-
which the underlying data is partitioned. Either partition_spec
|
47
|
-
or partition_keys must be specified but not both.
|
66
|
+
def locator_alias(self) -> Optional[StreamLocatorAlias]:
    """Return the result of StreamLocatorAlias.of for this stream."""
    alias = StreamLocatorAlias.of(self)
    return alias
|
48
68
|
|
49
|
-
|
50
|
-
|
69
|
+
@property
|
70
|
+
def partition_scheme(self) -> Optional[partition.PartitionScheme]:
    """
    A table's partition keys are defined within the context of a
    Partition Scheme, which supports defining both fields to partition
    a table by and optional transforms to apply to those fields to
    derive the Partition Values that a given field, and its corresponding
    record, belong to.

    A stored value that is not yet a PartitionScheme is wrapped in one and
    written back through the partition_scheme setter before being returned.
    """
    raw: Dict[str, Any] = self.get("partitionScheme")
    if raw is None or isinstance(raw, partition.PartitionScheme):
        return raw
    typed = partition.PartitionScheme(raw)
    self.partition_scheme = typed
    return typed
|
53
82
|
|
54
|
-
@
|
55
|
-
def
|
56
|
-
self[
|
83
|
+
@partition_scheme.setter
|
84
|
+
def partition_scheme(
    self,
    partition_scheme: Optional[partition.PartitionScheme],
) -> None:
    """Store the given partition scheme under the "partitionScheme" key."""
    self["partitionScheme"] = partition_scheme
|
57
88
|
|
58
89
|
@property
|
59
|
-
def
|
60
|
-
""
|
61
|
-
|
62
|
-
|
63
|
-
|
90
|
+
def previous_stream_id(self) -> Optional[str]:
    """Return whatever self.get yields for the "previousStreamId" key."""
    previous_id = self.get("previousStreamId")
    return previous_id
|
92
|
+
|
93
|
+
@previous_stream_id.setter
|
94
|
+
def previous_stream_id(
    self,
    previous_stream_id: Optional[str],
) -> None:
    """Store the given ID under the "previousStreamId" key."""
    self["previousStreamId"] = previous_stream_id
|
64
96
|
|
65
|
-
@
|
66
|
-
def
|
67
|
-
self
|
97
|
+
@property
|
98
|
+
def watermark(self) -> Optional[int]:
    """Return whatever self.get yields for the "watermark" key."""
    stored_watermark = self.get("watermark")
    return stored_watermark
|
100
|
+
|
101
|
+
@watermark.setter
|
102
|
+
def watermark(
    self,
    watermark: Optional[int],
) -> None:
    """Store the given value under the "watermark" key."""
    self["watermark"] = watermark
|
68
104
|
|
69
105
|
@property
|
70
106
|
def state(self) -> Optional[CommitState]:
|
@@ -79,24 +115,12 @@ class Stream(dict):
|
|
79
115
|
self["state"] = state
|
80
116
|
|
81
117
|
@property
|
82
|
-
def
|
83
|
-
""
|
84
|
-
If a table uses complex partitioning instead of identity,
|
85
|
-
partition spec can be specified to define that strategy.
|
86
|
-
For example, a partition spec can define a bucketing strategy
|
87
|
-
on composite column values or can define iceberg compliant
|
88
|
-
bucketing.
|
118
|
+
def native_object(self) -> Optional[Any]:
    """Return whatever self.get yields for the "nativeObject" key."""
    native = self.get("nativeObject")
    return native
|
89
120
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
if val is not None and not isinstance(val, StreamPartitionSpec):
|
94
|
-
self.partition_spec = val = StreamPartitionSpec(val)
|
95
|
-
return val
|
96
|
-
|
97
|
-
@partition_spec.setter
|
98
|
-
def partition_spec(self, spec: StreamPartitionSpec) -> None:
|
99
|
-
self["partitionSpec"] = spec
|
121
|
+
@native_object.setter
|
122
|
+
def native_object(
    self,
    native_object: Optional[Any],
) -> None:
    """Store the given object under the "nativeObject" key."""
    self["nativeObject"] = native_object
|
100
124
|
|
101
125
|
@property
|
102
126
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
@@ -126,6 +150,13 @@ class Stream(dict):
|
|
126
150
|
return stream_locator.stream_id
|
127
151
|
return None
|
128
152
|
|
153
|
+
@property
|
154
|
+
def stream_format(self) -> Optional[str]:
    """Return the locator's format, or None when the locator is falsy."""
    stream_locator = self.locator
    return stream_locator.format if stream_locator else None
|
159
|
+
|
129
160
|
@property
|
130
161
|
def namespace(self) -> Optional[str]:
|
131
162
|
stream_locator = self.locator
|
@@ -147,16 +178,72 @@ class Stream(dict):
|
|
147
178
|
return stream_locator.table_version
|
148
179
|
return None
|
149
180
|
|
150
|
-
def
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
181
|
+
def url(self, catalog_name: Optional[str] = None) -> str:
    """
    Format a URL string for this stream.

    Args:
        catalog_name: When truthy, a "dc://" URL prefixed with this catalog
            name is returned; otherwise a "table://" URL is returned.

    Returns:
        The URL built from namespace, table name, table version and stream
        format, ending with a trailing slash.
    """
    if catalog_name:
        return f"dc://{catalog_name}/{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/"
    return f"table://{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/"
|
187
|
+
|
188
|
+
def to_serializable(self) -> Stream:
    """
    Return the form of this stream to serialize.

    With a falsy table locator, self is returned unchanged. Otherwise the
    Stream produced by Stream.update_for(self) is returned, with its table
    locator replaced by one whose namespace and table name are both this
    stream's id.
    """
    if not self.table_locator:
        return self
    serializable: Stream = Stream.update_for(self)
    # remove the mutable table locator
    immutable_table_locator = TableLocator.at(
        namespace=self.id,
        table_name=self.id,
    )
    serializable.table_version_locator.table_locator = immutable_table_locator
    return serializable
|
198
|
+
|
199
|
+
def from_serializable(
    self,
    path: str,
    filesystem: Optional[pyarrow.fs.FileSystem] = None,
) -> Stream:
    """
    Restore the table locator from its mapped immutable metafile ID.

    Nothing is changed unless the table locator is truthy and its table name
    equals this stream's id. In that case the latest Table revision is read
    from the parent revision directory and its locator replaces this
    stream's table locator.

    Args:
        path: Base metafile path used to find the parent revision directory.
        filesystem: Optional filesystem passed to the revision lookup and to
            Table.read.

    Returns:
        This stream.
    """
    table_locator = self.table_locator
    if not table_locator or table_locator.table_name != self.id:
        return self
    parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
        base_metafile_path=path,
        parent_number=2,
    )
    # The transaction log directory sits three directory levels above the
    # parent revision directory.
    ancestor_dir = parent_rev_dir_path
    for _ in range(3):
        ancestor_dir = posixpath.dirname(ancestor_dir)
    txn_log_dir = posixpath.join(ancestor_dir, TXN_DIR_NAME)
    latest_revision = MetafileRevisionInfo.latest_revision(
        revision_dir_path=parent_rev_dir_path,
        filesystem=filesystem,
        success_txn_log_dir=txn_log_dir,
    )
    table = Table.read(latest_revision.path, filesystem)
    self.table_version_locator.table_locator = table.locator
    return self
|
228
|
+
|
229
|
+
|
230
|
+
class StreamLocatorName(LocatorName):
    """Locator name backed by a StreamLocator's stream ID and format."""

    def __init__(self, locator: StreamLocator):
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """The wrapped locator's stream ID."""
        stream_locator = self.locator
        return stream_locator.stream_id

    @immutable_id.setter
    def immutable_id(self, immutable_id: Optional[str]):
        stream_locator = self.locator
        stream_locator.stream_id = immutable_id

    def parts(self) -> List[str]:
        """Return the wrapped locator's stream ID followed by its format."""
        stream_locator = self.locator
        return [stream_locator.stream_id, stream_locator.format]
|
160
247
|
|
161
248
|
|
162
249
|
class StreamLocator(Locator, dict):
|
@@ -164,7 +251,7 @@ class StreamLocator(Locator, dict):
|
|
164
251
|
def of(
|
165
252
|
table_version_locator: Optional[TableVersionLocator],
|
166
253
|
stream_id: Optional[str],
|
167
|
-
|
254
|
+
stream_format: Optional[StreamFormat],
|
168
255
|
) -> StreamLocator:
|
169
256
|
"""
|
170
257
|
Creates a table version Stream Locator. All input parameters are
|
@@ -173,7 +260,11 @@ class StreamLocator(Locator, dict):
|
|
173
260
|
stream_locator = StreamLocator()
|
174
261
|
stream_locator.table_version_locator = table_version_locator
|
175
262
|
stream_locator.stream_id = stream_id
|
176
|
-
stream_locator.
|
263
|
+
stream_locator.format = (
|
264
|
+
stream_format.value
|
265
|
+
if isinstance(stream_format, StreamFormat)
|
266
|
+
else stream_format
|
267
|
+
)
|
177
268
|
return stream_locator
|
178
269
|
|
179
270
|
@staticmethod
|
@@ -182,19 +273,31 @@ class StreamLocator(Locator, dict):
|
|
182
273
|
table_name: Optional[str],
|
183
274
|
table_version: Optional[str],
|
184
275
|
stream_id: Optional[str],
|
185
|
-
|
276
|
+
stream_format: Optional[StreamFormat],
|
186
277
|
) -> StreamLocator:
|
187
|
-
table_version_locator =
|
188
|
-
|
189
|
-
|
190
|
-
|
278
|
+
table_version_locator = (
|
279
|
+
TableVersionLocator.at(
|
280
|
+
namespace,
|
281
|
+
table_name,
|
282
|
+
table_version,
|
283
|
+
)
|
284
|
+
if table_version
|
285
|
+
else None
|
191
286
|
)
|
192
287
|
return StreamLocator.of(
|
193
288
|
table_version_locator,
|
194
289
|
stream_id,
|
195
|
-
|
290
|
+
stream_format,
|
196
291
|
)
|
197
292
|
|
293
|
+
@property
|
294
|
+
def name(self) -> StreamLocatorName:
    """Return a new StreamLocatorName wrapping this locator."""
    locator_name = StreamLocatorName(self)
    return locator_name
|
296
|
+
|
297
|
+
@property
|
298
|
+
def parent(self) -> Optional[TableVersionLocator]:
    """Return this locator's table version locator as its parent."""
    parent_locator = self.table_version_locator
    return parent_locator
|
300
|
+
|
198
301
|
@property
|
199
302
|
def table_version_locator(self) -> Optional[TableVersionLocator]:
|
200
303
|
val: Dict[str, Any] = self.get("tableVersionLocator")
|
@@ -217,12 +320,12 @@ class StreamLocator(Locator, dict):
|
|
217
320
|
self["streamId"] = stream_id
|
218
321
|
|
219
322
|
@property
|
220
|
-
def
|
221
|
-
return self.get("
|
323
|
+
def format(self) -> Optional[str]:
    """Return whatever self.get yields for the "format" key."""
    stored_format = self.get("format")
    return stored_format
|
222
325
|
|
223
|
-
@
|
224
|
-
def
|
225
|
-
self["
|
326
|
+
@format.setter
|
327
|
+
def format(
    self,
    stream_format: Optional[str],
) -> None:
    """Store the given stream format under the "format" key."""
    self["format"] = stream_format
|
226
329
|
|
227
330
|
@property
|
228
331
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
@@ -259,13 +362,45 @@ class StreamLocator(Locator, dict):
|
|
259
362
|
return table_version_locator.table_version
|
260
363
|
return None
|
261
364
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
365
|
+
|
366
|
+
class StreamLocatorAliasName(LocatorName):
    """Locator name for a StreamLocatorAlias; its only part is the format."""

    def __init__(self, locator: StreamLocatorAlias):
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """Always None for this locator name."""
        return None

    def parts(self) -> List[str]:
        """Return a single-element list holding the wrapped locator's format."""
        alias_format = self.locator.format
        return [alias_format]
|
376
|
+
|
377
|
+
|
378
|
+
class StreamLocatorAlias(Locator, dict):
    """Dict-backed locator holding a stream's format and its parent locator."""

    @staticmethod
    def of(
        parent_stream: Stream,
    ) -> StreamLocatorAlias:
        """
        Build the alias locator for the given stream.

        Returns None when the stream's state is CommitState.STAGED. Otherwise
        returns an alias holding the stream's format and the parent of the
        stream's locator (None when the stream has no locator).
        """
        if parent_stream.state != CommitState.STAGED:
            alias_format = parent_stream.stream_format
            stream_locator = parent_stream.locator
            parent_locator = stream_locator.parent if stream_locator else None
            return StreamLocatorAlias(
                {
                    "format": alias_format,
                    "parent": parent_locator,
                }
            )
        # staged streams cannot be resolved by alias
        return None

    @property
    def format(self) -> Optional[str]:
        """The value stored under the "format" key."""
        stored_format = self.get("format")
        return stored_format

    @property
    def name(self) -> StreamLocatorAliasName:
        """A new StreamLocatorAliasName wrapping this alias."""
        alias_name = StreamLocatorAliasName(self)
        return alias_name

    @property
    def parent(self) -> Optional[Locator]:
        """The value stored under the "parent" key."""
        parent_locator = self.get("parent")
        return parent_locator
|