deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
deltacat/storage/model/table.py
CHANGED
@@ -1,25 +1,46 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
|
4
|
+
import posixpath
|
5
|
+
from typing import Any, Dict, Optional, List
|
5
6
|
|
6
|
-
|
7
|
-
from deltacat.storage.model.namespace import NamespaceLocator
|
7
|
+
import pyarrow
|
8
8
|
|
9
|
+
from deltacat.storage.model.locator import Locator, LocatorName
|
10
|
+
from deltacat.storage.model.namespace import (
|
11
|
+
NamespaceLocator,
|
12
|
+
Namespace,
|
13
|
+
)
|
14
|
+
from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
|
15
|
+
from deltacat.constants import TXN_DIR_NAME
|
16
|
+
from deltacat.types.tables import TableProperty
|
17
|
+
|
18
|
+
TableProperties = Dict[str, Any]
|
19
|
+
|
20
|
+
|
21
|
+
class Table(Metafile):
|
22
|
+
"""
|
23
|
+
Tables store properties common to every table version including the
|
24
|
+
table's name, a high-level description of all table versions, and
|
25
|
+
properties shared by all table versions.
|
26
|
+
"""
|
9
27
|
|
10
|
-
class Table(dict):
|
11
28
|
@staticmethod
|
12
29
|
def of(
|
13
30
|
locator: Optional[TableLocator],
|
14
|
-
permissions: Optional[Dict[str, Any]] = None,
|
15
31
|
description: Optional[str] = None,
|
16
|
-
properties: Optional[
|
32
|
+
properties: Optional[TableProperties] = None,
|
33
|
+
latest_active_table_version: Optional[str] = None,
|
34
|
+
latest_table_version: Optional[str] = None,
|
35
|
+
native_object: Optional[Any] = None,
|
17
36
|
) -> Table:
|
18
37
|
table = Table()
|
19
38
|
table.locator = locator
|
20
|
-
table.permissions = permissions
|
21
39
|
table.description = description
|
22
40
|
table.properties = properties
|
41
|
+
table.latest_active_table_version = latest_active_table_version
|
42
|
+
table.latest_table_version = latest_table_version
|
43
|
+
table.native_object = native_object
|
23
44
|
return table
|
24
45
|
|
25
46
|
@property
|
@@ -33,14 +54,6 @@ class Table(dict):
|
|
33
54
|
def locator(self, table_locator: Optional[TableLocator]) -> None:
|
34
55
|
self["tableLocator"] = table_locator
|
35
56
|
|
36
|
-
@property
|
37
|
-
def permissions(self) -> Optional[Dict[str, Any]]:
|
38
|
-
return self.get("permissions")
|
39
|
-
|
40
|
-
@permissions.setter
|
41
|
-
def permissions(self, permissions: Optional[Dict[str, Any]]) -> None:
|
42
|
-
self["permissions"] = permissions
|
43
|
-
|
44
57
|
@property
|
45
58
|
def description(self) -> Optional[str]:
|
46
59
|
return self.get("description")
|
@@ -50,13 +63,43 @@ class Table(dict):
|
|
50
63
|
self["description"] = description
|
51
64
|
|
52
65
|
@property
|
53
|
-
def properties(self) -> Optional[
|
66
|
+
def properties(self) -> Optional[TableProperties]:
|
54
67
|
return self.get("properties")
|
55
68
|
|
56
69
|
@properties.setter
|
57
|
-
def properties(self, properties: Optional[
|
70
|
+
def properties(self, properties: Optional[TableProperties]) -> None:
|
58
71
|
self["properties"] = properties
|
59
72
|
|
73
|
+
@property
|
74
|
+
def latest_active_table_version(self) -> Optional[str]:
|
75
|
+
return self.get("latest_active_table_version")
|
76
|
+
|
77
|
+
@latest_active_table_version.setter
|
78
|
+
def latest_active_table_version(
|
79
|
+
self,
|
80
|
+
latest_active_table_version: Optional[str],
|
81
|
+
) -> None:
|
82
|
+
self["latest_active_table_version"] = latest_active_table_version
|
83
|
+
|
84
|
+
@property
|
85
|
+
def latest_table_version(self) -> Optional[str]:
|
86
|
+
return self.get("latest_table_version")
|
87
|
+
|
88
|
+
@latest_table_version.setter
|
89
|
+
def latest_table_version(
|
90
|
+
self,
|
91
|
+
latest_table_version: Optional[str],
|
92
|
+
) -> None:
|
93
|
+
self["latest_table_version"] = latest_table_version
|
94
|
+
|
95
|
+
@property
|
96
|
+
def native_object(self) -> Optional[Any]:
|
97
|
+
return self.get("nativeObject")
|
98
|
+
|
99
|
+
@native_object.setter
|
100
|
+
def native_object(self, native_object: Optional[Any]) -> None:
|
101
|
+
self["nativeObject"] = native_object
|
102
|
+
|
60
103
|
@property
|
61
104
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
62
105
|
table_locator = self.locator
|
@@ -78,6 +121,70 @@ class Table(dict):
|
|
78
121
|
return table_locator.table_name
|
79
122
|
return None
|
80
123
|
|
124
|
+
@table_name.setter
|
125
|
+
def table_name(self, table_name: Optional[str]) -> None:
|
126
|
+
table_locator = self.locator
|
127
|
+
if table_locator:
|
128
|
+
table_locator.table_name = table_name
|
129
|
+
|
130
|
+
def url(self, catalog_name: Optional[str] = None) -> str:
|
131
|
+
return (
|
132
|
+
f"dc://{catalog_name}/{self.namespace}/{self.table_name}/"
|
133
|
+
if catalog_name
|
134
|
+
else f"table://{self.namespace}/{self.table_name}/"
|
135
|
+
)
|
136
|
+
|
137
|
+
def read_table_property(self, property: TableProperty) -> Any:
|
138
|
+
return TableProperty.read_table_property(self, property)
|
139
|
+
|
140
|
+
def to_serializable(self) -> Table:
|
141
|
+
serializable = self
|
142
|
+
if serializable.namespace_locator:
|
143
|
+
serializable: Table = Table.update_for(self)
|
144
|
+
# remove the mutable namespace locator
|
145
|
+
serializable.locator.namespace_locator = NamespaceLocator.of(self.id)
|
146
|
+
return serializable
|
147
|
+
|
148
|
+
def from_serializable(
|
149
|
+
self,
|
150
|
+
path: str,
|
151
|
+
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
152
|
+
) -> Table:
|
153
|
+
# restore the namespace locator from its mapped immutable metafile ID
|
154
|
+
if self.namespace_locator and self.namespace_locator.namespace == self.id:
|
155
|
+
parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
|
156
|
+
base_metafile_path=path,
|
157
|
+
parent_number=1,
|
158
|
+
)
|
159
|
+
txn_log_dir = posixpath.join(
|
160
|
+
posixpath.dirname(
|
161
|
+
posixpath.dirname(parent_rev_dir_path),
|
162
|
+
),
|
163
|
+
TXN_DIR_NAME,
|
164
|
+
)
|
165
|
+
namespace = Namespace.read(
|
166
|
+
MetafileRevisionInfo.latest_revision(
|
167
|
+
revision_dir_path=parent_rev_dir_path,
|
168
|
+
filesystem=filesystem,
|
169
|
+
success_txn_log_dir=txn_log_dir,
|
170
|
+
).path,
|
171
|
+
filesystem,
|
172
|
+
)
|
173
|
+
self.locator.namespace_locator = namespace.locator
|
174
|
+
return self
|
175
|
+
|
176
|
+
|
177
|
+
class TableLocatorName(LocatorName):
|
178
|
+
def __init__(self, locator: TableLocator):
|
179
|
+
self.locator = locator
|
180
|
+
|
181
|
+
@property
|
182
|
+
def immutable_id(self) -> Optional[str]:
|
183
|
+
return None
|
184
|
+
|
185
|
+
def parts(self) -> List[str]:
|
186
|
+
return [self.locator.table_name]
|
187
|
+
|
81
188
|
|
82
189
|
class TableLocator(Locator, dict):
|
83
190
|
@staticmethod
|
@@ -91,11 +198,19 @@ class TableLocator(Locator, dict):
|
|
91
198
|
|
92
199
|
@staticmethod
|
93
200
|
def at(namespace: Optional[str], table_name: Optional[str]) -> TableLocator:
|
94
|
-
namespace_locator = NamespaceLocator.of(namespace)
|
201
|
+
namespace_locator = NamespaceLocator.of(namespace) if namespace else None
|
95
202
|
return TableLocator.of(namespace_locator, table_name)
|
96
203
|
|
97
204
|
@property
|
98
|
-
def
|
205
|
+
def name(self) -> TableLocatorName:
|
206
|
+
return TableLocatorName(self)
|
207
|
+
|
208
|
+
@property
|
209
|
+
def parent(self) -> Optional[NamespaceLocator]:
|
210
|
+
return self.namespace_locator
|
211
|
+
|
212
|
+
@property
|
213
|
+
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
99
214
|
val: Dict[str, Any] = self.get("namespaceLocator")
|
100
215
|
if val is not None and not isinstance(val, NamespaceLocator):
|
101
216
|
self.namespace_locator = val = NamespaceLocator(val)
|
@@ -119,13 +234,3 @@ class TableLocator(Locator, dict):
|
|
119
234
|
if namespace_locator:
|
120
235
|
return namespace_locator.namespace
|
121
236
|
return None
|
122
|
-
|
123
|
-
def canonical_string(self) -> str:
|
124
|
-
"""
|
125
|
-
Returns a unique string for the given locator that can be used
|
126
|
-
for equality checks (i.e. two locators are equal if they have
|
127
|
-
the same canonical string).
|
128
|
-
"""
|
129
|
-
nl_hexdigest = self.namespace_locator.hexdigest()
|
130
|
-
table_name = self.table_name
|
131
|
-
return f"{nl_hexdigest}|{table_name}"
|
@@ -1,38 +1,77 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
-
|
4
|
+
import base64
|
5
|
+
import re
|
6
|
+
import posixpath
|
7
|
+
from typing import Any, Dict, List, Optional, Tuple
|
5
8
|
|
9
|
+
import pyarrow
|
6
10
|
import pyarrow as pa
|
7
11
|
|
8
|
-
|
12
|
+
import deltacat.storage.model.partition as partition
|
13
|
+
|
14
|
+
from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
|
15
|
+
from deltacat.constants import (
|
16
|
+
METAFILE_FORMAT,
|
17
|
+
METAFILE_FORMAT_JSON,
|
18
|
+
TXN_DIR_NAME,
|
19
|
+
BYTES_PER_KIBIBYTE,
|
20
|
+
)
|
21
|
+
from deltacat.storage.model.schema import (
|
22
|
+
Schema,
|
23
|
+
SchemaList,
|
24
|
+
)
|
25
|
+
from deltacat.storage.model.locator import (
|
26
|
+
Locator,
|
27
|
+
LocatorName,
|
28
|
+
)
|
9
29
|
from deltacat.storage.model.namespace import NamespaceLocator
|
10
|
-
from deltacat.storage.model.table import
|
30
|
+
from deltacat.storage.model.table import (
|
31
|
+
TableLocator,
|
32
|
+
Table,
|
33
|
+
)
|
11
34
|
from deltacat.types.media import ContentType
|
12
|
-
from deltacat.storage.model.sort_key import
|
35
|
+
from deltacat.storage.model.sort_key import SortScheme, SortSchemeList
|
36
|
+
from deltacat.storage.model.types import LifecycleState
|
37
|
+
from deltacat.types.tables import TableProperty
|
13
38
|
|
39
|
+
TableVersionProperties = Dict[str, Any]
|
14
40
|
|
15
|
-
|
41
|
+
|
42
|
+
class TableVersion(Metafile):
|
16
43
|
@staticmethod
|
17
44
|
def of(
|
18
45
|
locator: Optional[TableVersionLocator],
|
19
|
-
schema: Optional[
|
20
|
-
|
21
|
-
primary_key_columns: Optional[List[str]] = None,
|
46
|
+
schema: Optional[Schema],
|
47
|
+
partition_scheme: Optional[partition.PartitionScheme] = None,
|
22
48
|
description: Optional[str] = None,
|
23
|
-
properties: Optional[
|
49
|
+
properties: Optional[TableVersionProperties] = None,
|
24
50
|
content_types: Optional[List[ContentType]] = None,
|
25
|
-
|
51
|
+
sort_scheme: Optional[SortScheme] = None,
|
52
|
+
watermark: Optional[int] = None,
|
53
|
+
lifecycle_state: Optional[LifecycleState] = None,
|
54
|
+
schemas: Optional[SchemaList] = None,
|
55
|
+
partition_schemes: Optional[partition.PartitionSchemeList] = None,
|
56
|
+
sort_schemes: Optional[SortSchemeList] = None,
|
57
|
+
previous_table_version: Optional[str] = None,
|
58
|
+
native_object: Optional[Any] = None,
|
26
59
|
) -> TableVersion:
|
27
60
|
table_version = TableVersion()
|
28
61
|
table_version.locator = locator
|
29
62
|
table_version.schema = schema
|
30
|
-
table_version.
|
31
|
-
table_version.primary_keys = primary_key_columns
|
63
|
+
table_version.partition_scheme = partition_scheme
|
32
64
|
table_version.description = description
|
33
65
|
table_version.properties = properties
|
34
66
|
table_version.content_types = content_types
|
35
|
-
table_version.
|
67
|
+
table_version.sort_scheme = sort_scheme
|
68
|
+
table_version.watermark = watermark
|
69
|
+
table_version.state = lifecycle_state
|
70
|
+
table_version.schemas = schemas
|
71
|
+
table_version.partition_schemes = partition_schemes
|
72
|
+
table_version.sort_schemes = sort_schemes
|
73
|
+
table_version.previous_table_version = previous_table_version
|
74
|
+
table_version.native_object = native_object
|
36
75
|
return table_version
|
37
76
|
|
38
77
|
@property
|
@@ -47,36 +86,91 @@ class TableVersion(dict):
|
|
47
86
|
self["tableVersionLocator"] = table_version_locator
|
48
87
|
|
49
88
|
@property
|
50
|
-
def schema(self) -> Optional[
|
51
|
-
|
89
|
+
def schema(self) -> Optional[Schema]:
|
90
|
+
val: Dict[str, Any] = self.get("schema")
|
91
|
+
if val is not None and not isinstance(val, Schema):
|
92
|
+
self.schema = val = Schema(val)
|
93
|
+
return val
|
52
94
|
|
53
95
|
@schema.setter
|
54
|
-
def schema(self, schema: Optional[
|
96
|
+
def schema(self, schema: Optional[Schema]) -> None:
|
55
97
|
self["schema"] = schema
|
56
98
|
|
57
99
|
@property
|
58
|
-
def
|
59
|
-
|
100
|
+
def schemas(self) -> Optional[SchemaList]:
|
101
|
+
val: Optional[SchemaList] = self.get("schemas")
|
102
|
+
if val is not None and not isinstance(val, SchemaList):
|
103
|
+
self["schemas"] = val = SchemaList.of(val)
|
104
|
+
return val
|
105
|
+
|
106
|
+
@schemas.setter
|
107
|
+
def schemas(self, schemas: Optional[SchemaList]) -> None:
|
108
|
+
self["schemas"] = schemas
|
109
|
+
|
110
|
+
@property
|
111
|
+
def sort_scheme(self) -> Optional[SortScheme]:
|
112
|
+
val: Dict[str, Any] = self.get("sortScheme")
|
113
|
+
if val is not None and not isinstance(val, SortScheme):
|
114
|
+
self["sortScheme"] = val = SortScheme(val)
|
115
|
+
return val
|
116
|
+
|
117
|
+
@sort_scheme.setter
|
118
|
+
def sort_scheme(self, sort_scheme: Optional[SortScheme]) -> None:
|
119
|
+
self["sortScheme"] = sort_scheme
|
120
|
+
|
121
|
+
@property
|
122
|
+
def sort_schemes(self) -> Optional[SortSchemeList]:
|
123
|
+
val: Dict[str, Any] = self.get("sortSchemes")
|
124
|
+
if val is not None and not isinstance(val, SortSchemeList):
|
125
|
+
self["sortSchemes"] = val = SortSchemeList.of(val)
|
126
|
+
return val
|
127
|
+
|
128
|
+
@sort_schemes.setter
|
129
|
+
def sort_schemes(self, sort_schemes: Optional[SortSchemeList]) -> None:
|
130
|
+
self["sortSchemes"] = sort_schemes
|
131
|
+
|
132
|
+
@property
|
133
|
+
def watermark(self) -> Optional[int]:
|
134
|
+
return self.get("watermark")
|
60
135
|
|
61
|
-
@
|
62
|
-
def
|
63
|
-
self["
|
136
|
+
@watermark.setter
|
137
|
+
def watermark(self, watermark: Optional[int]) -> None:
|
138
|
+
self["watermark"] = watermark
|
64
139
|
|
65
140
|
@property
|
66
|
-
def
|
67
|
-
|
141
|
+
def state(self) -> Optional[LifecycleState]:
|
142
|
+
state = self.get("state")
|
143
|
+
return None if state is None else LifecycleState(state)
|
68
144
|
|
69
|
-
@
|
70
|
-
def
|
71
|
-
self["
|
145
|
+
@state.setter
|
146
|
+
def state(self, state: Optional[LifecycleState]) -> None:
|
147
|
+
self["state"] = state
|
72
148
|
|
73
149
|
@property
|
74
|
-
def
|
75
|
-
|
150
|
+
def partition_scheme(self) -> Optional[partition.PartitionScheme]:
|
151
|
+
val: Dict[str, Any] = self.get("partitionScheme")
|
152
|
+
if val is not None and not isinstance(val, partition.PartitionScheme):
|
153
|
+
self["partitionScheme"] = val = partition.PartitionScheme(val)
|
154
|
+
return val
|
76
155
|
|
77
|
-
@
|
78
|
-
def
|
79
|
-
self[
|
156
|
+
@partition_scheme.setter
|
157
|
+
def partition_scheme(
|
158
|
+
self, partition_scheme: Optional[partition.PartitionScheme]
|
159
|
+
) -> None:
|
160
|
+
self["partitionScheme"] = partition_scheme
|
161
|
+
|
162
|
+
@property
|
163
|
+
def partition_schemes(self) -> Optional[partition.PartitionSchemeList]:
|
164
|
+
val: Dict[str, Any] = self.get("partitionSchemes")
|
165
|
+
if val is not None and not isinstance(val, partition.PartitionSchemeList):
|
166
|
+
self["partitionSchemes"] = val = partition.PartitionSchemeList.of(val)
|
167
|
+
return val
|
168
|
+
|
169
|
+
@partition_schemes.setter
|
170
|
+
def partition_schemes(
|
171
|
+
self, partition_schemes: Optional[partition.PartitionSchemeList]
|
172
|
+
) -> None:
|
173
|
+
self["partitionSchemes"] = partition_schemes
|
80
174
|
|
81
175
|
@property
|
82
176
|
def description(self) -> Optional[str]:
|
@@ -87,11 +181,19 @@ class TableVersion(dict):
|
|
87
181
|
self["description"] = description
|
88
182
|
|
89
183
|
@property
|
90
|
-
def
|
184
|
+
def previous_table_version(self) -> Optional[str]:
|
185
|
+
return self.get("previous_table_version")
|
186
|
+
|
187
|
+
@previous_table_version.setter
|
188
|
+
def previous_table_version(self, previous_table_version: Optional[str]) -> None:
|
189
|
+
self["previous_table_version"] = previous_table_version
|
190
|
+
|
191
|
+
@property
|
192
|
+
def properties(self) -> Optional[TableVersionProperties]:
|
91
193
|
return self.get("properties")
|
92
194
|
|
93
195
|
@properties.setter
|
94
|
-
def properties(self, properties: Optional[
|
196
|
+
def properties(self, properties: Optional[TableVersionProperties]) -> None:
|
95
197
|
self["properties"] = properties
|
96
198
|
|
97
199
|
@property
|
@@ -107,6 +209,14 @@ class TableVersion(dict):
|
|
107
209
|
def content_types(self, content_types: Optional[List[ContentType]]) -> None:
|
108
210
|
self["contentTypes"] = content_types
|
109
211
|
|
212
|
+
@property
|
213
|
+
def native_object(self) -> Optional[Any]:
|
214
|
+
return self.get("nativeObject")
|
215
|
+
|
216
|
+
@native_object.setter
|
217
|
+
def native_object(self, native_object: Optional[Any]) -> None:
|
218
|
+
self["nativeObject"] = native_object
|
219
|
+
|
110
220
|
@property
|
111
221
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
112
222
|
table_version_locator = self.locator
|
@@ -142,17 +252,190 @@ class TableVersion(dict):
|
|
142
252
|
return table_version_locator.table_version
|
143
253
|
return None
|
144
254
|
|
255
|
+
def url(self, catalog_name: Optional[str] = None) -> str:
|
256
|
+
return (
|
257
|
+
f"dc://{catalog_name}/{self.namespace}/{self.table_name}/{self.table_version}/"
|
258
|
+
if catalog_name
|
259
|
+
else f"table://{self.namespace}/{self.table_name}/{self.table_version}/"
|
260
|
+
)
|
261
|
+
|
145
262
|
def is_supported_content_type(self, content_type: ContentType):
|
146
263
|
supported_content_types = self.content_types
|
147
264
|
return (not supported_content_types) or (
|
148
265
|
content_type in supported_content_types
|
149
266
|
)
|
150
267
|
|
268
|
+
def to_serializable(self) -> TableVersion:
|
269
|
+
serializable: TableVersion = TableVersion.update_for(self)
|
270
|
+
if serializable.schema:
|
271
|
+
schema_bytes = serializable.schema.serialize().to_pybytes()
|
272
|
+
serializable.schema = (
|
273
|
+
base64.b64encode(schema_bytes).decode("utf-8")
|
274
|
+
if METAFILE_FORMAT == METAFILE_FORMAT_JSON
|
275
|
+
else schema_bytes
|
276
|
+
)
|
277
|
+
|
278
|
+
if serializable.schemas:
|
279
|
+
serializable.schemas = [
|
280
|
+
base64.b64encode(schema.serialize().to_pybytes()).decode("utf-8")
|
281
|
+
if METAFILE_FORMAT == METAFILE_FORMAT_JSON
|
282
|
+
else schema.serialize().to_pybytes()
|
283
|
+
for schema in serializable.schemas
|
284
|
+
]
|
285
|
+
if serializable.table_locator:
|
286
|
+
# remove the mutable table locator
|
287
|
+
serializable.locator.table_locator = TableLocator.at(
|
288
|
+
namespace=self.id,
|
289
|
+
table_name=self.id,
|
290
|
+
)
|
291
|
+
return serializable
|
292
|
+
|
293
|
+
def from_serializable(
|
294
|
+
self,
|
295
|
+
path: str,
|
296
|
+
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
297
|
+
) -> TableVersion:
|
298
|
+
if self.get("schema"):
|
299
|
+
schema_data = self["schema"]
|
300
|
+
schema_bytes = (
|
301
|
+
base64.b64decode(schema_data)
|
302
|
+
if METAFILE_FORMAT == "json"
|
303
|
+
else schema_data
|
304
|
+
)
|
305
|
+
self["schema"] = Schema.deserialize(pa.py_buffer(schema_bytes))
|
306
|
+
else:
|
307
|
+
self["schema"] = None
|
308
|
+
|
309
|
+
if self.get("schemas"):
|
310
|
+
self.schemas = [
|
311
|
+
Schema.deserialize(
|
312
|
+
pa.py_buffer(
|
313
|
+
base64.b64decode(schema)
|
314
|
+
if METAFILE_FORMAT == METAFILE_FORMAT_JSON
|
315
|
+
else schema
|
316
|
+
)
|
317
|
+
)
|
318
|
+
for schema in self["schemas"]
|
319
|
+
]
|
320
|
+
else:
|
321
|
+
self.schemas = None
|
322
|
+
|
323
|
+
if self.sort_scheme:
|
324
|
+
# force list-to-tuple conversion of sort keys via property invocation
|
325
|
+
self.sort_scheme.keys
|
326
|
+
[sort_scheme.keys for sort_scheme in self.sort_schemes]
|
327
|
+
# restore the table locator from its mapped immutable metafile ID
|
328
|
+
if self.table_locator and self.table_locator.table_name == self.id:
|
329
|
+
parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
|
330
|
+
base_metafile_path=path,
|
331
|
+
parent_number=1,
|
332
|
+
)
|
333
|
+
txn_log_dir = posixpath.join(
|
334
|
+
posixpath.dirname(
|
335
|
+
posixpath.dirname(
|
336
|
+
posixpath.dirname(parent_rev_dir_path),
|
337
|
+
)
|
338
|
+
),
|
339
|
+
TXN_DIR_NAME,
|
340
|
+
)
|
341
|
+
table = Table.read(
|
342
|
+
MetafileRevisionInfo.latest_revision(
|
343
|
+
revision_dir_path=parent_rev_dir_path,
|
344
|
+
filesystem=filesystem,
|
345
|
+
success_txn_log_dir=txn_log_dir,
|
346
|
+
).path,
|
347
|
+
filesystem,
|
348
|
+
)
|
349
|
+
self.locator.table_locator = table.locator
|
350
|
+
return self
|
351
|
+
|
352
|
+
def current_version_number(self) -> Optional[int]:
|
353
|
+
"""
|
354
|
+
Returns the current table version number as an integer, or None if
|
355
|
+
a table version has not yet been assigned.
|
356
|
+
"""
|
357
|
+
prefix, version_number = (
|
358
|
+
TableVersion.parse_table_version(
|
359
|
+
self.table_version,
|
360
|
+
)
|
361
|
+
if self.table_version is not None
|
362
|
+
else (None, None)
|
363
|
+
)
|
364
|
+
return int(version_number) if version_number is not None else None
|
365
|
+
|
366
|
+
def read_table_property(self, property: TableProperty) -> Any:
|
367
|
+
return TableProperty.read_table_property(self, property)
|
368
|
+
|
369
|
+
@staticmethod
|
370
|
+
def next_version(previous_version: Optional[str] = None) -> str:
|
371
|
+
"""
|
372
|
+
Assigns the next table version string given the previous table version
|
373
|
+
by incrementing the version number of the given previous table version
|
374
|
+
identifier. Returns "1" if the previous version is undefined.
|
375
|
+
"""
|
376
|
+
prefix, previous_version_number = (
|
377
|
+
TableVersion.parse_table_version(
|
378
|
+
previous_version,
|
379
|
+
)
|
380
|
+
if previous_version is not None
|
381
|
+
else (None, None)
|
382
|
+
)
|
383
|
+
new_version_number = (
|
384
|
+
int(previous_version_number) + 1
|
385
|
+
if previous_version_number is not None
|
386
|
+
else 1
|
387
|
+
)
|
388
|
+
new_prefix = prefix if prefix is not None else ""
|
389
|
+
return f"{new_prefix}{new_version_number}"
|
390
|
+
|
391
|
+
@staticmethod
|
392
|
+
def parse_table_version(table_version: str) -> Tuple[Optional[str], int]:
|
393
|
+
"""
|
394
|
+
Parses a table version string into its prefix and version number.
|
395
|
+
Returns a tuple of the prefix and version number.
|
396
|
+
"""
|
397
|
+
if not table_version:
|
398
|
+
raise ValueError(f"Table version to parse is undefined.")
|
399
|
+
if len(table_version) > BYTES_PER_KIBIBYTE:
|
400
|
+
raise ValueError(
|
401
|
+
f"Invalid table version {table_version}. Table version "
|
402
|
+
f"identifier cannot be greater than {BYTES_PER_KIBIBYTE} "
|
403
|
+
f"characters."
|
404
|
+
)
|
405
|
+
version_match = re.match(
|
406
|
+
rf"^(\w*\.)?(\d+)$",
|
407
|
+
table_version,
|
408
|
+
)
|
409
|
+
if version_match:
|
410
|
+
prefix, version_number = version_match.groups()
|
411
|
+
return prefix, int(version_number)
|
412
|
+
raise ValueError(
|
413
|
+
f"Invalid table version {table_version}. Valid table versions "
|
414
|
+
f"are of the form `TableVersionName.1` or simply `1`.",
|
415
|
+
)
|
416
|
+
|
417
|
+
|
418
|
+
class TableVersionLocatorName(LocatorName):
|
419
|
+
def __init__(self, locator: TableVersionLocator):
|
420
|
+
self.locator = locator
|
421
|
+
|
422
|
+
@property
|
423
|
+
def immutable_id(self) -> Optional[str]:
|
424
|
+
return self.locator.table_version
|
425
|
+
|
426
|
+
@immutable_id.setter
|
427
|
+
def immutable_id(self, immutable_id: Optional[str]):
|
428
|
+
self.locator.table_version = immutable_id
|
429
|
+
|
430
|
+
def parts(self) -> List[str]:
|
431
|
+
return [self.locator.table_version]
|
432
|
+
|
151
433
|
|
152
434
|
class TableVersionLocator(Locator, dict):
|
153
435
|
@staticmethod
|
154
436
|
def of(
|
155
|
-
table_locator: Optional[TableLocator],
|
437
|
+
table_locator: Optional[TableLocator],
|
438
|
+
table_version: Optional[str],
|
156
439
|
) -> TableVersionLocator:
|
157
440
|
table_version_locator = TableVersionLocator()
|
158
441
|
table_version_locator.table_locator = table_locator
|
@@ -165,9 +448,17 @@ class TableVersionLocator(Locator, dict):
|
|
165
448
|
table_name: Optional[str],
|
166
449
|
table_version: Optional[str],
|
167
450
|
) -> TableVersionLocator:
|
168
|
-
table_locator = TableLocator.at(namespace, table_name)
|
451
|
+
table_locator = TableLocator.at(namespace, table_name) if table_name else None
|
169
452
|
return TableVersionLocator.of(table_locator, table_version)
|
170
453
|
|
454
|
+
@property
|
455
|
+
def name(self):
|
456
|
+
return TableVersionLocatorName(self)
|
457
|
+
|
458
|
+
@property
|
459
|
+
def parent(self) -> Optional[TableLocator]:
|
460
|
+
return self.table_locator
|
461
|
+
|
171
462
|
@property
|
172
463
|
def table_locator(self) -> Optional[TableLocator]:
|
173
464
|
val: Dict[str, Any] = self.get("tableLocator")
|
@@ -185,7 +476,13 @@ class TableVersionLocator(Locator, dict):
|
|
185
476
|
|
186
477
|
@table_version.setter
|
187
478
|
def table_version(self, table_version: Optional[str]) -> None:
|
188
|
-
|
479
|
+
# ensure that the table version is valid
|
480
|
+
prefix, version_number = TableVersion.parse_table_version(table_version)
|
481
|
+
# restate the table version number in its canonical form
|
482
|
+
# (e.g., ensure that "MyVersion.0001" is saved as "MyVersion.1")
|
483
|
+
self["tableVersion"] = (
|
484
|
+
f"{prefix}{version_number}" if prefix else str(version_number)
|
485
|
+
)
|
189
486
|
|
190
487
|
@property
|
191
488
|
def namespace_locator(self) -> Optional[NamespaceLocator]:
|
@@ -207,13 +504,3 @@ class TableVersionLocator(Locator, dict):
|
|
207
504
|
if table_locator:
|
208
505
|
return table_locator.table_name
|
209
506
|
return None
|
210
|
-
|
211
|
-
def canonical_string(self) -> str:
|
212
|
-
"""
|
213
|
-
Returns a unique string for the given locator that can be used
|
214
|
-
for equality checks (i.e. two locators are equal if they have
|
215
|
-
the same canonical string).
|
216
|
-
"""
|
217
|
-
tl_hexdigest = self.table_locator.hexdigest()
|
218
|
-
table_version = self.table_version
|
219
|
-
return f"{tl_hexdigest}|{table_version}"
|