deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
deltacat/catalog/delegate.py
CHANGED
@@ -1,53 +1,48 @@
|
|
1
|
-
from typing import Any, Dict, List, Optional,
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
2
2
|
|
3
|
-
|
4
|
-
import ray
|
5
|
-
|
6
|
-
from deltacat.catalog.model.catalog import Catalog, all_catalogs
|
3
|
+
from deltacat.catalog.model.catalog import get_catalog
|
7
4
|
from deltacat.catalog.model.table_definition import TableDefinition
|
8
|
-
from deltacat.storage.model.
|
5
|
+
from deltacat.storage.model.partition import (
|
6
|
+
Partition,
|
7
|
+
PartitionLocator,
|
8
|
+
PartitionScheme,
|
9
|
+
)
|
10
|
+
from deltacat.storage.model.sort_key import SortScheme
|
9
11
|
from deltacat.storage.model.list_result import ListResult
|
10
|
-
from deltacat.storage.model.namespace import Namespace
|
12
|
+
from deltacat.storage.model.namespace import Namespace, NamespaceProperties
|
13
|
+
from deltacat.storage.model.schema import (
|
14
|
+
Schema,
|
15
|
+
SchemaUpdateOperations,
|
16
|
+
)
|
17
|
+
from deltacat.storage.model.table import TableProperties
|
18
|
+
from deltacat.storage.model.table_version import TableVersionProperties
|
11
19
|
from deltacat.storage.model.types import (
|
12
|
-
|
20
|
+
Dataset,
|
13
21
|
LifecycleState,
|
14
|
-
|
15
|
-
|
16
|
-
|
22
|
+
StreamFormat,
|
23
|
+
)
|
24
|
+
from deltacat.storage.model.transaction import (
|
25
|
+
Transaction,
|
26
|
+
get_current_transaction,
|
17
27
|
)
|
18
28
|
from deltacat.types.media import ContentType
|
19
|
-
from deltacat.types.tables import
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
if not all_catalogs:
|
24
|
-
raise ValueError(
|
25
|
-
"No catalogs available! Call "
|
26
|
-
"`deltacat.init(catalogs={...})` to register one or more "
|
27
|
-
"catalogs then retry."
|
28
|
-
)
|
29
|
-
catalog = (
|
30
|
-
ray.get(all_catalogs.get.remote(name))
|
31
|
-
if name
|
32
|
-
else ray.get(all_catalogs.default.remote())
|
33
|
-
)
|
34
|
-
if not catalog:
|
35
|
-
available_catalogs = ray.get(all_catalogs.all.remote()).values()
|
36
|
-
raise ValueError(
|
37
|
-
f"Catalog '{name}' not found. Available catalogs: " f"{available_catalogs}."
|
38
|
-
)
|
39
|
-
return catalog
|
29
|
+
from deltacat.types.tables import (
|
30
|
+
DatasetType,
|
31
|
+
TableWriteMode,
|
32
|
+
)
|
40
33
|
|
41
34
|
|
42
35
|
# table functions
|
43
36
|
def write_to_table(
|
44
|
-
data:
|
37
|
+
data: Dataset,
|
45
38
|
table: str,
|
39
|
+
*args,
|
46
40
|
namespace: Optional[str] = None,
|
47
|
-
|
41
|
+
table_version: Optional[str] = None,
|
48
42
|
mode: TableWriteMode = TableWriteMode.AUTO,
|
49
43
|
content_type: ContentType = ContentType.PARQUET,
|
50
|
-
|
44
|
+
transaction: Optional[Transaction] = None,
|
45
|
+
catalog: Optional[str] = None,
|
51
46
|
**kwargs,
|
52
47
|
) -> None:
|
53
48
|
"""Write local or distributed data to a table. Raises an error if the
|
@@ -56,229 +51,709 @@ def write_to_table(
|
|
56
51
|
When creating a table, all `create_table` parameters may be optionally
|
57
52
|
specified as additional keyword arguments. When appending to, or replacing,
|
58
53
|
an existing table, all `alter_table` parameters may be optionally specified
|
59
|
-
as additional keyword arguments.
|
60
|
-
|
61
|
-
|
54
|
+
as additional keyword arguments.
|
55
|
+
|
56
|
+
Args:
|
57
|
+
data: Local or distributed data to write to the table.
|
58
|
+
table: Name of the table to write to.
|
59
|
+
namespace: Optional namespace for the table. Uses default if not specified.
|
60
|
+
table_version: Optional version of the table to write to. If specified,
|
61
|
+
will create this version if it doesn't exist (in CREATE mode) or
|
62
|
+
get this version if it exists (in other modes). If not specified,
|
63
|
+
uses the latest version.
|
64
|
+
mode: Write mode (AUTO, CREATE, APPEND, REPLACE, MERGE, DELETE).
|
65
|
+
content_type: Content type used to write the data files. Defaults to PARQUET.
|
66
|
+
transaction: Optional transaction to append write operations to instead of
|
67
|
+
creating and committing a new transaction.
|
68
|
+
**kwargs: Additional keyword arguments.
|
69
|
+
"""
|
70
|
+
if (transaction or get_current_transaction()) and catalog:
|
71
|
+
raise ValueError(
|
72
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
73
|
+
)
|
74
|
+
catalog_obj = get_catalog(catalog)
|
75
|
+
catalog_obj.impl.write_to_table(
|
76
|
+
data,
|
77
|
+
table,
|
78
|
+
*args,
|
79
|
+
namespace=namespace,
|
80
|
+
table_version=table_version,
|
81
|
+
mode=mode,
|
82
|
+
content_type=content_type,
|
83
|
+
transaction=transaction,
|
84
|
+
inner=catalog_obj.inner,
|
85
|
+
**kwargs,
|
62
86
|
)
|
63
87
|
|
64
88
|
|
65
89
|
def read_table(
|
66
90
|
table: str,
|
91
|
+
*args,
|
67
92
|
namespace: Optional[str] = None,
|
93
|
+
table_version: Optional[str] = None,
|
94
|
+
read_as: DatasetType = DatasetType.DAFT,
|
95
|
+
partition_filter: Optional[List[Union[Partition, PartitionLocator]]] = None,
|
96
|
+
max_parallelism: Optional[int] = None,
|
97
|
+
columns: Optional[List[str]] = None,
|
98
|
+
file_path_column: Optional[str] = None,
|
99
|
+
transaction: Optional[Transaction] = None,
|
68
100
|
catalog: Optional[str] = None,
|
69
|
-
*args,
|
70
101
|
**kwargs,
|
71
|
-
) ->
|
72
|
-
"""Read a table into a
|
73
|
-
|
102
|
+
) -> Dataset:
|
103
|
+
"""Read a table into a dataset.
|
104
|
+
|
105
|
+
Args:
|
106
|
+
table: Name of the table to read.
|
107
|
+
namespace: Optional namespace of the table. Uses default if not specified.
|
108
|
+
table_version: Optional specific version of the table to read.
|
109
|
+
read_as: Dataset type to use for reading table files. Defaults to DatasetType.DAFT.
|
110
|
+
partition_filter: Optional list of partitions to read from.
|
111
|
+
max_parallelism: Optional maximum parallelism for data download. Defaults to the number of
|
112
|
+
available CPU cores for local dataset type reads (i.e., members of DatasetType.local())
|
113
|
+
and 100 for distributed dataset type reads (i.e., members of DatasetType.distributed()).
|
114
|
+
columns: Optional list of columns to include in the result.
|
115
|
+
file_path_column: Optional column name to add file paths to the result.
|
116
|
+
transaction: Optional transaction to chain this read operation to. If provided, uncommitted
|
117
|
+
changes from the transaction will be visible to this read operation.
|
118
|
+
**kwargs: Additional keyword arguments.
|
119
|
+
|
120
|
+
Returns:
|
121
|
+
Dataset containing the table data.
|
122
|
+
"""
|
123
|
+
if (transaction or get_current_transaction()) and catalog:
|
124
|
+
raise ValueError(
|
125
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
126
|
+
)
|
127
|
+
catalog_obj = get_catalog(catalog)
|
128
|
+
return catalog_obj.impl.read_table(
|
129
|
+
table,
|
130
|
+
*args,
|
131
|
+
namespace=namespace,
|
132
|
+
table_version=table_version,
|
133
|
+
read_as=read_as,
|
134
|
+
partition_filter=partition_filter,
|
135
|
+
max_parallelism=max_parallelism,
|
136
|
+
columns=columns,
|
137
|
+
file_path_column=file_path_column,
|
138
|
+
transaction=transaction,
|
139
|
+
inner=catalog_obj.inner,
|
140
|
+
**kwargs,
|
141
|
+
)
|
74
142
|
|
75
143
|
|
76
144
|
def alter_table(
|
77
145
|
table: str,
|
146
|
+
*args,
|
78
147
|
namespace: Optional[str] = None,
|
79
|
-
|
148
|
+
table_version: Optional[str] = None,
|
80
149
|
lifecycle_state: Optional[LifecycleState] = None,
|
81
|
-
schema_updates: Optional[
|
150
|
+
schema_updates: Optional[SchemaUpdateOperations] = None,
|
82
151
|
partition_updates: Optional[Dict[str, Any]] = None,
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
152
|
+
sort_scheme: Optional[SortScheme] = None,
|
153
|
+
table_description: Optional[str] = None,
|
154
|
+
table_version_description: Optional[str] = None,
|
155
|
+
table_properties: Optional[TableProperties] = None,
|
156
|
+
table_version_properties: Optional[TableVersionProperties] = None,
|
157
|
+
transaction: Optional[Transaction] = None,
|
158
|
+
catalog: Optional[str] = None,
|
88
159
|
**kwargs,
|
89
160
|
) -> None:
|
90
|
-
"""Alter table definition.
|
91
|
-
|
161
|
+
"""Alter deltacat table/table_version definition.
|
162
|
+
|
163
|
+
Modifies various aspects of a table's metadata including lifecycle state,
|
164
|
+
schema, partitioning, sort keys, description, and properties.
|
165
|
+
|
166
|
+
Args:
|
167
|
+
table: Name of the table to alter.
|
168
|
+
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
169
|
+
table_version: Optional specific version of the table to alter. Defaults to the latest active version.
|
170
|
+
lifecycle_state: New lifecycle state for the table.
|
171
|
+
schema_updates: Schema updates to apply.
|
172
|
+
partition_updates: Partition scheme updates to apply.
|
173
|
+
sort_scheme: New sort scheme.
|
174
|
+
table_description: New description for the table.
|
175
|
+
table_version_description: New description for the table version. Defaults to `table_description` if not specified.
|
176
|
+
table_properties: New table properties.
|
177
|
+
table_version_properties: New table version properties. Defaults to the current parent table properties if not specified.
|
178
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
179
|
+
|
180
|
+
Returns:
|
181
|
+
None
|
182
|
+
|
183
|
+
Raises:
|
184
|
+
TableNotFoundError: If the table does not already exist.
|
185
|
+
TableVersionNotFoundError: If the specified table version or active table version does not exist.
|
186
|
+
"""
|
187
|
+
if (transaction or get_current_transaction()) and catalog:
|
188
|
+
raise ValueError(
|
189
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
190
|
+
)
|
191
|
+
catalog_obj = get_catalog(catalog)
|
192
|
+
catalog_obj.impl.alter_table(
|
92
193
|
table,
|
93
|
-
namespace,
|
94
|
-
lifecycle_state,
|
95
|
-
schema_updates,
|
96
|
-
partition_updates,
|
97
|
-
primary_keys,
|
98
|
-
sort_keys,
|
99
|
-
description,
|
100
|
-
properties,
|
101
194
|
*args,
|
195
|
+
namespace=namespace,
|
196
|
+
table_version=table_version,
|
197
|
+
lifecycle_state=lifecycle_state,
|
198
|
+
schema_updates=schema_updates,
|
199
|
+
partition_updates=partition_updates,
|
200
|
+
sort_scheme=sort_scheme,
|
201
|
+
table_description=table_description,
|
202
|
+
table_version_description=table_version_description,
|
203
|
+
table_properties=table_properties,
|
204
|
+
table_version_properties=table_version_properties,
|
205
|
+
transaction=transaction,
|
206
|
+
inner=catalog_obj.inner,
|
102
207
|
**kwargs,
|
103
208
|
)
|
104
209
|
|
105
210
|
|
106
211
|
def create_table(
|
107
212
|
table: str,
|
213
|
+
*args,
|
108
214
|
namespace: Optional[str] = None,
|
109
|
-
|
110
|
-
lifecycle_state: Optional[LifecycleState] =
|
111
|
-
schema: Optional[
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
215
|
+
table_version: Optional[str] = None,
|
216
|
+
lifecycle_state: Optional[LifecycleState] = LifecycleState.ACTIVE,
|
217
|
+
schema: Optional[Schema] = None,
|
218
|
+
partition_scheme: Optional[PartitionScheme] = None,
|
219
|
+
sort_keys: Optional[SortScheme] = None,
|
220
|
+
table_description: Optional[str] = None,
|
221
|
+
table_version_description: Optional[str] = None,
|
222
|
+
table_properties: Optional[TableProperties] = None,
|
223
|
+
table_version_properties: Optional[TableVersionProperties] = None,
|
224
|
+
namespace_properties: Optional[NamespaceProperties] = None,
|
119
225
|
content_types: Optional[List[ContentType]] = None,
|
120
|
-
|
121
|
-
|
226
|
+
fail_if_exists: bool = True,
|
227
|
+
transaction: Optional[Transaction] = None,
|
228
|
+
catalog: Optional[str] = None,
|
122
229
|
**kwargs,
|
123
230
|
) -> TableDefinition:
|
124
|
-
"""Create an empty table
|
125
|
-
|
126
|
-
|
231
|
+
"""Create an empty table in the catalog.
|
232
|
+
|
233
|
+
If a namespace isn't provided, the table will be created within the default deltacat namespace.
|
234
|
+
Additionally, if the provided namespace does not exist, it will be created for you.
|
235
|
+
|
236
|
+
Args:
|
237
|
+
table: Name of the table to create.
|
238
|
+
namespace: Optional namespace for the table. Uses default namespace if not specified.
|
239
|
+
table_version: Optional version identifier for the table.
|
240
|
+
lifecycle_state: Lifecycle state of the new table. Defaults to ACTIVE.
|
241
|
+
schema: Schema definition for the table.
|
242
|
+
partition_scheme: Optional partitioning scheme for the table.
|
243
|
+
sort_keys: Optional sort keys for the table.
|
244
|
+
table_description: Optional description of the table.
|
245
|
+
table_version_description: Optional description for the table version.
|
246
|
+
table_properties: Optional properties for the table.
|
247
|
+
table_version_properties: Optional properties for the table version. Defaults to the current parent table properties if not specified.
|
248
|
+
namespace_properties: Optional properties for the namespace if it needs to be created.
|
249
|
+
content_types: Optional list of allowed content types for the table.
|
250
|
+
fail_if_exists: If True, raises an error if table already exists. If False, returns existing table.
|
251
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
252
|
+
|
253
|
+
Returns:
|
254
|
+
TableDefinition object for the created or existing table.
|
255
|
+
|
256
|
+
Raises:
|
257
|
+
TableAlreadyExistsError: If the table already exists and fail_if_exists is True.
|
258
|
+
NamespaceNotFoundError: If the provided namespace does not exist.
|
259
|
+
"""
|
260
|
+
if (transaction or get_current_transaction()) and catalog:
|
261
|
+
raise ValueError(
|
262
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
263
|
+
)
|
264
|
+
catalog_obj = get_catalog(catalog)
|
265
|
+
return catalog_obj.impl.create_table(
|
127
266
|
table,
|
128
|
-
namespace,
|
129
|
-
lifecycle_state,
|
130
|
-
schema,
|
131
|
-
schema_consistency,
|
132
|
-
partition_keys,
|
133
|
-
primary_keys,
|
134
|
-
sort_keys,
|
135
|
-
description,
|
136
|
-
properties,
|
137
|
-
permissions,
|
138
|
-
content_types,
|
139
|
-
replace_existing_table,
|
140
267
|
*args,
|
268
|
+
namespace=namespace,
|
269
|
+
table_version=table_version,
|
270
|
+
lifecycle_state=lifecycle_state,
|
271
|
+
schema=schema,
|
272
|
+
partition_scheme=partition_scheme,
|
273
|
+
sort_keys=sort_keys,
|
274
|
+
table_description=table_description,
|
275
|
+
table_version_description=table_version_description,
|
276
|
+
table_version_properties=table_version_properties,
|
277
|
+
table_properties=table_properties,
|
278
|
+
namespace_properties=namespace_properties,
|
279
|
+
content_types=content_types,
|
280
|
+
fail_if_exists=fail_if_exists,
|
281
|
+
transaction=transaction,
|
282
|
+
inner=catalog_obj.inner,
|
141
283
|
**kwargs,
|
142
284
|
)
|
143
285
|
|
144
286
|
|
145
287
|
def drop_table(
|
146
288
|
table: str,
|
289
|
+
*args,
|
147
290
|
namespace: Optional[str] = None,
|
148
|
-
|
291
|
+
table_version: Optional[str] = None,
|
149
292
|
purge: bool = False,
|
150
|
-
|
293
|
+
transaction: Optional[Transaction] = None,
|
294
|
+
catalog: Optional[str] = None,
|
151
295
|
**kwargs,
|
152
296
|
) -> None:
|
153
|
-
"""Drop a table from the catalog and optionally
|
154
|
-
|
155
|
-
|
297
|
+
"""Drop a table from the catalog and optionally purges underlying data.
|
298
|
+
|
299
|
+
Args:
|
300
|
+
table: Name of the table to drop.
|
301
|
+
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
302
|
+
table_version: Optional specific version of the table to drop. Defaults to the latest active version.
|
303
|
+
purge: If True, permanently delete the table data. If False, only remove from catalog.
|
304
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
305
|
+
|
306
|
+
Returns:
|
307
|
+
None
|
308
|
+
|
309
|
+
Raises:
|
310
|
+
TableNotFoundError: If the table does not exist.
|
311
|
+
TableVersionNotFoundError: If the table version does not exist.
|
312
|
+
"""
|
313
|
+
if (transaction or get_current_transaction()) and catalog:
|
314
|
+
raise ValueError(
|
315
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
316
|
+
)
|
317
|
+
catalog_obj = get_catalog(catalog)
|
318
|
+
catalog_obj.impl.drop_table(
|
319
|
+
table,
|
320
|
+
*args,
|
321
|
+
namespace=namespace,
|
322
|
+
table_version=table_version,
|
323
|
+
purge=purge,
|
324
|
+
transaction=transaction,
|
325
|
+
inner=catalog_obj.inner,
|
326
|
+
**kwargs,
|
327
|
+
)
|
156
328
|
|
157
329
|
|
158
330
|
def refresh_table(
|
159
331
|
table: str,
|
332
|
+
*args,
|
160
333
|
namespace: Optional[str] = None,
|
334
|
+
table_version: Optional[str] = None,
|
335
|
+
transaction: Optional[Transaction] = None,
|
161
336
|
catalog: Optional[str] = None,
|
162
|
-
*args,
|
163
337
|
**kwargs,
|
164
338
|
) -> None:
|
165
|
-
"""Refresh metadata cached on the Ray cluster for the given table.
|
166
|
-
|
339
|
+
"""Refresh metadata cached on the Ray cluster for the given table.
|
340
|
+
|
341
|
+
Args:
|
342
|
+
table: Name of the table to refresh.
|
343
|
+
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
344
|
+
table_version: Optional specific version of the table to refresh. Defaults to the latest active version.
|
345
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
346
|
+
|
347
|
+
Returns:
|
348
|
+
None
|
349
|
+
"""
|
350
|
+
if (transaction or get_current_transaction()) and catalog:
|
351
|
+
raise ValueError(
|
352
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
353
|
+
)
|
354
|
+
catalog_obj = get_catalog(catalog)
|
355
|
+
catalog_obj.impl.refresh_table(
|
356
|
+
table,
|
357
|
+
*args,
|
358
|
+
namespace=namespace,
|
359
|
+
table_version=table_version,
|
360
|
+
transaction=transaction,
|
361
|
+
inner=catalog_obj.inner,
|
362
|
+
**kwargs,
|
363
|
+
)
|
167
364
|
|
168
365
|
|
169
366
|
def list_tables(
|
170
|
-
|
367
|
+
*args,
|
368
|
+
namespace: Optional[str] = None,
|
369
|
+
table: Optional[str] = None,
|
370
|
+
transaction: Optional[Transaction] = None,
|
371
|
+
catalog: Optional[str] = None,
|
372
|
+
**kwargs,
|
171
373
|
) -> ListResult[TableDefinition]:
|
172
|
-
"""List a page of table definitions.
|
173
|
-
|
174
|
-
|
374
|
+
"""List a page of table definitions.
|
375
|
+
|
376
|
+
Args:
|
377
|
+
namespace: Optional namespace to list tables from. Uses default namespace if not specified.
|
378
|
+
table: Optional table to list its table versions. If not specified, lists the latest active version of each table in the namespace.
|
379
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
380
|
+
|
381
|
+
Returns:
|
382
|
+
ListResult containing TableDefinition objects for tables in the namespace.
|
383
|
+
"""
|
384
|
+
if (transaction or get_current_transaction()) and catalog:
|
385
|
+
raise ValueError(
|
386
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
387
|
+
)
|
388
|
+
catalog_obj = get_catalog(catalog)
|
389
|
+
return catalog_obj.impl.list_tables(
|
390
|
+
*args,
|
391
|
+
namespace=namespace,
|
392
|
+
table=table,
|
393
|
+
transaction=transaction,
|
394
|
+
inner=catalog_obj.inner,
|
395
|
+
**kwargs,
|
396
|
+
)
|
175
397
|
|
176
398
|
|
177
399
|
def get_table(
|
178
400
|
table: str,
|
401
|
+
*args,
|
179
402
|
namespace: Optional[str] = None,
|
403
|
+
table_version: Optional[str] = None,
|
404
|
+
stream_format: StreamFormat = StreamFormat.DELTACAT,
|
405
|
+
transaction: Optional[Transaction] = None,
|
180
406
|
catalog: Optional[str] = None,
|
181
|
-
*args,
|
182
407
|
**kwargs,
|
183
408
|
) -> Optional[TableDefinition]:
|
184
|
-
"""Get table definition metadata.
|
185
|
-
|
186
|
-
|
409
|
+
"""Get table definition metadata.
|
410
|
+
|
411
|
+
Args:
|
412
|
+
table: Name of the table to retrieve.
|
413
|
+
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
414
|
+
table_version: Optional specific version of the table to retrieve. Defaults to the latest active version.
|
415
|
+
stream_format: Optional stream format to retrieve. Defaults to DELTACAT.
|
416
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
417
|
+
|
418
|
+
Returns:
|
419
|
+
Deltacat TableDefinition if the table exists, None otherwise. The table definition's table version will be
|
420
|
+
None if the requested version is not found. The table definition's stream will be None if the requested stream
|
421
|
+
format is not found.
|
422
|
+
"""
|
423
|
+
if (transaction or get_current_transaction()) and catalog:
|
424
|
+
raise ValueError(
|
425
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
426
|
+
)
|
427
|
+
catalog_obj = get_catalog(catalog)
|
428
|
+
return catalog_obj.impl.get_table(
|
429
|
+
table,
|
430
|
+
*args,
|
431
|
+
namespace=namespace,
|
432
|
+
table_version=table_version,
|
433
|
+
stream_format=stream_format,
|
434
|
+
transaction=transaction,
|
435
|
+
inner=catalog_obj.inner,
|
436
|
+
**kwargs,
|
437
|
+
)
|
187
438
|
|
188
439
|
|
189
440
|
def truncate_table(
|
190
441
|
table: str,
|
442
|
+
*args,
|
191
443
|
namespace: Optional[str] = None,
|
444
|
+
table_version: Optional[str] = None,
|
445
|
+
transaction: Optional[Transaction] = None,
|
192
446
|
catalog: Optional[str] = None,
|
193
|
-
*args,
|
194
447
|
**kwargs,
|
195
448
|
) -> None:
|
196
|
-
"""Truncate table data.
|
197
|
-
|
449
|
+
"""Truncate table data.
|
450
|
+
|
451
|
+
Args:
|
452
|
+
table: Name of the table to truncate.
|
453
|
+
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
454
|
+
table_version: Optional specific version of the table to truncate. Defaults to the latest active version.
|
455
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
456
|
+
|
457
|
+
Returns:
|
458
|
+
None
|
459
|
+
"""
|
460
|
+
if (transaction or get_current_transaction()) and catalog:
|
461
|
+
raise ValueError(
|
462
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
463
|
+
)
|
464
|
+
catalog_obj = get_catalog(catalog)
|
465
|
+
catalog_obj.impl.truncate_table(
|
466
|
+
table,
|
467
|
+
*args,
|
468
|
+
namespace=namespace,
|
469
|
+
table_version=table_version,
|
470
|
+
transaction=transaction,
|
471
|
+
inner=catalog_obj.inner,
|
472
|
+
**kwargs,
|
473
|
+
)
|
198
474
|
|
199
475
|
|
200
476
|
def rename_table(
|
201
477
|
table: str,
|
202
478
|
new_name: str,
|
479
|
+
*args,
|
203
480
|
namespace: Optional[str] = None,
|
481
|
+
transaction: Optional[Transaction] = None,
|
204
482
|
catalog: Optional[str] = None,
|
205
|
-
*args,
|
206
483
|
**kwargs,
|
207
484
|
) -> None:
|
208
|
-
"""Rename
|
209
|
-
|
485
|
+
"""Rename an existing table.
|
486
|
+
|
487
|
+
Args:
|
488
|
+
table: Current name of the table.
|
489
|
+
new_name: New name for the table.
|
490
|
+
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
491
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
492
|
+
|
493
|
+
Returns:
|
494
|
+
None
|
495
|
+
|
496
|
+
Raises:
|
497
|
+
TableNotFoundError: If the table does not exist.
|
498
|
+
"""
|
499
|
+
if (transaction or get_current_transaction()) and catalog:
|
500
|
+
raise ValueError(
|
501
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
502
|
+
)
|
503
|
+
catalog_obj = get_catalog(catalog)
|
504
|
+
catalog_obj.impl.rename_table(
|
505
|
+
table,
|
506
|
+
new_name,
|
507
|
+
*args,
|
508
|
+
namespace=namespace,
|
509
|
+
transaction=transaction,
|
510
|
+
inner=catalog_obj.inner,
|
511
|
+
**kwargs,
|
512
|
+
)
|
210
513
|
|
211
514
|
|
212
515
|
def table_exists(
|
213
516
|
table: str,
|
517
|
+
*args,
|
214
518
|
namespace: Optional[str] = None,
|
519
|
+
table_version: Optional[str] = None,
|
520
|
+
stream_format: StreamFormat = StreamFormat.DELTACAT,
|
521
|
+
transaction: Optional[Transaction] = None,
|
215
522
|
catalog: Optional[str] = None,
|
216
|
-
*args,
|
217
523
|
**kwargs,
|
218
524
|
) -> bool:
|
219
|
-
"""
|
220
|
-
|
525
|
+
"""Check if a table exists in the catalog.
|
526
|
+
|
527
|
+
Args:
|
528
|
+
table: Name of the table to check.
|
529
|
+
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
530
|
+
table_version: Optional specific version of the table to check. Defaults to the latest active version.
|
531
|
+
stream_format: Optional stream format to check. Defaults to DELTACAT.
|
532
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
533
|
+
|
534
|
+
Returns:
|
535
|
+
True if the table exists, False otherwise.
|
536
|
+
"""
|
537
|
+
if (transaction or get_current_transaction()) and catalog:
|
538
|
+
raise ValueError(
|
539
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
540
|
+
)
|
541
|
+
catalog_obj = get_catalog(catalog)
|
542
|
+
return catalog_obj.impl.table_exists(
|
543
|
+
table,
|
544
|
+
*args,
|
545
|
+
namespace=namespace,
|
546
|
+
table_version=table_version,
|
547
|
+
stream_format=stream_format,
|
548
|
+
transaction=transaction,
|
549
|
+
inner=catalog_obj.inner,
|
550
|
+
**kwargs,
|
551
|
+
)
|
221
552
|
|
222
553
|
|
223
554
|
# namespace functions
|
224
555
|
def list_namespaces(
|
225
|
-
|
556
|
+
*args,
|
557
|
+
transaction: Optional[Transaction] = None,
|
558
|
+
catalog: Optional[str] = None,
|
559
|
+
**kwargs,
|
226
560
|
) -> ListResult[Namespace]:
|
227
|
-
"""List a page of table namespaces.
|
228
|
-
|
561
|
+
"""List a page of table namespaces.
|
562
|
+
|
563
|
+
Args:
|
564
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
565
|
+
|
566
|
+
Returns:
|
567
|
+
ListResult containing Namespace objects.
|
568
|
+
"""
|
569
|
+
if (transaction or get_current_transaction()) and catalog:
|
570
|
+
raise ValueError(
|
571
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
572
|
+
)
|
573
|
+
catalog_obj = get_catalog(catalog)
|
574
|
+
return catalog_obj.impl.list_namespaces(
|
575
|
+
*args,
|
576
|
+
transaction=transaction,
|
577
|
+
inner=catalog_obj.inner,
|
578
|
+
**kwargs,
|
579
|
+
)
|
229
580
|
|
230
581
|
|
231
582
|
def get_namespace(
|
232
|
-
namespace: str,
|
583
|
+
namespace: str,
|
584
|
+
*args,
|
585
|
+
transaction: Optional[Transaction] = None,
|
586
|
+
catalog: Optional[str] = None,
|
587
|
+
**kwargs,
|
233
588
|
) -> Optional[Namespace]:
|
234
|
-
"""Get
|
235
|
-
|
236
|
-
|
589
|
+
"""Get metadata for a specific table namespace.
|
590
|
+
|
591
|
+
Args:
|
592
|
+
namespace: Name of the namespace to retrieve.
|
593
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
594
|
+
|
595
|
+
Returns:
|
596
|
+
Namespace object if the namespace exists, None otherwise.
|
597
|
+
"""
|
598
|
+
if (transaction or get_current_transaction()) and catalog:
|
599
|
+
raise ValueError(
|
600
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
601
|
+
)
|
602
|
+
catalog_obj = get_catalog(catalog)
|
603
|
+
return catalog_obj.impl.get_namespace(
|
604
|
+
namespace,
|
605
|
+
*args,
|
606
|
+
transaction=transaction,
|
607
|
+
inner=catalog_obj.inner,
|
608
|
+
**kwargs,
|
609
|
+
)
|
237
610
|
|
238
611
|
|
239
612
|
def namespace_exists(
|
240
|
-
namespace: str,
|
613
|
+
namespace: str,
|
614
|
+
*args,
|
615
|
+
transaction: Optional[Transaction] = None,
|
616
|
+
catalog: Optional[str] = None,
|
617
|
+
**kwargs,
|
241
618
|
) -> bool:
|
242
|
-
"""
|
243
|
-
|
619
|
+
"""Check if a namespace exists.
|
620
|
+
|
621
|
+
Args:
|
622
|
+
namespace: Name of the namespace to check.
|
623
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
624
|
+
|
625
|
+
Returns:
|
626
|
+
True if the namespace exists, False otherwise.
|
627
|
+
"""
|
628
|
+
if (transaction or get_current_transaction()) and catalog:
|
629
|
+
raise ValueError(
|
630
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
631
|
+
)
|
632
|
+
catalog_obj = get_catalog(catalog)
|
633
|
+
return catalog_obj.impl.namespace_exists(
|
634
|
+
namespace,
|
635
|
+
*args,
|
636
|
+
transaction=transaction,
|
637
|
+
inner=catalog_obj.inner,
|
638
|
+
**kwargs,
|
639
|
+
)
|
244
640
|
|
245
641
|
|
246
642
|
def create_namespace(
|
247
643
|
namespace: str,
|
248
|
-
permissions: Dict[str, Any],
|
249
|
-
catalog: Optional[str] = None,
|
250
644
|
*args,
|
645
|
+
properties: Optional[NamespaceProperties] = None,
|
646
|
+
transaction: Optional[Transaction] = None,
|
647
|
+
catalog: Optional[str] = None,
|
251
648
|
**kwargs,
|
252
649
|
) -> Namespace:
|
253
|
-
"""
|
254
|
-
|
255
|
-
|
256
|
-
namespace
|
650
|
+
"""Create a new namespace.
|
651
|
+
|
652
|
+
Args:
|
653
|
+
namespace: Name of the namespace to create.
|
654
|
+
properties: Optional properties for the namespace.
|
655
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
656
|
+
|
657
|
+
Returns:
|
658
|
+
Created Namespace object.
|
659
|
+
|
660
|
+
Raises:
|
661
|
+
NamespaceAlreadyExistsError: If the namespace already exists.
|
662
|
+
"""
|
663
|
+
if (transaction or get_current_transaction()) and catalog:
|
664
|
+
raise ValueError(
|
665
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
666
|
+
)
|
667
|
+
catalog_obj = get_catalog(catalog)
|
668
|
+
return catalog_obj.impl.create_namespace(
|
669
|
+
namespace,
|
670
|
+
*args,
|
671
|
+
properties=properties,
|
672
|
+
transaction=transaction,
|
673
|
+
inner=catalog_obj.inner,
|
674
|
+
**kwargs,
|
257
675
|
)
|
258
676
|
|
259
677
|
|
260
678
|
def alter_namespace(
|
261
679
|
namespace: str,
|
262
|
-
catalog: Optional[str] = None,
|
263
|
-
permissions: Optional[Dict[str, Any]] = None,
|
264
|
-
new_namespace: Optional[str] = None,
|
265
680
|
*args,
|
681
|
+
properties: Optional[NamespaceProperties] = None,
|
682
|
+
new_namespace: Optional[str] = None,
|
683
|
+
transaction: Optional[Transaction] = None,
|
684
|
+
catalog: Optional[str] = None,
|
266
685
|
**kwargs,
|
267
686
|
) -> None:
|
268
|
-
"""Alter
|
269
|
-
|
270
|
-
|
687
|
+
"""Alter a namespace definition.
|
688
|
+
|
689
|
+
Args:
|
690
|
+
namespace: Name of the namespace to alter.
|
691
|
+
properties: Optional new properties for the namespace.
|
692
|
+
new_namespace: Optional new name for the namespace.
|
693
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
694
|
+
|
695
|
+
Returns:
|
696
|
+
None
|
697
|
+
"""
|
698
|
+
if (transaction or get_current_transaction()) and catalog:
|
699
|
+
raise ValueError(
|
700
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
701
|
+
)
|
702
|
+
catalog_obj = get_catalog(catalog)
|
703
|
+
catalog_obj.impl.alter_namespace(
|
704
|
+
namespace,
|
705
|
+
*args,
|
706
|
+
properties=properties,
|
707
|
+
new_namespace=new_namespace,
|
708
|
+
transaction=transaction,
|
709
|
+
inner=catalog_obj.inner,
|
710
|
+
**kwargs,
|
271
711
|
)
|
272
712
|
|
273
713
|
|
274
714
|
def drop_namespace(
|
275
|
-
namespace: str,
|
715
|
+
namespace: str,
|
716
|
+
*args,
|
717
|
+
purge: bool = False,
|
718
|
+
transaction: Optional[Transaction] = None,
|
719
|
+
catalog: Optional[str] = None,
|
720
|
+
**kwargs,
|
276
721
|
) -> None:
|
277
|
-
"""Drop
|
278
|
-
|
279
|
-
|
722
|
+
"""Drop a namespace and all of its tables from the catalog.
|
723
|
+
|
724
|
+
Args:
|
725
|
+
namespace: Name of the namespace to drop.
|
726
|
+
purge: If True, permanently delete all table data in the namespace.
|
727
|
+
If False, only removes the namespace from the catalog.
|
728
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
729
|
+
|
730
|
+
Returns:
|
731
|
+
None
|
732
|
+
"""
|
733
|
+
if (transaction or get_current_transaction()) and catalog:
|
734
|
+
raise ValueError(
|
735
|
+
"Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
|
736
|
+
)
|
737
|
+
catalog_obj = get_catalog(catalog)
|
738
|
+
catalog_obj.impl.drop_namespace(
|
739
|
+
namespace,
|
740
|
+
*args,
|
741
|
+
purge=purge,
|
742
|
+
transaction=transaction,
|
743
|
+
inner=catalog_obj.inner,
|
744
|
+
**kwargs,
|
745
|
+
)
|
280
746
|
|
281
747
|
|
282
|
-
def default_namespace(
|
283
|
-
|
284
|
-
|
748
|
+
def default_namespace(
|
749
|
+
*args,
|
750
|
+
catalog: Optional[str] = None,
|
751
|
+
**kwargs,
|
752
|
+
) -> str:
|
753
|
+
"""Return the default namespace for the catalog.
|
754
|
+
|
755
|
+
Returns:
|
756
|
+
Name of the default namespace.
|
757
|
+
"""
|
758
|
+
catalog_obj = get_catalog(catalog)
|
759
|
+
return catalog_obj.impl.default_namespace(*args, inner=catalog_obj.inner, **kwargs)
|