deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
deltacat/storage/model/delta.py
CHANGED
@@ -1,36 +1,55 @@
|
|
1
1
|
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
|
+
import json
|
5
|
+
import posixpath
|
4
6
|
from typing import Any, Dict, List, Optional
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
from deltacat.storage.model.
|
8
|
+
import pyarrow
|
9
|
+
|
10
|
+
from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
|
11
|
+
from deltacat.constants import TXN_DIR_NAME
|
12
|
+
from deltacat.storage.model.manifest import (
|
13
|
+
Manifest,
|
14
|
+
ManifestMeta,
|
15
|
+
ManifestAuthor,
|
16
|
+
)
|
17
|
+
from deltacat.storage.model.locator import (
|
18
|
+
Locator,
|
19
|
+
LocatorName,
|
20
|
+
)
|
9
21
|
from deltacat.storage.model.namespace import NamespaceLocator
|
10
|
-
from deltacat.storage.model.partition import
|
22
|
+
from deltacat.storage.model.partition import (
|
23
|
+
PartitionLocator,
|
24
|
+
PartitionValues,
|
25
|
+
)
|
11
26
|
from deltacat.storage.model.stream import StreamLocator
|
12
|
-
from deltacat.storage.model.table import
|
27
|
+
from deltacat.storage.model.table import (
|
28
|
+
TableLocator,
|
29
|
+
Table,
|
30
|
+
)
|
13
31
|
from deltacat.storage.model.table_version import TableVersionLocator
|
14
|
-
from deltacat.storage.model.types import
|
15
|
-
|
32
|
+
from deltacat.storage.model.types import (
|
33
|
+
DeltaType,
|
34
|
+
StreamFormat,
|
35
|
+
)
|
16
36
|
|
37
|
+
DeltaProperties = Dict[str, Any]
|
17
38
|
|
18
|
-
|
39
|
+
|
40
|
+
class Delta(Metafile):
|
19
41
|
@staticmethod
|
20
42
|
def of(
|
21
43
|
locator: Optional[DeltaLocator],
|
22
44
|
delta_type: Optional[DeltaType],
|
23
45
|
meta: Optional[ManifestMeta],
|
24
|
-
properties: Optional[
|
46
|
+
properties: Optional[DeltaProperties],
|
25
47
|
manifest: Optional[Manifest],
|
26
48
|
previous_stream_position: Optional[int] = None,
|
27
|
-
delete_parameters: Optional[DeleteParameters] = None,
|
28
|
-
partition_spec: Optional[DeltaPartitionSpec] = None,
|
29
49
|
) -> Delta:
|
30
50
|
"""
|
31
51
|
Creates a Delta metadata model with the given Delta Locator, Delta Type,
|
32
|
-
manifest metadata, properties, manifest, and previous delta stream
|
33
|
-
position.
|
52
|
+
manifest metadata, properties, manifest, and previous delta stream position.
|
34
53
|
"""
|
35
54
|
delta = Delta()
|
36
55
|
delta.locator = locator
|
@@ -39,8 +58,6 @@ class Delta(dict):
|
|
39
58
|
delta.properties = properties
|
40
59
|
delta.manifest = manifest
|
41
60
|
delta.previous_stream_position = previous_stream_position
|
42
|
-
delta.delete_parameters = delete_parameters
|
43
|
-
delta.partition_spec = partition_spec
|
44
61
|
return delta
|
45
62
|
|
46
63
|
@staticmethod
|
@@ -48,7 +65,7 @@ class Delta(dict):
|
|
48
65
|
deltas: List[Delta],
|
49
66
|
manifest_author: Optional[ManifestAuthor] = None,
|
50
67
|
stream_position: Optional[int] = None,
|
51
|
-
properties: Optional[
|
68
|
+
properties: Optional[DeltaProperties] = None,
|
52
69
|
) -> Delta:
|
53
70
|
"""
|
54
71
|
Merges the input list of deltas into a single delta. All input deltas to
|
@@ -93,25 +110,10 @@ class Delta(dict):
|
|
93
110
|
f"Deltas to merge must all share the same delta type "
|
94
111
|
f"(found {len(distinct_delta_types)} delta types)."
|
95
112
|
)
|
96
|
-
distinct_partition_spec = set([d.partition_spec for d in deltas])
|
97
|
-
if len(distinct_partition_spec) > 1:
|
98
|
-
raise ValueError(
|
99
|
-
f"Deltas to merge must all share the same partition spec "
|
100
|
-
f"(found {len(distinct_partition_spec)} partition specs)."
|
101
|
-
)
|
102
113
|
merged_manifest = Manifest.merge_manifests(
|
103
114
|
manifests,
|
104
115
|
manifest_author,
|
105
116
|
)
|
106
|
-
distinct_delta_type = list(distinct_delta_types)[0]
|
107
|
-
merged_delete_parameters = None
|
108
|
-
if distinct_delta_type is DeltaType.DELETE:
|
109
|
-
delete_parameters: List[DeleteParameters] = [
|
110
|
-
d.delete_parameters for d in deltas if d.delete_parameters
|
111
|
-
]
|
112
|
-
merged_delete_parameters: Optional[
|
113
|
-
DeleteParameters
|
114
|
-
] = DeleteParameters.merge_delete_parameters(delete_parameters)
|
115
117
|
partition_locator = deltas[0].partition_locator
|
116
118
|
prev_positions = [d.previous_stream_position for d in deltas]
|
117
119
|
prev_position = None if None in prev_positions else max(prev_positions)
|
@@ -122,7 +124,6 @@ class Delta(dict):
|
|
122
124
|
properties,
|
123
125
|
merged_manifest,
|
124
126
|
prev_position,
|
125
|
-
merged_delete_parameters,
|
126
127
|
)
|
127
128
|
|
128
129
|
@property
|
@@ -148,11 +149,11 @@ class Delta(dict):
|
|
148
149
|
self["meta"] = meta
|
149
150
|
|
150
151
|
@property
|
151
|
-
def properties(self) -> Optional[
|
152
|
+
def properties(self) -> Optional[DeltaProperties]:
|
152
153
|
return self.get("properties")
|
153
154
|
|
154
155
|
@properties.setter
|
155
|
-
def properties(self, properties: Optional[
|
156
|
+
def properties(self, properties: Optional[DeltaProperties]) -> None:
|
156
157
|
self["properties"] = properties
|
157
158
|
|
158
159
|
@property
|
@@ -222,7 +223,7 @@ class Delta(dict):
|
|
222
223
|
def storage_type(self) -> Optional[str]:
|
223
224
|
delta_locator = self.locator
|
224
225
|
if delta_locator:
|
225
|
-
return delta_locator.
|
226
|
+
return delta_locator.stream_format
|
226
227
|
return None
|
227
228
|
|
228
229
|
@property
|
@@ -253,6 +254,13 @@ class Delta(dict):
|
|
253
254
|
return delta_locator.stream_id
|
254
255
|
return None
|
255
256
|
|
257
|
+
@property
|
258
|
+
def stream_format(self) -> Optional[str]:
|
259
|
+
delta_locator = self.locator
|
260
|
+
if delta_locator:
|
261
|
+
return delta_locator.stream_format
|
262
|
+
return None
|
263
|
+
|
256
264
|
@property
|
257
265
|
def partition_id(self) -> Optional[str]:
|
258
266
|
delta_locator = self.locator
|
@@ -267,6 +275,13 @@ class Delta(dict):
|
|
267
275
|
return delta_locator.partition_values
|
268
276
|
return None
|
269
277
|
|
278
|
+
@property
|
279
|
+
def partition_values_json(self) -> Optional[str]:
|
280
|
+
partition_values = (
|
281
|
+
self.partition_values if self.partition_values is not None else None
|
282
|
+
)
|
283
|
+
return json.dumps(partition_values)
|
284
|
+
|
270
285
|
@property
|
271
286
|
def stream_position(self) -> Optional[int]:
|
272
287
|
delta_locator = self.locator
|
@@ -274,27 +289,71 @@ class Delta(dict):
|
|
274
289
|
return delta_locator.stream_position
|
275
290
|
return None
|
276
291
|
|
277
|
-
|
278
|
-
def delete_parameters(self) -> Optional[DeleteParameters]:
|
279
|
-
delete_parameters = self.get("delete_parameters")
|
292
|
+
def url(self, catalog_name: Optional[str] = None) -> str:
|
280
293
|
return (
|
281
|
-
|
294
|
+
f"dc://{catalog_name}/{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{self.partition_values_json}/{self.stream_position}/"
|
295
|
+
if catalog_name
|
296
|
+
else f"table://{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{self.partition_values_json}/{self.stream_position}/"
|
282
297
|
)
|
283
298
|
|
284
|
-
|
285
|
-
|
286
|
-
|
299
|
+
def to_serializable(self) -> Delta:
|
300
|
+
serializable = self
|
301
|
+
if serializable.table_locator:
|
302
|
+
serializable: Delta = Delta.update_for(self)
|
303
|
+
# remove the mutable table locator
|
304
|
+
serializable.table_version_locator.table_locator = TableLocator.at(
|
305
|
+
namespace=self.id,
|
306
|
+
table_name=self.id,
|
307
|
+
)
|
308
|
+
return serializable
|
309
|
+
|
310
|
+
def from_serializable(
|
311
|
+
self,
|
312
|
+
path: str,
|
313
|
+
filesystem: Optional[pyarrow.fs.FileSystem] = None,
|
314
|
+
) -> Delta:
|
315
|
+
# TODO(pdames): Lazily restore table locator on 1st property get.
|
316
|
+
# Cache Metafile ID <-> Table/Namespace-Name map at Catalog Init, then
|
317
|
+
# swap only Metafile IDs with Names here.
|
318
|
+
if self.table_locator and self.table_locator.table_name == self.id:
|
319
|
+
parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
|
320
|
+
base_metafile_path=path,
|
321
|
+
parent_number=4,
|
322
|
+
)
|
323
|
+
txn_log_dir = posixpath.join(
|
324
|
+
posixpath.dirname(
|
325
|
+
posixpath.dirname(
|
326
|
+
posixpath.dirname(parent_rev_dir_path),
|
327
|
+
)
|
328
|
+
),
|
329
|
+
TXN_DIR_NAME,
|
330
|
+
)
|
331
|
+
table = Table.read(
|
332
|
+
MetafileRevisionInfo.latest_revision(
|
333
|
+
revision_dir_path=parent_rev_dir_path,
|
334
|
+
filesystem=filesystem,
|
335
|
+
success_txn_log_dir=txn_log_dir,
|
336
|
+
).path,
|
337
|
+
filesystem,
|
338
|
+
)
|
339
|
+
self.table_version_locator.table_locator = table.locator
|
340
|
+
return self
|
341
|
+
|
342
|
+
|
343
|
+
class DeltaLocatorName(LocatorName):
|
344
|
+
def __init__(self, locator: DeltaLocator):
|
345
|
+
self.locator = locator
|
287
346
|
|
288
347
|
@property
|
289
|
-
def
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
348
|
+
def immutable_id(self) -> Optional[str]:
|
349
|
+
return str(self.locator.stream_position)
|
350
|
+
|
351
|
+
@immutable_id.setter
|
352
|
+
def immutable_id(self, immutable_id: Optional[str]):
|
353
|
+
self.locator.stream_position = int(immutable_id)
|
294
354
|
|
295
|
-
|
296
|
-
|
297
|
-
self["partitionSpec"] = value
|
355
|
+
def parts(self) -> List[str]:
|
356
|
+
return [str(self.locator.stream_position)]
|
298
357
|
|
299
358
|
|
300
359
|
class DeltaLocator(Locator, dict):
|
@@ -318,25 +377,47 @@ class DeltaLocator(Locator, dict):
|
|
318
377
|
table_name: Optional[str],
|
319
378
|
table_version: Optional[str],
|
320
379
|
stream_id: Optional[str],
|
321
|
-
|
380
|
+
stream_format: Optional[StreamFormat],
|
322
381
|
partition_values: Optional[PartitionValues],
|
323
382
|
partition_id: Optional[str],
|
324
383
|
stream_position: Optional[int],
|
325
384
|
) -> DeltaLocator:
|
326
|
-
partition_locator =
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
385
|
+
partition_locator = (
|
386
|
+
PartitionLocator.at(
|
387
|
+
namespace,
|
388
|
+
table_name,
|
389
|
+
table_version,
|
390
|
+
stream_id,
|
391
|
+
stream_format,
|
392
|
+
partition_values,
|
393
|
+
partition_id,
|
394
|
+
)
|
395
|
+
if any(
|
396
|
+
[
|
397
|
+
partition_id,
|
398
|
+
partition_values,
|
399
|
+
stream_id,
|
400
|
+
stream_format,
|
401
|
+
table_name,
|
402
|
+
table_version,
|
403
|
+
namespace,
|
404
|
+
]
|
405
|
+
)
|
406
|
+
else None
|
334
407
|
)
|
335
408
|
return DeltaLocator.of(
|
336
409
|
partition_locator,
|
337
410
|
stream_position,
|
338
411
|
)
|
339
412
|
|
413
|
+
@property
|
414
|
+
def name(self):
|
415
|
+
return DeltaLocatorName(self)
|
416
|
+
|
417
|
+
@property
|
418
|
+
def parent(self) -> Optional[PartitionLocator]:
|
419
|
+
return self.partition_locator
|
420
|
+
|
340
421
|
@property
|
341
422
|
def partition_locator(self) -> Optional[PartitionLocator]:
|
342
423
|
val: Dict[str, Any] = self.get("partitionLocator")
|
@@ -406,10 +487,10 @@ class DeltaLocator(Locator, dict):
|
|
406
487
|
return None
|
407
488
|
|
408
489
|
@property
|
409
|
-
def
|
490
|
+
def stream_format(self) -> Optional[str]:
|
410
491
|
partition_locator = self.partition_locator
|
411
492
|
if partition_locator:
|
412
|
-
return partition_locator.
|
493
|
+
return partition_locator.stream_format
|
413
494
|
return None
|
414
495
|
|
415
496
|
@property
|
@@ -432,13 +513,3 @@ class DeltaLocator(Locator, dict):
|
|
432
513
|
if partition_locator:
|
433
514
|
return partition_locator.table_version
|
434
515
|
return None
|
435
|
-
|
436
|
-
def canonical_string(self) -> str:
|
437
|
-
"""
|
438
|
-
Returns a unique string for the given locator that can be used
|
439
|
-
for equality checks (i.e. two locators are equal if they have
|
440
|
-
the same canonical string).
|
441
|
-
"""
|
442
|
-
pl_hexdigest = self.partition_locator.hexdigest()
|
443
|
-
stream_position = self.stream_position
|
444
|
-
return f"{pl_hexdigest}|{stream_position}"
|
@@ -0,0 +1,47 @@
|
|
1
|
+
from deltacat.storage.model.expression.expression import (
|
2
|
+
Expression,
|
3
|
+
UnaryExpression,
|
4
|
+
BinaryExpression,
|
5
|
+
BooleanExpression,
|
6
|
+
Reference,
|
7
|
+
Literal,
|
8
|
+
Equal,
|
9
|
+
NotEqual,
|
10
|
+
GreaterThan,
|
11
|
+
LessThan,
|
12
|
+
GreaterThanEqual,
|
13
|
+
LessThanEqual,
|
14
|
+
And,
|
15
|
+
Or,
|
16
|
+
Not,
|
17
|
+
In,
|
18
|
+
Between,
|
19
|
+
Like,
|
20
|
+
IsNull,
|
21
|
+
)
|
22
|
+
|
23
|
+
from deltacat.storage.model.expression.visitor import ExpressionVisitor, DisplayVisitor
|
24
|
+
|
25
|
+
__all__ = [
|
26
|
+
"Expression",
|
27
|
+
"UnaryExpression",
|
28
|
+
"BinaryExpression",
|
29
|
+
"BooleanExpression",
|
30
|
+
"Reference",
|
31
|
+
"Literal",
|
32
|
+
"Equal",
|
33
|
+
"NotEqual",
|
34
|
+
"GreaterThan",
|
35
|
+
"LessThan",
|
36
|
+
"GreaterThanEqual",
|
37
|
+
"LessThanEqual",
|
38
|
+
"And",
|
39
|
+
"Or",
|
40
|
+
"Not",
|
41
|
+
"In",
|
42
|
+
"Between",
|
43
|
+
"Like",
|
44
|
+
"IsNull",
|
45
|
+
"ExpressionVisitor",
|
46
|
+
"DisplayVisitor",
|
47
|
+
]
|