deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,248 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Dict, Generic, TypeVar, Callable, Optional
|
3
|
+
from functools import singledispatchmethod
|
4
|
+
import re
|
5
|
+
|
6
|
+
from deltacat.storage.model.expression import (
|
7
|
+
Expression,
|
8
|
+
Reference,
|
9
|
+
Literal,
|
10
|
+
BinaryExpression,
|
11
|
+
UnaryExpression,
|
12
|
+
In,
|
13
|
+
Between,
|
14
|
+
Like,
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
C = TypeVar("C")  # Context type
R = TypeVar("R")  # Return type


class ExpressionVisitor(ABC, Generic[C, R]):
    """
    Visitor pattern for deltacat expressions.

    Subclasses must implement ``visit_reference`` and ``visit_literal``. Every
    other expression type is resolved in this order:

    1. A specialized ``visit_<snake_case_class_name>(expr, context)`` method,
       if the visitor defines one. The method receives the unvisited
       expression and is responsible for visiting any children it needs.
    2. A procedure registered under the expression's class name in
       ``_PROCEDURES``. Children are visited first and their results are
       handed to the procedure through the matching ``_apply_*`` hook.
    3. For binary and unary expressions only, the generic
       ``visit_binary_expression`` / ``visit_unary_expression`` fallback,
       which raises ``NotImplementedError`` unless overridden.
    """

    # Default procedure dictionary for subclasses to override
    _PROCEDURES: Dict[str, Callable] = {}

    # Zero-width match in front of every uppercase letter except a leading one.
    # Compiled once here instead of on every _to_snake_case call.
    _SNAKE_CASE_BOUNDARY = re.compile(r"(?<!^)(?=[A-Z])")

    # Hooks that hand visited operands to a _PROCEDURES entry. Subclasses may
    # define any of them as methods to customize how procedures are invoked.
    _APPLY_HOOKS = (
        "_apply_binary",
        "_apply_unary",
        "_apply_in",
        "_apply_between",
        "_apply_like",
    )

    def __init__(self):
        """
        Initialize visitor and validate required methods.

        Raises:
            NotImplementedError: If ``visit_reference`` or ``visit_literal``
                resolves to something that is not callable (for example a
                subclass that shadows it with a plain attribute).
        """
        for required in ("visit_reference", "visit_literal"):
            if not callable(getattr(self, required, None)):
                raise NotImplementedError(f"Subclasses must implement {required}")
        self._setup_default_procedure_handlers()

    @staticmethod
    def _default_apply(proc: Callable, *operands):
        """Call ``proc`` with the already-visited operands, in order."""
        return proc(*operands)

    def _to_snake_case(self, name: str) -> str:
        """Convert PascalCase or camelCase to snake_case."""
        return ExpressionVisitor._SNAKE_CASE_BOUNDARY.sub("_", name).lower()

    def _setup_default_procedure_handlers(self):
        """
        Install the default ``_apply_*`` hooks that the subclass did not define.

        The default is a plain function rather than a per-instance lambda so
        that visitor instances remain picklable with the standard library.
        """
        for hook in self._APPLY_HOOKS:
            if not callable(getattr(self, hook, None)):
                setattr(self, hook, self._default_apply)

    def _find_specialized_handler(
        self, expr: Expression, generic_name: str
    ) -> Optional[Callable]:
        """
        Look up the ``visit_<snake_case_class_name>`` method for ``expr``.

        Args:
            expr: Expression whose class name selects the method.
            generic_name: Name of the generic fallback method. It is never
                returned as a specialized handler because it takes the visited
                operands as extra positional arguments and would fail when
                called as ``(expr, context)``.

        Returns:
            The bound method, or None when the visitor has no such method.
        """
        method_name = f"visit_{self._to_snake_case(type(expr).__name__)}"
        if method_name == generic_name:
            return None
        return getattr(self, method_name, None)

    @singledispatchmethod
    def visit(self, expr: Expression, context: Optional[C] = None) -> R:
        """
        Generic visit method that dispatches to specific methods based on expression type.

        Args:
            expr: The expression to visit
            context: Optional context to pass through the visitor

        Returns:
            Result of visiting the expression

        Raises:
            NotImplementedError: If no handler is registered for the type of
                ``expr``.
        """
        expr_type = type(expr).__name__
        raise NotImplementedError(f"No visit method for type {expr_type}")

    @visit.register
    def _visit_reference(self, expr: Reference, context: Optional[C] = None) -> R:
        """Visit a Reference expression."""
        return self.visit_reference(expr, context)

    @visit.register
    def _visit_literal(self, expr: Literal, context: Optional[C] = None) -> R:
        """Visit a Literal expression."""
        return self.visit_literal(expr, context)

    @visit.register
    def _visit_binary(self, expr: BinaryExpression, context: Optional[C] = None) -> R:
        """
        Visit a binary expression using method specialization or procedures.

        Raises:
            NotImplementedError: If no specialized method, procedure or
                overridden ``visit_binary_expression`` handles the expression.
        """
        # Check for a specialized method before touching the children: it only
        # receives (expr, context), so visiting eagerly would walk the subtree
        # twice and throw the first set of results away.
        specialized = self._find_specialized_handler(expr, "visit_binary_expression")
        if specialized is not None:
            return specialized(expr, context)

        left_result = self.visit(expr.left, context)
        right_result = self.visit(expr.right, context)

        expr_type = type(expr).__name__
        if expr_type in self._PROCEDURES:
            return self._apply_binary(
                self._PROCEDURES[expr_type], left_result, right_result
            )

        # The base implementation raises NotImplementedError("No handler for ...").
        # Errors raised by a subclass override propagate with their own message.
        return self.visit_binary_expression(expr, left_result, right_result, context)

    @visit.register
    def _visit_unary(self, expr: UnaryExpression, context: Optional[C] = None) -> R:
        """
        Visit a unary expression using method specialization or procedures.

        Raises:
            NotImplementedError: If no specialized method, procedure or
                overridden ``visit_unary_expression`` handles the expression.
        """
        # Same ordering as _visit_binary: specialized methods visit their own
        # operand, so do not visit it here first.
        specialized = self._find_specialized_handler(expr, "visit_unary_expression")
        if specialized is not None:
            return specialized(expr, context)

        operand_result = self.visit(expr.operand, context)

        expr_type = type(expr).__name__
        if expr_type in self._PROCEDURES:
            return self._apply_unary(self._PROCEDURES[expr_type], operand_result)

        return self.visit_unary_expression(expr, operand_result, context)

    @visit.register
    def _visit_in(self, expr: In, context: Optional[C] = None) -> R:
        """
        Visit an In expression.

        Raises:
            NotImplementedError: If the visitor defines neither ``visit_in``
                nor an ``"In"`` procedure.
        """
        if hasattr(self, "visit_in"):
            return self.visit_in(expr, context)

        if "In" in self._PROCEDURES:
            value_result = self.visit(expr.value, context)
            values_results = [self.visit(v, context) for v in expr.values]
            return self._apply_in(self._PROCEDURES["In"], value_result, values_results)

        raise NotImplementedError("No handler for In expression")

    @visit.register
    def _visit_between(self, expr: Between, context: Optional[C] = None) -> R:
        """
        Visit a Between expression.

        Raises:
            NotImplementedError: If the visitor defines neither
                ``visit_between`` nor a ``"Between"`` procedure.
        """
        if hasattr(self, "visit_between"):
            return self.visit_between(expr, context)

        if "Between" in self._PROCEDURES:
            value_result = self.visit(expr.value, context)
            lower_result = self.visit(expr.lower, context)
            upper_result = self.visit(expr.upper, context)
            return self._apply_between(
                self._PROCEDURES["Between"], value_result, lower_result, upper_result
            )

        raise NotImplementedError("No handler for Between expression")

    @visit.register
    def _visit_like(self, expr: Like, context: Optional[C] = None) -> R:
        """
        Visit a Like expression.

        Raises:
            NotImplementedError: If the visitor defines neither ``visit_like``
                nor a ``"Like"`` procedure.
        """
        if hasattr(self, "visit_like"):
            return self.visit_like(expr, context)

        if "Like" in self._PROCEDURES:
            value_result = self.visit(expr.value, context)
            pattern_result = self.visit(expr.pattern, context)
            return self._apply_like(
                self._PROCEDURES["Like"], value_result, pattern_result
            )

        raise NotImplementedError("No handler for Like expression")

    @abstractmethod
    def visit_reference(self, expr: Reference, context: Optional[C] = None) -> R:
        """Visit a Reference expression."""
        pass

    @abstractmethod
    def visit_literal(self, expr: Literal, context: Optional[C] = None) -> R:
        """Visit a Literal expression."""
        pass

    def visit_binary_expression(
        self, expr: BinaryExpression, left: R, right: R, context: Optional[C] = None
    ) -> R:
        """
        Default fallback handler for binary expressions.

        Args:
            expr: The binary expression being visited.
            left: Result of visiting ``expr.left``.
            right: Result of visiting ``expr.right``.
            context: Optional context passed through the visitor.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this.
        """
        raise NotImplementedError(f"No handler for {type(expr).__name__}")

    def visit_unary_expression(
        self, expr: UnaryExpression, operand: R, context: Optional[C] = None
    ) -> R:
        """
        Default fallback handler for unary expressions.

        Args:
            expr: The unary expression being visited.
            operand: Result of visiting ``expr.operand``.
            context: Optional context passed through the visitor.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this.
        """
        raise NotImplementedError(f"No handler for {type(expr).__name__}")
|
214
|
+
|
215
|
+
|
216
|
+
class DisplayVisitor(ExpressionVisitor[Expression, str]):
    """
    Render an expression tree as a string in infix notation.

    Comparisons come out as "a = b" rather than the prefix form "(= a b)".
    AND / OR results are wrapped in parentheses, and the operand of NOT and
    IS NULL is parenthesized as well.
    """

    # Expression class name -> formatter for the already-rendered operands.
    # Each formatter is called positionally with the operand strings.
    _PROCEDURES = {
        # Comparisons
        "Equal": "{} = {}".format,
        "NotEqual": "{} <> {}".format,
        "GreaterThan": "{} > {}".format,
        "LessThan": "{} < {}".format,
        "GreaterThanEqual": "{} >= {}".format,
        "LessThanEqual": "{} <= {}".format,
        # Logical connectives (parenthesized)
        "And": "({} AND {})".format,
        "Or": "({} OR {})".format,
        # Unary operations
        "Not": "NOT ({})".format,
        "IsNull": "({}) IS NULL".format,
        # Special operations
        "In": lambda value, values: "{} IN ({})".format(value, ", ".join(values)),
        "Between": "{} BETWEEN {} AND {}".format,
        "Like": "{} LIKE {}".format,
    }

    def visit_reference(self, expr: Reference, context=None) -> str:
        """Return the referenced field name (``expr.field``) unchanged."""
        field_name = expr.field
        return field_name

    def visit_literal(self, expr: Literal, context=None) -> str:
        """Return ``str()`` of the literal's wrapped value (``expr.value``)."""
        rendered = str(expr.value)
        return rendered
|
@@ -0,0 +1,24 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Generic, Optional, TypeVar
|
3
|
+
|
4
|
+
T = TypeVar("T")
|
5
|
+
U = TypeVar("U")
|
6
|
+
|
7
|
+
|
8
|
+
class ModelMapper(ABC, Generic[T, U]):
    """
    Abstract two-way mapper between objects of type T and objects of type U.

    Both operations are abstract static methods, so a concrete subclass must
    implement `map` and `unmap` before it can be instantiated.
    """

    @staticmethod
    @abstractmethod
    def map(obj: Optional[T], *args, **kwargs) -> Optional[U]:
        """
        Map `obj` from type T to type U.

        Accepts extra positional and keyword arguments for
        implementation-specific options. The abstract body returns None.
        """
        return None

    @staticmethod
    @abstractmethod
    def unmap(obj: Optional[U], **kwargs) -> Optional[T]:
        """
        Map `obj` from type U back to type T.

        Accepts extra keyword arguments for implementation-specific options.
        The abstract body returns None.
        """
        return None
|
18
|
+
|
19
|
+
|
20
|
+
class OneWayModelMapper(ABC, Generic[T, U]):
    """
    Abstract one-way mapper from objects of type T to objects of type U.

    A concrete subclass must implement the abstract static method `map`
    before it can be instantiated.
    """

    @staticmethod
    @abstractmethod
    def map(obj: Optional[T], **kwargs) -> Optional[U]:
        """
        Map `obj` from type T to type U.

        Accepts extra keyword arguments for implementation-specific options.
        The abstract body returns None.
        """
        return None
|
@@ -21,6 +21,14 @@ class ListResult(dict, Generic[T]):
|
|
21
21
|
list_result["nextPageProvider"] = next_page_provider
|
22
22
|
return list_result
|
23
23
|
|
24
|
+
@staticmethod
def empty() -> ListResult:
    """
    Build a ListResult that holds no items.

    Returns:
        A new ListResult whose "items" entry is an empty list and whose
        "paginationKey" and "nextPageProvider" entries are both None.
    """
    blank = ListResult()
    blank["items"] = []
    # The remaining two entries are explicitly present but unset.
    for unset_key in ("paginationKey", "nextPageProvider"):
        blank[unset_key] = None
    return blank
|
31
|
+
|
24
32
|
def read_page(self) -> Optional[List[T]]:
    """
    Return the value stored under this result's "items" key.

    Returns:
        The stored items, or None when no "items" entry is present.
    """
    page_items = self.get("items")
    return page_items
|
26
34
|
|
@@ -1,32 +1,116 @@
|
|
1
|
+
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
|
+
from __future__ import annotations
|
3
|
+
|
4
|
+
from typing import Optional, List
|
5
|
+
|
1
6
|
from deltacat.utils.common import sha1_digest, sha1_hexdigest
|
2
7
|
|
8
|
+
DEFAULT_NAME_SEPARATOR = "|"
DEFAULT_PATH_SEPARATOR = "/"


class LocatorName:
    """
    Assigns a name to a catalog object. All sibling catalog objects must be
    assigned unique names (e.g., all namespaces in a catalog must be assigned
    unique locator names, all tables under a namespace must be assigned unique
    locator names, etc.). Names may be mutable (e.g., namespace and table names)
    or immutable (e.g., partition/stream IDs and delta stream positions). Names
    may be single or multi-part.

    This base class only defines the interface: `immutable_id` and `parts`
    raise NotImplementedError here and are expected to be overridden.
    """

    @property
    def immutable_id(self) -> Optional[str]:
        """
        If this locator name is immutable (i.e., if the object it refers to
        can't be renamed) then returns an immutable ID suitable for use in
        URLs or filesystem paths. Returns None if this locator name is mutable
        (i.e., if the object it refers to can be renamed).
        """
        raise NotImplementedError()

    @immutable_id.setter
    def immutable_id(self, immutable_id: Optional[str]) -> None:
        """
        If this locator name is immutable (i.e., if the object it refers to
        can't be renamed), then sets an immutable ID for this
        locator name suitable for use in URLs or filesystem paths. Note that
        the ID is only considered immutable in durable catalog storage, and
        remains mutable in transient memory (i.e., this setter remains
        functional regardless of whether an ID is already assigned, but each
        update causes it to refer to a new, distinct object in durable storage).
        """
        raise NotImplementedError()

    def parts(self) -> List[str]:
        """
        Returns the ordered parts of this locator's name.
        """
        raise NotImplementedError()

    def join(self, separator: str = DEFAULT_NAME_SEPARATOR) -> str:
        """
        Returns this locator name as a string by joining its parts with the
        given separator.
        """
        return separator.join(self.parts())

    def exists(self) -> bool:
        """
        Returns True if this locator name is defined, False otherwise.

        A name counts as defined when it has a truthy immutable ID or when
        every one of its parts is truthy.
        """
        # Coerce to bool: a bare `or` would leak the immutable ID string (or
        # None) to callers instead of honoring the declared return type.
        return bool(self.immutable_id or all(self.parts()))
|
63
|
+
|
3
64
|
|
4
65
|
class Locator:
|
5
|
-
|
66
|
+
"""
|
67
|
+
Creates a globally unique reference to any named catalog object. Locators
|
68
|
+
are composed of the name of the referenced catalog object and its parent
|
69
|
+
Locator (if any). Every Locator has a canonical string representation that
|
70
|
+
can be used for global equality checks. Cryptographic digests of this
|
71
|
+
canonical string can be used for uniform random hash distribution and
|
72
|
+
path-based references to the underlying catalog object in filesystems or
|
73
|
+
URLs.
|
74
|
+
"""
|
75
|
+
|
76
|
+
@property
def name(self) -> LocatorName:
    """
    The name of this locator.

    This base implementation is a placeholder that always raises
    NotImplementedError.
    """
    raise NotImplementedError
|
12
82
|
|
83
|
+
@property
def parent(self) -> Optional[Locator]:
    """
    The parent locator of this locator, if any.

    This base implementation is a placeholder that always raises
    NotImplementedError.
    """
    raise NotImplementedError
|
89
|
+
|
90
|
+
def canonical_string(self, separator: str = DEFAULT_NAME_SEPARATOR) -> str:
    """
    Build the canonical string for this locator.

    Each part of this locator's name is converted with `str()` and the
    results are joined with `separator`. The string is usable for equality
    checks between objects that share the same parent.
    """
    name_parts = self.name.parts()
    return separator.join(map(str, name_parts))
|
96
|
+
|
13
97
|
def digest(self) -> bytes:
    """
    Return a digest of this locator, computed by `sha1_digest` over the
    UTF-8 encoded canonical string. It can be used for equality checks
    between objects with the same parent and for uniform random hash
    distribution.
    """
    canonical_bytes = self.canonical_string().encode("utf-8")
    return sha1_digest(canonical_bytes)
|
20
104
|
|
21
105
|
def hexdigest(self) -> str:
    """
    Return a hexdigest of this locator, computed by `sha1_hexdigest` over
    the UTF-8 encoded canonical string. It is suitable for equality checks
    between objects with the same parent and for inclusion in URLs.
    """
    canonical_bytes = self.canonical_string().encode("utf-8")
    return sha1_hexdigest(canonical_bytes)
|
28
112
|
|
29
|
-
def path(self, root: str, separator: str =
|
113
|
+
def path(self, root: str, separator: str = DEFAULT_PATH_SEPARATOR) -> str:
|
30
114
|
"""
|
31
115
|
Returns a path for the locator of the form: "{root}/{hexdigest}", where
|
32
116
|
the default path separator of "/" may optionally be overridden with
|