deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
File without changes
|
@@ -0,0 +1,16 @@
|
|
1
|
+
import unittest
|
2
|
+
from unittest.mock import MagicMock
|
3
|
+
|
4
|
+
from deltacat import TableDefinition
|
5
|
+
|
6
|
+
|
7
|
+
class TestTableDefinition(unittest.TestCase):
    """Unit tests for ``TableDefinition`` that rely only on a mocked table."""

    def test_create_scan_plan_not_initialized(self):
        """``create_scan_plan`` raises ``RuntimeError`` for a definition built from only a table."""
        table_stub = MagicMock()
        table_stub.table_name = "mock_table_name"
        table_stub.namespace = "mock_namespace"
        definition = TableDefinition({"table": table_stub})

        # assertRaisesRegex runs a regex search over str(exception); the pattern
        # has no metacharacters, so this is a plain substring check.
        with self.assertRaisesRegex(RuntimeError, "ScanPlanner is not initialized"):
            definition.create_scan_plan()
|
@@ -0,0 +1,321 @@
|
|
1
|
+
import pytest
|
2
|
+
import tempfile
|
3
|
+
import shutil
|
4
|
+
import uuid
|
5
|
+
from unittest import mock
|
6
|
+
import os
|
7
|
+
|
8
|
+
from deltacat.catalog import (
|
9
|
+
CatalogProperties,
|
10
|
+
Catalog,
|
11
|
+
clear_catalogs,
|
12
|
+
get_catalog,
|
13
|
+
init,
|
14
|
+
init_local,
|
15
|
+
is_initialized,
|
16
|
+
put_catalog,
|
17
|
+
)
|
18
|
+
from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
|
19
|
+
from pyiceberg.catalog import Catalog as PyIcebergCatalog
|
20
|
+
|
21
|
+
from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
|
22
|
+
|
23
|
+
from pyiceberg.catalog import CatalogType
|
24
|
+
|
25
|
+
|
26
|
+
class MockCatalogImpl:
    """Test double that mocks a catalog implementation.

    ``initialize`` echoes back everything it receives so tests can assert on
    what was forwarded to it.
    """

    @staticmethod
    def initialize(config, *args, **kwargs):
        """Return a dict recording ``config`` and any extra positional/keyword arguments."""
        # Stands in for the state a catalog implementation would normally maintain.
        state = dict(initialized=True, config=config)
        state["args"] = args
        state["kwargs"] = kwargs
        return state
|
37
|
+
|
38
|
+
|
39
|
+
@pytest.fixture(scope="function")
def reset_catalogs():
    """Call ``clear_catalogs()`` before each test that requests this fixture.

    The fixture yields nothing and has no teardown step, so it only affects
    the state a test starts from.
    """
    clear_catalogs()
|
42
|
+
|
43
|
+
|
44
|
+
class TestCatalog:
    """Tests for the Catalog class itself, without Ray initialization."""

    def test_catalog_constructor(self):
        """The Catalog constructor keeps the given implementation and its initial state."""
        built = Catalog(impl=MockCatalogImpl)

        assert built.impl == MockCatalogImpl

        # MockCatalogImpl.initialize echoes its inputs, so the inner state shows
        # what the Catalog constructor plumbed through: no config and no extras.
        assert built.inner["initialized"]
        assert built.inner["config"] is None
        assert built.inner["args"] == ()
        assert built.inner["kwargs"] == {}

    def test_iceberg_factory_method(self):
        """The Iceberg ``from_config`` factory returns a catalog wired to the Iceberg impl."""
        patch_target = "deltacat.experimental.catalog.iceberg.impl.IcebergCatalog"
        with mock.patch(patch_target) as patched_impl:
            # Give initialize() a known return value to look for afterwards.
            patched_impl.initialize.return_value = {"iceberg": True}

            iceberg_config = IcebergCatalogConfig(
                type=CatalogType.IN_MEMORY,
                properties={},
            )
            produced = IcebergCatalog.from_config(iceberg_config)

            # The implementation should be the patched object ...
            assert produced.impl == patched_impl
            # ... and the inner state should be whatever initialize() returned.
            assert produced.inner == {"iceberg": True}
|
77
|
+
|
78
|
+
|
79
|
+
class TestCatalogsIntegration:
    """Integration tests for Default catalog functionality."""

    # Scratch directory shared by the tests in this class; created in
    # setup_class and removed again in teardown_class.
    temp_dir = None

    @classmethod
    def setup_class(cls):
        """Create the shared temp directory and pre-initialize a mock catalog."""
        cls.temp_dir = tempfile.mkdtemp()
        # Other tests are going to have initialized ray catalog. Initialize here to
        # ensure that when this test class is run individually it mimics running
        # with other tests.
        catalog = Catalog(impl=MockCatalogImpl)
        init(
            catalog,
            force=True,
        )

    @classmethod
    def teardown_class(cls):
        """Remove the shared temp directory if it still exists."""
        if cls.temp_dir and os.path.exists(cls.temp_dir):
            shutil.rmtree(cls.temp_dir)

    def test_init_single_catalog(self, reset_catalogs):
        """Test initializing a single catalog."""
        catalog = Catalog(impl=MockCatalogImpl)

        # Initialize with a single catalog
        init(catalog, force=True)

        assert is_initialized()

        # Get the default catalog and check it's the same one we initialized with
        retrieved_catalog = get_catalog()
        assert retrieved_catalog.impl == MockCatalogImpl
        assert retrieved_catalog.inner["initialized"]

    def test_init_multiple_catalogs(self, reset_catalogs):
        """Test initializing multiple catalogs."""
        # Create catalogs
        catalog1 = Catalog(impl=MockCatalogImpl, id=1)
        catalog2 = Catalog(impl=MockCatalogImpl, id=2)

        # Initialize with multiple catalogs keyed by name
        catalogs_dict = {"catalog1": catalog1, "catalog2": catalog2}
        init(catalogs_dict, force=True)

        assert is_initialized()

        # Get catalogs by name and check they're the same ones we initialized with
        retrieved_catalog1 = get_catalog("catalog1")
        assert retrieved_catalog1.impl == MockCatalogImpl
        assert retrieved_catalog1.inner["kwargs"]["id"] == 1

        retrieved_catalog2 = get_catalog("catalog2")
        assert retrieved_catalog2.impl == MockCatalogImpl
        assert retrieved_catalog2.inner["kwargs"]["id"] == 2

    def test_init_with_default_catalog_name(self, reset_catalogs):
        """Test initializing with a specified default catalog name."""
        # Create catalogs
        catalog1 = Catalog(impl=MockCatalogImpl, id=1)
        catalog2 = Catalog(impl=MockCatalogImpl, id=2)

        # Initialize with multiple catalogs and specify a default
        catalogs_dict = {"catalog1": catalog1, "catalog2": catalog2}
        init(
            catalogs_dict,
            default="catalog2",
            force=True,
        )

        # Get the default catalog and check it's catalog2
        default_catalog = get_catalog()
        assert default_catalog.impl == MockCatalogImpl
        assert default_catalog.inner["kwargs"]["id"] == 2

    def test_put_catalog(self, reset_catalogs):
        """Test adding a catalog after initialization."""
        # Initialize with a single catalog
        catalog1 = Catalog(impl=MockCatalogImpl, id=1)
        catalog2 = Catalog(impl=MockCatalogImpl, id=2)
        init({"catalog1": catalog1}, force=True)

        # Add a second catalog
        put_catalog("catalog2", catalog2)

        # Check both catalogs are available
        retrieved_catalog1 = get_catalog("catalog1")
        assert retrieved_catalog1.inner["kwargs"]["id"] == 1

        retrieved_catalog2 = get_catalog("catalog2")
        assert retrieved_catalog2.inner["kwargs"]["id"] == 2

    def test_put_catalog_that_already_exists(self, reset_catalogs):
        """Test that re-putting a name replaces the catalog unless fail_if_exists is set."""
        catalog = Catalog(impl=MockCatalogImpl, id=1)
        catalog2 = Catalog(impl=MockCatalogImpl, id=2)
        put_catalog(
            "test_catalog",
            catalog,
            id=1,
        )

        # Try to add another catalog with the same name. Should not error
        put_catalog(
            "test_catalog",
            catalog2,
        )

        retrieved_catalog = get_catalog("test_catalog")
        assert retrieved_catalog.inner["kwargs"]["id"] == 2

        # If fail_if_exists, put call should fail
        with pytest.raises(ValueError):
            put_catalog(
                "test_catalog",
                catalog,
                fail_if_exists=True,
            )

    def test_get_catalog_nonexistent(self, reset_catalogs):
        """Test that trying to get a nonexistent catalog raises an error."""
        # Initialize with a catalog
        catalog = Catalog(impl=MockCatalogImpl)
        init({"test_catalog": catalog}, force=True)

        # Try to get a nonexistent catalog
        with pytest.raises(ValueError):
            get_catalog("nonexistent")

    def test_get_catalog_no_default(self, reset_catalogs):
        """Test that trying to get the default catalog when none is set raises an error."""
        # Initialize with multiple catalogs but no default
        catalog1 = Catalog(impl=MockCatalogImpl, id=1)
        catalog2 = Catalog(impl=MockCatalogImpl, id=2)
        init({"catalog1": catalog1, "catalog2": catalog2}, force=True)

        # Try to get the default catalog
        with pytest.raises(ValueError):
            get_catalog()

    def test_init_local(self, reset_catalogs):
        """Test that init_local() creates a default local catalog."""
        # Initialize with default local catalog
        init_local(force=True)

        assert is_initialized()

        # Should be able to get the default catalog
        default_catalog = get_catalog()
        assert default_catalog is not None

        # The default catalog should be accessible by name "default"
        named_catalog = get_catalog("default")
        assert named_catalog is not None
        assert named_catalog.impl.__name__ == "deltacat.catalog.main.impl"

    def test_init_local_with_path(self, reset_catalogs):
        """Test that init_local(path) creates a default local catalog with specified path."""
        # The context manager removes the directory on exit, pass or fail,
        # replacing the manual mkdtemp / try / finally / rmtree sequence.
        with tempfile.TemporaryDirectory() as custom_path:
            # Initialize with custom path
            init_local(path=custom_path, force=True)

            assert is_initialized()

            # Should be able to get the default catalog
            default_catalog = get_catalog()
            assert default_catalog is not None

            # The default catalog should be accessible by name "default"
            named_catalog = get_catalog("default")
            assert named_catalog is not None
            assert named_catalog.impl.__name__ == "deltacat.catalog.main.impl"

            # Verify the catalog is using the custom path
            catalog_properties = named_catalog.inner
            assert catalog_properties.root == custom_path

    def test_default_catalog_initialization(self, reset_catalogs):
        """Test that a Default catalog can be initialized and accessed using the factory method."""
        catalog_name = str(uuid.uuid4())

        # Create the catalog properties (CatalogProperties comes from the
        # module-level deltacat.catalog import, as in the sibling kwargs test).
        config = CatalogProperties(root=self.temp_dir)

        # Create the catalog
        catalog = Catalog(config)

        # Initialize DeltaCAT with this catalog
        init({catalog_name: catalog}, force=True)

        # Retrieve the catalog and verify it's the same one
        retrieved_catalog = get_catalog(catalog_name)
        assert retrieved_catalog.impl.__name__ == "deltacat.catalog.main.impl"
        assert isinstance(retrieved_catalog.inner, CatalogProperties)
        assert retrieved_catalog.inner.root == self.temp_dir

    def test_default_catalog_initialization_from_kwargs(self, reset_catalogs):
        """Test that a Default catalog can be built directly from a ``root`` keyword argument."""
        catalog_name = str(uuid.uuid4())

        # Register a catalog constructed from keyword arguments only
        put_catalog(
            catalog_name,
            Catalog(root="test_root"),
        )

        # Retrieve the catalog and verify it's the same one
        retrieved_catalog = get_catalog(catalog_name)
        assert retrieved_catalog.impl.__name__ == "deltacat.catalog.main.impl"
        assert isinstance(retrieved_catalog.inner, CatalogProperties)
        assert retrieved_catalog.inner.root == "test_root"

    def test_iceberg_catalog_initialization(self, reset_catalogs):
        """Test that an Iceberg catalog can be initialized and accessed."""
        catalog_name = str(uuid.uuid4())

        # Create the Iceberg catalog config
        config = IcebergCatalogConfig(
            type=CatalogType.IN_MEMORY, properties={"warehouse": self.temp_dir}
        )

        # Create the catalog using the factory method
        catalog = IcebergCatalog.from_config(config)

        put_catalog(catalog_name, catalog)

        # Retrieve the catalog and verify it's the same one
        retrieved_catalog = get_catalog(catalog_name)
        assert (
            retrieved_catalog.impl.__name__
            == "deltacat.experimental.catalog.iceberg.impl"
        )
        assert isinstance(retrieved_catalog.inner, PyIcebergCatalog)
|