deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
import pytest
|
2
|
+
|
3
|
+
from deltacat.storage import (
|
4
|
+
PartitionKey,
|
5
|
+
PartitionScheme,
|
6
|
+
IdentityTransform,
|
7
|
+
)
|
8
|
+
|
9
|
+
|
10
|
+
def test_partition_scheme_validates_empty_keys():
|
11
|
+
# When creating a partition scheme with empty keys list
|
12
|
+
with pytest.raises(
|
13
|
+
ValueError, match="Partition scheme cannot have empty keys list"
|
14
|
+
):
|
15
|
+
PartitionScheme.of(
|
16
|
+
keys=[],
|
17
|
+
name="test_partition_scheme",
|
18
|
+
scheme_id="test_partition_scheme_id",
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
def test_partition_scheme_validates_duplicate_keys():
|
23
|
+
# When creating a partition scheme with duplicate keys
|
24
|
+
with pytest.raises(ValueError, match="Duplicate partition key found: col1"):
|
25
|
+
PartitionScheme.of(
|
26
|
+
keys=[
|
27
|
+
PartitionKey.of(
|
28
|
+
key=["col1"],
|
29
|
+
transform=IdentityTransform.of(),
|
30
|
+
),
|
31
|
+
PartitionKey.of(
|
32
|
+
key=["col1"], # Duplicate key
|
33
|
+
transform=IdentityTransform.of(),
|
34
|
+
),
|
35
|
+
],
|
36
|
+
name="test_partition_scheme",
|
37
|
+
scheme_id="test_partition_scheme_id",
|
38
|
+
)
|
39
|
+
|
40
|
+
|
41
|
+
def test_partition_scheme_validates_duplicate_names():
|
42
|
+
# When creating a partition scheme with duplicate partition key names
|
43
|
+
with pytest.raises(
|
44
|
+
ValueError, match="Duplicate partition key name found: partition_1"
|
45
|
+
):
|
46
|
+
PartitionScheme.of(
|
47
|
+
keys=[
|
48
|
+
PartitionKey.of(
|
49
|
+
key=["col1"],
|
50
|
+
name="partition_1",
|
51
|
+
transform=IdentityTransform.of(),
|
52
|
+
),
|
53
|
+
PartitionKey.of(
|
54
|
+
key=["col2"], # Different field locator
|
55
|
+
name="partition_1", # But duplicate name
|
56
|
+
transform=IdentityTransform.of(),
|
57
|
+
),
|
58
|
+
],
|
59
|
+
name="test_partition_scheme",
|
60
|
+
scheme_id="test_partition_scheme_id",
|
61
|
+
)
|
62
|
+
|
63
|
+
|
64
|
+
def test_partition_scheme_allows_valid_keys():
|
65
|
+
# When creating a partition scheme with valid keys
|
66
|
+
partition_scheme = PartitionScheme.of(
|
67
|
+
keys=[
|
68
|
+
PartitionKey.of(
|
69
|
+
key=["col1"],
|
70
|
+
transform=IdentityTransform.of(),
|
71
|
+
),
|
72
|
+
PartitionKey.of(
|
73
|
+
key=["col2"],
|
74
|
+
transform=IdentityTransform.of(),
|
75
|
+
),
|
76
|
+
],
|
77
|
+
name="test_partition_scheme",
|
78
|
+
scheme_id="test_partition_scheme_id",
|
79
|
+
)
|
80
|
+
|
81
|
+
# Then it should succeed
|
82
|
+
assert partition_scheme is not None
|
83
|
+
assert len(partition_scheme.keys) == 2
|
84
|
+
assert partition_scheme.name == "test_partition_scheme"
|
85
|
+
assert partition_scheme.id == "test_partition_scheme_id"
|
@@ -0,0 +1,479 @@
|
|
1
|
+
import pytest
|
2
|
+
import pyarrow as pa
|
3
|
+
from deltacat.exceptions import SchemaValidationError
|
4
|
+
|
5
|
+
from deltacat.storage.model.schema import (
|
6
|
+
Schema,
|
7
|
+
Field,
|
8
|
+
BASE_SCHEMA_NAME,
|
9
|
+
SchemaConsistencyType,
|
10
|
+
SchemaUpdate,
|
11
|
+
)
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture
|
15
|
+
def schema_a():
|
16
|
+
return Schema.of(
|
17
|
+
[
|
18
|
+
Field.of(
|
19
|
+
field=pa.field("col1", pa.int32(), nullable=False),
|
20
|
+
field_id=1,
|
21
|
+
is_merge_key=True,
|
22
|
+
)
|
23
|
+
]
|
24
|
+
)
|
25
|
+
|
26
|
+
|
27
|
+
@pytest.fixture
|
28
|
+
def schema_b():
|
29
|
+
return Schema.of(
|
30
|
+
[
|
31
|
+
Field.of(
|
32
|
+
field=pa.field("col2", pa.string(), nullable=True),
|
33
|
+
field_id=2,
|
34
|
+
is_merge_key=False,
|
35
|
+
)
|
36
|
+
]
|
37
|
+
)
|
38
|
+
|
39
|
+
|
40
|
+
@pytest.fixture
|
41
|
+
def schema_c():
|
42
|
+
return Schema.of(
|
43
|
+
[
|
44
|
+
Field.of(
|
45
|
+
field=pa.field("col3", pa.float64(), nullable=False),
|
46
|
+
field_id=3,
|
47
|
+
is_merge_key=False,
|
48
|
+
)
|
49
|
+
]
|
50
|
+
)
|
51
|
+
|
52
|
+
|
53
|
+
@pytest.fixture
|
54
|
+
def schema_d():
|
55
|
+
schema = Schema.of(
|
56
|
+
[
|
57
|
+
Field.of(
|
58
|
+
field=pa.field("col_named", pa.int64(), nullable=True),
|
59
|
+
field_id=4,
|
60
|
+
is_merge_key=False,
|
61
|
+
)
|
62
|
+
]
|
63
|
+
)
|
64
|
+
return schema
|
65
|
+
|
66
|
+
|
67
|
+
def test_of_with_dict(schema_a, schema_b):
|
68
|
+
input_dict = {"schema_a": schema_a.arrow, "schema_b": schema_b.arrow}
|
69
|
+
schema = Schema.of(input_dict)
|
70
|
+
|
71
|
+
assert isinstance(schema, Schema)
|
72
|
+
assert list(schema.subschemas.keys()) == ["schema_a", "schema_b"]
|
73
|
+
assert list(schema.subschemas_to_field_ids.keys()) == ["schema_a", "schema_b"]
|
74
|
+
assert schema.subschemas["schema_a"].equivalent_to(schema_a, True)
|
75
|
+
assert schema.subschemas["schema_b"].equivalent_to(schema_b, True)
|
76
|
+
|
77
|
+
|
78
|
+
def test_of_invalid_input():
|
79
|
+
with pytest.raises(ValueError):
|
80
|
+
Schema.of(42)
|
81
|
+
with pytest.raises(ValueError):
|
82
|
+
Schema.of(["foo"])
|
83
|
+
with pytest.raises(ValueError):
|
84
|
+
Schema.of({"foo": [42]})
|
85
|
+
|
86
|
+
|
87
|
+
def test_insert_explicit_name(schema_a, schema_b):
|
88
|
+
schema = Schema.of(schema_a.arrow)
|
89
|
+
assert not schema.subschemas
|
90
|
+
new_schema = schema.add_subschema("explicit", schema_b.arrow)
|
91
|
+
assert not schema.subschemas
|
92
|
+
assert "explicit" in new_schema.subschemas
|
93
|
+
assert "explicit" in new_schema.subschemas_to_field_ids
|
94
|
+
assert BASE_SCHEMA_NAME in new_schema.subschemas
|
95
|
+
assert BASE_SCHEMA_NAME in new_schema.subschemas_to_field_ids
|
96
|
+
assert new_schema.subschemas[BASE_SCHEMA_NAME].equivalent_to(schema_a)
|
97
|
+
assert new_schema.subschemas["explicit"].equivalent_to(schema_b)
|
98
|
+
|
99
|
+
|
100
|
+
def test_insert_reserved_name_fails(schema_a, schema_b):
|
101
|
+
schema = Schema.of(schema_a.arrow)
|
102
|
+
with pytest.raises(ValueError):
|
103
|
+
schema.add_subschema(BASE_SCHEMA_NAME, schema_b.arrow)
|
104
|
+
|
105
|
+
|
106
|
+
def test_insert_duplicate_schema_name_fails(schema_a, schema_b):
|
107
|
+
schema = Schema.of({"dupe": schema_a.arrow})
|
108
|
+
with pytest.raises(ValueError):
|
109
|
+
schema.add_subschema("dupe", schema_b.arrow)
|
110
|
+
|
111
|
+
|
112
|
+
def test_insert_duplicate_field_name_fails(schema_a):
|
113
|
+
schema = Schema.of(schema_a.arrow)
|
114
|
+
with pytest.raises(ValueError):
|
115
|
+
schema.add_subschema("dupe_field_name", schema_a.arrow)
|
116
|
+
|
117
|
+
|
118
|
+
def test_insert_autofill_field_id():
|
119
|
+
schema1 = [
|
120
|
+
Field.of(
|
121
|
+
field=pa.field("col1", pa.int32(), nullable=False),
|
122
|
+
is_merge_key=True,
|
123
|
+
)
|
124
|
+
]
|
125
|
+
schema2 = [
|
126
|
+
Field.of(
|
127
|
+
field=pa.field("col2", pa.int32(), nullable=False),
|
128
|
+
is_merge_key=True,
|
129
|
+
)
|
130
|
+
]
|
131
|
+
schema = Schema.of(schema1)
|
132
|
+
new_schema = schema.add_subschema("explicit", schema2)
|
133
|
+
assert not schema.subschemas
|
134
|
+
assert "explicit" in new_schema.subschemas
|
135
|
+
assert "explicit" in new_schema.subschemas_to_field_ids
|
136
|
+
assert BASE_SCHEMA_NAME in new_schema.subschemas
|
137
|
+
assert BASE_SCHEMA_NAME in new_schema.subschemas_to_field_ids
|
138
|
+
assert len(new_schema.subschemas[BASE_SCHEMA_NAME].fields) == 1
|
139
|
+
assert len(new_schema.subschemas["explicit"].fields) == 1
|
140
|
+
assert new_schema.subschemas[BASE_SCHEMA_NAME].fields[0].id == 0
|
141
|
+
assert new_schema.subschemas["explicit"].fields[0].id == 1
|
142
|
+
assert new_schema.subschemas[BASE_SCHEMA_NAME].equivalent_to(Schema.of(schema1))
|
143
|
+
schema2_with_expected_field_id = [
|
144
|
+
Field.of(
|
145
|
+
field=pa.field("col2", pa.int32(), nullable=False),
|
146
|
+
field_id=1,
|
147
|
+
is_merge_key=True,
|
148
|
+
)
|
149
|
+
]
|
150
|
+
assert new_schema.subschemas["explicit"].equivalent_to(
|
151
|
+
Schema.of(schema2_with_expected_field_id)
|
152
|
+
)
|
153
|
+
|
154
|
+
|
155
|
+
def test_insert_duplicate_field_name_case_insensitive_fails():
|
156
|
+
schema1 = Schema.of(
|
157
|
+
[
|
158
|
+
Field.of(
|
159
|
+
field=pa.field("col1", pa.int32(), nullable=False),
|
160
|
+
field_id=1,
|
161
|
+
is_merge_key=True,
|
162
|
+
)
|
163
|
+
]
|
164
|
+
)
|
165
|
+
schema2 = Schema.of(
|
166
|
+
[
|
167
|
+
Field.of(
|
168
|
+
field=pa.field("COL1", pa.int32(), nullable=False),
|
169
|
+
field_id=2,
|
170
|
+
is_merge_key=True,
|
171
|
+
)
|
172
|
+
]
|
173
|
+
)
|
174
|
+
schema = Schema.of(schema1.arrow)
|
175
|
+
with pytest.raises(ValueError):
|
176
|
+
schema.add_subschema("dupe_field_name_case_insensitive", schema2.arrow)
|
177
|
+
|
178
|
+
|
179
|
+
def test_insert_duplicate_field_id_fails():
|
180
|
+
schema1 = Schema.of(
|
181
|
+
[
|
182
|
+
Field.of(
|
183
|
+
field=pa.field("col1", pa.int32(), nullable=False),
|
184
|
+
field_id=1,
|
185
|
+
is_merge_key=True,
|
186
|
+
)
|
187
|
+
]
|
188
|
+
)
|
189
|
+
schema2 = Schema.of(
|
190
|
+
[
|
191
|
+
Field.of(
|
192
|
+
field=pa.field("col2", pa.int32(), nullable=False),
|
193
|
+
field_id=1,
|
194
|
+
is_merge_key=True,
|
195
|
+
)
|
196
|
+
]
|
197
|
+
)
|
198
|
+
schema = Schema.of(schema1.arrow)
|
199
|
+
with pytest.raises(ValueError):
|
200
|
+
schema.add_subschema("dupe_field_id", schema2.arrow)
|
201
|
+
|
202
|
+
|
203
|
+
def test_update_success(schema_a, schema_b):
|
204
|
+
schema = Schema.of({"key": schema_a.arrow})
|
205
|
+
new_schema = schema.replace_subschema("key", schema_b.arrow)
|
206
|
+
assert schema.subschemas["key"].equivalent_to(schema_a)
|
207
|
+
assert new_schema.subschemas["key"].equivalent_to(schema_b)
|
208
|
+
|
209
|
+
|
210
|
+
def test_update_not_exist(schema_a):
|
211
|
+
schema = Schema.of({"key": schema_a.arrow})
|
212
|
+
with pytest.raises(ValueError):
|
213
|
+
schema.replace_subschema("nonexistent", schema_a.arrow)
|
214
|
+
|
215
|
+
|
216
|
+
def test_delete_schema_success(schema_a, schema_b):
|
217
|
+
schema = Schema.of({"key1": schema_a.arrow, "key2": schema_b.arrow})
|
218
|
+
new_schema = schema.delete_subschema("key1")
|
219
|
+
assert "key1" in schema.subschemas
|
220
|
+
assert "key2" in schema.subschemas
|
221
|
+
assert "key1" not in new_schema.subschemas
|
222
|
+
assert "key2" in new_schema.subschemas
|
223
|
+
assert "key1" in schema.subschemas_to_field_ids
|
224
|
+
assert "key2" in schema.subschemas_to_field_ids
|
225
|
+
assert "key1" not in new_schema.subschemas_to_field_ids
|
226
|
+
assert "key2" in new_schema.subschemas_to_field_ids
|
227
|
+
|
228
|
+
|
229
|
+
def test_delete_only_schema_fails(schema_a):
|
230
|
+
schema = Schema.of({"key": schema_a.arrow})
|
231
|
+
with pytest.raises(ValueError):
|
232
|
+
schema.delete_subschema("key")
|
233
|
+
|
234
|
+
|
235
|
+
def test_delete_schema_not_exist(schema_a):
    """Deleting a subschema name that is not present must raise ValueError."""
    single_subschema = Schema.of({"key": schema_a.arrow})

    # Only "key" was registered; "nonexistent" cannot be deleted.
    with pytest.raises(ValueError):
        single_subschema.delete_subschema("nonexistent")
|
239
|
+
|
240
|
+
|
241
|
+
def test_get_schemas_order(schema_a, schema_b, schema_c):
    """Subschemas are listed in insertion order, whether given up front or added one by one."""
    # All three subschemas supplied in a single mapping.
    built_at_once = Schema.of(
        {"a": schema_a.arrow, "b": schema_b.arrow, "c": schema_c.arrow}
    )
    assert list(built_at_once.subschemas.keys()) == ["a", "b", "c"]
    assert list(built_at_once.subschemas.values()) == [schema_a, schema_b, schema_c]

    # Same three subschemas, appended incrementally; order is checked after each step.
    built_stepwise = Schema.of({"a": schema_a.arrow})

    built_stepwise = built_stepwise.add_subschema("b", schema_b.arrow)
    assert list(built_stepwise.subschemas.keys()) == ["a", "b"]
    assert list(built_stepwise.subschemas.values()) == [schema_a, schema_b]

    built_stepwise = built_stepwise.add_subschema("c", schema_c.arrow)
    assert list(built_stepwise.subschemas.keys()) == ["a", "b", "c"]
    assert list(built_stepwise.subschemas.values()) == [schema_a, schema_b, schema_c]
|
252
|
+
|
253
|
+
|
254
|
+
def test_equivalent_to_same(schema_a, schema_b):
    """Two schemas built from identical subschema mappings are equivalent in both directions."""
    left = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow})
    right = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow})

    # Check both call directions so the comparison is shown to be symmetric.
    assert left.equivalent_to(right)
    assert right.equivalent_to(left)
|
259
|
+
|
260
|
+
|
261
|
+
def test_equivalent_to_different_subschema_names(schema_a, schema_b):
    """Differently named but otherwise identical subschemas are equivalent by default.

    Passing True as the second positional argument of ``equivalent_to`` must
    make the comparison fail for the same pair.
    NOTE(review): presumably that flag enables subschema-name checking -
    confirm against the ``Schema.equivalent_to`` signature.
    """
    named_a_b = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow})
    named_x_b = Schema.of({"x": schema_a.arrow, "b": schema_b.arrow})

    # Default comparison: the "a" vs "x" naming difference is tolerated.
    assert named_a_b.equivalent_to(named_x_b)
    # With the flag set, the same pair is no longer equivalent.
    assert not named_a_b.equivalent_to(named_x_b, True)
|
266
|
+
|
267
|
+
|
268
|
+
def test_not_equivalent_to_different_subschema_order(schema_a, schema_b):
    """The same subschemas inserted in a different order are not equivalent."""
    a_then_b = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow})
    b_then_a = Schema.of({"b": schema_b.arrow, "a": schema_a.arrow})

    assert not a_then_b.equivalent_to(b_then_a)
|
272
|
+
|
273
|
+
|
274
|
+
def test_equivalent_to_non_schema(schema_a):
    """Comparing a schema against a plain string reports non-equivalence."""
    wrapped = Schema.of({"a": schema_a.arrow})

    # A str is not a Schema; the call is expected to return a falsy value.
    assert not wrapped.equivalent_to("not a schema")
|
277
|
+
|
278
|
+
|
279
|
+
def test_equivalent_schemas_different_instances():
    """
    Edge case: schemas constructed independently from identical field
    definitions are distinct objects but must still be equivalent.
    """

    def build_keyed_schema():
        # Every call creates fresh Field/Schema objects from the same
        # definition, then wraps the result as the "key" subschema.
        inner = Schema.of(
            [
                Field.of(
                    pa.field("col1", pa.int32(), nullable=False),
                    field_id=1,
                    is_merge_key=True,
                )
            ]
        )
        return Schema.of({"key": inner.arrow})

    first = build_keyed_schema()
    second = build_keyed_schema()
    assert first.equivalent_to(second)
|
305
|
+
|
306
|
+
|
307
|
+
def test_empty_schema_fails():
    """Building a schema from an empty mapping or an empty list must raise ValueError."""
    # Empty dict first, then empty list; each must be rejected on its own.
    for empty_input in ({}, []):
        with pytest.raises(ValueError):
            Schema.of(empty_input)
|
312
|
+
|
313
|
+
|
314
|
+
def test_schema_type_promotion_edge_cases():
    """Exercise Field.promote_type_if_needed on fields using SchemaConsistencyType.NONE."""
    # Case 1: incoming data already has the field's type, so nothing changes.
    int32_field = Field.of(
        pa.field("test", pa.int32()),
        consistency_type=SchemaConsistencyType.NONE,
    )
    int32_values = pa.array([1, 2, 3], type=pa.int32())
    promoted_data, promoted = int32_field.promote_type_if_needed(int32_values)
    assert not promoted, "Same type should not trigger promotion"
    assert promoted_data.type == pa.int32(), "Data type should remain int32"

    # Case 2: int64 data written to the int32 field widens to int64.
    # 2147483648 is one past the int32 maximum, so it needs int64.
    int64_values = pa.array([2147483648], type=pa.int64())
    promoted_data, promoted = int32_field.promote_type_if_needed(int64_values)
    assert promoted, "int32 field should promote to int64"
    assert promoted_data.type == pa.int64(), "Promoted data should be int64"

    # Case 3: a nullable int32 field given int32 data containing a null.
    nullable_field = Field.of(
        pa.field("test", pa.int32(), nullable=True),
        consistency_type=SchemaConsistencyType.NONE,
    )
    values_with_null = pa.array([1, None, 3], type=pa.int32())
    promoted_data, promoted = nullable_field.promote_type_if_needed(values_with_null)
    assert not promoted, "Same nullable type should not promote"

    # Case 4: float64 data written to an int32 field crosses type families.
    int_field = Field.of(
        pa.field("test", pa.int32()),
        consistency_type=SchemaConsistencyType.NONE,
    )
    float_values = pa.array([1.5, 2.7], type=pa.float64())
    promoted_data, promoted = int_field.promote_type_if_needed(float_values)
    assert promoted, "int32 should promote to accommodate float64"
    assert pa.types.is_floating(
        promoted_data.type
    ), f"Should promote to float type, got {promoted_data.type}"
|
350
|
+
|
351
|
+
|
352
|
+
def test_schema_update_method(schema_a):
    """Schema.update() returns a SchemaUpdate bound to the schema and supports chaining."""
    # Called without arguments: incompatible changes are not allowed.
    default_update = schema_a.update()
    assert isinstance(default_update, SchemaUpdate)
    assert default_update.base_schema == schema_a
    assert not default_update.allow_incompatible_changes

    # Called with allow_incompatible_changes=True: the flag is carried through.
    permissive_update = schema_a.update(allow_incompatible_changes=True)
    assert isinstance(permissive_update, SchemaUpdate)
    assert permissive_update.base_schema == schema_a
    assert permissive_update.allow_incompatible_changes

    # Chain update -> add_field -> apply to produce a schema with one more field.
    extra_field = Field.of(pa.field("name", pa.string(), nullable=True), field_id=4)
    pending = schema_a.update()
    pending_with_field = pending.add_field(extra_field)
    evolved = pending_with_field.apply()

    assert len(evolved.fields) == 2
    # The field that was already in schema_a is carried over unchanged.
    assert evolved.field("col1") == schema_a.field("col1")

    appended = evolved.field("name")
    assert appended.arrow.name == "name"
    assert appended.arrow.type == pa.string()
    assert appended.id == 2  # requested field_id of 4 is ignored and auto-assigned
|
378
|
+
|
379
|
+
|
380
|
+
def test_default_value_type_promotion():
    """Check Field._cast_default_to_promoted_type for valid, None and invalid casts."""
    # An int32 field carrying both a past and a future default.
    int_field = Field.of(
        pa.field("test_field", pa.int32()),
        past_default=42,
        future_default=100,
        consistency_type=SchemaConsistencyType.NONE,
    )

    # (default value, promoted Arrow type, expected value after the cast),
    # covering int64, float64 and string targets in that order.
    cast_cases = [
        (42, pa.int64(), 42),
        (100, pa.int64(), 100),
        (42, pa.float64(), 42.0),
        (100, pa.float64(), 100.0),
        (42, pa.string(), "42"),
        (100, pa.string(), "100"),
    ]
    for default_value, target_type, expected in cast_cases:
        converted = int_field._cast_default_to_promoted_type(default_value, target_type)
        assert converted == expected

    # A None default must come back as None regardless of the target type.
    assert (
        int_field._cast_default_to_promoted_type(None, pa.string()) is None
    ), "None default should remain None"

    # A string that is not numeric cannot become int64.
    with pytest.raises(SchemaValidationError):
        int_field._cast_default_to_promoted_type("not_a_number", pa.int64())

    # A scalar default cannot be cast to a list type.
    list_field = Field.of(
        pa.field("complex", pa.list_(pa.int32())),
        consistency_type=SchemaConsistencyType.NONE,
    )
    with pytest.raises(SchemaValidationError):
        list_field._cast_default_to_promoted_type(42, pa.list_(pa.string()))
|
432
|
+
|
433
|
+
|
434
|
+
def test_default_value_backfill_with_promotion():
|
435
|
+
"""Test that default values are correctly backfilled when types are promoted."""
|
436
|
+
|
437
|
+
# Test the interaction between default value casting and binary promotion
|
438
|
+
# This represents a common scenario where defaults need to be promoted to binary
|
439
|
+
field_with_defaults = Field.of(
|
440
|
+
pa.field("test_field", pa.int32()),
|
441
|
+
past_default=42,
|
442
|
+
future_default=100,
|
443
|
+
consistency_type=SchemaConsistencyType.NONE,
|
444
|
+
)
|
445
|
+
|
446
|
+
# Test promotion to string (a common "catch-all" type in type promotion)
|
447
|
+
string_past = field_with_defaults._cast_default_to_promoted_type(42, pa.string())
|
448
|
+
string_future = field_with_defaults._cast_default_to_promoted_type(100, pa.string())
|
449
|
+
|
450
|
+
assert string_past == "42", f"Expected '42', got {string_past}"
|
451
|
+
assert string_future == "100", f"Expected '100', got {string_future}"
|
452
|
+
|
453
|
+
# Also test floats to string
|
454
|
+
float_field = Field.of(
|
455
|
+
pa.field("float_field", pa.float32()),
|
456
|
+
past_default=3.14159,
|
457
|
+
future_default=2.71828,
|
458
|
+
consistency_type=SchemaConsistencyType.NONE,
|
459
|
+
)
|
460
|
+
|
461
|
+
string_past = float_field._cast_default_to_promoted_type(3.14159, pa.string())
|
462
|
+
string_future = float_field._cast_default_to_promoted_type(2.71828, pa.string())
|
463
|
+
|
464
|
+
assert string_past == "3.14159", f"Expected '3.14159', got {string_past}"
|
465
|
+
assert string_future == "2.71828", f"Expected '2.71828', got {string_future}"
|
466
|
+
|
467
|
+
# Test that None defaults are handled correctly
|
468
|
+
none_field = Field.of(
|
469
|
+
pa.field("none_field", pa.int32()),
|
470
|
+
past_default=None,
|
471
|
+
future_default=42,
|
472
|
+
consistency_type=SchemaConsistencyType.NONE,
|
473
|
+
)
|
474
|
+
|
475
|
+
none_past = none_field._cast_default_to_promoted_type(None, pa.string())
|
476
|
+
valid_future = none_field._cast_default_to_promoted_type(42, pa.string())
|
477
|
+
|
478
|
+
assert none_past is None, f"None should remain None, got {none_past}"
|
479
|
+
assert valid_future == "42", f"Expected '42', got {valid_future}"
|