deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,5 @@
|
|
1
|
-
|
1
|
+
from deltacat.storage import metastore
|
2
2
|
from deltacat.types.media import ContentType
|
3
|
-
import os
|
4
3
|
import pytest
|
5
4
|
from deltacat.storage import Delta
|
6
5
|
from deltacat.compute.resource_estimation.delta import (
|
@@ -22,22 +21,7 @@ Function scoped fixtures
|
|
22
21
|
|
23
22
|
|
24
23
|
@pytest.fixture(scope="function")
|
25
|
-
def
|
26
|
-
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
27
|
-
"db_file_path",
|
28
|
-
"deltacat/tests/local_deltacat_storage/db_test.sqlite",
|
29
|
-
)
|
30
|
-
# see deltacat/tests/local_deltacat_storage/README.md for documentation
|
31
|
-
kwargs_for_local_deltacat_storage = {
|
32
|
-
DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
|
33
|
-
}
|
34
|
-
yield kwargs_for_local_deltacat_storage
|
35
|
-
if os.path.exists(DATABASE_FILE_PATH_VALUE):
|
36
|
-
os.remove(DATABASE_FILE_PATH_VALUE)
|
37
|
-
|
38
|
-
|
39
|
-
@pytest.fixture(scope="function")
|
40
|
-
def parquet_delta_with_manifest(local_deltacat_storage_kwargs):
|
24
|
+
def parquet_delta_with_manifest(main_deltacat_storage_kwargs):
|
41
25
|
"""
|
42
26
|
These fixtures are function scoped as functions can modify the delta.
|
43
27
|
"""
|
@@ -47,7 +31,7 @@ def parquet_delta_with_manifest(local_deltacat_storage_kwargs):
|
|
47
31
|
"test_namespace",
|
48
32
|
file_paths=[DELTA_CSV_FILE_PATH],
|
49
33
|
content_type=ContentType.PARQUET,
|
50
|
-
**
|
34
|
+
**main_deltacat_storage_kwargs
|
51
35
|
)
|
52
36
|
|
53
37
|
result.meta["source_content_length"] = 0
|
@@ -60,14 +44,14 @@ def parquet_delta_with_manifest(local_deltacat_storage_kwargs):
|
|
60
44
|
|
61
45
|
|
62
46
|
@pytest.fixture(scope="function")
|
63
|
-
def utsv_delta_with_manifest(
|
47
|
+
def utsv_delta_with_manifest(main_deltacat_storage_kwargs):
|
64
48
|
from deltacat.tests.test_utils.pyarrow import create_delta_from_csv_file
|
65
49
|
|
66
50
|
result = create_delta_from_csv_file(
|
67
51
|
"test_namespace",
|
68
52
|
file_paths=[DELTA_CSV_FILE_PATH],
|
69
53
|
content_type=ContentType.UNESCAPED_TSV,
|
70
|
-
**
|
54
|
+
**main_deltacat_storage_kwargs
|
71
55
|
)
|
72
56
|
|
73
57
|
result.meta["source_content_length"] = 0
|
@@ -80,14 +64,14 @@ def utsv_delta_with_manifest(local_deltacat_storage_kwargs):
|
|
80
64
|
|
81
65
|
|
82
66
|
@pytest.fixture(scope="function")
|
83
|
-
def delta_without_manifest(
|
67
|
+
def delta_without_manifest(main_deltacat_storage_kwargs):
|
84
68
|
from deltacat.tests.test_utils.pyarrow import create_delta_from_csv_file
|
85
69
|
|
86
70
|
delta = create_delta_from_csv_file(
|
87
71
|
"test_namespace",
|
88
72
|
file_paths=[DELTA_CSV_FILE_PATH],
|
89
73
|
content_type=ContentType.PARQUET,
|
90
|
-
**
|
74
|
+
**main_deltacat_storage_kwargs
|
91
75
|
)
|
92
76
|
|
93
77
|
# now we intentionally remove manifest
|
@@ -99,14 +83,14 @@ def delta_without_manifest(local_deltacat_storage_kwargs):
|
|
99
83
|
|
100
84
|
|
101
85
|
@pytest.fixture(scope="function")
|
102
|
-
def delta_with_populated_meta(
|
86
|
+
def delta_with_populated_meta(main_deltacat_storage_kwargs):
|
103
87
|
from deltacat.tests.test_utils.pyarrow import create_delta_from_csv_file
|
104
88
|
|
105
89
|
delta = create_delta_from_csv_file(
|
106
90
|
"test_namespace",
|
107
91
|
file_paths=[DELTA_CSV_FILE_PATH],
|
108
92
|
content_type=ContentType.PARQUET,
|
109
|
-
**
|
93
|
+
**main_deltacat_storage_kwargs
|
110
94
|
)
|
111
95
|
|
112
96
|
return delta
|
@@ -114,14 +98,14 @@ def delta_with_populated_meta(local_deltacat_storage_kwargs):
|
|
114
98
|
|
115
99
|
class TestEstimateResourcesRequiredToProcessDelta:
|
116
100
|
def test_delta_with_prepopulated_meta_returns_directly(
|
117
|
-
self,
|
101
|
+
self, main_deltacat_storage_kwargs, delta_with_populated_meta: Delta
|
118
102
|
):
|
119
103
|
|
120
104
|
result = estimate_resources_required_to_process_delta(
|
121
105
|
delta=delta_with_populated_meta,
|
122
106
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
123
|
-
deltacat_storage=
|
124
|
-
deltacat_storage_kwargs=
|
107
|
+
deltacat_storage=metastore,
|
108
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
125
109
|
)
|
126
110
|
|
127
111
|
assert (
|
@@ -141,7 +125,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
141
125
|
)
|
142
126
|
|
143
127
|
def test_delta_manifest_empty_when_default_method(
|
144
|
-
self,
|
128
|
+
self, main_deltacat_storage_kwargs, delta_without_manifest: Delta
|
145
129
|
):
|
146
130
|
params = EstimateResourcesParams.of(
|
147
131
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT,
|
@@ -152,8 +136,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
152
136
|
result = estimate_resources_required_to_process_delta(
|
153
137
|
delta=delta_without_manifest,
|
154
138
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
155
|
-
deltacat_storage=
|
156
|
-
deltacat_storage_kwargs=
|
139
|
+
deltacat_storage=metastore,
|
140
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
157
141
|
estimate_resources_params=params,
|
158
142
|
)
|
159
143
|
|
@@ -172,7 +156,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
172
156
|
)
|
173
157
|
|
174
158
|
def test_delta_manifest_exists_when_default_method(
|
175
|
-
self,
|
159
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
176
160
|
):
|
177
161
|
params = EstimateResourcesParams.of(
|
178
162
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT,
|
@@ -183,8 +167,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
183
167
|
result = estimate_resources_required_to_process_delta(
|
184
168
|
delta=parquet_delta_with_manifest,
|
185
169
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
186
|
-
deltacat_storage=
|
187
|
-
deltacat_storage_kwargs=
|
170
|
+
deltacat_storage=metastore,
|
171
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
188
172
|
estimate_resources_params=params,
|
189
173
|
)
|
190
174
|
|
@@ -207,7 +191,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
207
191
|
)
|
208
192
|
|
209
193
|
def test_previous_inflation_arg_not_passed_when_default_method(
|
210
|
-
self,
|
194
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
211
195
|
):
|
212
196
|
with pytest.raises(AssertionError):
|
213
197
|
params = EstimateResourcesParams.of(
|
@@ -218,13 +202,13 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
218
202
|
estimate_resources_required_to_process_delta(
|
219
203
|
delta=parquet_delta_with_manifest,
|
220
204
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
221
|
-
deltacat_storage=
|
222
|
-
deltacat_storage_kwargs=
|
205
|
+
deltacat_storage=metastore,
|
206
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
223
207
|
estimate_resources_params=params,
|
224
208
|
)
|
225
209
|
|
226
210
|
def test_estimate_resources_params_not_passed_assumes_default(
|
227
|
-
self,
|
211
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
228
212
|
):
|
229
213
|
params = EstimateResourcesParams.of(
|
230
214
|
previous_inflation=7,
|
@@ -234,8 +218,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
234
218
|
result = estimate_resources_required_to_process_delta(
|
235
219
|
delta=parquet_delta_with_manifest,
|
236
220
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
237
|
-
deltacat_storage=
|
238
|
-
deltacat_storage_kwargs=
|
221
|
+
deltacat_storage=metastore,
|
222
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
239
223
|
estimate_resources_params=params,
|
240
224
|
)
|
241
225
|
|
@@ -258,7 +242,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
258
242
|
)
|
259
243
|
|
260
244
|
def test_delta_manifest_empty_when_content_type_meta(
|
261
|
-
self,
|
245
|
+
self, main_deltacat_storage_kwargs, delta_without_manifest: Delta
|
262
246
|
):
|
263
247
|
params = EstimateResourcesParams.of(
|
264
248
|
resource_estimation_method=ResourceEstimationMethod.CONTENT_TYPE_META,
|
@@ -268,8 +252,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
268
252
|
result = estimate_resources_required_to_process_delta(
|
269
253
|
delta=delta_without_manifest,
|
270
254
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
271
|
-
deltacat_storage=
|
272
|
-
deltacat_storage_kwargs=
|
255
|
+
deltacat_storage=metastore,
|
256
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
273
257
|
estimate_resources_params=params,
|
274
258
|
)
|
275
259
|
|
@@ -283,7 +267,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
283
267
|
assert result.statistics.record_count == 7
|
284
268
|
|
285
269
|
def test_delta_manifest_exists_when_content_type_meta(
|
286
|
-
self,
|
270
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
287
271
|
):
|
288
272
|
params = EstimateResourcesParams.of(
|
289
273
|
resource_estimation_method=ResourceEstimationMethod.CONTENT_TYPE_META,
|
@@ -293,8 +277,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
293
277
|
result = estimate_resources_required_to_process_delta(
|
294
278
|
delta=parquet_delta_with_manifest,
|
295
279
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
296
|
-
deltacat_storage=
|
297
|
-
deltacat_storage_kwargs=
|
280
|
+
deltacat_storage=metastore,
|
281
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
298
282
|
estimate_resources_params=params,
|
299
283
|
)
|
300
284
|
|
@@ -308,7 +292,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
308
292
|
assert result.statistics.record_count == 7
|
309
293
|
|
310
294
|
def test_delta_manifest_empty_when_intelligent_estimation(
|
311
|
-
self,
|
295
|
+
self, main_deltacat_storage_kwargs, delta_without_manifest: Delta
|
312
296
|
):
|
313
297
|
params = EstimateResourcesParams.of(
|
314
298
|
resource_estimation_method=ResourceEstimationMethod.INTELLIGENT_ESTIMATION,
|
@@ -318,8 +302,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
318
302
|
result = estimate_resources_required_to_process_delta(
|
319
303
|
delta=delta_without_manifest,
|
320
304
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
321
|
-
deltacat_storage=
|
322
|
-
deltacat_storage_kwargs=
|
305
|
+
deltacat_storage=metastore,
|
306
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
323
307
|
estimate_resources_params=params,
|
324
308
|
)
|
325
309
|
|
@@ -333,7 +317,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
333
317
|
assert result.statistics.record_count == 7
|
334
318
|
|
335
319
|
def test_delta_manifest_exists_when_intelligent_estimation(
|
336
|
-
self,
|
320
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
337
321
|
):
|
338
322
|
params = EstimateResourcesParams.of(
|
339
323
|
resource_estimation_method=ResourceEstimationMethod.INTELLIGENT_ESTIMATION,
|
@@ -343,8 +327,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
343
327
|
result = estimate_resources_required_to_process_delta(
|
344
328
|
delta=parquet_delta_with_manifest,
|
345
329
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
346
|
-
deltacat_storage=
|
347
|
-
deltacat_storage_kwargs=
|
330
|
+
deltacat_storage=metastore,
|
331
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
348
332
|
estimate_resources_params=params,
|
349
333
|
)
|
350
334
|
|
@@ -358,7 +342,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
358
342
|
assert result.statistics.record_count == 7
|
359
343
|
|
360
344
|
def test_delta_manifest_exists_inflation_absent_when_intelligent_estimation(
|
361
|
-
self,
|
345
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
362
346
|
):
|
363
347
|
params = EstimateResourcesParams.of(
|
364
348
|
resource_estimation_method=ResourceEstimationMethod.INTELLIGENT_ESTIMATION,
|
@@ -368,15 +352,15 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
368
352
|
result = estimate_resources_required_to_process_delta(
|
369
353
|
delta=parquet_delta_with_manifest,
|
370
354
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
371
|
-
deltacat_storage=
|
372
|
-
deltacat_storage_kwargs=
|
355
|
+
deltacat_storage=metastore,
|
356
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
373
357
|
estimate_resources_params=params,
|
374
358
|
)
|
375
359
|
|
376
360
|
assert result is None
|
377
361
|
|
378
362
|
def test_delta_utsv_data_when_intelligent_estimation(
|
379
|
-
self,
|
363
|
+
self, main_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
|
380
364
|
):
|
381
365
|
params = EstimateResourcesParams.of(
|
382
366
|
resource_estimation_method=ResourceEstimationMethod.INTELLIGENT_ESTIMATION,
|
@@ -386,15 +370,15 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
386
370
|
result = estimate_resources_required_to_process_delta(
|
387
371
|
delta=utsv_delta_with_manifest,
|
388
372
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
389
|
-
deltacat_storage=
|
390
|
-
deltacat_storage_kwargs=
|
373
|
+
deltacat_storage=metastore,
|
374
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
391
375
|
estimate_resources_params=params,
|
392
376
|
)
|
393
377
|
|
394
378
|
assert result is None
|
395
379
|
|
396
380
|
def test_empty_delta_sampled_when_file_sampling(
|
397
|
-
self,
|
381
|
+
self, main_deltacat_storage_kwargs, delta_without_manifest: Delta
|
398
382
|
):
|
399
383
|
params = EstimateResourcesParams.of(
|
400
384
|
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
|
@@ -404,8 +388,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
404
388
|
result = estimate_resources_required_to_process_delta(
|
405
389
|
delta=delta_without_manifest,
|
406
390
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
407
|
-
deltacat_storage=
|
408
|
-
deltacat_storage_kwargs=
|
391
|
+
deltacat_storage=metastore,
|
392
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
409
393
|
estimate_resources_params=params,
|
410
394
|
)
|
411
395
|
|
@@ -417,7 +401,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
417
401
|
)
|
418
402
|
|
419
403
|
def test_empty_delta_sampled_when_file_sampling_with_previous_inflation(
|
420
|
-
self,
|
404
|
+
self, main_deltacat_storage_kwargs, delta_without_manifest: Delta
|
421
405
|
):
|
422
406
|
params = EstimateResourcesParams.of(
|
423
407
|
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION,
|
@@ -427,8 +411,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
427
411
|
result = estimate_resources_required_to_process_delta(
|
428
412
|
delta=delta_without_manifest,
|
429
413
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
430
|
-
deltacat_storage=
|
431
|
-
deltacat_storage_kwargs=
|
414
|
+
deltacat_storage=metastore,
|
415
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
432
416
|
estimate_resources_params=params,
|
433
417
|
)
|
434
418
|
|
@@ -440,7 +424,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
440
424
|
)
|
441
425
|
|
442
426
|
def test_delta_manifest_parquet_when_file_sampling(
|
443
|
-
self,
|
427
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
444
428
|
):
|
445
429
|
params = EstimateResourcesParams.of(
|
446
430
|
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
|
@@ -450,8 +434,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
450
434
|
result = estimate_resources_required_to_process_delta(
|
451
435
|
delta=parquet_delta_with_manifest,
|
452
436
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
453
|
-
deltacat_storage=
|
454
|
-
deltacat_storage_kwargs=
|
437
|
+
deltacat_storage=metastore,
|
438
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
455
439
|
estimate_resources_params=params,
|
456
440
|
)
|
457
441
|
assert result.memory_bytes is not None
|
@@ -461,7 +445,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
461
445
|
)
|
462
446
|
|
463
447
|
def test_delta_manifest_parquet_when_file_sampling_with_previous_inflation(
|
464
|
-
self,
|
448
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
465
449
|
):
|
466
450
|
params = EstimateResourcesParams.of(
|
467
451
|
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION,
|
@@ -471,8 +455,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
471
455
|
result = estimate_resources_required_to_process_delta(
|
472
456
|
delta=parquet_delta_with_manifest,
|
473
457
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
474
|
-
deltacat_storage=
|
475
|
-
deltacat_storage_kwargs=
|
458
|
+
deltacat_storage=metastore,
|
459
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
476
460
|
estimate_resources_params=params,
|
477
461
|
)
|
478
462
|
assert result.memory_bytes is not None
|
@@ -483,7 +467,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
483
467
|
|
484
468
|
def test_parquet_delta_when_file_sampling_and_arrow_size_zero(
|
485
469
|
self,
|
486
|
-
|
470
|
+
main_deltacat_storage_kwargs,
|
487
471
|
parquet_delta_with_manifest: Delta,
|
488
472
|
monkeypatch,
|
489
473
|
):
|
@@ -501,13 +485,13 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
501
485
|
|
502
486
|
return MockedValue()
|
503
487
|
|
504
|
-
monkeypatch.setattr(
|
488
|
+
monkeypatch.setattr(metastore, "download_delta_manifest_entry", mock_func)
|
505
489
|
|
506
490
|
result = estimate_resources_required_to_process_delta(
|
507
491
|
delta=parquet_delta_with_manifest,
|
508
492
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
509
|
-
deltacat_storage=
|
510
|
-
deltacat_storage_kwargs=
|
493
|
+
deltacat_storage=metastore,
|
494
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
511
495
|
estimate_resources_params=params,
|
512
496
|
)
|
513
497
|
|
@@ -519,7 +503,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
519
503
|
)
|
520
504
|
|
521
505
|
def test_delta_manifest_utsv_when_file_sampling(
|
522
|
-
self,
|
506
|
+
self, main_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
|
523
507
|
):
|
524
508
|
params = EstimateResourcesParams.of(
|
525
509
|
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
|
@@ -529,8 +513,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
529
513
|
result = estimate_resources_required_to_process_delta(
|
530
514
|
delta=utsv_delta_with_manifest,
|
531
515
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
532
|
-
deltacat_storage=
|
533
|
-
deltacat_storage_kwargs=
|
516
|
+
deltacat_storage=metastore,
|
517
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
534
518
|
estimate_resources_params=params,
|
535
519
|
)
|
536
520
|
assert result.memory_bytes is not None
|
@@ -540,7 +524,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
540
524
|
)
|
541
525
|
|
542
526
|
def test_delta_manifest_utsv_when_file_sampling_zero_files_to_sample(
|
543
|
-
self,
|
527
|
+
self, main_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
|
544
528
|
):
|
545
529
|
params = EstimateResourcesParams.of(
|
546
530
|
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
|
@@ -550,14 +534,14 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
550
534
|
result = estimate_resources_required_to_process_delta(
|
551
535
|
delta=utsv_delta_with_manifest,
|
552
536
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
553
|
-
deltacat_storage=
|
554
|
-
deltacat_storage_kwargs=
|
537
|
+
deltacat_storage=metastore,
|
538
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
555
539
|
estimate_resources_params=params,
|
556
540
|
)
|
557
541
|
assert result is None
|
558
542
|
|
559
543
|
def test_delta_manifest_utsv_when_file_sampling_with_previous_inflation_zero_files_to_sample(
|
560
|
-
self,
|
544
|
+
self, main_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
|
561
545
|
):
|
562
546
|
previous_inflation = 7
|
563
547
|
params = EstimateResourcesParams.of(
|
@@ -569,8 +553,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
569
553
|
result = estimate_resources_required_to_process_delta(
|
570
554
|
delta=utsv_delta_with_manifest,
|
571
555
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
572
|
-
deltacat_storage=
|
573
|
-
deltacat_storage_kwargs=
|
556
|
+
deltacat_storage=metastore,
|
557
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
574
558
|
estimate_resources_params=params,
|
575
559
|
)
|
576
560
|
assert result is not None
|
@@ -579,7 +563,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
579
563
|
)
|
580
564
|
|
581
565
|
def test_empty_delta_when_default_v2(
|
582
|
-
self,
|
566
|
+
self, main_deltacat_storage_kwargs, delta_without_manifest: Delta
|
583
567
|
):
|
584
568
|
params = EstimateResourcesParams.of(
|
585
569
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
|
@@ -591,8 +575,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
591
575
|
result = estimate_resources_required_to_process_delta(
|
592
576
|
delta=delta_without_manifest,
|
593
577
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
594
|
-
deltacat_storage=
|
595
|
-
deltacat_storage_kwargs=
|
578
|
+
deltacat_storage=metastore,
|
579
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
596
580
|
estimate_resources_params=params,
|
597
581
|
)
|
598
582
|
|
@@ -604,7 +588,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
604
588
|
)
|
605
589
|
|
606
590
|
def test_parquet_delta_when_default_v2(
|
607
|
-
self,
|
591
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
608
592
|
):
|
609
593
|
params = EstimateResourcesParams.of(
|
610
594
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
|
@@ -617,8 +601,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
617
601
|
result = estimate_resources_required_to_process_delta(
|
618
602
|
delta=parquet_delta_with_manifest,
|
619
603
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
620
|
-
deltacat_storage=
|
621
|
-
deltacat_storage_kwargs=
|
604
|
+
deltacat_storage=metastore,
|
605
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
622
606
|
estimate_resources_params=params,
|
623
607
|
)
|
624
608
|
|
@@ -630,7 +614,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
630
614
|
)
|
631
615
|
|
632
616
|
def test_parquet_delta_when_default_v2_without_avg_record_size_and_sampling(
|
633
|
-
self,
|
617
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
634
618
|
):
|
635
619
|
params = EstimateResourcesParams.of(
|
636
620
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
|
@@ -641,8 +625,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
641
625
|
result = estimate_resources_required_to_process_delta(
|
642
626
|
delta=parquet_delta_with_manifest,
|
643
627
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
644
|
-
deltacat_storage=
|
645
|
-
deltacat_storage_kwargs=
|
628
|
+
deltacat_storage=metastore,
|
629
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
646
630
|
estimate_resources_params=params,
|
647
631
|
)
|
648
632
|
|
@@ -654,7 +638,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
654
638
|
)
|
655
639
|
|
656
640
|
def test_parquet_delta_when_default_v2_and_files_to_sample_zero(
|
657
|
-
self,
|
641
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
658
642
|
):
|
659
643
|
params = EstimateResourcesParams.of(
|
660
644
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
|
@@ -667,8 +651,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
667
651
|
result = estimate_resources_required_to_process_delta(
|
668
652
|
delta=parquet_delta_with_manifest,
|
669
653
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
670
|
-
deltacat_storage=
|
671
|
-
deltacat_storage_kwargs=
|
654
|
+
deltacat_storage=metastore,
|
655
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
672
656
|
estimate_resources_params=params,
|
673
657
|
)
|
674
658
|
|
@@ -680,7 +664,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
680
664
|
)
|
681
665
|
|
682
666
|
def test_utsv_delta_when_default_v2(
|
683
|
-
self,
|
667
|
+
self, main_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
|
684
668
|
):
|
685
669
|
params = EstimateResourcesParams.of(
|
686
670
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
|
@@ -693,8 +677,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
693
677
|
result = estimate_resources_required_to_process_delta(
|
694
678
|
delta=utsv_delta_with_manifest,
|
695
679
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
696
|
-
deltacat_storage=
|
697
|
-
deltacat_storage_kwargs=
|
680
|
+
deltacat_storage=metastore,
|
681
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
698
682
|
estimate_resources_params=params,
|
699
683
|
)
|
700
684
|
|
@@ -706,7 +690,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
706
690
|
)
|
707
691
|
|
708
692
|
def test_utsv_delta_when_default_v2_without_avg_record_size(
|
709
|
-
self,
|
693
|
+
self, main_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
|
710
694
|
):
|
711
695
|
params = EstimateResourcesParams.of(
|
712
696
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
|
@@ -718,8 +702,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
718
702
|
result = estimate_resources_required_to_process_delta(
|
719
703
|
delta=utsv_delta_with_manifest,
|
720
704
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
721
|
-
deltacat_storage=
|
722
|
-
deltacat_storage_kwargs=
|
705
|
+
deltacat_storage=metastore,
|
706
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
723
707
|
estimate_resources_params=params,
|
724
708
|
)
|
725
709
|
|
@@ -732,7 +716,7 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
732
716
|
)
|
733
717
|
|
734
718
|
def test_parquet_delta_without_inflation_when_default_v2(
|
735
|
-
self,
|
719
|
+
self, main_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
736
720
|
):
|
737
721
|
params = EstimateResourcesParams.of(
|
738
722
|
resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
|
@@ -745,8 +729,8 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
745
729
|
result = estimate_resources_required_to_process_delta(
|
746
730
|
delta=parquet_delta_with_manifest,
|
747
731
|
operation_type=OperationType.PYARROW_DOWNLOAD,
|
748
|
-
deltacat_storage=
|
749
|
-
deltacat_storage_kwargs=
|
732
|
+
deltacat_storage=metastore,
|
733
|
+
deltacat_storage_kwargs=main_deltacat_storage_kwargs,
|
750
734
|
estimate_resources_params=params,
|
751
735
|
)
|
752
736
|
|