deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -20,11 +20,11 @@ from deltacat.utils.common import ReadKwargsProvider
|
|
20
20
|
|
21
21
|
from deltacat.storage import (
|
22
22
|
DeltaType,
|
23
|
+
EntryParams,
|
23
24
|
)
|
24
25
|
from deltacat.compute.compactor_v2.compaction_session import (
|
25
26
|
compact_partition as compact_partition_v2,
|
26
27
|
)
|
27
|
-
from deltacat.storage import DeleteParameters
|
28
28
|
|
29
29
|
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
30
30
|
|
@@ -53,8 +53,8 @@ class BaseCompactorTestCase:
|
|
53
53
|
Args:
|
54
54
|
primary_keys: Set[str] - argument for the primary_keys parameter in compact_partition. Also needed for table/delta creation
|
55
55
|
sort_keys: List[SortKey] - argument for the sort_keys parameter in compact_partition. Also needed for table/delta creation
|
56
|
-
|
57
|
-
|
56
|
+
partition_keys: List[PartitionKey] - argument for the partition_keys parameter. Needed for table/delta creation
|
57
|
+
partition_values: List[Optional[str]] - argument for the partition_values parameter. Needed for table/delta creation
|
58
58
|
input_deltas: List[pa.Array] - argument required for delta creation during compact_partition test setup. Actual incoming deltas expressed as a PyArrow array (https://arrow.apache.org/docs/python/generated/pyarrow.array.html)
|
59
59
|
input_deltas_delta_type: DeltaType - enumerated argument required for delta creation during compact_partition test setup. Available values are (DeltaType.APPEND, DeltaType.UPSERT, DeltaType.DELETE). DeltaType.APPEND is not supported by compactor v1 or v2
|
60
60
|
expected_terminal_compact_partition_result: pa.Table - expected PyArrow table after compaction (i.e., the state of the table after applying all row UPDATES/DELETES/INSERTS)
|
@@ -62,7 +62,7 @@ class BaseCompactorTestCase:
|
|
62
62
|
expected_terminal_exception_message: Optional[str] - expected exception message if present.
|
63
63
|
do_create_placement_group: bool - toggles whether to create a placement group (https://docs.ray.io/en/latest/ray-core/scheduling/placement-group.html) or not
|
64
64
|
records_per_compacted_file: int - argument for the records_per_compacted_file parameter in compact_partition
|
65
|
-
|
65
|
+
hash_bucket_count: int - argument for the hash_bucket_count parameter in compact_partition
|
66
66
|
read_kwargs_provider: Optional[ReadKwargsProvider] - argument for read_kwargs_provider parameter in compact_partition. If None then no ReadKwargsProvider is provided to compact_partition_params
|
67
67
|
drop_duplicates: bool - argument for drop_duplicates parameter in compact_partition. Only recognized by compactor v2.
|
68
68
|
skip_enabled_compact_partition_drivers: List[CompactorVersion] - skip whatever enabled_compact_partition_drivers are included in this list
|
@@ -100,14 +100,7 @@ class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
|
|
100
100
|
"""
|
101
101
|
|
102
102
|
is_inplace: bool
|
103
|
-
add_late_deltas: Optional[
|
104
|
-
List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]
|
105
|
-
]
|
106
|
-
|
107
|
-
|
108
|
-
@dataclass(frozen=True)
|
109
|
-
class NoRCFOutputCompactionTestCaseParams(BaseCompactorTestCase):
|
110
|
-
pass
|
103
|
+
add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType, Optional[EntryParams]]]]
|
111
104
|
|
112
105
|
|
113
106
|
def with_compactor_version_func_test_param(
|
@@ -137,7 +130,7 @@ def with_compactor_version_func_test_param(
|
|
137
130
|
|
138
131
|
|
139
132
|
INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
140
|
-
"1-incremental-pkstr-sknone-
|
133
|
+
"1-incremental-pkstr-sknone-norci": IncrementalCompactionTestCaseParams(
|
141
134
|
primary_keys={"pk_col_1"},
|
142
135
|
sort_keys=ZERO_VALUED_SORT_KEY,
|
143
136
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
@@ -163,7 +156,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
163
156
|
skip_enabled_compact_partition_drivers=None,
|
164
157
|
assert_compaction_audit=assert_compaction_audit,
|
165
158
|
),
|
166
|
-
"2-incremental-pkstr-skstr-
|
159
|
+
"2-incremental-pkstr-skstr-norci": IncrementalCompactionTestCaseParams(
|
167
160
|
primary_keys={"pk_col_1"},
|
168
161
|
sort_keys=ZERO_VALUED_SORT_KEY,
|
169
162
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
@@ -192,11 +185,11 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
192
185
|
skip_enabled_compact_partition_drivers=None,
|
193
186
|
assert_compaction_audit=assert_compaction_audit,
|
194
187
|
),
|
195
|
-
"3-incremental-pkstr-multiskstr-
|
188
|
+
"3-incremental-pkstr-multiskstr-norci": IncrementalCompactionTestCaseParams(
|
196
189
|
primary_keys={"pk_col_1"},
|
197
190
|
sort_keys=[
|
198
|
-
SortKey.of(
|
199
|
-
SortKey.of(
|
191
|
+
SortKey.of(key=["sk_col_1"]),
|
192
|
+
SortKey.of(key=["sk_col_2"]),
|
200
193
|
],
|
201
194
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
202
195
|
partition_values=["1"],
|
@@ -233,8 +226,8 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
233
226
|
"4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
|
234
227
|
primary_keys={"pk_col_1"},
|
235
228
|
sort_keys=[
|
236
|
-
SortKey.of(
|
237
|
-
SortKey.of(
|
229
|
+
SortKey.of(key=["sk_col_1"]),
|
230
|
+
SortKey.of(key=["sk_col_2"]),
|
238
231
|
],
|
239
232
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
240
233
|
partition_values=["1"],
|
@@ -269,7 +262,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
269
262
|
),
|
270
263
|
"5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
|
271
264
|
primary_keys={"pk_col_1"},
|
272
|
-
sort_keys=[SortKey.of(
|
265
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
273
266
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
274
267
|
partition_values=["1"],
|
275
268
|
input_deltas=pa.Table.from_arrays(
|
@@ -301,7 +294,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
301
294
|
),
|
302
295
|
"6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
|
303
296
|
primary_keys={"pk_col_1"},
|
304
|
-
sort_keys=[SortKey.of(
|
297
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
305
298
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
306
299
|
partition_values=["1"],
|
307
300
|
input_deltas=pa.Table.from_arrays(
|
@@ -333,7 +326,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
333
326
|
),
|
334
327
|
"7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
|
335
328
|
primary_keys={"pk_col_1"},
|
336
|
-
sort_keys=[SortKey.of(
|
329
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
337
330
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
338
331
|
partition_values=["1"],
|
339
332
|
input_deltas=pa.Table.from_arrays(
|
@@ -365,7 +358,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
365
358
|
),
|
366
359
|
"8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
|
367
360
|
primary_keys={"pk_col_1", "pk_col_2"},
|
368
|
-
sort_keys=[SortKey.of(
|
361
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
369
362
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
370
363
|
partition_values=["1"],
|
371
364
|
input_deltas=pa.Table.from_arrays(
|
@@ -399,7 +392,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
399
392
|
),
|
400
393
|
"9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
|
401
394
|
primary_keys={"pk_col_1"},
|
402
|
-
sort_keys=[SortKey.of(
|
395
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
403
396
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
404
397
|
partition_values=["1"],
|
405
398
|
input_deltas=pa.Table.from_arrays(
|
@@ -431,7 +424,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
431
424
|
),
|
432
425
|
"10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
|
433
426
|
primary_keys={"pk_col_1"},
|
434
|
-
sort_keys=[SortKey.of(
|
427
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
435
428
|
partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
|
436
429
|
partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
|
437
430
|
input_deltas=pa.Table.from_arrays(
|
@@ -463,7 +456,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
463
456
|
),
|
464
457
|
"11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
|
465
458
|
primary_keys={"pk_col_1"},
|
466
|
-
sort_keys=[SortKey.of(
|
459
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
467
460
|
partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
|
468
461
|
partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
|
469
462
|
input_deltas=pa.Table.from_arrays(
|
@@ -495,7 +488,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
495
488
|
),
|
496
489
|
"12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
497
490
|
primary_keys={"pk_col_1"},
|
498
|
-
sort_keys=[SortKey.of(
|
491
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
499
492
|
partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
|
500
493
|
partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
|
501
494
|
input_deltas=pa.Table.from_arrays(
|
@@ -527,7 +520,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
527
520
|
),
|
528
521
|
"13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
|
529
522
|
primary_keys={"pk_col_1"},
|
530
|
-
sort_keys=[SortKey.of(
|
523
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
531
524
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
532
525
|
partition_values=["1"],
|
533
526
|
input_deltas=pa.Table.from_arrays(
|
@@ -571,7 +564,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
571
564
|
),
|
572
565
|
"14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
|
573
566
|
primary_keys={"pk_col_1"},
|
574
|
-
sort_keys=[SortKey.of(
|
567
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
575
568
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
576
569
|
partition_values=["1"],
|
577
570
|
input_deltas=pa.Table.from_arrays(
|
@@ -603,7 +596,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
603
596
|
),
|
604
597
|
"15-incremental-empty-input-with-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
605
598
|
primary_keys={"pk_col_1"},
|
606
|
-
sort_keys=[SortKey.of(
|
599
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
607
600
|
partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
|
608
601
|
partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
|
609
602
|
input_deltas=pa.Table.from_arrays(
|
@@ -5,7 +5,7 @@ from deltacat.compute.compactor.steps.repartition import repartition_range
|
|
5
5
|
from deltacat.types.media import ContentType
|
6
6
|
from deltacat.compute.compactor.model.repartition_result import RepartitionResult
|
7
7
|
from deltacat.storage import (
|
8
|
-
|
8
|
+
Partition,
|
9
9
|
)
|
10
10
|
|
11
11
|
"""
|
@@ -46,10 +46,10 @@ class TestRepartitionRange(unittest.TestCase):
|
|
46
46
|
}
|
47
47
|
),
|
48
48
|
]
|
49
|
-
self.destination_partition:
|
49
|
+
self.destination_partition: Partition = MagicMock()
|
50
50
|
self.repartition_args = {"column": "last_updated", "ranges": [1678665487112747]}
|
51
51
|
self.max_records_per_output_file = 2
|
52
|
-
self.
|
52
|
+
self.table_writer_kwargs = {}
|
53
53
|
self.repartitioned_file_content_type = ContentType.PARQUET
|
54
54
|
self.deltacat_storage = MagicMock()
|
55
55
|
self.deltacat_storage_kwargs = MagicMock()
|
@@ -60,7 +60,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
60
60
|
self.destination_partition,
|
61
61
|
self.repartition_args,
|
62
62
|
self.max_records_per_output_file,
|
63
|
-
self.
|
63
|
+
self.table_writer_kwargs,
|
64
64
|
self.repartitioned_file_content_type,
|
65
65
|
self.deltacat_storage,
|
66
66
|
self.deltacat_storage_kwargs,
|
@@ -87,7 +87,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
87
87
|
self.destination_partition,
|
88
88
|
self.repartition_args,
|
89
89
|
self.max_records_per_output_file,
|
90
|
-
self.
|
90
|
+
self.table_writer_kwargs,
|
91
91
|
self.repartitioned_file_content_type,
|
92
92
|
self.deltacat_storage,
|
93
93
|
self.deltacat_storage_kwargs,
|
@@ -101,7 +101,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
101
101
|
self.destination_partition,
|
102
102
|
self.repartition_args,
|
103
103
|
self.max_records_per_output_file,
|
104
|
-
self.
|
104
|
+
self.table_writer_kwargs,
|
105
105
|
self.repartitioned_file_content_type,
|
106
106
|
self.deltacat_storage,
|
107
107
|
self.deltacat_storage_kwargs,
|
@@ -114,7 +114,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
114
114
|
self.destination_partition,
|
115
115
|
self.repartition_args,
|
116
116
|
self.max_records_per_output_file,
|
117
|
-
self.
|
117
|
+
self.table_writer_kwargs,
|
118
118
|
self.repartitioned_file_content_type,
|
119
119
|
self.deltacat_storage,
|
120
120
|
self.deltacat_storage_kwargs,
|
@@ -128,7 +128,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
128
128
|
self.destination_partition,
|
129
129
|
self.repartition_args,
|
130
130
|
self.max_records_per_output_file,
|
131
|
-
self.
|
131
|
+
self.table_writer_kwargs,
|
132
132
|
self.repartitioned_file_content_type,
|
133
133
|
self.deltacat_storage,
|
134
134
|
self.deltacat_storage_kwargs,
|
@@ -143,7 +143,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
143
143
|
self.destination_partition,
|
144
144
|
self.repartition_args,
|
145
145
|
self.max_records_per_output_file,
|
146
|
-
self.
|
146
|
+
self.table_writer_kwargs,
|
147
147
|
self.repartitioned_file_content_type,
|
148
148
|
self.deltacat_storage,
|
149
149
|
self.deltacat_storage_kwargs,
|
@@ -158,7 +158,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
158
158
|
self.destination_partition,
|
159
159
|
self.repartition_args,
|
160
160
|
self.max_records_per_output_file,
|
161
|
-
self.
|
161
|
+
self.table_writer_kwargs,
|
162
162
|
self.repartitioned_file_content_type,
|
163
163
|
self.deltacat_storage,
|
164
164
|
self.deltacat_storage_kwargs,
|
@@ -175,7 +175,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
175
175
|
self.destination_partition,
|
176
176
|
self.repartition_args,
|
177
177
|
self.max_records_per_output_file,
|
178
|
-
self.
|
178
|
+
self.table_writer_kwargs,
|
179
179
|
self.repartitioned_file_content_type,
|
180
180
|
self.deltacat_storage,
|
181
181
|
self.deltacat_storage_kwargs,
|
@@ -189,7 +189,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
189
189
|
self.destination_partition,
|
190
190
|
self.repartition_args,
|
191
191
|
self.max_records_per_output_file,
|
192
|
-
self.
|
192
|
+
self.table_writer_kwargs,
|
193
193
|
self.repartitioned_file_content_type,
|
194
194
|
self.deltacat_storage,
|
195
195
|
self.deltacat_storage_kwargs,
|
@@ -206,7 +206,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
206
206
|
self.destination_partition,
|
207
207
|
self.repartition_args,
|
208
208
|
self.max_records_per_output_file,
|
209
|
-
self.
|
209
|
+
self.table_writer_kwargs,
|
210
210
|
self.repartitioned_file_content_type,
|
211
211
|
self.deltacat_storage,
|
212
212
|
),
|
@@ -233,7 +233,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
233
233
|
self.destination_partition,
|
234
234
|
self.repartition_args,
|
235
235
|
self.max_records_per_output_file,
|
236
|
-
self.
|
236
|
+
self.table_writer_kwargs,
|
237
237
|
self.repartitioned_file_content_type,
|
238
238
|
self.deltacat_storage,
|
239
239
|
self.deltacat_storage_kwargs,
|
@@ -1,133 +1,135 @@
|
|
1
|
-
import
|
1
|
+
import pytest
|
2
2
|
from unittest import mock
|
3
|
-
from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
|
4
|
-
from typing import Any, Dict
|
5
|
-
|
6
|
-
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
7
|
-
"db_file_path",
|
8
|
-
"deltacat/tests/local_deltacat_storage/db_test.sqlite",
|
9
|
-
)
|
10
3
|
|
4
|
+
from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
|
11
5
|
|
12
|
-
class TestFitInputDeltas(unittest.TestCase):
|
13
|
-
@classmethod
|
14
|
-
def setUpClass(cls):
|
15
|
-
cls.module_patcher = mock.patch.dict("sys.modules", {"ray": mock.MagicMock()})
|
16
|
-
cls.module_patcher.start()
|
17
6
|
|
18
|
-
|
19
|
-
|
7
|
+
@pytest.fixture(scope="module", autouse=True)
|
8
|
+
def mock_ray():
|
9
|
+
"""Mock ray module for all tests in this module"""
|
10
|
+
module_patcher = mock.patch.dict("sys.modules", {"ray": mock.MagicMock()})
|
11
|
+
module_patcher.start()
|
12
|
+
yield
|
13
|
+
module_patcher.stop()
|
14
|
+
|
15
|
+
|
16
|
+
@pytest.fixture
|
17
|
+
def compaction_audit():
|
18
|
+
"""Fixture for CompactionSessionAuditInfo"""
|
19
|
+
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
20
|
+
CompactionSessionAuditInfo,
|
21
|
+
)
|
22
|
+
|
23
|
+
return CompactionSessionAuditInfo("1.0", "2.3", "test")
|
24
|
+
|
25
|
+
|
26
|
+
def test_sanity(main_deltacat_storage_kwargs, compaction_audit):
|
27
|
+
from deltacat.compute.compactor.utils import io
|
28
|
+
from deltacat.storage import metastore
|
29
|
+
|
30
|
+
(
|
31
|
+
delta_list,
|
32
|
+
hash_bucket_count,
|
33
|
+
high_watermark,
|
34
|
+
require_multiple_rounds,
|
35
|
+
) = io.fit_input_deltas(
|
36
|
+
[TEST_UPSERT_DELTA],
|
37
|
+
{"CPU": 1, "memory": 20000000},
|
38
|
+
compaction_audit,
|
39
|
+
None,
|
40
|
+
metastore,
|
41
|
+
main_deltacat_storage_kwargs,
|
42
|
+
)
|
43
|
+
|
44
|
+
assert hash_bucket_count is not None
|
45
|
+
assert len(delta_list) == 1
|
46
|
+
assert high_watermark is not None
|
47
|
+
assert require_multiple_rounds is False
|
48
|
+
assert compaction_audit.hash_bucket_count is not None
|
49
|
+
assert compaction_audit.input_file_count is not None
|
50
|
+
assert compaction_audit.input_size_bytes is not None
|
51
|
+
assert compaction_audit.total_cluster_memory_bytes is not None
|
52
|
+
|
53
|
+
|
54
|
+
def test_when_hash_bucket_count_overridden(
|
55
|
+
main_deltacat_storage_kwargs, compaction_audit
|
56
|
+
):
|
57
|
+
from deltacat.compute.compactor.utils import io
|
58
|
+
from deltacat.storage import metastore
|
59
|
+
|
60
|
+
(
|
61
|
+
delta_list,
|
62
|
+
hash_bucket_count,
|
63
|
+
high_watermark,
|
64
|
+
require_multiple_rounds,
|
65
|
+
) = io.fit_input_deltas(
|
66
|
+
[TEST_UPSERT_DELTA],
|
67
|
+
{"CPU": 1, "memory": 20000000},
|
68
|
+
compaction_audit,
|
69
|
+
20,
|
70
|
+
metastore,
|
71
|
+
main_deltacat_storage_kwargs,
|
72
|
+
)
|
73
|
+
|
74
|
+
assert hash_bucket_count == 20
|
75
|
+
assert len(delta_list) == 1
|
76
|
+
assert high_watermark is not None
|
77
|
+
assert require_multiple_rounds is False
|
78
|
+
|
79
|
+
|
80
|
+
def test_when_not_enough_memory_splits_manifest_entries(
|
81
|
+
main_deltacat_storage_kwargs, compaction_audit
|
82
|
+
):
|
83
|
+
from deltacat.compute.compactor.utils import io
|
84
|
+
from deltacat.storage import metastore
|
85
|
+
|
86
|
+
(
|
87
|
+
delta_list,
|
88
|
+
hash_bucket_count,
|
89
|
+
high_watermark,
|
90
|
+
require_multiple_rounds,
|
91
|
+
) = io.fit_input_deltas(
|
92
|
+
[TEST_UPSERT_DELTA],
|
93
|
+
{"CPU": 2, "memory": 10},
|
94
|
+
compaction_audit,
|
95
|
+
20,
|
96
|
+
metastore,
|
97
|
+
main_deltacat_storage_kwargs,
|
98
|
+
)
|
99
|
+
|
100
|
+
assert hash_bucket_count is not None
|
101
|
+
assert len(delta_list) == 2
|
102
|
+
assert high_watermark is not None
|
103
|
+
assert require_multiple_rounds is False
|
104
|
+
|
105
|
+
|
106
|
+
def test_when_no_input_deltas(main_deltacat_storage_kwargs, compaction_audit):
|
107
|
+
from deltacat.compute.compactor.utils import io
|
108
|
+
from deltacat.storage import metastore
|
109
|
+
|
110
|
+
with pytest.raises(AssertionError):
|
111
|
+
io.fit_input_deltas(
|
112
|
+
[],
|
113
|
+
{"CPU": 100, "memory": 20000.0},
|
114
|
+
compaction_audit,
|
115
|
+
None,
|
116
|
+
metastore,
|
117
|
+
main_deltacat_storage_kwargs,
|
20
118
|
)
|
21
119
|
|
22
|
-
cls.kwargs_for_local_deltacat_storage: Dict[str, Any] = {
|
23
|
-
DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
|
24
|
-
}
|
25
|
-
|
26
|
-
cls.COMPACTION_AUDIT = CompactionSessionAuditInfo("1.0", "2.3", "test")
|
27
|
-
|
28
|
-
super().setUpClass()
|
29
120
|
|
30
|
-
|
31
|
-
|
32
|
-
|
121
|
+
def test_when_cpu_resources_is_not_passed(
|
122
|
+
main_deltacat_storage_kwargs, compaction_audit
|
123
|
+
):
|
124
|
+
from deltacat.compute.compactor.utils import io
|
125
|
+
from deltacat.storage import metastore
|
33
126
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
delta_list,
|
40
|
-
hash_bucket_count,
|
41
|
-
high_watermark,
|
42
|
-
require_multiple_rounds,
|
43
|
-
) = io.fit_input_deltas(
|
44
|
-
[TEST_UPSERT_DELTA],
|
45
|
-
{"CPU": 1, "memory": 20000000},
|
46
|
-
self.COMPACTION_AUDIT,
|
127
|
+
with pytest.raises(KeyError):
|
128
|
+
io.fit_input_deltas(
|
129
|
+
[],
|
130
|
+
{},
|
131
|
+
compaction_audit,
|
47
132
|
None,
|
48
|
-
|
49
|
-
|
50
|
-
)
|
51
|
-
|
52
|
-
self.assertIsNotNone(hash_bucket_count)
|
53
|
-
self.assertTrue(1, len(delta_list))
|
54
|
-
self.assertIsNotNone(high_watermark)
|
55
|
-
self.assertFalse(require_multiple_rounds)
|
56
|
-
self.assertIsNotNone(hash_bucket_count, self.COMPACTION_AUDIT.hash_bucket_count)
|
57
|
-
self.assertIsNotNone(self.COMPACTION_AUDIT.input_file_count)
|
58
|
-
self.assertIsNotNone(self.COMPACTION_AUDIT.input_size_bytes)
|
59
|
-
self.assertIsNotNone(self.COMPACTION_AUDIT.total_cluster_memory_bytes)
|
60
|
-
|
61
|
-
def test_when_hash_bucket_count_overridden(self):
|
62
|
-
from deltacat.compute.compactor.utils import io
|
63
|
-
import deltacat.tests.local_deltacat_storage as ds
|
64
|
-
|
65
|
-
(
|
66
|
-
delta_list,
|
67
|
-
hash_bucket_count,
|
68
|
-
high_watermark,
|
69
|
-
require_multiple_rounds,
|
70
|
-
) = io.fit_input_deltas(
|
71
|
-
[TEST_UPSERT_DELTA],
|
72
|
-
{"CPU": 1, "memory": 20000000},
|
73
|
-
self.COMPACTION_AUDIT,
|
74
|
-
20,
|
75
|
-
ds,
|
76
|
-
self.kwargs_for_local_deltacat_storage,
|
133
|
+
metastore,
|
134
|
+
main_deltacat_storage_kwargs,
|
77
135
|
)
|
78
|
-
|
79
|
-
self.assertEqual(20, hash_bucket_count)
|
80
|
-
self.assertEqual(1, len(delta_list))
|
81
|
-
self.assertIsNotNone(high_watermark)
|
82
|
-
self.assertFalse(require_multiple_rounds)
|
83
|
-
|
84
|
-
def test_when_not_enough_memory_splits_manifest_entries(self):
|
85
|
-
from deltacat.compute.compactor.utils import io
|
86
|
-
import deltacat.tests.local_deltacat_storage as ds
|
87
|
-
|
88
|
-
(
|
89
|
-
delta_list,
|
90
|
-
hash_bucket_count,
|
91
|
-
high_watermark,
|
92
|
-
require_multiple_rounds,
|
93
|
-
) = io.fit_input_deltas(
|
94
|
-
[TEST_UPSERT_DELTA],
|
95
|
-
{"CPU": 2, "memory": 10},
|
96
|
-
self.COMPACTION_AUDIT,
|
97
|
-
20,
|
98
|
-
ds,
|
99
|
-
self.kwargs_for_local_deltacat_storage,
|
100
|
-
)
|
101
|
-
|
102
|
-
self.assertIsNotNone(hash_bucket_count)
|
103
|
-
self.assertTrue(2, len(delta_list))
|
104
|
-
self.assertIsNotNone(high_watermark)
|
105
|
-
self.assertFalse(require_multiple_rounds)
|
106
|
-
|
107
|
-
def test_when_no_input_deltas(self):
|
108
|
-
from deltacat.compute.compactor.utils import io
|
109
|
-
import deltacat.tests.local_deltacat_storage as ds
|
110
|
-
|
111
|
-
with self.assertRaises(AssertionError):
|
112
|
-
io.fit_input_deltas(
|
113
|
-
[],
|
114
|
-
{"CPU": 100, "memory": 20000.0},
|
115
|
-
self.COMPACTION_AUDIT,
|
116
|
-
None,
|
117
|
-
ds,
|
118
|
-
self.kwargs_for_local_deltacat_storage,
|
119
|
-
)
|
120
|
-
|
121
|
-
def test_when_cpu_resources_is_not_passed(self):
|
122
|
-
from deltacat.compute.compactor.utils import io
|
123
|
-
import deltacat.tests.local_deltacat_storage as ds
|
124
|
-
|
125
|
-
with self.assertRaises(KeyError):
|
126
|
-
io.fit_input_deltas(
|
127
|
-
[],
|
128
|
-
{},
|
129
|
-
self.COMPACTION_AUDIT,
|
130
|
-
None,
|
131
|
-
ds,
|
132
|
-
self.kwargs_for_local_deltacat_storage,
|
133
|
-
)
|