deltacat 1.1.11__tar.gz → 1.1.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deltacat-1.1.11/deltacat.egg-info → deltacat-1.1.13}/PKG-INFO +1 -1
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/__init__.py +1 -1
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/aws/clients.py +1 -1
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/compact_partition_params.py +5 -0
- deltacat-1.1.13/deltacat/compute/compactor_v2/compaction_session.py +220 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/task_options.py +0 -1
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +1 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +32 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compact_partition_test_cases.py +19 -1
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/test_compact_partition_incremental.py +13 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/test_compact_partition_rebase.py +34 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +12 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/test_util_common.py +101 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/test_daft.py +38 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/test_pyarrow.py +63 -0
- {deltacat-1.1.11 → deltacat-1.1.13/deltacat.egg-info}/PKG-INFO +1 -1
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat.egg-info/requires.txt +2 -2
- {deltacat-1.1.11 → deltacat-1.1.13}/setup.py +2 -2
- deltacat-1.1.11/deltacat/compute/compactor_v2/compaction_session.py +0 -696
- {deltacat-1.1.11 → deltacat-1.1.13}/LICENSE +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/MANIFEST.in +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/README.md +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/aws/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/aws/constants.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/aws/redshift/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/aws/redshift/model/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/aws/redshift/model/manifest.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/aws/s3u.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/benchmarking/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/benchmarking/conftest.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/catalog/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/catalog/delegate.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/catalog/interface.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/catalog/model/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/catalog/model/catalog.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/catalog/model/table_definition.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/compaction_session.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/compactor_version.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/materialize_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/repartition_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/table_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/repartition_session.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/steps/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/steps/dedupe.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/steps/materialize.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/steps/repartition.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/utils/io.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/utils/round_completion_file.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/utils/sort_key.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/utils/system_columns.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/constants.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/io.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/merge_on_read/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/merge_on_read/daft.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/config/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/meta_stats.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/model/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/model/partition_stats_dict.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/model/stats_cluster_size_estimator.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/stats.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/utils/constants.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/utils/io.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/metastats/utils/ray_utils.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/basic.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/models/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/models/delta_stats.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/models/stats_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/types.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/utils/intervals.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/utils/io.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/stats/utils/manifest_stats_file.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/constants.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/exceptions.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/aws/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/aws/redshift/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/dataset.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/file_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/memcached_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/ray_plasma_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/read_api.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/redis_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/io/s3_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/logs.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/interface.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/delete_parameters.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/delta.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/list_result.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/locator.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/namespace.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/partition.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/partition_spec.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/sort_key.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/stream.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/table.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/table_version.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/transform.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/storage/model/types.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/aws/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/aws/test_clients.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/aws/test_s3u.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/catalog/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/test_util_constant.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/io/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/io/test_file_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/io/test_memcached_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/io/test_redis_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/io/test_s3_object_store.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/stats/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/stats/test_intervals.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/test_exceptions.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/test_logs.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/test_utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/test_utils/constants.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/test_utils/pyarrow.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/test_utils/storage.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/test_utils/utils.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/data/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/test_cloudpickle.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/test_metrics.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/test_placement.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/utils/test_resources.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/types/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/types/media.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/types/partial_download.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/types/tables.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/arguments.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/cloudpickle.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/common.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/daft.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/metrics.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/numpy.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/pandas.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/performance.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/placement.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/pyarrow.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/ray_utils/__init__.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/ray_utils/collections.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/ray_utils/concurrency.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/ray_utils/dataset.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/ray_utils/performance.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/ray_utils/runtime.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/resources.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/s3fs.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat/utils/schema.py +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat.egg-info/SOURCES.txt +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat.egg-info/dependency_links.txt +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/deltacat.egg-info/top_level.txt +0 -0
- {deltacat-1.1.11 → deltacat-1.1.13}/setup.cfg +0 -0
{deltacat-1.1.11 → deltacat-1.1.13}/deltacat/aws/clients.py
RENAMED
@@ -42,7 +42,7 @@ RETRYABLE_HTTP_STATUS_CODES = [
|
|
42
42
|
|
43
43
|
boto_retry_wrapper = Retrying(
|
44
44
|
wait=wait_random_exponential(multiplier=1, max=10),
|
45
|
-
stop=stop_after_delay(60 *
|
45
|
+
stop=stop_after_delay(60 * 10),
|
46
46
|
# CredentialRetrievalError can still be thrown due to throttling, even if IMDS health checks succeed.
|
47
47
|
retry=retry_if_exception_type(CredentialRetrievalError),
|
48
48
|
)
|
{deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor/model/compact_partition_params.py
RENAMED
@@ -185,6 +185,11 @@ class CompactPartitionParams(dict):
|
|
185
185
|
|
186
186
|
@property
|
187
187
|
def task_max_parallelism(self) -> int:
|
188
|
+
if self.pg_config:
|
189
|
+
cluster_resources = self.pg_config.resource
|
190
|
+
cluster_cpus = cluster_resources["CPU"]
|
191
|
+
self.task_max_parallelism = cluster_cpus
|
192
|
+
self["task_max_parallelism"] = self.task_max_parallelism
|
188
193
|
return self["task_max_parallelism"]
|
189
194
|
|
190
195
|
@task_max_parallelism.setter
|
deltacat-1.1.13/deltacat/compute/compactor_v2/compaction_session.py
ADDED
@@ -0,0 +1,220 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import importlib
|
3
|
+
from contextlib import nullcontext
|
4
|
+
import logging
|
5
|
+
import time
|
6
|
+
import ray
|
7
|
+
|
8
|
+
import deltacat
|
9
|
+
from deltacat.compute.compactor import (
|
10
|
+
PyArrowWriteResult,
|
11
|
+
RoundCompletionInfo,
|
12
|
+
)
|
13
|
+
from deltacat import logs
|
14
|
+
from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
|
15
|
+
ExecutionCompactionResult,
|
16
|
+
)
|
17
|
+
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
18
|
+
from deltacat.compute.compactor.utils import round_completion_file as rcf
|
19
|
+
from deltacat.compute.compactor import DeltaAnnotated
|
20
|
+
from deltacat.compute.compactor_v2.deletes.delete_strategy import (
|
21
|
+
DeleteStrategy,
|
22
|
+
)
|
23
|
+
from deltacat.compute.compactor.model.materialize_result import MaterializeResult
|
24
|
+
from deltacat.compute.compactor_v2.model.merge_result import MergeResult
|
25
|
+
from deltacat.compute.compactor_v2.deletes.delete_file_envelope import (
|
26
|
+
DeleteFileEnvelope,
|
27
|
+
)
|
28
|
+
from deltacat.storage import (
|
29
|
+
Delta,
|
30
|
+
DeltaLocator,
|
31
|
+
Manifest,
|
32
|
+
Partition,
|
33
|
+
)
|
34
|
+
from deltacat.compute.compactor.model.compact_partition_params import (
|
35
|
+
CompactPartitionParams,
|
36
|
+
)
|
37
|
+
from deltacat.utils.resources import (
|
38
|
+
get_current_process_peak_memory_usage_in_bytes,
|
39
|
+
)
|
40
|
+
from deltacat.compute.compactor_v2.private.compaction_utils import (
|
41
|
+
_fetch_compaction_metadata,
|
42
|
+
_build_uniform_deltas,
|
43
|
+
_run_hash_and_merge,
|
44
|
+
_process_merge_results,
|
45
|
+
_upload_compaction_audit,
|
46
|
+
_write_new_round_completion_file,
|
47
|
+
_commit_compaction_result,
|
48
|
+
)
|
49
|
+
from deltacat.utils.metrics import metrics
|
50
|
+
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
51
|
+
CompactionSessionAuditInfo,
|
52
|
+
)
|
53
|
+
|
54
|
+
from typing import List, Optional
|
55
|
+
from deltacat.compute.compactor_v2.utils import io
|
56
|
+
from deltacat.exceptions import categorize_errors
|
57
|
+
from deltacat.compute.compactor_v2.constants import COMPACT_PARTITION_METRIC_PREFIX
|
58
|
+
|
59
|
+
if importlib.util.find_spec("memray"):
|
60
|
+
import memray
|
61
|
+
|
62
|
+
|
63
|
+
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
64
|
+
|
65
|
+
|
66
|
+
@metrics(prefix=COMPACT_PARTITION_METRIC_PREFIX)
|
67
|
+
@categorize_errors
|
68
|
+
def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]:
|
69
|
+
assert (
|
70
|
+
params.hash_bucket_count is not None and params.hash_bucket_count >= 1
|
71
|
+
), "hash_bucket_count is a required arg for compactor v2"
|
72
|
+
|
73
|
+
with memray.Tracker(
|
74
|
+
"compaction_partition.bin"
|
75
|
+
) if params.enable_profiler else nullcontext():
|
76
|
+
execute_compaction_result: ExecutionCompactionResult = _execute_compaction(
|
77
|
+
params,
|
78
|
+
**kwargs,
|
79
|
+
)
|
80
|
+
_commit_compaction_result(params, execute_compaction_result)
|
81
|
+
return execute_compaction_result.round_completion_file_s3_url
|
82
|
+
|
83
|
+
|
84
|
+
def _execute_compaction(
|
85
|
+
params: CompactPartitionParams, **kwargs
|
86
|
+
) -> ExecutionCompactionResult:
|
87
|
+
compaction_start_time: float = time.monotonic()
|
88
|
+
# Fetch round completion info for previously compacted partition, if it exists
|
89
|
+
fetch_compaction_metadata_result: tuple[
|
90
|
+
Optional[Manifest], Optional[RoundCompletionInfo]
|
91
|
+
] = _fetch_compaction_metadata(params)
|
92
|
+
(
|
93
|
+
previous_compacted_delta_manifest,
|
94
|
+
round_completion_info,
|
95
|
+
) = fetch_compaction_metadata_result
|
96
|
+
rcf_source_partition_locator: rcf.PartitionLocator = (
|
97
|
+
params.rebase_source_partition_locator or params.source_partition_locator
|
98
|
+
)
|
99
|
+
|
100
|
+
base_audit_url: str = rcf_source_partition_locator.path(
|
101
|
+
f"s3://{params.compaction_artifact_s3_bucket}/compaction-audit"
|
102
|
+
)
|
103
|
+
audit_url: str = f"{base_audit_url}.json"
|
104
|
+
logger.info(f"Compaction audit will be written to {audit_url}")
|
105
|
+
compaction_audit: CompactionSessionAuditInfo = (
|
106
|
+
CompactionSessionAuditInfo(deltacat.__version__, ray.__version__, audit_url)
|
107
|
+
.set_hash_bucket_count(params.hash_bucket_count)
|
108
|
+
.set_compactor_version(CompactorVersion.V2.value)
|
109
|
+
)
|
110
|
+
|
111
|
+
if params.pg_config:
|
112
|
+
logger.info(
|
113
|
+
"pg_config specified. Tasks will be scheduled in a placement group."
|
114
|
+
)
|
115
|
+
cluster_resources = params.pg_config.resource
|
116
|
+
cluster_memory = cluster_resources["memory"]
|
117
|
+
compaction_audit.set_total_cluster_memory_bytes(cluster_memory)
|
118
|
+
high_watermark = (
|
119
|
+
round_completion_info.high_watermark if round_completion_info else None
|
120
|
+
)
|
121
|
+
audit_url = compaction_audit.audit_url if compaction_audit else None
|
122
|
+
# discover and build uniform deltas
|
123
|
+
delta_discovery_start = time.monotonic()
|
124
|
+
input_deltas: List[Delta] = io.discover_deltas(
|
125
|
+
params.source_partition_locator,
|
126
|
+
params.last_stream_position_to_compact,
|
127
|
+
params.rebase_source_partition_locator,
|
128
|
+
params.rebase_source_partition_high_watermark,
|
129
|
+
high_watermark,
|
130
|
+
params.deltacat_storage,
|
131
|
+
params.deltacat_storage_kwargs,
|
132
|
+
params.list_deltas_kwargs,
|
133
|
+
)
|
134
|
+
if not input_deltas:
|
135
|
+
logger.info("No input deltas found to compact.")
|
136
|
+
return ExecutionCompactionResult(None, None, None, False)
|
137
|
+
build_uniform_deltas_result: tuple[
|
138
|
+
List[DeltaAnnotated], DeleteStrategy, List[DeleteFileEnvelope], Partition
|
139
|
+
] = _build_uniform_deltas(
|
140
|
+
params, compaction_audit, input_deltas, delta_discovery_start
|
141
|
+
)
|
142
|
+
(
|
143
|
+
uniform_deltas,
|
144
|
+
delete_strategy,
|
145
|
+
delete_file_envelopes,
|
146
|
+
) = build_uniform_deltas_result
|
147
|
+
|
148
|
+
# run merge
|
149
|
+
_run_hash_and_merge_result: tuple[
|
150
|
+
Optional[List[MergeResult]],
|
151
|
+
np.float64,
|
152
|
+
np.float64,
|
153
|
+
Partition,
|
154
|
+
] = _run_hash_and_merge(
|
155
|
+
params,
|
156
|
+
uniform_deltas,
|
157
|
+
round_completion_info,
|
158
|
+
delete_strategy,
|
159
|
+
delete_file_envelopes,
|
160
|
+
compaction_audit,
|
161
|
+
previous_compacted_delta_manifest,
|
162
|
+
)
|
163
|
+
(
|
164
|
+
merge_results,
|
165
|
+
telemetry_time_hb,
|
166
|
+
telemetry_time_merge,
|
167
|
+
compacted_partition,
|
168
|
+
) = _run_hash_and_merge_result
|
169
|
+
# process merge results
|
170
|
+
process_merge_results: tuple[
|
171
|
+
Delta, list[MaterializeResult], dict
|
172
|
+
] = _process_merge_results(params, merge_results, compaction_audit)
|
173
|
+
merged_delta, mat_results, hb_id_to_entry_indices_range = process_merge_results
|
174
|
+
# Record information, logging, and return ExecutionCompactionResult
|
175
|
+
record_info_msg: str = f" Materialized records: {merged_delta.meta.record_count}"
|
176
|
+
logger.info(record_info_msg)
|
177
|
+
compacted_delta: Delta = params.deltacat_storage.commit_delta(
|
178
|
+
merged_delta,
|
179
|
+
properties=kwargs.get("properties", {}),
|
180
|
+
**params.deltacat_storage_kwargs,
|
181
|
+
)
|
182
|
+
|
183
|
+
logger.info(f"Committed compacted delta: {compacted_delta}")
|
184
|
+
compaction_end_time: float = time.monotonic()
|
185
|
+
compaction_audit.set_compaction_time_in_seconds(
|
186
|
+
compaction_end_time - compaction_start_time
|
187
|
+
)
|
188
|
+
new_compacted_delta_locator: DeltaLocator = DeltaLocator.of(
|
189
|
+
compacted_partition.locator,
|
190
|
+
compacted_delta.stream_position,
|
191
|
+
)
|
192
|
+
pyarrow_write_result: PyArrowWriteResult = PyArrowWriteResult.union(
|
193
|
+
[m.pyarrow_write_result for m in mat_results]
|
194
|
+
)
|
195
|
+
|
196
|
+
session_peak_memory = get_current_process_peak_memory_usage_in_bytes()
|
197
|
+
compaction_audit.set_peak_memory_used_bytes_by_compaction_session_process(
|
198
|
+
session_peak_memory
|
199
|
+
)
|
200
|
+
|
201
|
+
compaction_audit.save_round_completion_stats(
|
202
|
+
mat_results, telemetry_time_hb + telemetry_time_merge
|
203
|
+
)
|
204
|
+
|
205
|
+
_upload_compaction_audit(
|
206
|
+
params,
|
207
|
+
compaction_audit,
|
208
|
+
round_completion_info,
|
209
|
+
)
|
210
|
+
compaction_result: ExecutionCompactionResult = _write_new_round_completion_file(
|
211
|
+
params,
|
212
|
+
compaction_audit,
|
213
|
+
compacted_partition,
|
214
|
+
audit_url,
|
215
|
+
hb_id_to_entry_indices_range,
|
216
|
+
rcf_source_partition_locator,
|
217
|
+
new_compacted_delta_locator,
|
218
|
+
pyarrow_write_result,
|
219
|
+
)
|
220
|
+
return compaction_result
|
{deltacat-1.1.11 → deltacat-1.1.13}/deltacat/compute/compactor_v2/utils/task_options.py
RENAMED
@@ -145,7 +145,6 @@ def hash_bucket_resource_options_provider(
|
|
145
145
|
size_bytes = 0.0
|
146
146
|
num_rows = 0
|
147
147
|
total_pk_size = 0
|
148
|
-
|
149
148
|
if not item.manifest or not item.manifest.entries:
|
150
149
|
logger.debug(
|
151
150
|
f"[Hash bucket task {index}]: No manifest entries, skipping memory allocation calculation"
|
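{deltacat-1.1.11 → deltacat-1.1.13}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py
RENAMED
@@ -4,6 +4,8 @@ from deltacat.tests.compute.test_util_common import (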
|
|
4
4
|
offer_iso8601_timestamp_list,
|
5
5
|
PartitionKey,
|
6
6
|
PartitionKeyType,
|
7
|
+
assert_compaction_audit,
|
8
|
+
assert_compaction_audit_no_hash_bucket,
|
7
9
|
)
|
8
10
|
from deltacat.tests.compute.test_util_constant import (
|
9
11
|
DEFAULT_MAX_RECORDS_PER_FILE,
|
@@ -115,6 +117,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
115
117
|
read_kwargs_provider=None,
|
116
118
|
drop_duplicates=True,
|
117
119
|
skip_enabled_compact_partition_drivers=None,
|
120
|
+
assert_compaction_audit=assert_compaction_audit,
|
118
121
|
),
|
119
122
|
"2-rebase-then-incremental-pk-multi": RebaseThenIncrementalCompactionTestCaseParams(
|
120
123
|
primary_keys={"pk_col_1", "pk_col_2"},
|
@@ -195,6 +198,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
195
198
|
read_kwargs_provider=None,
|
196
199
|
drop_duplicates=True,
|
197
200
|
skip_enabled_compact_partition_drivers=None,
|
201
|
+
assert_compaction_audit=assert_compaction_audit,
|
198
202
|
),
|
199
203
|
"3-rebase-then-incremental-no-sk-no-partition-key": RebaseThenIncrementalCompactionTestCaseParams(
|
200
204
|
primary_keys={"pk_col_1"},
|
@@ -244,6 +248,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
244
248
|
read_kwargs_provider=None,
|
245
249
|
drop_duplicates=True,
|
246
250
|
skip_enabled_compact_partition_drivers=None,
|
251
|
+
assert_compaction_audit=assert_compaction_audit,
|
247
252
|
),
|
248
253
|
"4-rebase-then-incremental-partial-deltas-on-incremental-deltas": RebaseThenIncrementalCompactionTestCaseParams(
|
249
254
|
primary_keys={"pk_col_1"},
|
@@ -293,6 +298,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
293
298
|
read_kwargs_provider=None,
|
294
299
|
drop_duplicates=True,
|
295
300
|
skip_enabled_compact_partition_drivers=None,
|
301
|
+
assert_compaction_audit=assert_compaction_audit,
|
296
302
|
),
|
297
303
|
"5-rebase-then-incremental-partial-deltas-on-incremental-deltas-2": RebaseThenIncrementalCompactionTestCaseParams(
|
298
304
|
primary_keys={"pk_col_1"},
|
@@ -348,6 +354,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
348
354
|
read_kwargs_provider=None,
|
349
355
|
drop_duplicates=True,
|
350
356
|
skip_enabled_compact_partition_drivers=None,
|
357
|
+
assert_compaction_audit=assert_compaction_audit,
|
351
358
|
),
|
352
359
|
"6-rebase-then-incremental-hash-bucket-GT-records-per-compacted-file-v2-only": RebaseThenIncrementalCompactionTestCaseParams(
|
353
360
|
primary_keys={"pk_col_1"},
|
@@ -408,6 +415,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
408
415
|
read_kwargs_provider=None,
|
409
416
|
drop_duplicates=True,
|
410
417
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
418
|
+
assert_compaction_audit=assert_compaction_audit,
|
411
419
|
),
|
412
420
|
"7-rebase-then-incremental-no-pk-compactor-v2-only": RebaseThenIncrementalCompactionTestCaseParams(
|
413
421
|
primary_keys=ZERO_VALUED_PRIMARY_KEY,
|
@@ -459,6 +467,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
459
467
|
read_kwargs_provider=None,
|
460
468
|
drop_duplicates=True,
|
461
469
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
470
|
+
assert_compaction_audit=assert_compaction_audit,
|
462
471
|
),
|
463
472
|
"8-rebase-then-incremental-empty-csv-delta-case": RebaseThenIncrementalCompactionTestCaseParams(
|
464
473
|
primary_keys={"pk_col_1"},
|
@@ -515,6 +524,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
515
524
|
read_kwargs_provider=None,
|
516
525
|
drop_duplicates=True,
|
517
526
|
skip_enabled_compact_partition_drivers=None,
|
527
|
+
assert_compaction_audit=None,
|
518
528
|
),
|
519
529
|
"9-rebase-then-incremental-single-hash-bucket": RebaseThenIncrementalCompactionTestCaseParams(
|
520
530
|
primary_keys={"pk_col_1"},
|
@@ -575,6 +585,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
575
585
|
read_kwargs_provider=None,
|
576
586
|
drop_duplicates=True,
|
577
587
|
skip_enabled_compact_partition_drivers=None,
|
588
|
+
assert_compaction_audit=None,
|
578
589
|
),
|
579
590
|
"10-rebase-then-incremental-drop-duplicates-false-on-incremental-v2-only": RebaseThenIncrementalCompactionTestCaseParams(
|
580
591
|
primary_keys={"pk_col_1"},
|
@@ -630,6 +641,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
630
641
|
read_kwargs_provider=None,
|
631
642
|
drop_duplicates=False,
|
632
643
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
644
|
+
assert_compaction_audit=assert_compaction_audit,
|
633
645
|
),
|
634
646
|
"11-rebase-then-empty-incremental-delta": RebaseThenIncrementalCompactionTestCaseParams(
|
635
647
|
primary_keys={"pk_col_1"},
|
@@ -676,6 +688,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
676
688
|
read_kwargs_provider=None,
|
677
689
|
drop_duplicates=True,
|
678
690
|
skip_enabled_compact_partition_drivers=None,
|
691
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
679
692
|
),
|
680
693
|
"12-rebase-then-incremental-hash-bucket-single": RebaseThenIncrementalCompactionTestCaseParams(
|
681
694
|
primary_keys={"pk_col_1"},
|
@@ -736,6 +749,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
736
749
|
read_kwargs_provider=None,
|
737
750
|
drop_duplicates=True,
|
738
751
|
skip_enabled_compact_partition_drivers=None,
|
752
|
+
assert_compaction_audit=None,
|
739
753
|
),
|
740
754
|
"13-rebase-then-empty-incremental-delta-hash-bucket-single": RebaseThenIncrementalCompactionTestCaseParams(
|
741
755
|
primary_keys={"pk_col_1"},
|
@@ -782,6 +796,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
782
796
|
read_kwargs_provider=None,
|
783
797
|
drop_duplicates=True,
|
784
798
|
skip_enabled_compact_partition_drivers=None,
|
799
|
+
assert_compaction_audit=None,
|
785
800
|
),
|
786
801
|
}
|
787
802
|
|
@@ -882,6 +897,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
882
897
|
read_kwargs_provider=None,
|
883
898
|
drop_duplicates=True,
|
884
899
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
900
|
+
assert_compaction_audit=assert_compaction_audit,
|
885
901
|
),
|
886
902
|
"15-rebase-then-incremental-delete-type-delta-on-incremental-multi-pk": RebaseThenIncrementalCompactionTestCaseParams(
|
887
903
|
primary_keys={"pk_col_1", "pk_col_2"},
|
@@ -933,6 +949,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
933
949
|
read_kwargs_provider=None,
|
934
950
|
drop_duplicates=True,
|
935
951
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
952
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
936
953
|
),
|
937
954
|
"16-rebase-then-incremental-delete-type-delta-on-incremental-multi-pk-delete-all": RebaseThenIncrementalCompactionTestCaseParams(
|
938
955
|
primary_keys={"pk_col_1", "pk_col_2"},
|
@@ -990,6 +1007,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
990
1007
|
read_kwargs_provider=None,
|
991
1008
|
drop_duplicates=True,
|
992
1009
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1010
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
993
1011
|
),
|
994
1012
|
"17-rebase-then-incremental-delete-type-delta-delete-entire-base-table": RebaseThenIncrementalCompactionTestCaseParams(
|
995
1013
|
primary_keys={"pk_col_1"},
|
@@ -1043,6 +1061,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1043
1061
|
read_kwargs_provider=None,
|
1044
1062
|
drop_duplicates=True,
|
1045
1063
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1064
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
1046
1065
|
),
|
1047
1066
|
"18-rebase-then-incremental-delete-type-delta-keep-base-table-drop-all-incremental": RebaseThenIncrementalCompactionTestCaseParams(
|
1048
1067
|
primary_keys={"pk_col_1"},
|
@@ -1124,6 +1143,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1124
1143
|
read_kwargs_provider=None,
|
1125
1144
|
drop_duplicates=True,
|
1126
1145
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1146
|
+
assert_compaction_audit=assert_compaction_audit,
|
1127
1147
|
),
|
1128
1148
|
"19-rebase-then-incremental-delete-type-delta-drop-only-from-base-table-keep-all-incremental": RebaseThenIncrementalCompactionTestCaseParams(
|
1129
1149
|
primary_keys={"pk_col_1"},
|
@@ -1194,6 +1214,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1194
1214
|
read_kwargs_provider=None,
|
1195
1215
|
drop_duplicates=True,
|
1196
1216
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1217
|
+
assert_compaction_audit=assert_compaction_audit,
|
1197
1218
|
),
|
1198
1219
|
"20-rebase-then-incremental-delete-type-delta-drop-all-base-table-drop-all-incremental": RebaseThenIncrementalCompactionTestCaseParams(
|
1199
1220
|
primary_keys={"pk_col_1"},
|
@@ -1258,6 +1279,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1258
1279
|
read_kwargs_provider=None,
|
1259
1280
|
drop_duplicates=True,
|
1260
1281
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1282
|
+
assert_compaction_audit=assert_compaction_audit,
|
1261
1283
|
),
|
1262
1284
|
"21-rebase-then-incremental-delete-type-delta-UDDUUDD": RebaseThenIncrementalCompactionTestCaseParams(
|
1263
1285
|
primary_keys={"pk_col_1"},
|
@@ -1349,6 +1371,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1349
1371
|
read_kwargs_provider=None,
|
1350
1372
|
drop_duplicates=True,
|
1351
1373
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1374
|
+
assert_compaction_audit=assert_compaction_audit,
|
1352
1375
|
),
|
1353
1376
|
"22-rebase-then-incremental-delete-type-delta-UD-affects-compacted-and-incremental": RebaseThenIncrementalCompactionTestCaseParams(
|
1354
1377
|
primary_keys={"pk_col_1"},
|
@@ -1413,6 +1436,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1413
1436
|
read_kwargs_provider=None,
|
1414
1437
|
drop_duplicates=True,
|
1415
1438
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1439
|
+
assert_compaction_audit=assert_compaction_audit,
|
1416
1440
|
),
|
1417
1441
|
"23-rebase-then-incremental-delete-type-delta-UDU-upsert-again": RebaseThenIncrementalCompactionTestCaseParams(
|
1418
1442
|
primary_keys={"pk_col_1"},
|
@@ -1483,6 +1507,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1483
1507
|
read_kwargs_provider=None,
|
1484
1508
|
drop_duplicates=True,
|
1485
1509
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1510
|
+
assert_compaction_audit=assert_compaction_audit,
|
1486
1511
|
),
|
1487
1512
|
"24-rebase-then-incremental-delete-type-no-delete-column-has-delete-deltas-expected-exception": RebaseThenIncrementalCompactionTestCaseParams(
|
1488
1513
|
primary_keys={"pk_col_1"},
|
@@ -1547,6 +1572,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1547
1572
|
read_kwargs_provider=None,
|
1548
1573
|
drop_duplicates=True,
|
1549
1574
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1575
|
+
assert_compaction_audit=assert_compaction_audit,
|
1550
1576
|
),
|
1551
1577
|
"25-rebase-then-incremental-delete-type-delta-has-delete-column-no-delete-records": RebaseThenIncrementalCompactionTestCaseParams(
|
1552
1578
|
primary_keys={"pk_col_1"},
|
@@ -1612,6 +1638,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1612
1638
|
read_kwargs_provider=None,
|
1613
1639
|
drop_duplicates=True,
|
1614
1640
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1641
|
+
assert_compaction_audit=assert_compaction_audit,
|
1615
1642
|
),
|
1616
1643
|
"26-rebase-then-incremental-delete-type-delta-UDU-duplicate-delete-records": RebaseThenIncrementalCompactionTestCaseParams(
|
1617
1644
|
primary_keys={"pk_col_1"},
|
@@ -1671,6 +1698,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1671
1698
|
read_kwargs_provider=None,
|
1672
1699
|
drop_duplicates=True,
|
1673
1700
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1701
|
+
assert_compaction_audit=assert_compaction_audit,
|
1674
1702
|
),
|
1675
1703
|
"27-rebase-then-incremental-delete-type-delta-DDU-deletes-then-upserts": RebaseThenIncrementalCompactionTestCaseParams(
|
1676
1704
|
primary_keys={"pk_col_1"},
|
@@ -1740,6 +1768,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1740
1768
|
read_kwargs_provider=None,
|
1741
1769
|
drop_duplicates=True,
|
1742
1770
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1771
|
+
assert_compaction_audit=assert_compaction_audit,
|
1743
1772
|
),
|
1744
1773
|
"28-rebase-then-incremental-delete-type-delta-hash-bucket-single": RebaseThenIncrementalCompactionTestCaseParams(
|
1745
1774
|
primary_keys={"pk_col_1"},
|
@@ -1830,6 +1859,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1830
1859
|
read_kwargs_provider=None,
|
1831
1860
|
drop_duplicates=True,
|
1832
1861
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1862
|
+
assert_compaction_audit=None,
|
1833
1863
|
),
|
1834
1864
|
"29-rebase-then-incremental-delete-type-delta-no-pk-compactor": RebaseThenIncrementalCompactionTestCaseParams(
|
1835
1865
|
primary_keys=ZERO_VALUED_PRIMARY_KEY,
|
@@ -1901,6 +1931,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1901
1931
|
read_kwargs_provider=None,
|
1902
1932
|
drop_duplicates=True,
|
1903
1933
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1934
|
+
assert_compaction_audit=assert_compaction_audit,
|
1904
1935
|
),
|
1905
1936
|
"30-rebase-then-incremental-delete-type-delta-on-incremental-compactor-v1-v2": RebaseThenIncrementalCompactionTestCaseParams(
|
1906
1937
|
primary_keys={"pk_col_1"},
|
@@ -1950,6 +1981,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1950
1981
|
read_kwargs_provider=None,
|
1951
1982
|
drop_duplicates=True,
|
1952
1983
|
skip_enabled_compact_partition_drivers=None,
|
1984
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
1953
1985
|
),
|
1954
1986
|
}
|
1955
1987
|
|
@@ -4,6 +4,8 @@ from deltacat.tests.compute.test_util_common import (
|
|
4
4
|
offer_iso8601_timestamp_list,
|
5
5
|
PartitionKey,
|
6
6
|
PartitionKeyType,
|
7
|
+
assert_compaction_audit,
|
8
|
+
assert_compaction_audit_no_hash_bucket,
|
7
9
|
)
|
8
10
|
from deltacat.tests.compute.test_util_constant import (
|
9
11
|
DEFAULT_MAX_RECORDS_PER_FILE,
|
@@ -64,6 +66,7 @@ class BaseCompactorTestCase:
|
|
64
66
|
read_kwargs_provider: Optional[ReadKwargsProvider] - argument for read_kwargs_provider parameter in compact_partition. If None then no ReadKwargsProvider is provided to compact_partition_params
|
65
67
|
drop_duplicates: bool - argument for drop_duplicates parameter in compact_partition. Only recognized by compactor v2.
|
66
68
|
skip_enabled_compact_partition_drivers: List[CompactorVersion] - skip whatever enabled_compact_partition_drivers are included in this list
|
69
|
+
assert_compaction_audit: Optional[Callable] - argument that asserts compaction_audit is updated only if compactor_version is v2.
|
67
70
|
"""
|
68
71
|
|
69
72
|
primary_keys: Set[str]
|
@@ -81,6 +84,7 @@ class BaseCompactorTestCase:
|
|
81
84
|
read_kwargs_provider: Optional[ReadKwargsProvider]
|
82
85
|
drop_duplicates: bool
|
83
86
|
skip_enabled_compact_partition_drivers: List[CompactorVersion]
|
87
|
+
assert_compaction_audit: Optional[Callable]
|
84
88
|
|
85
89
|
# makes CompactorTestCase iterable which is required to build the list of pytest.param values to pass to pytest.mark.parametrize
|
86
90
|
def __iter__(self):
|
@@ -127,8 +131,8 @@ def with_compactor_version_func_test_param(
|
|
127
131
|
enriched_test_cases[f"{tc_name}_{compactor_version}"] = [
|
128
132
|
*tc_params,
|
129
133
|
compact_partition_func,
|
134
|
+
compactor_version,
|
130
135
|
]
|
131
|
-
|
132
136
|
return enriched_test_cases
|
133
137
|
|
134
138
|
|
@@ -157,6 +161,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
157
161
|
is_inplace=False,
|
158
162
|
add_late_deltas=None,
|
159
163
|
skip_enabled_compact_partition_drivers=None,
|
164
|
+
assert_compaction_audit=assert_compaction_audit,
|
160
165
|
),
|
161
166
|
"2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
|
162
167
|
primary_keys={"pk_col_1"},
|
@@ -185,6 +190,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
185
190
|
is_inplace=False,
|
186
191
|
add_late_deltas=None,
|
187
192
|
skip_enabled_compact_partition_drivers=None,
|
193
|
+
assert_compaction_audit=assert_compaction_audit,
|
188
194
|
),
|
189
195
|
"3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
|
190
196
|
primary_keys={"pk_col_1"},
|
@@ -222,6 +228,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
222
228
|
is_inplace=False,
|
223
229
|
add_late_deltas=None,
|
224
230
|
skip_enabled_compact_partition_drivers=None,
|
231
|
+
assert_compaction_audit=assert_compaction_audit,
|
225
232
|
),
|
226
233
|
"4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
|
227
234
|
primary_keys={"pk_col_1"},
|
@@ -258,6 +265,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
258
265
|
is_inplace=False,
|
259
266
|
add_late_deltas=None,
|
260
267
|
skip_enabled_compact_partition_drivers=None,
|
268
|
+
assert_compaction_audit=assert_compaction_audit,
|
261
269
|
),
|
262
270
|
"5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
|
263
271
|
primary_keys={"pk_col_1"},
|
@@ -289,6 +297,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
289
297
|
is_inplace=False,
|
290
298
|
add_late_deltas=None,
|
291
299
|
skip_enabled_compact_partition_drivers=None,
|
300
|
+
assert_compaction_audit=assert_compaction_audit,
|
292
301
|
),
|
293
302
|
"6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
|
294
303
|
primary_keys={"pk_col_1"},
|
@@ -320,6 +329,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
320
329
|
is_inplace=False,
|
321
330
|
add_late_deltas=None,
|
322
331
|
skip_enabled_compact_partition_drivers=None,
|
332
|
+
assert_compaction_audit=assert_compaction_audit,
|
323
333
|
),
|
324
334
|
"7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
|
325
335
|
primary_keys={"pk_col_1"},
|
@@ -351,6 +361,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
351
361
|
is_inplace=False,
|
352
362
|
add_late_deltas=None,
|
353
363
|
skip_enabled_compact_partition_drivers=None,
|
364
|
+
assert_compaction_audit=assert_compaction_audit,
|
354
365
|
),
|
355
366
|
"8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
|
356
367
|
primary_keys={"pk_col_1", "pk_col_2"},
|
@@ -384,6 +395,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
384
395
|
is_inplace=False,
|
385
396
|
add_late_deltas=None,
|
386
397
|
skip_enabled_compact_partition_drivers=None,
|
398
|
+
assert_compaction_audit=assert_compaction_audit,
|
387
399
|
),
|
388
400
|
"9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
|
389
401
|
primary_keys={"pk_col_1"},
|
@@ -415,6 +427,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
415
427
|
is_inplace=False,
|
416
428
|
add_late_deltas=None,
|
417
429
|
skip_enabled_compact_partition_drivers=None,
|
430
|
+
assert_compaction_audit=assert_compaction_audit,
|
418
431
|
),
|
419
432
|
"10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
|
420
433
|
primary_keys={"pk_col_1"},
|
@@ -446,6 +459,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
446
459
|
is_inplace=False,
|
447
460
|
add_late_deltas=None,
|
448
461
|
skip_enabled_compact_partition_drivers=None,
|
462
|
+
assert_compaction_audit=assert_compaction_audit,
|
449
463
|
),
|
450
464
|
"11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
|
451
465
|
primary_keys={"pk_col_1"},
|
@@ -477,6 +491,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
477
491
|
is_inplace=False,
|
478
492
|
add_late_deltas=None,
|
479
493
|
skip_enabled_compact_partition_drivers=None,
|
494
|
+
assert_compaction_audit=assert_compaction_audit,
|
480
495
|
),
|
481
496
|
"12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
482
497
|
primary_keys={"pk_col_1"},
|
@@ -508,6 +523,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
508
523
|
is_inplace=False,
|
509
524
|
add_late_deltas=None,
|
510
525
|
skip_enabled_compact_partition_drivers=None,
|
526
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
511
527
|
),
|
512
528
|
"13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
|
513
529
|
primary_keys={"pk_col_1"},
|
@@ -551,6 +567,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
551
567
|
)
|
552
568
|
],
|
553
569
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
570
|
+
assert_compaction_audit=None,
|
554
571
|
),
|
555
572
|
"14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
|
556
573
|
primary_keys={"pk_col_1"},
|
@@ -582,6 +599,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
582
599
|
is_inplace=False,
|
583
600
|
add_late_deltas=False,
|
584
601
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
602
|
+
assert_compaction_audit=None,
|
585
603
|
),
|
586
604
|
}
|
587
605
|
|