deltacat 1.1.9__tar.gz → 1.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat-1.1.11/PKG-INFO +50 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/__init__.py +1 -1
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/redshift/model/manifest.py +16 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/s3u.py +19 -13
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/compaction_session.py +5 -1
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/repartition_session.py +1 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/round_completion_file.py +39 -9
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/compaction_session.py +15 -11
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/constants.py +3 -0
- deltacat-1.1.9/deltacat/compute/compactor_v2/model/compaction_session.py → deltacat-1.1.11/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +1 -2
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/primary_key_index.py +1 -1
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/exceptions.py +5 -2
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/dataset.py +5 -17
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/__init__.py +24 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/interface.py +42 -6
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/delta.py +23 -3
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/partition.py +6 -7
- deltacat-1.1.11/deltacat/storage/model/partition_spec.py +71 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/stream.py +38 -1
- deltacat-1.1.11/deltacat/storage/model/transform.py +127 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/aws/test_s3u.py +2 -0
- deltacat-1.1.11/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +231 -0
- deltacat-1.1.11/deltacat/tests/compute/compactor_v2/test_compaction_session.py +255 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_compact_partition_rebase.py +1 -1
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_util_common.py +19 -4
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/local_deltacat_storage/__init__.py +83 -19
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/pyarrow.py +4 -1
- deltacat-1.1.11/deltacat/tests/utils/ray_utils/test_dataset.py +66 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/numpy.py +3 -3
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/pandas.py +3 -3
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/pyarrow.py +3 -3
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/dataset.py +7 -7
- deltacat-1.1.11/deltacat.egg-info/PKG-INFO +50 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat.egg-info/SOURCES.txt +5 -2
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat.egg-info/requires.txt +2 -2
- {deltacat-1.1.9 → deltacat-1.1.11}/setup.py +4 -4
- deltacat-1.1.9/PKG-INFO +0 -47
- deltacat-1.1.9/deltacat/io/aws/redshift/redshift_datasource.py +0 -578
- deltacat-1.1.9/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -90
- deltacat-1.1.9/deltacat.egg-info/PKG-INFO +0 -47
- {deltacat-1.1.9 → deltacat-1.1.11}/LICENSE +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/MANIFEST.in +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/README.md +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/clients.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/constants.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/redshift/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/redshift/model/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/benchmarking/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/benchmarking/conftest.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/delegate.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/interface.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/model/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/model/catalog.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/model/table_definition.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/compactor_version.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/materialize_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/repartition_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/table_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/dedupe.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/materialize.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/repartition.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/io.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/sort_key.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/system_columns.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/io.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/daft.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/config/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/meta_stats.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/model/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/model/partition_stats_dict.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/model/stats_cluster_size_estimator.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/stats.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/constants.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/io.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/ray_utils.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/basic.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/delta_stats.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/stats_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/types.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/utils/intervals.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/utils/io.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/utils/manifest_stats_file.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/constants.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/aws/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/aws/redshift/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/file_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/memcached_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/ray_plasma_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/read_api.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/redis_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/s3_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/logs.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/delete_parameters.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/list_result.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/locator.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/namespace.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/sort_key.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/table.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/table_version.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/types.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/aws/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/aws/test_clients.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/catalog/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_util_constant.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_file_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_memcached_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_redis_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_s3_object_store.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/stats/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/stats/test_intervals.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_exceptions.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_logs.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/constants.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/storage.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/utils.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/data/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_cloudpickle.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_daft.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_metrics.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_placement.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_pyarrow.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_resources.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/types/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/types/media.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/types/partial_download.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/types/tables.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/arguments.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/cloudpickle.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/common.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/daft.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/metrics.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/performance.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/placement.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/__init__.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/collections.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/concurrency.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/performance.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/runtime.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/resources.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/s3fs.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/schema.py +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat.egg-info/dependency_links.txt +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/deltacat.egg-info/top_level.txt +0 -0
- {deltacat-1.1.9 → deltacat-1.1.11}/setup.cfg +0 -0
deltacat-1.1.11/PKG-INFO
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: deltacat
|
3
|
+
Version: 1.1.11
|
4
|
+
Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
|
5
|
+
Home-page: https://github.com/ray-project/deltacat
|
6
|
+
Author: Ray Team
|
7
|
+
License: UNKNOWN
|
8
|
+
Platform: UNKNOWN
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
10
|
+
Classifier: Intended Audience :: Developers
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
14
|
+
Classifier: Operating System :: OS Independent
|
15
|
+
Requires-Python: >=3.9
|
16
|
+
Description-Content-Type: text/markdown
|
17
|
+
License-File: LICENSE
|
18
|
+
|
19
|
+
# DeltaCAT
|
20
|
+
|
21
|
+
DeltaCAT is a Pythonic Data Catalog powered by Ray.
|
22
|
+
|
23
|
+
Its data storage model allows you to define and manage fast, scalable,
|
24
|
+
ACID-compliant data catalogs through git-like stage/commit APIs, and has been
|
25
|
+
used to successfully host exabyte-scale enterprise data lakes.
|
26
|
+
|
27
|
+
DeltaCAT uses the Ray distributed compute framework together with Apache Arrow
|
28
|
+
for common table management tasks, including petabyte-scale
|
29
|
+
change-data-capture, data consistency checks, and table repair.
|
30
|
+
|
31
|
+
## Getting Started
|
32
|
+
|
33
|
+
### Install
|
34
|
+
|
35
|
+
```
|
36
|
+
pip install deltacat
|
37
|
+
```
|
38
|
+
|
39
|
+
### Running Tests
|
40
|
+
|
41
|
+
```
|
42
|
+
pip3 install virtualenv
|
43
|
+
virtualenv test_env
|
44
|
+
source test_env/bin/activate
|
45
|
+
pip3 install -r requirements.txt
|
46
|
+
|
47
|
+
pytest
|
48
|
+
```
|
49
|
+
|
50
|
+
|
@@ -99,6 +99,8 @@ class Manifest(dict):
|
|
99
99
|
total_source_content_length = 0
|
100
100
|
content_type = None
|
101
101
|
content_encoding = None
|
102
|
+
partition_values_set = set()
|
103
|
+
partition_values = None
|
102
104
|
if entries:
|
103
105
|
content_type = entries[0].meta.content_type
|
104
106
|
content_encoding = entries[0].meta.content_encoding
|
@@ -127,6 +129,12 @@ class Manifest(dict):
|
|
127
129
|
total_record_count += meta.record_count or 0
|
128
130
|
total_content_length += meta.content_length or 0
|
129
131
|
total_source_content_length += meta.source_content_length or 0
|
132
|
+
if len(partition_values_set) <= 1:
|
133
|
+
partition_values_set.add(entry.meta.partition_values)
|
134
|
+
|
135
|
+
if len(partition_values_set) == 1:
|
136
|
+
partition_values = partition_values_set.pop()
|
137
|
+
|
130
138
|
meta = ManifestMeta.of(
|
131
139
|
total_record_count,
|
132
140
|
total_content_length,
|
@@ -134,6 +142,7 @@ class Manifest(dict):
|
|
134
142
|
content_encoding,
|
135
143
|
total_source_content_length,
|
136
144
|
entry_type=entry_type,
|
145
|
+
partition_values=partition_values,
|
137
146
|
)
|
138
147
|
manifest = Manifest._build_manifest(meta, entries, author, uuid, entry_type)
|
139
148
|
return manifest
|
@@ -185,6 +194,7 @@ class ManifestMeta(dict):
|
|
185
194
|
credentials: Optional[Dict[str, str]] = None,
|
186
195
|
content_type_parameters: Optional[List[Dict[str, str]]] = None,
|
187
196
|
entry_type: Optional[EntryType] = None,
|
197
|
+
partition_values: Optional[List[str]] = None,
|
188
198
|
) -> ManifestMeta:
|
189
199
|
manifest_meta = ManifestMeta()
|
190
200
|
if record_count is not None:
|
@@ -203,6 +213,8 @@ class ManifestMeta(dict):
|
|
203
213
|
manifest_meta["credentials"] = credentials
|
204
214
|
if entry_type is not None:
|
205
215
|
manifest_meta["entry_type"] = entry_type.value
|
216
|
+
if partition_values is not None:
|
217
|
+
manifest_meta["partition_values"] = partition_values
|
206
218
|
return manifest_meta
|
207
219
|
|
208
220
|
@property
|
@@ -244,6 +256,10 @@ class ManifestMeta(dict):
|
|
244
256
|
return EntryType(self["entry_type"])
|
245
257
|
return val
|
246
258
|
|
259
|
+
@property
|
260
|
+
def partition_values(self) -> Optional[List[str]]:
|
261
|
+
return self.get("partition_values")
|
262
|
+
|
247
263
|
|
248
264
|
class ManifestAuthor(dict):
|
249
265
|
@staticmethod
|
@@ -21,7 +21,7 @@ from boto3.resources.base import ServiceResource
|
|
21
21
|
from botocore.client import BaseClient
|
22
22
|
from botocore.exceptions import ClientError
|
23
23
|
from ray.data.block import Block, BlockAccessor, BlockMetadata
|
24
|
-
from ray.data.datasource import
|
24
|
+
from ray.data.datasource import FilenameProvider
|
25
25
|
from ray.types import ObjectRef
|
26
26
|
from tenacity import (
|
27
27
|
Retrying,
|
@@ -70,9 +70,6 @@ from deltacat.exceptions import categorize_errors
|
|
70
70
|
|
71
71
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
72
72
|
|
73
|
-
# TODO(raghumdani): refactor redshift datasource to reuse the
|
74
|
-
# same module for writing output files.
|
75
|
-
|
76
73
|
|
77
74
|
class CapturedBlockWritePaths:
|
78
75
|
def __init__(self):
|
@@ -100,12 +97,15 @@ class CapturedBlockWritePaths:
|
|
100
97
|
return self._block_refs
|
101
98
|
|
102
99
|
|
103
|
-
class UuidBlockWritePathProvider(
|
100
|
+
class UuidBlockWritePathProvider(FilenameProvider):
|
104
101
|
"""Block write path provider implementation that writes each
|
105
102
|
dataset block out to a file of the form: {base_path}/{uuid}
|
106
103
|
"""
|
107
104
|
|
108
|
-
def __init__(
|
105
|
+
def __init__(
|
106
|
+
self, capture_object: CapturedBlockWritePaths, base_path: Optional[str] = None
|
107
|
+
):
|
108
|
+
self.base_path = base_path
|
109
109
|
self.write_paths: List[str] = []
|
110
110
|
self.block_refs: List[ObjectRef[Block]] = []
|
111
111
|
self.capture_object = capture_object
|
@@ -117,6 +117,19 @@ class UuidBlockWritePathProvider(BlockWritePathProvider):
|
|
117
117
|
self.block_refs,
|
118
118
|
)
|
119
119
|
|
120
|
+
def get_filename_for_block(
|
121
|
+
self, block: Any, task_index: int, block_index: int
|
122
|
+
) -> str:
|
123
|
+
if self.base_path is None:
|
124
|
+
raise ValueError(
|
125
|
+
"Base path must be provided to UuidBlockWritePathProvider",
|
126
|
+
)
|
127
|
+
return self._get_write_path_for_block(
|
128
|
+
base_path=self.base_path,
|
129
|
+
block=block,
|
130
|
+
block_index=block_index,
|
131
|
+
)
|
132
|
+
|
120
133
|
def _get_write_path_for_block(
|
121
134
|
self,
|
122
135
|
base_path: str,
|
@@ -143,13 +156,6 @@ class UuidBlockWritePathProvider(BlockWritePathProvider):
|
|
143
156
|
block_index: Optional[int] = None,
|
144
157
|
file_format: Optional[str] = None,
|
145
158
|
) -> str:
|
146
|
-
"""
|
147
|
-
TODO: BlockWritePathProvider is deprecated as of Ray version 2.20.0. Please use FilenameProvider.
|
148
|
-
See: https://docs.ray.io/en/master/data/api/doc/ray.data.datasource.FilenameProvider.html
|
149
|
-
Also See: https://github.com/ray-project/deltacat/issues/299
|
150
|
-
|
151
|
-
Hence, this class only works with Ray version 2.20.0 or lower when used in Ray Dataset.
|
152
|
-
"""
|
153
159
|
return self._get_write_path_for_block(
|
154
160
|
base_path,
|
155
161
|
filesystem=filesystem,
|
@@ -193,6 +193,7 @@ def compact_partition(
|
|
193
193
|
round_completion_file_s3_url = rcf.write_round_completion_file(
|
194
194
|
compaction_artifact_s3_bucket,
|
195
195
|
new_rcf_partition_locator,
|
196
|
+
partition.locator,
|
196
197
|
new_rci,
|
197
198
|
**s3_client_kwargs,
|
198
199
|
)
|
@@ -312,7 +313,10 @@ def _execute_compaction_round(
|
|
312
313
|
round_completion_info = None
|
313
314
|
if not rebase_source_partition_locator:
|
314
315
|
round_completion_info = rcf.read_round_completion_file(
|
315
|
-
compaction_artifact_s3_bucket,
|
316
|
+
compaction_artifact_s3_bucket,
|
317
|
+
source_partition_locator,
|
318
|
+
destination_partition_locator,
|
319
|
+
**s3_client_kwargs,
|
316
320
|
)
|
317
321
|
if not round_completion_info:
|
318
322
|
logger.info(
|
{deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/round_completion_file.py
RENAMED
@@ -12,10 +12,17 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
|
12
12
|
|
13
13
|
|
14
14
|
def get_round_completion_file_s3_url(
|
15
|
-
bucket: str,
|
15
|
+
bucket: str,
|
16
|
+
source_partition_locator: PartitionLocator,
|
17
|
+
destination_partition_locator: Optional[PartitionLocator] = None,
|
16
18
|
) -> str:
|
17
19
|
|
18
20
|
base_url = source_partition_locator.path(f"s3://{bucket}")
|
21
|
+
if destination_partition_locator:
|
22
|
+
base_url = destination_partition_locator.path(
|
23
|
+
f"s3://{bucket}/{source_partition_locator.hexdigest()}"
|
24
|
+
)
|
25
|
+
|
19
26
|
return f"{base_url}.json"
|
20
27
|
|
21
28
|
|
@@ -23,20 +30,41 @@ def get_round_completion_file_s3_url(
|
|
23
30
|
def read_round_completion_file(
|
24
31
|
bucket: str,
|
25
32
|
source_partition_locator: PartitionLocator,
|
33
|
+
destination_partition_locator: Optional[PartitionLocator] = None,
|
26
34
|
**s3_client_kwargs: Optional[Dict[str, Any]],
|
27
35
|
) -> RoundCompletionInfo:
|
28
36
|
|
29
|
-
|
37
|
+
all_uris = []
|
38
|
+
if destination_partition_locator:
|
39
|
+
round_completion_file_url_with_destination = get_round_completion_file_s3_url(
|
40
|
+
bucket,
|
41
|
+
source_partition_locator,
|
42
|
+
destination_partition_locator,
|
43
|
+
)
|
44
|
+
all_uris.append(round_completion_file_url_with_destination)
|
45
|
+
|
46
|
+
# Note: we read from RCF at two different URI for backward
|
47
|
+
# compatibility reasons.
|
48
|
+
round_completion_file_url_prev = get_round_completion_file_s3_url(
|
30
49
|
bucket,
|
31
50
|
source_partition_locator,
|
32
51
|
)
|
33
|
-
|
52
|
+
|
53
|
+
all_uris.append(round_completion_file_url_prev)
|
54
|
+
|
34
55
|
round_completion_info = None
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
56
|
+
|
57
|
+
for rcf_uri in all_uris:
|
58
|
+
logger.info(f"Reading round completion file from: {rcf_uri}")
|
59
|
+
result = s3_utils.download(rcf_uri, False, **s3_client_kwargs)
|
60
|
+
if result:
|
61
|
+
json_str = result["Body"].read().decode("utf-8")
|
62
|
+
round_completion_info = RoundCompletionInfo(json.loads(json_str))
|
63
|
+
logger.info(f"Read round completion info: {round_completion_info}")
|
64
|
+
break
|
65
|
+
else:
|
66
|
+
logger.warn(f"Round completion file not present at {rcf_uri}")
|
67
|
+
|
40
68
|
return round_completion_info
|
41
69
|
|
42
70
|
|
@@ -44,8 +72,9 @@ def read_round_completion_file(
|
|
44
72
|
def write_round_completion_file(
|
45
73
|
bucket: Optional[str],
|
46
74
|
source_partition_locator: Optional[PartitionLocator],
|
75
|
+
destination_partition_locator: Optional[PartitionLocator],
|
47
76
|
round_completion_info: RoundCompletionInfo,
|
48
|
-
completion_file_s3_url: str = None,
|
77
|
+
completion_file_s3_url: Optional[str] = None,
|
49
78
|
**s3_client_kwargs: Optional[Dict[str, Any]],
|
50
79
|
) -> str:
|
51
80
|
if bucket is None and completion_file_s3_url is None:
|
@@ -56,6 +85,7 @@ def write_round_completion_file(
|
|
56
85
|
completion_file_s3_url = get_round_completion_file_s3_url(
|
57
86
|
bucket,
|
58
87
|
source_partition_locator,
|
88
|
+
destination_partition_locator,
|
59
89
|
)
|
60
90
|
logger.info(f"writing round completion file to: {completion_file_s3_url}")
|
61
91
|
s3_utils.upload(
|
@@ -24,7 +24,7 @@ from deltacat.compute.compactor import (
|
|
24
24
|
)
|
25
25
|
from deltacat.compute.compactor_v2.model.merge_result import MergeResult
|
26
26
|
from deltacat.compute.compactor_v2.model.hash_bucket_result import HashBucketResult
|
27
|
-
from deltacat.compute.compactor_v2.model.
|
27
|
+
from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
|
28
28
|
ExecutionCompactionResult,
|
29
29
|
)
|
30
30
|
from deltacat.compute.compactor.model.materialize_result import MaterializeResult
|
@@ -78,6 +78,7 @@ from deltacat.compute.compactor_v2.utils.task_options import (
|
|
78
78
|
)
|
79
79
|
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
80
80
|
from deltacat.exceptions import categorize_errors
|
81
|
+
from deltacat.compute.compactor_v2.constants import COMPACT_PARTITION_METRIC_PREFIX
|
81
82
|
|
82
83
|
if importlib.util.find_spec("memray"):
|
83
84
|
import memray
|
@@ -86,7 +87,7 @@ if importlib.util.find_spec("memray"):
|
|
86
87
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
87
88
|
|
88
89
|
|
89
|
-
@metrics
|
90
|
+
@metrics(prefix=COMPACT_PARTITION_METRIC_PREFIX)
|
90
91
|
@categorize_errors
|
91
92
|
def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]:
|
92
93
|
assert (
|
@@ -109,7 +110,6 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
|
|
109
110
|
f"Partition-{params.source_partition_locator} -> "
|
110
111
|
f"{compaction_session_type} Compaction session data processing completed"
|
111
112
|
)
|
112
|
-
round_completion_file_s3_url: Optional[str] = None
|
113
113
|
if execute_compaction_result.new_compacted_partition:
|
114
114
|
previous_partition: Optional[Partition] = None
|
115
115
|
if execute_compaction_result.is_inplace_compacted:
|
@@ -131,19 +131,13 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
|
|
131
131
|
**params.deltacat_storage_kwargs,
|
132
132
|
)
|
133
133
|
logger.info(f"Committed compacted partition: {committed_partition}")
|
134
|
-
round_completion_file_s3_url = rcf.write_round_completion_file(
|
135
|
-
params.compaction_artifact_s3_bucket,
|
136
|
-
execute_compaction_result.new_round_completion_file_partition_locator,
|
137
|
-
execute_compaction_result.new_round_completion_info,
|
138
|
-
**params.s3_client_kwargs,
|
139
|
-
)
|
140
134
|
else:
|
141
135
|
logger.warning("No new partition was committed during compaction.")
|
142
136
|
|
143
137
|
logger.info(
|
144
138
|
f"Completed compaction session for: {params.source_partition_locator}"
|
145
139
|
)
|
146
|
-
return round_completion_file_s3_url
|
140
|
+
return execute_compaction_result.round_completion_file_s3_url
|
147
141
|
|
148
142
|
|
149
143
|
def _execute_compaction(
|
@@ -188,6 +182,7 @@ def _execute_compaction(
|
|
188
182
|
round_completion_info = rcf.read_round_completion_file(
|
189
183
|
params.compaction_artifact_s3_bucket,
|
190
184
|
params.source_partition_locator,
|
185
|
+
params.destination_partition_locator,
|
191
186
|
**params.s3_client_kwargs,
|
192
187
|
)
|
193
188
|
if not round_completion_info:
|
@@ -684,9 +679,18 @@ def _execute_compaction(
|
|
684
679
|
f"and rcf source partition_id of {rcf_source_partition_locator.partition_id}."
|
685
680
|
)
|
686
681
|
rcf_source_partition_locator = compacted_partition.locator
|
682
|
+
|
683
|
+
round_completion_file_s3_url = rcf.write_round_completion_file(
|
684
|
+
params.compaction_artifact_s3_bucket,
|
685
|
+
rcf_source_partition_locator,
|
686
|
+
compacted_partition.locator,
|
687
|
+
new_round_completion_info,
|
688
|
+
**params.s3_client_kwargs,
|
689
|
+
)
|
690
|
+
|
687
691
|
return ExecutionCompactionResult(
|
688
692
|
compacted_partition,
|
689
693
|
new_round_completion_info,
|
690
|
-
rcf_source_partition_locator,
|
694
|
+
round_completion_file_s3_url,
|
691
695
|
is_inplace_compacted,
|
692
696
|
)
|
@@ -2,7 +2,6 @@ from dataclasses import dataclass, fields
|
|
2
2
|
|
3
3
|
from deltacat.storage import (
|
4
4
|
Partition,
|
5
|
-
PartitionLocator,
|
6
5
|
)
|
7
6
|
from deltacat.compute.compactor import (
|
8
7
|
RoundCompletionInfo,
|
@@ -14,7 +13,7 @@ from typing import Optional
|
|
14
13
|
class ExecutionCompactionResult:
|
15
14
|
new_compacted_partition: Optional[Partition]
|
16
15
|
new_round_completion_info: Optional[RoundCompletionInfo]
|
17
|
-
new_round_completion_file_partition_locator: Optional[PartitionLocator]
|
16
|
+
round_completion_file_s3_url: Optional[str]
|
18
17
|
is_inplace_compacted: bool
|
19
18
|
|
20
19
|
def __iter__(self):
|
@@ -162,7 +162,7 @@ def group_by_pk_hash_bucket(
|
|
162
162
|
len(new_tables) == 1
|
163
163
|
), f"Expected only 1 table in the result but found {len(new_tables)}"
|
164
164
|
|
165
|
-
table =
|
165
|
+
table = new_tables[0]
|
166
166
|
|
167
167
|
# group hash bucket record indices
|
168
168
|
result = group_record_indices_by_hash_bucket(
|
@@ -213,11 +213,14 @@ def categorize_errors(func: Callable):
|
|
213
213
|
except BaseException as e:
|
214
214
|
deltacat_storage = None
|
215
215
|
deltacat_storage_kwargs = {}
|
216
|
+
all_args = args
|
216
217
|
if kwargs:
|
217
218
|
deltacat_storage = kwargs.get(DELTACAT_STORAGE_PARAM)
|
218
219
|
deltacat_storage_kwargs = kwargs.get(DELTACAT_STORAGE_KWARGS_PARAM, {})
|
219
|
-
if not deltacat_storage and args:
|
220
|
-
for arg in args:
|
220
|
+
all_args = all_args + tuple(kwargs.values())
|
221
|
+
|
222
|
+
if not deltacat_storage and all_args:
|
223
|
+
for arg in all_args:
|
221
224
|
if (
|
222
225
|
isinstance(arg, dict)
|
223
226
|
and arg.get(DELTACAT_STORAGE_PARAM) is not None
|
@@ -6,9 +6,6 @@ from typing import Any, Callable, Dict, Optional, TypeVar, Union, cast
|
|
6
6
|
import pyarrow as pa
|
7
7
|
import s3fs
|
8
8
|
from ray.data import Dataset
|
9
|
-
from ray.data.datasource import BlockWritePathProvider, DefaultBlockWritePathProvider
|
10
|
-
|
11
|
-
from deltacat.io.aws.redshift.redshift_datasource import RedshiftDatasource
|
12
9
|
|
13
10
|
T = TypeVar("T")
|
14
11
|
|
@@ -27,7 +24,6 @@ class DeltacatDataset(Dataset[T]):
|
|
27
24
|
filesystem: Optional[Union[pa.fs.FileSystem, s3fs.S3FileSystem]] = None,
|
28
25
|
try_create_dir: bool = True,
|
29
26
|
arrow_open_stream_args: Optional[Dict[str, Any]] = None,
|
30
|
-
block_path_provider: BlockWritePathProvider = DefaultBlockWritePathProvider(),
|
31
27
|
arrow_parquet_args_fn: Callable[[], Dict[str, Any]] = lambda: {},
|
32
28
|
**arrow_parquet_args,
|
33
29
|
) -> None:
|
@@ -59,9 +55,8 @@ class DeltacatDataset(Dataset[T]):
|
|
59
55
|
if True. Does nothing if all directories already exist.
|
60
56
|
arrow_open_stream_args: kwargs passed to
|
61
57
|
pyarrow.fs.FileSystem.open_output_stream
|
62
|
-
|
63
|
-
to write each dataset block to a custom output path.
|
64
|
-
DefaultBlockWritePathProvider if None.
|
58
|
+
filename_provider: FilenameProvider implementation
|
59
|
+
to write each dataset block to a custom output path.
|
65
60
|
arrow_parquet_args_fn: Callable that returns a dictionary of write
|
66
61
|
arguments to use when writing each block to a file. Overrides
|
67
62
|
any duplicate keys from arrow_parquet_args. This should be used
|
@@ -72,14 +67,7 @@ class DeltacatDataset(Dataset[T]):
|
|
72
67
|
pyarrow.parquet.write_table(), which is used to write out each
|
73
68
|
block to a file.
|
74
69
|
"""
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
dataset_uuid=self._uuid,
|
79
|
-
filesystem=filesystem,
|
80
|
-
try_create_dir=try_create_dir,
|
81
|
-
open_stream_args=arrow_open_stream_args,
|
82
|
-
block_path_provider=block_path_provider,
|
83
|
-
write_args_fn=arrow_parquet_args_fn,
|
84
|
-
**arrow_parquet_args,
|
70
|
+
raise NotImplementedError(
|
71
|
+
"Writing to Redshift is not yet supported. "
|
72
|
+
"Please use DeltacatDataset.write_parquet() instead."
|
85
73
|
)
|
@@ -14,6 +14,20 @@ from deltacat.storage.model.stream import Stream, StreamLocator
|
|
14
14
|
from deltacat.storage.model.table import Table, TableLocator
|
15
15
|
from deltacat.storage.model.table_version import TableVersion, TableVersionLocator
|
16
16
|
from deltacat.storage.model.delete_parameters import DeleteParameters
|
17
|
+
from deltacat.storage.model.partition_spec import (
|
18
|
+
PartitionFilter,
|
19
|
+
PartitionValues,
|
20
|
+
DeltaPartitionSpec,
|
21
|
+
StreamPartitionSpec,
|
22
|
+
)
|
23
|
+
from deltacat.storage.model.transform import (
|
24
|
+
Transform,
|
25
|
+
TransformName,
|
26
|
+
TransformParameters,
|
27
|
+
BucketingStrategy,
|
28
|
+
BucketTransformParameters,
|
29
|
+
IdentityTransformParameters,
|
30
|
+
)
|
17
31
|
|
18
32
|
from deltacat.storage.model.types import (
|
19
33
|
CommitState,
|
@@ -56,4 +70,14 @@ __all__ = [
|
|
56
70
|
"TableVersionLocator",
|
57
71
|
"SortKey",
|
58
72
|
"SortOrder",
|
73
|
+
"PartitionFilter",
|
74
|
+
"PartitionValues",
|
75
|
+
"DeltaPartitionSpec",
|
76
|
+
"StreamPartitionSpec",
|
77
|
+
"Transform",
|
78
|
+
"TransformName",
|
79
|
+
"TransformParameters",
|
80
|
+
"BucketingStrategy",
|
81
|
+
"BucketTransformParameters",
|
82
|
+
"IdentityTransformParameters",
|
59
83
|
]
|