deltacat 2.0.0b3__tar.gz → 2.0.0b7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deltacat-2.0.0b3/deltacat.egg-info → deltacat-2.0.0b7}/PKG-INFO +1 -1
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/__init__.py +1 -1
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/iceberg/impl.py +15 -2
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/iceberg/overrides.py +12 -14
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/main/impl.py +1 -1
- deltacat-2.0.0b7/deltacat/daft/daft_scan.py +111 -0
- deltacat-2.0.0b7/deltacat/daft/model.py +258 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/iceberg/iceberg_bucket_writer.py +64 -17
- deltacat-2.0.0b7/deltacat/experimental/daft/__init__.py +4 -0
- deltacat-2.0.0b7/deltacat/experimental/daft/daft_catalog.py +229 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/manifest.py +49 -0
- deltacat-2.0.0b7/deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat-2.0.0b7/deltacat/utils/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/daft.py +2 -5
- deltacat-2.0.0b7/deltacat/utils/ray_utils/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7/deltacat.egg-info}/PKG-INFO +1 -1
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat.egg-info/SOURCES.txt +7 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat.egg-info/requires.txt +2 -2
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/setup.py +3 -2
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/LICENSE +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/MANIFEST.in +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/README.md +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/annotations.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/api.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/aws/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/aws/clients.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/aws/constants.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/aws/s3u.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/benchmark_engine.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/benchmark_report.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/benchmark_suite.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/conftest.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/data/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/data/random_row_generator.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/data/row_generator.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/test_benchmark_pipeline.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/delegate.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/iceberg/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/iceberg/iceberg_catalog_config.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/interface.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/main/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/model/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/model/catalog.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/model/properties.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/model/table_definition.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/compaction_session.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/compactor_version.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/materialize_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/repartition_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/table_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/repartition_session.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/dedupe.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/materialize.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/repartition.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/io.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/round_completion_file.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/sort_key.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/system_columns.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/constants.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/private/compaction_utils.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/io.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/constants.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/converter_session.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/model/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/model/convert_input.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/model/convert_input_files.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/model/converter_session_params.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/pyiceberg/catalog.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/pyiceberg/overrides.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/steps/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/steps/convert.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/steps/dedupe.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/convert_task_options.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/converter_session_utils.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/iceberg_columns.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/io.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/s3u.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/daft.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/delta.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/manifest.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/model.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/parquet.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/delta_stats.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/stats_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/types.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/constants.py +0 -0
- {deltacat-2.0.0b3/deltacat/examples → deltacat-2.0.0b7/deltacat/daft}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/env.py +0 -0
- {deltacat-2.0.0b3/deltacat/examples/common → deltacat-2.0.0b7/deltacat/examples}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/basic_logging.py +0 -0
- {deltacat-2.0.0b3/deltacat/examples/iceberg → deltacat-2.0.0b7/deltacat/examples/common}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/common/fixtures.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/hello_world.py +0 -0
- {deltacat-2.0.0b3/deltacat/io → deltacat-2.0.0b7/deltacat/examples/iceberg}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/iceberg/iceberg_reader.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/exceptions.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/iceberg → deltacat-2.0.0b7/deltacat/experimental}/__init__.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/main → deltacat-2.0.0b7/deltacat/io}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/file_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/memcached_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/ray_plasma_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/redis_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/s3_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/logs.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/__init__.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/model → deltacat-2.0.0b7/deltacat/storage/iceberg}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/iceberg/iceberg_scan_planner.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/iceberg/impl.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/iceberg/model.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/interface.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/model/scan → deltacat-2.0.0b7/deltacat/storage/main}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/main/impl.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/rivulet/arrow → deltacat-2.0.0b7/deltacat/storage/model}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/delta.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/interop.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/list_result.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/locator.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/metafile.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/namespace.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/partition.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/rivulet/fs → deltacat-2.0.0b7/deltacat/storage/model/scan}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/scan/push_down.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/scan/scan_plan.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/scan/scan_task.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/schema.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/shard.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/sort_key.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/stream.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/table.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/table_version.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/transaction.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/transform.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/types.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/__init__.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/rivulet/metastore → deltacat-2.0.0b7/deltacat/storage/rivulet/arrow}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/arrow/serializer.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/dataset.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/dataset_executor.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/feather/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/feather/file_reader.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/feather/serializer.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/rivulet/reader → deltacat-2.0.0b7/deltacat/storage/rivulet/fs}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/fs/file_provider.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/fs/file_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/fs/input_file.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/fs/output_file.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/logical_plan.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/rivulet/schema → deltacat-2.0.0b7/deltacat/storage/rivulet/metastore}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/metastore/delta.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/metastore/json_sst.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/metastore/sst.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/metastore/sst_interval_tree.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/mvp/Table.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/mvp/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/parquet/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/parquet/file_reader.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/parquet/serializer.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/rivulet/writer → deltacat-2.0.0b7/deltacat/storage/rivulet/reader}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/block_scanner.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/data_reader.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/data_scan.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/dataset_metastore.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/dataset_reader.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/pyarrow_data_reader.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/query_expression.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/reader_type_registrar.py +0 -0
- {deltacat-2.0.0b3/deltacat/storage/util → deltacat-2.0.0b7/deltacat/storage/rivulet/schema}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/schema/datatype.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/schema/schema.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/serializer.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/serializer_factory.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests → deltacat-2.0.0b7/deltacat/storage/rivulet/writer}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/writer/memtable_dataset_writer.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/aws → deltacat-2.0.0b7/deltacat/storage/util}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/util/scan_planner.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/catalog → deltacat-2.0.0b7/deltacat/tests}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_cloudpickle_bug_fix.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_file_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_memcached_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_ray_plasma_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_redis_object_store.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_s3_object_store.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute → deltacat-2.0.0b7/deltacat/tests/aws}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/aws/test_clients.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/aws/test_s3u.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute/compactor → deltacat-2.0.0b7/deltacat/tests/catalog}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/catalog/test_catalogs.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute/compactor/steps → deltacat-2.0.0b7/deltacat/tests/compute}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute/compactor/utils → deltacat-2.0.0b7/deltacat/tests/compute/compactor}/__init__.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute/compactor_v2 → deltacat-2.0.0b7/deltacat/tests/compute/compactor/steps}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute/compactor_v2 → deltacat-2.0.0b7/deltacat/tests/compute/compactor}/utils/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute/converter → deltacat-2.0.0b7/deltacat/tests/compute/compactor_v2}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute/resource_estimation → deltacat-2.0.0b7/deltacat/tests/compute/compactor_v2/utils}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/conftest.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/compute/resource_estimation/data → deltacat-2.0.0b7/deltacat/tests/compute/converter}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/converter/conftest.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/converter/test_convert_session.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/converter/utils.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/storage → deltacat-2.0.0b7/deltacat/tests/compute/resource_estimation}/__init__.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/storage/main → deltacat-2.0.0b7/deltacat/tests/compute/resource_estimation/data}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/resource_estimation/test_delta.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_rebase.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_util_common.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_util_constant.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/storage/model → deltacat-2.0.0b7/deltacat/tests/storage}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/conftest.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/storage/rivulet → deltacat-2.0.0b7/deltacat/tests/storage/main}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/main/test_main_storage.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/storage/rivulet/fs → deltacat-2.0.0b7/deltacat/tests/storage/model}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_delete_parameters.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_metafile_io.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_schema.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_shard.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_table_version.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_transaction.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/storage/rivulet/schema → deltacat-2.0.0b7/deltacat/tests/storage/rivulet}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/conftest.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/storage/rivulet/writer → deltacat-2.0.0b7/deltacat/tests/storage/rivulet/fs}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/test_utils → deltacat-2.0.0b7/deltacat/tests/storage/rivulet/schema}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/schema/test_schema.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/test_dataset.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/test_manifest.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/test_sst_interval_tree.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/test_utils.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/utils → deltacat-2.0.0b7/deltacat/tests/storage/rivulet/writer}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_deltacat_api.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_exceptions.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_logs.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/utils/data → deltacat-2.0.0b7/deltacat/tests/test_utils}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/constants.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/filesystem.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/message_pack_utils.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/pyarrow.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/storage.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/utils.py +0 -0
- {deltacat-2.0.0b3/deltacat/tests/utils/ray_utils → deltacat-2.0.0b7/deltacat/tests/utils}/__init__.py +0 -0
- {deltacat-2.0.0b3/deltacat/types → deltacat-2.0.0b7/deltacat/tests/utils/data}/__init__.py +0 -0
- {deltacat-2.0.0b3/deltacat/utils → deltacat-2.0.0b7/deltacat/tests/utils/ray_utils}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_cloudpickle.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_daft.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_metrics.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_placement.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_pyarrow.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_resources.py +0 -0
- {deltacat-2.0.0b3/deltacat/utils/ray_utils → deltacat-2.0.0b7/deltacat/types}/__init__.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/types/media.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/types/partial_download.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/types/tables.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/arguments.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/cloudpickle.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/common.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/export.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/filesystem.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/metafile_locator.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/metrics.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/numpy.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/pandas.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/performance.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/placement.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/pyarrow.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/collections.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/concurrency.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/dataset.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/performance.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/runtime.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/resources.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/s3fs.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/schema.py +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat.egg-info/dependency_links.txt +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat.egg-info/top_level.txt +0 -0
- {deltacat-2.0.0b3 → deltacat-2.0.0b7}/setup.cfg +0 -0
@@ -2,10 +2,13 @@ import logging
|
|
2
2
|
|
3
3
|
from typing import Any, Dict, List, Optional, Union
|
4
4
|
|
5
|
-
from daft import DataFrame
|
5
|
+
from daft import DataFrame, context
|
6
|
+
from daft.daft import ScanOperatorHandle, StorageConfig
|
7
|
+
from daft.logical.builder import LogicalPlanBuilder
|
6
8
|
|
7
9
|
from deltacat import logs
|
8
10
|
from deltacat.catalog.model.table_definition import TableDefinition
|
11
|
+
from deltacat.daft.daft_scan import DeltaCatScanOperator
|
9
12
|
from deltacat.exceptions import TableAlreadyExistsError
|
10
13
|
from deltacat.storage.iceberg.iceberg_scan_planner import IcebergScanPlanner
|
11
14
|
from deltacat.storage.iceberg.model import PartitionSchemeMapper, SchemaMapper
|
@@ -144,7 +147,17 @@ def read_table(
|
|
144
147
|
table: str, *args, namespace: Optional[str] = None, **kwargs
|
145
148
|
) -> DistributedDataset:
|
146
149
|
"""Read a table into a distributed dataset."""
|
147
|
-
|
150
|
+
# TODO: more proper IO configuration
|
151
|
+
io_config = context.get_context().daft_planning_config.default_io_config
|
152
|
+
multithreaded_io = context.get_context().get_or_create_runner().name != "ray"
|
153
|
+
|
154
|
+
storage_config = StorageConfig(multithreaded_io, io_config)
|
155
|
+
|
156
|
+
dc_table = get_table(name=table, namespace=namespace, **kwargs)
|
157
|
+
dc_scan_operator = DeltaCatScanOperator(dc_table, storage_config)
|
158
|
+
handle = ScanOperatorHandle.from_python_scan_operator(dc_scan_operator)
|
159
|
+
builder = LogicalPlanBuilder.from_tabular_scan(scan_operator=handle)
|
160
|
+
return DataFrame(builder)
|
148
161
|
|
149
162
|
|
150
163
|
def alter_table(
|
@@ -5,12 +5,11 @@ from typing import Iterator, List
|
|
5
5
|
from pyarrow.fs import FileSystem
|
6
6
|
|
7
7
|
from pyiceberg.io.pyarrow import (
|
8
|
-
|
8
|
+
data_file_statistics_from_parquet_metadata,
|
9
9
|
compute_statistics_plan,
|
10
10
|
parquet_path_to_id_mapping,
|
11
11
|
)
|
12
|
-
from pyiceberg.table import Table
|
13
|
-
from pyiceberg.table.snapshots import Operation
|
12
|
+
from pyiceberg.table import Table
|
14
13
|
from pyiceberg.manifest import DataFile, DataFileContent, FileFormat
|
15
14
|
from pyiceberg.types import StructType, NestedField, IntegerType
|
16
15
|
from pyiceberg.typedef import Record
|
@@ -24,11 +23,10 @@ def append(table: Table, paths: List[str]) -> None:
|
|
24
23
|
# raise ValueError("Cannot write to tables with a sort-order")
|
25
24
|
|
26
25
|
data_files = write_file(table, paths)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
merge.commit()
|
26
|
+
with table.transaction() as txn:
|
27
|
+
with txn.update_snapshot().fast_append() as snapshot_update:
|
28
|
+
for data_file in data_files:
|
29
|
+
snapshot_update.append_data_file(data_file)
|
32
30
|
|
33
31
|
|
34
32
|
def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
|
@@ -41,6 +39,11 @@ def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
|
|
41
39
|
fs_path = fs_tuple[1]
|
42
40
|
with fs.open_input_file(fs_path) as native_file:
|
43
41
|
parquet_metadata = pq.read_metadata(native_file)
|
42
|
+
statistics = data_file_statistics_from_parquet_metadata(
|
43
|
+
parquet_metadata=parquet_metadata,
|
44
|
+
stats_columns=compute_statistics_plan(table.schema(), table.properties),
|
45
|
+
parquet_column_mapping=parquet_path_to_id_mapping(table.schema()),
|
46
|
+
)
|
44
47
|
data_file = DataFile(
|
45
48
|
content=DataFileContent.DATA,
|
46
49
|
file_path=file_path,
|
@@ -63,12 +66,7 @@ def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
|
|
63
66
|
spec_id=table.spec().spec_id,
|
64
67
|
equality_ids=None,
|
65
68
|
key_metadata=None,
|
66
|
-
|
67
|
-
fill_parquet_file_metadata(
|
68
|
-
data_file=data_file,
|
69
|
-
parquet_metadata=parquet_metadata,
|
70
|
-
stats_columns=compute_statistics_plan(table.schema(), table.properties),
|
71
|
-
parquet_column_mapping=parquet_path_to_id_mapping(table.schema()),
|
69
|
+
**statistics.to_serialized_dict(),
|
72
70
|
)
|
73
71
|
data_files.append(data_file)
|
74
72
|
return data_files
|
@@ -709,7 +709,7 @@ def _get_deltas_from_partition_filter(
|
|
709
709
|
|
710
710
|
def _get_storage(**kwargs):
|
711
711
|
"""
|
712
|
-
Returns the implementation of `deltacat.storage.interface` to use with this catalog
|
712
|
+
Returns the implementation of `deltacat.storage.interface` to use with this catalog
|
713
713
|
|
714
714
|
This is configured in the `CatalogProperties` stored during initialization and passed through `delegate.py`
|
715
715
|
"""
|
@@ -0,0 +1,111 @@
|
|
1
|
+
from typing import Iterator
|
2
|
+
|
3
|
+
from daft import Schema
|
4
|
+
from daft.daft import (
|
5
|
+
StorageConfig,
|
6
|
+
PartitionField,
|
7
|
+
Pushdowns,
|
8
|
+
ScanTask,
|
9
|
+
FileFormatConfig,
|
10
|
+
ParquetSourceConfig,
|
11
|
+
)
|
12
|
+
from daft.io.scan import ScanOperator
|
13
|
+
|
14
|
+
from deltacat.catalog.model.table_definition import TableDefinition
|
15
|
+
from deltacat.daft.model import DaftPartitionKeyMapper
|
16
|
+
|
17
|
+
|
18
|
+
class DeltaCatScanOperator(ScanOperator):
    """Daft scan operator that produces scan tasks for a DeltaCAT table.

    The Daft schema and the Daft partition fields are derived once, at
    construction time, from the table definition's table version.
    """

    def __init__(self, table: TableDefinition, storage_config: StorageConfig) -> None:
        """Create a scan operator for ``table``.

        Args:
            table: DeltaCAT table definition to scan.
            storage_config: Daft storage configuration attached to every scan
                task produced by this operator.

        Raises:
            RuntimeError: If the schema or the partition keys cannot be
                inferred from ``table``.
        """
        super().__init__()
        self.table = table
        self._schema = self._infer_schema()
        self.partition_keys = self._infer_partition_keys()
        self.storage_config = storage_config

    def schema(self) -> Schema:
        """Return the Daft schema inferred from the table version."""
        return self._schema

    def name(self) -> str:
        """Return the static name of this scan operator."""
        return "DeltaCatScanOperator"

    def display_name(self) -> str:
        """Return a display name that includes the namespace and table name."""
        return f"DeltaCATScanOperator({self.table.table.namespace}.{self.table.table.table_name})"

    def partitioning_keys(self) -> list[PartitionField]:
        """Return the Daft partition fields inferred at construction time."""
        return self.partition_keys

    def multiline_display(self) -> list[str]:
        """Return a multi-line, human-readable description of this operator."""
        return [
            self.display_name(),
            f"Schema = {self._schema}",
            # Call the accessor: formatting the bound method itself would
            # render its repr instead of the partition fields.
            f"Partitioning keys = {self.partitioning_keys()}",
            f"Storage config = {self.storage_config}",
        ]

    def to_scan_tasks(self, pushdowns: Pushdowns) -> Iterator[ScanTask]:
        """Create one Daft scan task per data file in the DeltaCAT scan plan.

        Args:
            pushdowns: Daft pushdowns, forwarded unchanged to every scan task.

        Returns:
            An iterator over the created scan tasks.
        """
        # TODO: implement pushdown predicate on DeltaCAT
        dc_scan_plan = self.table.create_scan_plan()
        file_format_config = FileFormatConfig.from_parquet_config(
            # TODO: possibly pass a field-id mapping here, e.g.
            # ParquetSourceConfig(field_id_mapping=...) -- to be confirmed.
            ParquetSourceConfig()
        )
        # Materialize the tasks eagerly so that planning errors surface here
        # rather than while the returned iterator is being consumed.
        scan_tasks = [
            ScanTask.catalog_scan_task(
                file=data_file.file_path,
                file_format=file_format_config,
                schema=self._schema._schema,
                storage_config=self.storage_config,
                pushdowns=pushdowns,
            )
            for dc_scan_task in dc_scan_plan.scan_tasks
            for data_file in dc_scan_task.data_files()
        ]
        return iter(scan_tasks)

    def can_absorb_filter(self) -> bool:
        """Report that filters are not absorbed by this operator."""
        return False

    def can_absorb_limit(self) -> bool:
        """Report that limits are not absorbed by this operator."""
        return False

    def can_absorb_select(self) -> bool:
        """Report that column selection is absorbed by this operator."""
        return True

    def _qualified_table_name(self) -> str:
        """Return ``namespace.table_name`` for error messages.

        Falls back to a placeholder when the table definition (or its table)
        is missing, so that building an error message never raises itself.
        """
        if not self.table:
            return "<unknown>"
        table = self.table.table
        if table is None:
            return "<unknown>"
        return f"{table.namespace}.{table.table_name}"

    def _infer_schema(self) -> Schema:
        """Convert the table version's Arrow schema to a Daft schema.

        Raises:
            RuntimeError: If the table, its table version, or the table
                version's schema is missing.
        """
        table_version = self.table.table_version if self.table else None
        if not (table_version and table_version.schema):
            raise RuntimeError(
                f"Failed to infer schema for DeltaCAT Table "
                f"{self._qualified_table_name()}"
            )

        return Schema.from_pyarrow_schema(table_version.schema.arrow)

    def _infer_partition_keys(self) -> list[PartitionField]:
        """Convert the table version's partition keys to Daft partition fields.

        Returns:
            One Daft partition field per partition key, or an empty list when
            the partition scheme has no keys.

        Raises:
            RuntimeError: If the table, its table version, partition scheme or
                schema is missing, or if a partition key cannot be converted.
        """
        table_version = self.table.table_version if self.table else None
        if not (
            table_version
            and table_version.partition_scheme
            and table_version.schema
        ):
            raise RuntimeError(
                f"Failed to infer partition keys for DeltaCAT Table "
                f"{self._qualified_table_name()}"
            )

        schema = table_version.schema
        partition_keys = table_version.partition_scheme.keys
        if not partition_keys:
            return []

        partition_fields = []
        for key in partition_keys:
            field = DaftPartitionKeyMapper.unmap(key, schema)
            # Raise explicitly instead of asserting: assert statements are
            # stripped when Python runs with -O.
            if field is None:
                raise RuntimeError(f"Unmapping failed for key {key}")
            partition_fields.append(field)

        return partition_fields
|
@@ -0,0 +1,258 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import pyarrow as pa
|
4
|
+
from pyarrow import Field as PaField
|
5
|
+
from daft import Schema as DaftSchema, DataType
|
6
|
+
from daft.daft import (
|
7
|
+
PartitionField as DaftPartitionField,
|
8
|
+
PartitionTransform as DaftTransform,
|
9
|
+
)
|
10
|
+
from daft.logical.schema import Field as DaftField
|
11
|
+
from daft.io.scan import make_partition_field
|
12
|
+
|
13
|
+
from deltacat.storage.model.schema import Schema
|
14
|
+
from deltacat.storage.model.interop import ModelMapper
|
15
|
+
from deltacat.storage.model.partition import PartitionKey
|
16
|
+
from deltacat.storage.model.transform import (
|
17
|
+
BucketingStrategy,
|
18
|
+
Transform,
|
19
|
+
BucketTransform,
|
20
|
+
HourTransform,
|
21
|
+
DayTransform,
|
22
|
+
MonthTransform,
|
23
|
+
YearTransform,
|
24
|
+
IdentityTransform,
|
25
|
+
TruncateTransform,
|
26
|
+
)
|
27
|
+
|
28
|
+
|
29
|
+
class DaftFieldMapper(ModelMapper[DaftField, PaField]):
    """Two-way converter between Daft fields and PyArrow fields."""

    @staticmethod
    def map(
        obj: Optional[DaftField],
        **kwargs,
    ) -> Optional[PaField]:
        """Build a PyArrow field from a Daft field.

        Only the field name and data type are carried over.

        Args:
            obj: Daft field to convert, or None.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The corresponding PyArrow field, or None when ``obj`` is None.
        """
        if obj is not None:
            field_name = obj.name
            arrow_type = obj.dtype.to_arrow_dtype()
            return pa.field(name=field_name, type=arrow_type)
        return None

    @staticmethod
    def unmap(
        obj: Optional[PaField],
        **kwargs,
    ) -> Optional[DaftField]:
        """Build a Daft field from a PyArrow field.

        Only the field name and data type are carried over.

        Args:
            obj: PyArrow field to convert, or None.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The corresponding Daft field, or None when ``obj`` is None.
        """
        if obj is not None:
            field_name = obj.name
            daft_type = DataType.from_arrow_type(obj.type)  # type: ignore
            return DaftField.create(name=field_name, dtype=daft_type)
        return None
|
73
|
+
|
74
|
+
|
75
|
+
class DaftTransformMapper(ModelMapper[DaftTransform, Transform]):
    """Converter between Daft partition transforms and DeltaCAT transforms.

    Only the DeltaCAT -> Daft direction (``unmap``) is implemented.
    """

    @staticmethod
    def map(
        obj: Optional[DaftTransform],
        **kwargs,
    ) -> Optional[Transform]:
        """Convert a Daft transform to a DeltaCAT transform (unsupported).

        Args:
            obj: Daft transform to convert.
            **kwargs: Accepted for interface compatibility; not used.

        Raises:
            NotImplementedError: Always.
        """
        # daft.PartitionTransform exposes no Python interface for reading its
        # attributes, so this direction cannot be implemented.
        # TODO: request Daft to expose Python friendly interface for daft.PartitionTransform
        raise NotImplementedError(
            "Converting transform from Daft to DeltaCAT is not supported"
        )

    @staticmethod
    def unmap(
        obj: Optional[Transform],
        **kwargs,
    ) -> Optional[DaftTransform]:
        """Convert a DeltaCAT transform to the matching Daft transform.

        Args:
            obj: DeltaCAT transform to convert, or None.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The matching Daft transform, or None when ``obj`` is None.

        Raises:
            ValueError: If the transform type, or the bucketing strategy of a
                bucket transform, is not supported.
        """
        if obj is None:
            return None

        # Parameterless transforms, checked in order: DeltaCAT transform type
        # paired with the name of the DaftTransform factory that builds it.
        parameterless = (
            (IdentityTransform, "identity"),
            (HourTransform, "hour"),
            (DayTransform, "day"),
            (MonthTransform, "month"),
            (YearTransform, "year"),
        )
        for dc_type, factory_name in parameterless:
            if isinstance(obj, dc_type):
                return getattr(DaftTransform, factory_name)()

        if isinstance(obj, BucketTransform):
            bucket_params = obj.parameters
            if bucket_params.bucketing_strategy == BucketingStrategy.ICEBERG:
                return DaftTransform.iceberg_bucket(bucket_params.num_buckets)
            raise ValueError(
                f"Unsupported Bucketing Strategy: {bucket_params.bucketing_strategy}"
            )

        if isinstance(obj, TruncateTransform):
            return DaftTransform.iceberg_truncate(obj.parameters.width)

        raise ValueError(f"Unsupported Transform: {obj}")
|
138
|
+
|
139
|
+
|
140
|
+
class DaftPartitionKeyMapper(ModelMapper[DaftPartitionField, PartitionKey]):
    """Converter between Daft partition fields and DeltaCAT partition keys.

    Only the DeltaCAT -> Daft direction (``unmap``) is implemented.
    """

    @staticmethod
    def map(
        obj: Optional[DaftPartitionField],
        schema: Optional[DaftSchema] = None,
        **kwargs,
    ) -> Optional[PartitionKey]:
        """Convert a Daft PartitionField to a DeltaCAT PartitionKey (unsupported).

        Args:
            obj: The DaftPartitionField to convert.
            schema: The Daft schema containing field information.
            **kwargs: Accepted for interface compatibility; not used.

        Raises:
            NotImplementedError: Always.
        """
        # Daft PartitionField only exposes 1 attribute `field`, which is not
        # enough to convert to a DeltaCAT PartitionKey.
        # TODO: request Daft to expose more Python friendly interface for PartitionField
        raise NotImplementedError(
            "Converting Daft PartitionField to DeltaCAT PartitionKey is not supported"
        )

    @staticmethod
    def unmap(
        obj: Optional[PartitionKey],
        schema: Optional[Schema] = None,
        **kwargs,
    ) -> Optional[DaftPartitionField]:
        """Convert a DeltaCAT PartitionKey to a Daft PartitionField.

        Args:
            obj: The DeltaCAT PartitionKey to convert, or None.
            schema: The DeltaCAT Schema used to resolve the key's source field.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The converted Daft PartitionField, or None when ``obj`` is None.

        Raises:
            ValueError: If the key has no name, no schema is given, the key
                has no field locators, or its transform is missing or
                unsupported.
        """
        if obj is None:
            return None
        if obj.name is None:
            raise ValueError("Name is required for PartitionKey conversion")
        if not schema:
            raise ValueError("Schema is required for PartitionKey conversion")
        field_locators = obj.key
        # Truthiness covers both an empty and a missing (None) locator list,
        # so a missing list is reported as a ValueError instead of failing
        # inside len().
        if not field_locators:
            locator_count = 0 if field_locators is None else len(field_locators)
            raise ValueError(
                f"At least 1 PartitionKey FieldLocator is expected, instead got {locator_count}. FieldLocators: {field_locators}."
            )

        # The first FieldLocator in PartitionKey.key points to the source
        # field of the partition field.
        dc_source_field = schema.field(field_locators[0]).arrow
        daft_source_field = DaftFieldMapper.unmap(obj=dc_source_field)
        # Convert the transform (None stays None and is rejected below).
        daft_transform = DaftTransformMapper.unmap(obj.transform)
        daft_partition_field = DaftPartitionKeyMapper.get_daft_partition_field(
            partition_field_name=obj.name,
            daft_source_field=daft_source_field,
            dc_transform=obj.transform,
        )

        return make_partition_field(
            field=daft_partition_field,
            source_field=daft_source_field,
            transform=daft_transform,
        )

    @staticmethod
    def get_daft_partition_field(
        partition_field_name: str,
        daft_source_field: Optional[DaftField],
        # TODO: replace DeltaCAT transform with Daft Transform for uniformity.
        # We cannot use Daft Transform here because Daft Transform doesn't have
        # a Python interface for us to access its attributes.
        # TODO: request Daft to provide a more python friendly interface for Daft Transform
        dc_transform: Optional[Transform],
    ) -> DaftField:
        """Generate the Daft field that represents a partition field.

        The partition field type is inferred from the source field type and
        the transform.

        Args:
            partition_field_name: Name of the resulting partition field.
            daft_source_field: Source field the partition field is derived from.
            dc_transform: DeltaCAT transform applied to the source field to
                produce the partition field.

        Returns:
            Daft field representing the partition field.

        Raises:
            ValueError: If the source field or the transform is missing, or
                the transform type is not supported.
        """
        if daft_source_field is None:
            raise ValueError("Source field is required for PartitionField conversion")
        if dc_transform is None:
            raise ValueError("Transform is required for PartitionField conversion")

        # Below type conversion logic references Daft - Iceberg conversion logic:
        # https://github.com/Eventual-Inc/Daft/blob/7f2e9b5fb50fdfe858be17572f132b37dd6e5ab2/daft/iceberg/iceberg_scan.py#L61-L85
        # NOTE(review): the referenced Daft logic may use a date result type
        # for day transforms -- confirm that int32 is intended here.
        int32_transforms = (
            YearTransform,
            MonthTransform,
            DayTransform,
            HourTransform,
            BucketTransform,
        )
        if isinstance(dc_transform, IdentityTransform):
            result_type = daft_source_field.dtype
        elif isinstance(dc_transform, int32_transforms):
            result_type = DataType.int32()
        elif isinstance(dc_transform, TruncateTransform):
            result_type = daft_source_field.dtype
        else:
            raise ValueError(f"Unsupported transform: {dc_transform}")

        return DaftField.create(
            name=partition_field_name,
            dtype=result_type,
        )
|
@@ -1,11 +1,15 @@
|
|
1
1
|
import os
|
2
2
|
import logging
|
3
3
|
|
4
|
+
import uuid
|
4
5
|
import daft
|
6
|
+
from pyiceberg.catalog import CatalogType
|
7
|
+
|
5
8
|
import deltacat as dc
|
6
9
|
|
7
10
|
from deltacat import logs
|
8
11
|
from deltacat import IcebergCatalog
|
12
|
+
from deltacat.catalog.iceberg import IcebergCatalogConfig
|
9
13
|
from deltacat.examples.common.fixtures import (
|
10
14
|
store_cli_args_in_os_environ,
|
11
15
|
)
|
@@ -30,6 +34,24 @@ driver_logger = logs.configure_application_logger(logging.getLogger(__name__))
|
|
30
34
|
|
31
35
|
|
32
36
|
def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
|
37
|
+
"""
|
38
|
+
This is an e2e example that
|
39
|
+
1. creates a DeltaCAT Table (backed by an Iceberg Table) in Glue
|
40
|
+
2. writes data into the DeltaCAT Table
|
41
|
+
3. reads data from the DeltaCAT Table using Daft
|
42
|
+
|
43
|
+
To run the script:
|
44
|
+
1. prepare an AWS Account
|
45
|
+
1. prepare an S3 location where the data will be written to, which will be used in Step 3.
|
46
|
+
2. prepare an IAM Role that has access to the S3 location and Glue
|
47
|
+
2. retrieve the IAM Role AWS Credential and cache locally in ~/.aws/credentials
|
48
|
+
3. run below command to execute the example
|
49
|
+
```
|
50
|
+
make venv && source venv/bin/activate
|
51
|
+
python -m deltacat.examples.iceberg.iceberg_bucket_writer --warehouse=s3://<YOUR_S3_LOCATION>
|
52
|
+
```
|
53
|
+
|
54
|
+
"""
|
33
55
|
# create any runtime environment required to run the example
|
34
56
|
runtime_env = create_ray_runtime_environment()
|
35
57
|
|
@@ -38,6 +60,7 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
|
|
38
60
|
# Only the `iceberg` data catalog is provided so it will become the default.
|
39
61
|
# If initializing multiple catalogs, use the `default_catalog_name` param
|
40
62
|
# to specify which catalog should be the default.
|
63
|
+
|
41
64
|
dc.init(
|
42
65
|
catalogs={
|
43
66
|
# the name of the DeltaCAT catalog is "iceberg"
|
@@ -49,11 +72,13 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
|
|
49
72
|
name="example-iceberg-catalog",
|
50
73
|
# for additional properties see:
|
51
74
|
# https://py.iceberg.apache.org/configuration/
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
75
|
+
config=IcebergCatalogConfig(
|
76
|
+
type=CatalogType.GLUE,
|
77
|
+
properties={
|
78
|
+
"warehouse": warehouse,
|
79
|
+
"region_name": "us-east-1",
|
80
|
+
},
|
81
|
+
),
|
57
82
|
)
|
58
83
|
},
|
59
84
|
# pass the runtime environment into ray.init()
|
@@ -89,10 +114,10 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
|
|
89
114
|
}
|
90
115
|
)
|
91
116
|
|
92
|
-
# write to a table named `test_namespace.test_table_bucketed
|
117
|
+
# write to a table named `test_namespace.test_table_bucketed-<SUFFIX>`
|
93
118
|
# we don't need to specify which catalog to create this table in since
|
94
119
|
# only the "iceberg" catalog is available
|
95
|
-
table_name = "test_table_bucketed"
|
120
|
+
table_name = f"test_table_bucketed-{uuid.uuid4().hex[:8]}"
|
96
121
|
namespace = "test_namespace"
|
97
122
|
print(f"Creating Glue Table: {namespace}.{table_name}")
|
98
123
|
dc.write_to_table(
|
@@ -106,9 +131,40 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
|
|
106
131
|
)
|
107
132
|
|
108
133
|
print(f"Getting Glue Table: {namespace}.{table_name}")
|
109
|
-
table_definition = dc.get_table(table_name, namespace)
|
134
|
+
table_definition = dc.get_table(name=table_name, namespace=namespace)
|
110
135
|
print(f"Retrieved Glue Table: {table_definition}")
|
111
136
|
|
137
|
+
# Read Data from DeltaCAT Table (backed by Iceberg) using Daft
|
138
|
+
daft_dataframe = dc.read_table(table=table_name, namespace=namespace)
|
139
|
+
|
140
|
+
daft_dataframe.where(df["bid"] > 200.0).show()
|
141
|
+
# Expected result:
|
142
|
+
# ╭────────┬─────────┬─────────╮
|
143
|
+
# │ symbol ┆ bid ┆ ask │
|
144
|
+
# │ --- ┆ --- ┆ --- │
|
145
|
+
# │ Utf8 ┆ Float64 ┆ Float64 │
|
146
|
+
# ╞════════╪═════════╪═════════╡
|
147
|
+
# │ meta ┆ 392.03 ┆ 392.09 │
|
148
|
+
# ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
149
|
+
# │ msft ┆ 403.25 ┆ 403.27 │
|
150
|
+
# ╰────────┴─────────┴─────────╯
|
151
|
+
|
152
|
+
daft_dataframe.select("symbol").show()
|
153
|
+
# Expected result:
|
154
|
+
# ╭────────╮
|
155
|
+
# │ symbol │
|
156
|
+
# │ --- │
|
157
|
+
# │ Utf8 │
|
158
|
+
# ╞════════╡
|
159
|
+
# │ meta │
|
160
|
+
# ├╌╌╌╌╌╌╌╌┤
|
161
|
+
# │ amzn │
|
162
|
+
# ├╌╌╌╌╌╌╌╌┤
|
163
|
+
# │ goog │
|
164
|
+
# ├╌╌╌╌╌╌╌╌┤
|
165
|
+
# │ msft │
|
166
|
+
# ╰────────╯
|
167
|
+
|
112
168
|
|
113
169
|
if __name__ == "__main__":
|
114
170
|
example_script_args = [
|
@@ -121,15 +177,6 @@ if __name__ == "__main__":
|
|
121
177
|
"type": str,
|
122
178
|
},
|
123
179
|
),
|
124
|
-
(
|
125
|
-
[
|
126
|
-
"--STAGE",
|
127
|
-
],
|
128
|
-
{
|
129
|
-
"help": "Example runtime environment stage (e.g. dev, alpha, beta, prod).",
|
130
|
-
"type": str,
|
131
|
-
},
|
132
|
-
),
|
133
180
|
]
|
134
181
|
|
135
182
|
# store any CLI args in the runtime environment
|