deltacat 2.0.0b12__tar.gz → 2.0.0.post2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deltacat-2.0.0b12/deltacat.egg-info → deltacat-2.0.0.post2}/PKG-INFO +172 -58
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/README.md +170 -56
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/__init__.py +1 -1
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/api.py +44 -7
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/main/impl.py +34 -110
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/hello_world.py +10 -4
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/indexer/indexer.py +3 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/indexer/job_runner.py +6 -1
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/schema.py +17 -4
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/aws/test_s3u.py +9 -1
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/test_default_catalog_impl.py +198 -7
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/types/media.py +282 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/types/tables.py +5 -11
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/pandas.py +11 -3
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/polars.py +3 -1
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/pyarrow.py +7 -3
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/url.py +22 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2/deltacat.egg-info}/PKG-INFO +172 -58
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/setup.py +1 -1
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/LICENSE +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/MANIFEST.in +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/annotations.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/aws/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/aws/clients.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/aws/constants.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/aws/s3u.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/benchmark_engine.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/benchmark_report.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/benchmark_suite.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/conftest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/data/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/data/random_row_generator.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/data/row_generator.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/benchmarking/test_benchmark_pipeline.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/delegate.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/interface.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/main/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/model/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/model/catalog.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/model/properties.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/catalog/model/table_definition.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/compaction_session.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/compactor_version.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/materialize_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/repartition_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/model/table_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/repartition_session.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/steps/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/steps/dedupe.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/steps/materialize.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/steps/repartition.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/utils/io.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/utils/round_completion_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/utils/sort_key.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor/utils/system_columns.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/constants.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/private/compaction_utils.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/utils/io.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/constants.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/converter_session.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/model/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/model/convert_input.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/model/convert_input_files.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/model/convert_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/model/converter_session_params.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/pyiceberg/catalog.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/pyiceberg/overrides.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/steps/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/steps/convert.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/steps/dedupe.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/utils/convert_task_options.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/utils/converter_session_utils.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/utils/iceberg_columns.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/utils/io.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/converter/utils/s3u.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/janitor.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/jobs/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/jobs/client.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/resource_estimation/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/resource_estimation/delta.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/resource_estimation/manifest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/resource_estimation/model.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/resource_estimation/parquet.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/stats/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/stats/models/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/stats/models/delta_stats.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/stats/models/stats_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/compute/stats/types.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/constants.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/docs/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/docs/autogen/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/docs/autogen/schema/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/docs/autogen/schema/inference/generate_type_mappings.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/env.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/basic_logging.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/aws/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/bootstrap.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/compactor.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/explorer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/gcp/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/job_runner.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/compactor/utils/common.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/converter/beam/app.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/converter/beam/main.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/experimental/iceberg/iceberg_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/indexer/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/indexer/aws/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/examples/indexer/gcp/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/exceptions.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/catalog/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/catalog/iceberg/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/catalog/iceberg/impl.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/catalog/iceberg/overrides.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/compatibility/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/converter_agent/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/converter_agent/beam/managed.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/converter_agent/table_monitor.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/daft/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/daft/daft_catalog.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/iceberg/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/iceberg/impl.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/iceberg/model.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/iceberg/visitor.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/arrow/serializer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/dataset.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/dataset_executor.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/feather/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/feather/file_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/feather/serializer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/fs/file_provider.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/fs/file_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/fs/input_file.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/fs/output_file.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/logical_plan.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/metastore/delta.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/metastore/json_sst.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/metastore/sst.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/mvp/Table.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/mvp/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/parquet/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/parquet/file_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/parquet/serializer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/block_scanner.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/data_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/data_scan.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/dataset_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/query_expression.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/schema/datatype.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/schema/schema.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/serializer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/serializer_factory.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/shard/range_shard.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/dataset/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/dataset/deltacat_dataset.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/datasink/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/datasink/deltacat_datasink.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/datasource/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/datasource/deltacat_datasource.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/file_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/memcached_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/ray_plasma_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/reader/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/reader/deltacat_read_api.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/redis_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/io/s3_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/logs.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/interface.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/main/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/main/impl.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/delta.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/expression/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/expression/expression.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/expression/visitor.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/interop.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/list_result.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/locator.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/manifest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/metafile.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/namespace.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/partition.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/scan/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/scan/push_down.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/scan/scan_plan.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/scan/scan_task.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/shard.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/sort_key.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/stream.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/table.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/table_version.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/transaction.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/transform.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/model/types.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/util/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/storage/util/scan_planner.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/reader/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/test_cloudpickle_bug_fix.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/test_file_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/test_memcached_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/test_ray_plasma_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/test_redis_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/_io/test_s3_object_store.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/aws/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/aws/test_clients.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/data/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/main/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/model/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/model/test_table_definition.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/catalog/test_catalogs.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/conftest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/converter/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/converter/conftest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/converter/test_convert_session.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/converter/utils.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/resource_estimation/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/resource_estimation/data/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/resource_estimation/test_delta.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/test_compact_partition_rebase.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/test_janitor.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/test_util_common.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/compute/test_util_constant.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/conftest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/daft/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/daft/test_model.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/catalog/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/compatibility/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/daft/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/conftest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/test_dataset.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/test_manifest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/test_utils.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/main/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/main/test_main_storage.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_delete_parameters.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_expression.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_manifest.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_metafile_io.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_partition_scheme.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_schema.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_schema_update.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_shard.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_sort_scheme.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_table_version.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_transaction.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/storage/model/test_transaction_history.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_deltacat_api.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_exceptions.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_logs.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_utils/constants.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_utils/filesystem.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_utils/message_pack_utils.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_utils/pyarrow.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_utils/storage.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/test_utils/utils.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/types/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/types/test_tables.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/data/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/exceptions.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/main_deltacat_storage_mock.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_cloudpickle.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_daft.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_metrics.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_numpy.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_pandas.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_placement.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_polars.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_pyarrow.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/tests/utils/test_resources.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/types/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/types/partial_download.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/arguments.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/cloudpickle.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/common.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/daft.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/export.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/filesystem.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/metafile_locator.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/metrics.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/numpy.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/performance.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/placement.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/ray_utils/__init__.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/ray_utils/collections.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/ray_utils/concurrency.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/ray_utils/dataset.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/ray_utils/performance.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/ray_utils/runtime.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/reader_compatibility_mapping.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/resources.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat/utils/schema.py +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat.egg-info/SOURCES.txt +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat.egg-info/dependency_links.txt +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat.egg-info/requires.txt +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/deltacat.egg-info/top_level.txt +0 -0
- {deltacat-2.0.0b12 → deltacat-2.0.0.post2}/setup.cfg +0 -0
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deltacat
|
3
|
-
Version: 2.0.0b12
|
4
|
-
Summary:
|
3
|
+
Version: 2.0.0.post2
|
4
|
+
Summary: DeltaCAT is a portable Pythonic Data Lakehouse powered by Ray.
|
5
5
|
Home-page: https://github.com/ray-project/deltacat
|
6
6
|
Author: Ray Team
|
7
7
|
Classifier: Development Status :: 4 - Beta
|
@@ -50,28 +50,30 @@ Dynamic: requires-python
|
|
50
50
|
Dynamic: summary
|
51
51
|
|
52
52
|
<p align="center">
|
53
|
-
<img src="media/deltacat-logo-alpha-750.png" alt="deltacat logo" style="width:55%; height:auto; text-align: center;">
|
53
|
+
<img src="https://github.com/ray-project/deltacat/raw/2.0/media/deltacat-logo-alpha-750.png" alt="deltacat logo" style="width:55%; height:auto; text-align: center;">
|
54
54
|
</p>
|
55
55
|
|
56
56
|
DeltaCAT is a portable Pythonic Data Lakehouse powered by [Ray](https://github.com/ray-project/ray). It lets you define and manage
|
57
57
|
fast, scalable, ACID-compliant multimodal data lakes, and has been used to [successfully manage exabyte-scale enterprise
|
58
58
|
data lakes](https://aws.amazon.com/blogs/opensource/amazons-exabyte-scale-migration-from-apache-spark-to-ray-on-amazon-ec2/).
|
59
59
|
|
60
|
+
It provides data lake level transactions & time travel, fast schema evolution for feature enrichment, zero-copy multimodal file processing, schemaless dataset management, and transparent dataset optimization. It runs locally for rapid development or in the cloud for production workloads.
|
61
|
+
|
60
62
|
It uses the Ray distributed compute framework together with [Apache Arrow](https://github.com/apache/arrow) and
|
61
63
|
[Daft](https://github.com/Eventual-Inc/Daft) to efficiently scale common table management tasks, like petabyte-scale
|
62
64
|
merge-on-read and copy-on-write operations.
|
63
65
|
|
64
66
|
DeltaCAT provides the following high-level components:
|
65
|
-
1. [**Catalog**](deltacat/catalog/interface.py): High-level APIs to create, discover, organize, share, and manage datasets.
|
66
|
-
2. [**Compute**](deltacat/compute/): Distributed data management procedures to read, write, and optimize datasets.
|
67
|
-
3. [**Storage**](deltacat/storage/): In-memory and on-disk multimodal dataset formats.
|
67
|
+
1. [**Catalog**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/catalog/interface.py): High-level APIs to create, discover, organize, share, and manage datasets.
|
68
|
+
2. [**Compute**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/compute/): Distributed data management procedures to read, write, and optimize datasets.
|
69
|
+
3. [**Storage**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/storage/): In-memory and on-disk multimodal dataset formats.
|
68
70
|
4. **Sync** (in development): Synchronize DeltaCAT datasets to data warehouses and other table formats.
|
69
71
|
|
70
72
|
## Overview
|
71
73
|
DeltaCAT's **Catalog**, **Compute**, and **Storage** layers work together to bring ACID-compliant data management to any Ray application. These components automate data indexing, change management, dataset read/write optimization, schema evolution, and other common data management tasks across any set of data files readable by Ray Data, Daft, Pandas, Polars, PyArrow, or NumPy.
|
72
74
|
|
73
75
|
<p align="center">
|
74
|
-
<img src="media/deltacat-tech-overview.png" alt="deltacat tech overview" style="width:100%; height:auto; text-align: center;">
|
76
|
+
<img src="https://github.com/ray-project/deltacat/raw/2.0/media/deltacat-tech-overview.png" alt="deltacat tech overview" style="width:100%; height:auto; text-align: center;">
|
75
77
|
</p>
|
76
78
|
|
77
79
|
Data consumers that prefer to stay within the ecosystem of Pythonic data management tools can use DeltaCAT's native table format to manage their data with minimal overhead. For integration with other data analytics frameworks (e.g., Apache Spark, Trino, Apache Flink), DeltaCAT's **Sync** component offers zero-copy synchronization of your tables to Apache Iceberg and other table formats.
|
@@ -79,10 +81,14 @@ Data consumers that prefer to stay within the ecosystem of Pythonic data managem
|
|
79
81
|
## Getting Started
|
80
82
|
DeltaCAT applications run anywhere that Ray runs, including your local laptop, cloud computing cluster, or on-premise cluster.
|
81
83
|
|
82
|
-
DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of
|
84
|
+
DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of data files. A **Catalog** can be thought of as a named data lake containing a set of **Tables**. It provides a root location (e.g., a local file path or S3 Bucket) to store table information, and can be rooted in any [PyArrow-compatible Filesystem](https://arrow.apache.org/docs/python/filesystems.html). **Tables** can be created, read, and written using the `dc.write` and `dc.read` APIs.
|
83
85
|
|
84
86
|
### Quick Start
|
85
87
|
|
88
|
+
Install DeltaCAT with: `pip install deltacat`
|
89
|
+
|
90
|
+
Then run this script to create and read your first table:
|
91
|
+
|
86
92
|
```python
|
87
93
|
import deltacat as dc
|
88
94
|
import pandas as pd
|
@@ -109,7 +115,7 @@ daft_df = dc.read("users") # Returns Daft DataFrame (default)
|
|
109
115
|
daft_df.show() # Materialize and print the DataFrame
|
110
116
|
|
111
117
|
# Append more data and add a new column.
|
112
|
-
# Compaction and schema evolution are handled automatically.
|
118
|
+
# Compaction and zero-copy schema evolution are handled automatically.
|
113
119
|
data = pd.DataFrame({
|
114
120
|
"id": [4, 5, 6],
|
115
121
|
"name": ["Tom", "Simpkin", "Delta"],
|
@@ -129,7 +135,7 @@ DeltaCAT can do much more than just append data to tables and read it back again
|
|
129
135
|
|
130
136
|
<details>
|
131
137
|
|
132
|
-
<summary><span style="font-size: 1.25em; font-weight: bold;">
|
138
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Idempotent Writes</span></summary>
|
133
139
|
|
134
140
|
If you run the quick start example repeatedly from the same working directory, you'll notice that the table it writes to just keeps growing larger. This is because DeltaCAT always **appends** table data by default. One way to prevent this perpetual table growth and make the example idempotent is to use the **REPLACE** write mode if the table already exists:
|
135
141
|
|
@@ -239,7 +245,7 @@ assert dc.dataset_length(daft_df) == 6
|
|
239
245
|
|
240
246
|
<details>
|
241
247
|
|
242
|
-
<summary><span style="font-size: 1.25em; font-weight: bold;">
|
248
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Multi-Format Data Processing</span></summary>
|
243
249
|
|
244
250
|
DeltaCAT natively supports a variety of open dataset and file formats already integrated with Ray and Arrow. You can use `dc.read` to read tables back as a Daft DataFrame, Ray Dataset, Pandas DataFrame, PyArrow Table, Polars DataFrame, NumPy Array, or list of PyArrow ParquetFile objects:
|
245
251
|
|
@@ -329,7 +335,7 @@ print("\n=== NumPy Table ===")
|
|
329
335
|
dc.read("my_numpy_table").show()
|
330
336
|
```
|
331
337
|
|
332
|
-
|
338
|
+
DeltaCAT tables also support persisting data in heterogeneous table file formats like Avro, ORC, or Feather:
|
333
339
|
|
334
340
|
```python
|
335
341
|
data = pd.DataFrame({"id": [1], "name": ["Cheshire"], "age": [3]})
|
@@ -372,9 +378,9 @@ print(pandas_df)
|
|
372
378
|
|
373
379
|
<details>
|
374
380
|
|
375
|
-
<summary><span style="font-size: 1.25em; font-weight: bold;">
|
381
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Live Feature Enrichment</span></summary>
|
376
382
|
|
377
|
-
DeltaCAT can
|
383
|
+
DeltaCAT can update your datasets on-the-fly to keep up with a continuous stream of new insights, and support common ML use-cases like feature enrichment. Just define a table schema with one or more merge keys to start updating and deleting existing records:
|
378
384
|
|
379
385
|
```python
|
380
386
|
import deltacat as dc
|
@@ -385,53 +391,50 @@ import tempfile
|
|
385
391
|
# Initialize DeltaCAT with a fresh temporary catalog
|
386
392
|
dc.init_local(tempfile.mkdtemp())
|
387
393
|
|
388
|
-
#
|
389
|
-
|
394
|
+
# Start with minimal schema - just user_id as merge key and name
|
395
|
+
initial_schema = dc.Schema.of([
|
390
396
|
dc.Field.of(pa.field("user_id", pa.int64()), is_merge_key=True),
|
391
397
|
dc.Field.of(pa.field("name", pa.string())),
|
392
|
-
dc.Field.of(pa.field("age", pa.int32())),
|
393
|
-
dc.Field.of(pa.field("status", pa.string())),
|
394
398
|
])
|
395
399
|
|
396
|
-
# Initial user data
|
400
|
+
# Initial user data - just basic info
|
397
401
|
initial_users = pd.DataFrame({
|
398
402
|
"user_id": [1, 2, 3],
|
399
|
-
"name": ["
|
400
|
-
"age": [3, 7, 2],
|
401
|
-
"status": ["active", "active", "inactive"]
|
403
|
+
"name": ["Jim", "Dinah", "Bob"],
|
402
404
|
})
|
403
405
|
|
404
|
-
# Write initial data with
|
405
|
-
dc.write(initial_users, "users", schema=
|
406
|
+
# Write initial data with minimal schema
|
407
|
+
dc.write(initial_users, "users", schema=initial_schema)
|
406
408
|
|
407
|
-
# Read the data back as a Pandas DataFrame
|
409
|
+
# Read the data back as a Pandas DataFrame
|
408
410
|
df = dc.read("users", read_as=dc.DatasetType.PANDAS)
|
409
|
-
print("=== Initial Users ===")
|
411
|
+
print("=== Initial Users (Basic Info) ===")
|
410
412
|
print(df.sort_values("user_id"))
|
411
413
|
|
412
|
-
#
|
413
|
-
|
414
|
-
"user_id": [
|
415
|
-
"name": ["
|
416
|
-
"age": [
|
417
|
-
"
|
414
|
+
# Later, enrich with new insights: add age/job features + new users
|
415
|
+
enriched_data = pd.DataFrame({
|
416
|
+
"user_id": [1, 3, 4, 5, 6],
|
417
|
+
"name": ["Cheshire", "Felix", "Tom", "Simpkin", "Delta"],
|
418
|
+
"age": [3, 2, 5, 12, 4],
|
419
|
+
"job": ["Tour Guide", "Drifter", "Housekeeper", "Mouser", "Engineer"]
|
418
420
|
})
|
419
421
|
|
420
|
-
#
|
421
|
-
# 1.
|
422
|
-
# 2.
|
423
|
-
|
422
|
+
# DeltaCAT automatically evolves the schema and merges by user_id:
|
423
|
+
# 1. Enriches existing users (Jim -> Cheshire age=3, job="Tour Guide"; Bob -> Felix)
|
424
|
+
# 2. Adds new age/job columns with automatic schema evolution
|
425
|
+
# 3. Inserts new users (Tom, Simpkin, Delta) with full feature set
|
426
|
+
dc.write(enriched_data, "users")
|
424
427
|
|
425
|
-
# Read back to see
|
428
|
+
# Read back to see live feature enrichment results
|
426
429
|
df = dc.read("users", read_as=dc.DatasetType.PANDAS)
|
427
|
-
print("\n===
|
430
|
+
print("\n=== Enriched Users (Age & Job) ===")
|
428
431
|
print(df.sort_values("user_id"))
|
429
432
|
|
430
|
-
# - Cheshire (user_id=1)
|
431
|
-
# - Dinah (user_id=2)
|
432
|
-
# - Felix (user_id=3) updated
|
433
|
-
# - New users (4,5,6)
|
434
|
-
# -
|
433
|
+
# - Cheshire (user_id=1) name updated from Jim, gets age=3, job="Tour Guide"
|
434
|
+
# - Dinah (user_id=2) keeps original name, gets null age/job (missing features)
|
435
|
+
# - Felix (user_id=3) name updated from Bob, gets age=2, job="Drifter"
|
436
|
+
# - New users (4,5,6) added with complete feature set
|
437
|
+
# - Schema automatically evolved to include age/job columns
|
435
438
|
|
436
439
|
# Specify the users to delete.
|
437
440
|
# We only need to specify matching merge key values.
|
@@ -440,7 +443,7 @@ users_to_delete = pd.DataFrame({
|
|
440
443
|
})
|
441
444
|
|
442
445
|
# Delete the records that match our merge keys.
|
443
|
-
dc.write(users_to_delete, "users",
|
446
|
+
dc.write(users_to_delete, "users", mode=dc.TableWriteMode.DELETE)
|
444
447
|
|
445
448
|
# Read the table back to confirm target users have been deleted.
|
446
449
|
df = dc.read("users", read_as=dc.DatasetType.PANDAS)
|
@@ -456,6 +459,117 @@ print(df.sort_values("user_id"))
|
|
456
459
|
|
457
460
|
<details>
|
458
461
|
|
462
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Zero-Copy Multimodal URL Processing</span></summary>
|
463
|
+
|
464
|
+
DeltaCAT can register and process existing multimodal datasets from local or remote URLs. This enables zero-copy distributed processing of images, audio, text, and other file formats:
|
465
|
+
|
466
|
+
```python
|
467
|
+
import deltacat as dc
|
468
|
+
import pandas as pd
|
469
|
+
import pyarrow as pa
|
470
|
+
import tempfile
|
471
|
+
import ray
|
472
|
+
|
473
|
+
# Initialize DeltaCAT with a fresh temporary catalog
|
474
|
+
dc.init_local(tempfile.mkdtemp())
|
475
|
+
|
476
|
+
# Create dataset with DeltaCAT URLs pointing to existing files
|
477
|
+
urls_df = pd.DataFrame({
|
478
|
+
"file_id": [1, 2, 3, 4, 5, 6],
|
479
|
+
"url": [
|
480
|
+
# URLs with common file extensions will have their content type inferred.
|
481
|
+
"https://picsum.photos/id/237/400/300.jpg",
|
482
|
+
"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv",
|
483
|
+
"https://raw.githubusercontent.com/SergLam/Audio-Sample-files/master/sample.mp3",
|
484
|
+
"https://raw.githubusercontent.com/burningtree/awesome-json/master/README.md",
|
485
|
+
"https://raw.githubusercontent.com/microsoft/vscode/main/package.json",
|
486
|
+
# URLs without common file extensions will be read as binary by default.
|
487
|
+
"https://picsum.photos/200"
|
488
|
+
]
|
489
|
+
})
|
490
|
+
|
491
|
+
# Create empty table with merge key to efficiently add insights about each file
|
492
|
+
dc.create_table(
|
493
|
+
"multimodal_files",
|
494
|
+
schema=dc.Schema.of([
|
495
|
+
dc.Field.of(pa.field("file_id", pa.int64()), is_merge_key=True),
|
496
|
+
dc.Field.of(pa.field("url", pa.string()))
|
497
|
+
])
|
498
|
+
)
|
499
|
+
|
500
|
+
# Write URLs to DeltaCAT table
|
501
|
+
dc.write(urls_df, "multimodal_files")
|
502
|
+
|
503
|
+
# UDF to process each file in parallel using Ray Dataset map method
|
504
|
+
def analyze_file(row):
|
505
|
+
file_id = row["file_id"]
|
506
|
+
url = row["url"]
|
507
|
+
|
508
|
+
# DeltaCAT automatically infers the right Ray Data reader for the URL
|
509
|
+
dataset = dc.get(url)
|
510
|
+
records = dataset.take_all()
|
511
|
+
url_type = dc.DatastoreType.from_url(url)
|
512
|
+
|
513
|
+
# Extract standard Ray Dataset fields for each file type
|
514
|
+
if url_type == dc.DatastoreType.IMAGES:
|
515
|
+
image = records[0]["image"]
|
516
|
+
analysis = f"Image {image.shape[1]}x{image.shape[0]} pixels"
|
517
|
+
elif url_type == dc.DatastoreType.CSV:
|
518
|
+
analysis = f"CSV with {len(records)} rows, {len(records[0].keys())} columns"
|
519
|
+
elif url_type == dc.DatastoreType.AUDIO:
|
520
|
+
sample_rate = records[0]["sample_rate"]
|
521
|
+
duration = len(records[0]["amplitude"][0]) / sample_rate
|
522
|
+
analysis = f"Audio {duration:.1f}s, {sample_rate}Hz"
|
523
|
+
elif url_type == dc.DatastoreType.JSON:
|
524
|
+
analysis = f"JSON with {len(records[0].keys())} fields"
|
525
|
+
elif url_type == dc.DatastoreType.TEXT:
|
526
|
+
analysis = f"Text with {len(records)} records"
|
527
|
+
else:
|
528
|
+
analysis = f"Binary with {len(records[0]['bytes'])} bytes"
|
529
|
+
|
530
|
+
return {"file_id": file_id, "analysis": analysis}
|
531
|
+
|
532
|
+
# Read the multimodal_files table as a Ray Dataset
|
533
|
+
ray_dataset = dc.read("multimodal_files", read_as=dc.DatasetType.RAY_DATASET)
|
534
|
+
# Download and analyze each URL in parallel using map
|
535
|
+
results_dataset = ray_dataset.map(analyze_file)
|
536
|
+
|
537
|
+
# Write results back to the multimodal_files table
|
538
|
+
dc.write(results_dataset, "multimodal_files", mode=dc.TableWriteMode.MERGE)
|
539
|
+
|
540
|
+
# Read final results and compare to initial dataset
|
541
|
+
print("\n=== Initial Dataset ===")
|
542
|
+
print(dc.to_pandas(ray_dataset))
|
543
|
+
|
544
|
+
print("\n=== Final Results with Analysis ===")
|
545
|
+
print(dc.read("multimodal_files", read_as=dc.DatasetType.PANDAS))
|
546
|
+
```
|
547
|
+
|
548
|
+
The default dataset type used by `dc.get` is a Ray Dataset but, similar to `dc.read`, `dc.get` can also read URLs into other dataset types like Daft:
|
549
|
+
|
550
|
+
```python
|
551
|
+
import deltacat as dc
|
552
|
+
|
553
|
+
# Create dataset with DeltaCAT URLs pointing to existing files
|
554
|
+
urls = [
|
555
|
+
# URLs with common file extensions will have their content type inferred.
|
556
|
+
"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv",
|
557
|
+
"https://raw.githubusercontent.com/burningtree/awesome-json/master/README.md",
|
558
|
+
# URLs without common file extensions will be read as binary by default.
|
559
|
+
"https://picsum.photos/200"
|
560
|
+
]
|
561
|
+
|
562
|
+
# Download each URL into a Daft DataFrame serially
|
563
|
+
for url in urls:
|
564
|
+
dataset = dc.get(url, read_as=dc.DatasetType.DAFT)
|
565
|
+
print(f"\n=== {url} ===")
|
566
|
+
print(dataset.show())
|
567
|
+
```
|
568
|
+
|
569
|
+
</details>
|
570
|
+
|
571
|
+
<details>
|
572
|
+
|
459
573
|
<summary><span style="font-size: 1.25em; font-weight: bold;">Organizing Tables with Namespaces</span></summary>
|
460
574
|
|
461
575
|
In DeltaCAT, table **Namespaces** are optional but useful for organizing related tables within a catalog:
|
@@ -534,9 +648,9 @@ print(finance_df)
|
|
534
648
|
|
535
649
|
<details>
|
536
650
|
|
537
|
-
<summary><span style="font-size: 1.25em; font-weight: bold;">
|
651
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Level Transactions</span></summary>
|
538
652
|
|
539
|
-
DeltaCAT transactions can span multiple tables and namespaces. Since all operations within a transaction either succeed or fail together, this simplifies keeping related datasets in sync across your entire catalog.
|
653
|
+
DeltaCAT transactions can span multiple tables and namespaces. Because transaction history is maintained at the catalog level, every transaction operates against a consistent snapshot of every object in your data lake. And because all operations within a transaction either succeed or fail together, it is much simpler to keep related datasets in sync across your entire catalog.
|
540
654
|
|
541
655
|
Consider the previous example that organized tables with namespaces. One table tracked customer orders, and another table tracked the lifetime payments of each customer. If one table was updated but not the other, then it would result in an accounting discrepancy. This edge case can be eliminated by using multi-table transactions:
|
542
656
|
|
@@ -630,7 +744,7 @@ print(dc.read("users", namespace="finance", read_as=dc.DatasetType.PANDAS))
|
|
630
744
|
|
631
745
|
<details>
|
632
746
|
|
633
|
-
<summary><span style="font-size: 1.25em; font-weight: bold;">
|
747
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Managing Multiple Data Lakes</span></summary>
|
634
748
|
|
635
749
|
DeltaCAT lets you work with multiple catalogs in a single application. All catalogs registered with DeltaCAT are tracked by a Ray Actor to make them available to all workers in your Ray application.
|
636
750
|
|
@@ -652,8 +766,8 @@ dc.init(catalogs={
|
|
652
766
|
filesystem=pa.fs.LocalFileSystem()
|
653
767
|
)),
|
654
768
|
"prod": dc.Catalog(config=dc.CatalogProperties(
|
655
|
-
root=
|
656
|
-
filesystem=pa.fs.
|
769
|
+
root="s3://example/deltacat/", # Use S3 for prod
|
770
|
+
filesystem=pa.fs.S3FileSystem()
|
657
771
|
))
|
658
772
|
})
|
659
773
|
|
@@ -705,9 +819,9 @@ print(dc.read("financial_data", catalog="prod", read_as=dc.DatasetType.PANDAS))
|
|
705
819
|
|
706
820
|
<details>
|
707
821
|
|
708
|
-
<summary><span style="font-size: 1.25em; font-weight: bold;">
|
822
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Level Time Travel</span></summary>
|
709
823
|
|
710
|
-
DeltaCAT supports time travel queries that let you read all tables in a catalog as they existed at any point in the past. Combined with
|
824
|
+
DeltaCAT supports time travel queries that let you read all tables in a catalog as they existed at any point in the past. Combined with catalog-level transactions, this enables consistent point-in-time views across your entire data lake.
|
711
825
|
|
712
826
|
```python
|
713
827
|
import deltacat as dc
|
@@ -847,7 +961,7 @@ print("\nTime travel validation successful!")
|
|
847
961
|
|
848
962
|
<summary><span style="font-size: 1.25em; font-weight: bold;">Multimodal Batch Inference</span></summary>
|
849
963
|
|
850
|
-
DeltaCAT's support for merging new fields into existing records and multimodal datasets can be used to build a multimodal batch inference pipeline. For example, the following code indexes images of cats, then merges in new fields with breed
|
964
|
+
DeltaCAT's support for merging new fields into existing records and multimodal datasets can be used to build a multimodal batch inference pipeline. For example, the following code indexes images of cats, then merges in new fields with breed predictions for each image:
|
851
965
|
|
852
966
|
> **Requirements**: This example requires PyTorch ≥ 2.8.0 and torchvision ≥ 0.23.0. Install via: `pip install "torch>=2.8.0" "torchvision>=0.23.0"`
|
853
967
|
|
@@ -938,7 +1052,7 @@ final_df.show()
|
|
938
1052
|
|
939
1053
|
<summary><span style="font-size: 1.25em; font-weight: bold;">LLM Batch Inference</span></summary>
|
940
1054
|
|
941
|
-
DeltaCAT multi-table transactions, time travel
|
1055
|
+
DeltaCAT multi-table transactions, data lake time travel, and automatic schema evolution can be used to create auditable LLM batch inference pipelines. For example, the following code tries different approaches to analyze the overall tone of customer feedback, then generates customer service responses based on the analysis:
|
942
1056
|
|
943
1057
|
```python
|
944
1058
|
import deltacat as dc
|
@@ -1146,18 +1260,18 @@ Taken together, these requirements make DeltaCAT suitable for production use on
|
|
1146
1260
|
## Additional Resources
|
1147
1261
|
### Table Documentation
|
1148
1262
|
|
1149
|
-
The [Table](deltacat/docs/table/README.md) documentation provides a more comprehensive overview of DeltaCAT's table management APIs, including how to create, read, write, and manage tables.
|
1263
|
+
The [Table](https://github.com/ray-project/deltacat/tree/2.0/deltacat/docs/table/README.md) documentation provides a more comprehensive overview of DeltaCAT's table management APIs, including how to create, read, write, and manage tables.
|
1150
1264
|
|
1151
1265
|
### Schema Documentation
|
1152
1266
|
|
1153
|
-
The [Schema](deltacat/docs/schema/README.md) documentation provides a more comprehensive overview of DeltaCAT's schema management APIs, supported data types, file formats, and data consistency guarantees.
|
1267
|
+
The [Schema](https://github.com/ray-project/deltacat/tree/2.0/deltacat/docs/schema/README.md) documentation provides a more comprehensive overview of DeltaCAT's schema management APIs, supported data types, file formats, and data consistency guarantees.
|
1154
1268
|
|
1155
1269
|
### DeltaCAT URLs and Filesystem APIs
|
1156
|
-
The [DeltaCAT API Tests](deltacat/tests/test_deltacat_api.py) provide examples of how to efficiently explore, clone, and manipulate DeltaCAT catalogs by using DeltaCAT URLs together with filesystem-like list/copy/get/put APIs.
|
1270
|
+
The [DeltaCAT API Tests](https://github.com/ray-project/deltacat/tree/2.0/deltacat/tests/test_deltacat_api.py) provide examples of how to efficiently explore, clone, and manipulate DeltaCAT catalogs by using DeltaCAT URLs together with filesystem-like list/copy/get/put APIs.
|
1157
1271
|
|
1158
1272
|
### DeltaCAT Catalog APIs
|
1159
|
-
The [Default Catalog Tests](deltacat/tests/catalog/test_default_catalog_impl.py) provide more exhaustive examples of DeltaCAT **Catalog** API behavior.
|
1273
|
+
The [Default Catalog Tests](https://github.com/ray-project/deltacat/tree/2.0/deltacat/tests/catalog/test_default_catalog_impl.py) provide more exhaustive examples of DeltaCAT **Catalog** API behavior.
|
1160
1274
|
|
1161
1275
|
### Examples
|
1162
1276
|
|
1163
|
-
The [DeltaCAT Examples](deltacat/examples/) show how to build more advanced applications like external data source indexers and custom dataset compactors. They also demonstrate some experimental features like Apache Iceberg and Apache Beam integrations.
|
1277
|
+
The [DeltaCAT Examples](https://github.com/ray-project/deltacat/tree/2.0/deltacat/examples/) show how to build more advanced applications like external data source indexers and custom dataset compactors. They also demonstrate some experimental features like Apache Iceberg and Apache Beam integrations.
|