deltacat 2.0.0.post2__tar.gz → 2.0.0.post4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deltacat-2.0.0.post2/deltacat.egg-info → deltacat-2.0.0.post4}/PKG-INFO +256 -53
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/README.md +255 -52
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/__init__.py +10 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/api.py +83 -15
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/catalog/__init__.py +6 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/catalog/delegate.py +170 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/catalog/interface.py +35 -2
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/catalog/main/impl.py +137 -103
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/catalog/model/catalog.py +150 -35
- deltacat-2.0.0.post4/deltacat/catalog/model/properties.py +333 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/private/compaction_utils.py +8 -2
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/steps/merge.py +9 -7
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/converter_session.py +15 -10
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +7 -5
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/utils/io.py +22 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/janitor.py +38 -15
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/constants.py +11 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/bootstrap.py +3 -1
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/explorer.py +0 -1
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/utils/common.py +0 -1
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +0 -1
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/exceptions.py +15 -0
- deltacat-2.0.0.post4/deltacat/experimental/compatibility/backfill_transaction_partitions.py +513 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/converter_agent/table_monitor.py +2 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/daft/daft_catalog.py +1 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +7 -2
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/interface.py +6 -7
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/main/impl.py +209 -121
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/delta.py +22 -8
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/manifest.py +81 -9
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/metafile.py +113 -30
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/namespace.py +11 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/partition.py +19 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/stream.py +10 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/table.py +10 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/table_version.py +10 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/transaction.py +259 -108
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/types.py +1 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +57 -6
- deltacat-2.0.0.post4/deltacat/tests/catalog/model/test_properties_transaction_migration.py +232 -0
- deltacat-2.0.0.post4/deltacat/tests/catalog/test_catalogs.py +651 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/catalog/test_default_catalog_impl.py +1493 -47
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -18
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/converter/test_convert_session.py +2 -2
- deltacat-2.0.0.post4/deltacat/tests/compute/converter/test_converter_commit_conflict_resolution.py +626 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/test_janitor.py +60 -38
- deltacat-2.0.0.post4/deltacat/tests/conftest.py +56 -0
- deltacat-2.0.0.post4/deltacat/tests/experimental/compatibility/test_backfill_transaction_partitions.py +477 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/main/test_main_storage.py +17 -8
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_metafile_io.py +142 -18
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_transaction_history.py +128 -68
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_deltacat_api.py +334 -25
- deltacat-2.0.0.post4/deltacat/tests/utils/test_filesystem.py +3319 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/types/media.py +0 -4
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/types/tables.py +111 -113
- deltacat-2.0.0.post4/deltacat/utils/filesystem.py +1590 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/url.py +89 -18
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4/deltacat.egg-info}/PKG-INFO +256 -53
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat.egg-info/SOURCES.txt +5 -2
- deltacat-2.0.0.post2/deltacat/catalog/model/properties.py +0 -155
- deltacat-2.0.0.post2/deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +0 -201
- deltacat-2.0.0.post2/deltacat/tests/catalog/test_catalogs.py +0 -321
- deltacat-2.0.0.post2/deltacat/tests/conftest.py +0 -25
- deltacat-2.0.0.post2/deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +0 -582
- deltacat-2.0.0.post2/deltacat/utils/filesystem.py +0 -450
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/LICENSE +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/MANIFEST.in +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/annotations.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/aws/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/aws/clients.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/aws/constants.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/aws/s3u.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/benchmark_engine.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/benchmark_report.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/benchmark_suite.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/conftest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/data/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/data/random_row_generator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/data/row_generator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/benchmarking/test_benchmark_pipeline.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/catalog/main/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/catalog/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/catalog/model/table_definition.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/compaction_session.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/compactor_version.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/materialize_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/repartition_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/model/table_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/repartition_session.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/steps/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/steps/dedupe.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/steps/materialize.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/steps/repartition.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/utils/io.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/utils/round_completion_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/utils/sort_key.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor/utils/system_columns.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/constants.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/utils/io.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/constants.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/model/convert_input.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/model/convert_input_files.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/model/convert_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/model/converter_session_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/pyiceberg/catalog.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/pyiceberg/overrides.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/steps/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/steps/convert.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/steps/dedupe.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/utils/convert_task_options.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/utils/converter_session_utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/utils/iceberg_columns.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/converter/utils/s3u.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/jobs/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/jobs/client.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/resource_estimation/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/resource_estimation/delta.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/resource_estimation/manifest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/resource_estimation/model.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/resource_estimation/parquet.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/stats/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/stats/models/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/stats/models/delta_stats.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/stats/models/stats_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/compute/stats/types.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/docs/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/docs/autogen/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/docs/autogen/schema/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/docs/autogen/schema/inference/generate_type_mappings.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/env.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/basic_logging.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/aws/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/compactor.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/gcp/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/job_runner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/converter/beam/app.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/converter/beam/main.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/experimental/iceberg/iceberg_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/hello_world.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/indexer/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/indexer/aws/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/indexer/gcp/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/indexer/indexer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/examples/indexer/job_runner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/catalog/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/catalog/iceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/catalog/iceberg/impl.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/catalog/iceberg/overrides.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/compatibility/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/converter_agent/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/converter_agent/beam/managed.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/daft/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/iceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/iceberg/impl.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/iceberg/model.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/iceberg/visitor.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/arrow/serializer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/dataset_executor.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/feather/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/feather/file_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/feather/serializer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/fs/file_provider.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/fs/file_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/fs/input_file.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/fs/output_file.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/logical_plan.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/metastore/delta.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/metastore/json_sst.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/metastore/sst.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/mvp/Table.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/mvp/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/parquet/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/parquet/file_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/parquet/serializer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/block_scanner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/data_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/data_scan.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/dataset_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/query_expression.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/schema/datatype.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/schema/schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/serializer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/serializer_factory.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/shard/range_shard.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/dataset/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/dataset/deltacat_dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/datasink/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/datasink/deltacat_datasink.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/datasource/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/datasource/deltacat_datasource.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/file_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/memcached_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/ray_plasma_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/reader/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/reader/deltacat_read_api.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/redis_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/io/s3_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/logs.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/main/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/expression/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/expression/expression.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/expression/visitor.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/interop.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/list_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/locator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/scan/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/scan/push_down.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/scan/scan_plan.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/scan/scan_task.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/shard.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/sort_key.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/model/transform.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/util/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/storage/util/scan_planner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/reader/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/test_cloudpickle_bug_fix.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/test_file_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/test_memcached_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/test_ray_plasma_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/test_redis_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/_io/test_s3_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/aws/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/aws/test_clients.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/aws/test_s3u.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/catalog/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/catalog/data/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/catalog/main/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/catalog/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/catalog/model/test_table_definition.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/conftest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/converter/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/converter/conftest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/converter/utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/resource_estimation/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/resource_estimation/data/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/resource_estimation/test_delta.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/test_compact_partition_rebase.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/test_util_common.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/compute/test_util_constant.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/daft/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/daft/test_model.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/catalog/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/compatibility/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/daft/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/conftest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/test_dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/test_manifest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/test_utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/main/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_delete_parameters.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_expression.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_manifest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_partition_scheme.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_schema_update.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_shard.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_sort_scheme.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_table_version.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/storage/model/test_transaction.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_exceptions.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_logs.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_utils/constants.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_utils/filesystem.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_utils/message_pack_utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_utils/pyarrow.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_utils/storage.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/test_utils/utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/types/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/types/test_tables.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/data/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/exceptions.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/main_deltacat_storage_mock.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_cloudpickle.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_daft.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_metrics.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_numpy.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_pandas.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_placement.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_polars.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_pyarrow.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/tests/utils/test_resources.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/types/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/types/partial_download.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/arguments.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/cloudpickle.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/common.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/daft.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/export.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/metafile_locator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/metrics.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/numpy.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/pandas.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/performance.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/placement.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/polars.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/pyarrow.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/ray_utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/ray_utils/collections.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/ray_utils/concurrency.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/ray_utils/dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/ray_utils/performance.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/ray_utils/runtime.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/reader_compatibility_mapping.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/resources.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat/utils/schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat.egg-info/dependency_links.txt +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat.egg-info/requires.txt +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/deltacat.egg-info/top_level.txt +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/setup.cfg +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post4}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deltacat
|
3
|
-
Version: 2.0.0.post2
|
3
|
+
Version: 2.0.0.post4
|
4
4
|
Summary: DeltaCAT is a portable Pythonic Data Lakehouse powered by Ray.
|
5
5
|
Home-page: https://github.com/ray-project/deltacat
|
6
6
|
Author: Ray Team
|
@@ -53,24 +53,20 @@ Dynamic: summary
|
|
53
53
|
<img src="https://github.com/ray-project/deltacat/raw/2.0/media/deltacat-logo-alpha-750.png" alt="deltacat logo" style="width:55%; height:auto; text-align: center;">
|
54
54
|
</p>
|
55
55
|
|
56
|
-
DeltaCAT is a portable
|
57
|
-
fast, scalable, ACID-compliant multimodal data lakes, and has been used to [successfully manage exabyte-scale enterprise
|
58
|
-
data lakes](https://aws.amazon.com/blogs/opensource/amazons-exabyte-scale-migration-from-apache-spark-to-ray-on-amazon-ec2/).
|
56
|
+
DeltaCAT is a portable Multimodal Lakehouse powered by [Ray](https://github.com/ray-project/ray), [Apache Arrow](https://github.com/apache/arrow), and [Daft](https://github.com/Eventual-Inc/Daft). It lets you create ACID-compliant multimodal data lakes [that efficiently scale to exabytes of production data](https://aws.amazon.com/blogs/opensource/amazons-exabyte-scale-migration-from-apache-spark-to-ray-on-amazon-ec2/).
|
59
57
|
|
60
|
-
It provides data lake level transactions & time travel,
|
58
|
+
It provides data lake level transactions & time travel, zero-copy schema evolution, zero-copy multimodal file processing (image, audio, video, text, etc.), and transparent dataset optimization. It runs locally for rapid development or in the cloud for production workloads. It runs on any filesystem for easy setup and sharing - no external catalog services, lock managers, or key value stores required.
|
61
59
|
|
62
|
-
|
63
|
-
|
64
|
-
merge-on-read and copy-on-write operations.
|
60
|
+
|
61
|
+
## Overview
|
65
62
|
|
66
63
|
DeltaCAT provides the following high-level components:
|
67
|
-
1. [**Catalog**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/catalog/interface.py):
|
68
|
-
2. [**Compute**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/compute/): Distributed data management procedures
|
69
|
-
3. [**Storage**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/storage/):
|
64
|
+
1. [**Catalog**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/catalog/interface.py): Pythonic APIs to discover, read, write, and manage datasets.
|
65
|
+
2. [**Compute**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/compute/): Distributed data management procedures that automatically optimize your datasets.
|
66
|
+
3. [**Storage**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/storage/): A portable multimodal data lake format useable with any filesystem.
|
70
67
|
4. **Sync** (in development): Synchronize DeltaCAT datasets to data warehouses and other table formats.
|
71
68
|
|
72
|
-
|
73
|
-
DeltaCAT's **Catalog**, **Compute**, and **Storage** layers work together to bring ACID-compliant data management to any Ray application. These components automate data indexing, change management, dataset read/write optimization, schema evolution, and other common data management tasks across any set of data files readable by Ray Data, Daft, Pandas, Polars, PyArrow, or NumPy.
|
69
|
+
DeltaCAT's **Catalog**, **Compute**, and **Storage** layers work together to bring ACID-compliant data management to any Ray application. These components automate data indexing, change management, dataset read/write optimization, schema evolution, and other common data management tasks across any set of data files readable by [Pandas](https://github.com/pandas-dev/pandas), [NumPy](https://github.com/numpy/numpy), [Polars](https://github.com/pola-rs/polars), [PyArrow](https://arrow.apache.org/docs/python/index.html), [Ray Data](https://docs.ray.io/en/latest/data/data.html), and [Daft](https://docs.daft.ai/en/stable/api/dataframe/).
|
74
70
|
|
75
71
|
<p align="center">
|
76
72
|
<img src="https://github.com/ray-project/deltacat/raw/2.0/media/deltacat-tech-overview.png" alt="deltacat tech overview" style="width:100%; height:auto; text-align: center;">
|
@@ -81,7 +77,8 @@ Data consumers that prefer to stay within the ecosystem of Pythonic data managem
|
|
81
77
|
## Getting Started
|
82
78
|
DeltaCAT applications run anywhere that Ray runs, including your local laptop, cloud computing cluster, or on-premise cluster.
|
83
79
|
|
84
|
-
DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of data files. A **Catalog** can be thought of as a named data lake
|
80
|
+
DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of data files. A **Catalog** can be thought of as a named data lake that contains a set of **Tables**. A **Catalog** provides a root location (e.g., a local file path or S3 Bucket) to store information about all your **Tables**, and can be rooted in any [PyArrow-compatible Filesystem](https://arrow.apache.org/docs/python/filesystems.html). **Tables** can be created, read, and written using the `dc.write` and `dc.read` APIs.
|
81
|
+
|
85
82
|
|
86
83
|
### Quick Start
|
87
84
|
|
@@ -114,7 +111,7 @@ dc.write(data, "users")
|
|
114
111
|
daft_df = dc.read("users") # Returns Daft DataFrame (default)
|
115
112
|
daft_df.show() # Materialize and print the DataFrame
|
116
113
|
|
117
|
-
#
|
114
|
+
# Add more data and add a new column.
|
118
115
|
# Compaction and zero-copy schema evolution are handled automatically.
|
119
116
|
data = pd.DataFrame({
|
120
117
|
"id": [4, 5, 6],
|
@@ -131,13 +128,13 @@ daft_df.select("name", "age", "city").show()
|
|
131
128
|
```
|
132
129
|
|
133
130
|
### Core Concepts
|
134
|
-
DeltaCAT can do much more than just
|
131
|
+
DeltaCAT can do much more than just add data to tables and read it back again. Expand the sections below to see examples of other core DeltaCAT concepts and APIs.
|
135
132
|
|
136
133
|
<details>
|
137
134
|
|
138
135
|
<summary><span style="font-size: 1.25em; font-weight: bold;">Idempotent Writes</span></summary>
|
139
136
|
|
140
|
-
If you run the quick start example repeatedly from the same working directory, you'll notice that the table it writes to just keeps growing larger. This is because DeltaCAT always **
|
137
|
+
If you run the quick start example repeatedly from the same working directory, you'll notice that the table it writes to just keeps growing larger. This is because DeltaCAT always **adds** table data by default. One way to prevent this perpetual table growth and make the example idempotent is to use the **REPLACE** write mode if the table already exists:
|
141
138
|
|
142
139
|
```python
|
143
140
|
import deltacat as dc
|
@@ -171,7 +168,7 @@ dc.write(data, "users", mode=write_mode)
|
|
171
168
|
daft_df = dc.read("users") # Returns Daft DataFrame (default)
|
172
169
|
daft_df.show() # Materialize and print the DataFrame
|
173
170
|
|
174
|
-
# Explicitly
|
171
|
+
# Explicitly add more data and add a new column.
|
175
172
|
# Compaction and schema evolution are handled automatically.
|
176
173
|
data = pd.DataFrame({
|
177
174
|
"id": [4, 5, 6],
|
@@ -179,7 +176,7 @@ data = pd.DataFrame({
|
|
179
176
|
"age": [2, 12, 4],
|
180
177
|
"city": ["Hollywood", "Gloucester", "San Francisco"]
|
181
178
|
})
|
182
|
-
dc.write(data, "users", mode=dc.TableWriteMode.
|
179
|
+
dc.write(data, "users", mode=dc.TableWriteMode.ADD)
|
183
180
|
|
184
181
|
# Read the full table back into a Daft DataFrame.
|
185
182
|
daft_df = dc.read("users")
|
@@ -223,7 +220,7 @@ dc.write(data, "users", mode=dc.TableWriteMode.CREATE)
|
|
223
220
|
daft_df = dc.read("users") # Returns Daft DataFrame (default)
|
224
221
|
daft_df.show() # Materialize and print the DataFrame
|
225
222
|
|
226
|
-
# Explicitly
|
223
|
+
# Explicitly add more data and add a new column.
|
227
224
|
# Compaction and schema evolution are handled automatically.
|
228
225
|
data = pd.DataFrame({
|
229
226
|
"id": [4, 5, 6],
|
@@ -231,7 +228,7 @@ data = pd.DataFrame({
|
|
231
228
|
"age": [2, 12, 4],
|
232
229
|
"city": ["Hollywood", "Gloucester", "San Francisco"]
|
233
230
|
})
|
234
|
-
dc.write(data, "users", mode=dc.TableWriteMode.
|
231
|
+
dc.write(data, "users", mode=dc.TableWriteMode.ADD)
|
235
232
|
|
236
233
|
# Read the full table back into a Daft DataFrame.
|
237
234
|
daft_df = dc.read("users")
|
@@ -243,9 +240,119 @@ assert dc.dataset_length(daft_df) == 6
|
|
243
240
|
|
244
241
|
</details>
|
245
242
|
|
243
|
+
|
244
|
+
<details>
|
245
|
+
|
246
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Ordered Writes</span></summary>
|
247
|
+
|
248
|
+
DeltaCAT writes are unordered by default, which means that the order in which data is read back from the table isn't guaranteed to match the order in which it was written. While this is useful for preventing conflicts between concurrent writers, you can also use the **APPEND** write mode to preserve write order and raise explicit concurrency conflicts between parallel writers:
|
249
|
+
|
250
|
+
```python
|
251
|
+
import deltacat as dc
|
252
|
+
import pandas as pd
|
253
|
+
|
254
|
+
# Initialize DeltaCAT with a default local catalog.
|
255
|
+
# Ray will be initialized automatically.
|
256
|
+
# Catalog files will be stored in .deltacat/ in the current working directory.
|
257
|
+
dc.init_local()
|
258
|
+
|
259
|
+
# Create data to write.
|
260
|
+
data = pd.DataFrame({
|
261
|
+
"id": [1, 2],
|
262
|
+
"name": ["Cheshire", "Dinah"],
|
263
|
+
"age": [3, 7]
|
264
|
+
})
|
265
|
+
|
266
|
+
# Derive a DeltaCAT schema for the data.
|
267
|
+
schema = dc.Schema.of(dc.dataset_schema(data))
|
268
|
+
|
269
|
+
# Create an empty table to hold ordered user data.
|
270
|
+
if not dc.table_exists("users_ordered"):
|
271
|
+
dc.create_table("users_ordered", schema=schema)
|
272
|
+
|
273
|
+
# Write the first ordered delta to the table.
|
274
|
+
dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
|
275
|
+
|
276
|
+
# Write the second ordered delta to the table.
|
277
|
+
data = pd.DataFrame({
|
278
|
+
"id": [3, 4],
|
279
|
+
"name": ["Felix", "Tom"],
|
280
|
+
"age": [2, 12],
|
281
|
+
"city": ["Hollywood", "Gloucester"]
|
282
|
+
})
|
283
|
+
dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
|
284
|
+
|
285
|
+
# Write the third ordered delta to the table.
|
286
|
+
data = pd.DataFrame({
|
287
|
+
"id": [5, 6],
|
288
|
+
"name": ["Simpkin", "Delta"],
|
289
|
+
"age": [12, 4],
|
290
|
+
"city": ["San Francisco", "San Francisco"]
|
291
|
+
})
|
292
|
+
dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
|
293
|
+
|
294
|
+
# Read the data back as a Pandas DataFrame, and ensure that the
|
295
|
+
# order of the records returned matches the order they were written.
|
296
|
+
pandas_df = dc.read("users_ordered", read_as=dc.DatasetType.PANDAS)
|
297
|
+
print(pandas_df)
|
298
|
+
```
|
299
|
+
|
300
|
+
</details>
|
301
|
+
|
246
302
|
<details>
|
247
303
|
|
248
|
-
<summary><span style="font-size: 1.25em; font-weight: bold;">
|
304
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Schemaless Tables</span></summary>
|
305
|
+
|
306
|
+
Tables created automatically via `dc.write` have a schema inferred from the data written by default. However, if you create an empty table without providing a schema, it defaults to schemaless. Writes to schemaless tables are more efficient and flexible, since they simply track the location and basic metadata associated with the data files written to the table. However, if you know that a unified schema can be derived for your schemaless data, then you can still read it back as a structured dataset:
|
307
|
+
|
308
|
+
```python
|
309
|
+
import deltacat as dc
|
310
|
+
import pandas as pd
|
311
|
+
|
312
|
+
# Initialize DeltaCAT with a default local catalog.
|
313
|
+
# Ray will be initialized automatically.
|
314
|
+
# Catalog files will be stored in .deltacat/ in the current working directory.
|
315
|
+
dc.init_local()
|
316
|
+
|
317
|
+
# Create data to write.
|
318
|
+
data = pd.DataFrame({
|
319
|
+
"id": [1, 2],
|
320
|
+
"name": ["Cheshire", "Dinah"],
|
321
|
+
"age": [3, 7]
|
322
|
+
})
|
323
|
+
|
324
|
+
# Create an empty schemaless table to hold ordered user data.
|
325
|
+
if not dc.table_exists("users_schemaless"):
|
326
|
+
dc.create_table("users_schemaless")
|
327
|
+
|
328
|
+
# Write the first ordered delta to the table.
|
329
|
+
dc.write(data, "users_schemaless", mode=dc.TableWriteMode.APPEND)
|
330
|
+
|
331
|
+
# Write the second ordered delta to the table.
|
332
|
+
data = pd.DataFrame({
|
333
|
+
"id": [3, 4],
|
334
|
+
"name": ["Felix", "Tom"],
|
335
|
+
"age": [2, 12],
|
336
|
+
"city": ["Hollywood", "Gloucester"]
|
337
|
+
})
|
338
|
+
dc.write(data, "users_schemaless", mode=dc.TableWriteMode.APPEND)
|
339
|
+
|
340
|
+
# Read back the file manifest of the schemaless table.
|
341
|
+
# Notice that file paths, sizes, etc. are returned instead of the dataframes written.
|
342
|
+
manifest_df = dc.read("users_schemaless", read_as=dc.DatasetType.PANDAS)
|
343
|
+
print(manifest_df)
|
344
|
+
|
345
|
+
# Use from_manifest_table to convert the manifest table to a structured dataset.
|
346
|
+
structured_daft_df = dc.from_manifest_table(manifest_df)
|
347
|
+
structured_daft_df.show()
|
348
|
+
```
|
349
|
+
|
350
|
+
</details>
|
351
|
+
|
352
|
+
|
353
|
+
<details>
|
354
|
+
|
355
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Working Across Dataset and File Types</span></summary>
|
249
356
|
|
250
357
|
DeltaCAT natively supports a variety of open dataset and file formats already integrated with Ray and Arrow. You can use `dc.read` to read tables back as a Daft DataFrame, Ray Dataset, Pandas DataFrame, PyArrow Table, Polars DataFrame, NumPy Array, or list of PyArrow ParquetFile objects:
|
251
358
|
|
@@ -600,6 +707,10 @@ order_data = pd.DataFrame({
|
|
600
707
|
"product_id": [101, 102, 103],
|
601
708
|
"quantity": [2, 1, 2]
|
602
709
|
})
|
710
|
+
# Create identity, inventory, and sales namespaces
|
711
|
+
dc.create_namespace("identity")
|
712
|
+
dc.create_namespace("inventory")
|
713
|
+
dc.create_namespace("sales")
|
603
714
|
|
604
715
|
# Write tables to different namespaces to organize them by domain
|
605
716
|
dc.write(user_data, "users", namespace="identity")
|
@@ -625,7 +736,10 @@ finance_users = pd.DataFrame({
|
|
625
736
|
"preferred_payment_method": ["credit", "cash", "paypal"]
|
626
737
|
})
|
627
738
|
|
739
|
+
dc.create_namespace("marketing")
|
628
740
|
dc.write(marketing_users, "users", namespace="marketing")
|
741
|
+
|
742
|
+
dc.create_namespace("finance")
|
629
743
|
dc.write(finance_users, "users", namespace="finance")
|
630
744
|
|
631
745
|
# Each namespace maintains its own "users" table with different schemas
|
@@ -671,6 +785,7 @@ product_data = pd.DataFrame({
|
|
671
785
|
})
|
672
786
|
|
673
787
|
# The product catalog can be created independently.
|
788
|
+
dc.create_namespace("inventory")
|
674
789
|
dc.write(product_data, "catalog", namespace="inventory")
|
675
790
|
|
676
791
|
print(f"\n=== Initial Product Data ===")
|
@@ -697,7 +812,9 @@ finance_schema = dc.Schema.of([
|
|
697
812
|
# Create user identities and user finance data within a single transaction.
|
698
813
|
# Since transactions are atomic, this prevents accounting discrepancies.
|
699
814
|
with dc.transaction():
|
815
|
+
dc.create_namespace("identity")
|
700
816
|
dc.write(user_data, "users", namespace="identity")
|
817
|
+
dc.create_namespace("finance")
|
701
818
|
dc.write(initial_finance, "users", namespace="finance", schema=finance_schema)
|
702
819
|
|
703
820
|
print(f"\n=== Initial User Data ===")
|
@@ -716,6 +833,7 @@ new_orders = pd.DataFrame({
|
|
716
833
|
# Process new orders and update lifetime payment totals within a single transaction.
|
717
834
|
with dc.transaction():
|
718
835
|
# Step 1: Write the new orders
|
836
|
+
dc.create_namespace("sales")
|
719
837
|
dc.write(new_orders, "transactions", namespace="sales")
|
720
838
|
|
721
839
|
# Step 2: Read back transactions and products to compute actual totals
|
@@ -731,6 +849,7 @@ with dc.transaction():
|
|
731
849
|
finance_updates.columns = ["user_id", "lifetime_payments"]
|
732
850
|
|
733
851
|
# Step 4: Write the computed totals
|
852
|
+
dc.create_namespace("finance")
|
734
853
|
dc.write(finance_updates, "users", namespace="finance", mode=dc.TableWriteMode.MERGE)
|
735
854
|
|
736
855
|
# Verify that orders and lifetime payments are kept in sync.
|
@@ -760,16 +879,14 @@ import tempfile
|
|
760
879
|
from decimal import Decimal
|
761
880
|
|
762
881
|
# Initialize catalogs with separate names and catalog roots.
|
763
|
-
dc.init(
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
))
|
772
|
-
})
|
882
|
+
dc.init(
|
883
|
+
catalogs={
|
884
|
+
# Use temporary directory for staging
|
885
|
+
"staging": dc.Catalog(dc.CatalogProperties(tempfile.mkdtemp())),
|
886
|
+
# Use S3 for prod
|
887
|
+
"prod": dc.Catalog(dc.CatalogProperties("s3://example/deltacat"))
|
888
|
+
}
|
889
|
+
)
|
773
890
|
|
774
891
|
# Create a PyArrow table with decimal256 data
|
775
892
|
decimal_table = pa.table({
|
@@ -817,6 +934,92 @@ print(dc.read("financial_data", catalog="prod", read_as=dc.DatasetType.PANDAS))
|
|
817
934
|
|
818
935
|
</details>
|
819
936
|
|
937
|
+
<details>
|
938
|
+
|
939
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Sharing & Portability</span></summary>
|
940
|
+
|
941
|
+
DeltaCAT catalogs are self-contained directories on a filesystem, so you can easily share your data lake with others. A local catalog on your laptop can be compressed and sent anywhere. A cloud catalog in S3, GCS, or Azure Blob Storage can be shared via URL. The read/write permissions of your catalog are the read/write permissions of your filesystem.
|
942
|
+
|
943
|
+
For example, you can zip up your local catalog and upload it to S3 via:
|
944
|
+
```bash
|
945
|
+
# zip a local catalog
|
946
|
+
zip -r catalog.zip .deltacat/
|
947
|
+
|
948
|
+
# copy the catalog to a cloud bucket
|
949
|
+
aws s3 cp catalog.zip s3://my-bucket/catalog.zip
|
950
|
+
```
|
951
|
+
|
952
|
+
The person you shared it with can retrieve and decompress it via:
|
953
|
+
```bash
|
954
|
+
# copy the cloud catalog to local disk
|
955
|
+
aws s3 cp s3://my-bucket/catalog.zip .
|
956
|
+
|
957
|
+
# unzip the catalog to a local directory
|
958
|
+
unzip catalog.zip -d .deltacat_copy/
|
959
|
+
```
|
960
|
+
|
961
|
+
And then initialize it together with any other catalogs they're working with:
|
962
|
+
```python
|
963
|
+
import deltacat as dc
|
964
|
+
|
965
|
+
# Initialize catalogs with separate names and catalog roots.
|
966
|
+
dc.init(
|
967
|
+
catalogs={
|
968
|
+
"original": dc.Catalog(dc.CatalogProperties(".deltacat")),
|
969
|
+
"copy": dc.Catalog(dc.CatalogProperties(".deltacat_copy")),
|
970
|
+
"prod_aws": dc.Catalog(dc.CatalogProperties("s3://prod/deltacat")),
|
971
|
+
"prod_gcp": dc.Catalog(dc.CatalogProperties("gs://prod/deltacat")),
|
972
|
+
"prod_azure": dc.Catalog(dc.CatalogProperties("az://prod/deltacat")),
|
973
|
+
}
|
974
|
+
)
|
975
|
+
|
976
|
+
# List all namespaces in the original catalog
|
977
|
+
namespaces = dc.list("dc://original")
|
978
|
+
print([namespace.name for namespace in namespaces])
|
979
|
+
|
980
|
+
# List all namespaces in the copy catalog
|
981
|
+
namespaces = dc.list("dc://copy")
|
982
|
+
print([namespace.name for namespace in namespaces])
|
983
|
+
|
984
|
+
# List all tables in the default namespace of the original catalog
|
985
|
+
tables = dc.list("dc://original/default")
|
986
|
+
print([table.name for table in tables])
|
987
|
+
|
988
|
+
# List all tables in the default namespace of the copy catalog
|
989
|
+
tables = dc.list("dc://copy/default")
|
990
|
+
print([table.name for table in tables])
|
991
|
+
```
|
992
|
+
|
993
|
+
`dc.copy` can also be used to copy namespaces and tables between catalogs:
|
994
|
+
```python
|
995
|
+
# Copy the "default" namespace from the original local catalog over to the "myspace" namespace in the copy catalog
|
996
|
+
dc.copy("dc://original/default", "dc://copy/default/myspace")
|
997
|
+
|
998
|
+
# By default, no tables are copied from the source namespace to the destination
|
999
|
+
tables = dc.list("dc://copy/myspace")
|
1000
|
+
print(f"{len(tables)} tables in myspace.")
|
1001
|
+
|
1002
|
+
# Copy the "users" table from the original local catalog over to "local_users" in the prod_aws catalog
|
1003
|
+
dc.copy("dc://original/default/users", "dc://prod_aws/default/local_users")
|
1004
|
+
|
1005
|
+
# Read the copied table back
|
1006
|
+
df = dc.read("local_users", catalog="prod_aws")
|
1007
|
+
df.show()
|
1008
|
+
|
1009
|
+
# We can also copy all tables in the default namespace using **
|
1010
|
+
dc.copy("dc://original/default/**", "dc://copy/default/myspace")
|
1011
|
+
tables = dc.list("dc://copy/myspace")
|
1012
|
+
print(f"{len(tables)} tables in myspace.")
|
1013
|
+
|
1014
|
+
# Or we can copy all namespaces from the original catalog using *
|
1015
|
+
dc.copy("dc://original/*", "dc://copy")
|
1016
|
+
namespaces = dc.list("dc://copy")
|
1017
|
+
print([namespace.name for namespace in namespaces])
|
1018
|
+
```
|
1019
|
+
|
1020
|
+
</details>
|
1021
|
+
|
1022
|
+
|
820
1023
|
<details>
|
821
1024
|
|
822
1025
|
<summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Level Time Travel</span></summary>
|
@@ -858,10 +1061,10 @@ initial_finance = pd.DataFrame({
|
|
858
1061
|
|
859
1062
|
# Write initial state atomically with a commit message
|
860
1063
|
with dc.transaction(commit_message="Initial data load: users, products, orders, and finance"):
|
861
|
-
dc.write(initial_users, "users", namespace="identity")
|
862
|
-
dc.write(initial_products, "catalog", namespace="inventory")
|
863
|
-
dc.write(initial_orders, "transactions", namespace="sales")
|
864
|
-
dc.write(initial_finance, "users", namespace="finance")
|
1064
|
+
dc.write(initial_users, "users", namespace="identity", auto_create_namespace=True)
|
1065
|
+
dc.write(initial_products, "catalog", namespace="inventory", auto_create_namespace=True)
|
1066
|
+
dc.write(initial_orders, "transactions", namespace="sales", auto_create_namespace=True)
|
1067
|
+
dc.write(initial_finance, "users", namespace="finance", auto_create_namespace=True)
|
865
1068
|
|
866
1069
|
# Sleep briefly to ensure transaction timestamp separation
|
867
1070
|
time.sleep(0.1)
|
@@ -1077,7 +1280,7 @@ daft_docs = daft_docs.with_column("content", daft_docs["path"].url.download().de
|
|
1077
1280
|
# Capture basic feedback sentiment analysis in a parallel multi-table transaction
|
1078
1281
|
with dc.transaction():
|
1079
1282
|
# Write the full customer feedback to a new "documents" table.
|
1080
|
-
dc.write(daft_docs, "documents"
|
1283
|
+
dc.write(daft_docs, "documents")
|
1081
1284
|
|
1082
1285
|
# Define a UDF to analyze customer feedback sentiment.
|
1083
1286
|
@daft.udf(return_dtype=daft.DataType.struct({
|
@@ -1114,14 +1317,14 @@ with dc.transaction():
|
|
1114
1317
|
dc.Field.of(pa.field("confidence", pa.float64())),
|
1115
1318
|
dc.Field.of(pa.field("model_version", pa.large_string())),
|
1116
1319
|
])
|
1117
|
-
dc.write(daft_results, "insights",
|
1320
|
+
dc.write(daft_results, "insights", schema=initial_schema)
|
1118
1321
|
|
1119
1322
|
# Write to a new audit trail table.
|
1120
1323
|
audit_df = pd.DataFrame([{
|
1121
1324
|
"version": "v1.0",
|
1122
1325
|
"docs_processed": dc.dataset_length(daft_docs),
|
1123
1326
|
}])
|
1124
|
-
dc.write(audit_df, "audit"
|
1327
|
+
dc.write(audit_df, "audit")
|
1125
1328
|
|
1126
1329
|
print("=== V1.0: Customer feedback sentiment analysis processing complete! ===")
|
1127
1330
|
|
@@ -1162,9 +1365,9 @@ with dc.transaction():
|
|
1162
1365
|
)
|
1163
1366
|
|
1164
1367
|
# Merge new V2.0 insights into the existing V1.0 insights table.
|
1165
|
-
dc.write(daft_emotions, "insights"
|
1368
|
+
dc.write(daft_emotions, "insights")
|
1166
1369
|
audit_df = pd.DataFrame([{"version": "v2.0", "docs_processed": dc.dataset_length(daft_docs)}])
|
1167
|
-
dc.write(audit_df, "audit"
|
1370
|
+
dc.write(audit_df, "audit")
|
1168
1371
|
|
1169
1372
|
print("=== V2.0: Customer feedback emotion analysis processing complete! ===")
|
1170
1373
|
|
@@ -1176,7 +1379,7 @@ time.sleep(0.1)
|
|
1176
1379
|
# Generate customer service responses based on emotion analysis results.
|
1177
1380
|
with dc.transaction():
|
1178
1381
|
# First, read the current insights table with emotion analysis
|
1179
|
-
current_insights = dc.read("insights"
|
1382
|
+
current_insights = dc.read("insights")
|
1180
1383
|
|
1181
1384
|
# Define a UDF to generate customer service responses based on analysis results.
|
1182
1385
|
@daft.udf(return_dtype=daft.DataType.struct({
|
@@ -1223,39 +1426,39 @@ with dc.transaction():
|
|
1223
1426
|
)
|
1224
1427
|
# Merge new V3.0 responses into the existing V2.0 insights table.
|
1225
1428
|
# The new response columns are automatically joined by document ID.
|
1226
|
-
dc.write(daft_responses, "insights"
|
1429
|
+
dc.write(daft_responses, "insights")
|
1227
1430
|
audit_df = pd.DataFrame([{"version": "v3.0", "docs_processed": dc.dataset_length(current_insights)}])
|
1228
|
-
dc.write(audit_df, "audit"
|
1431
|
+
dc.write(audit_df, "audit")
|
1229
1432
|
|
1230
1433
|
print("=== V3.0: Customer service response generation processing complete! ===")
|
1231
1434
|
|
1232
1435
|
print("\n=== Time Travel Comparison of all Versions ===")
|
1233
1436
|
with dc.transaction(as_of=checkpoint_v1):
|
1234
1437
|
print(f"== V1.0 Insights (sentiment) ==")
|
1235
|
-
print(dc.read("insights"
|
1438
|
+
print(dc.read("insights").show())
|
1236
1439
|
print(f"== V1.0 Audit ==")
|
1237
|
-
print(dc.read("audit"
|
1440
|
+
print(dc.read("audit").show())
|
1238
1441
|
|
1239
1442
|
with dc.transaction(as_of=checkpoint_v2):
|
1240
1443
|
print(f"== V2.0 Insights (emotion) ==")
|
1241
|
-
print(dc.read("insights"
|
1444
|
+
print(dc.read("insights").show())
|
1242
1445
|
print(f"== V2.0 Audit ==")
|
1243
|
-
print(dc.read("audit"
|
1446
|
+
print(dc.read("audit").show())
|
1244
1447
|
|
1245
|
-
v3_results = dc.read("insights"
|
1448
|
+
v3_results = dc.read("insights")
|
1246
1449
|
print(f"== V3.0 Insights (customer service response) ==")
|
1247
|
-
print(dc.read("insights"
|
1450
|
+
print(dc.read("insights").show())
|
1248
1451
|
print(f"== V3.0 Audit ==")
|
1249
|
-
print(dc.read("audit"
|
1452
|
+
print(dc.read("audit").show())
|
1250
1453
|
```
|
1251
1454
|
|
1252
1455
|
</details>
|
1253
1456
|
|
1254
1457
|
## Runtime Environment Requirements
|
1255
1458
|
|
1256
|
-
DeltaCAT's transaction system assumes that the host machine provides strong system clock accuracy guarantees, and that the filesystem hosting the catalog root directory offers strong consistency.
|
1459
|
+
DeltaCAT's transaction system assumes that the host machine provides strong system clock accuracy guarantees, and that the filesystem hosting the catalog root directory offers strong read-after-write consistency.
|
1257
1460
|
|
1258
|
-
Taken together, these requirements make DeltaCAT suitable for production use on most major cloud computing hosts (e.g., EC2, GCE, Azure VMs) and storage systems (e.g., S3, GCS, Azure Blob Storage), but local laptops should typically be limited to testing/experimental purposes.
|
1461
|
+
Taken together, these requirements make DeltaCAT suitable for production use on most major cloud computing hosts (e.g., EC2, GCE, Azure VMs) and storage systems (e.g., S3, GCS, Azure Blob Storage), but local laptops should typically be limited to testing/experimental purposes (e.g., due to potential system clock drift).
|
1259
1462
|
|
1260
1463
|
## Additional Resources
|
1261
1464
|
### Table Documentation
|