deltacat 2.0.0.post2__tar.gz → 2.0.0.post3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deltacat-2.0.0.post2/deltacat.egg-info → deltacat-2.0.0.post3}/PKG-INFO +254 -53
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/README.md +253 -52
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/__init__.py +10 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/api.py +83 -15
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/__init__.py +6 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/delegate.py +170 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/interface.py +35 -2
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/main/impl.py +125 -97
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/model/catalog.py +150 -35
- deltacat-2.0.0.post3/deltacat/catalog/model/properties.py +333 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/private/compaction_utils.py +8 -2
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/steps/merge.py +9 -7
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/converter_session.py +15 -10
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +7 -5
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/io.py +22 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/janitor.py +38 -15
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/constants.py +11 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/bootstrap.py +3 -1
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/explorer.py +0 -1
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/utils/common.py +0 -1
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +0 -1
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/exceptions.py +15 -0
- deltacat-2.0.0.post3/deltacat/experimental/compatibility/backfill_transaction_partitions.py +513 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/converter_agent/table_monitor.py +2 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/daft/daft_catalog.py +1 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +7 -2
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/interface.py +6 -7
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/main/impl.py +209 -121
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/delta.py +22 -8
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/manifest.py +81 -9
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/metafile.py +113 -30
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/namespace.py +11 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/partition.py +19 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/stream.py +10 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/table.py +10 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/table_version.py +10 -3
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/transaction.py +259 -108
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/types.py +1 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +57 -6
- deltacat-2.0.0.post3/deltacat/tests/catalog/model/test_properties_transaction_migration.py +232 -0
- deltacat-2.0.0.post3/deltacat/tests/catalog/test_catalogs.py +651 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/test_default_catalog_impl.py +1184 -39
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -18
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/converter/test_convert_session.py +2 -2
- deltacat-2.0.0.post3/deltacat/tests/compute/converter/test_converter_commit_conflict_resolution.py +626 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_janitor.py +60 -38
- deltacat-2.0.0.post3/deltacat/tests/conftest.py +56 -0
- deltacat-2.0.0.post3/deltacat/tests/experimental/compatibility/test_backfill_transaction_partitions.py +477 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/main/test_main_storage.py +17 -8
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_metafile_io.py +142 -18
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_transaction_history.py +128 -68
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_deltacat_api.py +334 -25
- deltacat-2.0.0.post3/deltacat/tests/utils/test_filesystem.py +3319 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/types/media.py +0 -4
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/types/tables.py +111 -113
- deltacat-2.0.0.post3/deltacat/utils/filesystem.py +1590 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/url.py +89 -18
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3/deltacat.egg-info}/PKG-INFO +254 -53
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat.egg-info/SOURCES.txt +5 -2
- deltacat-2.0.0.post2/deltacat/catalog/model/properties.py +0 -155
- deltacat-2.0.0.post2/deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +0 -201
- deltacat-2.0.0.post2/deltacat/tests/catalog/test_catalogs.py +0 -321
- deltacat-2.0.0.post2/deltacat/tests/conftest.py +0 -25
- deltacat-2.0.0.post2/deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +0 -582
- deltacat-2.0.0.post2/deltacat/utils/filesystem.py +0 -450
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/LICENSE +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/MANIFEST.in +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/annotations.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/aws/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/aws/clients.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/aws/constants.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/aws/s3u.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/benchmark_engine.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/benchmark_report.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/benchmark_suite.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/conftest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/data/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/data/random_row_generator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/data/row_generator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/test_benchmark_pipeline.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/main/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/model/table_definition.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/compaction_session.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/compactor_version.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/materialize_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/repartition_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/table_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/repartition_session.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/dedupe.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/materialize.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/repartition.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/io.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/round_completion_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/sort_key.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/system_columns.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/constants.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/io.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/constants.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/convert_input.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/convert_input_files.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/convert_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/converter_session_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/pyiceberg/catalog.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/pyiceberg/overrides.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/steps/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/steps/convert.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/steps/dedupe.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/convert_task_options.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/converter_session_utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/iceberg_columns.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/s3u.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/jobs/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/jobs/client.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/delta.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/manifest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/model.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/parquet.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/delta_stats.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/stats_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/types.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/schema/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/schema/inference/generate_type_mappings.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/env.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/basic_logging.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/aws/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/compactor.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/gcp/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/job_runner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/app.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/main.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/iceberg_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/hello_world.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/aws/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/gcp/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/indexer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/job_runner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/iceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/iceberg/impl.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/iceberg/overrides.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/compatibility/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/converter_agent/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/converter_agent/beam/managed.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/daft/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/impl.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/model.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/visitor.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/arrow/serializer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/dataset_executor.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/feather/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/feather/file_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/feather/serializer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/file_provider.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/file_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/input_file.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/output_file.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/logical_plan.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/delta.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/json_sst.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/sst.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/mvp/Table.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/mvp/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/parquet/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/parquet/file_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/parquet/serializer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/block_scanner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/data_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/data_scan.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/dataset_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/query_expression.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/schema/datatype.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/schema/schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/serializer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/serializer_factory.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/shard/range_shard.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/dataset/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/dataset/deltacat_dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/datasink/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/datasink/deltacat_datasink.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/datasource/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/datasource/deltacat_datasource.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/file_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/memcached_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/ray_plasma_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/reader/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/reader/deltacat_read_api.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/redis_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/s3_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/logs.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/main/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/expression/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/expression/expression.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/expression/visitor.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/interop.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/list_result.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/locator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/scan/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/scan/push_down.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/scan/scan_plan.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/scan/scan_task.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/shard.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/sort_key.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/transform.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/util/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/util/scan_planner.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/reader/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_cloudpickle_bug_fix.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_file_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_memcached_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_ray_plasma_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_redis_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_s3_object_store.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/aws/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/aws/test_clients.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/aws/test_s3u.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/data/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/main/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/model/test_table_definition.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/conftest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/converter/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/converter/conftest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/converter/utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/resource_estimation/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/resource_estimation/data/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/resource_estimation/test_delta.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_rebase.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_util_common.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_util_constant.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/daft/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/daft/test_model.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/catalog/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/compatibility/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/daft/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/conftest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/test_dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/test_manifest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/test_utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/main/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_delete_parameters.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_expression.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_manifest.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_partition_scheme.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_schema_update.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_shard.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_sort_scheme.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_table_version.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_transaction.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_exceptions.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_logs.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/constants.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/filesystem.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/message_pack_utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/pyarrow.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/storage.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/utils.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/types/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/types/test_tables.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/data/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/exceptions.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/main_deltacat_storage_mock.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_cloudpickle.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_daft.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_metrics.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_numpy.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_pandas.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_placement.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_polars.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_pyarrow.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_resources.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/types/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/types/partial_download.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/arguments.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/cloudpickle.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/common.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/daft.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/export.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/metafile_locator.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/metrics.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/numpy.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/pandas.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/performance.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/placement.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/polars.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/pyarrow.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/__init__.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/collections.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/concurrency.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/dataset.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/performance.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/runtime.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/reader_compatibility_mapping.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/resources.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/schema.py +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat.egg-info/dependency_links.txt +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat.egg-info/requires.txt +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat.egg-info/top_level.txt +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/setup.cfg +0 -0
- {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deltacat
|
3
|
-
Version: 2.0.0.post2
|
3
|
+
Version: 2.0.0.post3
|
4
4
|
Summary: DeltaCAT is a portable Pythonic Data Lakehouse powered by Ray.
|
5
5
|
Home-page: https://github.com/ray-project/deltacat
|
6
6
|
Author: Ray Team
|
@@ -53,24 +53,20 @@ Dynamic: summary
|
|
53
53
|
<img src="https://github.com/ray-project/deltacat/raw/2.0/media/deltacat-logo-alpha-750.png" alt="deltacat logo" style="width:55%; height:auto; text-align: center;">
|
54
54
|
</p>
|
55
55
|
|
56
|
-
DeltaCAT is a portable
|
57
|
-
fast, scalable, ACID-compliant multimodal data lakes, and has been used to [successfully manage exabyte-scale enterprise
|
58
|
-
data lakes](https://aws.amazon.com/blogs/opensource/amazons-exabyte-scale-migration-from-apache-spark-to-ray-on-amazon-ec2/).
|
56
|
+
DeltaCAT is a portable Multimodal Lakehouse powered by [Ray](https://github.com/ray-project/ray), [Apache Arrow](https://github.com/apache/arrow), and [Daft](https://github.com/Eventual-Inc/Daft). It lets you create ACID-compliant multimodal data lakes [that efficiently scale to manage exabytes of production data](https://aws.amazon.com/blogs/opensource/amazons-exabyte-scale-migration-from-apache-spark-to-ray-on-amazon-ec2/).
|
59
57
|
|
60
|
-
It provides data lake level transactions & time travel,
|
58
|
+
It provides data lake level transactions & time travel, zero-copy schema evolution, zero-copy multimodal file processing (image, audio, video, text, etc.), and transparent dataset optimization. It runs locally for rapid development or in the cloud for production workloads. It runs on any filesystem for easy setup and sharing - no external catalog services, lock managers, or key value stores required.
|
61
59
|
|
62
|
-
|
63
|
-
|
64
|
-
merge-on-read and copy-on-write operations.
|
60
|
+
|
61
|
+
## Overview
|
65
62
|
|
66
63
|
DeltaCAT provides the following high-level components:
|
67
|
-
1. [**Catalog**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/catalog/interface.py):
|
68
|
-
2. [**Compute**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/compute/): Distributed data management procedures
|
69
|
-
3. [**Storage**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/storage/):
|
64
|
+
1. [**Catalog**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/catalog/interface.py): Pythonic APIs to discover, read, write, and manage datasets.
|
65
|
+
2. [**Compute**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/compute/): Distributed data management procedures that automatically optimize your datasets.
|
66
|
+
3. [**Storage**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/storage/): A portable multimodal data lake format useable with any filesystem.
|
70
67
|
4. **Sync** (in development): Synchronize DeltaCAT datasets to data warehouses and other table formats.
|
71
68
|
|
72
|
-
|
73
|
-
DeltaCAT's **Catalog**, **Compute**, and **Storage** layers work together to bring ACID-compliant data management to any Ray application. These components automate data indexing, change management, dataset read/write optimization, schema evolution, and other common data management tasks across any set of data files readable by Ray Data, Daft, Pandas, Polars, PyArrow, or NumPy.
|
69
|
+
DeltaCAT's **Catalog**, **Compute**, and **Storage** layers work together to bring ACID-compliant data management to any Ray application. These components automate data indexing, change management, dataset read/write optimization, schema evolution, and other common data management tasks across any set of data files readable by [Pandas](https://github.com/pandas-dev/pandas), [NumPy](https://github.com/numpy/numpy), [Polars](https://github.com/pola-rs/polars), [PyArrow](https://arrow.apache.org/docs/python/index.html), [Ray Data](https://docs.ray.io/en/latest/data/data.html), and [Daft](https://docs.daft.ai/en/stable/api/dataframe/).
|
74
70
|
|
75
71
|
<p align="center">
|
76
72
|
<img src="https://github.com/ray-project/deltacat/raw/2.0/media/deltacat-tech-overview.png" alt="deltacat tech overview" style="width:100%; height:auto; text-align: center;">
|
@@ -81,7 +77,8 @@ Data consumers that prefer to stay within the ecosystem of Pythonic data managem
|
|
81
77
|
## Getting Started
|
82
78
|
DeltaCAT applications run anywhere that Ray runs, including your local laptop, cloud computing cluster, or on-premise cluster.
|
83
79
|
|
84
|
-
DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of data files. A **Catalog** can be thought of as a named data lake
|
80
|
+
DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of data files. A **Catalog** can be thought of as a named data lake that contains a set of **Tables**. A **Catalog** provides a root location (e.g., a local file path or S3 Bucket) to store information about all your **Tables**, and can be rooted in any [PyArrow-compatible Filesystem](https://arrow.apache.org/docs/python/filesystems.html). **Tables** can be created, read, and written using the `dc.write` and `dc.read` APIs.
|
81
|
+
|
85
82
|
|
86
83
|
### Quick Start
|
87
84
|
|
@@ -114,7 +111,7 @@ dc.write(data, "users")
|
|
114
111
|
daft_df = dc.read("users") # Returns Daft DataFrame (default)
|
115
112
|
daft_df.show() # Materialize and print the DataFrame
|
116
113
|
|
117
|
-
#
|
114
|
+
# Add more data and add a new column.
|
118
115
|
# Compaction and zero-copy schema evolution are handled automatically.
|
119
116
|
data = pd.DataFrame({
|
120
117
|
"id": [4, 5, 6],
|
@@ -131,13 +128,13 @@ daft_df.select("name", "age", "city").show()
|
|
131
128
|
```
|
132
129
|
|
133
130
|
### Core Concepts
|
134
|
-
DeltaCAT can do much more than just
|
131
|
+
DeltaCAT can do much more than just add data to tables and read it back again. Expand the sections below to see examples of other core DeltaCAT concepts and APIs.
|
135
132
|
|
136
133
|
<details>
|
137
134
|
|
138
135
|
<summary><span style="font-size: 1.25em; font-weight: bold;">Idempotent Writes</span></summary>
|
139
136
|
|
140
|
-
If you run the quick start example repeatedly from the same working directory, you'll notice that the table it writes to just keeps growing larger. This is because DeltaCAT always **
|
137
|
+
If you run the quick start example repeatedly from the same working directory, you'll notice that the table it writes to just keeps growing larger. This is because DeltaCAT always **adds** table data by default. One way to prevent this perpetual table growth and make the example idempotent is to use the **REPLACE** write mode if the table already exists:
|
141
138
|
|
142
139
|
```python
|
143
140
|
import deltacat as dc
|
@@ -171,7 +168,7 @@ dc.write(data, "users", mode=write_mode)
|
|
171
168
|
daft_df = dc.read("users") # Returns Daft DataFrame (default)
|
172
169
|
daft_df.show() # Materialize and print the DataFrame
|
173
170
|
|
174
|
-
# Explicitly
|
171
|
+
# Explicitly add more data and add a new column.
|
175
172
|
# Compaction and schema evolution are handled automatically.
|
176
173
|
data = pd.DataFrame({
|
177
174
|
"id": [4, 5, 6],
|
@@ -179,7 +176,7 @@ data = pd.DataFrame({
|
|
179
176
|
"age": [2, 12, 4],
|
180
177
|
"city": ["Hollywood", "Gloucester", "San Francisco"]
|
181
178
|
})
|
182
|
-
dc.write(data, "users", mode=dc.TableWriteMode.
|
179
|
+
dc.write(data, "users", mode=dc.TableWriteMode.ADD)
|
183
180
|
|
184
181
|
# Read the full table back into a Daft DataFrame.
|
185
182
|
daft_df = dc.read("users")
|
@@ -223,7 +220,7 @@ dc.write(data, "users", mode=dc.TableWriteMode.CREATE)
|
|
223
220
|
daft_df = dc.read("users") # Returns Daft DataFrame (default)
|
224
221
|
daft_df.show() # Materialize and print the DataFrame
|
225
222
|
|
226
|
-
# Explicitly
|
223
|
+
# Explicitly add more data and add a new column.
|
227
224
|
# Compaction and schema evolution are handled automatically.
|
228
225
|
data = pd.DataFrame({
|
229
226
|
"id": [4, 5, 6],
|
@@ -231,7 +228,7 @@ data = pd.DataFrame({
|
|
231
228
|
"age": [2, 12, 4],
|
232
229
|
"city": ["Hollywood", "Gloucester", "San Francisco"]
|
233
230
|
})
|
234
|
-
dc.write(data, "users", mode=dc.TableWriteMode.
|
231
|
+
dc.write(data, "users", mode=dc.TableWriteMode.ADD)
|
235
232
|
|
236
233
|
# Read the full table back into a Daft DataFrame.
|
237
234
|
daft_df = dc.read("users")
|
@@ -243,9 +240,117 @@ assert dc.dataset_length(daft_df) == 6
|
|
243
240
|
|
244
241
|
</details>
|
245
242
|
|
243
|
+
|
246
244
|
<details>
|
247
245
|
|
248
|
-
<summary><span style="font-size: 1.25em; font-weight: bold;">
|
246
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Ordered Writes</span></summary>
|
247
|
+
DeltaCAT writes are unordered by default, which means that the order of data written to the table isn't guaranteed to match the order in which it is read back. While this is useful for preventing conflicts between concurrent writers, you can also use the **APPEND** write mode to preserve write order and raise explicit concurrency conflicts between parallel writers:
|
248
|
+
|
249
|
+
```python
|
250
|
+
import deltacat as dc
|
251
|
+
import pandas as pd
|
252
|
+
|
253
|
+
# Initialize DeltaCAT with a default local catalog.
|
254
|
+
# Ray will be initialized automatically.
|
255
|
+
# Catalog files will be stored in .deltacat/ in the current working directory.
|
256
|
+
dc.init_local()
|
257
|
+
|
258
|
+
# Create data to write.
|
259
|
+
data = pd.DataFrame({
|
260
|
+
"id": [1, 2],
|
261
|
+
"name": ["Cheshire", "Dinah"],
|
262
|
+
"age": [3, 7]
|
263
|
+
})
|
264
|
+
|
265
|
+
# Derive a DeltaCAT schema for the data.
|
266
|
+
schema = dc.Schema.of(dc.dataset_schema(data))
|
267
|
+
|
268
|
+
# Create an empty table to hold ordered user data.
|
269
|
+
if not dc.table_exists("users_ordered"):
|
270
|
+
dc.create_table("users_ordered", schema=schema)
|
271
|
+
|
272
|
+
# Write the first ordered delta to the table.
|
273
|
+
dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
|
274
|
+
|
275
|
+
# Write the second ordered delta to the table.
|
276
|
+
data = pd.DataFrame({
|
277
|
+
"id": [3, 4],
|
278
|
+
"name": ["Felix", "Tom"],
|
279
|
+
"age": [2, 12],
|
280
|
+
"city": ["Hollywood", "Gloucester"]
|
281
|
+
})
|
282
|
+
dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
|
283
|
+
|
284
|
+
# Write the third ordered delta to the table.
|
285
|
+
data = pd.DataFrame({
|
286
|
+
"id": [5, 6],
|
287
|
+
"name": ["Simpkin", "Delta"],
|
288
|
+
"age": [12, 4],
|
289
|
+
"city": ["San Francisco", "San Francisco"]
|
290
|
+
})
|
291
|
+
dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
|
292
|
+
|
293
|
+
# Read the data back as a Pandas DataFrame, and ensure that the
|
294
|
+
# order of the records returned matches the order they were written.
|
295
|
+
pandas_df = dc.read("users_ordered", read_as=dc.DatasetType.PANDAS)
|
296
|
+
print(pandas_df)
|
297
|
+
```
|
298
|
+
|
299
|
+
</details>
|
300
|
+
|
301
|
+
<details>
|
302
|
+
|
303
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Schemaless Tables</span></summary>
|
304
|
+
Tables created automatically via `dc.write` have a schema inferred from the data written by default. However, if you create an empty table without providing a schema, it defaults to schemaless. Writes to schemaless tables are more efficient and flexible, since they simply track the location and basic metadata associated with the data files written to the table. However, if you know that a unified schema can be derived for your schemaless data, then you can still read it back as a structured dataset:
|
305
|
+
|
306
|
+
```python
|
307
|
+
import deltacat as dc
|
308
|
+
import pandas as pd
|
309
|
+
|
310
|
+
# Initialize DeltaCAT with a default local catalog.
|
311
|
+
# Ray will be initialized automatically.
|
312
|
+
# Catalog files will be stored in .deltacat/ in the current working directory.
|
313
|
+
dc.init_local()
|
314
|
+
|
315
|
+
# Create data to write.
|
316
|
+
data = pd.DataFrame({
|
317
|
+
"id": [1, 2],
|
318
|
+
"name": ["Cheshire", "Dinah"],
|
319
|
+
"age": [3, 7]
|
320
|
+
})
|
321
|
+
|
322
|
+
# Create an empty schemaless table to hold ordered user data.
|
323
|
+
if not dc.table_exists("users_schemaless"):
|
324
|
+
dc.create_table("users_schemaless")
|
325
|
+
|
326
|
+
# Write the first ordered delta to the table.
|
327
|
+
dc.write(data, "users_schemaless", mode=dc.TableWriteMode.APPEND)
|
328
|
+
|
329
|
+
# Write the second ordered delta to the table.
|
330
|
+
data = pd.DataFrame({
|
331
|
+
"id": [3, 4],
|
332
|
+
"name": ["Felix", "Tom"],
|
333
|
+
"age": [2, 12],
|
334
|
+
"city": ["Hollywood", "Gloucester"]
|
335
|
+
})
|
336
|
+
dc.write(data, "users_schemaless", mode=dc.TableWriteMode.APPEND)
|
337
|
+
|
338
|
+
# Read back the file manifest of the schemaless table.
|
339
|
+
# Notice that file paths, sizes, etc. are returned instead of the dataframes written.
|
340
|
+
manifest_df = dc.read("users_schemaless", read_as=dc.DatasetType.PANDAS)
|
341
|
+
print(manifest_df)
|
342
|
+
|
343
|
+
# Use from_manifest_table to convert the manifest table to a structured dataset.
|
344
|
+
structured_daft_df = dc.from_manifest_table(manifest_df)
|
345
|
+
structured_daft_df.show()
|
346
|
+
```
|
347
|
+
|
348
|
+
</details>
|
349
|
+
|
350
|
+
|
351
|
+
<details>
|
352
|
+
|
353
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Working Across Dataset and File Types</span></summary>
|
249
354
|
|
250
355
|
DeltaCAT natively supports a variety of open dataset and file formats already integrated with Ray and Arrow. You can use `dc.read` to read tables back as a Daft DataFrame, Ray Dataset, Pandas DataFrame, PyArrow Table, Polars DataFrame, NumPy Array, or list of PyArrow ParquetFile objects:
|
251
356
|
|
@@ -600,6 +705,10 @@ order_data = pd.DataFrame({
|
|
600
705
|
"product_id": [101, 102, 103],
|
601
706
|
"quantity": [2, 1, 2]
|
602
707
|
})
|
708
|
+
# Create identity, inventory, and sales namespaces
|
709
|
+
dc.create_namespace("identity")
|
710
|
+
dc.create_namespace("inventory")
|
711
|
+
dc.create_namespace("sales")
|
603
712
|
|
604
713
|
# Write tables to different namespaces to organize them by domain
|
605
714
|
dc.write(user_data, "users", namespace="identity")
|
@@ -625,7 +734,10 @@ finance_users = pd.DataFrame({
|
|
625
734
|
"preferred_payment_method": ["credit", "cash", "paypal"]
|
626
735
|
})
|
627
736
|
|
737
|
+
dc.create_namespace("marketing")
|
628
738
|
dc.write(marketing_users, "users", namespace="marketing")
|
739
|
+
|
740
|
+
dc.create_namespace("finance")
|
629
741
|
dc.write(finance_users, "users", namespace="finance")
|
630
742
|
|
631
743
|
# Each namespace maintains its own "users" table with different schemas
|
@@ -671,6 +783,7 @@ product_data = pd.DataFrame({
|
|
671
783
|
})
|
672
784
|
|
673
785
|
# The product catalog can be created independently.
|
786
|
+
dc.create_namespace("inventory")
|
674
787
|
dc.write(product_data, "catalog", namespace="inventory")
|
675
788
|
|
676
789
|
print(f"\n=== Initial Product Data ===")
|
@@ -697,7 +810,9 @@ finance_schema = dc.Schema.of([
|
|
697
810
|
# Create user identities and user finance data within a single transaction.
|
698
811
|
# Since transactions are atomic, this prevents accounting discrepancies.
|
699
812
|
with dc.transaction():
|
813
|
+
dc.create_namespace("identity")
|
700
814
|
dc.write(user_data, "users", namespace="identity")
|
815
|
+
dc.create_namespace("finance")
|
701
816
|
dc.write(initial_finance, "users", namespace="finance", schema=finance_schema)
|
702
817
|
|
703
818
|
print(f"\n=== Initial User Data ===")
|
@@ -716,6 +831,7 @@ new_orders = pd.DataFrame({
|
|
716
831
|
# Process new orders and update lifetime payment totals within a single transaction.
|
717
832
|
with dc.transaction():
|
718
833
|
# Step 1: Write the new orders
|
834
|
+
dc.create_namespace("sales")
|
719
835
|
dc.write(new_orders, "transactions", namespace="sales")
|
720
836
|
|
721
837
|
# Step 2: Read back transactions and products to compute actual totals
|
@@ -731,6 +847,7 @@ with dc.transaction():
|
|
731
847
|
finance_updates.columns = ["user_id", "lifetime_payments"]
|
732
848
|
|
733
849
|
# Step 4: Write the computed totals
|
850
|
+
dc.create_namespace("finance")
|
734
851
|
dc.write(finance_updates, "users", namespace="finance", mode=dc.TableWriteMode.MERGE)
|
735
852
|
|
736
853
|
# Verify that orders and lifetime payments are kept in sync.
|
@@ -760,16 +877,14 @@ import tempfile
|
|
760
877
|
from decimal import Decimal
|
761
878
|
|
762
879
|
# Initialize catalogs with separate names and catalog roots.
|
763
|
-
dc.init(
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
))
|
772
|
-
})
|
880
|
+
dc.init(
|
881
|
+
catalogs={
|
882
|
+
# Use temporary directory for staging
|
883
|
+
"staging": dc.Catalog(dc.CatalogProperties(tempfile.mkdtemp())),
|
884
|
+
# Use S3 for prod
|
885
|
+
"prod": dc.Catalog(dc.CatalogProperties("s3://example/deltacat"))
|
886
|
+
}
|
887
|
+
)
|
773
888
|
|
774
889
|
# Create a PyArrow table with decimal256 data
|
775
890
|
decimal_table = pa.table({
|
@@ -817,6 +932,92 @@ print(dc.read("financial_data", catalog="prod", read_as=dc.DatasetType.PANDAS))
|
|
817
932
|
|
818
933
|
</details>
|
819
934
|
|
935
|
+
<details>
|
936
|
+
|
937
|
+
<summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Sharing & Portability</span></summary>
|
938
|
+
|
939
|
+
DeltaCAT catalogs are self-contained directories on a filesystem, so you can easily share your data lake with others. A local catalog on your laptop can be compressed and sent anywhere. A cloud catalog in S3, GCS, or Azure Blob Storage can be shared via URL. The read/write permissions of your catalog are the read/write permissions of your filesystem.
|
940
|
+
|
941
|
+
For example, you can zip up your local catalog and upload it to S3 via:
|
942
|
+
```bash
|
943
|
+
# zip a local catalog
|
944
|
+
zip -r catalog.zip .deltacat/
|
945
|
+
|
946
|
+
# copy the catalog to a cloud bucket
|
947
|
+
aws s3 cp catalog.zip s3://my-bucket/catalog.zip
|
948
|
+
```
|
949
|
+
|
950
|
+
The person you shared it with can retrieve and decompress it via:
|
951
|
+
```bash
|
952
|
+
# copy the cloud catalog to local disk
|
953
|
+
aws s3 cp s3://my-bucket/catalog.zip .
|
954
|
+
|
955
|
+
# unzip the catalog to a local directory
|
956
|
+
unzip catalog.zip -d .deltacat_copy/
|
957
|
+
```
|
958
|
+
|
959
|
+
And then initialize it together with any other catalogs they're working with:
|
960
|
+
```python
|
961
|
+
import deltacat as dc
|
962
|
+
|
963
|
+
# Initialize catalogs with separate names and catalog roots.
|
964
|
+
dc.init(
|
965
|
+
catalogs={
|
966
|
+
"original": dc.Catalog(dc.CatalogProperties(".deltacat")),
|
967
|
+
"copy": dc.Catalog(dc.CatalogProperties(".deltacat_copy")),
|
968
|
+
"prod_aws": dc.Catalog(dc.CatalogProperties("s3://prod/deltacat")),
|
969
|
+
"prod_gcp": dc.Catalog(dc.CatalogProperties("gs://prod/deltacat")),
|
970
|
+
"prod_azure": dc.Catalog(dc.CatalogProperties("az://prod/deltacat")),
|
971
|
+
}
|
972
|
+
)
|
973
|
+
|
974
|
+
# List all namespaces in the original catalog
|
975
|
+
namespaces = dc.list("dc://original")
|
976
|
+
print([namespace.name for namespace in namespaces])
|
977
|
+
|
978
|
+
# List all namespaces in the copy catalog
|
979
|
+
namespaces = dc.list("dc://copy")
|
980
|
+
print([namespace.name for namespace in namespaces])
|
981
|
+
|
982
|
+
# List all tables in the default namespace of the original catalog
|
983
|
+
tables = dc.list("dc://original/default")
|
984
|
+
print([table.name for table in tables])
|
985
|
+
|
986
|
+
# List all tables in the default namespace of the copy catalog
|
987
|
+
tables = dc.list("dc://copy/default")
|
988
|
+
print([table.name for table in tables])
|
989
|
+
```
|
990
|
+
|
991
|
+
`dc.copy` can also be used to copy namespaces and tables between catalogs:
|
992
|
+
```python
|
993
|
+
# Copy the "default" namespace from the original local catalog over to the "myspace" namespace in the copy catalog
|
994
|
+
dc.copy("dc://original/default", "dc://copy/default/myspace")
|
995
|
+
|
996
|
+
# By default, no tables are copied from the source namespace to the destination
|
997
|
+
tables = dc.list("dc://copy/myspace")
|
998
|
+
print(f"{len(tables)} tables in myspace.")
|
999
|
+
|
1000
|
+
# Copy the "users" table from the original local catalog over to "local_users" in the prod_aws catalog
|
1001
|
+
dc.copy("dc://original/default/users", "dc://prod_aws/default/local_users")
|
1002
|
+
|
1003
|
+
# Read the copied table back
|
1004
|
+
df = dc.read("local_users", catalog="prod_aws")
|
1005
|
+
df.show()
|
1006
|
+
|
1007
|
+
# We can also copy all tables in the default namespace using **
|
1008
|
+
dc.copy("dc://original/default/**", "dc://copy/default/myspace")
|
1009
|
+
tables = dc.list("dc://copy/myspace")
|
1010
|
+
print(f"{len(tables)} tables in myspace.")
|
1011
|
+
|
1012
|
+
# Or we can copy all namespaces from the original catalog using *
|
1013
|
+
dc.copy("dc://original/*", "dc://copy")
|
1014
|
+
namespaces = dc.list("dc://copy")
|
1015
|
+
print([namespace.name for namespace in namespaces])
|
1016
|
+
```
|
1017
|
+
|
1018
|
+
</details>
|
1019
|
+
|
1020
|
+
|
820
1021
|
<details>
|
821
1022
|
|
822
1023
|
<summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Level Time Travel</span></summary>
|
@@ -858,10 +1059,10 @@ initial_finance = pd.DataFrame({
|
|
858
1059
|
|
859
1060
|
# Write initial state atomically with a commit message
|
860
1061
|
with dc.transaction(commit_message="Initial data load: users, products, orders, and finance"):
|
861
|
-
dc.write(initial_users, "users", namespace="identity")
|
862
|
-
dc.write(initial_products, "catalog", namespace="inventory")
|
863
|
-
dc.write(initial_orders, "transactions", namespace="sales")
|
864
|
-
dc.write(initial_finance, "users", namespace="finance")
|
1062
|
+
dc.write(initial_users, "users", namespace="identity", auto_create_namespace=True)
|
1063
|
+
dc.write(initial_products, "catalog", namespace="inventory", auto_create_namespace=True)
|
1064
|
+
dc.write(initial_orders, "transactions", namespace="sales", auto_create_namespace=True)
|
1065
|
+
dc.write(initial_finance, "users", namespace="finance", auto_create_namespace=True)
|
865
1066
|
|
866
1067
|
# Sleep briefly to ensure transaction timestamp separation
|
867
1068
|
time.sleep(0.1)
|
@@ -1077,7 +1278,7 @@ daft_docs = daft_docs.with_column("content", daft_docs["path"].url.download().de
|
|
1077
1278
|
# Capture basic feedback sentiment analysis in a parallel multi-table transaction
|
1078
1279
|
with dc.transaction():
|
1079
1280
|
# Write the full customer feedback to a new "documents" table.
|
1080
|
-
dc.write(daft_docs, "documents"
|
1281
|
+
dc.write(daft_docs, "documents")
|
1081
1282
|
|
1082
1283
|
# Define a UDF to analyze customer feedback sentiment.
|
1083
1284
|
@daft.udf(return_dtype=daft.DataType.struct({
|
@@ -1114,14 +1315,14 @@ with dc.transaction():
|
|
1114
1315
|
dc.Field.of(pa.field("confidence", pa.float64())),
|
1115
1316
|
dc.Field.of(pa.field("model_version", pa.large_string())),
|
1116
1317
|
])
|
1117
|
-
dc.write(daft_results, "insights",
|
1318
|
+
dc.write(daft_results, "insights", schema=initial_schema)
|
1118
1319
|
|
1119
1320
|
# Write to a new audit trail table.
|
1120
1321
|
audit_df = pd.DataFrame([{
|
1121
1322
|
"version": "v1.0",
|
1122
1323
|
"docs_processed": dc.dataset_length(daft_docs),
|
1123
1324
|
}])
|
1124
|
-
dc.write(audit_df, "audit"
|
1325
|
+
dc.write(audit_df, "audit")
|
1125
1326
|
|
1126
1327
|
print("=== V1.0: Customer feedback sentiment analysis processing complete! ===")
|
1127
1328
|
|
@@ -1162,9 +1363,9 @@ with dc.transaction():
|
|
1162
1363
|
)
|
1163
1364
|
|
1164
1365
|
# Merge new V2.0 insights into the existing V1.0 insights table.
|
1165
|
-
dc.write(daft_emotions, "insights"
|
1366
|
+
dc.write(daft_emotions, "insights")
|
1166
1367
|
audit_df = pd.DataFrame([{"version": "v2.0", "docs_processed": dc.dataset_length(daft_docs)}])
|
1167
|
-
dc.write(audit_df, "audit"
|
1368
|
+
dc.write(audit_df, "audit")
|
1168
1369
|
|
1169
1370
|
print("=== V2.0: Customer feedback emotion analysis processing complete! ===")
|
1170
1371
|
|
@@ -1176,7 +1377,7 @@ time.sleep(0.1)
|
|
1176
1377
|
# Generate customer service responses based on emotion analysis results.
|
1177
1378
|
with dc.transaction():
|
1178
1379
|
# First, read the current insights table with emotion analysis
|
1179
|
-
current_insights = dc.read("insights"
|
1380
|
+
current_insights = dc.read("insights")
|
1180
1381
|
|
1181
1382
|
# Define a UDF to generate customer service responses based on analysis results.
|
1182
1383
|
@daft.udf(return_dtype=daft.DataType.struct({
|
@@ -1223,39 +1424,39 @@ with dc.transaction():
|
|
1223
1424
|
)
|
1224
1425
|
# Merge new V3.0 responses into the existing V2.0 insights table.
|
1225
1426
|
# The new response columns are automatically joined by document ID.
|
1226
|
-
dc.write(daft_responses, "insights"
|
1427
|
+
dc.write(daft_responses, "insights")
|
1227
1428
|
audit_df = pd.DataFrame([{"version": "v3.0", "docs_processed": dc.dataset_length(current_insights)}])
|
1228
|
-
dc.write(audit_df, "audit"
|
1429
|
+
dc.write(audit_df, "audit")
|
1229
1430
|
|
1230
1431
|
print("=== V3.0: Customer service response generation processing complete! ===")
|
1231
1432
|
|
1232
1433
|
print("\n=== Time Travel Comparison of all Versions ===")
|
1233
1434
|
with dc.transaction(as_of=checkpoint_v1):
|
1234
1435
|
print(f"== V1.0 Insights (sentiment) ==")
|
1235
|
-
print(dc.read("insights"
|
1436
|
+
print(dc.read("insights").show())
|
1236
1437
|
print(f"== V1.0 Audit ==")
|
1237
|
-
print(dc.read("audit"
|
1438
|
+
print(dc.read("audit").show())
|
1238
1439
|
|
1239
1440
|
with dc.transaction(as_of=checkpoint_v2):
|
1240
1441
|
print(f"== V2.0 Insights (emotion) ==")
|
1241
|
-
print(dc.read("insights"
|
1442
|
+
print(dc.read("insights").show())
|
1242
1443
|
print(f"== V2.0 Audit ==")
|
1243
|
-
print(dc.read("audit"
|
1444
|
+
print(dc.read("audit").show())
|
1244
1445
|
|
1245
|
-
v3_results = dc.read("insights"
|
1446
|
+
v3_results = dc.read("insights")
|
1246
1447
|
print(f"== V3.0 Insights (customer service response) ==")
|
1247
|
-
print(dc.read("insights"
|
1448
|
+
print(dc.read("insights").show())
|
1248
1449
|
print(f"== V3.0 Audit ==")
|
1249
|
-
print(dc.read("audit"
|
1450
|
+
print(dc.read("audit").show())
|
1250
1451
|
```
|
1251
1452
|
|
1252
1453
|
</details>
|
1253
1454
|
|
1254
1455
|
## Runtime Environment Requirements
|
1255
1456
|
|
1256
|
-
DeltaCAT's transaction system assumes that the host machine provides strong system clock accuracy guarantees, and that the filesystem hosting the catalog root directory offers strong consistency.
|
1457
|
+
DeltaCAT's transaction system assumes that the host machine provides strong system clock accuracy guarantees, and that the filesystem hosting the catalog root directory offers strong read-after-write consistency.
|
1257
1458
|
|
1258
|
-
Taken together, these requirements make DeltaCAT suitable for production use on most major cloud computing hosts (e.g., EC2, GCE, Azure VMs) and storage systems (e.g., S3, GCS, Azure Blob Storage), but local laptops should typically be limited to testing/experimental purposes.
|
1459
|
+
Taken together, these requirements make DeltaCAT suitable for production use on most major cloud computing hosts (e.g., EC2, GCE, Azure VMs) and storage systems (e.g., S3, GCS, Azure Blob Storage), but local laptops should typically be limited to testing/experimental purposes (e.g., due to potential system clock drift).
|
1259
1460
|
|
1260
1461
|
## Additional Resources
|
1261
1462
|
### Table Documentation
|