deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,653 @@
|
|
1
|
+
import pytest
|
2
|
+
import os
|
3
|
+
import pyarrow
|
4
|
+
import msgpack
|
5
|
+
import posixpath
|
6
|
+
|
7
|
+
|
8
|
+
from deltacat.storage import (
|
9
|
+
Transaction,
|
10
|
+
TransactionOperation,
|
11
|
+
TransactionOperationType,
|
12
|
+
Namespace,
|
13
|
+
NamespaceLocator,
|
14
|
+
Metafile,
|
15
|
+
)
|
16
|
+
|
17
|
+
from deltacat.constants import (
|
18
|
+
TXN_DIR_NAME,
|
19
|
+
RUNNING_TXN_DIR_NAME,
|
20
|
+
PAUSED_TXN_DIR_NAME,
|
21
|
+
)
|
22
|
+
|
23
|
+
|
24
|
+
class TestAbsToRelative:
|
25
|
+
@classmethod
def setup_method(cls):
    """
    Store the catalog root used by the path tests on the class, where the
    tests read it back as ``TestAbsToRelative.catalog_root``.
    """
    shared_root = "/catalog/root/path"
    cls.catalog_root = shared_root
|
28
|
+
|
29
|
+
# Test cases for Transaction._abs_txn_meta_path_to_relative
|
30
|
+
def test_abs_to_relative_simple(self):
    """
    Checks that an absolute metafile path (string) located under the catalog
    root is turned into the matching root-relative path (string).
    """
    root = TestAbsToRelative.catalog_root
    target = "/catalog/root/path/namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    expected = "namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    actual = Transaction._abs_txn_meta_path_to_relative(root, target)
    assert actual == expected
|
43
|
+
|
44
|
+
def test_abs_to_relative_same_paths(self):
    """
    Passing the catalog root as both root and target must raise a ValueError
    whose message says the two paths are identical.
    """
    root = TestAbsToRelative.catalog_root
    target = TestAbsToRelative.catalog_root
    # pytest treats ``match`` as a regular expression searched in the message.
    expected_error = "Target and root are identical, but expected target to be a child of root."
    with pytest.raises(ValueError, match=expected_error):
        Transaction._abs_txn_meta_path_to_relative(root, target)
|
52
|
+
|
53
|
+
def test_abs_to_relative_root_with_trailing_slash(self):
    """
    A catalog root written with a trailing slash must produce the same
    relative path as the root without one.
    """
    root_with_slash = "/catalog/root/path/"
    target = "/catalog/root/path/namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    expected = "namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    actual = Transaction._abs_txn_meta_path_to_relative(root_with_slash, target)
    assert actual == expected
|
63
|
+
|
64
|
+
def test_abs_to_relative_bad_root(self):
    """
    A target path that does not live under the catalog root must be rejected
    with a ValueError.
    """
    root = TestAbsToRelative.catalog_root
    unrelated_target = "/cat/rt/pth/namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    expected_error = "Expected target to be a child of root."
    with pytest.raises(ValueError, match=expected_error):
        Transaction._abs_txn_meta_path_to_relative(root, unrelated_target)
|
69
|
+
|
70
|
+
def test_abs_to_relative_empty_path(self):
    """
    An empty root or an empty target must each be rejected with a ValueError.
    """
    # (root, target) pairs, checked in this order: empty root first, then
    # empty target.
    invalid_inputs = (
        ("", "/lorem/ipsum"),
        ("/lorem/ipsum/", ""),
    )
    for root, target in invalid_inputs:
        with pytest.raises(
            ValueError, match="Expected target to be a child of root."
        ):
            Transaction._abs_txn_meta_path_to_relative(root, target)
|
75
|
+
|
76
|
+
# Test cases for the relativize_operation_paths function
|
77
|
+
def test_relativizemetafile_write_paths(self):
    """
    Relativizing an operation must rewrite each absolute metafile write path
    as a path relative to the catalog root, keeping the original order.
    """
    root = "/catalog/root"
    # (absolute input, expected relative output) for every metafile path.
    path_pairs = [
        ("/catalog/root/path/to/metafile1.mpk", "path/to/metafile1.mpk"),
        ("/catalog/root/path/to/metafile2.mpk", "path/to/metafile2.mpk"),
        ("/catalog/root/another/path/lore_ipsum.mpk", "another/path/lore_ipsum.mpk"),
        (
            "/catalog/root/another/path/meta/to/lorem_ipsum.mpk",
            "another/path/meta/to/lorem_ipsum.mpk",
        ),
        ("/catalog/root/another/path/lorem_ipsum.mpk", "another/path/lorem_ipsum.mpk"),
        ("/catalog/root/here.mpk", "here.mpk"),
    ]
    absolute = [source for source, _ in path_pairs]
    expected = [relative for _, relative in path_pairs]

    # Build a placeholder CREATE operation and attach the absolute paths.
    op = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    op.metafile_write_paths = absolute

    # Wrap the operation in a transaction and relativize its paths.
    txn = Transaction.of([op])
    txn.relativize_operation_paths(op, root)

    assert op.metafile_write_paths == expected
|
108
|
+
|
109
|
+
def test_relativize_locator_write_paths(self):
    """
    Relativizing an operation must rewrite each absolute locator write path
    as a path relative to the catalog root, keeping the original order.
    """
    root = "/catalog/root"
    # (absolute input, expected relative output) for every locator path.
    path_pairs = [
        ("/catalog/root/path/to/loc1.mpk", "path/to/loc1.mpk"),
        ("/catalog/root/path/to/loc2.mpk", "path/to/loc2.mpk"),
        ("/catalog/root/another/path/lore_ipsum.mpk", "another/path/lore_ipsum.mpk"),
        (
            "/catalog/root/another/path/meta/to/lorem_ipsum.mpk",
            "another/path/meta/to/lorem_ipsum.mpk",
        ),
        ("/catalog/root/another/path/lorem_ipsum.mpk", "another/path/lorem_ipsum.mpk"),
        ("/catalog/root/here.mpk", "here.mpk"),
    ]
    absolute = [source for source, _ in path_pairs]
    expected = [relative for _, relative in path_pairs]

    # Build a placeholder CREATE operation and attach the absolute paths.
    op = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    op.locator_write_paths = absolute

    # Wrap the operation in a transaction and relativize its paths.
    txn = Transaction.of(txn_operations=[op])
    txn.relativize_operation_paths(op, root)

    assert op.locator_write_paths == expected
|
140
|
+
|
141
|
+
def test_relativize_metafile_and_locator_paths(self):
    """
    When an operation carries both metafile and locator write paths, one
    relativize call must rewrite both lists relative to the catalog root.
    """
    root = "/meta_catalog/root_dir/a/b/c"

    # Inputs: absolute paths below the catalog root.
    meta_inputs = [
        "/meta_catalog/root_dir/a/b/c/namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk",
        "/meta_catalog/root_dir/a/b/c/namespace/table/table_version/stream_id/partition_id/00000000000000000002.mpk",
        "/meta_catalog/root_dir/a/b/c/namespace/table/table_version/stream_id/partition_id/00000000000000000003.mpk",
    ]
    loc_inputs = [
        "/meta_catalog/root_dir/a/b/c/d/table/table_version/stream_id/partition_id/00000000000000000001.mpk",
        "/meta_catalog/root_dir/a/b/c/e/table/table_version/stream_id/partition_id/00000000000000000002.mpk",
        "/meta_catalog/root_dir/a/b/c/f/table/table_version/stream_id/partition_id/00000000000000000003.mpk",
    ]

    # Expected outputs: the same paths with the catalog root removed.
    meta_relative_paths = [
        "namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk",
        "namespace/table/table_version/stream_id/partition_id/00000000000000000002.mpk",
        "namespace/table/table_version/stream_id/partition_id/00000000000000000003.mpk",
    ]
    loc_relative_paths = [
        "d/table/table_version/stream_id/partition_id/00000000000000000001.mpk",
        "e/table/table_version/stream_id/partition_id/00000000000000000002.mpk",
        "f/table/table_version/stream_id/partition_id/00000000000000000003.mpk",
    ]

    # Build a placeholder CREATE operation and attach both path lists.
    transaction_operation = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    transaction_operation.metafile_write_paths = meta_inputs
    transaction_operation.locator_write_paths = loc_inputs

    # Wrap the operation in a transaction and relativize its paths.
    txn = Transaction.of([transaction_operation])
    txn.relativize_operation_paths(transaction_operation, root)

    assert transaction_operation.metafile_write_paths == meta_relative_paths, (
        f"Expected: {meta_relative_paths}, but got: {transaction_operation.metafile_write_paths}"
    )
    assert transaction_operation.locator_write_paths == loc_relative_paths, (
        f"Expected: {loc_relative_paths}, but got: {transaction_operation.locator_write_paths}"
    )
|
182
|
+
|
183
|
+
def test_multiple_operations_relativize_paths(self):
    """
    Relativizes the metafile and locator write paths of eleven operations
    that belong to one transaction, then checks every operation.
    """
    root = "/catalog/root"
    meta_inputs = [
        "/catalog/root/path/to/metafile1.mpk",
        "/catalog/root/path/to/metafile2.mpk",
        "/catalog/root/another/path/lore_ipsum.mpk",
        "/catalog/root/another/path/meta/to/lorem_ipsum.mpk",
        "/catalog/root/another/path/lorem_ipsum.mpk",
        "/catalog/root/here.mpk",
    ]
    loc_inputs = [
        "/catalog/root/path/to/loc1.mpk",
        "/catalog/root/path/to/loc2.mpk",
        "/catalog/root/another/path/lore_ipsum.mpk",
        "/catalog/root/another/path/meta/to/lorem_ipsum.mpk",
        "/catalog/root/another/path/lorem_ipsum.mpk",
        "/catalog/root/here.mpk",
    ]
    meta_expected = [
        "path/to/metafile1.mpk",
        "path/to/metafile2.mpk",
        "another/path/lore_ipsum.mpk",
        "another/path/meta/to/lorem_ipsum.mpk",
        "another/path/lorem_ipsum.mpk",
        "here.mpk",
    ]
    loc_expected = [
        "path/to/loc1.mpk",
        "path/to/loc2.mpk",
        "another/path/lore_ipsum.mpk",
        "another/path/meta/to/lorem_ipsum.mpk",
        "another/path/lorem_ipsum.mpk",
        "here.mpk",
    ]
    shared_metafile = Metafile({"id": "dummy_metafile_id"})

    def build_operation():
        # Every operation is handed the very same input list objects and
        # the same destination metafile.
        op = TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=shared_metafile,
        )
        op.metafile_write_paths = meta_inputs
        op.locator_write_paths = loc_inputs
        return op

    operations = [build_operation() for _ in range(11)]

    # Relativize every operation before verifying any of them.
    txn = Transaction.of(operations)
    for op in operations:
        txn.relativize_operation_paths(op, root)

    for op in operations:
        assert op.metafile_write_paths == meta_expected
        assert op.locator_write_paths == loc_expected
|
236
|
+
|
237
|
+
def test_empty_metafile_and_locator_write_paths(self):
    """Relativizing an operation that has no write paths keeps both lists empty."""
    root = "/catalog/root"
    placeholder = Metafile({"id": "dummy_metafile_id"})
    op = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=placeholder,
    )

    # The operation starts out with nothing to relativize.
    op.metafile_write_paths = []
    op.locator_write_paths = []

    txn = Transaction.of([op])
    txn.relativize_operation_paths(op, root)

    # Both path lists must still be empty lists afterwards.
    assert op.metafile_write_paths == []
    assert op.locator_write_paths == []
|
250
|
+
|
251
|
+
def test_large_number_of_paths(self):
    """Relativize 5000 metafile write paths held by a single operation."""
    root = "/catalog/root"
    indices = range(5000)
    absolute = [f"/catalog/root/path/to/file{i}.mpk" for i in indices]
    relative = [f"path/to/file{i}.mpk" for i in indices]

    op = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    op.metafile_write_paths = absolute

    txn = Transaction.of([op])
    txn.relativize_operation_paths(op, root)

    # Every path must have lost its catalog-root prefix, in the same order.
    assert op.metafile_write_paths == relative
|
263
|
+
|
264
|
+
def test_large_number_of_paths_multi_ops(self):
    """Relativize 1000 metafile write paths once per operation type.

    Each operation type gets its own operation and its own single-operation
    transaction, and the assertion message names the type that failed.
    """
    catalog_root = "/catalog/root"
    absolute_paths = [f"/catalog/root/path/to/file{i}.mpk" for i in range(1000)]
    expected_paths = [f"path/to/file{i}.mpk" for i in range(1000)]

    # Different operation types to test.
    # NOTE(review): TransactionOperationType.UPDATE was commented out of this
    # list in the original test and is still not exercised here - presumably
    # because an UPDATE needs more than a destination metafile; TODO confirm.
    operation_types = [
        TransactionOperationType.CREATE,
        TransactionOperationType.DELETE,
        TransactionOperationType.READ_EXISTS,
        TransactionOperationType.READ_LATEST,
        TransactionOperationType.READ_CHILDREN,
        TransactionOperationType.READ_SIBLINGS,
    ]

    for op_type in operation_types:
        transaction_operation = TransactionOperation.of(
            operation_type=op_type,
            dest_metafile=Metafile({"id": "dummy_metafile_id"}),
        )
        # Give every operation its own list object so one iteration can never
        # observe paths that were handed to a previous operation.
        transaction_operation.metafile_write_paths = list(absolute_paths)

        transaction = Transaction.of([transaction_operation])
        transaction.relativize_operation_paths(transaction_operation, catalog_root)

        # Assert paths are relativized correctly
        assert (
            transaction_operation.metafile_write_paths == expected_paths
        ), f"Failed for operation type {op_type}"
|
294
|
+
|
295
|
+
|
296
|
+
class TestTransactionPersistence:
    """Tests for interactive transactions: stepping, pausing, resuming, sealing.

    Several test names say "iterative"; they exercise what the transaction API
    calls *interactive* transactions (``Transaction.interactive``).
    """

    @staticmethod
    def _namespace_create_op(name):
        """Build a namespace called ``name`` and a CREATE operation targeting it.

        Returns:
            A ``(namespace, operation)`` tuple.
        """
        namespace = Namespace.of(locator=NamespaceLocator.of(namespace=name))
        operation = TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=namespace,
        )
        return namespace, operation

    @staticmethod
    def _seal_three_step_roundtrip(temp_dir):
        """Step three namespace CREATEs, pausing and resuming between steps, then seal.

        Returns:
            ``(txn, namespaces, write_paths, success_log_path)`` where
            ``namespaces`` holds the namespaces in the order they were stepped
            and the last two items are what ``txn.seal()`` returned.
        """
        # Step 0: create an empty interactive transaction.
        txn = Transaction.of().start(temp_dir)
        namespaces = []
        for name in ("roundtrip_ns_1", "roundtrip_ns_2", "roundtrip_ns_3"):
            if namespaces:
                # Every step after the first follows a full pause/resume cycle.
                txn.pause()
                txn.resume()
            namespace, operation = TestTransactionPersistence._namespace_create_op(
                name
            )
            txn.step(operation)
            namespaces.append(namespace)
        # Final commit.
        write_paths, success_log_path = txn.seal()
        return txn, namespaces, write_paths, success_log_path

    # Verifies that transactions initialized with empty or None operations are
    # marked interactive, while transactions given operations are not.
    def test_create_iterative_transaction(self):
        """Check the ``interactive`` flag for empty, missing and populated op lists."""
        txn_empty_list = Transaction.of(txn_operations=[])
        txn_no_list = Transaction.of(txn_operations=None)
        op = TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=Metafile({"id": "dummy_metafile_id"}),
        )
        txn_with_ops = Transaction.of(txn_operations=[op, op])

        # An empty list is detected by the constructor --> interactive.
        assert txn_empty_list.interactive
        # Initializing with no list at all --> interactive.
        assert txn_no_list.interactive
        # A populated operation list --> not interactive.
        assert not txn_with_ops.interactive

    # Builds and commits a transaction step-by-step, then validates the output
    # files and the transaction success log.
    def test_commit_iterative_transaction(self, temp_dir):
        """Step two namespace CREATEs into an interactive transaction and seal it."""
        # Create two simple namespaces and their CREATE operations.
        ns1, op1 = self._namespace_create_op("test_ns_1")
        ns2, op2 = self._namespace_create_op("test_ns_2")

        # Start with an empty transaction (interactive); operate on deep-copy.
        txn = Transaction.of().start(temp_dir)

        # Step the manually built operations in.
        txn.step(op1)
        txn.step(op2)

        # seal() for interactive transactions
        write_paths, success_log_path = txn.seal()

        # Check output files exist and are valid.
        deserialized_ns1 = Namespace.read(write_paths[0])
        deserialized_ns2 = Namespace.read(write_paths[1])

        assert ns1.equivalent_to(deserialized_ns1)
        assert ns2.equivalent_to(deserialized_ns2)
        assert success_log_path.endswith(str(txn.end_time))

    # Ensures that stepping and committing a transaction writes non-empty
    # output files and a non-empty success log.
    def test_commit_iterative_file_creation(self, temp_dir):
        """Seal a one-step transaction and check every reported file on disk."""
        _, op = self._namespace_create_op("check_writes")
        txn = Transaction.of().start(temp_dir)
        txn.step(op)
        write_paths, success_log_path = txn.seal()

        # Check the files were created.
        for path in write_paths:
            abs_path = os.path.join(temp_dir, path)
            assert os.path.exists(abs_path)
            assert os.path.getsize(abs_path) > 0

        # Check the success log exists.
        assert os.path.exists(success_log_path)
        assert os.path.getsize(success_log_path) > 0

    # Confirms that a transaction can be paused, resumed, and successfully
    # committed without data loss.
    def test_transaction_pause_and_resume_roundtrip(self, temp_dir):
        """Pause and resume once between stepping and sealing."""
        ns, op = self._namespace_create_op("paused_resume_ns")

        # Start interactive transaction.
        txn = Transaction.of().start(temp_dir)
        txn.step(op)

        # Pause transaction (writes to paused/)
        txn.pause()

        # Resume transaction (reads from paused/)
        txn.resume()

        # Commit resumed transaction.
        write_paths, success_log_path = txn.seal()

        # Validate outputs.
        deserialized = Namespace.read(write_paths[0])
        assert ns.equivalent_to(deserialized)
        assert os.path.exists(success_log_path)
        assert success_log_path.endswith(str(txn.end_time))

    # Validates that transaction state, including ID and write paths, is
    # preserved across a pause/resume cycle.
    def test_resume_preserves_state_after_pause(self, temp_dir):
        """Check ID, time provider and metafile write paths after pause/resume."""
        _, op = self._namespace_create_op("resume_state_check")

        txn = Transaction.of().start(temp_dir)
        txn.step(op)
        txn_id_before = txn.id

        txn.pause()
        txn.resume()

        # Ensure the ID and provider are still valid.
        assert txn.id == txn_id_before
        assert txn._time_provider is not None
        assert hasattr(txn, "metafile_write_paths")
        assert len(txn.metafile_write_paths) == 1

        # Check commit still works.
        _, success_log_path = txn.seal()
        assert os.path.exists(success_log_path)

    # Explicitly checks, field by field, that state is preserved.
    def test_resume_preserves_state_after_pause_deep(self, temp_dir):
        """Compare individual transaction fields before and after pause/resume."""
        _, op = self._namespace_create_op("resume_state_check")

        txn = Transaction.of().start(temp_dir)
        txn.step(op)

        # Save values before pause (lists are copied so they cannot alias).
        txn_id_before = txn.id
        start_time_before = txn.start_time
        root_before = txn.catalog_root_normalized
        meta_paths_before = list(txn.metafile_write_paths)
        locator_paths_before = list(txn.locator_write_paths)

        txn.pause()
        txn.resume()

        # Field-by-field checks.
        assert txn.id == txn_id_before, "Transaction ID should be preserved"
        assert txn._time_provider is not None, "Time provider should be reinitialized"
        assert txn.start_time == start_time_before, "Start time should be preserved"
        assert txn.catalog_root_normalized == root_before, "Catalog root should match"
        assert (
            txn.metafile_write_paths == meta_paths_before
        ), "Metafile paths must match"
        assert (
            txn.locator_write_paths == locator_paths_before
        ), "Locator paths must match"
        assert (
            isinstance(txn.operations, list) and len(txn.operations) == 1
        ), "Operations must be restored"
        assert txn.pause_time is not None, "Pause time should be restored"

        # Final commit still works; only the success log path is needed here.
        _, success_log_path = txn.seal()
        assert os.path.exists(success_log_path)

    # Checks that pausing a transaction moves its log from running/ to paused/
    # and that the paused log is readable msgpack.
    def test_pause_moves_running_to_paused(self, temp_dir):
        """Inspect the running/ and paused/ log files around ``pause()``."""
        # Set up a transaction and a single operation.
        _, op = self._namespace_create_op("pause_test")
        txn = Transaction.of().start(temp_dir)
        txn.step(op)

        fs = pyarrow.fs.LocalFileSystem()
        txn_id = txn.id
        txn_log_dir = posixpath.join(temp_dir, TXN_DIR_NAME)

        running_path = posixpath.join(txn_log_dir, RUNNING_TXN_DIR_NAME, txn_id)
        paused_path = posixpath.join(txn_log_dir, PAUSED_TXN_DIR_NAME, txn_id)

        # Sanity check: file should be in running/
        assert fs.get_file_info(running_path).type == pyarrow.fs.FileType.File

        # Pause transaction.
        txn.pause()

        # Ensure the running file is deleted.
        assert fs.get_file_info(running_path).type == pyarrow.fs.FileType.NotFound

        # Ensure the paused file exists and contains valid msgpack.
        paused_info = fs.get_file_info(paused_path)
        assert paused_info.type == pyarrow.fs.FileType.File
        with fs.open_input_stream(paused_path) as f:
            data = f.readall()
        txn_loaded = msgpack.loads(data)
        assert "operations" in txn_loaded

    # Simulates a full multi-step transaction with multiple pause/resume cycles
    # and verifies all outputs.
    def test_transaction_pause_and_resume_roundtrip_complex(self, temp_dir):
        """Three steps with a pause/resume between them, then verify the files."""
        txn, namespaces, write_paths, success_log_path = (
            self._seal_three_step_roundtrip(temp_dir)
        )

        # Read and verify written namespaces.
        for i, ns in enumerate(namespaces):
            written_path = write_paths[i]
            # Check the file before reading it so that a missing file is
            # reported through the assertion message below.
            assert os.path.exists(written_path), f"Missing file: {written_path}"
            assert os.path.getsize(written_path) > 0
            deserialized_ns = Namespace.read(written_path)
            assert ns.equivalent_to(
                deserialized_ns
            ), f"Mismatch in ns{i+1}: {ns} != {deserialized_ns}"

        # Check success log exists and is named after the end time.
        assert os.path.exists(success_log_path)
        assert success_log_path.endswith(str(txn.end_time))

    # Repeats the complex pause/resume flow with additional assertions on
    # namespace equality and time consistency.
    def test_transaction_pause_and_resume_roundtrip_complex_2(self, temp_dir):
        """Same flow as the complex roundtrip, with field-level namespace checks."""
        txn, namespaces, write_paths, success_log_path = (
            self._seal_three_step_roundtrip(temp_dir)
        )

        assert txn.start_time < txn.end_time

        # Read and verify written namespaces.
        for i, ns in enumerate(namespaces):
            written_path = write_paths[i]

            # Confirm file was created and is non-empty.
            assert os.path.exists(written_path), f"Missing file: {written_path}"
            assert os.path.getsize(written_path) > 0, f"Empty file: {written_path}"

            # Deserialize and verify content.
            deserialized_ns = Namespace.read(written_path)
            assert ns.equivalent_to(deserialized_ns), f"Namespace mismatch at index {i}"
            assert ns.locator.namespace == deserialized_ns.locator.namespace
            assert ns.locator_alias == deserialized_ns.locator_alias
            assert ns.properties == deserialized_ns.properties

        # Verify success log.
        assert os.path.exists(success_log_path)
        assert success_log_path.endswith(str(txn.end_time))
|
592
|
+
|
593
|
+
|
594
|
+
class TestTransactionCommitMessage:
|
595
|
+
"""Test commit message preservation and retrieval for transactions."""
|
596
|
+
|
597
|
+
def test_transaction_with_commit_message(self):
    """A commit message passed at construction is readable back from the transaction."""
    expected = "Test commit message for transaction functionality"

    transaction = Transaction.of(commit_message=expected)

    # The message is exposed both as a property and through get().
    assert transaction.commit_message == expected
    assert transaction.get("commit_message") == expected
|
607
|
+
|
608
|
+
def test_transaction_without_commit_message(self):
    """A transaction built without a commit message reports ``None`` for it."""
    transaction = Transaction.of()

    # Neither the property nor get() yields a message.
    assert transaction.commit_message is None
    assert transaction.get("commit_message") is None
|
616
|
+
|
617
|
+
def test_transaction_commit_message_setter(self):
    """A commit message assigned after creation is stored on the transaction."""
    transaction = Transaction.of()
    # Starts out without any message.
    assert transaction.commit_message is None

    late_message = "Added commit message after creation"
    # Assign through the property setter.
    transaction.commit_message = late_message

    # The new message is visible through both access paths.
    assert transaction.commit_message == late_message
    assert transaction.get("commit_message") == late_message
|
630
|
+
|
631
|
+
def test_transaction_serialization_with_commit_message(self, temp_dir):
|
632
|
+
"""Test that commit messages persist through transaction serialization."""
|
633
|
+
commit_msg = "Serialization test commit message"
|
634
|
+
|
635
|
+
# Create namespace for testing
|
636
|
+
ns = Namespace.of(locator=NamespaceLocator.of(namespace="serialization_test"))
|
637
|
+
|
638
|
+
# Create transaction with commit message
|
639
|
+
txn = Transaction.of(commit_message=commit_msg).start(temp_dir)
|
640
|
+
op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
|
641
|
+
txn.step(op)
|
642
|
+
|
643
|
+
# Commit transaction (this should serialize the transaction with commit message)
|
644
|
+
_, success_log_path = txn.seal()
|
645
|
+
|
646
|
+
# Read the transaction log and verify commit message persisted
|
647
|
+
txn_read = Transaction.read(success_log_path)
|
648
|
+
assert txn_read.commit_message == commit_msg
|
649
|
+
|
650
|
+
# Verify other transaction properties are intact
|
651
|
+
assert txn_read.start_time == txn.start_time
|
652
|
+
assert txn_read.end_time == txn.end_time
|
653
|
+
assert len(txn_read.operations) == 1
|