deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +150 -12
- deltacat/annotations.py +36 -0
- deltacat/api.py +578 -0
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +84 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +22 -19
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
- deltacat/catalog/__init__.py +73 -0
- deltacat/catalog/delegate.py +615 -140
- deltacat/catalog/interface.py +404 -81
- deltacat/catalog/main/impl.py +2882 -0
- deltacat/catalog/model/catalog.py +348 -46
- deltacat/catalog/model/properties.py +155 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +19 -9
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +9 -22
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +6 -6
- deltacat/compute/compactor/steps/materialize.py +15 -9
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +7 -6
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +13 -14
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +28 -9
- deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +156 -53
- deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +10 -3
- deltacat/compute/compactor_v2/utils/merge.py +14 -2
- deltacat/compute/compactor_v2/utils/task_options.py +2 -10
- deltacat/compute/converter/constants.py +9 -0
- deltacat/compute/converter/converter_session.py +298 -0
- deltacat/compute/converter/model/convert_input.py +96 -0
- deltacat/compute/converter/model/convert_input_files.py +78 -0
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +144 -0
- deltacat/compute/converter/pyiceberg/catalog.py +78 -0
- deltacat/compute/converter/pyiceberg/overrides.py +263 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
- deltacat/compute/converter/steps/convert.py +366 -0
- deltacat/compute/converter/steps/dedupe.py +94 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +132 -0
- deltacat/compute/converter/utils/converter_session_utils.py +175 -0
- deltacat/compute/converter/utils/iceberg_columns.py +87 -0
- deltacat/compute/converter/utils/io.py +203 -0
- deltacat/compute/converter/utils/s3u.py +148 -0
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +11 -1
- deltacat/constants.py +90 -1
- deltacat/docs/__init__.py +0 -0
- deltacat/docs/autogen/__init__.py +0 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +61 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
- deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
- deltacat/examples/hello_world.py +29 -0
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +116 -12
- deltacat/experimental/__init__.py +0 -0
- deltacat/experimental/catalog/__init__.py +0 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/experimental/catalog/iceberg/impl.py +399 -0
- deltacat/experimental/catalog/iceberg/overrides.py +72 -0
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/experimental/storage/iceberg/impl.py +739 -0
- deltacat/experimental/storage/iceberg/model.py +713 -0
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
- deltacat/experimental/storage/rivulet/dataset.py +745 -0
- deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
- deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
- deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
- deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
- deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
- deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
- deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
- deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
- deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
- deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
- deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
- deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
- deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
- deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
- deltacat/experimental/storage/rivulet/serializer.py +40 -0
- deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +138 -28
- deltacat/storage/interface.py +260 -155
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +3030 -0
- deltacat/storage/model/delta.py +142 -71
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -9
- deltacat/storage/model/manifest.py +643 -0
- deltacat/storage/model/metafile.py +1421 -0
- deltacat/storage/model/namespace.py +41 -18
- deltacat/storage/model/partition.py +443 -43
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +46 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +3160 -0
- deltacat/storage/model/shard.py +51 -0
- deltacat/storage/model/sort_key.py +210 -13
- deltacat/storage/model/stream.py +215 -80
- deltacat/storage/model/table.py +134 -29
- deltacat/storage/model/table_version.py +333 -46
- deltacat/storage/model/transaction.py +1733 -0
- deltacat/storage/model/transform.py +274 -58
- deltacat/storage/model/types.py +138 -16
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +321 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +23 -30
- deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
- deltacat/tests/compute/compactor/utils/test_io.py +125 -123
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
- deltacat/tests/compute/conftest.py +39 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +826 -0
- deltacat/tests/compute/converter/utils.py +132 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
- deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
- deltacat/tests/compute/test_compact_partition_params.py +16 -11
- deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +726 -46
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/conftest.py +25 -0
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
- deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +8204 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +2440 -0
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +479 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +24 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +653 -0
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1064 -0
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +50 -26
- deltacat/tests/test_utils/storage.py +256 -4
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +124 -34
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1107 -258
- deltacat/types/media.py +345 -37
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +2345 -47
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +61 -0
- deltacat/utils/filesystem.py +450 -0
- deltacat/utils/metafile_locator.py +74 -0
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1212 -178
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/aws/redshift/model/manifest.py +0 -394
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-1.1.38.dist-info/METADATA +0 -64
- deltacat-1.1.38.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
- /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
- /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
deltacat/io/read_api.py
DELETED
@@ -1,143 +0,0 @@
|
|
1
|
-
from typing import Any, Callable, Dict, List, Optional, Union
|
2
|
-
|
3
|
-
import pyarrow as pa
|
4
|
-
import s3fs
|
5
|
-
from ray.data import read_datasource
|
6
|
-
from ray.data._internal.arrow_block import ArrowRow
|
7
|
-
|
8
|
-
from deltacat import ContentType
|
9
|
-
from deltacat.io.aws.redshift.redshift_datasource import (
|
10
|
-
HivePartitionParser,
|
11
|
-
RedshiftDatasource,
|
12
|
-
RedshiftUnloadTextArgs,
|
13
|
-
S3PathType,
|
14
|
-
)
|
15
|
-
from deltacat.io.dataset import DeltacatDataset
|
16
|
-
from deltacat.utils.common import ReadKwargsProvider
|
17
|
-
|
18
|
-
|
19
|
-
def read_redshift(
|
20
|
-
paths: Union[str, List[str]],
|
21
|
-
*,
|
22
|
-
path_type: S3PathType = S3PathType.MANIFEST,
|
23
|
-
filesystem: Optional[Union[pa.fs.S3FileSystem, s3fs.S3FileSystem]] = None,
|
24
|
-
columns: Optional[List[str]] = None,
|
25
|
-
schema: Optional[pa.Schema] = None,
|
26
|
-
unload_text_args: RedshiftUnloadTextArgs = RedshiftUnloadTextArgs(),
|
27
|
-
partitioning: HivePartitionParser = None,
|
28
|
-
content_type_provider: Callable[[str], ContentType] = lambda p: ContentType.PARQUET
|
29
|
-
if p.endswith(".parquet")
|
30
|
-
else ContentType.CSV,
|
31
|
-
parallelism: int = 200,
|
32
|
-
ray_remote_args: Dict[str, Any] = None,
|
33
|
-
arrow_open_stream_args: Optional[Dict[str, Any]] = None,
|
34
|
-
pa_read_func_kwargs_provider: Optional[ReadKwargsProvider] = None,
|
35
|
-
**kwargs,
|
36
|
-
) -> DeltacatDataset[ArrowRow]:
|
37
|
-
"""Reads Redshift UNLOAD results from either S3 Parquet or delimited text
|
38
|
-
files into a Ray Dataset.
|
39
|
-
|
40
|
-
Examples:
|
41
|
-
>>> # Read all files contained in a Redshift Manifest:
|
42
|
-
>>> import deltacat as dc
|
43
|
-
>>> dc.io.read_redshift("/bucket/dir/manifest")
|
44
|
-
|
45
|
-
>>> # Read all files matching the given key prefix. If this prefix
|
46
|
-
>>> # refers to multiple files, like s3://bucket/data.parquet,
|
47
|
-
>>> # s3://bucket/data.1.csv, etc. then all will be read. The dataset
|
48
|
-
>>> # schema will be inferred from the first parquet file and used for
|
49
|
-
>>> # explicit type conversion of all CSV files:
|
50
|
-
>>> dc.io.read_redshift(
|
51
|
-
>>> "s3://bucket/data.txt",
|
52
|
-
>>> path_type=S3PathType.PREFIX)
|
53
|
-
|
54
|
-
>>> # Read all files matching the given key prefix. If this prefix
|
55
|
-
>>> # refers to multiple files or folders, like s3://bucket/dir/,
|
56
|
-
>>> # s3://bucket/dir1/, s3://bucket/dir.txt, s3://bucket/dir.txt.1,
|
57
|
-
>>> # then all files and subfolder contents will be read.
|
58
|
-
>>> dc.io.read_redshift(
|
59
|
-
>>> "/bucket/dir",
|
60
|
-
>>> path_type=S3PathType.PREFIX)
|
61
|
-
|
62
|
-
>>> # Read multiple files and folders:
|
63
|
-
>>> dc.io.read_redshift(
|
64
|
-
>>> ["/bucket/file1", "/bucket/folder1/"],
|
65
|
-
>>> path_type=S3PathType.FILES_AND_FOLDERS)
|
66
|
-
|
67
|
-
>>> # Read multiple Parquet and CSV files. The dataset schema will be
|
68
|
-
>>> # inferred from the first parquet file and used for explicit type
|
69
|
-
>>> # conversion of all CSV files:
|
70
|
-
>>> dc.io.read_redshift(
|
71
|
-
>>> ["/bucket/file.parquet", "/bucket/file.csv"],
|
72
|
-
>>> path_type=S3PathType.FILES_AND_FOLDERS)
|
73
|
-
|
74
|
-
Args:
|
75
|
-
paths: Paths to S3 files and folders to read. If `path_type` is
|
76
|
-
`MANIFEST` then this must be an S3 Redshift Manifest JSON file. If
|
77
|
-
`path_type` is `PREFIX` then this must be a valid S3 key prefix.
|
78
|
-
All files matching the key prefix, including files in matching
|
79
|
-
subdirectories, will be read. Unless custom
|
80
|
-
`content_type_extensions` are specified, file content types will be
|
81
|
-
inferred by file extension with ".parquet" used for Parquet files,
|
82
|
-
and all others assumed to be delimited text (e.g. CSV). It's
|
83
|
-
recommended to specify the path to a manifest unloaded with the
|
84
|
-
VERBOSE option whenever possible to improve the correctness and
|
85
|
-
performance of Dataset reads, compute operations, and writes.
|
86
|
-
`FILES_AND_FOLDERS` is not recommended when reading thousands of
|
87
|
-
files due to its relatively high-latency.
|
88
|
-
path_type: Determines how the `paths` parameter is interpreted.
|
89
|
-
filesystem: The filesystem implementation to read from. This should be
|
90
|
-
either PyArrow's S3FileSystem or s3fs.
|
91
|
-
columns: A list of column names to read. Reads all columns if None or
|
92
|
-
empty.
|
93
|
-
schema: PyArrow schema used to determine delimited text column
|
94
|
-
names and types. If not specified and both Parquet and delimited
|
95
|
-
text files are read as input, then the first Parquet file schema
|
96
|
-
discovered is used instead.
|
97
|
-
unload_text_args: Arguments used when running Redshift `UNLOAD` to
|
98
|
-
text file formats (e.g. CSV). These arguments ensure that all input
|
99
|
-
text files will be correctly parsed. If not specified, then all
|
100
|
-
text files read are assumed to use Redshift UNLOAD's default
|
101
|
-
pipe-delimited text format.
|
102
|
-
partition_base_dir: Base directory to start searching for partitions
|
103
|
-
(exclusive). File paths outside of this directory will not be parsed
|
104
|
-
for partitions and automatically added to the dataset without passing
|
105
|
-
through any partition filter. Specify `None` or an empty string to
|
106
|
-
search for partitions in all file path directories.
|
107
|
-
partition_filter_fn: Callback used to filter `PARTITION` columns. Receives a
|
108
|
-
dictionary mapping partition keys to values as input, returns `True` to
|
109
|
-
read a partition, and `False` to skip it. Each partition key and value
|
110
|
-
is a string parsed directly from an S3 key using hive-style
|
111
|
-
partition directory names of the form "{key}={value}". For example:
|
112
|
-
``lambda x:
|
113
|
-
True if x["month"] == "January" and x["year"] == "2022" else False``
|
114
|
-
content_type_provider: Takes a file path as input and returns the file
|
115
|
-
content type as output.
|
116
|
-
parallelism: The requested parallelism of the read. Parallelism may be
|
117
|
-
limited by the number of files of the dataset.
|
118
|
-
ray_remote_args: kwargs passed to `ray.remote` in the read tasks.
|
119
|
-
arrow_open_stream_args: kwargs passed to to
|
120
|
-
`pa.fs.open_input_stream()`.
|
121
|
-
pa_read_func_kwargs_provider: Callback that takes a `ContentType` value
|
122
|
-
string as input, and provides read options to pass to either
|
123
|
-
`pa.csv.open_csv()` or `pa.parquet.read_table()` as output.
|
124
|
-
Returns:
|
125
|
-
Dataset holding Arrow records read from the specified paths.
|
126
|
-
"""
|
127
|
-
dataset = read_datasource(
|
128
|
-
RedshiftDatasource(),
|
129
|
-
parallelism=parallelism,
|
130
|
-
paths=paths,
|
131
|
-
content_type_provider=content_type_provider,
|
132
|
-
path_type=path_type,
|
133
|
-
filesystem=filesystem,
|
134
|
-
columns=columns,
|
135
|
-
schema=schema,
|
136
|
-
unload_args=unload_text_args,
|
137
|
-
partitioning=partitioning,
|
138
|
-
ray_remote_args=ray_remote_args,
|
139
|
-
open_stream_args=arrow_open_stream_args,
|
140
|
-
read_kwargs_provider=pa_read_func_kwargs_provider,
|
141
|
-
**kwargs,
|
142
|
-
)
|
143
|
-
return DeltacatDataset.from_dataset(dataset)
|
@@ -1,40 +0,0 @@
|
|
1
|
-
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
|
-
from __future__ import annotations
|
3
|
-
|
4
|
-
from typing import List, Optional
|
5
|
-
|
6
|
-
|
7
|
-
class DeleteParameters(dict):
|
8
|
-
"""
|
9
|
-
Contains all parameters required to support DELETEs
|
10
|
-
equality_column_names: List of column names that would be used to determine row equality for equality deletes. Relevant only to equality deletes
|
11
|
-
"""
|
12
|
-
|
13
|
-
@staticmethod
|
14
|
-
def of(
|
15
|
-
equality_column_names: Optional[List[str]] = None,
|
16
|
-
) -> DeleteParameters:
|
17
|
-
delete_parameters = DeleteParameters()
|
18
|
-
if equality_column_names is not None:
|
19
|
-
delete_parameters["equality_column_names"] = equality_column_names
|
20
|
-
return delete_parameters
|
21
|
-
|
22
|
-
@property
|
23
|
-
def equality_column_names(self) -> Optional[List[str]]:
|
24
|
-
return self.get("equality_column_names")
|
25
|
-
|
26
|
-
@staticmethod
|
27
|
-
def merge_delete_parameters(
|
28
|
-
delete_parameters: List[DeleteParameters],
|
29
|
-
) -> Optional[DeleteParameters]:
|
30
|
-
if len(delete_parameters) < 2:
|
31
|
-
return delete_parameters
|
32
|
-
equality_column_names = delete_parameters[0].equality_column_names
|
33
|
-
assert all(
|
34
|
-
delete_prev.equality_column_names == delete_curr.equality_column_names
|
35
|
-
for delete_prev, delete_curr in zip(
|
36
|
-
delete_parameters, delete_parameters[1:]
|
37
|
-
)
|
38
|
-
), "We cannot merge two delete parameters if their equality column names are different."
|
39
|
-
merge_delete_parameters = DeleteParameters.of(equality_column_names)
|
40
|
-
return merge_delete_parameters
|
@@ -1,71 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
from typing import List, Optional, Any
|
3
|
-
from deltacat.storage.model.transform import Transform
|
4
|
-
|
5
|
-
"""
|
6
|
-
An ordered list of partition values determining the values of
|
7
|
-
ordered transforms specified in the partition spec.
|
8
|
-
"""
|
9
|
-
PartitionValues = List[Any]
|
10
|
-
|
11
|
-
|
12
|
-
class PartitionFilter(dict):
|
13
|
-
"""
|
14
|
-
This class represents a filter for partitions.
|
15
|
-
It is used to filter partitions based on certain criteria.
|
16
|
-
"""
|
17
|
-
|
18
|
-
@staticmethod
|
19
|
-
def of(
|
20
|
-
partition_values: Optional[PartitionValues] = None,
|
21
|
-
) -> PartitionFilter:
|
22
|
-
"""
|
23
|
-
Creates a new PartitionFilter instance with the specified partition key and value.
|
24
|
-
"""
|
25
|
-
partition_filter = PartitionFilter()
|
26
|
-
partition_filter["partitionValues"] = partition_values
|
27
|
-
return partition_filter
|
28
|
-
|
29
|
-
@property
|
30
|
-
def partition_values(self) -> Optional[PartitionValues]:
|
31
|
-
return self.get("partitionValues")
|
32
|
-
|
33
|
-
|
34
|
-
class PartitionSpec(dict):
    """
    Describes how the entities at one level of the hierarchy are partitioned.

    A stream partitions its deltas, and a delta partitions its files. The
    spec is a dict whose "orderedTransforms" entry holds the ordered list of
    transforms (or None when no transforms were given).
    """

    @staticmethod
    def of(ordered_transforms: Optional[List[Transform]] = None) -> PartitionSpec:
        """
        Build a PartitionSpec from an ordered list of transforms.

        Args:
            ordered_transforms: Transforms in the order they apply, or None.

        Returns:
            A new PartitionSpec with its "orderedTransforms" entry set to
            ``ordered_transforms`` (the key is always present, even for None).
        """
        partition_spec = PartitionSpec()
        partition_spec.ordered_transforms = ordered_transforms
        return partition_spec

    @property
    def ordered_transforms(self) -> Optional[List[Transform]]:
        """Ordered transforms of this spec, or None when unset."""
        return self.get("orderedTransforms")

    @ordered_transforms.setter
    def ordered_transforms(self, value: Optional[List[Transform]]) -> None:
        # Stored under the camelCase key used by the dict representation.
        self["orderedTransforms"] = value
|
54
|
-
|
55
|
-
|
56
|
-
class StreamPartitionSpec(PartitionSpec):
    """
    Partition specification for a stream.

    A stream uses this spec to partition its deltas into multiple
    different partitions. No behavior is added over PartitionSpec.
    """
|
63
|
-
|
64
|
-
|
65
|
-
class DeltaPartitionSpec(PartitionSpec):
    """
    Partition specification for a delta.

    The manifest entries of a delta are partitioned according to this
    spec. No behavior is added over PartitionSpec.
    """
|
@@ -1,231 +0,0 @@
|
|
1
|
-
import pytest
|
2
|
-
import os
|
3
|
-
from moto import mock_s3
|
4
|
-
import boto3
|
5
|
-
from boto3.resources.base import ServiceResource
|
6
|
-
from deltacat.compute.compactor.utils.round_completion_file import (
|
7
|
-
read_round_completion_file,
|
8
|
-
write_round_completion_file,
|
9
|
-
)
|
10
|
-
from deltacat.tests.compute.test_util_common import get_test_partition_locator
|
11
|
-
from deltacat.compute.compactor import RoundCompletionInfo
|
12
|
-
|
13
|
-
# Name of the mocked S3 bucket that the round completion file (RCF) tests
# create, write to, and read from.
RCF_BUCKET_NAME = "rcf-bucket"
|
14
|
-
|
15
|
-
|
16
|
-
@pytest.fixture(autouse=True, scope="module")
def mock_aws_credential():
    """
    Export dummy AWS credentials and a default region for this test module.

    The values are placeholders so that boto3 clients created during the
    tests never pick up real credentials from the environment. The
    variables are set once per module and are not restored afterwards.
    """
    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
    # boto3 reads the secret key from AWS_SECRET_ACCESS_KEY. The previously
    # used name "AWS_SECRET_ACCESS_ID" is not a recognized variable, which
    # left any real secret key in the environment untouched.
    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
    os.environ["AWS_SECURITY_TOKEN"] = "testing"
    os.environ["AWS_SESSION_TOKEN"] = "testing"
    os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
    yield
|
24
|
-
|
25
|
-
|
26
|
-
@pytest.fixture(autouse=True, scope="module")
def s3_resource(mock_aws_credential):
    """
    Yield a boto3 S3 resource created while the moto S3 mock is active.

    Depends on ``mock_aws_credential`` so the dummy credentials are in
    place before the resource is created. The mock stays active until the
    module's tests have finished.
    """
    s3_mock = mock_s3()
    with s3_mock:
        resource = boto3.resource("s3")
        yield resource
|
30
|
-
|
31
|
-
|
32
|
-
@pytest.fixture(autouse=True, scope="function")
def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
    """
    Create the RCF bucket before each test and empty it afterwards.

    Only the objects are deleted on teardown; the bucket itself is left in
    place.
    """
    bucket_options = {"ACL": "authenticated-read", "Bucket": RCF_BUCKET_NAME}
    s3_resource.create_bucket(**bucket_options)
    yield
    rcf_bucket = s3_resource.Bucket(RCF_BUCKET_NAME)
    rcf_bucket.objects.all().delete()
|
40
|
-
|
41
|
-
|
42
|
-
class TestReadWriteRoundCompletionFile:
    """
    Round-trip tests for write_round_completion_file and
    read_round_completion_file against the mocked RCF bucket.

    Each test writes a round completion file (RCF) with or without a
    destination partition locator and checks what a subsequent read
    returns.
    """

    @staticmethod
    def _new_rcf(high_watermark=122, sort_keys_bit_width=12):
        """Build a RoundCompletionInfo with an empty locator and write result."""
        return RoundCompletionInfo.of(
            high_watermark=high_watermark,
            compacted_delta_locator={},
            compacted_pyarrow_write_result={},
            sort_keys_bit_width=sort_keys_bit_width,
        )

    def test_read_when_rcf_written_without_destination(self):
        """
        Backward compatibility: an RCF written without a destination
        locator is still returned when the read passes a destination.
        """
        source_locator = get_test_partition_locator("source")
        destination_locator = get_test_partition_locator("destination")
        expected_rcf = self._new_rcf()

        rcf_url = write_round_completion_file(
            RCF_BUCKET_NAME, source_locator, None, expected_rcf
        )
        rcf = read_round_completion_file(
            RCF_BUCKET_NAME, source_locator, destination_locator
        )

        assert (
            rcf_url == "s3://rcf-bucket/f9829af39770d904dbb811bd8f4e886dd307f507.json"
        )
        assert rcf == expected_rcf

    def test_read_when_rcf_written_with_destination(self):
        """
        An RCF written with a destination locator is stored under a
        source/destination URL and is returned when read with the same
        locators.
        """
        source_locator = get_test_partition_locator("source")
        destination_locator = get_test_partition_locator("destination")
        expected_rcf = self._new_rcf()

        rcf_url = write_round_completion_file(
            RCF_BUCKET_NAME, source_locator, destination_locator, expected_rcf
        )
        rcf = read_round_completion_file(
            RCF_BUCKET_NAME, source_locator, destination_locator
        )

        assert (
            rcf_url
            == "s3://rcf-bucket/f9829af39770d904dbb811bd8f4e886dd307f507/e9939deadc091b3289a2eb0ca56b1ba86b9892f4.json"
        )
        assert rcf == expected_rcf

    def test_read_without_destination_when_rcf_written_with_destination(self):
        """
        An RCF written with a destination locator is not found when the
        read omits the destination: the read returns None.
        """
        source_locator = get_test_partition_locator("source")
        destination_locator = get_test_partition_locator("destination")
        expected_rcf = self._new_rcf()

        write_round_completion_file(
            RCF_BUCKET_NAME, source_locator, destination_locator, expected_rcf
        )
        rcf = read_round_completion_file(RCF_BUCKET_NAME, source_locator, None)

        assert rcf is None

    def test_read_without_destination_when_rcf_written_without_destination(self):
        """
        An RCF written without a destination locator is returned when the
        read also omits the destination.
        """
        source_locator = get_test_partition_locator("source")
        expected_rcf = self._new_rcf()

        write_round_completion_file(RCF_BUCKET_NAME, source_locator, None, expected_rcf)
        rcf = read_round_completion_file(RCF_BUCKET_NAME, source_locator, None)

        assert rcf == expected_rcf

    def test_read_when_rcf_written_both_with_and_without_destination(self):
        """
        When RCFs exist both with and without a destination locator, a
        read that passes the destination returns the one written with it.
        """
        source_locator = get_test_partition_locator("source")
        destination_locator = get_test_partition_locator("destination")
        expected_rcf = self._new_rcf()
        # Distinct values so the two files can be told apart on read.
        expected_rcf_2 = self._new_rcf(high_watermark=1223, sort_keys_bit_width=1233)

        write_round_completion_file(RCF_BUCKET_NAME, source_locator, None, expected_rcf)
        write_round_completion_file(
            RCF_BUCKET_NAME, source_locator, destination_locator, expected_rcf_2
        )
        rcf = read_round_completion_file(
            RCF_BUCKET_NAME, source_locator, destination_locator
        )

        assert rcf == expected_rcf_2

    def test_read_when_none_destination_partition_id(self):
        """
        An RCF round-trips when the destination locator is built with
        get_test_partition_locator(None).
        """
        source_locator = get_test_partition_locator("source")
        destination_locator = get_test_partition_locator(None)
        expected_rcf = self._new_rcf()

        write_round_completion_file(
            RCF_BUCKET_NAME, source_locator, destination_locator, expected_rcf
        )
        rcf = read_round_completion_file(
            RCF_BUCKET_NAME, source_locator, destination_locator
        )

        assert rcf == expected_rcf

    def test_write_when_custom_url_is_passed(self):
        """
        Writing with an explicit completion_file_s3_url returns that URL,
        and a locator-based read does not find the file (returns None).
        """
        source_locator = get_test_partition_locator("source")
        expected_rcf = self._new_rcf()
        completion_file_s3_url = f"s3://{RCF_BUCKET_NAME}/test.json"

        rcf_url = write_round_completion_file(
            RCF_BUCKET_NAME,
            source_locator,
            None,
            expected_rcf,
            completion_file_s3_url=completion_file_s3_url,
        )
        rcf = read_round_completion_file(RCF_BUCKET_NAME, source_locator, None)

        assert rcf_url == completion_file_s3_url
        assert rcf is None
|