pypaimon 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypaimon/__init__.py +38 -0
- pypaimon/acceptance/__init__.py +23 -0
- pypaimon/acceptance/incremental_diff_acceptance_test.py +238 -0
- pypaimon/api/__init__.py +16 -0
- pypaimon/api/api_request.py +96 -0
- pypaimon/api/api_response.py +329 -0
- pypaimon/api/auth/__init__.py +40 -0
- pypaimon/api/auth/base.py +48 -0
- pypaimon/api/auth/bearer.py +34 -0
- pypaimon/api/auth/dlf_provider.py +109 -0
- pypaimon/api/auth/dlf_signer.py +472 -0
- pypaimon/api/auth/factory.py +104 -0
- pypaimon/api/client.py +394 -0
- pypaimon/api/resource_paths.py +85 -0
- pypaimon/api/rest_api.py +441 -0
- pypaimon/api/rest_exception.py +111 -0
- pypaimon/api/rest_util.py +66 -0
- pypaimon/api/token_loader.py +219 -0
- pypaimon/api/typedef.py +29 -0
- pypaimon/branch/__init__.py +22 -0
- pypaimon/branch/branch_manager.py +191 -0
- pypaimon/branch/catalog_branch_manager.py +151 -0
- pypaimon/branch/filesystem_branch_manager.py +332 -0
- pypaimon/catalog/__init__.py +17 -0
- pypaimon/catalog/catalog.py +295 -0
- pypaimon/catalog/catalog_context.py +34 -0
- pypaimon/catalog/catalog_environment.py +120 -0
- pypaimon/catalog/catalog_exception.py +180 -0
- pypaimon/catalog/catalog_factory.py +44 -0
- pypaimon/catalog/catalog_loader.py +24 -0
- pypaimon/catalog/database.py +35 -0
- pypaimon/catalog/filesystem_catalog.py +345 -0
- pypaimon/catalog/filesystem_catalog_loader.py +67 -0
- pypaimon/catalog/rest/__init__.py +17 -0
- pypaimon/catalog/rest/property_change.py +53 -0
- pypaimon/catalog/rest/rest_catalog.py +487 -0
- pypaimon/catalog/rest/rest_catalog_loader.py +69 -0
- pypaimon/catalog/rest/rest_token.py +38 -0
- pypaimon/catalog/rest/rest_token_file_io.py +278 -0
- pypaimon/catalog/rest/table_metadata.py +40 -0
- pypaimon/catalog/table_rollback.py +43 -0
- pypaimon/changelog/__init__.py +23 -0
- pypaimon/changelog/changelog.py +85 -0
- pypaimon/changelog/changelog_manager.py +353 -0
- pypaimon/cli/__init__.py +20 -0
- pypaimon/cli/cli.py +137 -0
- pypaimon/cli/cli_catalog.py +65 -0
- pypaimon/cli/cli_db.py +279 -0
- pypaimon/cli/cli_table.py +844 -0
- pypaimon/cli/where_parser.py +376 -0
- pypaimon/common/__init__.py +17 -0
- pypaimon/common/delta_varint_compressor.py +125 -0
- pypaimon/common/external_path_provider.py +43 -0
- pypaimon/common/file_io.py +277 -0
- pypaimon/common/identifier.py +107 -0
- pypaimon/common/json_util.py +133 -0
- pypaimon/common/memory_size.py +201 -0
- pypaimon/common/options/__init__.py +30 -0
- pypaimon/common/options/config.py +85 -0
- pypaimon/common/options/config_option.py +143 -0
- pypaimon/common/options/config_options.py +215 -0
- pypaimon/common/options/core_options.py +586 -0
- pypaimon/common/options/options.py +62 -0
- pypaimon/common/options/options_utils.py +167 -0
- pypaimon/common/predicate.py +454 -0
- pypaimon/common/predicate_builder.py +138 -0
- pypaimon/common/time_utils.py +81 -0
- pypaimon/common/uri_reader.py +169 -0
- pypaimon/consumer/__init__.py +36 -0
- pypaimon/consumer/consumer.py +76 -0
- pypaimon/consumer/consumer_manager.py +195 -0
- pypaimon/data/__init__.py +21 -0
- pypaimon/data/timestamp.py +167 -0
- pypaimon/deletionvectors/__init__.py +27 -0
- pypaimon/deletionvectors/apply_deletion_vector_reader.py +128 -0
- pypaimon/deletionvectors/bitmap_deletion_vector.py +165 -0
- pypaimon/deletionvectors/deletion_vector.py +143 -0
- pypaimon/filesystem/__init__.py +16 -0
- pypaimon/filesystem/local.py +49 -0
- pypaimon/filesystem/local_file_io.py +455 -0
- pypaimon/filesystem/pvfs.py +892 -0
- pypaimon/filesystem/pyarrow_file_io.py +583 -0
- pypaimon/globalindex/__init__.py +45 -0
- pypaimon/globalindex/btree/__init__.py +25 -0
- pypaimon/globalindex/btree/block_aligned_type.py +36 -0
- pypaimon/globalindex/btree/block_entry.py +55 -0
- pypaimon/globalindex/btree/block_handle.py +41 -0
- pypaimon/globalindex/btree/block_reader.py +254 -0
- pypaimon/globalindex/btree/btree_file_footer.py +129 -0
- pypaimon/globalindex/btree/btree_index_meta.py +63 -0
- pypaimon/globalindex/btree/btree_index_reader.py +423 -0
- pypaimon/globalindex/btree/key_serializer.py +145 -0
- pypaimon/globalindex/btree/memory_slice_input.py +162 -0
- pypaimon/globalindex/btree/sst_file_reader.py +198 -0
- pypaimon/globalindex/global_index_evaluator.py +176 -0
- pypaimon/globalindex/global_index_meta.py +74 -0
- pypaimon/globalindex/global_index_reader.py +108 -0
- pypaimon/globalindex/global_index_result.py +100 -0
- pypaimon/globalindex/global_index_scanner.py +162 -0
- pypaimon/globalindex/indexed_split.py +142 -0
- pypaimon/globalindex/vector_search.py +92 -0
- pypaimon/globalindex/vector_search_result.py +136 -0
- pypaimon/index/__init__.py +17 -0
- pypaimon/index/deletion_vector_meta.py +40 -0
- pypaimon/index/index_file_handler.py +68 -0
- pypaimon/index/index_file_meta.py +57 -0
- pypaimon/manifest/__init__.py +26 -0
- pypaimon/manifest/fastavro_py36_compat.py +77 -0
- pypaimon/manifest/index_manifest_entry.py +62 -0
- pypaimon/manifest/index_manifest_file.py +176 -0
- pypaimon/manifest/manifest_file_manager.py +229 -0
- pypaimon/manifest/manifest_list_manager.py +134 -0
- pypaimon/manifest/schema/__init__.py +17 -0
- pypaimon/manifest/schema/data_file_meta.py +261 -0
- pypaimon/manifest/schema/file_entry.py +127 -0
- pypaimon/manifest/schema/manifest_entry.py +77 -0
- pypaimon/manifest/schema/manifest_file_meta.py +52 -0
- pypaimon/manifest/schema/simple_stats.py +74 -0
- pypaimon/manifest/simple_stats_evolution.py +123 -0
- pypaimon/manifest/simple_stats_evolutions.py +71 -0
- pypaimon/read/__init__.py +17 -0
- pypaimon/read/datasource/__init__.py +25 -0
- pypaimon/read/datasource/ray_datasource.py +230 -0
- pypaimon/read/datasource/torch_dataset.py +223 -0
- pypaimon/read/interval_partition.py +130 -0
- pypaimon/read/partition_info.py +46 -0
- pypaimon/read/plan.py +32 -0
- pypaimon/read/push_down_utils.py +126 -0
- pypaimon/read/read_builder.py +78 -0
- pypaimon/read/reader/__init__.py +17 -0
- pypaimon/read/reader/blob_descriptor_convert_reader.py +81 -0
- pypaimon/read/reader/concat_batch_reader.py +228 -0
- pypaimon/read/reader/concat_record_reader.py +50 -0
- pypaimon/read/reader/data_file_batch_reader.py +235 -0
- pypaimon/read/reader/drop_delete_reader.py +59 -0
- pypaimon/read/reader/empty_record_reader.py +37 -0
- pypaimon/read/reader/field_bunch.py +126 -0
- pypaimon/read/reader/filter_record_batch_reader.py +88 -0
- pypaimon/read/reader/filter_record_reader.py +61 -0
- pypaimon/read/reader/format_avro_reader.py +78 -0
- pypaimon/read/reader/format_blob_reader.py +201 -0
- pypaimon/read/reader/format_lance_reader.py +72 -0
- pypaimon/read/reader/format_pyarrow_reader.py +129 -0
- pypaimon/read/reader/iface/__init__.py +17 -0
- pypaimon/read/reader/iface/record_batch_reader.py +137 -0
- pypaimon/read/reader/iface/record_iterator.py +39 -0
- pypaimon/read/reader/iface/record_reader.py +42 -0
- pypaimon/read/reader/key_value_unwrap_reader.py +63 -0
- pypaimon/read/reader/key_value_wrap_reader.py +70 -0
- pypaimon/read/reader/lance_utils.py +91 -0
- pypaimon/read/reader/row_range_filter_record_reader.py +67 -0
- pypaimon/read/reader/shard_batch_reader.py +61 -0
- pypaimon/read/reader/sort_merge_reader.py +213 -0
- pypaimon/read/scanner/__init__.py +17 -0
- pypaimon/read/scanner/append_table_split_generator.py +173 -0
- pypaimon/read/scanner/changelog_follow_up_scanner.py +29 -0
- pypaimon/read/scanner/data_evolution_split_generator.py +355 -0
- pypaimon/read/scanner/delta_follow_up_scanner.py +28 -0
- pypaimon/read/scanner/file_scanner.py +469 -0
- pypaimon/read/scanner/follow_up_scanner.py +30 -0
- pypaimon/read/scanner/incremental_diff_scanner.py +100 -0
- pypaimon/read/scanner/primary_key_table_split_generator.py +126 -0
- pypaimon/read/scanner/split_generator.py +236 -0
- pypaimon/read/sliced_split.py +188 -0
- pypaimon/read/split.py +202 -0
- pypaimon/read/split_read.py +715 -0
- pypaimon/read/stream_read_builder.py +149 -0
- pypaimon/read/streaming_table_scan.py +416 -0
- pypaimon/read/table_read.py +290 -0
- pypaimon/read/table_scan.py +133 -0
- pypaimon/sample/__init__.py +17 -0
- pypaimon/sample/data/__init__.py +17 -0
- pypaimon/sample/oss_read_and_write.py +70 -0
- pypaimon/sample/rest_catalog_blob_as_descriptor_sample.py +155 -0
- pypaimon/sample/rest_catalog_ray_data_sample.py +245 -0
- pypaimon/sample/rest_catalog_ray_lance_sample.py +212 -0
- pypaimon/sample/rest_catalog_ray_sink_sample.py +153 -0
- pypaimon/sample/rest_catalog_read_write_sample.py +108 -0
- pypaimon/schema/__init__.py +17 -0
- pypaimon/schema/data_types.py +705 -0
- pypaimon/schema/schema.py +95 -0
- pypaimon/schema/schema_change.py +289 -0
- pypaimon/schema/schema_manager.py +439 -0
- pypaimon/schema/table_schema.py +146 -0
- pypaimon/snapshot/__init__.py +17 -0
- pypaimon/snapshot/catalog_snapshot_commit.py +84 -0
- pypaimon/snapshot/renaming_snapshot_commit.py +95 -0
- pypaimon/snapshot/snapshot.py +51 -0
- pypaimon/snapshot/snapshot_commit.py +102 -0
- pypaimon/snapshot/snapshot_loader.py +57 -0
- pypaimon/snapshot/snapshot_manager.py +283 -0
- pypaimon/snapshot/table_snapshot.py +38 -0
- pypaimon/snapshot/time_travel_util.py +75 -0
- pypaimon/table/__init__.py +17 -0
- pypaimon/table/bucket_mode.py +32 -0
- pypaimon/table/file_store_table.py +447 -0
- pypaimon/table/format/__init__.py +36 -0
- pypaimon/table/format/format_batch_write_builder.py +55 -0
- pypaimon/table/format/format_commit_message.py +26 -0
- pypaimon/table/format/format_data_split.py +30 -0
- pypaimon/table/format/format_read_builder.py +82 -0
- pypaimon/table/format/format_table.py +104 -0
- pypaimon/table/format/format_table_commit.py +66 -0
- pypaimon/table/format/format_table_read.py +273 -0
- pypaimon/table/format/format_table_scan.py +130 -0
- pypaimon/table/format/format_table_write.py +245 -0
- pypaimon/table/iceberg/__init__.py +19 -0
- pypaimon/table/iceberg/iceberg_table.py +109 -0
- pypaimon/table/instant.py +135 -0
- pypaimon/table/object/__init__.py +29 -0
- pypaimon/table/object/object_read_builder.py +50 -0
- pypaimon/table/object/object_split.py +31 -0
- pypaimon/table/object/object_table.py +103 -0
- pypaimon/table/object/object_table_read.py +159 -0
- pypaimon/table/object/object_table_scan.py +37 -0
- pypaimon/table/rollback_helper.py +93 -0
- pypaimon/table/row/__init__.py +17 -0
- pypaimon/table/row/binary_row.py +58 -0
- pypaimon/table/row/blob.py +286 -0
- pypaimon/table/row/generic_row.py +454 -0
- pypaimon/table/row/internal_row.py +54 -0
- pypaimon/table/row/key_value.py +57 -0
- pypaimon/table/row/offset_row.py +54 -0
- pypaimon/table/row/projected_row.py +76 -0
- pypaimon/table/row/row_kind.py +61 -0
- pypaimon/table/source/__init__.py +16 -0
- pypaimon/table/source/deletion_file.py +49 -0
- pypaimon/table/special_fields.py +83 -0
- pypaimon/table/table.py +43 -0
- pypaimon/tag/__init__.py +21 -0
- pypaimon/tag/tag.py +44 -0
- pypaimon/tag/tag_manager.py +233 -0
- pypaimon/tests/__init__.py +16 -0
- pypaimon/tests/binary_row_test.py +339 -0
- pypaimon/tests/blob_table_test.py +3029 -0
- pypaimon/tests/blob_test.py +1162 -0
- pypaimon/tests/branch/__init__.py +16 -0
- pypaimon/tests/branch/catalog_branch_manager_test.py +199 -0
- pypaimon/tests/branch/file_store_table_branch_manager_test.py +177 -0
- pypaimon/tests/branch_manager_test.py +183 -0
- pypaimon/tests/changelog_follow_up_scanner_test.py +51 -0
- pypaimon/tests/changelog_manager_test.py +137 -0
- pypaimon/tests/cli_db_test.py +338 -0
- pypaimon/tests/cli_table_test.py +1356 -0
- pypaimon/tests/consumer_manager_test.py +315 -0
- pypaimon/tests/consumer_test.py +204 -0
- pypaimon/tests/data_evolution_test.py +1350 -0
- pypaimon/tests/data_types_test.py +146 -0
- pypaimon/tests/delta_varint_compressor_test.py +379 -0
- pypaimon/tests/e2e/__init__.py +16 -0
- pypaimon/tests/e2e/java_py_read_write_test.py +456 -0
- pypaimon/tests/external_paths_test.py +427 -0
- pypaimon/tests/file_io_test.py +462 -0
- pypaimon/tests/file_store_commit_test.py +467 -0
- pypaimon/tests/filesystem_catalog_test.py +368 -0
- pypaimon/tests/follow_up_scanner_test.py +59 -0
- pypaimon/tests/identifier_test.py +97 -0
- pypaimon/tests/lance_utils_test.py +85 -0
- pypaimon/tests/manifest/__init__.py +17 -0
- pypaimon/tests/manifest/manifest_entry_identifier_test.py +259 -0
- pypaimon/tests/manifest/manifest_manager_test.py +196 -0
- pypaimon/tests/manifest/manifest_schema_test.py +233 -0
- pypaimon/tests/predicates_test.py +570 -0
- pypaimon/tests/pvfs_test.py +201 -0
- pypaimon/tests/py36/__init__.py +17 -0
- pypaimon/tests/py36/ao_predicate_test.py +252 -0
- pypaimon/tests/py36/ao_simple_test.py +480 -0
- pypaimon/tests/py36/data_evolution_test.py +488 -0
- pypaimon/tests/py36/pyarrow_compat.py +40 -0
- pypaimon/tests/py36/reader_predicate_test.py +93 -0
- pypaimon/tests/py36/rest_ao_read_write_test.py +915 -0
- pypaimon/tests/range_test.py +86 -0
- pypaimon/tests/ray_data_test.py +701 -0
- pypaimon/tests/ray_sink_test.py +342 -0
- pypaimon/tests/reader_append_only_test.py +578 -0
- pypaimon/tests/reader_base_test.py +1409 -0
- pypaimon/tests/reader_predicate_test.py +117 -0
- pypaimon/tests/reader_primary_key_test.py +586 -0
- pypaimon/tests/reader_split_generator_test.py +334 -0
- pypaimon/tests/rest/__init__.py +17 -0
- pypaimon/tests/rest/api_test.py +463 -0
- pypaimon/tests/rest/client_test.py +62 -0
- pypaimon/tests/rest/dlf_signer_test.py +155 -0
- pypaimon/tests/rest/rest_base_test.py +347 -0
- pypaimon/tests/rest/rest_catalog_commit_snapshot_test.py +342 -0
- pypaimon/tests/rest/rest_catalog_test.py +373 -0
- pypaimon/tests/rest/rest_format_table_test.py +613 -0
- pypaimon/tests/rest/rest_iceberg_table_test.py +81 -0
- pypaimon/tests/rest/rest_object_table_test.py +250 -0
- pypaimon/tests/rest/rest_permission_test.py +199 -0
- pypaimon/tests/rest/rest_read_write_test.py +689 -0
- pypaimon/tests/rest/rest_server.py +1165 -0
- pypaimon/tests/rest/rest_simple_test.py +859 -0
- pypaimon/tests/rest/rest_token_file_io_test.py +309 -0
- pypaimon/tests/rest/test_exponential_retry_strategy.py +64 -0
- pypaimon/tests/scanner/incremental_diff_scanner_test.py +390 -0
- pypaimon/tests/schema_evolution_read_test.py +434 -0
- pypaimon/tests/schema_manager_test.py +91 -0
- pypaimon/tests/serializable_test.py +113 -0
- pypaimon/tests/shard_table_updator_test.py +596 -0
- pypaimon/tests/snapshot_manager_test.py +143 -0
- pypaimon/tests/stream_read_builder_test.py +149 -0
- pypaimon/tests/streaming_table_scan_test.py +617 -0
- pypaimon/tests/table/file_store_table_test.py +439 -0
- pypaimon/tests/table/simple_table_test.py +687 -0
- pypaimon/tests/table_update_test.py +1057 -0
- pypaimon/tests/table_upsert_by_key_test.py +739 -0
- pypaimon/tests/torch_read_test.py +684 -0
- pypaimon/tests/uri_reader_factory_test.py +228 -0
- pypaimon/tests/where_parser_test.py +404 -0
- pypaimon/tests/write/simple_hash_bucket_assigner_test.py +60 -0
- pypaimon/tests/write/table_write_test.py +438 -0
- pypaimon/utils/__init__.py +17 -0
- pypaimon/utils/file_store_path_factory.py +156 -0
- pypaimon/utils/range.py +206 -0
- pypaimon/utils/range_helper.py +133 -0
- pypaimon/utils/roaring_bitmap.py +251 -0
- pypaimon/write/__init__.py +17 -0
- pypaimon/write/blob_format_writer.py +107 -0
- pypaimon/write/commit/__init__.py +16 -0
- pypaimon/write/commit/commit_rollback.py +62 -0
- pypaimon/write/commit/commit_scanner.py +127 -0
- pypaimon/write/commit/conflict_detection.py +203 -0
- pypaimon/write/commit_message.py +33 -0
- pypaimon/write/file_store_commit.py +728 -0
- pypaimon/write/file_store_write.py +142 -0
- pypaimon/write/ray_datasink.py +194 -0
- pypaimon/write/row_key_extractor.py +332 -0
- pypaimon/write/table_commit.py +99 -0
- pypaimon/write/table_update.py +271 -0
- pypaimon/write/table_update_by_row_id.py +323 -0
- pypaimon/write/table_upsert_by_key.py +378 -0
- pypaimon/write/table_write.py +147 -0
- pypaimon/write/write_builder.py +82 -0
- pypaimon/write/writer/__init__.py +17 -0
- pypaimon/write/writer/append_only_data_writer.py +31 -0
- pypaimon/write/writer/blob_file_writer.py +117 -0
- pypaimon/write/writer/blob_writer.py +247 -0
- pypaimon/write/writer/data_blob_writer.py +372 -0
- pypaimon/write/writer/data_writer.py +329 -0
- pypaimon/write/writer/key_value_data_writer.py +72 -0
- pypaimon-1.4.0.dist-info/METADATA +62 -0
- pypaimon-1.4.0.dist-info/RECORD +347 -0
- pypaimon-1.4.0.dist-info/WHEEL +5 -0
- pypaimon-1.4.0.dist-info/entry_points.txt +2 -0
- pypaimon-1.4.0.dist-info/licenses/LICENSE +202 -0
- pypaimon-1.4.0.dist-info/top_level.txt +1 -0
pypaimon/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
import sys
|
|
19
|
+
|
|
20
|
+
# Python 3.6 only: import the fastavro compatibility module purely for its
# import-time effects (the imported name itself is never used, hence noqa).
# NOTE(review): presumably this patches fastavro for 3.6 -- confirm in
# pypaimon/manifest/fastavro_py36_compat.py.
if (sys.version_info.major, sys.version_info.minor) == (3, 6):
    try:
        from pypaimon.manifest import fastavro_py36_compat  # noqa: F401
    except ImportError:
        # Best effort: the package must still import when the compat module
        # (or something it imports) is unavailable.
        pass
|
|
25
|
+
|
|
26
|
+
from pypaimon.catalog.catalog_factory import CatalogFactory
|
|
27
|
+
from pypaimon.filesystem.pvfs import PaimonVirtualFileSystem
|
|
28
|
+
from pypaimon.schema.schema import Schema
|
|
29
|
+
from pypaimon.tag.tag import Tag
|
|
30
|
+
from pypaimon.tag.tag_manager import TagManager
|
|
31
|
+
|
|
32
|
+
# Names exported by ``from pypaimon import *``; each one is imported above.
__all__ = [
    "PaimonVirtualFileSystem",
    "CatalogFactory",
    "Schema",
    "Tag",
    "TagManager",
]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
|
4
|
+
# distributed with this work for additional information
|
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
|
7
|
+
# "License"); you may not use this file except in compliance
|
|
8
|
+
# with the License. You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
################################################################################
|
|
18
|
+
"""
|
|
19
|
+
Acceptance tests for pypaimon.
|
|
20
|
+
|
|
21
|
+
These tests use real file I/O with a local temp filesystem to verify
|
|
22
|
+
end-to-end behavior, as opposed to unit tests which use mocks.
|
|
23
|
+
"""
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
|
4
|
+
# distributed with this work for additional information
|
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
|
7
|
+
# "License"); you may not use this file except in compliance
|
|
8
|
+
# with the License. You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
################################################################################
|
|
18
|
+
"""
|
|
19
|
+
Acceptance tests for IncrementalDiffScanner.
|
|
20
|
+
|
|
21
|
+
These tests verify that the diff approach (reading 2 base_manifest_lists)
|
|
22
|
+
returns the same data as the delta approach (reading N delta_manifest_lists).
|
|
23
|
+
|
|
24
|
+
Uses real file I/O with a local temp filesystem.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import os
|
|
28
|
+
import shutil
|
|
29
|
+
import tempfile
|
|
30
|
+
import unittest
|
|
31
|
+
|
|
32
|
+
import pyarrow as pa
|
|
33
|
+
|
|
34
|
+
from pypaimon import CatalogFactory, Schema
|
|
35
|
+
from pypaimon.manifest.manifest_file_manager import ManifestFileManager
|
|
36
|
+
from pypaimon.manifest.manifest_list_manager import ManifestListManager
|
|
37
|
+
from pypaimon.read.scanner.append_table_split_generator import \
|
|
38
|
+
AppendTableSplitGenerator
|
|
39
|
+
from pypaimon.read.scanner.incremental_diff_scanner import \
|
|
40
|
+
IncrementalDiffScanner
|
|
41
|
+
from pypaimon.snapshot.snapshot_manager import SnapshotManager
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class IncrementalDiffAcceptanceTest(unittest.TestCase):
    """Acceptance tests for diff vs delta equivalence with real data.

    Each test commits several batches into a table stored in a temporary
    local warehouse, reads the rows added between two snapshots once through
    ``IncrementalDiffScanner`` and once by walking the delta manifest lists,
    and asserts that both reads yield the same set of rows.
    """

    @classmethod
    def setUpClass(cls):
        """Create the temporary warehouse, its catalog and the shared schema."""
        cls.tempdir = tempfile.mkdtemp()
        cls.warehouse = os.path.join(cls.tempdir, 'warehouse')
        cls.catalog = CatalogFactory.create({'warehouse': cls.warehouse})
        # NOTE(review): the positional True is presumably "ignore if exists";
        # confirm against Catalog.create_database.
        cls.catalog.create_database('default', True)

        cls.pa_schema = pa.schema([
            ('id', pa.int32()),
            ('value', pa.string()),
            ('partition_col', pa.string())
        ])

    @classmethod
    def tearDownClass(cls):
        """Delete the temporary warehouse directory, ignoring cleanup errors."""
        shutil.rmtree(cls.tempdir, ignore_errors=True)

    def _create_table_with_snapshots(self, name, num_snapshots=5, partition_keys=None):
        """Create table ``default.<name>`` and commit ``num_snapshots`` batches.

        Every commit writes five rows: ids are ``snap_id * 10 + i``, values are
        ``snap<snap_id>_row<i>`` and ``partition_col`` alternates 'p1'/'p2'.

        Args:
            name: Table name inside the ``default`` database.
            num_snapshots: Number of write/commit rounds to perform.
            partition_keys: Optional partition key list forwarded to
                ``Schema.from_pyarrow_schema``.

        Returns:
            Tuple of (table, expected_data_per_snapshot)
        """
        schema = Schema.from_pyarrow_schema(self.pa_schema, partition_keys=partition_keys)
        self.catalog.create_table(f'default.{name}', schema, False)
        table = self.catalog.get_table(f'default.{name}')

        all_data = []
        for snap_id in range(1, num_snapshots + 1):
            write_builder = table.new_batch_write_builder()
            table_write = write_builder.new_write()
            table_commit = write_builder.new_commit()

            data = {
                'id': [snap_id * 10 + i for i in range(5)],
                'value': [f'snap{snap_id}_row{i}' for i in range(5)],
                'partition_col': ['p1' if i % 2 == 0 else 'p2' for i in range(5)]
            }
            all_data.append(data)

            pa_table = pa.Table.from_pydict(data, schema=self.pa_schema)
            try:
                table_write.write_arrow(pa_table)
                table_commit.commit(table_write.prepare_commit())
            finally:
                # Release writer/committer even when the write or commit fails,
                # so a failing round does not leak open resources.
                table_write.close()
                table_commit.close()

        return table, all_data

    def _read_via_diff(self, table, start_snap_id, end_snap_id):
        """Read data using IncrementalDiffScanner between two snapshots.

        Returns:
            The result of ``table_read.to_arrow`` over the splits planned by
            the scanner for the (start, end) snapshot pair.
        """
        snapshot_manager = SnapshotManager(table)
        start_snapshot = snapshot_manager.get_snapshot_by_id(start_snap_id)
        end_snapshot = snapshot_manager.get_snapshot_by_id(end_snap_id)

        scanner = IncrementalDiffScanner(table)
        plan = scanner.scan(start_snapshot, end_snapshot)

        table_read = table.new_read_builder().new_read()
        return table_read.to_arrow(plan.splits())

    def _read_via_delta(self, table, start_snap_id, end_snap_id):
        """Read data by iterating delta_manifest_lists between two snapshots.

        Snapshots with ids in ``(start_snap_id, end_snap_id]`` are visited and
        only those whose ``commit_kind`` is 'APPEND' contribute entries.

        Returns:
            The result of ``table_read.to_arrow`` over splits built from the
            collected manifest entries.
        """
        snapshot_manager = SnapshotManager(table)
        manifest_list_manager = ManifestListManager(table)
        manifest_file_manager = ManifestFileManager(table)

        all_entries = []
        for snap_id in range(start_snap_id + 1, end_snap_id + 1):
            snapshot = snapshot_manager.get_snapshot_by_id(snap_id)
            if snapshot and snapshot.commit_kind == 'APPEND':
                manifest_files = manifest_list_manager.read_delta(snapshot)
                if manifest_files:
                    entries = manifest_file_manager.read_entries_parallel(manifest_files)
                    all_entries.extend(entries)

        # Create splits from entries
        options = table.options
        split_generator = AppendTableSplitGenerator(
            table,
            options.source_split_target_size(),
            options.source_split_open_file_cost(),
            {}  # NOTE(review): meaning of this empty dict is not visible here
        )
        splits = split_generator.create_splits(all_entries)

        table_read = table.new_read_builder().new_read()
        return table_read.to_arrow(splits)

    def _rows_to_set(self, arrow_table):
        """Convert arrow table to set of (id, value, partition_col) tuples."""
        # Materialise each column once and zip them row-wise instead of
        # indexing every column again for every row.
        columns = [
            arrow_table.column(col_name).to_pylist()
            for col_name in ('id', 'value', 'partition_col')
        ]
        return set(zip(*columns))

    def _assert_diff_matches_delta(self, table, start_snap_id, end_snap_id):
        """Read via diff and via delta, assert equal row sets, return the diff rows."""
        diff_result = self._read_via_diff(table, start_snap_id, end_snap_id)
        delta_result = self._read_via_delta(table, start_snap_id, end_snap_id)

        # Convert to sets for order-independent comparison
        diff_rows = self._rows_to_set(diff_result)
        delta_rows = self._rows_to_set(delta_result)

        self.assertEqual(diff_rows, delta_rows)
        return diff_rows

    def test_diff_returns_same_rows_as_delta_simple(self):
        """
        Basic case: 5 snapshots, verify row-level equivalence.

        Creates a table with 5 snapshots, then reads data from snapshot 1 to 5
        using both diff and delta approaches, verifying they return the same rows.
        """
        table, _ = self._create_table_with_snapshots(
            'test_diff_delta_simple',
            num_snapshots=5
        )

        # Read using both approaches (from snapshot 1 to 5, so we get snapshots 2-5)
        diff_rows = self._assert_diff_matches_delta(table, 1, 5)

        # Verify we got the expected number of rows (snapshots 2-5, 5 rows each = 20)
        self.assertEqual(len(diff_rows), 20)

        # Verify specific IDs are present (from snapshots 2-5)
        expected_ids = {
            snap_id * 10 + i
            for snap_id in range(2, 6)  # snapshots 2, 3, 4, 5
            for i in range(5)
        }
        actual_ids = {row[0] for row in diff_rows}
        self.assertEqual(actual_ids, expected_ids)

    def test_diff_returns_same_rows_as_delta_many_snapshots(self):
        """
        Stress test: 20 snapshots, verify row-level equivalence.

        This tests the catch-up scenario where there are many snapshots
        between start and end.
        """
        table, _ = self._create_table_with_snapshots(
            'test_diff_delta_many',
            num_snapshots=20
        )

        # Read using both approaches (from snapshot 1 to 20)
        diff_rows = self._assert_diff_matches_delta(table, 1, 20)

        # Verify we got the expected number of rows (snapshots 2-20, 5 rows each = 95)
        self.assertEqual(len(diff_rows), 95)

    def test_diff_returns_same_rows_with_mixed_partitions(self):
        """
        Partitioned table: Verify diff handles multiple partitions correctly.

        Creates a partitioned table and verifies diff and delta return
        the same rows across all partitions.
        """
        table, _ = self._create_table_with_snapshots(
            'test_diff_delta_partitioned',
            num_snapshots=5,
            partition_keys=['partition_col']
        )

        # Read using both approaches
        diff_rows = self._assert_diff_matches_delta(table, 1, 5)

        # Verify both partitions have data
        p1_rows = {r for r in diff_rows if r[2] == 'p1'}
        p2_rows = {r for r in diff_rows if r[2] == 'p2'}

        self.assertGreater(len(p1_rows), 0, "Should have rows in partition p1")
        self.assertGreater(len(p2_rows), 0, "Should have rows in partition p2")
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# Run the tests in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
|
pypaimon/api/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
or more contributor license agreements. See the NOTICE file
|
|
4
|
+
distributed with this work for additional information
|
|
5
|
+
regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
to you under the Apache License, Version 2.0 (the
|
|
7
|
+
"License"); you may not use this file except in compliance
|
|
8
|
+
with the License. You may obtain a copy of the License at
|
|
9
|
+
|
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
|
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
See the License for the specific language governing permissions and
|
|
16
|
+
limitations under the License.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from abc import ABC
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from typing import Dict, List, Optional
|
|
22
|
+
|
|
23
|
+
from pypaimon.common.identifier import Identifier
|
|
24
|
+
from pypaimon.common.json_util import json_field
|
|
25
|
+
from pypaimon.schema.schema import Schema
|
|
26
|
+
from pypaimon.schema.schema_change import SchemaChange
|
|
27
|
+
from pypaimon.snapshot.snapshot import Snapshot
|
|
28
|
+
from pypaimon.snapshot.snapshot_commit import PartitionStatistics
|
|
29
|
+
from pypaimon.table.instant import Instant
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class RESTRequest(ABC):
    """Common base class for the REST request payloads defined in this module.

    It declares no fields or methods of its own.
    """
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class CreateDatabaseRequest(RESTRequest):
    """Holds the ``name`` and ``options`` of a create-database REST request.

    NOTE(review): ``json_field`` is defined in ``pypaimon.common.json_util``;
    it presumably binds each attribute to the JSON key passed to it -- confirm
    there.
    """

    # JSON key names passed to json_field for the attributes below.
    FIELD_NAME = "name"
    FIELD_OPTIONS = "options"

    # Database name, declared with key "name".
    name: str = json_field(FIELD_NAME)
    # String-to-string options map, declared with key "options".
    options: Dict[str, str] = json_field(FIELD_OPTIONS)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class AlterDatabaseRequest(RESTRequest):
    """Holds the ``removals`` and ``updates`` of an alter-database REST request.

    NOTE(review): ``json_field`` is defined in ``pypaimon.common.json_util``;
    it presumably binds each attribute to the JSON key passed to it -- confirm
    there.
    """

    # JSON key names passed to json_field for the attributes below.
    FIELD_REMOVALS = "removals"
    FIELD_UPDATES = "updates"

    # List of strings declared with key "removals" (presumably option keys to drop).
    removals: List[str] = json_field(FIELD_REMOVALS)
    # String-to-string map declared with key "updates" (presumably options to set).
    updates: Dict[str, str] = json_field(FIELD_UPDATES)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class RenameTableRequest(RESTRequest):
    """Holds the ``source`` and ``destination`` identifiers of a rename-table request.

    NOTE(review): ``json_field`` is defined in ``pypaimon.common.json_util``;
    it presumably binds each attribute to the JSON key passed to it -- confirm
    there.
    """

    # JSON key names passed to json_field for the attributes below.
    FIELD_SOURCE = "source"
    FIELD_DESTINATION = "destination"

    # Identifier declared with key "source".
    source: Identifier = json_field(FIELD_SOURCE)
    # Identifier declared with key "destination".
    destination: Identifier = json_field(FIELD_DESTINATION)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class CreateTableRequest(RESTRequest):
    """Holds the ``identifier`` and ``schema`` of a create-table REST request.

    NOTE(review): ``json_field`` is defined in ``pypaimon.common.json_util``;
    it presumably binds each attribute to the JSON key passed to it -- confirm
    there.
    """

    # JSON key names passed to json_field for the attributes below.
    FIELD_IDENTIFIER = "identifier"
    FIELD_SCHEMA = "schema"

    # Table identifier, declared with key "identifier".
    identifier: Identifier = json_field(FIELD_IDENTIFIER)
    # Table schema, declared with key "schema".
    schema: Schema = json_field(FIELD_SCHEMA)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class CommitTableRequest(RESTRequest):
    """Holds the table id, snapshot and statistics of a commit-table REST request.

    NOTE(review): ``json_field`` is defined in ``pypaimon.common.json_util``;
    it presumably binds each attribute to the JSON key passed to it -- confirm
    there.
    """

    # JSON key names passed to json_field for the attributes below.
    FIELD_TABLE_ID = "tableId"
    FIELD_SNAPSHOT = "snapshot"
    FIELD_STATISTICS = "statistics"

    # Optional table id (may be None), declared with key "tableId".
    table_id: Optional[str] = json_field(FIELD_TABLE_ID)
    # Snapshot object, declared with key "snapshot".
    snapshot: Snapshot = json_field(FIELD_SNAPSHOT)
    # List of PartitionStatistics, declared with key "statistics".
    statistics: List[PartitionStatistics] = json_field(FIELD_STATISTICS)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class AlterTableRequest(RESTRequest):
    """Holds the list of schema ``changes`` of an alter-table REST request.

    NOTE(review): ``json_field`` is defined in ``pypaimon.common.json_util``;
    it presumably binds the attribute to the JSON key passed to it -- confirm
    there.
    """

    # JSON key name passed to json_field for the attribute below.
    FIELD_CHANGES = "changes"

    # List of SchemaChange objects, declared with key "changes".
    changes: List[SchemaChange] = json_field(FIELD_CHANGES)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class RollbackTableRequest(RESTRequest):
    """Holds the ``instant`` and optional ``from_snapshot`` of a rollback-table request.

    NOTE(review): ``json_field`` is defined in ``pypaimon.common.json_util``;
    it presumably binds each attribute to the JSON key passed to it -- confirm
    there.
    """

    # JSON key names passed to json_field for the attributes below.
    FIELD_INSTANT = "instant"
    FIELD_FROM_SNAPSHOT = "fromSnapshot"

    # Instant object, declared with key "instant".
    instant: Instant = json_field(FIELD_INSTANT)
    # Optional integer (may be None), declared with key "fromSnapshot".
    from_snapshot: Optional[int] = json_field(FIELD_FROM_SNAPSHOT)
|