pypaimon 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347)
  1. pypaimon/__init__.py +38 -0
  2. pypaimon/acceptance/__init__.py +23 -0
  3. pypaimon/acceptance/incremental_diff_acceptance_test.py +238 -0
  4. pypaimon/api/__init__.py +16 -0
  5. pypaimon/api/api_request.py +96 -0
  6. pypaimon/api/api_response.py +329 -0
  7. pypaimon/api/auth/__init__.py +40 -0
  8. pypaimon/api/auth/base.py +48 -0
  9. pypaimon/api/auth/bearer.py +34 -0
  10. pypaimon/api/auth/dlf_provider.py +109 -0
  11. pypaimon/api/auth/dlf_signer.py +472 -0
  12. pypaimon/api/auth/factory.py +104 -0
  13. pypaimon/api/client.py +394 -0
  14. pypaimon/api/resource_paths.py +85 -0
  15. pypaimon/api/rest_api.py +441 -0
  16. pypaimon/api/rest_exception.py +111 -0
  17. pypaimon/api/rest_util.py +66 -0
  18. pypaimon/api/token_loader.py +219 -0
  19. pypaimon/api/typedef.py +29 -0
  20. pypaimon/branch/__init__.py +22 -0
  21. pypaimon/branch/branch_manager.py +191 -0
  22. pypaimon/branch/catalog_branch_manager.py +151 -0
  23. pypaimon/branch/filesystem_branch_manager.py +332 -0
  24. pypaimon/catalog/__init__.py +17 -0
  25. pypaimon/catalog/catalog.py +295 -0
  26. pypaimon/catalog/catalog_context.py +34 -0
  27. pypaimon/catalog/catalog_environment.py +120 -0
  28. pypaimon/catalog/catalog_exception.py +180 -0
  29. pypaimon/catalog/catalog_factory.py +44 -0
  30. pypaimon/catalog/catalog_loader.py +24 -0
  31. pypaimon/catalog/database.py +35 -0
  32. pypaimon/catalog/filesystem_catalog.py +345 -0
  33. pypaimon/catalog/filesystem_catalog_loader.py +67 -0
  34. pypaimon/catalog/rest/__init__.py +17 -0
  35. pypaimon/catalog/rest/property_change.py +53 -0
  36. pypaimon/catalog/rest/rest_catalog.py +487 -0
  37. pypaimon/catalog/rest/rest_catalog_loader.py +69 -0
  38. pypaimon/catalog/rest/rest_token.py +38 -0
  39. pypaimon/catalog/rest/rest_token_file_io.py +278 -0
  40. pypaimon/catalog/rest/table_metadata.py +40 -0
  41. pypaimon/catalog/table_rollback.py +43 -0
  42. pypaimon/changelog/__init__.py +23 -0
  43. pypaimon/changelog/changelog.py +85 -0
  44. pypaimon/changelog/changelog_manager.py +353 -0
  45. pypaimon/cli/__init__.py +20 -0
  46. pypaimon/cli/cli.py +137 -0
  47. pypaimon/cli/cli_catalog.py +65 -0
  48. pypaimon/cli/cli_db.py +279 -0
  49. pypaimon/cli/cli_table.py +844 -0
  50. pypaimon/cli/where_parser.py +376 -0
  51. pypaimon/common/__init__.py +17 -0
  52. pypaimon/common/delta_varint_compressor.py +125 -0
  53. pypaimon/common/external_path_provider.py +43 -0
  54. pypaimon/common/file_io.py +277 -0
  55. pypaimon/common/identifier.py +107 -0
  56. pypaimon/common/json_util.py +133 -0
  57. pypaimon/common/memory_size.py +201 -0
  58. pypaimon/common/options/__init__.py +30 -0
  59. pypaimon/common/options/config.py +85 -0
  60. pypaimon/common/options/config_option.py +143 -0
  61. pypaimon/common/options/config_options.py +215 -0
  62. pypaimon/common/options/core_options.py +586 -0
  63. pypaimon/common/options/options.py +62 -0
  64. pypaimon/common/options/options_utils.py +167 -0
  65. pypaimon/common/predicate.py +454 -0
  66. pypaimon/common/predicate_builder.py +138 -0
  67. pypaimon/common/time_utils.py +81 -0
  68. pypaimon/common/uri_reader.py +169 -0
  69. pypaimon/consumer/__init__.py +36 -0
  70. pypaimon/consumer/consumer.py +76 -0
  71. pypaimon/consumer/consumer_manager.py +195 -0
  72. pypaimon/data/__init__.py +21 -0
  73. pypaimon/data/timestamp.py +167 -0
  74. pypaimon/deletionvectors/__init__.py +27 -0
  75. pypaimon/deletionvectors/apply_deletion_vector_reader.py +128 -0
  76. pypaimon/deletionvectors/bitmap_deletion_vector.py +165 -0
  77. pypaimon/deletionvectors/deletion_vector.py +143 -0
  78. pypaimon/filesystem/__init__.py +16 -0
  79. pypaimon/filesystem/local.py +49 -0
  80. pypaimon/filesystem/local_file_io.py +455 -0
  81. pypaimon/filesystem/pvfs.py +892 -0
  82. pypaimon/filesystem/pyarrow_file_io.py +583 -0
  83. pypaimon/globalindex/__init__.py +45 -0
  84. pypaimon/globalindex/btree/__init__.py +25 -0
  85. pypaimon/globalindex/btree/block_aligned_type.py +36 -0
  86. pypaimon/globalindex/btree/block_entry.py +55 -0
  87. pypaimon/globalindex/btree/block_handle.py +41 -0
  88. pypaimon/globalindex/btree/block_reader.py +254 -0
  89. pypaimon/globalindex/btree/btree_file_footer.py +129 -0
  90. pypaimon/globalindex/btree/btree_index_meta.py +63 -0
  91. pypaimon/globalindex/btree/btree_index_reader.py +423 -0
  92. pypaimon/globalindex/btree/key_serializer.py +145 -0
  93. pypaimon/globalindex/btree/memory_slice_input.py +162 -0
  94. pypaimon/globalindex/btree/sst_file_reader.py +198 -0
  95. pypaimon/globalindex/global_index_evaluator.py +176 -0
  96. pypaimon/globalindex/global_index_meta.py +74 -0
  97. pypaimon/globalindex/global_index_reader.py +108 -0
  98. pypaimon/globalindex/global_index_result.py +100 -0
  99. pypaimon/globalindex/global_index_scanner.py +162 -0
  100. pypaimon/globalindex/indexed_split.py +142 -0
  101. pypaimon/globalindex/vector_search.py +92 -0
  102. pypaimon/globalindex/vector_search_result.py +136 -0
  103. pypaimon/index/__init__.py +17 -0
  104. pypaimon/index/deletion_vector_meta.py +40 -0
  105. pypaimon/index/index_file_handler.py +68 -0
  106. pypaimon/index/index_file_meta.py +57 -0
  107. pypaimon/manifest/__init__.py +26 -0
  108. pypaimon/manifest/fastavro_py36_compat.py +77 -0
  109. pypaimon/manifest/index_manifest_entry.py +62 -0
  110. pypaimon/manifest/index_manifest_file.py +176 -0
  111. pypaimon/manifest/manifest_file_manager.py +229 -0
  112. pypaimon/manifest/manifest_list_manager.py +134 -0
  113. pypaimon/manifest/schema/__init__.py +17 -0
  114. pypaimon/manifest/schema/data_file_meta.py +261 -0
  115. pypaimon/manifest/schema/file_entry.py +127 -0
  116. pypaimon/manifest/schema/manifest_entry.py +77 -0
  117. pypaimon/manifest/schema/manifest_file_meta.py +52 -0
  118. pypaimon/manifest/schema/simple_stats.py +74 -0
  119. pypaimon/manifest/simple_stats_evolution.py +123 -0
  120. pypaimon/manifest/simple_stats_evolutions.py +71 -0
  121. pypaimon/read/__init__.py +17 -0
  122. pypaimon/read/datasource/__init__.py +25 -0
  123. pypaimon/read/datasource/ray_datasource.py +230 -0
  124. pypaimon/read/datasource/torch_dataset.py +223 -0
  125. pypaimon/read/interval_partition.py +130 -0
  126. pypaimon/read/partition_info.py +46 -0
  127. pypaimon/read/plan.py +32 -0
  128. pypaimon/read/push_down_utils.py +126 -0
  129. pypaimon/read/read_builder.py +78 -0
  130. pypaimon/read/reader/__init__.py +17 -0
  131. pypaimon/read/reader/blob_descriptor_convert_reader.py +81 -0
  132. pypaimon/read/reader/concat_batch_reader.py +228 -0
  133. pypaimon/read/reader/concat_record_reader.py +50 -0
  134. pypaimon/read/reader/data_file_batch_reader.py +235 -0
  135. pypaimon/read/reader/drop_delete_reader.py +59 -0
  136. pypaimon/read/reader/empty_record_reader.py +37 -0
  137. pypaimon/read/reader/field_bunch.py +126 -0
  138. pypaimon/read/reader/filter_record_batch_reader.py +88 -0
  139. pypaimon/read/reader/filter_record_reader.py +61 -0
  140. pypaimon/read/reader/format_avro_reader.py +78 -0
  141. pypaimon/read/reader/format_blob_reader.py +201 -0
  142. pypaimon/read/reader/format_lance_reader.py +72 -0
  143. pypaimon/read/reader/format_pyarrow_reader.py +129 -0
  144. pypaimon/read/reader/iface/__init__.py +17 -0
  145. pypaimon/read/reader/iface/record_batch_reader.py +137 -0
  146. pypaimon/read/reader/iface/record_iterator.py +39 -0
  147. pypaimon/read/reader/iface/record_reader.py +42 -0
  148. pypaimon/read/reader/key_value_unwrap_reader.py +63 -0
  149. pypaimon/read/reader/key_value_wrap_reader.py +70 -0
  150. pypaimon/read/reader/lance_utils.py +91 -0
  151. pypaimon/read/reader/row_range_filter_record_reader.py +67 -0
  152. pypaimon/read/reader/shard_batch_reader.py +61 -0
  153. pypaimon/read/reader/sort_merge_reader.py +213 -0
  154. pypaimon/read/scanner/__init__.py +17 -0
  155. pypaimon/read/scanner/append_table_split_generator.py +173 -0
  156. pypaimon/read/scanner/changelog_follow_up_scanner.py +29 -0
  157. pypaimon/read/scanner/data_evolution_split_generator.py +355 -0
  158. pypaimon/read/scanner/delta_follow_up_scanner.py +28 -0
  159. pypaimon/read/scanner/file_scanner.py +469 -0
  160. pypaimon/read/scanner/follow_up_scanner.py +30 -0
  161. pypaimon/read/scanner/incremental_diff_scanner.py +100 -0
  162. pypaimon/read/scanner/primary_key_table_split_generator.py +126 -0
  163. pypaimon/read/scanner/split_generator.py +236 -0
  164. pypaimon/read/sliced_split.py +188 -0
  165. pypaimon/read/split.py +202 -0
  166. pypaimon/read/split_read.py +715 -0
  167. pypaimon/read/stream_read_builder.py +149 -0
  168. pypaimon/read/streaming_table_scan.py +416 -0
  169. pypaimon/read/table_read.py +290 -0
  170. pypaimon/read/table_scan.py +133 -0
  171. pypaimon/sample/__init__.py +17 -0
  172. pypaimon/sample/data/__init__.py +17 -0
  173. pypaimon/sample/oss_read_and_write.py +70 -0
  174. pypaimon/sample/rest_catalog_blob_as_descriptor_sample.py +155 -0
  175. pypaimon/sample/rest_catalog_ray_data_sample.py +245 -0
  176. pypaimon/sample/rest_catalog_ray_lance_sample.py +212 -0
  177. pypaimon/sample/rest_catalog_ray_sink_sample.py +153 -0
  178. pypaimon/sample/rest_catalog_read_write_sample.py +108 -0
  179. pypaimon/schema/__init__.py +17 -0
  180. pypaimon/schema/data_types.py +705 -0
  181. pypaimon/schema/schema.py +95 -0
  182. pypaimon/schema/schema_change.py +289 -0
  183. pypaimon/schema/schema_manager.py +439 -0
  184. pypaimon/schema/table_schema.py +146 -0
  185. pypaimon/snapshot/__init__.py +17 -0
  186. pypaimon/snapshot/catalog_snapshot_commit.py +84 -0
  187. pypaimon/snapshot/renaming_snapshot_commit.py +95 -0
  188. pypaimon/snapshot/snapshot.py +51 -0
  189. pypaimon/snapshot/snapshot_commit.py +102 -0
  190. pypaimon/snapshot/snapshot_loader.py +57 -0
  191. pypaimon/snapshot/snapshot_manager.py +283 -0
  192. pypaimon/snapshot/table_snapshot.py +38 -0
  193. pypaimon/snapshot/time_travel_util.py +75 -0
  194. pypaimon/table/__init__.py +17 -0
  195. pypaimon/table/bucket_mode.py +32 -0
  196. pypaimon/table/file_store_table.py +447 -0
  197. pypaimon/table/format/__init__.py +36 -0
  198. pypaimon/table/format/format_batch_write_builder.py +55 -0
  199. pypaimon/table/format/format_commit_message.py +26 -0
  200. pypaimon/table/format/format_data_split.py +30 -0
  201. pypaimon/table/format/format_read_builder.py +82 -0
  202. pypaimon/table/format/format_table.py +104 -0
  203. pypaimon/table/format/format_table_commit.py +66 -0
  204. pypaimon/table/format/format_table_read.py +273 -0
  205. pypaimon/table/format/format_table_scan.py +130 -0
  206. pypaimon/table/format/format_table_write.py +245 -0
  207. pypaimon/table/iceberg/__init__.py +19 -0
  208. pypaimon/table/iceberg/iceberg_table.py +109 -0
  209. pypaimon/table/instant.py +135 -0
  210. pypaimon/table/object/__init__.py +29 -0
  211. pypaimon/table/object/object_read_builder.py +50 -0
  212. pypaimon/table/object/object_split.py +31 -0
  213. pypaimon/table/object/object_table.py +103 -0
  214. pypaimon/table/object/object_table_read.py +159 -0
  215. pypaimon/table/object/object_table_scan.py +37 -0
  216. pypaimon/table/rollback_helper.py +93 -0
  217. pypaimon/table/row/__init__.py +17 -0
  218. pypaimon/table/row/binary_row.py +58 -0
  219. pypaimon/table/row/blob.py +286 -0
  220. pypaimon/table/row/generic_row.py +454 -0
  221. pypaimon/table/row/internal_row.py +54 -0
  222. pypaimon/table/row/key_value.py +57 -0
  223. pypaimon/table/row/offset_row.py +54 -0
  224. pypaimon/table/row/projected_row.py +76 -0
  225. pypaimon/table/row/row_kind.py +61 -0
  226. pypaimon/table/source/__init__.py +16 -0
  227. pypaimon/table/source/deletion_file.py +49 -0
  228. pypaimon/table/special_fields.py +83 -0
  229. pypaimon/table/table.py +43 -0
  230. pypaimon/tag/__init__.py +21 -0
  231. pypaimon/tag/tag.py +44 -0
  232. pypaimon/tag/tag_manager.py +233 -0
  233. pypaimon/tests/__init__.py +16 -0
  234. pypaimon/tests/binary_row_test.py +339 -0
  235. pypaimon/tests/blob_table_test.py +3029 -0
  236. pypaimon/tests/blob_test.py +1162 -0
  237. pypaimon/tests/branch/__init__.py +16 -0
  238. pypaimon/tests/branch/catalog_branch_manager_test.py +199 -0
  239. pypaimon/tests/branch/file_store_table_branch_manager_test.py +177 -0
  240. pypaimon/tests/branch_manager_test.py +183 -0
  241. pypaimon/tests/changelog_follow_up_scanner_test.py +51 -0
  242. pypaimon/tests/changelog_manager_test.py +137 -0
  243. pypaimon/tests/cli_db_test.py +338 -0
  244. pypaimon/tests/cli_table_test.py +1356 -0
  245. pypaimon/tests/consumer_manager_test.py +315 -0
  246. pypaimon/tests/consumer_test.py +204 -0
  247. pypaimon/tests/data_evolution_test.py +1350 -0
  248. pypaimon/tests/data_types_test.py +146 -0
  249. pypaimon/tests/delta_varint_compressor_test.py +379 -0
  250. pypaimon/tests/e2e/__init__.py +16 -0
  251. pypaimon/tests/e2e/java_py_read_write_test.py +456 -0
  252. pypaimon/tests/external_paths_test.py +427 -0
  253. pypaimon/tests/file_io_test.py +462 -0
  254. pypaimon/tests/file_store_commit_test.py +467 -0
  255. pypaimon/tests/filesystem_catalog_test.py +368 -0
  256. pypaimon/tests/follow_up_scanner_test.py +59 -0
  257. pypaimon/tests/identifier_test.py +97 -0
  258. pypaimon/tests/lance_utils_test.py +85 -0
  259. pypaimon/tests/manifest/__init__.py +17 -0
  260. pypaimon/tests/manifest/manifest_entry_identifier_test.py +259 -0
  261. pypaimon/tests/manifest/manifest_manager_test.py +196 -0
  262. pypaimon/tests/manifest/manifest_schema_test.py +233 -0
  263. pypaimon/tests/predicates_test.py +570 -0
  264. pypaimon/tests/pvfs_test.py +201 -0
  265. pypaimon/tests/py36/__init__.py +17 -0
  266. pypaimon/tests/py36/ao_predicate_test.py +252 -0
  267. pypaimon/tests/py36/ao_simple_test.py +480 -0
  268. pypaimon/tests/py36/data_evolution_test.py +488 -0
  269. pypaimon/tests/py36/pyarrow_compat.py +40 -0
  270. pypaimon/tests/py36/reader_predicate_test.py +93 -0
  271. pypaimon/tests/py36/rest_ao_read_write_test.py +915 -0
  272. pypaimon/tests/range_test.py +86 -0
  273. pypaimon/tests/ray_data_test.py +701 -0
  274. pypaimon/tests/ray_sink_test.py +342 -0
  275. pypaimon/tests/reader_append_only_test.py +578 -0
  276. pypaimon/tests/reader_base_test.py +1409 -0
  277. pypaimon/tests/reader_predicate_test.py +117 -0
  278. pypaimon/tests/reader_primary_key_test.py +586 -0
  279. pypaimon/tests/reader_split_generator_test.py +334 -0
  280. pypaimon/tests/rest/__init__.py +17 -0
  281. pypaimon/tests/rest/api_test.py +463 -0
  282. pypaimon/tests/rest/client_test.py +62 -0
  283. pypaimon/tests/rest/dlf_signer_test.py +155 -0
  284. pypaimon/tests/rest/rest_base_test.py +347 -0
  285. pypaimon/tests/rest/rest_catalog_commit_snapshot_test.py +342 -0
  286. pypaimon/tests/rest/rest_catalog_test.py +373 -0
  287. pypaimon/tests/rest/rest_format_table_test.py +613 -0
  288. pypaimon/tests/rest/rest_iceberg_table_test.py +81 -0
  289. pypaimon/tests/rest/rest_object_table_test.py +250 -0
  290. pypaimon/tests/rest/rest_permission_test.py +199 -0
  291. pypaimon/tests/rest/rest_read_write_test.py +689 -0
  292. pypaimon/tests/rest/rest_server.py +1165 -0
  293. pypaimon/tests/rest/rest_simple_test.py +859 -0
  294. pypaimon/tests/rest/rest_token_file_io_test.py +309 -0
  295. pypaimon/tests/rest/test_exponential_retry_strategy.py +64 -0
  296. pypaimon/tests/scanner/incremental_diff_scanner_test.py +390 -0
  297. pypaimon/tests/schema_evolution_read_test.py +434 -0
  298. pypaimon/tests/schema_manager_test.py +91 -0
  299. pypaimon/tests/serializable_test.py +113 -0
  300. pypaimon/tests/shard_table_updator_test.py +596 -0
  301. pypaimon/tests/snapshot_manager_test.py +143 -0
  302. pypaimon/tests/stream_read_builder_test.py +149 -0
  303. pypaimon/tests/streaming_table_scan_test.py +617 -0
  304. pypaimon/tests/table/file_store_table_test.py +439 -0
  305. pypaimon/tests/table/simple_table_test.py +687 -0
  306. pypaimon/tests/table_update_test.py +1057 -0
  307. pypaimon/tests/table_upsert_by_key_test.py +739 -0
  308. pypaimon/tests/torch_read_test.py +684 -0
  309. pypaimon/tests/uri_reader_factory_test.py +228 -0
  310. pypaimon/tests/where_parser_test.py +404 -0
  311. pypaimon/tests/write/simple_hash_bucket_assigner_test.py +60 -0
  312. pypaimon/tests/write/table_write_test.py +438 -0
  313. pypaimon/utils/__init__.py +17 -0
  314. pypaimon/utils/file_store_path_factory.py +156 -0
  315. pypaimon/utils/range.py +206 -0
  316. pypaimon/utils/range_helper.py +133 -0
  317. pypaimon/utils/roaring_bitmap.py +251 -0
  318. pypaimon/write/__init__.py +17 -0
  319. pypaimon/write/blob_format_writer.py +107 -0
  320. pypaimon/write/commit/__init__.py +16 -0
  321. pypaimon/write/commit/commit_rollback.py +62 -0
  322. pypaimon/write/commit/commit_scanner.py +127 -0
  323. pypaimon/write/commit/conflict_detection.py +203 -0
  324. pypaimon/write/commit_message.py +33 -0
  325. pypaimon/write/file_store_commit.py +728 -0
  326. pypaimon/write/file_store_write.py +142 -0
  327. pypaimon/write/ray_datasink.py +194 -0
  328. pypaimon/write/row_key_extractor.py +332 -0
  329. pypaimon/write/table_commit.py +99 -0
  330. pypaimon/write/table_update.py +271 -0
  331. pypaimon/write/table_update_by_row_id.py +323 -0
  332. pypaimon/write/table_upsert_by_key.py +378 -0
  333. pypaimon/write/table_write.py +147 -0
  334. pypaimon/write/write_builder.py +82 -0
  335. pypaimon/write/writer/__init__.py +17 -0
  336. pypaimon/write/writer/append_only_data_writer.py +31 -0
  337. pypaimon/write/writer/blob_file_writer.py +117 -0
  338. pypaimon/write/writer/blob_writer.py +247 -0
  339. pypaimon/write/writer/data_blob_writer.py +372 -0
  340. pypaimon/write/writer/data_writer.py +329 -0
  341. pypaimon/write/writer/key_value_data_writer.py +72 -0
  342. pypaimon-1.4.0.dist-info/METADATA +62 -0
  343. pypaimon-1.4.0.dist-info/RECORD +347 -0
  344. pypaimon-1.4.0.dist-info/WHEEL +5 -0
  345. pypaimon-1.4.0.dist-info/entry_points.txt +2 -0
  346. pypaimon-1.4.0.dist-info/licenses/LICENSE +202 -0
  347. pypaimon-1.4.0.dist-info/top_level.txt +1 -0
pypaimon/__init__.py ADDED
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
import sys

# Python 3.6 only: pull in the compatibility module before anything else.
# It is imported purely for its import-time effect (the name is never used,
# hence the unused-import waiver); presumably it adapts fastavro -- the
# module itself lives in pypaimon.manifest and is not shown here.
if (sys.version_info.major, sys.version_info.minor) == (3, 6):
    try:
        from pypaimon.manifest import fastavro_py36_compat  # noqa: F401
    except ImportError:
        # Best effort: keep importing the package when the shim is unavailable.
        pass

from pypaimon.catalog.catalog_factory import CatalogFactory
from pypaimon.filesystem.pvfs import PaimonVirtualFileSystem
from pypaimon.schema.schema import Schema
from pypaimon.tag.tag import Tag
from pypaimon.tag.tag_manager import TagManager

# Names re-exported at package level.
__all__ = [
    "PaimonVirtualFileSystem",
    "CatalogFactory",
    "Schema",
    "Tag",
    "TagManager",
]
pypaimon/acceptance/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ ################################################################################
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ ################################################################################
18
+ """
19
+ Acceptance tests for pypaimon.
20
+
21
+ These tests use real file I/O with local temp filesystem to verify
22
+ end-to-end behavior, as opposed to unit tests which use mocks.
23
+ """
pypaimon/acceptance/incremental_diff_acceptance_test.py ADDED
@@ -0,0 +1,238 @@
1
+ ################################################################################
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ ################################################################################
18
+ """
19
+ Acceptance tests for IncrementalDiffScanner.
20
+
21
+ These tests verify that the diff approach (reading 2 base_manifest_lists)
22
+ returns the same data as the delta approach (reading N delta_manifest_lists).
23
+
24
+ Uses real file I/O with local temp filesystem.
25
+ """
26
+
27
+ import os
28
+ import shutil
29
+ import tempfile
30
+ import unittest
31
+
32
+ import pyarrow as pa
33
+
34
+ from pypaimon import CatalogFactory, Schema
35
+ from pypaimon.manifest.manifest_file_manager import ManifestFileManager
36
+ from pypaimon.manifest.manifest_list_manager import ManifestListManager
37
+ from pypaimon.read.scanner.append_table_split_generator import \
38
+ AppendTableSplitGenerator
39
+ from pypaimon.read.scanner.incremental_diff_scanner import \
40
+ IncrementalDiffScanner
41
+ from pypaimon.snapshot.snapshot_manager import SnapshotManager
42
+
43
+
44
class IncrementalDiffAcceptanceTest(unittest.TestCase):
    """Acceptance tests for diff vs delta equivalence with real data.

    Each test writes several snapshots into a table in a temporary local
    warehouse, reads the rows added between two snapshots through
    ``IncrementalDiffScanner`` and through the per-snapshot delta manifests,
    and checks that both approaches return the same rows.
    """

    # Column order used when turning Arrow rows into comparable tuples.
    _ROW_COLUMNS = ('id', 'value', 'partition_col')

    @classmethod
    def setUpClass(cls):
        """Create the temp warehouse, the catalog, and the shared Arrow schema."""
        cls.tempdir = tempfile.mkdtemp()
        cls.warehouse = os.path.join(cls.tempdir, 'warehouse')
        cls.catalog = CatalogFactory.create({'warehouse': cls.warehouse})
        cls.catalog.create_database('default', True)

        cls.pa_schema = pa.schema([
            ('id', pa.int32()),
            ('value', pa.string()),
            ('partition_col', pa.string()),
        ])

    @classmethod
    def tearDownClass(cls):
        """Remove the temp warehouse; cleanup errors are ignored."""
        shutil.rmtree(cls.tempdir, ignore_errors=True)

    def _create_table_with_snapshots(self, name, num_snapshots=5, partition_keys=None):
        """Create a table and write num_snapshots of data.

        Every snapshot holds five rows whose ids are ``snap_id * 10 + i``, so
        ids are unique across the whole table.

        Args:
            name: Table name inside the ``default`` database.
            num_snapshots: Number of commits (one batch write each) to perform.
            partition_keys: Optional partition columns passed to the schema.

        Returns:
            Tuple of (table, expected_data_per_snapshot)
        """
        schema = Schema.from_pyarrow_schema(self.pa_schema, partition_keys=partition_keys)
        self.catalog.create_table(f'default.{name}', schema, False)
        table = self.catalog.get_table(f'default.{name}')

        all_data = []
        for snap_id in range(1, num_snapshots + 1):
            write_builder = table.new_batch_write_builder()
            table_write = write_builder.new_write()
            table_commit = write_builder.new_commit()
            try:
                data = {
                    'id': [snap_id * 10 + i for i in range(5)],
                    'value': [f'snap{snap_id}_row{i}' for i in range(5)],
                    'partition_col': ['p1' if i % 2 == 0 else 'p2' for i in range(5)],
                }
                all_data.append(data)

                pa_table = pa.Table.from_pydict(data, schema=self.pa_schema)
                table_write.write_arrow(pa_table)
                table_commit.commit(table_write.prepare_commit())
            finally:
                # Release writer/committer even when the write or commit fails.
                table_write.close()
                table_commit.close()

        return table, all_data

    def _read_via_diff(self, table, start_snap_id, end_snap_id):
        """Read data using IncrementalDiffScanner between two snapshots."""
        snapshot_manager = SnapshotManager(table)
        start_snapshot = snapshot_manager.get_snapshot_by_id(start_snap_id)
        end_snapshot = snapshot_manager.get_snapshot_by_id(end_snap_id)

        plan = IncrementalDiffScanner(table).scan(start_snapshot, end_snapshot)

        table_read = table.new_read_builder().new_read()
        return table_read.to_arrow(plan.splits())

    def _read_via_delta(self, table, start_snap_id, end_snap_id):
        """Read data by iterating delta_manifest_lists between two snapshots.

        Visits snapshots ``start_snap_id + 1`` through ``end_snap_id`` and
        collects the delta manifest entries of those whose commit kind is
        ``'APPEND'``.
        """
        snapshot_manager = SnapshotManager(table)
        manifest_list_manager = ManifestListManager(table)
        manifest_file_manager = ManifestFileManager(table)

        all_entries = []
        for snap_id in range(start_snap_id + 1, end_snap_id + 1):
            snapshot = snapshot_manager.get_snapshot_by_id(snap_id)
            if not snapshot or snapshot.commit_kind != 'APPEND':
                continue
            manifest_files = manifest_list_manager.read_delta(snapshot)
            if manifest_files:
                all_entries.extend(
                    manifest_file_manager.read_entries_parallel(manifest_files)
                )

        # Create splits from entries
        options = table.options
        split_generator = AppendTableSplitGenerator(
            table,
            options.source_split_target_size(),
            options.source_split_open_file_cost(),
            {}
        )
        splits = split_generator.create_splits(all_entries)

        table_read = table.new_read_builder().new_read()
        return table_read.to_arrow(splits)

    def _rows_to_set(self, arrow_table):
        """Convert arrow table to set of (id, value, partition_col) tuples."""
        # One to_pylist() per column instead of a column lookup per cell.
        columns = [arrow_table.column(name).to_pylist() for name in self._ROW_COLUMNS]
        return set(zip(*columns))

    def _assert_same_rows(self, diff_result, delta_result):
        """Assert both Arrow results hold the same rows, none of them duplicated.

        The generated rows are unique, so each result's raw row count must
        equal its number of distinct rows; comparing only the sets would let
        a scan that returns a file twice pass unnoticed.

        Returns:
            The set of row tuples produced by the diff read.
        """
        diff_rows = self._rows_to_set(diff_result)
        delta_rows = self._rows_to_set(delta_result)

        self.assertEqual(diff_rows, delta_rows)
        self.assertEqual(diff_result.num_rows, len(diff_rows),
                         "Diff read returned duplicate rows")
        self.assertEqual(delta_result.num_rows, len(delta_rows),
                         "Delta read returned duplicate rows")
        return diff_rows

    def test_diff_returns_same_rows_as_delta_simple(self):
        """
        Basic case: 5 snapshots, verify row-level equivalence.

        Creates a table with 5 snapshots, then reads data from snapshot 1 to 5
        using both diff and delta approaches, verifying they return the same rows.
        """
        table, _ = self._create_table_with_snapshots(
            'test_diff_delta_simple',
            num_snapshots=5
        )

        # Read using both approaches (from snapshot 1 to 5, so we get snapshots 2-5)
        diff_result = self._read_via_diff(table, 1, 5)
        delta_result = self._read_via_delta(table, 1, 5)

        diff_rows = self._assert_same_rows(diff_result, delta_result)

        # Verify we got the expected number of rows (snapshots 2-5, 5 rows each = 20)
        self.assertEqual(len(diff_rows), 20)

        # Verify specific IDs are present (from snapshots 2-5)
        expected_ids = {
            snap_id * 10 + i
            for snap_id in range(2, 6)  # snapshots 2, 3, 4, 5
            for i in range(5)
        }
        actual_ids = {row[0] for row in diff_rows}
        self.assertEqual(actual_ids, expected_ids)

    def test_diff_returns_same_rows_as_delta_many_snapshots(self):
        """
        Stress test: 20 snapshots, verify row-level equivalence.

        This tests the catch-up scenario where there are many snapshots
        between start and end.
        """
        table, _ = self._create_table_with_snapshots(
            'test_diff_delta_many',
            num_snapshots=20
        )

        # Read using both approaches (from snapshot 1 to 20)
        diff_result = self._read_via_diff(table, 1, 20)
        delta_result = self._read_via_delta(table, 1, 20)

        diff_rows = self._assert_same_rows(diff_result, delta_result)

        # Verify we got the expected number of rows (snapshots 2-20, 5 rows each = 95)
        self.assertEqual(len(diff_rows), 95)

    def test_diff_returns_same_rows_with_mixed_partitions(self):
        """
        Partitioned table: Verify diff handles multiple partitions correctly.

        Creates a partitioned table and verifies diff and delta return
        the same rows across all partitions.
        """
        table, _ = self._create_table_with_snapshots(
            'test_diff_delta_partitioned',
            num_snapshots=5,
            partition_keys=['partition_col']
        )

        # Read using both approaches
        diff_result = self._read_via_diff(table, 1, 5)
        delta_result = self._read_via_delta(table, 1, 5)

        diff_rows = self._assert_same_rows(diff_result, delta_result)

        # Verify both partitions have data
        p1_rows = {r for r in diff_rows if r[2] == 'p1'}
        p2_rows = {r for r in diff_rows if r[2] == 'p2'}

        self.assertGreater(len(p1_rows), 0, "Should have rows in partition p1")
        self.assertGreater(len(p2_rows), 0, "Should have rows in partition p2")


if __name__ == '__main__':
    unittest.main()
pypaimon/api/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
pypaimon/api/api_request.py ADDED
@@ -0,0 +1,96 @@
1
+ """
2
+ Licensed to the Apache Software Foundation (ASF) under one
3
+ or more contributor license agreements. See the NOTICE file
4
+ distributed with this work for additional information
5
+ regarding copyright ownership. The ASF licenses this file
6
+ to you under the Apache License, Version 2.0 (the
7
+ "License"); you may not use this file except in compliance
8
+ with the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ """
18
+
19
+ from abc import ABC
20
+ from dataclasses import dataclass
21
+ from typing import Dict, List, Optional
22
+
23
+ from pypaimon.common.identifier import Identifier
24
+ from pypaimon.common.json_util import json_field
25
+ from pypaimon.schema.schema import Schema
26
+ from pypaimon.schema.schema_change import SchemaChange
27
+ from pypaimon.snapshot.snapshot import Snapshot
28
+ from pypaimon.snapshot.snapshot_commit import PartitionStatistics
29
+ from pypaimon.table.instant import Instant
30
+
31
+
32
class RESTRequest(ABC):
    """Common base class of the REST request payloads declared in this module.

    It defines no members of its own; the concrete request dataclasses
    subclass it.
    """
34
+
35
+
36
@dataclass
class CreateDatabaseRequest(RESTRequest):
    """Request payload holding a database ``name`` and its ``options``.

    Each field is declared through ``json_field`` with the matching
    ``FIELD_*`` constant (presumably the JSON key; the helper is defined in
    ``pypaimon.common.json_util``, not here).
    """

    FIELD_NAME = "name"
    name: str = json_field(FIELD_NAME)

    FIELD_OPTIONS = "options"
    options: Dict[str, str] = json_field(FIELD_OPTIONS)
43
+
44
+
45
@dataclass
class AlterDatabaseRequest(RESTRequest):
    """Request payload holding ``removals`` (a list of strings) and
    ``updates`` (a string-to-string mapping).

    Each field is declared through ``json_field`` with the matching
    ``FIELD_*`` constant (presumably the JSON key; helper not shown here).
    """

    FIELD_REMOVALS = "removals"
    removals: List[str] = json_field(FIELD_REMOVALS)

    FIELD_UPDATES = "updates"
    updates: Dict[str, str] = json_field(FIELD_UPDATES)
52
+
53
+
54
@dataclass
class RenameTableRequest(RESTRequest):
    """Request payload holding the ``source`` and ``destination`` identifiers.

    Each field is declared through ``json_field`` with the matching
    ``FIELD_*`` constant (presumably the JSON key; helper not shown here).
    """

    FIELD_SOURCE = "source"
    source: Identifier = json_field(FIELD_SOURCE)

    FIELD_DESTINATION = "destination"
    destination: Identifier = json_field(FIELD_DESTINATION)
61
+
62
+
63
@dataclass
class CreateTableRequest(RESTRequest):
    """Request payload holding a table ``identifier`` and its ``schema``.

    Each field is declared through ``json_field`` with the matching
    ``FIELD_*`` constant (presumably the JSON key; helper not shown here).
    """

    FIELD_IDENTIFIER = "identifier"
    identifier: Identifier = json_field(FIELD_IDENTIFIER)

    FIELD_SCHEMA = "schema"
    schema: Schema = json_field(FIELD_SCHEMA)
70
+
71
+
72
@dataclass
class CommitTableRequest(RESTRequest):
    """Request payload holding an optional ``table_id``, a ``snapshot`` and a
    list of partition ``statistics``.

    Each field is declared through ``json_field`` with the matching
    ``FIELD_*`` constant (presumably the JSON key; helper not shown here).
    Note the camelCase constant ``"tableId"`` for the ``table_id`` attribute.
    """

    FIELD_TABLE_ID = "tableId"
    table_id: Optional[str] = json_field(FIELD_TABLE_ID)

    FIELD_SNAPSHOT = "snapshot"
    snapshot: Snapshot = json_field(FIELD_SNAPSHOT)

    FIELD_STATISTICS = "statistics"
    statistics: List[PartitionStatistics] = json_field(FIELD_STATISTICS)
81
+
82
+
83
@dataclass
class AlterTableRequest(RESTRequest):
    """Request payload holding the list of schema ``changes``.

    The field is declared through ``json_field`` with ``FIELD_CHANGES``
    (presumably the JSON key; helper not shown here).
    """

    FIELD_CHANGES = "changes"
    changes: List[SchemaChange] = json_field(FIELD_CHANGES)
88
+
89
+
90
@dataclass
class RollbackTableRequest(RESTRequest):
    """Request payload holding the target ``instant`` and an optional
    ``from_snapshot`` integer.

    Each field is declared through ``json_field`` with the matching
    ``FIELD_*`` constant (presumably the JSON key; helper not shown here).
    Note the camelCase constant ``"fromSnapshot"`` for ``from_snapshot``.
    """

    FIELD_INSTANT = "instant"
    instant: Instant = json_field(FIELD_INSTANT)

    FIELD_FROM_SNAPSHOT = "fromSnapshot"
    from_snapshot: Optional[int] = json_field(FIELD_FROM_SNAPSHOT)