deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2440 @@
1
+ import os
2
+ from typing import List, Tuple
3
+
4
+ import time
5
+ import multiprocessing
6
+
7
+ import pyarrow as pa
8
+ import pytest
9
+
10
+ from deltacat import (
11
+ Schema,
12
+ Field,
13
+ PartitionScheme,
14
+ PartitionKey,
15
+ ContentEncoding,
16
+ ContentType,
17
+ SortScheme,
18
+ SortKey,
19
+ SortOrder,
20
+ NullOrder,
21
+ LifecycleState,
22
+ )
23
+ from deltacat.exceptions import (
24
+ ObjectAlreadyExistsError,
25
+ ConcurrentModificationError,
26
+ ObjectDeletedError,
27
+ ObjectNotFoundError,
28
+ )
29
+ from deltacat.storage import (
30
+ BucketTransform,
31
+ BucketTransformParameters,
32
+ BucketingStrategy,
33
+ CommitState,
34
+ DeltaLocator,
35
+ Delta,
36
+ DeltaType,
37
+ EntryParams,
38
+ EntryType,
39
+ Manifest,
40
+ ManifestAuthor,
41
+ ManifestEntry,
42
+ ManifestMeta,
43
+ Namespace,
44
+ NamespaceLocator,
45
+ PartitionLocator,
46
+ Partition,
47
+ StreamLocator,
48
+ StreamFormat,
49
+ Stream,
50
+ Table,
51
+ TableLocator,
52
+ TableVersionLocator,
53
+ TableVersion,
54
+ Transaction,
55
+ TransactionOperation,
56
+ TransactionOperationType,
57
+ TruncateTransform,
58
+ TruncateTransformParameters,
59
+ )
60
+ from deltacat.storage.model.metafile import (
61
+ Metafile,
62
+ MetafileRevisionInfo,
63
+ )
64
+ from deltacat.constants import TXN_DIR_NAME, SUCCESS_TXN_DIR_NAME, NANOS_PER_SEC
65
+ from deltacat.utils.filesystem import resolve_path_and_filesystem
66
+ from deltacat.tests.test_utils.storage import (
67
+ create_test_namespace,
68
+ create_test_table,
69
+ create_test_table_version,
70
+ create_test_stream,
71
+ create_test_partition,
72
+ create_test_delta,
73
+ )
74
+
75
+
76
def _commit_single_delta_table(temp_dir: str) -> List[Tuple[Metafile, Metafile, str]]:
    """Commit a full namespace -> table -> table version -> stream ->
    partition -> delta chain in a single CREATE transaction.

    Returns one (expected_metafile, round_tripped_metafile, write_path)
    triple per created object, in creation order.
    """
    created = [
        create_test_namespace(),
        create_test_table(),
        create_test_table_version(),
        create_test_stream(),
        create_test_partition(),
        create_test_delta(),
    ]
    transaction = Transaction.of(
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
            for metafile in created
        ],
    )
    write_paths, txn_log_path = transaction.commit(temp_dir)
    assert os.path.exists(txn_log_path)
    # write paths come back one-per-operation in creation order, so read
    # each revision back with the matching metafile type
    reader_types = [Namespace, Table, TableVersion, Stream, Partition, Delta]
    round_tripped = [
        reader.read(path) for reader, path in zip(reader_types, write_paths)
    ]
    return list(zip(created, round_tripped, write_paths))
115
+
116
+
117
def _commit_concurrent_transaction(
    catalog_root: str,
    transaction: Transaction,
) -> None:
    """Commit `transaction` against `catalog_root`, returning the commit
    result on success or the conflict exception object on failure.

    Conflict exceptions are returned (not raised) so results can be
    collected across a multiprocessing pool without tearing workers down.
    """
    try:
        result = transaction.commit(catalog_root)
    except (ObjectAlreadyExistsError, ConcurrentModificationError) as exc:
        return exc
    return result
125
+
126
+
127
+ class TestMetafileIO:
128
+ def test_txn_conflict_concurrent_multiprocess_table_create(self, temp_dir):
129
+ base_table_name = "test_table"
130
+ table_locator = TableLocator.at(
131
+ namespace=None,
132
+ table_name=base_table_name,
133
+ )
134
+ # given a transaction to create a table
135
+ table = Table.of(
136
+ locator=table_locator,
137
+ description="test table description",
138
+ )
139
+ transaction = Transaction.of(
140
+ txn_operations=[
141
+ TransactionOperation.of(
142
+ operation_type=TransactionOperationType.CREATE,
143
+ dest_metafile=table,
144
+ )
145
+ ],
146
+ )
147
+ # when K rounds of N concurrent transaction commits try to create the
148
+ # same table
149
+ rounds = 25
150
+ concurrent_commit_count = multiprocessing.cpu_count()
151
+ with multiprocessing.Pool(processes=concurrent_commit_count) as pool:
152
+ for round_number in range(rounds):
153
+ table.locator.table_name = f"{base_table_name}_{round_number}"
154
+ futures = [
155
+ pool.apply_async(
156
+ _commit_concurrent_transaction, (temp_dir, transaction)
157
+ )
158
+ for _ in range(concurrent_commit_count)
159
+ ]
160
+ # expect all but one concurrent transaction to succeed each round
161
+ results = [future.get() for future in futures]
162
+ conflict_exception_count = 0
163
+ for result in results:
164
+ if isinstance(result, ConcurrentModificationError) or isinstance(
165
+ result, ObjectAlreadyExistsError
166
+ ):
167
+ conflict_exception_count += 1
168
+ else:
169
+ write_paths, txn_log_path = result
170
+ deserialized_table = Table.read(write_paths.pop())
171
+ assert table.equivalent_to(deserialized_table)
172
+ assert conflict_exception_count == concurrent_commit_count - 1
173
+
174
+ def test_txn_dual_commit_fails(self, temp_dir):
175
+ namespace_locator = NamespaceLocator.of(namespace="test_namespace")
176
+ namespace = Namespace.of(locator=namespace_locator)
177
+ # given a transaction that creates a single namespace
178
+ transaction = Transaction.of(
179
+ txn_operations=[
180
+ TransactionOperation.of(
181
+ operation_type=TransactionOperationType.CREATE,
182
+ dest_metafile=namespace,
183
+ )
184
+ ],
185
+ )
186
+ write_paths, txn_log_path = transaction.commit(temp_dir)
187
+ # when the transaction is committed,
188
+ # expect the namespace created to match the namespace given
189
+ deserialized_namespace = Namespace.read(write_paths.pop())
190
+ assert namespace.equivalent_to(deserialized_namespace)
191
+ # if we reread the transaction and commit it again,
192
+ reread_transaction = Transaction.read(txn_log_path)
193
+ # expect an exception to be raised
194
+ with pytest.raises(RuntimeError):
195
+ reread_transaction.commit(temp_dir)
196
+
197
+ def test_txn_bad_end_time_fails(self, temp_dir, mocker):
198
+ commit_results = _commit_single_delta_table(temp_dir)
199
+ for expected, actual, _ in commit_results:
200
+ assert expected.equivalent_to(actual)
201
+ # given a transaction with an ending timestamp set in the past
202
+ past_timestamp = time.time_ns() - NANOS_PER_SEC
203
+ mocker.patch(
204
+ "deltacat.storage.model.transaction.Transaction._parse_end_time",
205
+ return_value=past_timestamp,
206
+ )
207
+ original_delta: Delta = commit_results[5][1]
208
+ new_delta = Delta.update_for(original_delta)
209
+ txn_operations = [
210
+ TransactionOperation.of(
211
+ operation_type=TransactionOperationType.UPDATE,
212
+ dest_metafile=new_delta,
213
+ src_metafile=original_delta,
214
+ )
215
+ ]
216
+ transaction = Transaction.of(
217
+ txn_operations=txn_operations,
218
+ )
219
+ # expect the bad timestamp to be detected and its commit to fail
220
+ with pytest.raises(RuntimeError):
221
+ transaction.commit(temp_dir)
222
+
223
+ def test_txn_conflict_concurrent_complete(self, temp_dir, mocker):
224
+ commit_results = _commit_single_delta_table(temp_dir)
225
+ for expected, actual, _ in commit_results:
226
+ assert expected.equivalent_to(actual)
227
+
228
+ # given an initial metafile revision of a committed delta
229
+ write_paths = [result[2] for result in commit_results]
230
+ orig_delta_write_path = write_paths[5]
231
+
232
+ # a new delta metafile revision written by a transaction that completed
233
+ # before seeing any concurrent conflicts
234
+ mri = MetafileRevisionInfo.parse(orig_delta_write_path)
235
+ mri.txn_id = "0000000000000_test-txn-id"
236
+ mri.txn_op_type = TransactionOperationType.UPDATE
237
+ mri.revision = mri.revision + 1
238
+ conflict_delta_write_path = mri.path
239
+ _, filesystem = resolve_path_and_filesystem(orig_delta_write_path)
240
+ with filesystem.open_output_stream(conflict_delta_write_path):
241
+ pass # Just create an empty conflicting metafile revision
242
+ txn_log_file_dir = os.path.join(
243
+ temp_dir,
244
+ TXN_DIR_NAME,
245
+ SUCCESS_TXN_DIR_NAME,
246
+ mri.txn_id,
247
+ )
248
+ filesystem.create_dir(txn_log_file_dir, recursive=True)
249
+ txn_log_file_path = os.path.join(
250
+ txn_log_file_dir,
251
+ str(time.time_ns()),
252
+ )
253
+ with filesystem.open_output_stream(txn_log_file_path):
254
+ pass # Just create an empty log to mark the txn as complete
255
+
256
+ # and a concurrent transaction that started before that transaction
257
+ # completed, writes the same delta metafile revision, then sees the
258
+ # conflict
259
+ past_timestamp = time.time_ns() - NANOS_PER_SEC
260
+ future_timestamp = 9999999999999
261
+ end_time_mock = mocker.patch(
262
+ "deltacat.storage.model.transaction.Transaction._parse_end_time",
263
+ )
264
+ end_time_mock.side_effect = (
265
+ lambda path: future_timestamp if mri.txn_id in path else past_timestamp
266
+ )
267
+ original_delta = Delta.read(orig_delta_write_path)
268
+ new_delta = Delta.update_for(original_delta)
269
+ txn_operations = [
270
+ TransactionOperation.of(
271
+ operation_type=TransactionOperationType.UPDATE,
272
+ dest_metafile=new_delta,
273
+ src_metafile=original_delta,
274
+ )
275
+ ]
276
+ transaction = Transaction.of(
277
+ txn_operations=txn_operations,
278
+ )
279
+ # expect the commit to fail due to a concurrent modification error
280
+ with pytest.raises(RuntimeError):
281
+ transaction.commit(temp_dir)
282
+
283
+ def test_txn_conflict_concurrent_incomplete(self, temp_dir):
284
+ commit_results = _commit_single_delta_table(temp_dir)
285
+ for expected, actual, _ in commit_results:
286
+ assert expected.equivalent_to(actual)
287
+
288
+ # given an initial metafile revision of a committed delta
289
+ write_paths = [result[2] for result in commit_results]
290
+ orig_delta_write_path = write_paths[5]
291
+
292
+ # and a new delta metafile revision written by an incomplete transaction
293
+ mri = MetafileRevisionInfo.parse(orig_delta_write_path)
294
+ mri.txn_id = "9999999999999_test-txn-id"
295
+ mri.txn_op_type = TransactionOperationType.DELETE
296
+ mri.revision = mri.revision + 1
297
+ conflict_delta_write_path = mri.path
298
+ _, filesystem = resolve_path_and_filesystem(orig_delta_write_path)
299
+ with filesystem.open_output_stream(conflict_delta_write_path):
300
+ pass # Just create an empty conflicting metafile revision
301
+
302
+ # when a concurrent transaction tries to update the same delta
303
+ original_delta = Delta.read(orig_delta_write_path)
304
+ new_delta = Delta.update_for(original_delta)
305
+ transaction = Transaction.of(
306
+ txn_operations=[
307
+ TransactionOperation.of(
308
+ operation_type=TransactionOperationType.UPDATE,
309
+ dest_metafile=new_delta,
310
+ src_metafile=original_delta,
311
+ )
312
+ ],
313
+ )
314
+ # expect the commit to fail due to a concurrent modification error
315
+ with pytest.raises(ConcurrentModificationError):
316
+ transaction.commit(temp_dir)
317
+ # expect a commit retry to also fail
318
+ with pytest.raises(ConcurrentModificationError):
319
+ transaction.commit(temp_dir)
320
+
321
+ def test_append_multiple_deltas(self, temp_dir):
322
+ commit_results = _commit_single_delta_table(temp_dir)
323
+ for expected, actual, _ in commit_results:
324
+ assert expected.equivalent_to(actual)
325
+ original_delta: Delta = commit_results[5][1]
326
+
327
+ # given a transaction containing several deltas to append
328
+ txn_operations = []
329
+
330
+ delta_append_count = 100
331
+ for i in range(delta_append_count):
332
+ new_delta = Delta.based_on(
333
+ original_delta,
334
+ new_id=str(int(original_delta.id) + i + 1),
335
+ )
336
+ txn_operations.append(
337
+ TransactionOperation.of(
338
+ operation_type=TransactionOperationType.CREATE,
339
+ dest_metafile=new_delta,
340
+ )
341
+ )
342
+ transaction = Transaction.of(
343
+ txn_operations=txn_operations,
344
+ )
345
+ # when the transaction is committed
346
+ write_paths, txn_log_path = transaction.commit(temp_dir)
347
+ # expect all new deltas to be successfully written
348
+ assert len(write_paths) == delta_append_count
349
+ for i in range(len(write_paths)):
350
+ actual_delta = Delta.read(write_paths[i])
351
+ assert txn_operations[i].dest_metafile.equivalent_to(actual_delta)
352
+
353
+ def test_bad_update_mismatched_metafile_types(self, temp_dir):
354
+ commit_results = _commit_single_delta_table(temp_dir)
355
+ for expected, actual, _ in commit_results:
356
+ assert expected.equivalent_to(actual)
357
+ original_partition: Partition = commit_results[4][1]
358
+ original_delta: Delta = commit_results[5][1]
359
+
360
+ # given an attempt to replace a delta with a partition
361
+ replacement_partition: Partition = Partition.based_on(
362
+ original_partition,
363
+ new_id=original_partition.id + "_2",
364
+ )
365
+ # expect the transaction operation initialization to raise a value error
366
+ with pytest.raises(ValueError):
367
+ TransactionOperation.of(
368
+ operation_type=TransactionOperationType.UPDATE,
369
+ dest_metafile=replacement_partition,
370
+ src_metafile=original_delta,
371
+ )
372
+
373
+ def test_delete_delta(self, temp_dir):
374
+ commit_results = _commit_single_delta_table(temp_dir)
375
+ for expected, actual, _ in commit_results:
376
+ assert expected.equivalent_to(actual)
377
+ original_delta: Delta = commit_results[5][1]
378
+
379
+ # given a transaction containing a delta to delete
380
+ txn_operations = [
381
+ TransactionOperation.of(
382
+ operation_type=TransactionOperationType.DELETE,
383
+ dest_metafile=original_delta,
384
+ )
385
+ ]
386
+ transaction = Transaction.of(
387
+ txn_operations=txn_operations,
388
+ )
389
+ # when the transaction is committed
390
+ write_paths, txn_log_path = transaction.commit(temp_dir)
391
+
392
+ # expect one new delete metafile to be written
393
+ assert len(write_paths) == 1
394
+ delete_write_path = write_paths[0]
395
+
396
+ # expect the delete metafile to contain the input txn op dest_metafile
397
+ assert TransactionOperationType.DELETE.value in delete_write_path
398
+ actual_delta = Delta.read(delete_write_path)
399
+ assert original_delta.equivalent_to(actual_delta)
400
+
401
+ # expect a subsequent replace of the deleted delta to fail
402
+ replacement_delta: Delta = Delta.based_on(
403
+ original_delta,
404
+ new_id=str(int(original_delta.id) + 1),
405
+ )
406
+ bad_txn_operations = [
407
+ TransactionOperation.of(
408
+ operation_type=TransactionOperationType.UPDATE,
409
+ dest_metafile=replacement_delta,
410
+ src_metafile=original_delta,
411
+ )
412
+ ]
413
+ transaction = Transaction.of(
414
+ txn_operations=bad_txn_operations,
415
+ )
416
+ with pytest.raises(ObjectDeletedError):
417
+ transaction.commit(temp_dir)
418
+
419
+ # expect subsequent deletes of the deleted delta to fail
420
+ bad_txn_operations = [
421
+ TransactionOperation.of(
422
+ operation_type=TransactionOperationType.DELETE,
423
+ dest_metafile=original_delta,
424
+ )
425
+ ]
426
+ transaction = Transaction.of(
427
+ txn_operations=bad_txn_operations,
428
+ )
429
+ with pytest.raises(ObjectDeletedError):
430
+ transaction.commit(temp_dir)
431
+
432
+ def test_replace_delta(self, temp_dir):
433
+ commit_results = _commit_single_delta_table(temp_dir)
434
+ for expected, actual, _ in commit_results:
435
+ assert expected.equivalent_to(actual)
436
+ original_delta: Delta = commit_results[5][1]
437
+
438
+ # given a transaction containing a delta replacement
439
+ replacement_delta: Delta = Delta.based_on(
440
+ original_delta,
441
+ new_id=str(int(original_delta.id) + 1),
442
+ )
443
+
444
+ # expect the proposed replacement delta to be assigned a new ID
445
+ assert replacement_delta.id != original_delta.id
446
+
447
+ txn_operations = [
448
+ TransactionOperation.of(
449
+ operation_type=TransactionOperationType.REPLACE,
450
+ dest_metafile=replacement_delta,
451
+ src_metafile=original_delta,
452
+ )
453
+ ]
454
+ transaction = Transaction.of(
455
+ txn_operations=txn_operations,
456
+ )
457
+ # when the transaction is committed
458
+ write_paths, txn_log_path = transaction.commit(temp_dir)
459
+
460
+ # expect two new metafiles to be written
461
+ # (i.e., delete old delta, create replacement delta)
462
+ assert len(write_paths) == 2
463
+ delete_write_path = write_paths[0]
464
+ create_write_path = write_paths[1]
465
+
466
+ # expect the replacement delta to be successfully written and read
467
+ assert TransactionOperationType.CREATE.value in create_write_path
468
+ actual_delta = Delta.read(create_write_path)
469
+ assert replacement_delta.equivalent_to(actual_delta)
470
+
471
+ # expect the delete metafile to also contain the replacement delta
472
+ assert TransactionOperationType.DELETE.value in delete_write_path
473
+ actual_delta = Delta.read(delete_write_path)
474
+ assert replacement_delta.equivalent_to(actual_delta)
475
+
476
+ # expect a subsequent replace of the original delta to fail
477
+ bad_txn_operations = [
478
+ TransactionOperation.of(
479
+ operation_type=TransactionOperationType.REPLACE,
480
+ dest_metafile=replacement_delta,
481
+ src_metafile=original_delta,
482
+ )
483
+ ]
484
+ transaction = Transaction.of(
485
+ txn_operations=bad_txn_operations,
486
+ )
487
+ with pytest.raises(ObjectDeletedError):
488
+ transaction.commit(temp_dir)
489
+
490
+ # expect deletes of the original delta to fail
491
+ bad_txn_operations = [
492
+ TransactionOperation.of(
493
+ operation_type=TransactionOperationType.DELETE,
494
+ dest_metafile=original_delta,
495
+ )
496
+ ]
497
+ transaction = Transaction.of(
498
+ txn_operations=bad_txn_operations,
499
+ )
500
+ with pytest.raises(ObjectDeletedError):
501
+ transaction.commit(temp_dir)
502
+
503
+ def test_delete_partition(self, temp_dir):
504
+ commit_results = _commit_single_delta_table(temp_dir)
505
+ for expected, actual, _ in commit_results:
506
+ assert expected.equivalent_to(actual)
507
+ original_partition: Partition = commit_results[4][1]
508
+
509
+ txn_operations = [
510
+ TransactionOperation.of(
511
+ operation_type=TransactionOperationType.DELETE,
512
+ dest_metafile=original_partition,
513
+ )
514
+ ]
515
+ transaction = Transaction.of(
516
+ txn_operations=txn_operations,
517
+ )
518
+ # when the transaction is committed
519
+ write_paths, txn_log_path = transaction.commit(temp_dir)
520
+
521
+ # expect 1 new partition metafile to be written
522
+ assert len(write_paths) == 1
523
+ delete_write_path = write_paths[0]
524
+
525
+ # expect the delete metafile to contain the input txn op dest_metafile
526
+ assert TransactionOperationType.DELETE.value in delete_write_path
527
+ actual_partition = Partition.read(delete_write_path)
528
+ assert original_partition.equivalent_to(actual_partition)
529
+
530
+ # expect child metafiles in the deleted partition to remain readable and unchanged
531
+ child_metafiles_read_post_delete = [
532
+ Delta.read(commit_results[5][2]),
533
+ ]
534
+ original_child_metafiles_to_create = [
535
+ Delta(commit_results[5][0]),
536
+ ]
537
+ original_child_metafiles_created = [
538
+ Delta(commit_results[5][1]),
539
+ ]
540
+ for i in range(len(original_child_metafiles_to_create)):
541
+ assert child_metafiles_read_post_delete[i].equivalent_to(
542
+ original_child_metafiles_to_create[i]
543
+ )
544
+ assert child_metafiles_read_post_delete[i].equivalent_to(
545
+ original_child_metafiles_created[i]
546
+ )
547
+
548
+ # expect a subsequent replace of the deleted partition to fail
549
+ replacement_partition: Partition = Partition.based_on(
550
+ original_partition,
551
+ new_id=original_partition.id + "_2",
552
+ )
553
+ bad_txn_operations = [
554
+ TransactionOperation.of(
555
+ operation_type=TransactionOperationType.REPLACE,
556
+ dest_metafile=replacement_partition,
557
+ src_metafile=original_partition,
558
+ )
559
+ ]
560
+ transaction = Transaction.of(
561
+ txn_operations=bad_txn_operations,
562
+ )
563
+ with pytest.raises(ObjectDeletedError):
564
+ transaction.commit(temp_dir)
565
+
566
+ # expect subsequent deletes of the deleted partition to fail
567
+ bad_txn_operations = [
568
+ TransactionOperation.of(
569
+ operation_type=TransactionOperationType.DELETE,
570
+ dest_metafile=original_partition,
571
+ )
572
+ ]
573
+ transaction = Transaction.of(
574
+ txn_operations=bad_txn_operations,
575
+ )
576
+ with pytest.raises(ObjectDeletedError):
577
+ transaction.commit(temp_dir)
578
+
579
+ # expect new child metafile creation under the deleted partition to fail
580
+ for metafile in original_child_metafiles_created:
581
+ bad_txn_operations = [
582
+ TransactionOperation.of(
583
+ operation_type=TransactionOperationType.CREATE,
584
+ dest_metafile=metafile,
585
+ )
586
+ ]
587
+ transaction = Transaction.of(
588
+ txn_operations=bad_txn_operations,
589
+ )
590
+ with pytest.raises(ObjectAlreadyExistsError):
591
+ transaction.commit(temp_dir)
592
+
593
+ def test_replace_partition(self, temp_dir):
594
+ commit_results = _commit_single_delta_table(temp_dir)
595
+ for expected, actual, _ in commit_results:
596
+ assert expected.equivalent_to(actual)
597
+ original_partition: Partition = commit_results[4][1]
598
+
599
+ # given a transaction containing a partition replacement
600
+ replacement_partition: Partition = Partition.based_on(
601
+ original_partition,
602
+ new_id=original_partition.id + "_2",
603
+ )
604
+
605
+ # expect the proposed replacement partition to be assigned a new ID
606
+ assert replacement_partition.id != original_partition.id
607
+
608
+ txn_operations = [
609
+ TransactionOperation.of(
610
+ operation_type=TransactionOperationType.REPLACE,
611
+ dest_metafile=replacement_partition,
612
+ src_metafile=original_partition,
613
+ )
614
+ ]
615
+ transaction = Transaction.of(
616
+ txn_operations=txn_operations,
617
+ )
618
+ # when the transaction is committed
619
+ write_paths, txn_log_path = transaction.commit(temp_dir)
620
+
621
+ # expect two new partition metafiles to be written
622
+ # (i.e., delete old partition, create replacement partition)
623
+ assert len(write_paths) == 2
624
+ delete_write_path = write_paths[0]
625
+ create_write_path = write_paths[1]
626
+
627
+ # expect the replacement partition to be successfully written and read
628
+ assert TransactionOperationType.CREATE.value in create_write_path
629
+ actual_partition = Partition.read(create_write_path)
630
+ assert replacement_partition.equivalent_to(actual_partition)
631
+
632
+ # expect the delete metafile to also contain the replacement partition
633
+ assert TransactionOperationType.DELETE.value in delete_write_path
634
+ actual_partition = Partition.read(delete_write_path)
635
+ assert replacement_partition.equivalent_to(actual_partition)
636
+
637
+ # expect old child metafiles for the replaced partition to remain readable
638
+ child_metafiles_read_post_replace = [
639
+ Delta.read(commit_results[5][2]),
640
+ ]
641
+ # expect old child metafiles read to share the same parent table name as
642
+ # the replacement partition, but have a different parent partition ID
643
+ for metafile in child_metafiles_read_post_replace:
644
+ assert (
645
+ metafile.table_name
646
+ == replacement_partition.table_name
647
+ == original_partition.table_name
648
+ )
649
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
650
+ parent_partition_id = ancestor_ids[4]
651
+ assert parent_partition_id == original_partition.id
652
+
653
+ # expect original child metafiles to share the original parent partition ID
654
+ original_child_metafiles_to_create = [
655
+ Delta(commit_results[5][0]),
656
+ ]
657
+ original_child_metafiles_created = [
658
+ Delta(commit_results[5][1]),
659
+ ]
660
+ for i in range(len(original_child_metafiles_to_create)):
661
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
662
+ parent_partition_id = ancestor_ids[4]
663
+ assert parent_partition_id == original_partition.id
664
+
665
+ # expect a subsequent replace of the original partition to fail
666
+ bad_txn_operations = [
667
+ TransactionOperation.of(
668
+ operation_type=TransactionOperationType.REPLACE,
669
+ dest_metafile=replacement_partition,
670
+ src_metafile=original_partition,
671
+ )
672
+ ]
673
+ transaction = Transaction.of(
674
+ txn_operations=bad_txn_operations,
675
+ )
676
+ with pytest.raises(ObjectDeletedError):
677
+ transaction.commit(temp_dir)
678
+
679
+ # expect deletes of the original partition to fail
680
+ bad_txn_operations = [
681
+ TransactionOperation.of(
682
+ operation_type=TransactionOperationType.DELETE,
683
+ dest_metafile=original_partition,
684
+ )
685
+ ]
686
+ transaction = Transaction.of(
687
+ txn_operations=bad_txn_operations,
688
+ )
689
+ with pytest.raises(ObjectDeletedError):
690
+ transaction.commit(temp_dir)
691
+
692
+ # expect new child metafile creation under the old partition to fail
693
+ for metafile in original_child_metafiles_created:
694
+ bad_txn_operations = [
695
+ TransactionOperation.of(
696
+ operation_type=TransactionOperationType.CREATE,
697
+ dest_metafile=metafile,
698
+ )
699
+ ]
700
+ transaction = Transaction.of(
701
+ txn_operations=bad_txn_operations,
702
+ )
703
+ with pytest.raises(ObjectAlreadyExistsError):
704
+ transaction.commit(temp_dir)
705
+
706
+ def test_delete_stream(self, temp_dir):
707
+ commit_results = _commit_single_delta_table(temp_dir)
708
+ for expected, actual, _ in commit_results:
709
+ assert expected.equivalent_to(actual)
710
+ original_stream: Stream = commit_results[3][1]
711
+
712
+ txn_operations = [
713
+ TransactionOperation.of(
714
+ operation_type=TransactionOperationType.DELETE,
715
+ dest_metafile=original_stream,
716
+ )
717
+ ]
718
+ transaction = Transaction.of(
719
+ txn_operations=txn_operations,
720
+ )
721
+ # when the transaction is committed
722
+ write_paths, txn_log_path = transaction.commit(temp_dir)
723
+
724
+ # expect 1 new stream metafile to be written
725
+ assert len(write_paths) == 1
726
+ delete_write_path = write_paths[0]
727
+
728
+ # expect the delete metafile to contain the input txn op dest_metafile
729
+ assert TransactionOperationType.DELETE.value in delete_write_path
730
+ actual_stream = Stream.read(delete_write_path)
731
+ assert original_stream == actual_stream
732
+
733
+ # expect child metafiles in the deleted stream to remain readable and unchanged
734
+ child_metafiles_read_post_delete = [
735
+ Delta.read(commit_results[5][2]),
736
+ Partition.read(commit_results[4][2]),
737
+ ]
738
+ original_child_metafiles_to_create = [
739
+ Delta(commit_results[5][0]),
740
+ Partition(commit_results[4][0]),
741
+ ]
742
+ original_child_metafiles_created = [
743
+ Delta(commit_results[5][1]),
744
+ Partition(commit_results[4][1]),
745
+ ]
746
+ for i in range(len(original_child_metafiles_to_create)):
747
+ assert child_metafiles_read_post_delete[i].equivalent_to(
748
+ original_child_metafiles_to_create[i]
749
+ )
750
+ assert child_metafiles_read_post_delete[i].equivalent_to(
751
+ original_child_metafiles_created[i]
752
+ )
753
+
754
+ # expect a subsequent replace of the deleted stream to fail
755
+ replacement_stream: Stream = Stream.based_on(
756
+ original_stream,
757
+ new_id=original_stream.id + "_2",
758
+ )
759
+ bad_txn_operations = [
760
+ TransactionOperation.of(
761
+ operation_type=TransactionOperationType.REPLACE,
762
+ dest_metafile=replacement_stream,
763
+ src_metafile=original_stream,
764
+ )
765
+ ]
766
+ transaction = Transaction.of(
767
+ txn_operations=bad_txn_operations,
768
+ )
769
+ with pytest.raises(ObjectDeletedError):
770
+ transaction.commit(temp_dir)
771
+
772
+ # expect subsequent deletes of the deleted stream to fail
773
+ bad_txn_operations = [
774
+ TransactionOperation.of(
775
+ operation_type=TransactionOperationType.DELETE,
776
+ dest_metafile=original_stream,
777
+ )
778
+ ]
779
+ transaction = Transaction.of(
780
+ txn_operations=bad_txn_operations,
781
+ )
782
+ with pytest.raises(ObjectDeletedError):
783
+ transaction.commit(temp_dir)
784
+
785
+ # expect new child metafile creation under the deleted stream to fail
786
+ for metafile in original_child_metafiles_created:
787
+ bad_txn_operations = [
788
+ TransactionOperation.of(
789
+ operation_type=TransactionOperationType.CREATE,
790
+ dest_metafile=metafile,
791
+ )
792
+ ]
793
+ transaction = Transaction.of(
794
+ txn_operations=bad_txn_operations,
795
+ )
796
+ with pytest.raises(ObjectAlreadyExistsError):
797
+ transaction.commit(temp_dir)
798
+
799
+ def test_replace_stream(self, temp_dir):
800
+ commit_results = _commit_single_delta_table(temp_dir)
801
+ for expected, actual, _ in commit_results:
802
+ assert expected.equivalent_to(actual)
803
+ original_stream: Stream = commit_results[3][1]
804
+
805
+ # given a transaction containing a stream replacement
806
+ replacement_stream: Stream = Stream.based_on(
807
+ original_stream,
808
+ new_id=original_stream.id + "_2",
809
+ )
810
+
811
+ # expect the proposed replacement stream to be assigned a new ID
812
+ assert replacement_stream.id != original_stream.id
813
+
814
+ txn_operations = [
815
+ TransactionOperation.of(
816
+ operation_type=TransactionOperationType.REPLACE,
817
+ dest_metafile=replacement_stream,
818
+ src_metafile=original_stream,
819
+ )
820
+ ]
821
+ transaction = Transaction.of(
822
+ txn_operations=txn_operations,
823
+ )
824
+ # when the transaction is committed
825
+ write_paths, txn_log_path = transaction.commit(temp_dir)
826
+
827
+ # expect two new stream metafiles to be written
828
+ # (i.e., delete old stream, create replacement stream)
829
+ assert len(write_paths) == 2
830
+ delete_write_path = write_paths[0]
831
+ create_write_path = write_paths[1]
832
+
833
+ # expect the replacement stream to be successfully written and read
834
+ assert TransactionOperationType.CREATE.value in create_write_path
835
+ actual_stream = Stream.read(create_write_path)
836
+ assert replacement_stream.equivalent_to(actual_stream)
837
+
838
+ # expect the delete metafile to also contain the replacement stream
839
+ assert TransactionOperationType.DELETE.value in delete_write_path
840
+ actual_stream = Stream.read(delete_write_path)
841
+ assert replacement_stream.equivalent_to(actual_stream)
842
+
843
+ # expect old child metafiles for the replaced stream to remain readable
844
+ child_metafiles_read_post_replace = [
845
+ Delta.read(commit_results[5][2]),
846
+ Partition.read(commit_results[4][2]),
847
+ ]
848
+ # expect old child metafiles read to share the same parent table name as
849
+ # the replacement stream, but have a different parent stream ID
850
+ for metafile in child_metafiles_read_post_replace:
851
+ assert (
852
+ metafile.table_name
853
+ == replacement_stream.table_name
854
+ == original_stream.table_name
855
+ )
856
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
857
+ parent_stream_id = ancestor_ids[3]
858
+ assert parent_stream_id == original_stream.id
859
+
860
+ # expect original child metafiles to share the original parent stream ID
861
+ original_child_metafiles_to_create = [
862
+ Delta(commit_results[5][0]),
863
+ Partition(commit_results[4][0]),
864
+ ]
865
+ original_child_metafiles_created = [
866
+ Delta(commit_results[5][1]),
867
+ Partition(commit_results[4][1]),
868
+ ]
869
+ for i in range(len(original_child_metafiles_to_create)):
870
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
871
+ parent_stream_id = ancestor_ids[3]
872
+ assert parent_stream_id == original_stream.id
873
+
874
+ # expect a subsequent replace of the original stream to fail
875
+ bad_txn_operations = [
876
+ TransactionOperation.of(
877
+ operation_type=TransactionOperationType.REPLACE,
878
+ dest_metafile=replacement_stream,
879
+ src_metafile=original_stream,
880
+ )
881
+ ]
882
+ transaction = Transaction.of(
883
+ txn_operations=bad_txn_operations,
884
+ )
885
+ with pytest.raises(ObjectDeletedError):
886
+ transaction.commit(temp_dir)
887
+
888
+ # expect deletes of the original stream to fail
889
+ bad_txn_operations = [
890
+ TransactionOperation.of(
891
+ operation_type=TransactionOperationType.DELETE,
892
+ dest_metafile=original_stream,
893
+ )
894
+ ]
895
+ transaction = Transaction.of(
896
+ txn_operations=bad_txn_operations,
897
+ )
898
+ with pytest.raises(ObjectDeletedError):
899
+ transaction.commit(temp_dir)
900
+
901
+ # expect new child metafile creation under the old stream to fail
902
+ for metafile in original_child_metafiles_created:
903
+ bad_txn_operations = [
904
+ TransactionOperation.of(
905
+ operation_type=TransactionOperationType.CREATE,
906
+ dest_metafile=metafile,
907
+ )
908
+ ]
909
+ transaction = Transaction.of(
910
+ txn_operations=bad_txn_operations,
911
+ )
912
+ with pytest.raises(ObjectAlreadyExistsError):
913
+ transaction.commit(temp_dir)
914
+
915
+ def test_delete_table_version(self, temp_dir):
916
+ commit_results = _commit_single_delta_table(temp_dir)
917
+ for expected, actual, _ in commit_results:
918
+ assert expected.equivalent_to(actual)
919
+ original_table_version: TableVersion = commit_results[2][1]
920
+
921
+ txn_operations = [
922
+ TransactionOperation.of(
923
+ operation_type=TransactionOperationType.DELETE,
924
+ dest_metafile=original_table_version,
925
+ )
926
+ ]
927
+ transaction = Transaction.of(
928
+ txn_operations=txn_operations,
929
+ )
930
+ # when the transaction is committed
931
+ write_paths, txn_log_path = transaction.commit(temp_dir)
932
+
933
+ # expect 1 new table version metafile to be written
934
+ assert len(write_paths) == 1
935
+ delete_write_path = write_paths[0]
936
+
937
+ # expect the delete metafile to contain the input txn op dest_metafile
938
+ assert TransactionOperationType.DELETE.value in delete_write_path
939
+ actual_table_version = TableVersion.read(delete_write_path)
940
+ assert original_table_version.equivalent_to(actual_table_version)
941
+
942
+ # expect child metafiles in the deleted table version to remain readable and unchanged
943
+ child_metafiles_read_post_delete = [
944
+ Delta.read(commit_results[5][2]),
945
+ Partition.read(commit_results[4][2]),
946
+ Stream.read(commit_results[3][2]),
947
+ ]
948
+ original_child_metafiles_to_create = [
949
+ Delta(commit_results[5][0]),
950
+ Partition(commit_results[4][0]),
951
+ Stream(commit_results[3][0]),
952
+ ]
953
+ original_child_metafiles_created = [
954
+ Delta(commit_results[5][1]),
955
+ Partition(commit_results[4][1]),
956
+ Stream(commit_results[3][1]),
957
+ ]
958
+ for i in range(len(original_child_metafiles_to_create)):
959
+ assert child_metafiles_read_post_delete[i].equivalent_to(
960
+ original_child_metafiles_to_create[i]
961
+ )
962
+ assert child_metafiles_read_post_delete[i].equivalent_to(
963
+ original_child_metafiles_created[i]
964
+ )
965
+
966
+ # expect a subsequent replace of the deleted table version to fail
967
+ replacement_table_version: TableVersion = TableVersion.based_on(
968
+ original_table_version,
969
+ new_id=original_table_version.id + "0",
970
+ )
971
+ bad_txn_operations = [
972
+ TransactionOperation.of(
973
+ operation_type=TransactionOperationType.REPLACE,
974
+ dest_metafile=replacement_table_version,
975
+ src_metafile=original_table_version,
976
+ )
977
+ ]
978
+ transaction = Transaction.of(
979
+ txn_operations=bad_txn_operations,
980
+ )
981
+ with pytest.raises(ObjectDeletedError):
982
+ transaction.commit(temp_dir)
983
+
984
+ # expect subsequent deletes of the deleted table version to fail
985
+ bad_txn_operations = [
986
+ TransactionOperation.of(
987
+ operation_type=TransactionOperationType.DELETE,
988
+ dest_metafile=original_table_version,
989
+ )
990
+ ]
991
+ transaction = Transaction.of(
992
+ txn_operations=bad_txn_operations,
993
+ )
994
+ with pytest.raises(ObjectDeletedError):
995
+ transaction.commit(temp_dir)
996
+
997
+ # expect new child metafile creation under the deleted table version to fail
998
+ for metafile in original_child_metafiles_created:
999
+ bad_txn_operations = [
1000
+ TransactionOperation.of(
1001
+ operation_type=TransactionOperationType.CREATE,
1002
+ dest_metafile=metafile,
1003
+ )
1004
+ ]
1005
+ transaction = Transaction.of(
1006
+ txn_operations=bad_txn_operations,
1007
+ )
1008
+ with pytest.raises(ObjectAlreadyExistsError):
1009
+ transaction.commit(temp_dir)
1010
+
1011
+ def test_replace_table_version(self, temp_dir):
1012
+ commit_results = _commit_single_delta_table(temp_dir)
1013
+ for expected, actual, _ in commit_results:
1014
+ assert expected.equivalent_to(actual)
1015
+ original_table_version: TableVersion = commit_results[2][1]
1016
+
1017
+ # given a transaction containing a table version replacement
1018
+ replacement_table_version: TableVersion = TableVersion.based_on(
1019
+ original_table_version,
1020
+ new_id=original_table_version.id + "0",
1021
+ )
1022
+
1023
+ # expect the proposed replacement table version to be assigned a new ID
1024
+ assert replacement_table_version.id != original_table_version.id
1025
+
1026
+ txn_operations = [
1027
+ TransactionOperation.of(
1028
+ operation_type=TransactionOperationType.REPLACE,
1029
+ dest_metafile=replacement_table_version,
1030
+ src_metafile=original_table_version,
1031
+ )
1032
+ ]
1033
+ transaction = Transaction.of(
1034
+ txn_operations=txn_operations,
1035
+ )
1036
+ # when the transaction is committed
1037
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1038
+
1039
+ # expect two new table version metafiles to be written
1040
+ # (i.e., delete old table version, create replacement table version)
1041
+ assert len(write_paths) == 2
1042
+ delete_write_path = write_paths[0]
1043
+ create_write_path = write_paths[1]
1044
+
1045
+ # expect the replacement table version to be successfully written and read
1046
+ assert TransactionOperationType.CREATE.value in create_write_path
1047
+ actual_table_version = TableVersion.read(create_write_path)
1048
+ assert replacement_table_version.equivalent_to(actual_table_version)
1049
+
1050
+ # expect the delete metafile to also contain the replacement table version
1051
+ assert TransactionOperationType.DELETE.value in delete_write_path
1052
+ actual_table_version = TableVersion.read(delete_write_path)
1053
+ assert replacement_table_version.equivalent_to(actual_table_version)
1054
+
1055
+ # expect old child metafiles for the replaced table version to remain readable
1056
+ child_metafiles_read_post_replace = [
1057
+ Delta.read(commit_results[5][2]),
1058
+ Partition.read(commit_results[4][2]),
1059
+ Stream.read(commit_results[3][2]),
1060
+ ]
1061
+ # expect old child metafiles read to share the same parent table name as
1062
+ # the replacement table version, but have a different parent table
1063
+ # version ID
1064
+ for metafile in child_metafiles_read_post_replace:
1065
+ assert (
1066
+ metafile.table_name
1067
+ == replacement_table_version.table_name
1068
+ == original_table_version.table_name
1069
+ )
1070
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1071
+ parent_table_version_id = ancestor_ids[2]
1072
+ assert parent_table_version_id == original_table_version.id
1073
+
1074
+ # expect original child metafiles to share the original parent table version ID
1075
+ original_child_metafiles_to_create = [
1076
+ Delta(commit_results[5][0]),
1077
+ Partition(commit_results[4][0]),
1078
+ Stream(commit_results[3][0]),
1079
+ ]
1080
+ original_child_metafiles_created = [
1081
+ Delta(commit_results[5][1]),
1082
+ Partition(commit_results[4][1]),
1083
+ Stream(commit_results[3][1]),
1084
+ ]
1085
+ for i in range(len(original_child_metafiles_to_create)):
1086
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1087
+ parent_table_version_id = ancestor_ids[2]
1088
+ assert parent_table_version_id == original_table_version.id
1089
+
1090
+ # expect a subsequent replace of the original table version to fail
1091
+ bad_txn_operations = [
1092
+ TransactionOperation.of(
1093
+ operation_type=TransactionOperationType.REPLACE,
1094
+ dest_metafile=replacement_table_version,
1095
+ src_metafile=original_table_version,
1096
+ )
1097
+ ]
1098
+ transaction = Transaction.of(
1099
+ txn_operations=bad_txn_operations,
1100
+ )
1101
+ with pytest.raises(ObjectDeletedError):
1102
+ transaction.commit(temp_dir)
1103
+
1104
+ # expect deletes of the original table version to fail
1105
+ bad_txn_operations = [
1106
+ TransactionOperation.of(
1107
+ operation_type=TransactionOperationType.DELETE,
1108
+ dest_metafile=original_table_version,
1109
+ )
1110
+ ]
1111
+ transaction = Transaction.of(
1112
+ txn_operations=bad_txn_operations,
1113
+ )
1114
+ with pytest.raises(ObjectDeletedError):
1115
+ transaction.commit(temp_dir)
1116
+
1117
+ # expect new child metafile creation under the old table version to fail
1118
+ for metafile in original_child_metafiles_created:
1119
+ bad_txn_operations = [
1120
+ TransactionOperation.of(
1121
+ operation_type=TransactionOperationType.CREATE,
1122
+ dest_metafile=metafile,
1123
+ )
1124
+ ]
1125
+ transaction = Transaction.of(
1126
+ txn_operations=bad_txn_operations,
1127
+ )
1128
+ with pytest.raises(ObjectAlreadyExistsError):
1129
+ transaction.commit(temp_dir)
1130
+
1131
+ def test_delete_table(self, temp_dir):
1132
+ commit_results = _commit_single_delta_table(temp_dir)
1133
+ for expected, actual, _ in commit_results:
1134
+ assert expected.equivalent_to(actual)
1135
+ original_table: Table = commit_results[1][1]
1136
+
1137
+ txn_operations = [
1138
+ TransactionOperation.of(
1139
+ operation_type=TransactionOperationType.DELETE,
1140
+ dest_metafile=original_table,
1141
+ )
1142
+ ]
1143
+ transaction = Transaction.of(
1144
+ txn_operations=txn_operations,
1145
+ )
1146
+ # when the transaction is committed
1147
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1148
+
1149
+ # expect 1 new table metafile to be written
1150
+ assert len(write_paths) == 1
1151
+ delete_write_path = write_paths[0]
1152
+
1153
+ # expect the delete metafile to contain the input txn op dest_metafile
1154
+ assert TransactionOperationType.DELETE.value in delete_write_path
1155
+ actual_table = Table.read(delete_write_path)
1156
+ assert original_table.equivalent_to(actual_table)
1157
+
1158
+ # expect child metafiles in the deleted table to remain readable and unchanged
1159
+ child_metafiles_read_post_delete = [
1160
+ Delta.read(commit_results[5][2]),
1161
+ Partition.read(commit_results[4][2]),
1162
+ Stream.read(commit_results[3][2]),
1163
+ TableVersion.read(commit_results[2][2]),
1164
+ ]
1165
+ original_child_metafiles_to_create = [
1166
+ Delta(commit_results[5][0]),
1167
+ Partition(commit_results[4][0]),
1168
+ Stream(commit_results[3][0]),
1169
+ TableVersion(commit_results[2][0]),
1170
+ ]
1171
+ original_child_metafiles_created = [
1172
+ Delta(commit_results[5][1]),
1173
+ Partition(commit_results[4][1]),
1174
+ Stream(commit_results[3][1]),
1175
+ TableVersion(commit_results[2][1]),
1176
+ ]
1177
+ for i in range(len(original_child_metafiles_to_create)):
1178
+ assert child_metafiles_read_post_delete[i].equivalent_to(
1179
+ original_child_metafiles_to_create[i]
1180
+ )
1181
+ assert child_metafiles_read_post_delete[i].equivalent_to(
1182
+ original_child_metafiles_created[i]
1183
+ )
1184
+
1185
+ # expect a subsequent replace of the deleted table to fail
1186
+ replacement_table: Table = Table.based_on(original_table)
1187
+ bad_txn_operations = [
1188
+ TransactionOperation.of(
1189
+ operation_type=TransactionOperationType.REPLACE,
1190
+ dest_metafile=replacement_table,
1191
+ src_metafile=original_table,
1192
+ )
1193
+ ]
1194
+ transaction = Transaction.of(
1195
+ txn_operations=bad_txn_operations,
1196
+ )
1197
+ with pytest.raises(ObjectDeletedError):
1198
+ transaction.commit(temp_dir)
1199
+
1200
+ # expect subsequent deletes of the deleted table to fail
1201
+ bad_txn_operations = [
1202
+ TransactionOperation.of(
1203
+ operation_type=TransactionOperationType.DELETE,
1204
+ dest_metafile=original_table,
1205
+ )
1206
+ ]
1207
+ transaction = Transaction.of(
1208
+ txn_operations=bad_txn_operations,
1209
+ )
1210
+ with pytest.raises(ObjectDeletedError):
1211
+ transaction.commit(temp_dir)
1212
+
1213
+ # expect new child metafile creation under the deleted table to fail
1214
+ for metafile in original_child_metafiles_created:
1215
+ bad_txn_operations = [
1216
+ TransactionOperation.of(
1217
+ operation_type=TransactionOperationType.CREATE,
1218
+ dest_metafile=metafile,
1219
+ )
1220
+ ]
1221
+ transaction = Transaction.of(
1222
+ txn_operations=bad_txn_operations,
1223
+ )
1224
+ with pytest.raises(ObjectAlreadyExistsError):
1225
+ transaction.commit(temp_dir)
1226
+
1227
+ def test_replace_table(self, temp_dir):
1228
+ commit_results = _commit_single_delta_table(temp_dir)
1229
+ for expected, actual, _ in commit_results:
1230
+ assert expected.equivalent_to(actual)
1231
+ original_table: Table = commit_results[1][1]
1232
+
1233
+ # given a transaction containing a table replacement
1234
+ replacement_table: Table = Table.based_on(original_table)
1235
+
1236
+ # expect the proposed replacement table to be assigned a new ID, but
1237
+ # continue to have the same name as the original table
1238
+ assert replacement_table.id != original_table.id
1239
+ assert replacement_table.table_name == original_table.table_name
1240
+
1241
+ txn_operations = [
1242
+ TransactionOperation.of(
1243
+ operation_type=TransactionOperationType.REPLACE,
1244
+ dest_metafile=replacement_table,
1245
+ src_metafile=original_table,
1246
+ )
1247
+ ]
1248
+ transaction = Transaction.of(
1249
+ txn_operations=txn_operations,
1250
+ )
1251
+ # when the transaction is committed
1252
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1253
+
1254
+ # expect two new table metafiles to be written
1255
+ # (i.e., delete old table, create replacement table)
1256
+ assert len(write_paths) == 2
1257
+ delete_write_path = write_paths[0]
1258
+ create_write_path = write_paths[1]
1259
+
1260
+ # expect the replacement table to be successfully written and read
1261
+ assert TransactionOperationType.CREATE.value in create_write_path
1262
+ actual_table = Table.read(create_write_path)
1263
+ assert replacement_table.equivalent_to(actual_table)
1264
+
1265
+ # expect the delete metafile to also contain the replacement table
1266
+ assert TransactionOperationType.DELETE.value in delete_write_path
1267
+ actual_table = Table.read(delete_write_path)
1268
+ assert replacement_table.equivalent_to(actual_table)
1269
+
1270
+ # expect old child metafiles for the replaced table to remain readable
1271
+ child_metafiles_read_post_replace = [
1272
+ Delta.read(commit_results[5][2]),
1273
+ Partition.read(commit_results[4][2]),
1274
+ Stream.read(commit_results[3][2]),
1275
+ TableVersion.read(commit_results[2][2]),
1276
+ ]
1277
+ # expect old child metafiles read to share the same parent table name as
1278
+ # the replacement table, but have a different parent table ID
1279
+ for metafile in child_metafiles_read_post_replace:
1280
+ assert (
1281
+ metafile.table_name
1282
+ == replacement_table.table_name
1283
+ == original_table.table_name
1284
+ )
1285
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1286
+ parent_table_id = ancestor_ids[1]
1287
+ assert parent_table_id == original_table.id
1288
+
1289
+ # expect original child metafiles to share the original parent table ID
1290
+ original_child_metafiles_to_create = [
1291
+ Delta(commit_results[5][0]),
1292
+ Partition(commit_results[4][0]),
1293
+ Stream(commit_results[3][0]),
1294
+ TableVersion(commit_results[2][0]),
1295
+ ]
1296
+ original_child_metafiles_created = [
1297
+ Delta(commit_results[5][1]),
1298
+ Partition(commit_results[4][1]),
1299
+ Stream(commit_results[3][1]),
1300
+ TableVersion(commit_results[2][1]),
1301
+ ]
1302
+ for i in range(len(original_child_metafiles_to_create)):
1303
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1304
+ parent_table_id = ancestor_ids[1]
1305
+ assert parent_table_id == original_table.id
1306
+
1307
+ # expect a subsequent table replace of the original table to fail
1308
+ bad_txn_operations = [
1309
+ TransactionOperation.of(
1310
+ operation_type=TransactionOperationType.REPLACE,
1311
+ dest_metafile=replacement_table,
1312
+ src_metafile=original_table,
1313
+ )
1314
+ ]
1315
+ transaction = Transaction.of(
1316
+ txn_operations=bad_txn_operations,
1317
+ )
1318
+ with pytest.raises(ObjectDeletedError):
1319
+ transaction.commit(temp_dir)
1320
+
1321
+ # expect table deletes of the original table to fail
1322
+ bad_txn_operations = [
1323
+ TransactionOperation.of(
1324
+ operation_type=TransactionOperationType.DELETE,
1325
+ dest_metafile=original_table,
1326
+ )
1327
+ ]
1328
+ transaction = Transaction.of(
1329
+ txn_operations=bad_txn_operations,
1330
+ )
1331
+ with pytest.raises(ObjectDeletedError):
1332
+ transaction.commit(temp_dir)
1333
+
1334
+ # expect new child metafile creation under the old table to fail
1335
+ for metafile in original_child_metafiles_created:
1336
+ bad_txn_operations = [
1337
+ TransactionOperation.of(
1338
+ operation_type=TransactionOperationType.CREATE,
1339
+ dest_metafile=metafile,
1340
+ )
1341
+ ]
1342
+ transaction = Transaction.of(
1343
+ txn_operations=bad_txn_operations,
1344
+ )
1345
+ with pytest.raises(ObjectAlreadyExistsError):
1346
+ transaction.commit(temp_dir)
1347
+
1348
+ def test_delete_namespace(self, temp_dir):
1349
+ commit_results = _commit_single_delta_table(temp_dir)
1350
+ for expected, actual, _ in commit_results:
1351
+ assert expected.equivalent_to(actual)
1352
+ original_namespace: Namespace = commit_results[0][1]
1353
+
1354
+ txn_operations = [
1355
+ TransactionOperation.of(
1356
+ operation_type=TransactionOperationType.DELETE,
1357
+ dest_metafile=original_namespace,
1358
+ )
1359
+ ]
1360
+ transaction = Transaction.of(
1361
+ txn_operations=txn_operations,
1362
+ )
1363
+ # when the transaction is committed
1364
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1365
+
1366
+ # expect 1 new namespace metafile to be written
1367
+ assert len(write_paths) == 1
1368
+ delete_write_path = write_paths[0]
1369
+
1370
+ # expect the delete metafile to contain the input txn op dest_metafile
1371
+ assert TransactionOperationType.DELETE.value in delete_write_path
1372
+ actual_namespace = Namespace.read(delete_write_path)
1373
+ assert original_namespace.equivalent_to(actual_namespace)
1374
+
1375
+ # expect child metafiles in the deleted namespace to remain readable and unchanged
1376
+ child_metafiles_read_post_delete = [
1377
+ Delta.read(commit_results[5][2]),
1378
+ Partition.read(commit_results[4][2]),
1379
+ Stream.read(commit_results[3][2]),
1380
+ TableVersion.read(commit_results[2][2]),
1381
+ Table.read(commit_results[1][2]),
1382
+ ]
1383
+ original_child_metafiles_to_create = [
1384
+ Delta(commit_results[5][0]),
1385
+ Partition(commit_results[4][0]),
1386
+ Stream(commit_results[3][0]),
1387
+ TableVersion(commit_results[2][0]),
1388
+ Table(commit_results[1][0]),
1389
+ ]
1390
+ original_child_metafiles_created = [
1391
+ Delta(commit_results[5][1]),
1392
+ Partition(commit_results[4][1]),
1393
+ Stream(commit_results[3][1]),
1394
+ TableVersion(commit_results[2][1]),
1395
+ Table(commit_results[1][1]),
1396
+ ]
1397
+ for i in range(len(original_child_metafiles_to_create)):
1398
+ assert child_metafiles_read_post_delete[i].equivalent_to(
1399
+ original_child_metafiles_to_create[i]
1400
+ )
1401
+ assert child_metafiles_read_post_delete[i].equivalent_to(
1402
+ original_child_metafiles_created[i]
1403
+ )
1404
+
1405
+ # expect a subsequent replace of the deleted namespace to fail
1406
+ replacement_namespace: Namespace = Namespace.based_on(original_namespace)
1407
+ bad_txn_operations = [
1408
+ TransactionOperation.of(
1409
+ operation_type=TransactionOperationType.REPLACE,
1410
+ dest_metafile=replacement_namespace,
1411
+ src_metafile=original_namespace,
1412
+ )
1413
+ ]
1414
+ transaction = Transaction.of(
1415
+ txn_operations=bad_txn_operations,
1416
+ )
1417
+ with pytest.raises(ObjectDeletedError):
1418
+ transaction.commit(temp_dir)
1419
+
1420
+ # expect subsequent deletes of the deleted namespace to fail
1421
+ bad_txn_operations = [
1422
+ TransactionOperation.of(
1423
+ operation_type=TransactionOperationType.DELETE,
1424
+ dest_metafile=original_namespace,
1425
+ )
1426
+ ]
1427
+ transaction = Transaction.of(
1428
+ txn_operations=bad_txn_operations,
1429
+ )
1430
+ with pytest.raises(ObjectDeletedError):
1431
+ transaction.commit(temp_dir)
1432
+
1433
+ # expect new child metafile creation under the deleted namespace to fail
1434
+ for metafile in original_child_metafiles_created:
1435
+ bad_txn_operations = [
1436
+ TransactionOperation.of(
1437
+ operation_type=TransactionOperationType.CREATE,
1438
+ dest_metafile=metafile,
1439
+ )
1440
+ ]
1441
+ transaction = Transaction.of(
1442
+ txn_operations=bad_txn_operations,
1443
+ )
1444
+ with pytest.raises(ObjectAlreadyExistsError):
1445
+ transaction.commit(temp_dir)
1446
+
1447
+ def test_replace_namespace(self, temp_dir):
1448
+ commit_results = _commit_single_delta_table(temp_dir)
1449
+ for expected, actual, _ in commit_results:
1450
+ assert expected.equivalent_to(actual)
1451
+ original_namespace: Namespace = commit_results[0][1]
1452
+
1453
+ # given a transaction containing a namespace replacement
1454
+ replacement_namespace: Namespace = Namespace.based_on(original_namespace)
1455
+
1456
+ # expect the proposed replacement namespace to be assigned a new ID, but
1457
+ # continue to have the same name as the original namespace
1458
+ assert replacement_namespace.id != original_namespace.id
1459
+ assert replacement_namespace.namespace == original_namespace.namespace
1460
+
1461
+ txn_operations = [
1462
+ TransactionOperation.of(
1463
+ operation_type=TransactionOperationType.REPLACE,
1464
+ dest_metafile=replacement_namespace,
1465
+ src_metafile=original_namespace,
1466
+ )
1467
+ ]
1468
+ transaction = Transaction.of(
1469
+ txn_operations=txn_operations,
1470
+ )
1471
+ # when the transaction is committed
1472
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1473
+
1474
+ # expect two new namespace metafiles to be written
1475
+ # (i.e., delete old namespace, create replacement namespace)
1476
+ assert len(write_paths) == 2
1477
+ delete_write_path = write_paths[0]
1478
+ create_write_path = write_paths[1]
1479
+
1480
+ # expect the replacement namespace to be successfully written and read
1481
+ assert TransactionOperationType.CREATE.value in create_write_path
1482
+ actual_namespace = Namespace.read(create_write_path)
1483
+ assert replacement_namespace.equivalent_to(actual_namespace)
1484
+
1485
+ # expect the delete metafile to also contain the replacement namespace
1486
+ assert TransactionOperationType.DELETE.value in delete_write_path
1487
+ actual_namespace = Namespace.read(delete_write_path)
1488
+ assert replacement_namespace.equivalent_to(actual_namespace)
1489
+
1490
+ # expect old child metafiles for the replaced namespace to remain readable
1491
+ child_metafiles_read_post_replace = [
1492
+ Delta.read(commit_results[5][2]),
1493
+ Partition.read(commit_results[4][2]),
1494
+ Stream.read(commit_results[3][2]),
1495
+ TableVersion.read(commit_results[2][2]),
1496
+ Table.read(commit_results[1][2]),
1497
+ ]
1498
+ # expect old child metafiles read to share the same parent namespace name as
1499
+ # the replacement namespace, but have a different parent namespace ID
1500
+ for metafile in child_metafiles_read_post_replace:
1501
+ assert (
1502
+ metafile.namespace
1503
+ == replacement_namespace.namespace
1504
+ == original_namespace.namespace
1505
+ )
1506
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1507
+ parent_namespace_id = ancestor_ids[0]
1508
+ assert parent_namespace_id == original_namespace.id
1509
+
1510
+ # expect original child metafiles to share the original parent namespace ID
1511
+ original_child_metafiles_to_create = [
1512
+ Delta(commit_results[5][0]),
1513
+ Partition(commit_results[4][0]),
1514
+ Stream(commit_results[3][0]),
1515
+ TableVersion(commit_results[2][0]),
1516
+ Table(commit_results[1][0]),
1517
+ ]
1518
+ original_child_metafiles_created = [
1519
+ Delta(commit_results[5][1]),
1520
+ Partition(commit_results[4][1]),
1521
+ Stream(commit_results[3][1]),
1522
+ TableVersion(commit_results[2][1]),
1523
+ Table(commit_results[1][1]),
1524
+ ]
1525
+ for i in range(len(original_child_metafiles_to_create)):
1526
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1527
+ parent_namespace_id = ancestor_ids[0]
1528
+ assert parent_namespace_id == original_namespace.id
1529
+
1530
+ # expect a subsequent namespace replace of the original namespace to fail
1531
+ bad_txn_operations = [
1532
+ TransactionOperation.of(
1533
+ operation_type=TransactionOperationType.REPLACE,
1534
+ dest_metafile=replacement_namespace,
1535
+ src_metafile=original_namespace,
1536
+ )
1537
+ ]
1538
+ transaction = Transaction.of(
1539
+ txn_operations=bad_txn_operations,
1540
+ )
1541
+ with pytest.raises(ObjectDeletedError):
1542
+ transaction.commit(temp_dir)
1543
+
1544
+ # expect namespace deletes of the original namespace to fail
1545
+ bad_txn_operations = [
1546
+ TransactionOperation.of(
1547
+ operation_type=TransactionOperationType.DELETE,
1548
+ dest_metafile=original_namespace,
1549
+ )
1550
+ ]
1551
+ transaction = Transaction.of(
1552
+ txn_operations=bad_txn_operations,
1553
+ )
1554
+ with pytest.raises(ObjectDeletedError):
1555
+ transaction.commit(temp_dir)
1556
+
1557
+ # expect new child metafile creation under the old namespace to fail
1558
+ for metafile in original_child_metafiles_created:
1559
+ bad_txn_operations = [
1560
+ TransactionOperation.of(
1561
+ operation_type=TransactionOperationType.CREATE,
1562
+ dest_metafile=metafile,
1563
+ )
1564
+ ]
1565
+ transaction = Transaction.of(
1566
+ txn_operations=bad_txn_operations,
1567
+ )
1568
+ with pytest.raises(ObjectAlreadyExistsError):
1569
+ transaction.commit(temp_dir)
1570
+
1571
+ def test_create_stream_bad_order_txn_op_chaining(self, temp_dir):
1572
+ commit_results = _commit_single_delta_table(temp_dir)
1573
+ for expected, actual, _ in commit_results:
1574
+ assert expected.equivalent_to(actual)
1575
+ # given a transaction containing:
1576
+
1577
+ # 1. a new table version in an existing table
1578
+ original_table_version_created = TableVersion(commit_results[2][1])
1579
+ new_table_version: TableVersion = TableVersion.based_on(
1580
+ other=original_table_version_created,
1581
+ new_id=original_table_version_created.id + "0",
1582
+ )
1583
+ # 2. a new stream in the new table version
1584
+ original_stream_created = Stream(commit_results[3][1])
1585
+ new_stream: Stream = Stream.based_on(
1586
+ other=original_stream_created,
1587
+ new_id="test_stream_id",
1588
+ )
1589
+ new_stream.table_version_locator.table_version = new_table_version.table_version
1590
+
1591
+ # 3. ordered transaction operations that try to put the new stream
1592
+ # in the new table version before it is created
1593
+ txn_operations = [
1594
+ TransactionOperation.of(
1595
+ TransactionOperationType.CREATE,
1596
+ new_stream,
1597
+ ),
1598
+ TransactionOperation.of(
1599
+ TransactionOperationType.CREATE,
1600
+ new_table_version,
1601
+ ),
1602
+ ]
1603
+ transaction = Transaction.of(
1604
+ txn_operations=txn_operations,
1605
+ )
1606
+ # when the transaction is committed,
1607
+ # expect stream creation to fail
1608
+ with pytest.raises(ObjectNotFoundError):
1609
+ transaction.commit(temp_dir)
1610
+ # when a transaction with the operations reversed is committed,
1611
+ transaction = Transaction.of(
1612
+ txn_operations=list(reversed(txn_operations)),
1613
+ )
1614
+ # expect table version and stream creation to succeed
1615
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1616
+ assert len(write_paths) == 2
1617
+
1618
+ def test_table_rename_bad_order_txn_op_chaining(self, temp_dir):
1619
+ commit_results = _commit_single_delta_table(temp_dir)
1620
+ for expected, actual, _ in commit_results:
1621
+ assert expected.equivalent_to(actual)
1622
+ original_table: Table = commit_results[1][1]
1623
+ # given a transaction containing:
1624
+ # 1. a table rename
1625
+ renamed_table: Table = Table.update_for(original_table)
1626
+ renamed_table.locator = TableLocator.at(
1627
+ namespace="test_namespace",
1628
+ table_name="test_table_renamed",
1629
+ )
1630
+ # 2. a new table version in a renamed table
1631
+ original_table_version_created = TableVersion(commit_results[2][1])
1632
+ new_table_version_to_create: TableVersion = TableVersion.based_on(
1633
+ other=original_table_version_created,
1634
+ new_id=original_table_version_created.id + "0",
1635
+ )
1636
+ new_table_version_to_create.table_locator.table_name = renamed_table.table_name
1637
+ # 3. ordered transaction operations that try to put the new table
1638
+ # version in the renamed table before the table is renamed
1639
+ txn_operations = [
1640
+ TransactionOperation.of(
1641
+ TransactionOperationType.CREATE,
1642
+ new_table_version_to_create,
1643
+ ),
1644
+ TransactionOperation.of(
1645
+ TransactionOperationType.UPDATE,
1646
+ renamed_table,
1647
+ original_table,
1648
+ ),
1649
+ ]
1650
+ transaction = Transaction.of(
1651
+ txn_operations=txn_operations,
1652
+ )
1653
+ # when the transaction is committed,
1654
+ # expect the transaction to fail due to incorrect operation order
1655
+ with pytest.raises(ObjectNotFoundError):
1656
+ transaction.commit(temp_dir)
1657
+ # when a transaction with the operations reversed is committed,
1658
+ transaction = Transaction.of(
1659
+ txn_operations=list(reversed(txn_operations)),
1660
+ )
1661
+ # expect table and table version creation to succeed
1662
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1663
+ assert len(write_paths) == 2
1664
+
1665
+ def test_create_duplicate_namespace(self, temp_dir):
1666
+ namespace_locator = NamespaceLocator.of(namespace="test_namespace")
1667
+ namespace = Namespace.of(locator=namespace_locator)
1668
+ # given serial transaction that try to create two namespaces with
1669
+ # the same name
1670
+ transaction = Transaction.of(
1671
+ txn_operations=[
1672
+ TransactionOperation.of(
1673
+ TransactionOperationType.CREATE,
1674
+ namespace,
1675
+ ),
1676
+ ],
1677
+ )
1678
+ # expect the first transaction to be successfully committed
1679
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1680
+ deserialized_namespace = Namespace.read(write_paths.pop())
1681
+ assert namespace.equivalent_to(deserialized_namespace)
1682
+ # but expect the second transaction to fail
1683
+ with pytest.raises(ObjectAlreadyExistsError):
1684
+ transaction.commit(temp_dir)
1685
+
1686
+ def test_create_duplicate_namespace_txn_op_chaining(self, temp_dir):
1687
+ namespace_locator = NamespaceLocator.of(namespace="test_namespace")
1688
+ namespace = Namespace.of(locator=namespace_locator)
1689
+ # given a transaction that tries to create the same namespace twice
1690
+ transaction = Transaction.of(
1691
+ txn_operations=[
1692
+ TransactionOperation.of(
1693
+ TransactionOperationType.CREATE,
1694
+ namespace,
1695
+ ),
1696
+ TransactionOperation.of(
1697
+ TransactionOperationType.CREATE,
1698
+ namespace,
1699
+ ),
1700
+ ],
1701
+ )
1702
+ # when the transaction is committed,
1703
+ # expect duplicate namespace creation to fail
1704
+ with pytest.raises(ObjectAlreadyExistsError):
1705
+ transaction.commit(temp_dir)
1706
+
1707
+ def test_create_stream_in_missing_table_version(self, temp_dir):
1708
+ commit_results = _commit_single_delta_table(temp_dir)
1709
+ for expected, actual, _ in commit_results:
1710
+ assert expected.equivalent_to(actual)
1711
+ # given a transaction that tries to create a single stream
1712
+ # in a table version that doesn't exist
1713
+ original_stream_created = Stream(commit_results[3][1])
1714
+ new_stream: Stream = Stream.based_on(
1715
+ other=original_stream_created,
1716
+ new_id="test_stream_id",
1717
+ )
1718
+ new_stream.table_version_locator.table_version = "missing_table_version.0"
1719
+ transaction = Transaction.of(
1720
+ txn_operations=[
1721
+ TransactionOperation.of(
1722
+ TransactionOperationType.CREATE,
1723
+ new_stream,
1724
+ )
1725
+ ],
1726
+ )
1727
+ # when the transaction is committed,
1728
+ # expect stream creation to fail
1729
+ with pytest.raises(ObjectNotFoundError):
1730
+ transaction.commit(temp_dir)
1731
+
1732
+ def test_create_table_version_in_missing_namespace(self, temp_dir):
1733
+ commit_results = _commit_single_delta_table(temp_dir)
1734
+ for expected, actual, _ in commit_results:
1735
+ assert expected.equivalent_to(actual)
1736
+ # given a transaction that tries to create a single table version
1737
+ # in a namespace that doesn't exist
1738
+ original_table_version_created = TableVersion(commit_results[2][1])
1739
+ new_table_version: TableVersion = TableVersion.based_on(
1740
+ other=original_table_version_created,
1741
+ new_id="test_table_version.1",
1742
+ )
1743
+ new_table_version.namespace_locator.namespace = "missing_namespace"
1744
+ transaction = Transaction.of(
1745
+ txn_operations=[
1746
+ TransactionOperation.of(
1747
+ TransactionOperationType.CREATE,
1748
+ new_table_version,
1749
+ )
1750
+ ],
1751
+ )
1752
+ # when the transaction is committed,
1753
+ # expect table version creation to fail
1754
+ with pytest.raises(ObjectNotFoundError):
1755
+ transaction.commit(temp_dir)
1756
+
1757
+ def test_create_table_version_in_missing_table(self, temp_dir):
1758
+ commit_results = _commit_single_delta_table(temp_dir)
1759
+ for expected, actual, _ in commit_results:
1760
+ assert expected.equivalent_to(actual)
1761
+ # given a transaction that tries to create a single table version
1762
+ # in a table that doesn't exist
1763
+ original_table_version_created = TableVersion(commit_results[2][1])
1764
+ new_table_version: TableVersion = TableVersion.based_on(
1765
+ other=original_table_version_created,
1766
+ new_id="test_table_version.1",
1767
+ )
1768
+ new_table_version.table_locator.table_name = "missing_table"
1769
+ transaction = Transaction.of(
1770
+ txn_operations=[
1771
+ TransactionOperation.of(
1772
+ TransactionOperationType.CREATE,
1773
+ new_table_version,
1774
+ )
1775
+ ],
1776
+ )
1777
+ # when the transaction is committed,
1778
+ # expect table version creation to fail
1779
+ with pytest.raises(ObjectNotFoundError):
1780
+ transaction.commit(temp_dir)
1781
+
1782
+ def test_create_table_in_missing_namespace(self, temp_dir):
1783
+ table_locator = TableLocator.at(
1784
+ namespace="missing_namespace",
1785
+ table_name="test_table",
1786
+ )
1787
+ table = Table.of(
1788
+ locator=table_locator,
1789
+ description="test table description",
1790
+ )
1791
+ # given a transaction that tries to create a single table in a
1792
+ # namespace that doesn't exist
1793
+ transaction = Transaction.of(
1794
+ txn_operations=[
1795
+ TransactionOperation.of(
1796
+ TransactionOperationType.CREATE,
1797
+ table,
1798
+ )
1799
+ ],
1800
+ )
1801
+ # when the transaction is committed,
1802
+ # expect table creation to fail
1803
+ with pytest.raises(ObjectNotFoundError):
1804
+ transaction.commit(temp_dir)
1805
+
1806
+ def test_rename_table_txn_op_chaining(self, temp_dir):
1807
+ commit_results = _commit_single_delta_table(temp_dir)
1808
+ for expected, actual, _ in commit_results:
1809
+ assert expected.equivalent_to(actual)
1810
+ original_table: Table = commit_results[1][1]
1811
+ # given a transaction containing:
1812
+ # 1. a table rename
1813
+ renamed_table: Table = Table.update_for(original_table)
1814
+ renamed_table.locator = TableLocator.at(
1815
+ namespace="test_namespace",
1816
+ table_name="test_table_renamed",
1817
+ )
1818
+ original_delta_created = Delta(commit_results[5][1])
1819
+ original_partition_created = Partition(commit_results[4][1])
1820
+ original_stream_created = Stream(commit_results[3][1])
1821
+ original_table_version_created = TableVersion(commit_results[2][1])
1822
+ # 2. a new table version in the renamed table
1823
+ new_table_version_to_create: TableVersion = TableVersion.based_on(
1824
+ other=original_table_version_created,
1825
+ new_id=original_table_version_created.table_version + "0",
1826
+ )
1827
+ new_table_version_to_create.table_locator.table_name = renamed_table.table_name
1828
+ # 3. a new stream in the new table version in the renamed table
1829
+ new_stream_to_create: Stream = Stream.based_on(
1830
+ other=original_stream_created,
1831
+ new_id=original_stream_created.stream_id + "_2",
1832
+ )
1833
+ new_stream_to_create.locator.table_version_locator = (
1834
+ new_table_version_to_create.locator
1835
+ )
1836
+ # 4. a new partition in the new stream in the new table version
1837
+ # in the renamed table
1838
+ new_partition_to_create: Partition = Partition.based_on(
1839
+ other=original_partition_created,
1840
+ new_id=original_partition_created.partition_id + "_2",
1841
+ )
1842
+ new_partition_to_create.locator.stream_locator = new_stream_to_create.locator
1843
+ # 5. a new delta in the new partition in the new stream in the new
1844
+ # table version in the renamed table
1845
+ new_delta_to_create = Delta.based_on(
1846
+ other=original_delta_created,
1847
+ new_id="2",
1848
+ )
1849
+ new_delta_to_create.locator.partition_locator = new_partition_to_create.locator
1850
+ # 6. ordered transaction operations that ensure all prior
1851
+ # dependencies are satisfied
1852
+ txn_operations = [
1853
+ TransactionOperation.of(
1854
+ operation_type=TransactionOperationType.UPDATE,
1855
+ dest_metafile=renamed_table,
1856
+ src_metafile=original_table,
1857
+ ),
1858
+ TransactionOperation.of(
1859
+ operation_type=TransactionOperationType.CREATE,
1860
+ dest_metafile=new_table_version_to_create,
1861
+ ),
1862
+ TransactionOperation.of(
1863
+ operation_type=TransactionOperationType.CREATE,
1864
+ dest_metafile=new_stream_to_create,
1865
+ ),
1866
+ TransactionOperation.of(
1867
+ operation_type=TransactionOperationType.CREATE,
1868
+ dest_metafile=new_partition_to_create,
1869
+ ),
1870
+ TransactionOperation.of(
1871
+ operation_type=TransactionOperationType.CREATE,
1872
+ dest_metafile=new_delta_to_create,
1873
+ ),
1874
+ ]
1875
+ transaction = Transaction.of(
1876
+ txn_operations=txn_operations,
1877
+ )
1878
+ # when the transaction is committed
1879
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1880
+
1881
+ # expect the transaction to successfully create 5 new metafiles
1882
+ assert len(write_paths) == 5
1883
+
1884
+ # expect the table to be successfully renamed
1885
+ actual_table = Table.read(write_paths[0])
1886
+ assert renamed_table.equivalent_to(actual_table)
1887
+
1888
+ # expect the new table version in the renamed table to be
1889
+ # successfully created
1890
+ actual_table_version = TableVersion.read(write_paths[1])
1891
+ assert new_table_version_to_create.equivalent_to(actual_table_version)
1892
+
1893
+ # expect the new stream in the new table version in the renamed
1894
+ # table to be successfully created
1895
+ actual_stream = Stream.read(write_paths[2])
1896
+ assert new_stream_to_create.equivalent_to(actual_stream)
1897
+
1898
+ # expect the new partition in the new stream in the new table
1899
+ # version in the renamed table to be successfully created
1900
+ actual_partition = Partition.read(write_paths[3])
1901
+ assert new_partition_to_create.equivalent_to(actual_partition)
1902
+
1903
+ # expect the new delta in the new partition in the new stream in
1904
+ # the new table version in the renamed table to be successfully
1905
+ # created
1906
+ actual_delta = Delta.read(write_paths[4])
1907
+ assert new_delta_to_create.equivalent_to(actual_delta)
1908
+
1909
+ def test_rename_table(self, temp_dir):
1910
+ commit_results = _commit_single_delta_table(temp_dir)
1911
+ for expected, actual, _ in commit_results:
1912
+ assert expected.equivalent_to(actual)
1913
+ original_table: Table = commit_results[1][1]
1914
+
1915
+ # given a transaction containing a table rename
1916
+ renamed_table: Table = Table.update_for(original_table)
1917
+ renamed_table.locator = TableLocator.at(
1918
+ namespace="test_namespace",
1919
+ table_name="test_table_renamed",
1920
+ )
1921
+ txn_operations = [
1922
+ TransactionOperation.of(
1923
+ operation_type=TransactionOperationType.UPDATE,
1924
+ dest_metafile=renamed_table,
1925
+ src_metafile=original_table,
1926
+ )
1927
+ ]
1928
+ transaction = Transaction.of(
1929
+ txn_operations=txn_operations,
1930
+ )
1931
+ # when the transaction is committed
1932
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1933
+
1934
+ # expect only one new table metafile to be written
1935
+ assert len(write_paths) == 1
1936
+
1937
+ # expect the table to be successfully renamed
1938
+ actual_table = Table.read(write_paths[0])
1939
+ assert renamed_table == actual_table
1940
+
1941
+ # expect all new child metafiles read to return the new table name
1942
+ child_metafiles_read_post_rename = [
1943
+ Delta.read(commit_results[5][2]),
1944
+ Partition.read(commit_results[4][2]),
1945
+ Stream.read(commit_results[3][2]),
1946
+ TableVersion.read(commit_results[2][2]),
1947
+ ]
1948
+ for metafile in child_metafiles_read_post_rename:
1949
+ assert metafile.table_name == renamed_table.table_name
1950
+
1951
+ # expect all original metafiles to return the original table name
1952
+ original_child_metafiles_to_create = [
1953
+ Delta(commit_results[5][0]),
1954
+ Partition(commit_results[4][0]),
1955
+ Stream(commit_results[3][0]),
1956
+ TableVersion(commit_results[2][0]),
1957
+ ]
1958
+ original_child_metafiles_created = [
1959
+ Delta(commit_results[5][1]),
1960
+ Partition(commit_results[4][1]),
1961
+ Stream(commit_results[3][1]),
1962
+ TableVersion(commit_results[2][1]),
1963
+ ]
1964
+ for i in range(len(original_child_metafiles_to_create)):
1965
+ assert (
1966
+ original_child_metafiles_created[i].table_name
1967
+ == original_child_metafiles_to_create[i].table_name
1968
+ == original_table.table_name
1969
+ )
1970
+
1971
+ # expect a subsequent table update from the old table name to fail
1972
+ bad_txn_operations = [
1973
+ TransactionOperation.of(
1974
+ operation_type=TransactionOperationType.UPDATE,
1975
+ dest_metafile=renamed_table,
1976
+ src_metafile=original_table,
1977
+ )
1978
+ ]
1979
+ transaction = Transaction.of(
1980
+ txn_operations=bad_txn_operations,
1981
+ )
1982
+ with pytest.raises(ObjectDeletedError):
1983
+ transaction.commit(temp_dir)
1984
+
1985
+ # expect table deletes of the old table name fail
1986
+ bad_txn_operations = [
1987
+ TransactionOperation.of(
1988
+ operation_type=TransactionOperationType.DELETE,
1989
+ dest_metafile=original_table,
1990
+ )
1991
+ ]
1992
+ transaction = Transaction.of(
1993
+ txn_operations=bad_txn_operations,
1994
+ )
1995
+ with pytest.raises(ObjectDeletedError):
1996
+ transaction.commit(temp_dir)
1997
+
1998
+ # expect child metafile creation under the old table name to fail
1999
+ for metafile in original_child_metafiles_created:
2000
+ bad_txn_operations = [
2001
+ TransactionOperation.of(
2002
+ operation_type=TransactionOperationType.CREATE,
2003
+ dest_metafile=metafile,
2004
+ )
2005
+ ]
2006
+ transaction = Transaction.of(
2007
+ txn_operations=bad_txn_operations,
2008
+ )
2009
+ with pytest.raises(ObjectAlreadyExistsError):
2010
+ transaction.commit(temp_dir)
2011
+
2012
+ def test_rename_namespace(self, temp_dir):
2013
+ commit_results = _commit_single_delta_table(temp_dir)
2014
+ for expected, actual, _ in commit_results:
2015
+ assert expected.equivalent_to(actual)
2016
+ original_namespace = commit_results[0][1]
2017
+ # given a transaction containing a namespace rename
2018
+ renamed_namespace: Namespace = Namespace.update_for(original_namespace)
2019
+ renamed_namespace.locator = NamespaceLocator.of(
2020
+ namespace="test_namespace_renamed",
2021
+ )
2022
+ txn_operations = [
2023
+ TransactionOperation.of(
2024
+ operation_type=TransactionOperationType.UPDATE,
2025
+ dest_metafile=renamed_namespace,
2026
+ src_metafile=original_namespace,
2027
+ )
2028
+ ]
2029
+ transaction = Transaction.of(
2030
+ txn_operations=txn_operations,
2031
+ )
2032
+ # when the transaction is committed
2033
+ write_paths, txn_log_path = transaction.commit(temp_dir)
2034
+
2035
+ # expect only one new namespace metafile to be written
2036
+ assert len(write_paths) == 1
2037
+
2038
+ # expect the namespace to be successfully renamed
2039
+ actual_namespace = Namespace.read(write_paths[0])
2040
+ assert renamed_namespace == actual_namespace
2041
+
2042
+ # expect all child metafiles read to return the new namespace
2043
+ child_metafiles_read_post_rename = [
2044
+ Delta.read(commit_results[5][2]),
2045
+ Partition.read(commit_results[4][2]),
2046
+ Stream.read(commit_results[3][2]),
2047
+ TableVersion.read(commit_results[2][2]),
2048
+ Table.read(commit_results[1][2]),
2049
+ ]
2050
+ for metafile in child_metafiles_read_post_rename:
2051
+ assert metafile.namespace == "test_namespace_renamed"
2052
+
2053
+ # expect the original metafiles to return the original namespace
2054
+ original_child_metafiles_to_create = [
2055
+ Delta(commit_results[5][0]),
2056
+ Partition(commit_results[4][0]),
2057
+ Stream(commit_results[3][0]),
2058
+ TableVersion(commit_results[2][0]),
2059
+ Table(commit_results[1][0]),
2060
+ ]
2061
+ original_child_metafiles_created = [
2062
+ Delta(commit_results[5][1]),
2063
+ Partition(commit_results[4][1]),
2064
+ Stream(commit_results[3][1]),
2065
+ TableVersion(commit_results[2][1]),
2066
+ Table(commit_results[1][1]),
2067
+ ]
2068
+ for i in range(len(original_child_metafiles_to_create)):
2069
+ assert (
2070
+ original_child_metafiles_created[i].namespace
2071
+ == original_child_metafiles_to_create[i].namespace
2072
+ == "test_namespace"
2073
+ )
2074
+
2075
+ # expect a subsequent update of the old namespace name to fail
2076
+ bad_txn_operations = [
2077
+ TransactionOperation.of(
2078
+ operation_type=TransactionOperationType.UPDATE,
2079
+ dest_metafile=renamed_namespace,
2080
+ src_metafile=original_namespace,
2081
+ )
2082
+ ]
2083
+ transaction = Transaction.of(
2084
+ txn_operations=bad_txn_operations,
2085
+ )
2086
+ with pytest.raises(ObjectDeletedError):
2087
+ transaction.commit(temp_dir)
2088
+
2089
+ # expect namespace deletes of the old namespace name fail
2090
+ bad_txn_operations = [
2091
+ TransactionOperation.of(
2092
+ operation_type=TransactionOperationType.DELETE,
2093
+ dest_metafile=original_namespace,
2094
+ )
2095
+ ]
2096
+ transaction = Transaction.of(
2097
+ txn_operations=bad_txn_operations,
2098
+ )
2099
+ with pytest.raises(ObjectDeletedError):
2100
+ transaction.commit(temp_dir)
2101
+
2102
+ # expect child metafile creation under the old namespace to fail
2103
+ for metafile in original_child_metafiles_created:
2104
+ bad_txn_operations = [
2105
+ TransactionOperation.of(
2106
+ operation_type=TransactionOperationType.CREATE,
2107
+ dest_metafile=metafile,
2108
+ )
2109
+ ]
2110
+ transaction = Transaction.of(
2111
+ txn_operations=bad_txn_operations,
2112
+ )
2113
+ with pytest.raises(ObjectAlreadyExistsError):
2114
+ transaction.commit(temp_dir)
2115
+
2116
+ def test_e2e_serde(self, temp_dir):
2117
+ # given a transaction that creates a single namespace, table,
2118
+ # table version, stream, partition, and delta
2119
+ commit_results = _commit_single_delta_table(temp_dir)
2120
+ # when the transaction is committed, expect all actual metafiles
2121
+ # created to match the expected/input metafiles to create
2122
+ for expected, actual, _ in commit_results:
2123
+ assert expected.equivalent_to(actual)
2124
+
2125
+ def test_namespace_serde(self, temp_dir):
2126
+ namespace_locator = NamespaceLocator.of(namespace="test_namespace")
2127
+ namespace = Namespace.of(locator=namespace_locator)
2128
+ # given a transaction that creates a single namespace
2129
+ write_paths, txn_log_path = Transaction.of(
2130
+ txn_operations=[
2131
+ TransactionOperation.of(
2132
+ operation_type=TransactionOperationType.CREATE,
2133
+ dest_metafile=namespace,
2134
+ )
2135
+ ],
2136
+ ).commit(temp_dir)
2137
+ # when the transaction is committed,
2138
+ # expect the namespace created to match the namespace given
2139
+ deserialized_namespace = Namespace.read(write_paths.pop())
2140
+ assert namespace.equivalent_to(deserialized_namespace)
2141
+
2142
+ def test_table_serde(self, temp_dir):
2143
+ table_locator = TableLocator.at(
2144
+ namespace=None,
2145
+ table_name="test_table",
2146
+ )
2147
+ table = Table.of(
2148
+ locator=table_locator,
2149
+ description="test table description",
2150
+ )
2151
+ # given a transaction that creates a single table
2152
+ write_paths, txn_log_path = Transaction.of(
2153
+ txn_operations=[
2154
+ TransactionOperation.of(
2155
+ operation_type=TransactionOperationType.CREATE,
2156
+ dest_metafile=table,
2157
+ )
2158
+ ],
2159
+ ).commit(temp_dir)
2160
+ # when the transaction is committed,
2161
+ # expect the table created to match the table given
2162
+ deserialized_table = Table.read(write_paths.pop())
2163
+ assert table.equivalent_to(deserialized_table)
2164
+
2165
+ def test_table_version_serde(self, temp_dir):
2166
+ table_version_locator = TableVersionLocator.at(
2167
+ namespace=None,
2168
+ table_name=None,
2169
+ table_version="test_table_version.1",
2170
+ )
2171
+ schema = Schema.of(
2172
+ [
2173
+ Field.of(
2174
+ field=pa.field("some_string", pa.string(), nullable=False),
2175
+ field_id=1,
2176
+ is_merge_key=True,
2177
+ ),
2178
+ Field.of(
2179
+ field=pa.field("some_int32", pa.int32(), nullable=False),
2180
+ field_id=2,
2181
+ is_merge_key=True,
2182
+ ),
2183
+ Field.of(
2184
+ field=pa.field("some_float64", pa.float64()),
2185
+ field_id=3,
2186
+ is_merge_key=False,
2187
+ ),
2188
+ ]
2189
+ )
2190
+ bucket_transform = BucketTransform.of(
2191
+ BucketTransformParameters.of(
2192
+ num_buckets=2,
2193
+ bucketing_strategy=BucketingStrategy.DEFAULT,
2194
+ )
2195
+ )
2196
+ partition_keys = [
2197
+ PartitionKey.of(
2198
+ key=["some_string", "some_int32"],
2199
+ name="test_partition_key",
2200
+ field_id=1,
2201
+ transform=bucket_transform,
2202
+ )
2203
+ ]
2204
+ partition_scheme = PartitionScheme.of(
2205
+ keys=partition_keys,
2206
+ name="test_partition_scheme",
2207
+ scheme_id="test_partition_scheme_id",
2208
+ )
2209
+ sort_keys = [
2210
+ SortKey.of(
2211
+ key=["some_int32"],
2212
+ sort_order=SortOrder.DESCENDING,
2213
+ null_order=NullOrder.AT_START,
2214
+ transform=TruncateTransform.of(
2215
+ TruncateTransformParameters.of(width=3),
2216
+ ),
2217
+ )
2218
+ ]
2219
+ sort_scheme = SortScheme.of(
2220
+ keys=sort_keys,
2221
+ name="test_sort_scheme",
2222
+ scheme_id="test_sort_scheme_id",
2223
+ )
2224
+ table_version = TableVersion.of(
2225
+ locator=table_version_locator,
2226
+ schema=schema,
2227
+ partition_scheme=partition_scheme,
2228
+ description="test table version description",
2229
+ properties={"test_property_key": "test_property_value"},
2230
+ content_types=[ContentType.PARQUET],
2231
+ sort_scheme=sort_scheme,
2232
+ watermark=1,
2233
+ lifecycle_state=LifecycleState.CREATED,
2234
+ schemas=[schema, schema, schema],
2235
+ partition_schemes=[partition_scheme, partition_scheme],
2236
+ sort_schemes=[sort_scheme, sort_scheme],
2237
+ )
2238
+ # given a transaction that creates a single table version
2239
+ write_paths, txn_log_path = Transaction.of(
2240
+ txn_operations=[
2241
+ TransactionOperation.of(
2242
+ operation_type=TransactionOperationType.CREATE,
2243
+ dest_metafile=table_version,
2244
+ )
2245
+ ],
2246
+ ).commit(temp_dir)
2247
+ # when the transaction is committed,
2248
+ # expect the table version created to match the table version given
2249
+ deserialized_table_version = TableVersion.read(write_paths.pop())
2250
+ assert table_version.equivalent_to(deserialized_table_version)
2251
+
2252
+ def test_stream_serde(self, temp_dir):
2253
+ stream_locator = StreamLocator.at(
2254
+ namespace=None,
2255
+ table_name=None,
2256
+ table_version=None,
2257
+ stream_id="test_stream_id",
2258
+ stream_format=StreamFormat.DELTACAT,
2259
+ )
2260
+ bucket_transform = BucketTransform.of(
2261
+ BucketTransformParameters.of(
2262
+ num_buckets=2,
2263
+ bucketing_strategy=BucketingStrategy.DEFAULT,
2264
+ )
2265
+ )
2266
+ partition_keys = [
2267
+ PartitionKey.of(
2268
+ key=["some_string", "some_int32"],
2269
+ name="test_partition_key",
2270
+ field_id=1,
2271
+ transform=bucket_transform,
2272
+ )
2273
+ ]
2274
+ partition_scheme = PartitionScheme.of(
2275
+ keys=partition_keys,
2276
+ name="test_partition_scheme",
2277
+ scheme_id="test_partition_scheme_id",
2278
+ )
2279
+ stream = Stream.of(
2280
+ locator=stream_locator,
2281
+ partition_scheme=partition_scheme,
2282
+ state=CommitState.STAGED,
2283
+ previous_stream_id="test_previous_stream_id",
2284
+ watermark=1,
2285
+ )
2286
+ # given a transaction that creates a single stream
2287
+ write_paths, txn_log_path = Transaction.of(
2288
+ txn_operations=[
2289
+ TransactionOperation.of(
2290
+ operation_type=TransactionOperationType.CREATE,
2291
+ dest_metafile=stream,
2292
+ )
2293
+ ],
2294
+ ).commit(temp_dir)
2295
+ # when the transaction is committed,
2296
+ # expect the stream created to match the stream given
2297
+ deserialized_stream = Stream.read(write_paths.pop())
2298
+ assert stream.equivalent_to(deserialized_stream)
2299
+
2300
+ def test_partition_serde(self, temp_dir):
2301
+ partition_locator = PartitionLocator.at(
2302
+ namespace=None,
2303
+ table_name=None,
2304
+ table_version=None,
2305
+ stream_id=None,
2306
+ stream_format=None,
2307
+ partition_values=["a", 1],
2308
+ partition_id="test_partition_id",
2309
+ )
2310
+ partition = Partition.of(
2311
+ locator=partition_locator,
2312
+ content_types=[ContentType.PARQUET],
2313
+ state=CommitState.STAGED,
2314
+ previous_stream_position=0,
2315
+ previous_partition_id="test_previous_partition_id",
2316
+ stream_position=1,
2317
+ partition_scheme_id="test_partition_scheme_id",
2318
+ )
2319
+ # given a transaction that creates a single partition
2320
+ write_paths, txn_log_path = Transaction.of(
2321
+ txn_operations=[
2322
+ TransactionOperation.of(
2323
+ operation_type=TransactionOperationType.CREATE,
2324
+ dest_metafile=partition,
2325
+ )
2326
+ ],
2327
+ ).commit(temp_dir)
2328
+ # when the transaction is committed,
2329
+ # expect the partition created to match the partition given
2330
+ deserialized_partition = Partition.read(write_paths.pop())
2331
+ assert partition.equivalent_to(deserialized_partition)
2332
+
2333
+ def test_delta_serde(self, temp_dir):
2334
+ delta_locator = DeltaLocator.at(
2335
+ namespace=None,
2336
+ table_name=None,
2337
+ table_version=None,
2338
+ stream_id=None,
2339
+ stream_format=None,
2340
+ partition_values=None,
2341
+ partition_id=None,
2342
+ stream_position=1,
2343
+ )
2344
+ manifest_entry_params = EntryParams.of(
2345
+ equality_field_locators=["some_string", "some_int32"],
2346
+ )
2347
+ manifest_meta = ManifestMeta.of(
2348
+ record_count=1,
2349
+ content_length=10,
2350
+ content_type=ContentType.PARQUET.value,
2351
+ content_encoding=ContentEncoding.IDENTITY.value,
2352
+ source_content_length=100,
2353
+ credentials={"foo": "bar"},
2354
+ content_type_parameters=[{"param1": "value1"}],
2355
+ entry_type=EntryType.EQUALITY_DELETE,
2356
+ entry_params=manifest_entry_params,
2357
+ )
2358
+ manifest = Manifest.of(
2359
+ entries=[
2360
+ ManifestEntry.of(
2361
+ url="s3://test/url",
2362
+ meta=manifest_meta,
2363
+ )
2364
+ ],
2365
+ author=ManifestAuthor.of(
2366
+ name="deltacat",
2367
+ version="2.0",
2368
+ ),
2369
+ entry_type=EntryType.EQUALITY_DELETE,
2370
+ entry_params=manifest_entry_params,
2371
+ )
2372
+ delta = Delta.of(
2373
+ locator=delta_locator,
2374
+ delta_type=DeltaType.APPEND,
2375
+ meta=manifest_meta,
2376
+ properties={"property1": "value1"},
2377
+ manifest=manifest,
2378
+ previous_stream_position=0,
2379
+ )
2380
+ # given a transaction that creates a single delta
2381
+ write_paths, txn_log_path = Transaction.of(
2382
+ txn_operations=[
2383
+ TransactionOperation.of(
2384
+ operation_type=TransactionOperationType.CREATE,
2385
+ dest_metafile=delta,
2386
+ )
2387
+ ],
2388
+ ).commit(temp_dir)
2389
+ # when the transaction is committed,
2390
+ # expect the delta created to match the delta given
2391
+ deserialized_delta = Delta.read(write_paths.pop())
2392
+ assert delta.equivalent_to(deserialized_delta)
2393
+
2394
+ def test_python_type_serde(self, temp_dir):
2395
+ table_locator = TableLocator.at(
2396
+ namespace=None,
2397
+ table_name="test_table",
2398
+ )
2399
+ # given a table whose property values contain every basic python type
2400
+ # except set, frozenset, and range which can't be serialized by msgpack
2401
+ # and memoryview which can't be pickled by copy.deepcopy
2402
+ properties = {
2403
+ "foo": 1,
2404
+ "bar": 2.0,
2405
+ "baz": True,
2406
+ "qux": b"123",
2407
+ "quux": None,
2408
+ "corge": [1, 2, 3],
2409
+ "grault": {"foo": "bar"},
2410
+ "garply": (1, 2, 3),
2411
+ "waldo": bytearray(3),
2412
+ }
2413
+ table = Table.of(
2414
+ locator=table_locator,
2415
+ description="test table description",
2416
+ properties=properties,
2417
+ )
2418
+ # when a transaction commits this table
2419
+ write_paths, txn_log_path = Transaction.of(
2420
+ txn_operations=[
2421
+ TransactionOperation.of(
2422
+ operation_type=TransactionOperationType.CREATE,
2423
+ dest_metafile=table,
2424
+ )
2425
+ ],
2426
+ ).commit(temp_dir)
2427
+ deserialized_table = Table.read(write_paths.pop())
2428
+ # expect the following SerDe transformations of the original properties:
2429
+ expected_properties = properties.copy()
2430
+ # 1. msgpack tranlates tuple to list
2431
+ expected_properties["garply"] = [1, 2, 3]
2432
+ # 2. msgpack unpacks bytearray into bytes
2433
+ expected_properties["waldo"] = b"\x00\x00\x00"
2434
+ # expect the table created to otherwise match the table given
2435
+ table.properties = expected_properties
2436
+ assert table.equivalent_to(deserialized_table)
2437
+
2438
+ def test_metafile_read_bad_path(self, temp_dir):
2439
+ with pytest.raises(FileNotFoundError):
2440
+ Delta.read("foobar")