deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,653 @@
1
+ import pytest
2
+ import os
3
+ import pyarrow
4
+ import msgpack
5
+ import posixpath
6
+
7
+
8
+ from deltacat.storage import (
9
+ Transaction,
10
+ TransactionOperation,
11
+ TransactionOperationType,
12
+ Namespace,
13
+ NamespaceLocator,
14
+ Metafile,
15
+ )
16
+
17
+ from deltacat.constants import (
18
+ TXN_DIR_NAME,
19
+ RUNNING_TXN_DIR_NAME,
20
+ PAUSED_TXN_DIR_NAME,
21
+ )
22
+
23
+
24
+ class TestAbsToRelative:
25
@classmethod
def setup_method(cls):
    """Store the catalog root that the path tests read back from the class."""
    root_path = "/catalog/root/path"
    cls.catalog_root = root_path
28
+
29
+ # Test cases for the abs_to_relative function
30
def test_abs_to_relative_simple(self):
    """
    Checks that an absolute metafile path (string) located under the catalog
    root is converted into the matching root-relative path (string).
    """
    expected = "namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    root = TestAbsToRelative.catalog_root
    target = "/catalog/root/path/namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    actual = Transaction._abs_txn_meta_path_to_relative(root, target)
    assert actual == expected
43
+
44
def test_abs_to_relative_same_paths(self):
    """Expects a ValueError when the target path equals the catalog root."""
    root = TestAbsToRelative.catalog_root
    expected_error = (
        "Target and root are identical, but expected target to be a child of root."
    )
    with pytest.raises(ValueError, match=expected_error):
        # The root doubles as the target here.
        Transaction._abs_txn_meta_path_to_relative(root, root)
52
+
53
def test_abs_to_relative_root_with_trailing_slash(self):
    """Expects a trailing slash on the root not to change the relative result."""
    expected = "namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    root_with_slash = "/catalog/root/path/"
    target = "/catalog/root/path/namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    actual = Transaction._abs_txn_meta_path_to_relative(root_with_slash, target)
    assert actual == expected
63
+
64
def test_abs_to_relative_bad_root(self):
    """Expects a ValueError when the target does not live under the catalog root."""
    root = TestAbsToRelative.catalog_root
    unrelated_target = "/cat/rt/pth/namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk"
    expected_error = "Expected target to be a child of root."
    with pytest.raises(ValueError, match=expected_error):
        Transaction._abs_txn_meta_path_to_relative(root, unrelated_target)
69
+
70
def test_abs_to_relative_empty_path(self):
    """Expects a ValueError when either the root or the target is an empty string."""
    # (root, target) pairs, checked in order: empty root first, then empty target.
    cases = (
        ("", "/lorem/ipsum"),
        ("/lorem/ipsum/", ""),
    )
    for root, target in cases:
        with pytest.raises(
            ValueError, match="Expected target to be a child of root."
        ):
            Transaction._abs_txn_meta_path_to_relative(root, target)
75
+
76
+ # Test cases for the relativize_operation_paths function
77
def test_relativizemetafile_write_paths(self):
    """
    Expects relativize_operation_paths to turn an operation's absolute
    metafile write paths into paths relative to the catalog root.
    """
    root = "/catalog/root"
    # Placeholder CREATE operation that will carry the absolute paths.
    operation = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    # Plain attribute assignment installs the absolute paths on the operation.
    operation.metafile_write_paths = [
        "/catalog/root/path/to/metafile1.mpk",
        "/catalog/root/path/to/metafile2.mpk",
        "/catalog/root/another/path/lore_ipsum.mpk",
        "/catalog/root/another/path/meta/to/lorem_ipsum.mpk",
        "/catalog/root/another/path/lorem_ipsum.mpk",
        "/catalog/root/here.mpk",
    ]
    # Wrap the operation in a transaction and relativize against the root.
    txn = Transaction.of([operation])
    txn.relativize_operation_paths(operation, root)
    # Every path should now be expressed relative to the root.
    assert operation.metafile_write_paths == [
        "path/to/metafile1.mpk",
        "path/to/metafile2.mpk",
        "another/path/lore_ipsum.mpk",
        "another/path/meta/to/lorem_ipsum.mpk",
        "another/path/lorem_ipsum.mpk",
        "here.mpk",
    ]
108
+
109
def test_relativize_locator_write_paths(self):
    """
    Expects relativize_operation_paths to turn an operation's absolute
    locator write paths into paths relative to the catalog root.
    """
    root = "/catalog/root"
    # Placeholder CREATE operation that will carry the absolute paths.
    operation = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    # Plain attribute assignment installs the absolute paths on the operation.
    operation.locator_write_paths = [
        "/catalog/root/path/to/loc1.mpk",
        "/catalog/root/path/to/loc2.mpk",
        "/catalog/root/another/path/lore_ipsum.mpk",
        "/catalog/root/another/path/meta/to/lorem_ipsum.mpk",
        "/catalog/root/another/path/lorem_ipsum.mpk",
        "/catalog/root/here.mpk",
    ]
    # Wrap the operation in a transaction and relativize against the root.
    txn = Transaction.of(txn_operations=[operation])
    txn.relativize_operation_paths(operation, root)
    # Every path should now be expressed relative to the root.
    assert operation.locator_write_paths == [
        "path/to/loc1.mpk",
        "path/to/loc2.mpk",
        "another/path/lore_ipsum.mpk",
        "another/path/meta/to/lorem_ipsum.mpk",
        "another/path/lorem_ipsum.mpk",
        "here.mpk",
    ]
140
+
141
def test_relativize_metafile_and_locator_paths(self):
    """
    Expects a single relativize_operation_paths call to relativize both the
    metafile write paths and the locator write paths of one operation, using
    a deeper catalog root.
    """
    deep_root = "/meta_catalog/root_dir/a/b/c"
    # Expected root-relative results, declared first for reference.
    meta_relative_paths = [
        "namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk",
        "namespace/table/table_version/stream_id/partition_id/00000000000000000002.mpk",
        "namespace/table/table_version/stream_id/partition_id/00000000000000000003.mpk",
    ]
    loc_relative_paths = [
        "d/table/table_version/stream_id/partition_id/00000000000000000001.mpk",
        "e/table/table_version/stream_id/partition_id/00000000000000000002.mpk",
        "f/table/table_version/stream_id/partition_id/00000000000000000003.mpk",
    ]
    # Placeholder CREATE operation that will carry the absolute paths.
    transaction_operation = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    # Plain attribute assignment installs both absolute path lists.
    transaction_operation.metafile_write_paths = [
        "/meta_catalog/root_dir/a/b/c/namespace/table/table_version/stream_id/partition_id/00000000000000000001.mpk",
        "/meta_catalog/root_dir/a/b/c/namespace/table/table_version/stream_id/partition_id/00000000000000000002.mpk",
        "/meta_catalog/root_dir/a/b/c/namespace/table/table_version/stream_id/partition_id/00000000000000000003.mpk",
    ]
    transaction_operation.locator_write_paths = [
        "/meta_catalog/root_dir/a/b/c/d/table/table_version/stream_id/partition_id/00000000000000000001.mpk",
        "/meta_catalog/root_dir/a/b/c/e/table/table_version/stream_id/partition_id/00000000000000000002.mpk",
        "/meta_catalog/root_dir/a/b/c/f/table/table_version/stream_id/partition_id/00000000000000000003.mpk",
    ]
    # Wrap the operation in a transaction and relativize against the root.
    txn = Transaction.of([transaction_operation])
    txn.relativize_operation_paths(transaction_operation, deep_root)
    # Both path lists should now be expressed relative to the root.
    assert (
        transaction_operation.metafile_write_paths == meta_relative_paths
    ), f"Expected: {meta_relative_paths}, but got: {transaction_operation.metafile_write_paths}"
    assert (
        transaction_operation.locator_write_paths == loc_relative_paths
    ), f"Expected: {loc_relative_paths}, but got: {transaction_operation.locator_write_paths}"
182
+
183
def test_multiple_operations_relativize_paths(self):
    """
    Expects path relativization to work for each of eleven operations that
    belong to the same transaction and were assigned the same input lists.
    """
    root = "/catalog/root"
    abs_meta = [
        "/catalog/root/path/to/metafile1.mpk",
        "/catalog/root/path/to/metafile2.mpk",
        "/catalog/root/another/path/lore_ipsum.mpk",
        "/catalog/root/another/path/meta/to/lorem_ipsum.mpk",
        "/catalog/root/another/path/lorem_ipsum.mpk",
        "/catalog/root/here.mpk",
    ]
    abs_loc = [
        "/catalog/root/path/to/loc1.mpk",
        "/catalog/root/path/to/loc2.mpk",
        "/catalog/root/another/path/lore_ipsum.mpk",
        "/catalog/root/another/path/meta/to/lorem_ipsum.mpk",
        "/catalog/root/another/path/lorem_ipsum.mpk",
        "/catalog/root/here.mpk",
    ]
    rel_meta = [
        "path/to/metafile1.mpk",
        "path/to/metafile2.mpk",
        "another/path/lore_ipsum.mpk",
        "another/path/meta/to/lorem_ipsum.mpk",
        "another/path/lorem_ipsum.mpk",
        "here.mpk",
    ]
    rel_loc = [
        "path/to/loc1.mpk",
        "path/to/loc2.mpk",
        "another/path/lore_ipsum.mpk",
        "another/path/meta/to/lorem_ipsum.mpk",
        "another/path/lorem_ipsum.mpk",
        "here.mpk",
    ]
    # One placeholder metafile is shared by every operation.
    shared_metafile = Metafile({"id": "dummy_metafile_id"})

    def _new_operation():
        # Each operation is assigned the very same absolute-path list objects.
        op = TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=shared_metafile,
        )
        op.metafile_write_paths = abs_meta
        op.locator_write_paths = abs_loc
        return op

    operations = [_new_operation() for _ in range(11)]
    # Build the transaction, then relativize every operation before asserting.
    txn = Transaction.of(operations)
    for op in operations:
        txn.relativize_operation_paths(op, root)
    # Only after all operations were processed, verify each one.
    for op in operations:
        assert op.metafile_write_paths == rel_meta
        assert op.locator_write_paths == rel_loc
236
+
237
def test_empty_metafile_and_locator_write_paths(self):
    """Relativizing an operation without write paths leaves both lists empty."""
    root = "/catalog/root"
    operation = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )

    # Explicitly start from empty metafile and locator path lists.
    operation.metafile_write_paths = []
    operation.locator_write_paths = []

    Transaction.of([operation]).relativize_operation_paths(operation, root)

    assert operation.metafile_write_paths == []
    assert operation.locator_write_paths == []
250
+
251
def test_large_number_of_paths(self):
    """Relativize 5000 metafile write paths on a single CREATE operation."""
    path_count = 5000
    catalog_root = "/catalog/root"

    # Expected results are the inputs with the catalog root prefix removed.
    expected_paths = [f"path/to/file{i}.mpk" for i in range(path_count)]
    absolute_paths = [f"{catalog_root}/{relative}" for relative in expected_paths]

    operation = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    # Only the metafile write paths are populated in this test.
    operation.metafile_write_paths = absolute_paths

    Transaction.of([operation]).relativize_operation_paths(operation, catalog_root)

    assert operation.metafile_write_paths == expected_paths
263
+
264
def test_large_number_of_paths_multi_ops(self):
    """Relativize 1000 metafile write paths for each supported operation type.

    Each operation type gets its own operation, its own copy of the absolute
    paths, and its own single-operation transaction, so a failure is always
    attributed to the operation type that actually produced it.
    """
    catalog_root = "/catalog/root"
    absolute_paths = [f"/catalog/root/path/to/file{i}.mpk" for i in range(1000)]
    expected_paths = [f"path/to/file{i}.mpk" for i in range(1000)]

    # NOTE(review): TransactionOperationType.UPDATE was disabled in the
    # original list; presumably it needs more than a destination metafile.
    # TODO confirm before enabling it here.
    operation_types = [
        TransactionOperationType.CREATE,
        TransactionOperationType.DELETE,
        TransactionOperationType.READ_EXISTS,
        TransactionOperationType.READ_LATEST,
        TransactionOperationType.READ_CHILDREN,
        TransactionOperationType.READ_SIBLINGS,
    ]

    for op_type in operation_types:
        transaction_operation = TransactionOperation.of(
            operation_type=op_type,
            dest_metafile=Metafile({"id": "dummy_metafile_id"}),
        )
        # Use a private copy so one operation's relativization can never
        # influence the input seen by the next operation type.
        transaction_operation.metafile_write_paths = list(absolute_paths)

        # Relativize and verify inside the loop so every operation type is
        # checked, not just the last one constructed.
        transaction = Transaction.of([transaction_operation])
        transaction.relativize_operation_paths(transaction_operation, catalog_root)
        assert (
            transaction_operation.metafile_write_paths == expected_paths
        ), f"Failed for operation type {op_type}"
294
+
295
+
296
+ class TestTransactionPersistence:
297
+
298
+ # Verifies that transactions initialized with empty or None operations are marked interactive,
299
+ # while valid operations are not
300
def test_create_iterative_transaction(self):
    """Check the `interactive` flag for empty, missing and populated operations."""
    from_empty_list = Transaction.of(txn_operations=[])
    from_none = Transaction.of(txn_operations=None)

    create_op = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=Metafile({"id": "dummy_metafile_id"}),
    )
    with_operations = Transaction.of(txn_operations=[create_op, create_op])

    # An empty operation list must produce an interactive transaction.
    assert from_empty_list.interactive
    # Passing None instead of a list must also produce an interactive transaction.
    assert from_none.interactive
    # A populated operation list must not produce an interactive transaction.
    assert not with_operations.interactive
317
+
318
+ # Builds and commits a transaction step-by-step, then validates the output files and transaction success log
319
def test_commit_iterative_transaction(self, temp_dir):
    """Step two namespace creations into an interactive transaction and seal it.

    After sealing, the first two write paths must deserialize to namespaces
    equivalent to the ones created, and the success log path must end with
    the transaction's end time.
    """
    namespaces = [
        Namespace.of(locator=NamespaceLocator.of(namespace=name))
        for name in ("test_ns_1", "test_ns_2")
    ]

    # No operations are supplied up front; they are stepped in one by one.
    txn = Transaction.of().start(temp_dir)

    pending_ops = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=namespace,
        )
        for namespace in namespaces
    ]
    for pending in pending_ops:
        txn.step(pending)

    # seal() finishes the interactive transaction.
    write_paths, success_log_path = txn.seal()

    # Read back every written namespace before comparing any of them.
    restored = [Namespace.read(write_paths[idx]) for idx in range(len(namespaces))]
    for created, loaded in zip(namespaces, restored):
        assert created.equivalent_to(loaded)

    assert success_log_path.endswith(str(txn.end_time))
351
+
352
+ # Ensures that stepping and committing a transaction writes non-empty output files and a valid success log
353
def test_commit_iterative_file_creation(self, temp_dir):
    """Sealing a one-step transaction leaves non-empty output files on disk."""
    namespace = Namespace.of(locator=NamespaceLocator.of(namespace="check_writes"))
    txn = Transaction.of().start(temp_dir)
    txn.step(
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=namespace,
        )
    )
    write_paths, success_log_path = txn.seal()

    # Written metafiles come first, followed by the success log; each one
    # must exist and contain at least one byte.
    produced_files = [os.path.join(temp_dir, written) for written in write_paths]
    produced_files.append(success_log_path)
    for produced in produced_files:
        assert os.path.exists(produced)
        assert os.path.getsize(produced) > 0
369
+
370
+ # Confirms that a transaction can be paused, resumed, and successfully committed without data loss
371
def test_transaction_pause_and_resume_roundtrip(self, temp_dir):
    """Pause and resume a one-step transaction, then seal and verify its output."""
    namespace = Namespace.of(locator=NamespaceLocator.of(namespace="paused_resume_ns"))

    # Interactive transaction with a single namespace creation.
    txn = Transaction.of().start(temp_dir)
    create_op = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=namespace,
    )
    txn.step(create_op)

    # One pause/resume cycle before committing.
    txn.pause()
    txn.resume()

    write_paths, success_log_path = txn.seal()

    # The written namespace must round-trip and the success log must exist.
    restored = Namespace.read(write_paths[0])
    assert namespace.equivalent_to(restored)
    assert os.path.exists(success_log_path)
    assert success_log_path.endswith(str(txn.end_time))
395
+
396
+ # Validates that transaction state, including ID and write paths, is correctly preserved across pause/resume cycles
397
def test_resume_preserves_state_after_pause(self, temp_dir):
    """The transaction ID and write paths survive a pause/resume cycle."""
    namespace = Namespace.of(
        locator=NamespaceLocator.of(namespace="resume_state_check")
    )
    txn = Transaction.of().start(temp_dir)
    txn.step(
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=namespace,
        )
    )
    original_id = txn.id

    txn.pause()
    txn.resume()

    # The ID is unchanged, a time provider is set, and one metafile write
    # path is present after the resume.
    assert txn.id == original_id
    assert txn._time_provider is not None
    assert hasattr(txn, "metafile_write_paths")
    assert len(txn.metafile_write_paths) == 1

    # The resumed transaction can still be sealed.
    _, log_path = txn.seal()
    assert os.path.exists(log_path)
418
+
419
+ # Explicitly checks that fields are preserved
420
def test_resume_preserves_state_after_pause_deep(self, temp_dir):
    """Compare individual transaction fields before and after pause/resume."""
    namespace = Namespace.of(
        locator=NamespaceLocator.of(namespace="resume_state_check")
    )
    txn = Transaction.of().start(temp_dir)
    txn.step(
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=namespace,
        )
    )

    # Snapshot the fields of interest; the path lists are copied so later
    # changes to the transaction cannot alter the snapshot.
    before = {
        "id": txn.id,
        "start_time": txn.start_time,
        "root": txn.catalog_root_normalized,
        "meta_paths": list(txn.metafile_write_paths),
        "locator_paths": list(txn.locator_write_paths),
    }

    txn.pause()
    txn.resume()

    assert txn.id == before["id"], "Transaction ID should be preserved"
    assert txn._time_provider is not None, "Time provider should be reinitialized"
    assert txn.start_time == before["start_time"], "Start time should be preserved"
    assert txn.catalog_root_normalized == before["root"], "Catalog root should match"
    assert txn.metafile_write_paths == before["meta_paths"], "Metafile paths must match"
    assert (
        txn.locator_write_paths == before["locator_paths"]
    ), "Locator paths must match"
    assert (
        isinstance(txn.operations, list) and len(txn.operations) == 1
    ), "Operations must be restored"
    assert txn.pause_time is not None, "Pause time should be restored"

    # The resumed transaction can still be sealed.
    _, log_path = txn.seal()
    assert os.path.exists(log_path)
457
+
458
+ # Checks that pausing a transaction moves its log from running/ to paused/ and preserves valid transaction state
459
def test_pause_moves_running_to_paused(self, temp_dir):
    """Pausing removes the running log file and leaves a paused log file.

    The paused file must be readable as msgpack and contain an
    "operations" entry.
    """
    namespace = Namespace.of(locator=NamespaceLocator.of(namespace="pause_test"))
    txn = Transaction.of().start(temp_dir)
    txn.step(
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=namespace,
        )
    )

    local_fs = pyarrow.fs.LocalFileSystem()
    txn_id = txn.id

    # Both log locations live under the transaction log directory and are
    # named after the transaction ID.
    running_path = posixpath.join(
        temp_dir, TXN_DIR_NAME, RUNNING_TXN_DIR_NAME, txn_id
    )
    paused_path = posixpath.join(temp_dir, TXN_DIR_NAME, PAUSED_TXN_DIR_NAME, txn_id)

    # Sanity check: before pausing, the log is a file in the running directory.
    assert local_fs.get_file_info(running_path).type == pyarrow.fs.FileType.File

    txn.pause()

    # After pausing, the running log is gone and the paused log is a file.
    assert local_fs.get_file_info(running_path).type == pyarrow.fs.FileType.NotFound
    assert local_fs.get_file_info(paused_path).type == pyarrow.fs.FileType.File

    # The paused log must decode as msgpack and carry the operations.
    with local_fs.open_input_stream(paused_path) as stream:
        raw = stream.readall()
    decoded = msgpack.loads(raw)
    assert "operations" in decoded
490
+
491
+ # Simulates a full multi-step transaction with multiple pause/resume cycles and verifies correctness of all outputs
492
def test_transaction_pause_and_resume_roundtrip_complex(self, temp_dir):
    """Run a three-step transaction with two pause/resume cycles and verify it.

    Three namespaces are created in separate steps, with a pause/resume
    between consecutive steps. After sealing, each written file must exist,
    be non-empty, and deserialize to a namespace equivalent to the one that
    was stepped in; the success log must exist and end with the end time.
    """
    # Step 0: Create an empty interactive transaction
    txn = Transaction.of().start(temp_dir)

    # Step 1: Add first namespace, pause
    ns1 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_1"))
    op1 = TransactionOperation.of(
        TransactionOperationType.CREATE, dest_metafile=ns1
    )
    txn.step(op1)
    txn.pause()

    # Step 2: Resume, add second namespace, pause
    txn.resume()
    ns2 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_2"))
    op2 = TransactionOperation.of(
        TransactionOperationType.CREATE, dest_metafile=ns2
    )
    txn.step(op2)
    txn.pause()

    # Step 3: Resume again, add third namespace, commit
    txn.resume()
    ns3 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_3"))
    op3 = TransactionOperation.of(
        TransactionOperationType.CREATE, dest_metafile=ns3
    )
    txn.step(op3)

    # Final commit
    write_paths, success_log_path = txn.seal()

    # Read and verify written namespaces
    for i, ns in enumerate([ns1, ns2, ns3]):
        written_path = write_paths[i]

        # Check the file on disk before reading it, so a missing or empty
        # file fails with a descriptive message instead of a read error.
        assert os.path.exists(written_path), f"Missing file: {written_path}"
        assert os.path.getsize(written_path) > 0, f"Empty file: {written_path}"

        deserialized_ns = Namespace.read(written_path)
        assert ns.equivalent_to(
            deserialized_ns
        ), f"Mismatch in ns{i+1}: {ns} != {deserialized_ns}"

    # Check success log exists and is correct
    assert os.path.exists(success_log_path)
    assert success_log_path.endswith(str(txn.end_time))
537
+
538
+ # Repeats a complex pause/resume flow with additional assertions on namespace equality and time consistency
539
def test_transaction_pause_and_resume_roundtrip_complex_2(self, temp_dir):
    """Three-step pause/resume flow with extra field and timing assertions.

    Besides the equivalence check, the namespace name, locator alias and
    properties of each written namespace are compared individually, and the
    transaction start time must be earlier than its end time.
    """
    txn = Transaction.of().start(temp_dir)

    # First namespace, then pause.
    ns1 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_1"))
    txn.step(
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=ns1,
        )
    )
    txn.pause()

    # Resume, second namespace, then pause again.
    txn.resume()
    ns2 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_2"))
    txn.step(
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=ns2,
        )
    )
    txn.pause()

    # Resume, third namespace, then commit.
    txn.resume()
    ns3 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_3"))
    txn.step(
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=ns3,
        )
    )
    write_paths, success_log_path = txn.seal()

    assert txn.start_time < txn.end_time

    for i, expected in enumerate([ns1, ns2, ns3]):
        written_path = write_paths[i]

        # The file must be present and non-empty before it is read.
        assert os.path.exists(written_path), f"Missing file: {written_path}"
        assert os.path.getsize(written_path) > 0, f"Empty file: {written_path}"

        # Compare the deserialized namespace as a whole and field by field.
        loaded = Namespace.read(written_path)
        assert expected.equivalent_to(loaded), f"Namespace mismatch at index {i}"
        assert expected.locator.namespace == loaded.locator.namespace
        assert expected.locator_alias == loaded.locator_alias
        assert expected.properties == loaded.properties

    # The success log must exist and be named after the end time.
    assert os.path.exists(success_log_path)
    assert success_log_path.endswith(str(txn.end_time))
592
+
593
+
594
+ class TestTransactionCommitMessage:
595
+ """Test commit message preservation and retrieval for transactions."""
596
+
597
def test_transaction_with_commit_message(self):
    """A commit message passed at creation is readable from the transaction."""
    message = "Test commit message for transaction functionality"
    txn = Transaction.of(commit_message=message)

    # Both the property and the keyed lookup must return the message.
    assert txn.commit_message == message
    assert txn.get("commit_message") == message
607
+
608
def test_transaction_without_commit_message(self):
    """A transaction created without a commit message reports None for it."""
    txn = Transaction.of()

    # Neither the property nor the keyed lookup yields a message.
    assert txn.commit_message is None
    assert txn.get("commit_message") is None
616
+
617
def test_transaction_commit_message_setter(self):
    """A commit message assigned after creation is stored on the transaction."""
    txn = Transaction.of()
    # Starts out without any commit message.
    assert txn.commit_message is None

    message = "Added commit message after creation"
    txn.commit_message = message

    # The assigned value is visible through the property and the keyed lookup.
    assert txn.commit_message == message
    assert txn.get("commit_message") == message
630
+
631
+ def test_transaction_serialization_with_commit_message(self, temp_dir):
632
+ """Test that commit messages persist through transaction serialization."""
633
+ commit_msg = "Serialization test commit message"
634
+
635
+ # Create namespace for testing
636
+ ns = Namespace.of(locator=NamespaceLocator.of(namespace="serialization_test"))
637
+
638
+ # Create transaction with commit message
639
+ txn = Transaction.of(commit_message=commit_msg).start(temp_dir)
640
+ op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
641
+ txn.step(op)
642
+
643
+ # Commit transaction (this should serialize the transaction with commit message)
644
+ _, success_log_path = txn.seal()
645
+
646
+ # Read the transaction log and verify commit message persisted
647
+ txn_read = Transaction.read(success_log_path)
648
+ assert txn_read.commit_message == commit_msg
649
+
650
+ # Verify other transaction properties are intact
651
+ assert txn_read.start_time == txn.start_time
652
+ assert txn_read.end_time == txn.end_time
653
+ assert len(txn_read.operations) == 1