deltacat 2.0.0b10__py3-none-any.whl → 2.0.0b12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298)
  1. deltacat/__init__.py +96 -17
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +0 -18
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2435 -279
  12. deltacat/catalog/model/catalog.py +154 -77
  13. deltacat/catalog/model/properties.py +63 -22
  14. deltacat/compute/compactor/compaction_session.py +97 -75
  15. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  16. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  17. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  18. deltacat/compute/compactor/repartition_session.py +8 -21
  19. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  20. deltacat/compute/compactor/steps/materialize.py +9 -7
  21. deltacat/compute/compactor/steps/repartition.py +12 -11
  22. deltacat/compute/compactor/utils/io.py +6 -5
  23. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  24. deltacat/compute/compactor/utils/system_columns.py +3 -1
  25. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  26. deltacat/compute/compactor_v2/constants.py +30 -1
  27. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  28. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  29. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  30. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  31. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  32. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  33. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  34. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  35. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  36. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  37. deltacat/compute/compactor_v2/utils/io.py +11 -4
  38. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  39. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  40. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  41. deltacat/compute/converter/converter_session.py +145 -32
  42. deltacat/compute/converter/model/convert_input.py +26 -19
  43. deltacat/compute/converter/model/convert_input_files.py +33 -16
  44. deltacat/compute/converter/model/convert_result.py +35 -16
  45. deltacat/compute/converter/model/converter_session_params.py +24 -21
  46. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  47. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  48. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  49. deltacat/compute/converter/steps/convert.py +157 -50
  50. deltacat/compute/converter/steps/dedupe.py +24 -11
  51. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  52. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  53. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  54. deltacat/compute/converter/utils/io.py +101 -12
  55. deltacat/compute/converter/utils/s3u.py +33 -27
  56. deltacat/compute/janitor.py +205 -0
  57. deltacat/compute/jobs/client.py +25 -12
  58. deltacat/compute/resource_estimation/delta.py +38 -6
  59. deltacat/compute/resource_estimation/model.py +8 -0
  60. deltacat/constants.py +45 -2
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/env.py +10 -0
  64. deltacat/examples/basic_logging.py +1 -3
  65. deltacat/examples/compactor/aws/__init__.py +1 -0
  66. deltacat/examples/compactor/bootstrap.py +863 -0
  67. deltacat/examples/compactor/compactor.py +373 -0
  68. deltacat/examples/compactor/explorer.py +473 -0
  69. deltacat/examples/compactor/gcp/__init__.py +1 -0
  70. deltacat/examples/compactor/job_runner.py +439 -0
  71. deltacat/examples/compactor/utils/__init__.py +1 -0
  72. deltacat/examples/compactor/utils/common.py +261 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  79. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
  80. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  81. deltacat/examples/indexer/indexer.py +2 -2
  82. deltacat/examples/indexer/job_runner.py +1 -2
  83. deltacat/exceptions.py +66 -4
  84. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  85. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  86. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +29 -11
  87. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  88. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  89. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  90. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  91. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  92. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  93. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  94. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  95. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  96. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  97. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  98. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  99. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  100. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  101. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  102. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  103. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  104. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  105. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  107. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  108. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  109. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  110. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  111. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  112. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  113. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  114. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  115. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  116. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  117. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  118. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  119. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  120. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  121. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  122. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  123. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  124. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  125. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  126. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  127. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  128. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  129. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  130. deltacat/io/datasource/deltacat_datasource.py +0 -1
  131. deltacat/io/reader/deltacat_read_api.py +1 -1
  132. deltacat/storage/__init__.py +20 -2
  133. deltacat/storage/interface.py +54 -32
  134. deltacat/storage/main/impl.py +1494 -541
  135. deltacat/storage/model/delta.py +27 -3
  136. deltacat/storage/model/locator.py +6 -12
  137. deltacat/storage/model/manifest.py +182 -6
  138. deltacat/storage/model/metafile.py +151 -78
  139. deltacat/storage/model/namespace.py +8 -1
  140. deltacat/storage/model/partition.py +117 -42
  141. deltacat/storage/model/schema.py +2427 -159
  142. deltacat/storage/model/shard.py +6 -2
  143. deltacat/storage/model/sort_key.py +40 -0
  144. deltacat/storage/model/stream.py +9 -2
  145. deltacat/storage/model/table.py +12 -1
  146. deltacat/storage/model/table_version.py +11 -0
  147. deltacat/storage/model/transaction.py +1184 -208
  148. deltacat/storage/model/transform.py +81 -2
  149. deltacat/storage/model/types.py +48 -26
  150. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  151. deltacat/tests/aws/test_s3u.py +2 -31
  152. deltacat/tests/catalog/data/__init__.py +0 -0
  153. deltacat/tests/catalog/main/__init__.py +0 -0
  154. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  155. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  156. deltacat/tests/catalog/model/__init__.py +0 -0
  157. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  158. deltacat/tests/catalog/test_catalogs.py +103 -106
  159. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  160. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  161. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  162. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  163. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  164. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  165. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  166. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  167. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  168. deltacat/tests/compute/conftest.py +8 -44
  169. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  170. deltacat/tests/compute/converter/utils.py +15 -6
  171. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  172. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  173. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  174. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  175. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  176. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  177. deltacat/tests/compute/test_janitor.py +236 -0
  178. deltacat/tests/compute/test_util_common.py +716 -43
  179. deltacat/tests/compute/test_util_constant.py +0 -1
  180. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  181. deltacat/tests/daft/__init__.py +0 -0
  182. deltacat/tests/daft/test_model.py +97 -0
  183. deltacat/tests/experimental/__init__.py +1 -0
  184. deltacat/tests/experimental/catalog/__init__.py +0 -0
  185. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  186. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  187. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  188. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  189. deltacat/tests/experimental/daft/__init__.py +0 -0
  190. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  191. deltacat/tests/experimental/storage/__init__.py +0 -0
  192. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  193. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  194. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  195. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -3
  196. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  197. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  198. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  199. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  200. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  201. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  202. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  203. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  204. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +5 -3
  205. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  206. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  207. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  208. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  209. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  210. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  211. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  212. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  213. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  214. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  215. deltacat/tests/storage/model/test_schema.py +171 -0
  216. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  217. deltacat/tests/storage/model/test_shard.py +3 -1
  218. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  219. deltacat/tests/storage/model/test_transaction.py +393 -48
  220. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  221. deltacat/tests/test_deltacat_api.py +988 -4
  222. deltacat/tests/test_exceptions.py +9 -5
  223. deltacat/tests/test_utils/pyarrow.py +52 -21
  224. deltacat/tests/test_utils/storage.py +23 -34
  225. deltacat/tests/types/__init__.py +0 -0
  226. deltacat/tests/types/test_tables.py +104 -0
  227. deltacat/tests/utils/exceptions.py +22 -0
  228. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  229. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  230. deltacat/tests/utils/test_daft.py +121 -31
  231. deltacat/tests/utils/test_numpy.py +1193 -0
  232. deltacat/tests/utils/test_pandas.py +1106 -0
  233. deltacat/tests/utils/test_polars.py +1040 -0
  234. deltacat/tests/utils/test_pyarrow.py +1370 -89
  235. deltacat/types/media.py +224 -14
  236. deltacat/types/tables.py +2329 -59
  237. deltacat/utils/arguments.py +33 -1
  238. deltacat/utils/daft.py +823 -36
  239. deltacat/utils/export.py +3 -1
  240. deltacat/utils/filesystem.py +100 -0
  241. deltacat/utils/metafile_locator.py +2 -1
  242. deltacat/utils/numpy.py +118 -26
  243. deltacat/utils/pandas.py +577 -48
  244. deltacat/utils/polars.py +658 -27
  245. deltacat/utils/pyarrow.py +1258 -213
  246. deltacat/utils/ray_utils/dataset.py +101 -10
  247. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  248. deltacat/utils/url.py +57 -16
  249. deltacat-2.0.0b12.dist-info/METADATA +1163 -0
  250. deltacat-2.0.0b12.dist-info/RECORD +439 -0
  251. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
  252. deltacat/catalog/iceberg/__init__.py +0 -4
  253. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  254. deltacat/compute/merge_on_read/__init__.py +0 -4
  255. deltacat/compute/merge_on_read/daft.py +0 -40
  256. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  257. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  258. deltacat/daft/daft_scan.py +0 -115
  259. deltacat/daft/model.py +0 -258
  260. deltacat/daft/translator.py +0 -126
  261. deltacat/examples/common/fixtures.py +0 -15
  262. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  263. deltacat/storage/rivulet/__init__.py +0 -11
  264. deltacat/storage/rivulet/feather/__init__.py +0 -5
  265. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  266. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  267. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  268. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  269. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  270. deltacat/utils/s3fs.py +0 -21
  271. deltacat-2.0.0b10.dist-info/METADATA +0 -68
  272. deltacat-2.0.0b10.dist-info/RECORD +0 -381
  273. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  274. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  275. /deltacat/{daft → docs/autogen/schema}/__init__.py +0 -0
  276. /deltacat/{examples/common → docs/autogen/schema/inference}/__init__.py +0 -0
  277. /deltacat/examples/{iceberg → compactor}/__init__.py +0 -0
  278. /deltacat/{storage/iceberg → examples/experimental}/__init__.py +0 -0
  279. /deltacat/{storage/rivulet/arrow → examples/experimental/iceberg}/__init__.py +0 -0
  280. /deltacat/{storage/rivulet/fs → examples/experimental/iceberg/converter}/__init__.py +0 -0
  281. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  282. /deltacat/{storage/rivulet/reader → experimental/catalog}/__init__.py +0 -0
  283. /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
  284. /deltacat/{storage/rivulet/schema → experimental/compatibility}/__init__.py +0 -0
  285. /deltacat/{storage/rivulet/writer → experimental/converter_agent}/__init__.py +0 -0
  286. /deltacat/{tests/storage/rivulet → experimental/converter_agent/beam}/__init__.py +0 -0
  287. /deltacat/{tests/storage/rivulet/fs → experimental/storage}/__init__.py +0 -0
  288. /deltacat/{tests/storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
  289. /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
  290. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/storage/rivulet/fs/__init__.py} +0 -0
  291. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  292. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  293. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  294. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  295. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  296. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  297. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
  298. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,9 @@ def test_sharding_strategy_from_string_range():
7
7
  """
8
8
  Tests that from_string('range') returns an instance of RangeShardingStrategy.
9
9
  """
10
- from deltacat.storage.rivulet.shard.range_shard import RangeShardingStrategy
10
+ from deltacat.experimental.storage.rivulet.shard.range_shard import (
11
+ RangeShardingStrategy,
12
+ )
11
13
 
12
14
  strategy = ShardingStrategy.from_string("range")
13
15
  assert isinstance(strategy, RangeShardingStrategy)
@@ -0,0 +1,90 @@
1
+ import pytest
2
+
3
+ from deltacat.storage import (
4
+ SortKey,
5
+ SortScheme,
6
+ SortOrder,
7
+ NullOrder,
8
+ )
9
+
10
+
11
+ def test_sort_scheme_validates_empty_keys():
12
+ # When creating a sort scheme with empty keys list
13
+ with pytest.raises(ValueError, match="Sort scheme cannot have empty keys list"):
14
+ SortScheme.of(
15
+ keys=[],
16
+ name="test_sort_scheme",
17
+ scheme_id="test_sort_scheme_id",
18
+ )
19
+
20
+
21
+ def test_sort_scheme_validates_duplicate_keys():
22
+ # When creating a sort scheme with duplicate keys
23
+ with pytest.raises(ValueError, match="Duplicate sort key found: col1"):
24
+ SortScheme.of(
25
+ keys=[
26
+ SortKey.of(
27
+ key=["col1"],
28
+ sort_order=SortOrder.ASCENDING,
29
+ null_order=NullOrder.AT_END,
30
+ ),
31
+ SortKey.of(
32
+ key=["col1"], # Duplicate key
33
+ sort_order=SortOrder.DESCENDING,
34
+ null_order=NullOrder.AT_START,
35
+ ),
36
+ ],
37
+ name="test_sort_scheme",
38
+ scheme_id="test_sort_scheme_id",
39
+ )
40
+
41
+
42
+ def test_sort_scheme_allows_valid_keys():
43
+ # When creating a sort scheme with valid keys
44
+ sort_scheme = SortScheme.of(
45
+ keys=[
46
+ SortKey.of(
47
+ key=["col1"],
48
+ sort_order=SortOrder.ASCENDING,
49
+ null_order=NullOrder.AT_END,
50
+ ),
51
+ SortKey.of(
52
+ key=["col2"],
53
+ sort_order=SortOrder.DESCENDING,
54
+ null_order=NullOrder.AT_END,
55
+ ),
56
+ ],
57
+ name="test_sort_scheme",
58
+ scheme_id="test_sort_scheme_id",
59
+ )
60
+
61
+ # Then it should succeed
62
+ assert sort_scheme is not None
63
+ assert len(sort_scheme.keys) == 2
64
+ assert sort_scheme.name == "test_sort_scheme"
65
+ assert sort_scheme.id == "test_sort_scheme_id"
66
+
67
+
68
+ def test_sort_scheme_validates_null_order_consistency():
69
+ # When creating a sort scheme with inconsistent null orders
70
+ with pytest.raises(
71
+ ValueError, match="All arrow sort keys must use the same null order"
72
+ ):
73
+ sort_scheme = SortScheme.of(
74
+ keys=[
75
+ SortKey.of(
76
+ key=["col1"],
77
+ sort_order=SortOrder.ASCENDING,
78
+ null_order=NullOrder.AT_END,
79
+ ),
80
+ SortKey.of(
81
+ key=["col2"],
82
+ sort_order=SortOrder.DESCENDING,
83
+ null_order=NullOrder.AT_START, # Different null order
84
+ ),
85
+ ],
86
+ name="test_sort_scheme",
87
+ scheme_id="test_sort_scheme_id",
88
+ )
89
+ # Access arrow property to trigger validation
90
+ sort_scheme.arrow
@@ -1,15 +1,25 @@
1
1
  import pytest
2
+ import os
3
+ import pyarrow
4
+ import msgpack
5
+ import posixpath
6
+
2
7
 
3
8
  from deltacat.storage import (
4
9
  Transaction,
5
10
  TransactionOperation,
6
- TransactionType,
7
11
  TransactionOperationType,
8
- )
9
- from deltacat.storage.model.metafile import (
12
+ Namespace,
13
+ NamespaceLocator,
10
14
  Metafile,
11
15
  )
12
16
 
17
+ from deltacat.constants import (
18
+ TXN_DIR_NAME,
19
+ RUNNING_TXN_DIR_NAME,
20
+ PAUSED_TXN_DIR_NAME,
21
+ )
22
+
13
23
 
14
24
  class TestAbsToRelative:
15
25
  @classmethod
@@ -64,7 +74,7 @@ class TestAbsToRelative:
64
74
  Transaction._abs_txn_meta_path_to_relative("/lorem/ipsum/", "")
65
75
 
66
76
  # Test cases for the relativize_operation_paths function
67
- def test_relativize_metafile_write_paths(self):
77
+ def test_relativizemetafile_write_paths(self):
68
78
  catalog_root = "/catalog/root"
69
79
  absolute_paths = [
70
80
  "/catalog/root/path/to/metafile1.mpk",
@@ -91,9 +101,7 @@ class TestAbsToRelative:
91
101
  # use replace method as setter
92
102
  transaction_operation.metafile_write_paths = absolute_paths
93
103
  # Create a transaction and relativize paths
94
- transaction = Transaction.of(
95
- txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
96
- )
104
+ transaction = Transaction.of([transaction_operation])
97
105
  transaction.relativize_operation_paths(transaction_operation, catalog_root)
98
106
  # Verify the paths have been correctly relativized
99
107
  assert transaction_operation.metafile_write_paths == expected_relative_paths
@@ -125,9 +133,7 @@ class TestAbsToRelative:
125
133
  # use replace as setter
126
134
  transaction_operation.locator_write_paths = absolute_paths
127
135
  # Create a transaction and relativize paths
128
- transaction = Transaction.of(
129
- txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
130
- )
136
+ transaction = Transaction.of(txn_operations=[transaction_operation])
131
137
  transaction.relativize_operation_paths(transaction_operation, catalog_root)
132
138
  # Verify the paths have been correctly relativized
133
139
  assert transaction_operation.locator_write_paths == expected_relative_paths
@@ -164,9 +170,7 @@ class TestAbsToRelative:
164
170
  transaction_operation.metafile_write_paths = meta_absolute_paths
165
171
  transaction_operation.locator_write_paths = loc_absolute_paths
166
172
  # Create a transaction and relativize paths
167
- transaction = Transaction.of(
168
- txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
169
- )
173
+ transaction = Transaction.of([transaction_operation])
170
174
  transaction.relativize_operation_paths(transaction_operation, catalog_root)
171
175
  # Verify the paths have been correctly relativized
172
176
  assert (
@@ -222,9 +226,7 @@ class TestAbsToRelative:
222
226
  transaction_operation.locator_write_paths = loc_absolute_paths
223
227
  transaction_operations.append(transaction_operation)
224
228
  # Create a transaction and relativize paths
225
- transaction = Transaction.of(
226
- txn_type=TransactionType.APPEND, txn_operations=transaction_operations
227
- )
229
+ transaction = Transaction.of(transaction_operations)
228
230
  for operation in transaction_operations:
229
231
  transaction.relativize_operation_paths(operation, catalog_root)
230
232
  # Verify the paths have been correctly relativized
@@ -241,9 +243,7 @@ class TestAbsToRelative:
241
243
  # Empty paths
242
244
  transaction_operation.metafile_write_paths = []
243
245
  transaction_operation.locator_write_paths = []
244
- transaction = Transaction.of(
245
- txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
246
- )
246
+ transaction = Transaction.of([transaction_operation])
247
247
  transaction.relativize_operation_paths(transaction_operation, catalog_root)
248
248
  assert transaction_operation.metafile_write_paths == []
249
249
  assert transaction_operation.locator_write_paths == []
@@ -257,9 +257,7 @@ class TestAbsToRelative:
257
257
  dest_metafile=Metafile({"id": "dummy_metafile_id"}),
258
258
  )
259
259
  transaction_operation.metafile_write_paths = absolute_paths
260
- transaction = Transaction.of(
261
- txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
262
- )
260
+ transaction = Transaction.of([transaction_operation])
263
261
  transaction.relativize_operation_paths(transaction_operation, catalog_root)
264
262
  assert transaction_operation.metafile_write_paths == expected_paths
265
263
 
@@ -279,30 +277,377 @@ class TestAbsToRelative:
279
277
  TransactionOperationType.READ_SIBLINGS,
280
278
  ]
281
279
 
282
- # Different transaction types to test
283
- txn_types = [
284
- TransactionType.APPEND,
285
- TransactionType.ALTER,
286
- TransactionType.DELETE,
287
- TransactionType.OVERWRITE,
288
- TransactionType.READ,
289
- TransactionType.RESTATE,
290
- ]
291
-
292
- for txn_type in txn_types:
293
- transaction_ops = []
294
- for op_type in operation_types:
295
- transaction_operation = TransactionOperation.of(
296
- operation_type=op_type,
297
- dest_metafile=Metafile({"id": "dummy_metafile_id"}),
298
- )
299
- transaction_operation.metafile_write_paths = absolute_paths
300
- transaction_ops.append(transaction_operation)
301
- transaction = Transaction.of(
302
- txn_type=txn_type, txn_operations=[transaction_operation]
280
+ transaction_ops = []
281
+ for op_type in operation_types:
282
+ transaction_operation = TransactionOperation.of(
283
+ operation_type=op_type,
284
+ dest_metafile=Metafile({"id": "dummy_metafile_id"}),
303
285
  )
304
- transaction.relativize_operation_paths(transaction_operation, catalog_root)
305
- # Assert paths are relativized correctly
306
- assert (
307
- transaction_operation.metafile_write_paths == expected_paths
308
- ), f"Failed for transaction type {txn_type} and operation type {op_type}"
286
+ transaction_operation.metafile_write_paths = absolute_paths
287
+ transaction_ops.append(transaction_operation)
288
+ transaction = Transaction.of([transaction_operation])
289
+ transaction.relativize_operation_paths(transaction_operation, catalog_root)
290
+ # Assert paths are relativized correctly
291
+ assert (
292
+ transaction_operation.metafile_write_paths == expected_paths
293
+ ), f"Failed for operation type {op_type}"
294
+
295
+
296
+ class TestTransactionPersistence:
297
+
298
+ # Verifies that transactions initialized with empty or None operations are marked interactive,
299
+ # while valid operations are not
300
+ def test_create_iterative_transaction(self):
301
+ txn_1 = Transaction.of(txn_operations=[])
302
+ txn_2 = Transaction.of(txn_operations=None)
303
+ op = TransactionOperation.of(
304
+ operation_type=TransactionOperationType.CREATE,
305
+ dest_metafile=Metafile({"id": "dummy_metafile_id"}),
306
+ )
307
+ txn_3 = Transaction.of(txn_operations=[op, op])
308
+ assert (
309
+ txn_1.interactive
310
+ ) # check that the constructor detects an empty list --> interactive transaction
311
+ assert (
312
+ txn_2.interactive
313
+ ) # check if we can initialize with no list --> interactive transaction
314
+ assert (
315
+ not txn_3.interactive
316
+ ) # check that valid operations_list --> not interactive transaction
317
+
318
+ # Builds and commits a transaction step-by-step, then validates the output files and transaction success log
319
+ def test_commit_iterative_transaction(self, temp_dir):
320
+ # Create two simple namespaces
321
+ namespace_locator1 = NamespaceLocator.of(namespace="test_ns_1")
322
+ namespace_locator2 = NamespaceLocator.of(namespace="test_ns_2")
323
+ ns1 = Namespace.of(locator=namespace_locator1)
324
+ ns2 = Namespace.of(locator=namespace_locator2)
325
+ # Start with an empty transaction (interactive)
326
+ transaction = Transaction.of()
327
+ txn = transaction.start(temp_dir) # operate on deep-copy
328
+ # Build operations manually and step them in
329
+ op1 = TransactionOperation.of(
330
+ operation_type=TransactionOperationType.CREATE,
331
+ dest_metafile=ns1,
332
+ )
333
+ op2 = TransactionOperation.of(
334
+ operation_type=TransactionOperationType.CREATE,
335
+ dest_metafile=ns2,
336
+ )
337
+ # steps
338
+ txn.step(op1)
339
+ txn.step(op2)
340
+
341
+ # seal() for interactive transactions
342
+ write_paths, success_log_path = txn.seal()
343
+
344
+ # Check output files exist and are valid
345
+ deserialized_ns1 = Namespace.read(write_paths[0])
346
+ deserialized_ns2 = Namespace.read(write_paths[1])
347
+
348
+ assert ns1.equivalent_to(deserialized_ns1)
349
+ assert ns2.equivalent_to(deserialized_ns2)
350
+ assert success_log_path.endswith(str(txn.end_time))
351
+
352
+ # Ensures that stepping and committing a transaction writes non-empty output files and a valid success log
353
+ def test_commit_iterative_file_creation(self, temp_dir):
354
+ ns = Namespace.of(locator=NamespaceLocator.of(namespace="check_writes"))
355
+ txn = Transaction.of().start(temp_dir)
356
+ op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
357
+ txn.step(op)
358
+ write_paths, success_log_path = txn.seal()
359
+
360
+ # check the files were created
361
+ for path in write_paths:
362
+ abs_path = os.path.join(temp_dir, path)
363
+ assert os.path.exists(abs_path)
364
+ assert os.path.getsize(abs_path) > 0
365
+
366
+ # check the success log exists
367
+ assert os.path.exists(success_log_path)
368
+ assert os.path.getsize(success_log_path) > 0
369
+
370
+ # Confirms that a transaction can be paused, resumed, and successfully committed without data loss
371
+ def test_transaction_pause_and_resume_roundtrip(self, temp_dir):
372
+ # Create a test namespace
373
+ ns = Namespace.of(locator=NamespaceLocator.of(namespace="paused_resume_ns"))
374
+
375
+ # Start interactive transaction
376
+ txn = Transaction.of().start(temp_dir)
377
+ op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
378
+
379
+ txn.step(op)
380
+
381
+ # Pause transaction (writes to paused/)
382
+ txn.pause()
383
+
384
+ # Resume transaction (reads from paused/)
385
+ txn.resume()
386
+
387
+ # Commit resumed transaction
388
+ write_paths, success_log_path = txn.seal()
389
+
390
+ # Validate outputs
391
+ deserialized = Namespace.read(write_paths[0])
392
+ assert ns.equivalent_to(deserialized)
393
+ assert os.path.exists(success_log_path)
394
+ assert success_log_path.endswith(str(txn.end_time))
395
+
396
+ # Validates that transaction state, including ID and write paths, is correctly preserved across pause/resume cycles
397
+ def test_resume_preserves_state_after_pause(self, temp_dir):
398
+ ns = Namespace.of(locator=NamespaceLocator.of(namespace="resume_state_check"))
399
+
400
+ txn = Transaction.of().start(temp_dir)
401
+ op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
402
+
403
+ txn.step(op)
404
+ txn_id_before = txn.id
405
+
406
+ txn.pause()
407
+ txn.resume()
408
+
409
+ # Ensure the ID and provider are still valid
410
+ assert txn.id == txn_id_before
411
+ assert txn._time_provider is not None
412
+ assert hasattr(txn, "metafile_write_paths")
413
+ assert len(txn.metafile_write_paths) == 1
414
+
415
+ # Check commit still works
416
+ _, success_log_path = txn.seal()
417
+ assert os.path.exists(success_log_path)
418
+
419
+ # Explicitly checks that fields are preserved
420
+ def test_resume_preserves_state_after_pause_deep(self, temp_dir):
421
+ ns = Namespace.of(locator=NamespaceLocator.of(namespace="resume_state_check"))
422
+
423
+ txn = Transaction.of().start(temp_dir)
424
+ op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
425
+
426
+ txn.step(op)
427
+
428
+ # Save values before pause
429
+ txn_id_before = txn.id
430
+ start_time_before = txn.start_time
431
+ root_before = txn.catalog_root_normalized
432
+ meta_paths_before = list(txn.metafile_write_paths)
433
+ locator_paths_before = list(txn.locator_write_paths)
434
+
435
+ txn.pause()
436
+ txn.resume()
437
+
438
+ # Field-by-field checks
439
+ assert txn.id == txn_id_before, "Transaction ID should be preserved"
440
+ assert txn._time_provider is not None, "Time provider should be reinitialized"
441
+ assert txn.start_time == start_time_before, "Start time should be preserved"
442
+ assert txn.catalog_root_normalized == root_before, "Catalog root should match"
443
+ assert (
444
+ txn.metafile_write_paths == meta_paths_before
445
+ ), "Metafile paths must match"
446
+ assert (
447
+ txn.locator_write_paths == locator_paths_before
448
+ ), "Locator paths must match"
449
+ assert (
450
+ isinstance(txn.operations, list) and len(txn.operations) == 1
451
+ ), "Operations must be restored"
452
+ assert txn.pause_time is not None, "Pause time should be restored"
453
+
454
+ # Final commit still works
455
+ write_paths, success_log_path = txn.seal()
456
+ assert os.path.exists(success_log_path)
457
+
458
+ # Checks that pausing a transaction moves its log from running/ to paused/ and preserves valid transaction state
459
+ def test_pause_moves_running_to_paused(self, temp_dir):
460
+ # Set up a transaction and a single operation
461
+ locator = NamespaceLocator.of(namespace="pause_test")
462
+ ns = Namespace.of(locator=locator)
463
+ txn = Transaction.of().start(temp_dir)
464
+
465
+ op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
466
+ txn.step(op)
467
+
468
+ fs = pyarrow.fs.LocalFileSystem()
469
+ txn_id = txn.id
470
+ txn_log_dir = posixpath.join(temp_dir, TXN_DIR_NAME)
471
+
472
+ running_path = posixpath.join(txn_log_dir, RUNNING_TXN_DIR_NAME, txn_id)
473
+ paused_path = posixpath.join(txn_log_dir, PAUSED_TXN_DIR_NAME, txn_id)
474
+
475
+ # Sanity check: file should be in running/
476
+ assert fs.get_file_info(running_path).type == pyarrow.fs.FileType.File
477
+
478
+ # Pause transaction
479
+ txn.pause()
480
+ # Ensure the running file is deleted
481
+ assert fs.get_file_info(running_path).type == pyarrow.fs.FileType.NotFound
482
+
483
+ # Ensure the paused file exists and contains valid msgpack
484
+ paused_info = fs.get_file_info(paused_path)
485
+ assert paused_info.type == pyarrow.fs.FileType.File
486
+ with fs.open_input_stream(paused_path) as f:
487
+ data = f.readall()
488
+ txn_loaded = msgpack.loads(data)
489
+ assert "operations" in txn_loaded
490
+
491
+ # Simulates a full multi-step transaction with multiple pause/resume cycles and verifies correctness of all outputs
492
+ def test_transaction_pause_and_resume_roundtrip_complex(self, temp_dir):
493
+ # Step 0: Create an empty interactive transaction
494
+ txn = Transaction.of().start(temp_dir)
495
+
496
+ # Step 1: Add first namespace, pause
497
+ ns1 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_1"))
498
+ op1 = TransactionOperation.of(
499
+ TransactionOperationType.CREATE, dest_metafile=ns1
500
+ )
501
+ txn.step(op1)
502
+ txn.pause()
503
+
504
+ # Step 2: Resume, add second namespace, pause
505
+ txn.resume()
506
+ ns2 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_2"))
507
+ op2 = TransactionOperation.of(
508
+ TransactionOperationType.CREATE, dest_metafile=ns2
509
+ )
510
+ txn.step(op2)
511
+ txn.pause()
512
+
513
+ # Step 3: Resume again, add third namespace, commit
514
+ txn.resume()
515
+ ns3 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_3"))
516
+ op3 = TransactionOperation.of(
517
+ TransactionOperationType.CREATE, dest_metafile=ns3
518
+ )
519
+ txn.step(op3)
520
+
521
+ # Final commit
522
+ write_paths, success_log_path = txn.seal()
523
+
524
+ # Read and verify written namespaces
525
+ for i, ns in enumerate([ns1, ns2, ns3]):
526
+ written_path = write_paths[i]
527
+ deserialized_ns = Namespace.read(written_path)
528
+ assert ns.equivalent_to(
529
+ deserialized_ns
530
+ ), f"Mismatch in ns{i+1}: {ns} != {deserialized_ns}"
531
+ assert os.path.exists(written_path), f"Missing file: {written_path}"
532
+ assert os.path.getsize(written_path) > 0
533
+
534
+ # Check success log exists and is correct
535
+ assert os.path.exists(success_log_path)
536
+ assert success_log_path.endswith(str(txn.end_time))
537
+
538
+ # Repeats a complex pause/resume flow with additional assertions on namespace equality and time consistency
539
+ def test_transaction_pause_and_resume_roundtrip_complex_2(self, temp_dir):
540
+ # Step 0: Create an empty interactive transaction
541
+ txn = Transaction.of().start(temp_dir)
542
+
543
+ # Step 1: Add first namespace, pause
544
+ ns1 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_1"))
545
+ op1 = TransactionOperation.of(
546
+ TransactionOperationType.CREATE, dest_metafile=ns1
547
+ )
548
+ txn.step(op1)
549
+ txn.pause()
550
+
551
+ # Step 2: Resume, add second namespace, pause
552
+ txn.resume()
553
+ ns2 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_2"))
554
+ op2 = TransactionOperation.of(
555
+ TransactionOperationType.CREATE, dest_metafile=ns2
556
+ )
557
+ txn.step(op2)
558
+
559
+ txn.pause()
560
+
561
+ # Step 3: Resume again, add third namespace, commit
562
+ txn.resume()
563
+ ns3 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_3"))
564
+ op3 = TransactionOperation.of(
565
+ TransactionOperationType.CREATE, dest_metafile=ns3
566
+ )
567
+ txn.step(op3)
568
+
569
+ # Final commit
570
+ write_paths, success_log_path = txn.seal()
571
+
572
+ assert txn.start_time < txn.end_time
573
+
574
+ # Read and verify written namespaces
575
+ for i, ns in enumerate([ns1, ns2, ns3]):
576
+ written_path = write_paths[i]
577
+
578
+ # Confirm file was created and is non-empty
579
+ assert os.path.exists(written_path), f"Missing file: {written_path}"
580
+ assert os.path.getsize(written_path) > 0, f"Empty file: {written_path}"
581
+
582
+ # Deserialize and verify content
583
+ deserialized_ns = Namespace.read(written_path)
584
+ assert ns.equivalent_to(deserialized_ns), f"Namespace mismatch at index {i}"
585
+ assert ns.locator.namespace == deserialized_ns.locator.namespace
586
+ assert ns.locator_alias == deserialized_ns.locator_alias
587
+ assert ns.properties == deserialized_ns.properties
588
+
589
+ # Verify success log
590
+ assert os.path.exists(success_log_path)
591
+ assert success_log_path.endswith(str(txn.end_time))
592
+
593
+
594
+ class TestTransactionCommitMessage:
595
+ """Test commit message preservation and retrieval for transactions."""
596
+
597
+ def test_transaction_with_commit_message(self):
598
+ """Test that commit messages are stored and retrievable from transactions."""
599
+ commit_msg = "Test commit message for transaction functionality"
600
+
601
+ # Create transaction with commit message
602
+ txn = Transaction.of(commit_message=commit_msg)
603
+
604
+ # Verify commit message is stored correctly
605
+ assert txn.commit_message == commit_msg
606
+ assert txn.get("commit_message") == commit_msg
607
+
608
+ def test_transaction_without_commit_message(self):
609
+ """Test that transactions work normally without commit messages."""
610
+ # Create transaction without commit message
611
+ txn = Transaction.of()
612
+
613
+ # Verify no commit message is stored
614
+ assert txn.commit_message is None
615
+ assert txn.get("commit_message") is None
616
+
617
+ def test_transaction_commit_message_setter(self):
618
+ """Test that commit messages can be set after transaction creation."""
619
+ # Create transaction without commit message
620
+ txn = Transaction.of()
621
+ assert txn.commit_message is None
622
+
623
+ # Set commit message using property setter
624
+ commit_msg = "Added commit message after creation"
625
+ txn.commit_message = commit_msg
626
+
627
+ # Verify commit message is stored correctly
628
+ assert txn.commit_message == commit_msg
629
+ assert txn.get("commit_message") == commit_msg
630
+
631
+ def test_transaction_serialization_with_commit_message(self, temp_dir):
632
+ """Test that commit messages persist through transaction serialization."""
633
+ commit_msg = "Serialization test commit message"
634
+
635
+ # Create namespace for testing
636
+ ns = Namespace.of(locator=NamespaceLocator.of(namespace="serialization_test"))
637
+
638
+ # Create transaction with commit message
639
+ txn = Transaction.of(commit_message=commit_msg).start(temp_dir)
640
+ op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
641
+ txn.step(op)
642
+
643
+ # Commit transaction (this should serialize the transaction with commit message)
644
+ _, success_log_path = txn.seal()
645
+
646
+ # Read the transaction log and verify commit message persisted
647
+ txn_read = Transaction.read(success_log_path)
648
+ assert txn_read.commit_message == commit_msg
649
+
650
+ # Verify other transaction properties are intact
651
+ assert txn_read.start_time == txn.start_time
652
+ assert txn_read.end_time == txn.end_time
653
+ assert len(txn_read.operations) == 1