tigerbeetle 0.0.34 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/ext/tb_client/extconf.rb +13 -13
  4. data/ext/tb_client/tigerbeetle/LICENSE +177 -0
  5. data/ext/tb_client/tigerbeetle/build.zig +2327 -0
  6. data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
  7. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
  8. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
  9. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
  10. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
  11. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
  12. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
  13. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
  14. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
  15. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
  16. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
  17. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
  18. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
  19. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
  20. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
  21. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
  22. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
  23. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
  24. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
  25. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
  26. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
  27. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
  28. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
  29. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
  30. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
  31. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
  32. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
  33. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
  34. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
  35. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
  36. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
  37. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
  38. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
  39. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
  40. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
  41. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
  42. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
  43. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
  44. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
  45. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
  46. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
  47. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
  48. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
  49. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
  50. data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
  51. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
  52. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
  53. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
  54. data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
  55. data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
  56. data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
  57. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
  58. data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
  59. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
  60. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
  61. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
  62. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
  63. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
  64. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
  65. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
  66. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
  67. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
  68. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
  69. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
  70. data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
  71. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
  72. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
  73. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
  74. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
  75. data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
  76. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
  77. data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
  78. data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
  79. data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
  80. data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
  81. data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
  82. data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
  83. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
  84. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
  85. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
  86. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
  87. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
  88. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
  89. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
  90. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
  91. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
  92. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
  93. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
  94. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
  95. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
  96. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
  97. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
  98. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
  99. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
  100. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
  101. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
  102. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
  103. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
  104. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
  105. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
  106. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
  107. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
  108. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
  109. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
  110. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
  111. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
  112. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
  113. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
  114. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
  115. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
  116. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
  117. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
  118. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
  119. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
  120. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
  121. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
  122. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
  123. data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
  124. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
  125. data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
  126. data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
  127. data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
  128. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
  129. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
  130. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
  131. data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
  132. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
  133. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
  134. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
  135. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
  136. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
  137. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
  138. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
  139. data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
  140. data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
  141. data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
  142. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
  143. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
  144. data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
  145. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
  146. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
  147. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
  148. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
  149. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
  150. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
  151. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
  152. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
  153. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
  154. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
  155. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
  156. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
  157. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
  158. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
  159. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
  160. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
  161. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
  162. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
  163. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
  164. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
  165. data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
  166. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
  167. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
  168. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
  169. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
  170. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
  171. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
  172. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
  173. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
  174. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
  175. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
  176. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
  177. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
  178. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
  179. data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
  180. data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
  181. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
  182. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
  183. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
  184. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
  185. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
  186. data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
  187. data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
  188. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
  189. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
  190. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
  191. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
  192. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
  193. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
  194. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
  195. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
  196. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
  197. data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
  198. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
  199. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
  200. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
  201. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
  202. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
  203. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
  204. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
  205. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
  206. data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
  207. data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
  208. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
  209. data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
  210. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
  211. data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
  212. data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
  213. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
  214. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
  215. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
  216. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
  217. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
  218. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
  219. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
  220. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
  221. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
  222. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
  223. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
  224. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
  225. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
  226. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
  227. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
  228. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
  229. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
  230. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
  231. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
  232. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
  233. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
  234. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
  235. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
  236. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
  237. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
  238. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
  239. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
  240. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
  241. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
  242. data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
  243. data/lib/tb_client/shared_lib.rb +12 -5
  244. data/lib/tigerbeetle/client.rb +1 -1
  245. data/lib/tigerbeetle/platforms.rb +9 -0
  246. data/lib/tigerbeetle/version.rb +2 -2
  247. data/tigerbeetle.gemspec +22 -5
  248. metadata +242 -3
  249. data/ext/tb_client/pkg.tar.gz +0 -0
@@ -0,0 +1,1000 @@
1
+ //! Reconstruct a cluster from one or more AOF files.
2
+ //!
3
+ //! Note that an AOF-recovered cluster is *not* physically identical to the original cluster.
4
+ //! It should be logically identical though -- the same data (minus the client table), just in
5
+ //! different places.
6
+ const std = @import("std");
7
+ const assert = std.debug.assert;
8
+
9
+ const constants = @import("constants.zig");
10
+ const vsr = @import("vsr.zig");
11
+ const tb = vsr.tigerbeetle;
12
+
13
+ const stdx = @import("stdx");
14
+ const MessagePool = vsr.message_pool.MessagePool;
15
+ const Message = MessagePool.Message;
16
+ const MessageBus = vsr.message_bus.MessageBusType(vsr.io.IO);
17
+ const Header = vsr.Header;
18
+
19
+ const MiB = stdx.MiB;
20
+
21
+ const log = std.log.scoped(.aof);
22
+
23
+ pub const std_options: std.Options = .{
24
+ .log_level = .info,
25
+ .logFn = stdx.log_with_timestamp,
26
+ };
27
+
28
+ const magic_number: u128 = 0xbcd8d3fee406119ed192c4f4c4fc82;
29
+
30
+ pub const AOFEntry = extern struct {
31
+ /// In case of extreme corruption, start each entry with a fixed random integer,
32
+ /// to allow skipping over corrupted entries.
33
+ magic_number: u128 = magic_number,
34
+
35
+ /// The main Message to log. This is written _without_ O_DIRECT, so sector alignment is not
36
+ /// a concern.
37
+ message: [constants.message_size_max]u8 align(16),
38
+
39
+ comptime {
40
+ assert(stdx.no_padding(AOFEntry));
41
+
42
+ // Ensure the message is the last field in the struct. When writing, the struct is truncated
43
+ // based on the message length, so any fields after it would be truncated.
44
+ assert(std.meta.fieldIndex(AOFEntry, "message").? == std.meta.fields(AOFEntry).len - 1);
45
+ }
46
+
47
+ /// Calculate the actual length of the AOFEntry that needs to be written to disk.
48
+ pub fn size_disk(self: *AOFEntry) u64 {
49
+ return @sizeOf(AOFEntry) - self.message.len + self.header().size;
50
+ }
51
+
52
+ /// The minimum size of an AOFEntry is when `message` is a Header with no body.
53
+ pub fn size_minimum(self: *AOFEntry) u64 {
54
+ return @sizeOf(AOFEntry) - self.message.len + @sizeOf(Header);
55
+ }
56
+
57
+ pub fn header(self: *AOFEntry) *Header.Prepare {
58
+ return @ptrCast(&self.message);
59
+ }
60
+
61
+ /// Turn an AOFEntry back into a Message.
62
+ pub fn to_message(self: *AOFEntry, target: *Message.Prepare) void {
63
+ stdx.copy_disjoint(.inexact, u8, target.buffer, self.message[0..self.header().size]);
64
+ }
65
+
66
+ pub fn from_message(
67
+ self: *AOFEntry,
68
+ message: *const Message.Prepare,
69
+ last_checksum: *?u128,
70
+ ) void {
71
+ assert(message.header.size <= self.message.len);
72
+
73
+ // When writing, entries can backtrack / duplicate, so we don't necessarily have a valid
74
+ // chain. Still, log when that happens. The `aof merge` command can generate a consistent
75
+ // file from entries like these.
76
+ log.debug("from_message: parent {x:0>32} (should == {x:0>32}) our checksum {x:0>32}", .{
77
+ message.header.parent,
78
+ last_checksum.* orelse 0,
79
+ message.header.checksum,
80
+ });
81
+ if (last_checksum.* == null or last_checksum.*.? != message.header.parent) {
82
+ log.info("from_message: parent {x:0>32}, expected {x:0>32} instead", .{
83
+ message.header.parent,
84
+ last_checksum.* orelse 0,
85
+ });
86
+ }
87
+ last_checksum.* = message.header.checksum;
88
+
89
+ // The cluster identifier is in the VSR header so we don't need to store it explicitly.
90
+ // The replica that this was logged on will be the replica with this file. If uploaded to
91
+ // object storage, this must be embedded in the filename or path.
92
+ // Whether this replica is the primary can be determined by the view number from the
93
+ // relevant op.
94
+ comptime {
95
+ const fields = std.meta.fieldNames(AOFEntry);
96
+ assert(fields.len == 2);
97
+ assert(std.mem.eql(u8, fields[0], "magic_number"));
98
+ assert(std.mem.eql(u8, fields[1], "message"));
99
+ }
100
+
101
+ // Using self.* = .{ .message = undefined } notation causes a `constants.message_size_max`
102
+ // increase in binary size, since Zig embeds the entire static initialization payload in the
103
+ // binary.
104
+ self.* = undefined;
105
+ self.magic_number = magic_number;
106
+ stdx.copy_disjoint(
107
+ .exact,
108
+ u8,
109
+ self.message[0..message.header.size],
110
+ message.buffer[0..message.header.size],
111
+ );
112
+ @memset(self.message[message.header.size..self.message.len], 0);
113
+ }
114
+ };
115
+
116
+ /// The AOF itself is simple and deterministic - but it logs data like the client's id
117
+ /// which make things trickier. If you want to compare AOFs between runs, the `debug`
118
+ /// CLI command does it by hashing together all checksum_body, operation and timestamp
119
+ /// fields.
120
+ pub fn AOFType(comptime IO: type) type {
121
+ return struct {
122
+ const AOF = @This();
123
+
124
+ io: *IO,
125
+ path: []const u8,
126
+ fd: ?IO.fd_t = null,
127
+ last_checksum: ?u128 = null,
128
+
129
+ state: union(enum) {
130
+ /// Store the number of unflushed entries - that is, calls to write() without
131
+ /// checkpoint() to ensure we don't ever buffer more than the WAL can hold.
132
+ writing: struct { unflushed: u64 },
133
+
134
+ /// Keep an opaque pointer to the replica to work around AOF being ?*AOF in Replica, and
135
+ /// @fieldParentPtr being cumbersome with that.
136
+ checkpoint: struct {
137
+ replica: *anyopaque,
138
+ replica_callback: *const fn (*anyopaque) void,
139
+ fsync_completion: IO.Completion,
140
+ },
141
+ } = .{ .writing = .{ .unflushed = 0 } },
142
+ size: usize = 0,
143
+
144
+ /// Create an AOF in the dir_fd when given a file name. dir_fd must be opened read write
145
+ /// (except on Windows). This ensures everything (including the dir) is fsync'd
146
+ /// appropriately. Closing dir_fd is the responsibility of the caller, which can be done
147
+ /// immediately after .init() finishes.
148
+ pub fn init(
149
+ io: *IO,
150
+ path: []const u8,
151
+ ) !AOF {
152
+ stdx.maybe(std.fs.path.isAbsolute(path));
153
+ assert(std.mem.endsWith(u8, path, ".aof"));
154
+
155
+ return AOF{
156
+ .io = io,
157
+ .path = path,
158
+ .fd = try io.aof_blocking_open(path),
159
+ };
160
+ }
161
+
162
+ pub fn close(self: *AOF) void {
163
+ assert(self.fd != null);
164
+
165
+ self.io.aof_blocking_close(self.fd.?);
166
+ self.fd = null;
167
+ }
168
+
169
+ /// Write a message to disk, with standard blocking IO but using the OS's page cache. The
170
+ /// AOF borrows durability from the write ahead log: if the AOF hasn't been flushed, and the
171
+ /// machine loses power, the op is guaranteed to still be in the WAL.
172
+ pub fn write(self: *AOF, message: *const Message.Prepare) !void {
173
+ assert(self.state == .writing);
174
+ assert(self.state.writing.unflushed < constants.journal_slot_count);
175
+
176
+ var entry: AOFEntry align(constants.sector_size) = undefined;
177
+ entry.from_message(
178
+ message,
179
+ &self.last_checksum,
180
+ );
181
+
182
+ const size_disk = entry.size_disk();
183
+ const bytes = std.mem.asBytes(&entry);
184
+
185
+ try self.io.aof_blocking_write_all(self.fd.?, bytes[0..size_disk]);
186
+
187
+ self.size += size_disk;
188
+ self.state.writing.unflushed += 1;
189
+ }
190
+
191
+ pub fn sync(self: *AOF) void {
192
+ assert(self.state == .writing);
193
+ assert(self.state.writing.unflushed <= constants.journal_slot_count);
194
+ self.state.writing.unflushed = 0;
195
+ }
196
+
197
+ pub fn checkpoint(
198
+ self: *AOF,
199
+ replica: *anyopaque,
200
+ callback: *const fn (*anyopaque) void,
201
+ ) void {
202
+ assert(self.state == .writing);
203
+ assert(self.state.writing.unflushed <= constants.journal_slot_count);
204
+
205
+ self.state = .{
206
+ .checkpoint = .{
207
+ .replica = replica,
208
+ .fsync_completion = undefined,
209
+ .replica_callback = callback,
210
+ },
211
+ };
212
+
213
+ self.io.fsync(
214
+ *AOF,
215
+ self,
216
+ on_fsync,
217
+ &self.state.checkpoint.fsync_completion,
218
+ self.fd.?,
219
+ );
220
+ }
221
+
222
+ fn on_fsync(self: *AOF, completion: *IO.Completion, result: IO.FsyncError!void) void {
223
+ _ = completion;
224
+ _ = result catch @panic("aof fsync failure");
225
+
226
+ assert(self.state == .checkpoint);
227
+ const replica = self.state.checkpoint.replica;
228
+ const replica_callback = self.state.checkpoint.replica_callback;
229
+ self.state = .{ .writing = .{ .unflushed = 0 } };
230
+
231
+ const stat_file = self.io.aof_blocking_stat(self.path) catch |err| switch (err) {
232
+ error.FileNotFound => blk: {
233
+ log.info("{s} not found; creating", .{self.path});
234
+ self.close();
235
+ assert(self.fd == null);
236
+ self.fd = self.io.aof_blocking_open(self.path) catch |e| {
237
+ std.debug.panic("failed to reopen {s} after rotate: {}", .{ self.path, e });
238
+ };
239
+
240
+ break :blk self.io.aof_blocking_stat(self.path) catch |e| {
241
+ log.warn("failed to stat aof ({s}): {}", .{ self.path, e });
242
+ break :blk null;
243
+ };
244
+ },
245
+ else => blk: {
246
+ log.warn("failed to stat aof ({s}): {}", .{ self.path, err });
247
+ break :blk null;
248
+ },
249
+ };
250
+
251
+ const stat_fd = self.io.aof_blocking_fstat(self.fd.?) catch |err| blk: {
252
+ log.warn("failed to fstat aof ({s}): {}", .{ self.path, err });
253
+ break :blk null;
254
+ };
255
+
256
+ // AOF change detection relies on detecting the file being removed, and *it* will
257
+ // recreate it. It is an error for the operator to try to create the file externally
258
+ // (eg, touch tigerbeetle.aof).
259
+ //
260
+ // Warn the operator strongly if this happens.
261
+ if (stat_fd != null and stat_file != null and stat_fd.?.inode != stat_file.?.inode) {
262
+ log.err("AOF inode mismatch detected - the AOF file path is not the same as " ++
263
+ "the open file descriptor being written to.", .{});
264
+ log.err(
265
+ "Move {s} out the way, and let tigerbeetle recreate the AOF.",
266
+ .{self.path},
267
+ );
268
+ }
269
+
270
+ replica_callback(replica);
271
+ }
272
+
273
+ pub fn validate(self: *AOF, allocator: std.mem.Allocator, last_checksum: ?u128) !void {
274
+ var validation_target: AOFEntry = undefined;
275
+
276
+ var validation_checksums = std.AutoHashMap(u128, void).init(allocator);
277
+ defer validation_checksums.deinit();
278
+
279
+ var it = Iterator{
280
+ .file_descriptor = self.fd.?,
281
+ .io = self.io,
282
+ .size = self.size,
283
+ };
284
+
285
+ // The iterator only does simple chain validation, but we can have backtracking
286
+ // or duplicates, and still have a valid AOF. Handle this by keeping track of
287
+ // every checksum we've seen so far, and considering it OK as long as we've seen
288
+ // a parent.
289
+ it.validate_chain = false;
290
+
291
+ var last_entry: ?*AOFEntry = null;
292
+
293
+ while (try it.next(&validation_target)) |entry| {
294
+ const header = entry.header();
295
+
296
+ if (entry.header().op == 1) {
297
+ // For op=1, put its parent in our list of seen checksums too.
298
+ // This handles the case where it gets replayed, but we don't record
299
+ // op=0 so the assert below would fail.
300
+ // It's needed for simulator validation only (aof merge uses a
301
+ // different method to walk down AOF entries).
302
+ try validation_checksums.put(header.parent, {});
303
+ } else {
304
+ // (Null due to state sync skipping commits.)
305
+ stdx.maybe(validation_checksums.get(header.parent) == null);
306
+ }
307
+
308
+ try validation_checksums.put(header.checksum, {});
309
+
310
+ last_entry = entry;
311
+ }
312
+
313
+ if (last_checksum) |checksum| {
314
+ if (last_entry.?.header().checksum != checksum) {
315
+ return error.ChecksumMismatch;
316
+ }
317
+ log.info("validated all aof entries. last entry checksum {x:0>32} matches " ++
318
+ " supplied {x:0>32}", .{ last_entry.?.header().checksum, checksum });
319
+ } else {
320
+ log.info("validated present aof entries.", .{});
321
+ }
322
+ }
323
+
324
+ pub fn reset(self: *AOF) void {
325
+ self.state = .{ .writing = .{ .unflushed = 0 } };
326
+ }
327
+
328
+ pub const ReplayClient = struct {
329
+ const Client = vsr.ClientType(tb.Operation, MessageBus);
330
+
331
+ client: *Client,
332
+ io: *IO,
333
+ message_pool: *MessagePool,
334
+ inflight_message: ?*Message.Request = null,
335
+
336
+ pub fn init(
337
+ io: *IO,
338
+ allocator: std.mem.Allocator,
339
+ time: vsr.time.Time,
340
+ cluster: u128,
341
+ addresses: []std.net.Address,
342
+ ) !ReplayClient {
343
+ assert(addresses.len > 0);
344
+ assert(addresses.len <= constants.replicas_max);
345
+
346
+ var message_pool = try allocator.create(MessagePool);
347
+ errdefer allocator.destroy(message_pool);
348
+
349
+ var client = try allocator.create(Client);
350
+ errdefer allocator.destroy(client);
351
+
352
+ message_pool.* = try MessagePool.init(allocator, .client);
353
+ errdefer message_pool.deinit(allocator);
354
+
355
+ client.* = try Client.init(
356
+ allocator,
357
+ time,
358
+ message_pool,
359
+ .{
360
+ // Use a deterministic client id, so that replaying the same AOF against
361
+ // different new clusters yields physically identical data files.
362
+ // (It must be based on release so that clusters which have upgraded at some
363
+ // point will need a separate "aof recover" invocation for each release.)
364
+ .id = constants.config.process.release.value,
365
+ .cluster = cluster,
366
+ .replica_count = @intCast(addresses.len),
367
+ .aof_recovery = true,
368
+ .message_bus_options = .{
369
+ .configuration = addresses,
370
+ .io = io,
371
+ .trace = null,
372
+ },
373
+ },
374
+ );
375
+ errdefer client.deinit(allocator);
376
+
377
+ client.register(register_callback, undefined);
378
+ while (client.request_inflight != null) {
379
+ client.tick();
380
+ try io.run_for_ns(constants.tick_ms * std.time.ns_per_ms);
381
+ }
382
+
383
+ return .{
384
+ .io = io,
385
+ .message_pool = message_pool,
386
+ .client = client,
387
+ };
388
+ }
389
+
390
/// Deinitializes the client and then its message pool, and afterwards frees the two heap
/// allocations that hold them. `allocator` must be the one that was passed to `init`.
pub fn deinit(self: *ReplayClient, allocator: std.mem.Allocator) void {
    const client = self.client;
    const pool = self.message_pool;

    // Tear down contents first (client before pool)...
    client.deinit(allocator);
    pool.deinit(allocator);

    // ...then give back the memory the two objects live in.
    allocator.destroy(client);
    allocator.destroy(pool);
}
397
+
398
/// Sends the entries produced by `iterator` to the cluster through `self.client`, strictly
/// one request at a time: each request is awaited before the next entry is read.
///
/// Entries rejected by `replay_message` are skipped. Returns the first error produced by
/// `iterator.next` or `io.run_for_ns`.
pub fn replay(self: *ReplayClient, iterator: *Iterator) !void {
    // Scratch entry that `iterator.next` reads each record into.
    var target: AOFEntry = undefined;

    while (try iterator.next(&target)) |entry| {
        // Skip replaying reserved messages and messages not marked for playback.
        const header = entry.header();
        assert(header.cluster == self.client.cluster);
        if (!ReplayClient.replay_message(header)) continue;

        const message = self.client.get_message().build(.request);
        errdefer self.client.release_message(message.base());

        // Only one request may be outstanding; `replay_callback` clears this again.
        assert(self.inflight_message == null);
        self.inflight_message = message;

        // NOTE(review): presumably copies the entry (header and body) into the message
        // buffer; the header is then replaced with a request header below.
        entry.to_message(message.base().build(.prepare));

        message.header.* = .{
            .client = self.client.id,
            .cluster = self.client.cluster,
            .command = .request,
            .operation = header.operation,
            .size = header.size,
            .timestamp = header.timestamp,
            .view = 0,
            .parent = 0,
            .session = 0,
            .request = 0,
            .release = header.release,
            .previous_request_latency = 0,
        };

        // `self` is passed as the callback's user data; `replay_callback` converts it
        // back into a `*ReplayClient`.
        self.client.raw_request(
            ReplayClient.replay_callback,
            @intFromPtr(self),
            message,
        );

        // Process messages one by one for now
        while (self.client.request_inflight != null) {
            self.client.tick();
            try self.io.run_for_ns(constants.tick_ms * std.time.ns_per_ms);
        }
    }
}
443
+
444
/// Decides whether an AOF entry is replayed during recovery. Operations such as lookups
/// and queries have no effect on the final state but take a lot of time to replay, so
/// they are filtered out here.
///
/// Returns false for VSR-reserved operations, true for `create_accounts`,
/// `create_transfers` and `pulse`, and false for every other state machine operation.
pub fn replay_message(header: *Header.Prepare) bool {
    if (header.operation.vsr_reserved()) return false;

    return switch (header.operation.cast(tb.Operation)) {
        .create_accounts, .create_transfers => true,

        // Pulses are replayed to handle pending transfer expiry.
        .pulse => true,

        else => false,
    };
}
459
+
460
/// Callback with nothing to do: both arguments are intentionally discarded.
fn register_callback(user_data: u128, result: *const vsr.RegisterResult) void {
    _ = result;
    _ = user_data;
}
467
+
468
/// Completion callback for a replayed request. `user_data` carries the address of the
/// `ReplayClient` that issued the request; the reply itself is not inspected. Clears
/// `inflight_message`, which must have been set.
fn replay_callback(
    user_data: u128,
    operation: vsr.Operation,
    timestamp: u64,
    result: []u8,
) void {
    _ = result;
    _ = timestamp;
    _ = operation;

    // Recover the pointer that was widened to u128 when the request was issued.
    const address: usize = @intCast(user_data);
    const replay_client: *ReplayClient = @ptrFromInt(address);

    assert(replay_client.inflight_message != null);
    replay_client.inflight_message = null;
}
482
+ };
483
+
484
/// Return an iterator into an AOF, to read entries one by one. This also validates that
/// both the header and body checksums of the read entry are valid, and that all checksums
/// chain correctly.
pub const Iterator = struct {
    io: *IO,
    file_descriptor: IO.fd_t,
    /// Size of the file in bytes, sampled once in `init`.
    size: u64,
    /// Byte offset of the next entry to read.
    offset: u64 = 0,

    /// When true, `next` fails if an entry's parent differs from the previous checksum.
    validate_chain: bool = true,
    /// Checksum of the entry most recently returned by `next`, if any.
    last_checksum: ?u128 = null,

    /// Opens `path` (relative to the current working directory) read-only and records its
    /// size. The file handle is kept open until `close` is called.
    pub fn init(io: *IO, path: []const u8) !Iterator {
        const file = try std.fs.cwd().openFile(path, .{ .mode = .read_only });
        errdefer file.close();

        const size = (try file.stat()).size;

        return Iterator{ .io = io, .file_descriptor = file.handle, .size = size };
    }

    /// Reads the entry at the current offset into `target` and validates it.
    ///
    /// Returns `target` on success and advances `offset` past the entry, or null once
    /// `offset` has reached `size`. On error `offset` is left unchanged. Errors:
    /// `AOFShortRead`, `AOFMagicNumberMismatch`, `AOFChecksumMismatch`,
    /// `AOFBodyChecksumMismatch`, `AOFChecksumChainMismatch` (only when `validate_chain`
    /// is set), plus whatever the underlying read returns.
    pub fn next(it: *Iterator, target: *AOFEntry) !?*AOFEntry {
        if (it.offset >= it.size) return null;

        const buf = std.mem.asBytes(target);
        const bytes_read = try it.io.aof_blocking_pread_all(
            it.file_descriptor,
            buf,
            it.offset,
        );

        // size_disk relies on information that was stored on disk, so further verify we
        // have read at least the minimum permissible.
        if (bytes_read < target.size_minimum() or
            bytes_read < target.size_disk())
        {
            return error.AOFShortRead;
        }

        if (target.magic_number != magic_number) {
            return error.AOFMagicNumberMismatch;
        }

        const header = target.header();
        if (!header.valid_checksum()) {
            return error.AOFChecksumMismatch;
        }

        if (!header.valid_checksum_body(target.message[@sizeOf(Header)..header.size])) {
            return error.AOFBodyChecksumMismatch;
        }

        // Ensure this file has a consistent hash chain
        if (it.validate_chain) {
            if (it.last_checksum != null and it.last_checksum.? != header.parent) {
                return error.AOFChecksumChainMismatch;
            }
        }

        it.last_checksum = header.checksum;

        it.offset += target.size_disk();

        return target;
    }

    /// Rewinds the read position to the start of the file. `last_checksum` is not reset.
    pub fn reset(it: *Iterator) !void {
        it.offset = 0;
    }

    /// Closes the underlying file descriptor.
    pub fn close(it: *Iterator) void {
        it.io.aof_blocking_close(it.file_descriptor);
    }

    /// Try skip ahead to the next entry in a potentially corrupted AOF file
    /// by searching for the next magic_number located at least `count` bytes after our
    /// current position, and setting our internal position to it.
    ///
    /// Pass `count = 0` to accept a magic number at the current position, or `count = 1`
    /// to step over a magic number that is already there. If no magic number is found,
    /// the position is moved to the end of the file, so that `next` returns null.
    pub fn skip(it: *Iterator, allocator: std.mem.Allocator, count: usize) !void {
        const needle = std.mem.asBytes(&magic_number);

        const skip_buffer = try allocator.alloc(u8, 1 * MiB);
        defer allocator.free(skip_buffer);
        assert(skip_buffer.len >= needle.len);

        // `count` is relative to where the search starts. It must only be applied once,
        // not to every chunk: otherwise a magic number sitting in the first `count` bytes
        // of a later chunk would be missed.
        it.offset = @min(it.offset + count, it.size);

        while (it.offset < it.size) {
            const bytes_read = try it.io.aof_blocking_pread_all(
                it.file_descriptor,
                skip_buffer,
                it.offset,
            );

            if (std.mem.indexOf(u8, skip_buffer[0..bytes_read], needle)) |offset_bytes| {
                it.offset += offset_bytes;
                return;
            }

            // Fewer bytes than one magic number remain: nothing further can match.
            if (bytes_read < needle.len) {
                it.offset = it.size;
                return;
            }

            // Advance only over the bytes that were actually read and searched, and keep
            // the last `needle.len - 1` of them in the next window, so that a magic number
            // straddling two windows is still found.
            it.offset += bytes_read - (needle.len - 1);
        }
    }
};
587
+
588
/// Merges the AOF files at `input_paths` into a single AOF written to `output_path`.
///
/// Works in three passes:
/// 1. Read every input with hash-chain validation disabled, and index each entry that
///    passes validation by its parent checksum. Corrupted entries are skipped.
/// 2. Starting from the parent of the first usable entry encountered, follow the chain
///    (parent -> checksum), re-reading each entry from its file and writing it to the
///    output.
/// 3. Re-read the output with a fresh `Iterator` (chain validation enabled) and print its
///    first and last checksums.
///
/// At most `constants.members_max` input paths are accepted. Progress is printed to
/// stdout. Panics if an indexed entry no longer validates when re-read, or if indexed
/// entries remain that do not connect to the chain.
pub fn merge(
    io: *IO,
    allocator: std.mem.Allocator,
    input_paths: []const []const u8,
    output_path: []const u8,
) !void {
    const stdout = std.io.getStdOut().writer();

    var aofs: [constants.members_max]Iterator = undefined;
    var aof_count: usize = 0;
    defer for (aofs[0..aof_count]) |*it| it.close();

    // One iterator slot per input, so exactly `aofs.len` inputs still fit.
    assert(input_paths.len <= aofs.len);

    // Where a validated entry lives, so that it can be re-read in the second pass.
    const EntryInfo = struct {
        aof: *Iterator,
        index: u64,
        size: u64,
        checksum: u128,
        parent: u128,
    };

    var message_pool = try MessagePool.init_capacity(allocator, 1);
    defer message_pool.deinit(allocator);

    var entries_by_parent = std.AutoHashMap(u128, EntryInfo).init(allocator);
    defer entries_by_parent.deinit();

    var target = try allocator.create(AOFEntry);
    defer allocator.destroy(target);

    const dir_fd = try IO.open_dir(std.fs.path.dirname(output_path) orelse ".");
    defer std.posix.close(dir_fd);

    for (input_paths) |input_path| {
        aofs[aof_count] = try Iterator.init(io, input_path);
        aof_count += 1;
    }

    var output_aof = try AOF.init(io, output_path);

    // First, iterate all AOFs and build a mapping between parent checksums and where the
    // entry is located.
    try stdout.print("Building checksum map...\n", .{});
    var current_parent: ?u128 = null;
    for (aofs[0..aof_count], 0..) |*aof, i| {
        // While building our checksum map, don't validate our hash chain. We might have a
        // file that has a broken chain, but still contains valid data that can be used for
        // recovery with other files.
        aof.validate_chain = false;

        while (true) {
            var entry = aof.next(target) catch |err| {
                switch (err) {
                    // If our magic number is corrupted, skip to the next entry.
                    error.AOFMagicNumberMismatch => {
                        try stdout.print(
                            "{s}: Skipping entry with corrupted magic number.\n",
                            .{input_paths[i]},
                        );
                        try aof.skip(allocator, 0);
                        continue;
                    },

                    // Otherwise, we need to skip over our valid magic number, to the next
                    // one (since the pointer is only updated after a successful read,
                    // calling .skip(0)) will not do anything here.
                    error.AOFChecksumMismatch, error.AOFBodyChecksumMismatch => {
                        try stdout.print(
                            "{s}: Skipping entry with corrupted checksum.\n",
                            .{input_paths[i]},
                        );
                        try aof.skip(allocator, 1);
                        continue;
                    },

                    error.AOFShortRead => {
                        try stdout.print(
                            "{s}: Skipping truncated entry at EOF.\n",
                            .{input_paths[i]},
                        );
                        break;
                    },

                    else => @panic("Unexpected Error"),
                }
                break;
            };

            if (entry == null) {
                break;
            }

            const header = entry.?.header();
            const checksum = header.checksum;
            const parent = header.parent;

            if (current_parent == null) {
                try stdout.print(
                    "The root checksum will be {x:0>32} from {s}.\n",
                    .{ parent, input_paths[i] },
                );
                current_parent = parent;
            }

            const v = try entries_by_parent.getOrPut(parent);
            if (v.found_existing) {
                // If the entry already exists in our mapping, and it's identical, that's
                // OK. If it's not however, it indicates the log has been forked somehow.
                assert(v.value_ptr.checksum == checksum);
            } else {
                v.value_ptr.* = .{
                    .aof = aof,
                    // `next` has already advanced `offset` past this entry.
                    .index = aof.offset - entry.?.size_disk(),
                    .size = entry.?.size_disk(),
                    .checksum = checksum,
                    .parent = parent,
                };
            }
        }
        try stdout.print(
            "Finished processing {s} - extracted {} usable entries.\n",
            .{ input_paths[i], entries_by_parent.count() },
        );
    }

    // Next, start from our root checksum, walk down the hash chain until there's nothing
    // left. We currently take the root checksum as the first entry in the first AOF.
    while (entries_by_parent.count() > 0) {
        const message = message_pool.get_message(.prepare);
        defer message_pool.unref(message);

        assert(current_parent != null);
        // Whether a successor exists depends on the input data (entries that precede the
        // root, gaps, or forks all leave entries that the chain never reaches), so this
        // must fail loudly in every build mode rather than be declared unreachable.
        const entry = entries_by_parent.getPtr(current_parent.?) orelse
            @panic("hash chain is broken: no entry found for the current parent checksum");

        const buf = std.mem.asBytes(target)[0..entry.size];
        const bytes_read = try io.aof_blocking_pread_all(
            entry.aof.file_descriptor,
            buf,
            entry.index,
        );

        // None of these conditions should happen, but double check them to prevent TOCTOUs.
        if (bytes_read != target.size_disk()) {
            @panic("unexpected short read while reading AOF entry");
        }

        const header = target.header();
        if (!header.valid_checksum()) {
            @panic("unexpected checksum error while merging");
        }

        if (!header.valid_checksum_body(target.message[@sizeOf(Header)..header.size])) {
            @panic("unexpected body checksum error while merging");
        }

        target.to_message(message);
        try output_aof.write(
            message,
        );

        // Read both fields before removing the entry that `entry` points into.
        current_parent = entry.checksum;
        _ = entries_by_parent.remove(entry.parent);
    }

    output_aof.close();

    // Validate the newly created output file
    try stdout.print("Validating Output {s}\n", .{output_path});

    var it = try Iterator.init(io, output_path);
    defer it.close();

    var first_checksum: ?u128 = null;
    var last_checksum: ?u128 = null;

    while (try it.next(target)) |entry| {
        const header = entry.header();
        if (first_checksum == null) {
            first_checksum = header.checksum;
        }

        last_checksum = header.checksum;
    }

    try stdout.print(
        "AOF {s} validated. Starting checksum: {x:0>32} Ending checksum: {x:0>32}\n",
        .{ output_path, first_checksum orelse 0, last_checksum orelse 0 },
    );
}
778
+ };
779
+ }
780
+
781
+ const testing = std.testing;
782
+
783
// Round-trip test: writes a single prepare message to an AOF file, reads it back through
// the iterator, and checks that the bytes are identical and that iteration stops at EOF.
test "aof write / read" {
    const IO = @import("io.zig").IO;
    const AOF = AOFType(IO);
    const AOFIterator = AOF.Iterator;

    // Remove any leftover file from an earlier run, and clean up again when done.
    const aof_file = "test.aof";
    std.fs.cwd().deleteFile(aof_file) catch {};
    defer std.fs.cwd().deleteFile(aof_file) catch {};

    const allocator = std.testing.allocator;

    var io = try IO.init(32, 0);
    defer io.deinit();

    const dir_fd = try IO.open_dir(".");
    defer std.posix.close(dir_fd);

    var aof = try AOF.init(&io, aof_file);

    // Two messages are needed at once: the one written and the one read back into.
    var message_pool = try MessagePool.init_capacity(allocator, 2);
    defer message_pool.deinit(allocator);

    const demo_message = message_pool.get_message(.prepare);
    defer message_pool.unref(demo_message);

    const target = try allocator.create(AOFEntry);
    defer allocator.destroy(target);

    const demo_payload = "hello world";

    // The command / operation used here don't matter - we verify things bitwise.
    demo_message.header.* = .{
        .op = 0,
        .commit = 0,
        .view = 0,
        .client = 0,
        .request = 0,
        .parent = 0,
        .request_checksum = 0,
        .cluster = 0,
        .timestamp = 0,
        .checkpoint_id = 0,
        .release = vsr.Release.minimum,
        .command = .prepare,
        .operation = @enumFromInt(4),
        .size = @intCast(@sizeOf(Header) + demo_payload.len),
    };

    // Fill in the body, then the body checksum, then the header checksum.
    stdx.copy_disjoint(.exact, u8, demo_message.body_used(), demo_payload);
    demo_message.header.set_checksum_body(demo_payload);
    demo_message.header.set_checksum();

    try aof.write(demo_message);
    aof.close();

    var it = try AOFIterator.init(&io, aof_file);
    defer it.close();

    const read_entry = (try it.next(target)).?;

    // Check that to_message also works as expected
    const read_message = message_pool.get_message(.prepare);
    defer message_pool.unref(read_message);

    read_entry.to_message(read_message);
    try testing.expect(std.mem.eql(
        u8,
        demo_message.buffer[0..demo_message.header.size],
        read_message.buffer[0..read_message.header.size],
    ));

    // The raw entry read from disk must match the original message as well.
    try testing.expect(std.mem.eql(
        u8,
        demo_message.buffer[0..demo_message.header.size],
        read_entry.message[0..read_entry.header().size],
    ));

    // Ensure our iterator works correctly and stops at EOF.
    try testing.expect((try it.next(target)) == null);
}
863
+
864
// Placeholder: this test has an empty body and asserts nothing.
test "aof merge" {}
865
+
866
/// Command line interface of the `aof` tool: one variant per subcommand. A `@"--"` field
/// marks where the subcommand's positional arguments begin.
const CLIArgs = union(enum) {
    recover: struct {
        cluster: u128,
        addresses: []const u8,
        @"--": void,
        path: []const u8,
    },
    debug: struct {
        @"--": void,
        path: []const u8,
    },
    merge: struct {
        @"--": void,
        // (One or more AOF file paths.)
    },

    // Keep this text in sync with `main`: the merge subcommand writes its result to
    // `prepared.aof`.
    pub const help =
        \\Usage:
        \\
        \\ aof [-h | --help]
        \\
        \\ aof recover --cluster=<integer> --addresses=<addresses> <path>
        \\
        \\ aof debug <path>
        \\
        \\ aof merge -- path.aof ... <path.aof n>
        \\
        \\
        \\Commands:
        \\
        \\ recover Recover a recorded AOF file at <path> to a TigerBeetle cluster running
        \\ at <addresses>. Said cluster must be running with aof_recovery = true
        \\ and have the same cluster ID as the source. The AOF must have a consistent
        \\ hash chain, which can be ensured using the `merge` subcommand.
        \\
        \\ debug Print all entries that have been recorded in the AOF file at <path>
        \\ to stdout. Checksums are verified, and aof will panic if an invalid
        \\ checksum is encountered, so this can be used to check the validity
        \\ of an AOF file. Prints a final hash of all data entries in the AOF.
        \\
        \\ merge Walk through multiple AOF files, extracting entries from each one
        \\ that pass validation, and build a single valid AOF. The first entry
        \\ of the first specified AOF file will be considered the root hash.
        \\ Can also be used to merge multiple incomplete AOF files into one,
        \\ or re-order a single AOF file. Will output to `prepared.aof`.
        \\
        \\ NB: Make sure to run merge with at least half of the replicas' AOFs,
        \\ otherwise entries might be lost.
        \\
        \\Options:
        \\
        \\ -h, --help
        \\ Print this help message and exit.
        \\
    ;
};
922
+
923
/// Entry point of the `aof` tool: parses the command line into `CLIArgs` and runs the
/// selected subcommand (`recover`, `debug` or `merge`).
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const allocator = gpa.allocator();

    var time_os: vsr.time.TimeOS = .{};
    const time = time_os.time();

    var args_iterator = try std.process.argsWithAllocator(allocator);
    defer args_iterator.deinit();

    const args = stdx.flags(&args_iterator, CLIArgs);

    // Scratch entry shared by the subcommands that iterate over an AOF.
    const target = try allocator.create(AOFEntry);
    defer allocator.destroy(target);

    const IO = @import("io.zig").IO;
    var io = try IO.init(32, 0);
    defer io.deinit();

    const AOF = AOFType(IO);
    const AOFReplayClient = AOF.ReplayClient;
    const AOFIterator = AOF.Iterator;

    switch (args) {
        .recover => |command| {
            var entries = try AOFIterator.init(&io, command.path);
            defer entries.close();

            var addresses_buffer: [constants.replicas_max]std.net.Address = undefined;
            const addresses_parsed = try vsr.parse_addresses(command.addresses, &addresses_buffer);

            var replay_client = try AOFReplayClient.init(
                &io,
                allocator,
                time,
                command.cluster,
                addresses_parsed,
            );
            defer replay_client.deinit(allocator);

            try replay_client.replay(&entries);
        },
        .debug => |command| {
            var entries = try AOFIterator.init(&io, command.path);
            defer entries.close();

            var digest: [32]u8 = undefined;
            var hasher = std.crypto.hash.Blake3.init(.{});

            const stdout = std.io.getStdOut().writer();
            while (try entries.next(target)) |entry| {
                const header = entry.header();
                // Only entries that would be replayed are printed and hashed.
                if (!AOFReplayClient.replay_message(header)) continue;

                try stdout.print("{}\n", .{header});

                // The body isn't the only important information, there's also the operation
                // and the timestamp which are in the header. Include those in our hash too.
                hasher.update(std.mem.asBytes(&header.checksum_body));
                hasher.update(std.mem.asBytes(&header.timestamp));
                hasher.update(std.mem.asBytes(&header.operation));
            }
            hasher.final(digest[0..]);

            // Only the first 16 bytes of the digest are reported, as a u128.
            const digest_prefix: u128 = @bitCast(digest[0..@sizeOf(u128)].*);
            try stdout.print("\nData checksum chain: {}\n", .{digest_prefix});
        },
        .merge => {
            // The input paths are whatever arguments remain after flag parsing.
            var paths: [constants.members_max][:0]const u8 = undefined;
            var paths_count: u32 = 0;
            while (paths_count < paths.len) {
                paths[paths_count] = args_iterator.next() orelse break;
                paths_count += 1;
            }
            if (paths_count == 0) vsr.fatal(.cli, "missing paths", .{});
            if (args_iterator.next()) |_| vsr.fatal(.cli, "too many paths", .{});

            try AOF.merge(&io, allocator, paths[0..paths_count], "prepared.aof");
        },
    }
}