tigerbeetle 0.0.40 → 0.17.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +0 -25
  3. data/README.md +670 -80
  4. data/docs/migration.md +201 -0
  5. data/sig/tigerbeetle.rbs +271 -0
  6. data/src/ext/tigerbeetle/extconf.rb +47 -0
  7. data/src/ext/tigerbeetle/lib/aarch64-linux-gnu.2.27/libtb_client.so +0 -0
  8. data/src/ext/tigerbeetle/lib/aarch64-linux-musl/libtb_client.so +0 -0
  9. data/src/ext/tigerbeetle/lib/aarch64-macos/libtb_client.dylib +0 -0
  10. data/src/ext/tigerbeetle/lib/x86_64-linux-gnu.2.27/libtb_client.so +0 -0
  11. data/src/ext/tigerbeetle/lib/x86_64-linux-musl/libtb_client.so +0 -0
  12. data/src/ext/tigerbeetle/lib/x86_64-macos/libtb_client.dylib +0 -0
  13. data/src/ext/tigerbeetle/lib/x86_64-windows/tb_client.dll +0 -0
  14. data/src/ext/tigerbeetle/rb_tb_gen.h +458 -0
  15. data/{ext/tb_client/tigerbeetle/src/clients/rust/assets → src/ext/tigerbeetle}/tb_client.h +18 -16
  16. data/src/ext/tigerbeetle/tigerbeetle.c +310 -0
  17. data/src/tigerbeetle/bindings.rb +347 -0
  18. data/src/tigerbeetle/client.rb +129 -0
  19. data/src/tigerbeetle/completion_dispatcher.rb +108 -0
  20. data/src/tigerbeetle/id.rb +40 -0
  21. data/src/tigerbeetle/tb.rb +3 -0
  22. data/src/tigerbeetle/version.rb +3 -0
  23. data/src/tigerbeetle.rb +39 -0
  24. metadata +33 -350
  25. data/CHANGELOG.md +0 -162
  26. data/ext/tb_client/extconf.rb +0 -41
  27. data/ext/tb_client/tigerbeetle/LICENSE +0 -177
  28. data/ext/tb_client/tigerbeetle/build.zig +0 -2296
  29. data/ext/tb_client/tigerbeetle/src/aof.zig +0 -1000
  30. data/ext/tb_client/tigerbeetle/src/build/fetch.zig +0 -112
  31. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +0 -808
  32. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +0 -1283
  33. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +0 -1704
  34. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +0 -341
  35. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +0 -1450
  36. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +0 -1659
  37. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +0 -406
  38. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +0 -1092
  39. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +0 -286
  40. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +0 -158
  41. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +0 -229
  42. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +0 -110
  43. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +0 -386
  44. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +0 -34
  45. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +0 -281
  46. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +0 -312
  47. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +0 -138
  48. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +0 -466
  49. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +0 -157
  50. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +0 -90
  51. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +0 -203
  52. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +0 -79
  53. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +0 -542
  54. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +0 -109
  55. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +0 -86
  56. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +0 -370
  57. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +0 -386
  58. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +0 -167
  59. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +0 -126
  60. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +0 -996
  61. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +0 -748
  62. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +0 -3238
  63. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +0 -1718
  64. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +0 -190
  65. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +0 -104
  66. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +0 -75
  67. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +0 -522
  68. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +0 -267
  69. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +0 -3
  70. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +0 -379
  71. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +0 -131
  72. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +0 -63
  73. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +0 -588
  74. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +0 -73
  75. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +0 -106
  76. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +0 -305
  77. data/ext/tb_client/tigerbeetle/src/config.zig +0 -296
  78. data/ext/tb_client/tigerbeetle/src/constants.zig +0 -790
  79. data/ext/tb_client/tigerbeetle/src/copyhound.zig +0 -202
  80. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +0 -72
  81. data/ext/tb_client/tigerbeetle/src/direction.zig +0 -120
  82. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +0 -158
  83. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +0 -156
  84. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +0 -252
  85. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +0 -313
  86. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +0 -87
  87. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +0 -63
  88. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +0 -47
  89. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +0 -28
  90. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +0 -61
  91. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +0 -169
  92. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +0 -46
  93. data/ext/tb_client/tigerbeetle/src/ewah.zig +0 -445
  94. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +0 -128
  95. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +0 -171
  96. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +0 -179
  97. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +0 -662
  98. data/ext/tb_client/tigerbeetle/src/io/common.zig +0 -155
  99. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +0 -1093
  100. data/ext/tb_client/tigerbeetle/src/io/linux.zig +0 -1880
  101. data/ext/tb_client/tigerbeetle/src/io/test.zig +0 -1005
  102. data/ext/tb_client/tigerbeetle/src/io/windows.zig +0 -1598
  103. data/ext/tb_client/tigerbeetle/src/io.zig +0 -34
  104. data/ext/tb_client/tigerbeetle/src/iops.zig +0 -134
  105. data/ext/tb_client/tigerbeetle/src/list.zig +0 -236
  106. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +0 -848
  107. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +0 -179
  108. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +0 -424
  109. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +0 -420
  110. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +0 -2114
  111. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +0 -185
  112. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +0 -1146
  113. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +0 -1102
  114. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +0 -200
  115. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +0 -1495
  116. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +0 -739
  117. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +0 -166
  118. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +0 -754
  119. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +0 -1294
  120. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +0 -510
  121. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +0 -1241
  122. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -628
  123. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +0 -247
  124. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +0 -116
  125. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +0 -543
  126. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +0 -938
  127. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +0 -293
  128. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +0 -359
  129. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +0 -99
  130. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +0 -17
  131. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +0 -962
  132. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +0 -617
  133. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +0 -84
  134. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +0 -1500
  135. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -149
  136. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -7
  137. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +0 -865
  138. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +0 -607
  139. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +0 -843
  140. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +0 -90
  141. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +0 -40
  142. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +0 -629
  143. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +0 -933
  144. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +0 -534
  145. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +0 -469
  146. data/ext/tb_client/tigerbeetle/src/message_bus.zig +0 -1219
  147. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +0 -936
  148. data/ext/tb_client/tigerbeetle/src/message_pool.zig +0 -343
  149. data/ext/tb_client/tigerbeetle/src/multiversion.zig +0 -2195
  150. data/ext/tb_client/tigerbeetle/src/queue.zig +0 -390
  151. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +0 -201
  152. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +0 -1356
  153. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +0 -496
  154. data/ext/tb_client/tigerbeetle/src/repl.zig +0 -1034
  155. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +0 -973
  156. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +0 -1866
  157. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +0 -304
  158. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +0 -227
  159. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +0 -658
  160. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +0 -466
  161. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +0 -1058
  162. data/ext/tb_client/tigerbeetle/src/scripts.zig +0 -105
  163. data/ext/tb_client/tigerbeetle/src/shell.zig +0 -1195
  164. data/ext/tb_client/tigerbeetle/src/stack.zig +0 -260
  165. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +0 -911
  166. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +0 -2079
  167. data/ext/tb_client/tigerbeetle/src/state_machine.zig +0 -4872
  168. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +0 -288
  169. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +0 -3128
  170. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +0 -82
  171. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +0 -157
  172. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +0 -292
  173. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +0 -65
  174. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +0 -1414
  175. data/ext/tb_client/tigerbeetle/src/stdx/huge_page_allocator.zig +0 -115
  176. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +0 -92
  177. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +0 -677
  178. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +0 -336
  179. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +0 -511
  180. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +0 -112
  181. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +0 -1163
  182. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +0 -142
  183. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +0 -361
  184. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +0 -275
  185. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +0 -295
  186. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +0 -436
  187. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +0 -48
  188. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +0 -402
  189. data/ext/tb_client/tigerbeetle/src/storage.zig +0 -489
  190. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +0 -180
  191. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +0 -146
  192. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +0 -53
  193. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +0 -61
  194. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +0 -76
  195. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +0 -110
  196. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +0 -412
  197. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +0 -331
  198. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -458
  199. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +0 -1198
  200. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +0 -128
  201. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +0 -181
  202. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +0 -144
  203. data/ext/tb_client/tigerbeetle/src/testing/id.zig +0 -97
  204. data/ext/tb_client/tigerbeetle/src/testing/io.zig +0 -317
  205. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +0 -126
  206. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +0 -533
  207. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +0 -154
  208. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +0 -389
  209. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +0 -1247
  210. data/ext/tb_client/tigerbeetle/src/testing/table.zig +0 -249
  211. data/ext/tb_client/tigerbeetle/src/testing/time.zig +0 -98
  212. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +0 -212
  213. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +0 -26
  214. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +0 -579
  215. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +0 -39
  216. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +0 -214
  217. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +0 -34
  218. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +0 -785
  219. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +0 -543
  220. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +0 -181
  221. data/ext/tb_client/tigerbeetle/src/tidy.zig +0 -1449
  222. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +0 -227
  223. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +0 -1069
  224. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +0 -1422
  225. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +0 -1658
  226. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +0 -518
  227. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +0 -36
  228. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +0 -646
  229. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +0 -958
  230. data/ext/tb_client/tigerbeetle/src/time.zig +0 -236
  231. data/ext/tb_client/tigerbeetle/src/trace/event.zig +0 -745
  232. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +0 -462
  233. data/ext/tb_client/tigerbeetle/src/trace.zig +0 -556
  234. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +0 -321
  235. data/ext/tb_client/tigerbeetle/src/vopr.zig +0 -1785
  236. data/ext/tb_client/tigerbeetle/src/vortex.zig +0 -101
  237. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +0 -473
  238. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +0 -208
  239. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +0 -43
  240. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +0 -768
  241. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +0 -532
  242. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +0 -338
  243. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +0 -1019
  244. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +0 -279
  245. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +0 -1381
  246. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +0 -315
  247. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +0 -1460
  248. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +0 -757
  249. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +0 -797
  250. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +0 -2586
  251. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +0 -308
  252. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +0 -1777
  253. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +0 -715
  254. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +0 -185
  255. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +0 -333
  256. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +0 -12356
  257. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +0 -416
  258. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +0 -165
  259. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +0 -2928
  260. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +0 -1075
  261. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +0 -1603
  262. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -484
  263. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +0 -405
  264. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -355
  265. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +0 -29
  266. data/ext/tb_client/tigerbeetle/src/vsr.zig +0 -1727
  267. data/lib/tb_client/shared_lib.rb +0 -66
  268. data/lib/tb_client.rb +0 -282
  269. data/lib/tigerbeetle/account.rb +0 -38
  270. data/lib/tigerbeetle/account_balance.rb +0 -23
  271. data/lib/tigerbeetle/account_filter.rb +0 -31
  272. data/lib/tigerbeetle/atomic_counter.rb +0 -14
  273. data/lib/tigerbeetle/client.rb +0 -214
  274. data/lib/tigerbeetle/converters/account.rb +0 -63
  275. data/lib/tigerbeetle/converters/account_balance.rb +0 -31
  276. data/lib/tigerbeetle/converters/account_filter.rb +0 -32
  277. data/lib/tigerbeetle/converters/base.rb +0 -35
  278. data/lib/tigerbeetle/converters/create_accounts_result.rb +0 -21
  279. data/lib/tigerbeetle/converters/create_transfers_result.rb +0 -21
  280. data/lib/tigerbeetle/converters/query_filter.rb +0 -33
  281. data/lib/tigerbeetle/converters/time.rb +0 -23
  282. data/lib/tigerbeetle/converters/transfer.rb +0 -64
  283. data/lib/tigerbeetle/converters/uint_128.rb +0 -24
  284. data/lib/tigerbeetle/converters.rb +0 -12
  285. data/lib/tigerbeetle/error.rb +0 -4
  286. data/lib/tigerbeetle/id.rb +0 -30
  287. data/lib/tigerbeetle/platforms.rb +0 -9
  288. data/lib/tigerbeetle/query_filter.rb +0 -31
  289. data/lib/tigerbeetle/request.rb +0 -7
  290. data/lib/tigerbeetle/transfer.rb +0 -40
  291. data/lib/tigerbeetle/version.rb +0 -4
  292. data/lib/tigerbeetle.rb +0 -13
  293. data/tigerbeetle.gemspec +0 -60
@@ -1,1603 +0,0 @@
1
- //! SuperBlock invariants:
2
- //!
3
- //! * vsr_state
4
- //! - vsr_state.replica and vsr_state.replica_count are immutable for now.
5
- //! - vsr_state.checkpoint.header.op is initially 0 (for a newly-formatted replica).
6
- //! - vsr_state.checkpoint.header.op ≤ vsr_state.commit_max
7
- //! - vsr_state.checkpoint.header.op_before ≤ vsr_state.checkpoint.header.op
8
- //! - vsr_state.log_view ≤ vsr_state.view
9
- //! - vsr_state.sync_op_min ≤ vsr_state.sync_op_max
10
- //!
11
- //! - vsr_state.checkpoint.manifest_block_count = 0 implies:
12
- //! vsr_state.checkpoint.manifest_oldest_address=0
13
- //! vsr_state.checkpoint.manifest_oldest_checksum=0
14
- //! vsr_state.checkpoint.manifest_newest_address=0
15
- //! vsr_state.checkpoint.manifest_newest_checksum=0
16
- //! vsr_state.checkpoint.manifest_oldest_address=0
17
- //!
18
- //! - vsr_state.checkpoint.manifest_block_count > 0 implies:
19
- //! vsr_state.checkpoint.manifest_oldest_address>0
20
- //! vsr_state.checkpoint.manifest_newest_address>0
21
- //!
22
- //! - checkpoint() must advance the superblock's vsr_state.checkpoint.header.op.
23
- //! - view_change() must not advance the superblock's vsr_state.checkpoint.header.op.
24
- //! - The following are monotonically increasing:
25
- //! - vsr_state.log_view
26
- //! - vsr_state.view
27
- //! - vsr_state.commit_max
28
- //! - vsr_state.checkpoint.header.op may backtrack due to state sync.
29
- //!
30
- const std = @import("std");
31
- const assert = std.debug.assert;
32
- const maybe = stdx.maybe;
33
- const mem = std.mem;
34
- const meta = std.meta;
35
-
36
- const constants = @import("../constants.zig");
37
- const stdx = @import("stdx");
38
- const vsr = @import("../vsr.zig");
39
- const log = std.log.scoped(.superblock);
40
-
41
/// Superblock quorum logic: `QuorumsType` from `superblock_quorums.zig`, instantiated with the
/// configured number of superblock copies.
pub const Quorums = @import("superblock_quorums.zig").QuorumsType(.{
    .superblock_copies = constants.superblock_copies,
});

/// Version number of the superblock format.
///
/// Evaluates to 0 when the configured process release equals `vsr.Release.minimum`, and to 2
/// otherwise, so that data files created by development builds are distinguished through the
/// version.
pub const SuperBlockVersion: u16 = version: {
    const release_is_minimum =
        constants.config.process.release.value == vsr.Release.minimum.value;
    break :version if (release_is_minimum) 0 else 2;
};

/// Number of padding bytes that follow the view headers.
///
/// Computed as `sector_size - (headers_size % sector_size)`; note that when the headers already
/// end exactly on a sector boundary this evaluates to a full `sector_size`, not zero.
const view_headers_reserved_size = reserved: {
    const view_headers_size = constants.view_headers_max * @sizeOf(vsr.Header);
    break :reserved constants.sector_size - (view_headers_size % constants.sector_size);
};
51
-
52
- // Fields are aligned to work as an extern or packed struct.
53
- pub const SuperBlockHeader = extern struct {
54
- checksum: u128 = undefined,
55
- checksum_padding: u128 = 0,
56
-
57
- /// Protects against misdirected reads at startup.
58
- /// For example, if multiple reads are all misdirected to a single copy of the superblock.
59
- /// Excluded from the checksum calculation to ensure that all copies have the same checksum.
60
- /// This simplifies writing and comparing multiple copies.
61
- /// TODO: u8 should be enough here, we use u16 only for alignment.
62
- copy: u16 = 0,
63
-
64
- /// The version of the superblock format in use, reserved for major breaking changes.
65
- version: u16,
66
-
67
- /// The release that the data file was originally formatted by.
68
- /// (Upgrades do not update this field.)
69
- release_format: vsr.Release,
70
-
71
- /// A monotonically increasing counter to locate the latest superblock at startup.
72
- sequence: u64,
73
-
74
- /// Protects against writing to or reading from the wrong data file.
75
- cluster: u128,
76
-
77
- /// The checksum of the previous superblock to hash chain across sequence numbers.
78
- parent: u128,
79
- parent_padding: u128 = 0,
80
-
81
- /// State stored on stable storage for the Viewstamped Replication consensus protocol.
82
- vsr_state: VSRState,
83
-
84
- /// Reserved for future minor features (e.g. changing a compression algorithm).
85
- flags: u64 = 0,
86
-
87
- /// The number of headers in view_headers_all.
88
- view_headers_count: u32,
89
-
90
- reserved: [1940]u8 = @splat(0),
91
-
92
- /// SV/DVC header suffix. Headers are ordered from high-to-low op.
93
- /// Unoccupied headers (after view_headers_count) are zeroed.
94
- ///
95
- /// When `vsr_state.log_view < vsr_state.view`, the headers are for a DVC.
96
- /// When `vsr_state.log_view = vsr_state.view`, the headers are for a SV.
97
- view_headers_all: [constants.view_headers_max]vsr.Header.Prepare,
98
- view_headers_reserved: [view_headers_reserved_size]u8 = @splat(0),
99
-
100
- comptime {
101
- assert(@sizeOf(SuperBlockHeader) % constants.sector_size == 0);
102
- assert(@divExact(@sizeOf(SuperBlockHeader), constants.sector_size) >= 2);
103
- assert(@offsetOf(SuperBlockHeader, "parent") % @sizeOf(u256) == 0);
104
- assert(@offsetOf(SuperBlockHeader, "vsr_state") % @sizeOf(u256) == 0);
105
- assert(@offsetOf(SuperBlockHeader, "view_headers_all") == constants.sector_size);
106
- // Assert that there is no implicit padding in the struct.
107
- assert(stdx.no_padding(SuperBlockHeader));
108
- }
109
-
110
- pub const VSRState = extern struct {
111
- checkpoint: CheckpointState,
112
-
113
- /// Globally unique identifier of the replica, must be non-zero.
114
- replica_id: u128,
115
-
116
- members: vsr.Members,
117
-
118
- /// The highest operation up to which we may commit.
119
- commit_max: u64,
120
-
121
- /// See `sync_op_max`.
122
- sync_op_min: u64,
123
-
124
- /// When zero, all of the grid blocks and replies are synced.
125
- /// (When zero, `sync_op_min` is also zero.)
126
- ///
127
- /// When nonzero, we must repair grid-blocks/client-replies that would have been written
128
- /// during the commits between `sync_op_min` and `sync_op_max` (inclusive).
129
- /// (Those grid-blocks and client-replies were not written normally because we "skipped"
130
- /// past them via state sync.)
131
- sync_op_max: u64,
132
-
133
- /// This field was used by the old state sync protocol, but is now unused and is always set
134
- /// to zero.
135
- /// TODO: rename to reserved and assert that it is zero, once it is actually set to zero
136
- /// in all superblocks (in the next release).
137
- sync_view: u32 = 0,
138
-
139
- /// The last view in which the replica's status was normal.
140
- log_view: u32,
141
-
142
- /// The view number of the replica.
143
- view: u32,
144
-
145
- /// Number of replicas (determines sizes of the quorums), part of VSR configuration.
146
- replica_count: u8,
147
-
148
- reserved: [779]u8 = @splat(0),
149
-
150
- comptime {
151
- assert(@sizeOf(VSRState) == 2048);
152
- // Assert that there is no implicit padding in the struct.
153
- assert(stdx.no_padding(VSRState));
154
- }
155
-
156
/// Builds the root `VSRState`.
///
/// The checkpoint header is `vsr.Header.Prepare.root(options.cluster)`. Every checkpoint
/// reference (free set, client sessions, manifest, snapshots) is empty: addresses, sizes and
/// last-block checksums are zero, and the content checksums are the checksum of an empty
/// input. `storage_size` starts at `data_file_size_min`.
///
/// `commit_max`, `sync_op_min`, `sync_op_max` and `log_view` start at zero, while `view`,
/// `replica_id`, `members`, `replica_count` and the checkpoint's `release` are taken from
/// `options`. Fields not listed here keep their declared defaults.
pub fn root(options: struct {
    cluster: u128,
    replica_id: u128,
    members: vsr.Members,
    replica_count: u8,
    release: vsr.Release,
    view: u32,
}) VSRState {
    // Checksum of an empty input, shared by every trailer that has no content yet.
    const checksum_empty = comptime vsr.checksum(&.{});

    const checkpoint_root: CheckpointState = .{
        .header = vsr.Header.Prepare.root(options.cluster),
        .parent_checkpoint_id = 0,
        .grandparent_checkpoint_id = 0,

        // Free set (acquired blocks).
        .free_set_blocks_acquired_checksum = checksum_empty,
        .free_set_blocks_acquired_last_block_checksum = 0,
        .free_set_blocks_acquired_last_block_address = 0,
        .free_set_blocks_acquired_size = 0,

        // Free set (released blocks).
        .free_set_blocks_released_checksum = checksum_empty,
        .free_set_blocks_released_last_block_checksum = 0,
        .free_set_blocks_released_last_block_address = 0,
        .free_set_blocks_released_size = 0,

        // Client sessions.
        .client_sessions_checksum = checksum_empty,
        .client_sessions_last_block_checksum = 0,
        .client_sessions_last_block_address = 0,
        .client_sessions_size = 0,

        // Manifest log.
        .manifest_oldest_checksum = 0,
        .manifest_oldest_address = 0,
        .manifest_newest_checksum = 0,
        .manifest_newest_address = 0,
        .manifest_block_count = 0,

        // Snapshots.
        .snapshots_block_checksum = 0,
        .snapshots_block_address = 0,

        .storage_size = data_file_size_min,
        .release = options.release,
    };

    return .{
        .checkpoint = checkpoint_root,
        .replica_id = options.replica_id,
        .members = options.members,
        .replica_count = options.replica_count,
        .commit_max = 0,
        .sync_op_min = 0,
        .sync_op_max = 0,
        .log_view = 0,
        .view = options.view,
    };
}
201
-
202
/// Asserts every invariant that a single `VSRState` must satisfy on its own, without reference
/// to any other state:
///
/// * ordering of `checkpoint.header.op`/`commit_max`, `sync_op_min`/`sync_op_max` and
///   `log_view`/`view`;
/// * `replica_count` within `1..=constants.replicas_max`, and `replica_id` present in `members`;
/// * unused and padding fields of the checkpoint are zero;
/// * each checkpoint reference (free set acquired/released, client sessions, manifest) is either
///   entirely empty or has a non-empty size/address.
pub fn assert_internally_consistent(state: VSRState) void {
    const checkpoint = &state.checkpoint;

    assert(state.commit_max >= checkpoint.header.op);
    assert(state.sync_op_max >= state.sync_op_min);
    assert(state.view >= state.log_view);
    assert(state.replica_count > 0);
    assert(state.replica_count <= constants.replicas_max);
    assert(vsr.member_index(&state.members, state.replica_id) != null);

    // These fields are unused at the moment:
    assert(checkpoint.snapshots_block_checksum == 0);
    assert(checkpoint.snapshots_block_address == 0);

    // Explicit padding must always be zeroed.
    assert(checkpoint.manifest_oldest_checksum_padding == 0);
    assert(checkpoint.manifest_newest_checksum_padding == 0);
    assert(checkpoint.snapshots_block_checksum_padding == 0);
    assert(checkpoint.free_set_blocks_acquired_last_block_checksum_padding == 0);
    assert(checkpoint.free_set_blocks_released_last_block_checksum_padding == 0);

    assert(checkpoint.client_sessions_last_block_checksum_padding == 0);
    assert(checkpoint.storage_size >= data_file_size_min);

    // Checksum of an empty input: the expected content checksum of any empty trailer.
    const checksum_empty = comptime vsr.checksum(&.{});

    const acquired_empty = checkpoint.free_set_blocks_acquired_last_block_address == 0;
    if (acquired_empty) {
        assert(checkpoint.free_set_blocks_acquired_size == 0);
        assert(checkpoint.free_set_blocks_acquired_checksum == checksum_empty);
        assert(checkpoint.free_set_blocks_acquired_last_block_checksum == 0);
    } else {
        assert(checkpoint.free_set_blocks_acquired_size > 0);
    }

    const released_empty = checkpoint.free_set_blocks_released_last_block_address == 0;
    if (released_empty) {
        assert(checkpoint.free_set_blocks_released_size == 0);
        assert(checkpoint.free_set_blocks_released_checksum == checksum_empty);
        assert(checkpoint.free_set_blocks_released_last_block_checksum == 0);
    } else {
        assert(checkpoint.free_set_blocks_released_size > 0);
    }

    const client_sessions_empty = checkpoint.client_sessions_last_block_address == 0;
    if (client_sessions_empty) {
        assert(checkpoint.client_sessions_last_block_checksum == 0);
        assert(checkpoint.client_sessions_size == 0);
        assert(checkpoint.client_sessions_checksum == checksum_empty);
    } else {
        // A non-empty client sessions trailer always has the full encoded size.
        assert(checkpoint.client_sessions_size == vsr.ClientSessions.encode_size);
    }

    if (checkpoint.manifest_block_count == 0) {
        assert(checkpoint.manifest_oldest_address == 0);
        assert(checkpoint.manifest_newest_address == 0);
        assert(checkpoint.manifest_oldest_checksum == 0);
        assert(checkpoint.manifest_newest_checksum == 0);
    } else {
        assert(checkpoint.manifest_oldest_address != 0);
        assert(checkpoint.manifest_newest_address != 0);

        // Oldest and newest refer to the same block exactly when there is a single block.
        const single_block = checkpoint.manifest_block_count == 1;
        const same_address =
            checkpoint.manifest_oldest_address == checkpoint.manifest_newest_address;
        const same_checksum =
            checkpoint.manifest_oldest_checksum == checkpoint.manifest_newest_checksum;

        assert(single_block == same_address);
        assert(single_block == same_checksum);
    }
}
267
-
268
/// Returns whether `new` is a valid successor of `old`: none of `checkpoint.header.op`,
/// `view`, `log_view` or `commit_max` may decrease.
///
/// Both states must be internally consistent, and must agree on `replica_id`,
/// `replica_count` and `members`; these are asserted rather than reported via the result.
/// Likewise asserted:
/// * same checkpoint op: the checkpoints are byte-identical, unless `old` is the root state
///   (checkpoint op and checksum both zero), which must then have all-zero progress fields;
/// * different checkpoint op: the checkpoint header checksum and `parent_checkpoint_id` differ.
pub fn monotonic(old: VSRState, new: VSRState) bool {
    old.assert_internally_consistent();
    new.assert_internally_consistent();

    const checkpoint_old = &old.checkpoint;
    const checkpoint_new = &new.checkpoint;

    if (checkpoint_old.header.op != checkpoint_new.header.op) {
        assert(checkpoint_old.header.checksum != checkpoint_new.header.checksum);
        assert(checkpoint_old.parent_checkpoint_id != checkpoint_new.parent_checkpoint_id);
    } else if (checkpoint_old.header.checksum == 0 and checkpoint_old.header.op == 0) {
        // "old" is the root VSRState.
        assert(old.commit_max == 0);
        assert(old.sync_op_min == 0);
        assert(old.sync_op_max == 0);
        assert(old.log_view == 0);
        assert(old.view == 0);
    } else {
        assert(stdx.equal_bytes(CheckpointState, checkpoint_old, checkpoint_new));
    }

    // The replica's identity and the cluster membership never change between states.
    assert(old.replica_id == new.replica_id);
    assert(old.replica_count == new.replica_count);
    assert(stdx.equal_bytes([constants.members_max]u128, &old.members, &new.members));

    return checkpoint_old.header.op <= checkpoint_new.header.op and
        old.view <= new.view and
        old.log_view <= new.log_view and
        old.commit_max <= new.commit_max;
}
298
-
299
- pub fn would_be_updated_by(old: VSRState, new: VSRState) bool {
300
- assert(monotonic(old, new));
301
-
302
- return !stdx.equal_bytes(VSRState, &old, &new);
303
- }
304
-
305
- /// Compaction is one bar ahead of superblock's commit_min.
306
- /// The commits from the bar following commit_min were in the mutable table, and
307
- /// thus not preserved in the checkpoint.
308
- /// But the corresponding `compact()` updates were preserved, and must not be repeated
309
- /// to ensure deterministic storage.
310
- pub fn op_compacted(state: VSRState, op: u64) bool {
311
- // If commit_min is 0, we have never checkpointed, so no compactions are checkpointed.
312
- return state.checkpoint.header.op > 0 and
313
- op <= vsr.Checkpoint.trigger_for_checkpoint(state.checkpoint.header.op).?;
314
- }
315
- };
316
-
317
- /// The content of CheckpointState is deterministic for the corresponding checkpoint.
318
- ///
319
- /// This struct is sent in a `start_view` message from the primary to a syncing replica.
320
- pub const CheckpointState = extern struct {
321
- /// The last prepare of the checkpoint committed to the state machine.
322
- /// At startup, replay the log hereafter.
323
- header: vsr.Header.Prepare,
324
-
325
- free_set_blocks_acquired_last_block_checksum: u128,
326
- free_set_blocks_acquired_last_block_checksum_padding: u128 = 0,
327
-
328
- free_set_blocks_released_last_block_checksum: u128,
329
- free_set_blocks_released_last_block_checksum_padding: u128 = 0,
330
-
331
- client_sessions_last_block_checksum: u128,
332
- client_sessions_last_block_checksum_padding: u128 = 0,
333
- manifest_oldest_checksum: u128,
334
- manifest_oldest_checksum_padding: u128 = 0,
335
- manifest_newest_checksum: u128,
336
- manifest_newest_checksum_padding: u128 = 0,
337
- snapshots_block_checksum: u128,
338
- snapshots_block_checksum_padding: u128 = 0,
339
-
340
- /// Checksum covering the entire encoded free set. Strictly speaking it is redundant:
341
- /// free_set_last_block_checksum indirectly covers the same data. It is still useful
342
- /// to protect from encoding-decoding bugs as a defense in depth.
343
- free_set_blocks_acquired_checksum: u128,
344
- free_set_blocks_released_checksum: u128,
345
-
346
- /// Checksum covering the entire client sessions, as defense-in-depth.
347
- client_sessions_checksum: u128,
348
-
349
- /// The checkpoint_id() of the checkpoint which last updated our commit_min.
350
- /// Following state sync, this is set to the last checkpoint that we skipped.
351
- parent_checkpoint_id: u128,
352
- /// The parent_checkpoint_id of the parent checkpoint.
353
- /// TODO We might be able to remove this when
354
- /// https://github.com/tigerbeetle/tigerbeetle/issues/1378 is fixed.
355
- grandparent_checkpoint_id: u128,
356
-
357
- free_set_blocks_acquired_last_block_address: u64,
358
- free_set_blocks_released_last_block_address: u64,
359
-
360
- client_sessions_last_block_address: u64,
361
- manifest_oldest_address: u64,
362
- manifest_newest_address: u64,
363
- snapshots_block_address: u64,
364
-
365
- // Logical storage size in bytes.
366
- //
367
- // If storage_size is less than the data file size, then the grid blocks beyond storage_size
368
- // were used previously, but have since been freed.
369
- //
370
- // If storage_size is more than the data file size, then the data file might have been
371
- // truncated/corrupted.
372
- storage_size: u64,
373
-
374
- // Size of the encoded trailers in bytes.
375
- // It is equal to the sum of sizes of individual trailer blocks and is used for assertions.
376
- free_set_blocks_acquired_size: u64,
377
- free_set_blocks_released_size: u64,
378
-
379
- client_sessions_size: u64,
380
-
381
- /// The number of manifest blocks in the manifest log.
382
- manifest_block_count: u32,
383
-
384
- /// All prepares between `CheckpointState.commit_min` (i.e. `op_checkpoint`) and
385
- /// `trigger_for_checkpoint(checkpoint_after(commit_min))` must be executed by this release.
386
- /// (Prepares with `operation=upgrade` are the exception – upgrades in the last
387
- /// `lsm_compaction_ops` before a checkpoint trigger may be replayed by a different release.
388
- release: vsr.Release,
389
-
390
- reserved: [408]u8 = @splat(0),
391
-
392
- comptime {
393
- assert(@sizeOf(CheckpointState) % @sizeOf(u128) == 0);
394
- assert(@sizeOf(CheckpointState) == 1024);
395
- assert(stdx.no_padding(CheckpointState));
396
- }
397
- };
398
-
399
- pub fn calculate_checksum(superblock: *const SuperBlockHeader) u128 {
400
- comptime assert(meta.fieldIndex(SuperBlockHeader, "checksum") == 0);
401
- comptime assert(meta.fieldIndex(SuperBlockHeader, "checksum_padding") == 1);
402
- comptime assert(meta.fieldIndex(SuperBlockHeader, "copy") == 2);
403
-
404
- const checksum_size = @sizeOf(@TypeOf(superblock.checksum));
405
- comptime assert(checksum_size == @sizeOf(u128));
406
-
407
- const checksum_padding_size = @sizeOf(@TypeOf(superblock.checksum_padding));
408
- comptime assert(checksum_padding_size == @sizeOf(u128));
409
-
410
- const copy_size = @sizeOf(@TypeOf(superblock.copy));
411
- comptime assert(copy_size == 2);
412
-
413
- const ignore_size = checksum_size + checksum_padding_size + copy_size;
414
-
415
- return vsr.checksum(std.mem.asBytes(superblock)[ignore_size..]);
416
- }
417
-
418
- pub fn set_checksum(superblock: *SuperBlockHeader) void {
419
- // `copy` is not covered by the checksum, but for our staging/working superblock headers it
420
- // should always be zero.
421
- assert(superblock.copy < constants.superblock_copies);
422
- assert(superblock.copy == 0);
423
-
424
- assert(superblock.version == SuperBlockVersion);
425
- assert(superblock.release_format.value > 0);
426
- assert(superblock.flags == 0);
427
-
428
- assert(stdx.zeroed(&superblock.reserved));
429
- assert(stdx.zeroed(&superblock.vsr_state.reserved));
430
- assert(stdx.zeroed(&superblock.vsr_state.checkpoint.reserved));
431
- assert(stdx.zeroed(&superblock.view_headers_reserved));
432
-
433
- assert(superblock.checksum_padding == 0);
434
- assert(superblock.parent_padding == 0);
435
-
436
- superblock.checksum = superblock.calculate_checksum();
437
- }
438
-
439
- pub fn valid_checksum(superblock: *const SuperBlockHeader) bool {
440
- return superblock.checksum == superblock.calculate_checksum() and
441
- superblock.checksum_padding == 0;
442
- }
443
-
444
- pub fn checkpoint_id(superblock: *const SuperBlockHeader) u128 {
445
- return vsr.checksum(std.mem.asBytes(&superblock.vsr_state.checkpoint));
446
- }
447
-
448
- pub fn parent_checkpoint_id(superblock: *const SuperBlockHeader) u128 {
449
- return superblock.vsr_state.checkpoint.parent_checkpoint_id;
450
- }
451
-
452
- /// Does not consider { checksum, copy } when comparing equality.
453
- pub fn equal(a: *const SuperBlockHeader, b: *const SuperBlockHeader) bool {
454
- assert(a.release_format.value == b.release_format.value);
455
-
456
- assert(stdx.zeroed(&a.reserved));
457
- assert(stdx.zeroed(&b.reserved));
458
-
459
- assert(stdx.zeroed(&a.vsr_state.reserved));
460
- assert(stdx.zeroed(&b.vsr_state.reserved));
461
-
462
- assert(stdx.zeroed(&a.view_headers_reserved));
463
- assert(stdx.zeroed(&b.view_headers_reserved));
464
-
465
- assert(a.checksum_padding == 0);
466
- assert(b.checksum_padding == 0);
467
- assert(a.parent_padding == 0);
468
- assert(b.parent_padding == 0);
469
-
470
- if (a.version != b.version) return false;
471
- if (a.cluster != b.cluster) return false;
472
- if (a.sequence != b.sequence) return false;
473
- if (a.parent != b.parent) return false;
474
- if (!stdx.equal_bytes(VSRState, &a.vsr_state, &b.vsr_state)) return false;
475
- if (a.view_headers_count != b.view_headers_count) return false;
476
- if (!stdx.equal_bytes(
477
- [constants.view_headers_max]vsr.Header.Prepare,
478
- &a.view_headers_all,
479
- &b.view_headers_all,
480
- )) return false;
481
-
482
- return true;
483
- }
484
-
485
- pub fn view_headers(superblock: *const SuperBlockHeader) vsr.Headers.ViewChangeSlice {
486
- return vsr.Headers.ViewChangeSlice.init(
487
- if (superblock.vsr_state.log_view < superblock.vsr_state.view)
488
- .do_view_change
489
- else
490
- .start_view,
491
- superblock.view_headers_all[0..superblock.view_headers_count],
492
- );
493
- }
494
-
495
- pub fn manifest_references(superblock: *const SuperBlockHeader) ManifestReferences {
496
- const checkpoint_state = &superblock.vsr_state.checkpoint;
497
- return .{
498
- .oldest_address = checkpoint_state.manifest_oldest_address,
499
- .oldest_checksum = checkpoint_state.manifest_oldest_checksum,
500
- .newest_address = checkpoint_state.manifest_newest_address,
501
- .newest_checksum = checkpoint_state.manifest_newest_checksum,
502
- .block_count = checkpoint_state.manifest_block_count,
503
- };
504
- }
505
-
506
- pub fn free_set_reference(
507
- superblock: *const SuperBlockHeader,
508
- bitset: vsr.FreeSet.BitsetKind,
509
- ) TrailerReference {
510
- switch (bitset) {
511
- .blocks_acquired => {
512
- return .{
513
- .checksum = superblock.vsr_state.checkpoint
514
- .free_set_blocks_acquired_checksum,
515
- .last_block_address = superblock.vsr_state.checkpoint
516
- .free_set_blocks_acquired_last_block_address,
517
- .last_block_checksum = superblock.vsr_state.checkpoint
518
- .free_set_blocks_acquired_last_block_checksum,
519
- .trailer_size = superblock.vsr_state.checkpoint
520
- .free_set_blocks_acquired_size,
521
- };
522
- },
523
- .blocks_released => {
524
- return .{
525
- .checksum = superblock.vsr_state.checkpoint
526
- .free_set_blocks_released_checksum,
527
- .last_block_address = superblock.vsr_state.checkpoint
528
- .free_set_blocks_released_last_block_address,
529
- .last_block_checksum = superblock.vsr_state.checkpoint
530
- .free_set_blocks_released_last_block_checksum,
531
- .trailer_size = superblock.vsr_state.checkpoint
532
- .free_set_blocks_released_size,
533
- };
534
- },
535
- }
536
- }
537
-
538
- pub fn client_sessions_reference(superblock: *const SuperBlockHeader) TrailerReference {
539
- const checkpoint = &superblock.vsr_state.checkpoint;
540
- return .{
541
- .checksum = checkpoint.client_sessions_checksum,
542
- .last_block_address = checkpoint.client_sessions_last_block_address,
543
- .last_block_checksum = checkpoint.client_sessions_last_block_checksum,
544
- .trailer_size = checkpoint.client_sessions_size,
545
- };
546
- }
547
- };
548
-
549
- pub const ManifestReferences = struct {
550
- /// The chronologically first manifest block in the chain.
551
- oldest_checksum: u128,
552
- oldest_address: u64,
553
- /// The chronologically last manifest block in the chain.
554
- newest_checksum: u128,
555
- newest_address: u64,
556
- /// The number of manifest blocks in the chain.
557
- block_count: u32,
558
-
559
- pub fn empty(references: *const ManifestReferences) bool {
560
- if (references.block_count == 0) {
561
- assert(references.oldest_address == 0);
562
- assert(references.oldest_checksum == 0);
563
- assert(references.newest_address == 0);
564
- assert(references.newest_checksum == 0);
565
- return true;
566
- } else {
567
- assert(references.oldest_address != 0);
568
- assert(references.newest_address != 0);
569
- return false;
570
- }
571
- }
572
- };
573
-
574
- pub const TrailerReference = struct {
575
- /// Checksum over the entire encoded trailer.
576
- checksum: u128,
577
- last_block_address: u64,
578
- last_block_checksum: u128,
579
- trailer_size: u64,
580
-
581
- pub fn empty(reference: *const TrailerReference) bool {
582
- if (reference.trailer_size == 0) {
583
- assert(reference.checksum == vsr.checksum(&.{}));
584
- assert(reference.last_block_address == 0);
585
- assert(reference.last_block_checksum == 0);
586
- return true;
587
- } else {
588
- assert(reference.last_block_address > 0);
589
- return false;
590
- }
591
- }
592
- };
593
-
594
- comptime {
595
- switch (constants.superblock_copies) {
596
- 4, 6, 8 => {},
597
- else => @compileError("superblock_copies must be either { 4, 6, 8 } for flexible quorums."),
598
- }
599
- }
600
-
601
- /// The size of the entire superblock storage zone.
602
- pub const superblock_zone_size = superblock_copy_size * constants.superblock_copies;
603
-
604
- /// Leave enough padding after every superblock copy so that it is feasible, in the future, to
605
- /// modify the `pipeline_prepare_queue_max` of an existing cluster (up to a maximum of clients_max).
606
- /// (That is, this space is reserved for potential `view_headers`).
607
- const superblock_copy_padding: comptime_int = stdx.div_ceil(
608
- (constants.clients_max - constants.pipeline_prepare_queue_max) * @sizeOf(vsr.Header),
609
- constants.sector_size,
610
- ) * constants.sector_size;
611
-
612
- /// The size of an individual superblock header copy, including padding.
613
- pub const superblock_copy_size = @sizeOf(SuperBlockHeader) + superblock_copy_padding;
614
- comptime {
615
- assert(superblock_copy_padding % constants.sector_size == 0);
616
- assert(superblock_copy_size % constants.sector_size == 0);
617
- }
618
-
619
- /// The size of a data file that has an empty grid.
620
- pub const data_file_size_min =
621
- superblock_zone_size +
622
- constants.journal_size +
623
- constants.client_replies_size +
624
- vsr.Zone.size(.grid_padding).?;
625
-
626
- /// This table shows the sequence number progression of the SuperBlock's headers.
627
- ///
628
- /// action working staging disk
629
- /// format seq seq seq
630
- /// 0 - Initially the file has no headers.
631
- /// 0 1 -
632
- /// 0 1 1 Write a copyset for the first sequence.
633
- /// 1 1 1 Read quorum; verify 3/4 are valid.
634
- ///
635
- /// open seq seq seq
636
- /// a
637
- /// a a Read quorum; verify 2/4 are valid.
638
- /// a (a) a Repair any broken copies of `a`.
639
- ///
640
- /// checkpoint seq seq seq
641
- /// a a a
642
- /// a a+1
643
- /// a a+1 a+1
644
- /// a+1 a+1 a+1 Read quorum; verify 3/4 are valid.
645
- ///
646
- /// view_change seq seq seq
647
- /// a a
648
- /// a a+1 a The new sequence reuses the original parent.
649
- /// a a+1 a+1
650
- /// a+1 a+1 a+1 Read quorum; verify 3/4 are valid.
651
- /// working staging disk
652
- ///
653
- pub fn SuperBlockType(comptime Storage: type) type {
654
- return struct {
655
- const SuperBlock = @This();
656
-
657
- pub const Context = struct {
658
- superblock: *SuperBlock,
659
- callback: *const fn (context: *Context) void,
660
- caller: Caller,
661
-
662
- write: Storage.Write = undefined,
663
- read: Storage.Read = undefined,
664
- read_threshold: ?Quorums.Threshold = null,
665
- copy: ?u8 = null,
666
- /// Used by format(), checkpoint(), view_change().
667
- vsr_state: ?SuperBlockHeader.VSRState = null,
668
- /// Used by format() and view_change().
669
- view_headers: ?vsr.Headers.ViewChangeArray = null,
670
- repairs: ?Quorums.RepairIterator = null, // Used by open().
671
- };
672
-
673
- storage: *Storage,
674
-
675
- /// The superblock that was recovered at startup after a crash or that was last written.
676
- working: *align(constants.sector_size) SuperBlockHeader,
677
-
678
- /// The superblock that will replace the current working superblock once written.
679
- /// We cannot mutate any working state directly until it is safely on stable storage.
680
- /// Otherwise, we may accidentally externalize guarantees that are not yet durable.
681
- staging: *align(constants.sector_size) SuperBlockHeader,
682
-
683
- /// The copies that we read into at startup or when verifying the written superblock.
684
- reading: []align(constants.sector_size) SuperBlockHeader,
685
-
686
- /// It might seem that, at startup, we simply install the copy with the highest sequence.
687
- ///
688
- /// However, there's a scenario where:
689
- /// 1. We are able to write sequence 7 to 3/4 copies, with the last write being lost.
690
- /// 2. We startup and read all copies, with reads misdirected to the copy with sequence 6.
691
- ///
692
- /// Another scenario:
693
- /// 1. We begin to write sequence 7 to 1 copy and then crash.
694
- /// 2. At startup, the read to this copy fails, and we recover at sequence=6.
695
- /// 3. We then checkpoint another sequence 7 to 3/4 copies and crash.
696
- /// 4. At startup, we then see 4 copies with the same sequence with 1 checksum different.
697
- ///
698
- /// To mitigate these scenarios, we ensure that we are able to read a quorum of copies.
699
- /// This also gives us confidence that our working superblock has sufficient redundancy.
700
- quorums: Quorums = Quorums{},
701
-
702
- /// Whether the superblock has been opened. An open superblock may not be formatted.
703
- opened: bool = false,
704
- /// Runtime limit on the size of the datafile.
705
- storage_size_limit: u64,
706
-
707
- /// There may only be a single caller queued at a time, to ensure that the VSR protocol is
708
- /// careful to submit at most one view change at a time.
709
- queue_head: ?*Context = null,
710
- queue_tail: ?*Context = null,
711
-
712
- /// Set to non-null after open().
713
- /// Used for logging.
714
- replica_index: ?u8 = null,
715
-
716
- pub fn init(gpa: mem.Allocator, storage: *Storage, options: struct {
717
- storage_size_limit: u64,
718
- }) !SuperBlock {
719
- assert(options.storage_size_limit >= data_file_size_min);
720
- assert(options.storage_size_limit <= constants.storage_size_limit_max);
721
- assert(options.storage_size_limit % constants.sector_size == 0);
722
-
723
- const a = try gpa.alignedAlloc(SuperBlockHeader, constants.sector_size, 1);
724
- errdefer gpa.free(a);
725
-
726
- const b = try gpa.alignedAlloc(SuperBlockHeader, constants.sector_size, 1);
727
- errdefer gpa.free(b);
728
-
729
- const reading = try gpa.alignedAlloc(
730
- [constants.superblock_copies]SuperBlockHeader,
731
- constants.sector_size,
732
- 1,
733
- );
734
- errdefer gpa.free(reading);
735
-
736
- return SuperBlock{
737
- .storage = storage,
738
- .working = &a[0],
739
- .staging = &b[0],
740
- .reading = &reading[0],
741
- .storage_size_limit = options.storage_size_limit,
742
- };
743
- }
744
-
745
- pub fn deinit(superblock: *SuperBlock, gpa: mem.Allocator) void {
746
- gpa.destroy(superblock.working);
747
- gpa.destroy(superblock.staging);
748
- gpa.free(superblock.reading);
749
- }
750
-
751
- pub const FormatOptions = struct {
752
- cluster: u128,
753
- release: vsr.Release,
754
- replica: u8,
755
- replica_count: u8,
756
- /// Set to null during initial cluster formatting.
757
- /// Set to the target view when constructing a new data file for a reformatted replica.
758
- view: ?u32,
759
- };
760
-
761
- pub fn format(
762
- superblock: *SuperBlock,
763
- callback: *const fn (context: *Context) void,
764
- context: *Context,
765
- options: FormatOptions,
766
- ) void {
767
- assert(!superblock.opened);
768
- assert(superblock.replica_index == null);
769
-
770
- assert(options.release.value > 0);
771
- assert(options.replica_count > 0);
772
- assert(options.replica_count <= constants.replicas_max);
773
- assert(options.replica < options.replica_count + constants.standbys_max);
774
- if (options.view) |view| {
775
- assert(view > 1);
776
- assert(options.replica < options.replica_count);
777
- }
778
-
779
- const members = vsr.root_members(options.cluster);
780
- const replica_id = members[options.replica];
781
-
782
- superblock.replica_index = vsr.member_index(&members, replica_id);
783
-
784
- // This working copy provides the parent checksum, and will not be written to disk.
785
- // We therefore use zero values to make this parent checksum as stable as possible.
786
- superblock.working.* = .{
787
- .copy = 0,
788
- .version = SuperBlockVersion,
789
- .sequence = 0,
790
- .release_format = options.release,
791
- .cluster = options.cluster,
792
- .parent = 0,
793
- .vsr_state = .{
794
- .checkpoint = .{
795
- .header = mem.zeroes(vsr.Header.Prepare),
796
- .parent_checkpoint_id = 0,
797
- .grandparent_checkpoint_id = 0,
798
- .manifest_oldest_checksum = 0,
799
- .manifest_oldest_address = 0,
800
- .manifest_newest_checksum = 0,
801
- .manifest_newest_address = 0,
802
- .manifest_block_count = 0,
803
- .free_set_blocks_acquired_checksum = 0,
804
- .free_set_blocks_released_checksum = 0,
805
- .free_set_blocks_acquired_last_block_checksum = 0,
806
- .free_set_blocks_released_last_block_checksum = 0,
807
- .free_set_blocks_acquired_last_block_address = 0,
808
- .free_set_blocks_released_last_block_address = 0,
809
- .free_set_blocks_acquired_size = 0,
810
- .free_set_blocks_released_size = 0,
811
- .client_sessions_checksum = 0,
812
- .client_sessions_last_block_checksum = 0,
813
- .client_sessions_last_block_address = 0,
814
- .client_sessions_size = 0,
815
- .storage_size = 0,
816
- .snapshots_block_checksum = 0,
817
- .snapshots_block_address = 0,
818
- .release = vsr.Release.zero,
819
- },
820
- .replica_id = replica_id,
821
- .members = members,
822
- .commit_max = 0,
823
- .sync_op_min = 0,
824
- .sync_op_max = 0,
825
- .sync_view = 0,
826
- .log_view = 0,
827
- .view = 0,
828
- .replica_count = options.replica_count,
829
- },
830
- .view_headers_count = 0,
831
- .view_headers_all = @splat(mem.zeroes(vsr.Header.Prepare)),
832
- };
833
-
834
- superblock.working.set_checksum();
835
-
836
- context.* = .{
837
- .superblock = superblock,
838
- .callback = callback,
839
- .caller = .format,
840
- .vsr_state = SuperBlockHeader.VSRState.root(.{
841
- .cluster = options.cluster,
842
- .release = options.release,
843
- .replica_id = replica_id,
844
- .members = members,
845
- .replica_count = options.replica_count,
846
- .view = options.view orelse 0,
847
- }),
848
- .view_headers = vsr.Headers.ViewChangeArray.root(options.cluster),
849
- };
850
-
851
- superblock.acquire(context);
852
- }
853
-
854
- pub fn open(
855
- superblock: *SuperBlock,
856
- callback: *const fn (context: *Context) void,
857
- context: *Context,
858
- ) void {
859
- assert(!superblock.opened);
860
-
861
- context.* = .{
862
- .superblock = superblock,
863
- .callback = callback,
864
- .caller = .open,
865
- };
866
-
867
- superblock.acquire(context);
868
- }
869
-
870
- const UpdateCheckpoint = struct {
871
- header: vsr.Header.Prepare,
872
- view_attributes: ?struct {
873
- log_view: u32,
874
- view: u32,
875
- headers: *const vsr.Headers.ViewChangeArray,
876
- },
877
- commit_max: u64,
878
- sync_op_min: u64,
879
- sync_op_max: u64,
880
- manifest_references: ManifestReferences,
881
- free_set_references: struct {
882
- blocks_acquired: TrailerReference,
883
- blocks_released: TrailerReference,
884
- },
885
- client_sessions_reference: TrailerReference,
886
- storage_size: u64,
887
- release: vsr.Release,
888
- };
889
-
890
- /// Must update the commit_min and commit_min_checksum.
891
- pub fn checkpoint(
892
- superblock: *SuperBlock,
893
- callback: *const fn (context: *Context) void,
894
- context: *Context,
895
- update: UpdateCheckpoint,
896
- ) void {
897
- assert(superblock.opened);
898
- assert(update.header.op <= update.commit_max);
899
- assert(update.header.op > superblock.staging.vsr_state.checkpoint.header.op);
900
- assert(update.header.checksum !=
901
- superblock.staging.vsr_state.checkpoint.header.checksum);
902
- assert(update.sync_op_min <= update.sync_op_max);
903
- assert(update.release.value >= superblock.staging.vsr_state.checkpoint.release.value);
904
-
905
- assert(update.storage_size <= superblock.storage_size_limit);
906
- assert(update.storage_size >= data_file_size_min);
907
- assert((update.storage_size == data_file_size_min) ==
908
- (update.free_set_references.blocks_acquired.empty() and
909
- update.free_set_references.blocks_released.empty()));
910
-
911
- // NOTE: Within the vsr_state.checkpoint assignment below, do not read from vsr_state
912
- // directly. A miscompilation bug (as of Zig 0.11.0) causes fields to receive the
913
- // incorrect values.
914
- const vsr_state_staging = superblock.staging.vsr_state;
915
- const update_client_sessions = &update.client_sessions_reference;
916
-
917
- var vsr_state = superblock.staging.vsr_state;
918
- vsr_state.checkpoint = .{
919
- .header = update.header,
920
- .parent_checkpoint_id = superblock.staging.checkpoint_id(),
921
- .grandparent_checkpoint_id = vsr_state_staging.checkpoint.parent_checkpoint_id,
922
-
923
- .free_set_blocks_acquired_checksum = update.free_set_references
924
- .blocks_acquired.checksum,
925
- .free_set_blocks_released_checksum = update.free_set_references
926
- .blocks_released.checksum,
927
-
928
- .free_set_blocks_acquired_size = update.free_set_references
929
- .blocks_acquired.trailer_size,
930
- .free_set_blocks_released_size = update.free_set_references
931
- .blocks_released.trailer_size,
932
-
933
- .free_set_blocks_acquired_last_block_checksum = update.free_set_references
934
- .blocks_acquired.last_block_checksum,
935
- .free_set_blocks_released_last_block_checksum = update.free_set_references
936
- .blocks_released.last_block_checksum,
937
-
938
- .free_set_blocks_acquired_last_block_address = update.free_set_references
939
- .blocks_acquired.last_block_address,
940
- .free_set_blocks_released_last_block_address = update.free_set_references
941
- .blocks_released.last_block_address,
942
-
943
- .client_sessions_checksum = update_client_sessions.checksum,
944
- .client_sessions_last_block_checksum = update_client_sessions.last_block_checksum,
945
- .client_sessions_last_block_address = update_client_sessions.last_block_address,
946
- .client_sessions_size = update.client_sessions_reference.trailer_size,
947
-
948
- .manifest_oldest_checksum = update.manifest_references.oldest_checksum,
949
- .manifest_oldest_address = update.manifest_references.oldest_address,
950
- .manifest_newest_checksum = update.manifest_references.newest_checksum,
951
- .manifest_newest_address = update.manifest_references.newest_address,
952
- .manifest_block_count = update.manifest_references.block_count,
953
-
954
- .storage_size = update.storage_size,
955
- .snapshots_block_checksum = vsr_state_staging.checkpoint.snapshots_block_checksum,
956
- .snapshots_block_address = vsr_state_staging.checkpoint.snapshots_block_address,
957
- .release = update.release,
958
- };
959
- vsr_state.commit_max = update.commit_max;
960
- vsr_state.sync_op_min = update.sync_op_min;
961
- vsr_state.sync_op_max = update.sync_op_max;
962
- vsr_state.sync_view = 0;
963
- if (update.view_attributes) |*view_attributes| {
964
- assert(view_attributes.log_view <= view_attributes.view);
965
- view_attributes.headers.verify();
966
- vsr_state.log_view = view_attributes.log_view;
967
- vsr_state.view = view_attributes.view;
968
- }
969
-
970
- assert(superblock.staging.vsr_state.would_be_updated_by(vsr_state));
971
-
972
- context.* = .{
973
- .superblock = superblock,
974
- .callback = callback,
975
- .caller = .checkpoint,
976
- .vsr_state = vsr_state,
977
- .view_headers = if (update.view_attributes) |*view_attributes|
978
- view_attributes.headers.*
979
- else
980
- vsr.Headers.ViewChangeArray.init(
981
- superblock.staging.view_headers().command,
982
- superblock.staging.view_headers().slice,
983
- ),
984
- };
985
- superblock.log_context(context);
986
- superblock.acquire(context);
987
- }
988
-
989
- const UpdateViewChange = struct {
990
- commit_max: u64,
991
- log_view: u32,
992
- view: u32,
993
- headers: *const vsr.Headers.ViewChangeArray,
994
- sync_checkpoint: ?struct {
995
- checkpoint: *const vsr.CheckpointState,
996
- sync_op_min: u64,
997
- sync_op_max: u64,
998
- },
999
- };
1000
-
1001
- /// The replica calls view_change():
1002
- ///
1003
- /// - to persist its view/log_view — it cannot advertise either value until it is certain
1004
- /// they will never backtrack.
1005
- /// - to update checkpoint during sync
1006
- ///
1007
- /// The update must advance view/log_view (monotonically increasing) or checkpoint.
1008
- // TODO: the current naming confusing and needs changing: during sync, this function doesn't
1009
- // necessary advance the view.
1010
- pub fn view_change(
1011
- superblock: *SuperBlock,
1012
- callback: *const fn (context: *Context) void,
1013
- context: *Context,
1014
- update: UpdateViewChange,
1015
- ) void {
1016
- assert(superblock.opened);
1017
- assert(superblock.staging.vsr_state.commit_max <= update.commit_max);
1018
- assert(superblock.staging.vsr_state.view <= update.view);
1019
- assert(superblock.staging.vsr_state.log_view <= update.log_view);
1020
- assert(superblock.staging.vsr_state.log_view < update.log_view or
1021
- superblock.staging.vsr_state.view < update.view or
1022
- update.sync_checkpoint != null);
1023
- assert((update.headers.command == .start_view and update.log_view == update.view) or
1024
- (update.headers.command == .do_view_change and update.log_view < update.view));
1025
- assert(
1026
- superblock.staging.vsr_state.checkpoint.header.op <= update.headers.array.get(0).op,
1027
- );
1028
-
1029
- update.headers.verify();
1030
- assert(update.view >= update.log_view);
1031
-
1032
- var vsr_state = superblock.staging.vsr_state;
1033
- vsr_state.commit_max = update.commit_max;
1034
- vsr_state.log_view = update.log_view;
1035
- vsr_state.view = update.view;
1036
- if (update.sync_checkpoint) |*sync_checkpoint| {
1037
- assert(superblock.staging.vsr_state.checkpoint.header.op <
1038
- sync_checkpoint.checkpoint.header.op);
1039
-
1040
- const checkpoint_next = vsr.Checkpoint.checkpoint_after(
1041
- superblock.staging.vsr_state.checkpoint.header.op,
1042
- );
1043
- const checkpoint_next_next = vsr.Checkpoint.checkpoint_after(checkpoint_next);
1044
-
1045
- if (sync_checkpoint.checkpoint.header.op == checkpoint_next) {
1046
- assert(sync_checkpoint.checkpoint.parent_checkpoint_id ==
1047
- superblock.staging.checkpoint_id());
1048
- } else if (sync_checkpoint.checkpoint.header.op == checkpoint_next_next) {
1049
- assert(sync_checkpoint.checkpoint.grandparent_checkpoint_id ==
1050
- superblock.staging.checkpoint_id());
1051
- }
1052
-
1053
- vsr_state.checkpoint = sync_checkpoint.checkpoint.*;
1054
- vsr_state.sync_op_min = sync_checkpoint.sync_op_min;
1055
- vsr_state.sync_op_max = sync_checkpoint.sync_op_max;
1056
- }
1057
- assert(superblock.staging.vsr_state.would_be_updated_by(vsr_state));
1058
-
1059
- context.* = .{
1060
- .superblock = superblock,
1061
- .callback = callback,
1062
- .caller = .view_change,
1063
- .vsr_state = vsr_state,
1064
- .view_headers = update.headers.*,
1065
- };
1066
- superblock.log_context(context);
1067
- superblock.acquire(context);
1068
- }
1069
-
1070
- pub fn grid_size_limit(superblock: *const SuperBlock) usize {
1071
- return superblock.storage_size_limit - data_file_size_min;
1072
- }
1073
-
1074
- pub fn updating(superblock: *const SuperBlock, caller: Caller) bool {
1075
- assert(superblock.opened);
1076
-
1077
- if (superblock.queue_head) |head| {
1078
- if (head.caller == caller) return true;
1079
- }
1080
-
1081
- if (superblock.queue_tail) |tail| {
1082
- if (tail.caller == caller) return true;
1083
- }
1084
-
1085
- return false;
1086
- }
1087
-
1088
- fn write_staging(superblock: *SuperBlock, context: *Context) void {
1089
- assert(context.caller != .open);
1090
- assert(context.caller == .format or superblock.opened);
1091
- assert(context.copy == null);
1092
- context.vsr_state.?.assert_internally_consistent();
1093
- assert(superblock.queue_head == context);
1094
- assert(superblock.queue_tail == null);
1095
-
1096
- superblock.staging.* = superblock.working.*;
1097
- superblock.staging.sequence = superblock.staging.sequence + 1;
1098
- superblock.staging.parent = superblock.staging.checksum;
1099
- superblock.staging.vsr_state = context.vsr_state.?;
1100
-
1101
- if (context.view_headers) |*headers| {
1102
- assert(context.caller.updates_view_headers());
1103
-
1104
- superblock.staging.view_headers_count = headers.array.count_as(u32);
1105
- stdx.copy_disjoint(
1106
- .exact,
1107
- vsr.Header.Prepare,
1108
- superblock.staging.view_headers_all[0..headers.array.count()],
1109
- headers.array.const_slice(),
1110
- );
1111
- @memset(
1112
- superblock.staging.view_headers_all[headers.array.count()..],
1113
- std.mem.zeroes(vsr.Header.Prepare),
1114
- );
1115
- } else {
1116
- assert(!context.caller.updates_view_headers());
1117
- }
1118
-
1119
- context.copy = 0;
1120
- superblock.staging.set_checksum();
1121
- superblock.write_header(context);
1122
- }
1123
-
1124
- fn write_header(superblock: *SuperBlock, context: *Context) void {
1125
- assert(superblock.queue_head == context);
1126
-
1127
- // We update the working superblock for a checkpoint/format/view_change:
1128
- // open() does not update the working superblock, since it only writes to repair.
1129
- if (context.caller == .open) {
1130
- assert(superblock.staging.sequence == superblock.working.sequence);
1131
- } else {
1132
- assert(superblock.staging.sequence == superblock.working.sequence + 1);
1133
- assert(superblock.staging.parent == superblock.working.checksum);
1134
- }
1135
-
1136
- // The superblock cluster and replica should never change once formatted:
1137
- assert(superblock.staging.cluster == superblock.working.cluster);
1138
- assert(superblock.staging.vsr_state.replica_id ==
1139
- superblock.working.vsr_state.replica_id);
1140
-
1141
- const storage_size = superblock.staging.vsr_state.checkpoint.storage_size;
1142
- assert(storage_size >= data_file_size_min);
1143
- assert(storage_size <= constants.storage_size_limit_max);
1144
-
1145
- assert(context.copy.? < constants.superblock_copies);
1146
- superblock.staging.copy = context.copy.?;
1147
- // Updating the copy number should not affect the checksum, which was previously set:
1148
- assert(superblock.staging.valid_checksum());
1149
-
1150
- const buffer = mem.asBytes(superblock.staging);
1151
- const offset = superblock_copy_size * @as(u32, context.copy.?);
1152
-
1153
- log.debug("{?}: {s}: write_header: " ++
1154
- "checksum={x:0>32} sequence={} copy={} size={} offset={}", .{
1155
- superblock.replica_index,
1156
- @tagName(context.caller),
1157
- superblock.staging.checksum,
1158
- superblock.staging.sequence,
1159
- context.copy.?,
1160
- buffer.len,
1161
- offset,
1162
- });
1163
-
1164
- SuperBlock.assert_bounds(offset, buffer.len);
1165
-
1166
- superblock.storage.write_sectors(
1167
- write_header_callback,
1168
- &context.write,
1169
- buffer,
1170
- .superblock,
1171
- offset,
1172
- );
1173
- }
1174
-
1175
- fn write_header_callback(write: *Storage.Write) void {
1176
- const context: *Context = @alignCast(@fieldParentPtr("write", write));
1177
- const superblock = context.superblock;
1178
- const copy = context.copy.?;
1179
-
1180
- assert(superblock.queue_head == context);
1181
-
1182
- assert(copy < constants.superblock_copies);
1183
- assert(copy == superblock.staging.copy);
1184
-
1185
- if (context.caller == .open) {
1186
- context.copy = null;
1187
- superblock.repair(context);
1188
- return;
1189
- }
1190
-
1191
- if (copy + 1 == constants.superblock_copies) {
1192
- context.copy = null;
1193
- superblock.read_working(context, .verify);
1194
- } else {
1195
- context.copy = copy + 1;
1196
- superblock.write_header(context);
1197
- }
1198
- }
1199
-
1200
- fn read_working(
1201
- superblock: *SuperBlock,
1202
- context: *Context,
1203
- threshold: Quorums.Threshold,
1204
- ) void {
1205
- assert(superblock.queue_head == context);
1206
- assert(context.copy == null);
1207
- assert(context.read_threshold == null);
1208
-
1209
- // We do not submit reads in parallel, as while this would shave off 1ms, it would also
1210
- // increase the risk that a single fault applies to more reads due to temporal locality.
1211
- // This would make verification reads more flaky when we do experience a read fault.
1212
- // See "An Analysis of Data Corruption in the Storage Stack".
1213
-
1214
- context.copy = 0;
1215
- context.read_threshold = threshold;
1216
- for (superblock.reading) |*copy| copy.* = undefined;
1217
- superblock.read_header(context);
1218
- }
1219
-
1220
- fn read_header(superblock: *SuperBlock, context: *Context) void {
1221
- assert(superblock.queue_head == context);
1222
- assert(context.copy.? < constants.superblock_copies);
1223
- assert(context.read_threshold != null);
1224
-
1225
- const buffer = mem.asBytes(&superblock.reading[context.copy.?]);
1226
- const offset = superblock_copy_size * @as(u32, context.copy.?);
1227
-
1228
- log.debug("{?}: {s}: read_header: copy={} size={} offset={}", .{
1229
- superblock.replica_index,
1230
- @tagName(context.caller),
1231
- context.copy.?,
1232
- buffer.len,
1233
- offset,
1234
- });
1235
-
1236
- SuperBlock.assert_bounds(offset, buffer.len);
1237
-
1238
- superblock.storage.read_sectors(
1239
- read_header_callback,
1240
- &context.read,
1241
- buffer,
1242
- .superblock,
1243
- offset,
1244
- );
1245
- }
1246
-
1247
- fn read_header_callback(read: *Storage.Read) void {
1248
- const context: *Context = @alignCast(@fieldParentPtr("read", read));
1249
- const superblock = context.superblock;
1250
- const threshold = context.read_threshold.?;
1251
-
1252
- assert(superblock.queue_head == context);
1253
-
1254
- assert(context.copy.? < constants.superblock_copies);
1255
- if (context.copy.? + 1 != constants.superblock_copies) {
1256
- context.copy = context.copy.? + 1;
1257
- superblock.read_header(context);
1258
- return;
1259
- }
1260
-
1261
- context.read_threshold = null;
1262
- context.copy = null;
1263
-
1264
- if (superblock.quorums.working(superblock.reading, threshold)) |quorum| {
1265
- assert(quorum.valid);
1266
- assert(quorum.copies.count() >= threshold.count());
1267
- maybe(quorum.header.copy >= constants.superblock_copies); // `copy` may be corrupt.
1268
-
1269
- const working = quorum.header;
1270
-
1271
- if (working.version != SuperBlockVersion) {
1272
- log.err("found incompatible superblock version {}", .{working.version});
1273
- @panic("cannot read superblock with incompatible version");
1274
- }
1275
-
1276
- if (threshold == .verify) {
1277
- if (working.checksum != superblock.staging.checksum) {
1278
- @panic("superblock failed verification after writing");
1279
- }
1280
- assert(working.equal(superblock.staging));
1281
- }
1282
-
1283
- if (context.caller == .format) {
1284
- assert(working.sequence == 1);
1285
- assert(working.vsr_state.checkpoint.header.checksum ==
1286
- vsr.Header.Prepare.root(working.cluster).checksum);
1287
- assert(working.vsr_state.checkpoint.free_set_blocks_acquired_size == 0);
1288
- assert(working.vsr_state.checkpoint.free_set_blocks_released_size == 0);
1289
- assert(working.vsr_state.checkpoint.client_sessions_size == 0);
1290
- assert(working.vsr_state.checkpoint.storage_size == data_file_size_min);
1291
- assert(working.vsr_state.checkpoint.header.op == 0);
1292
- assert(working.vsr_state.commit_max == 0);
1293
- assert(working.vsr_state.log_view == 0);
1294
- maybe(working.vsr_state.view == 0); // On reformat view≠0.
1295
- assert(working.view_headers_count == 1);
1296
-
1297
- assert(working.vsr_state.replica_count <= constants.replicas_max);
1298
- assert(vsr.member_index(
1299
- &working.vsr_state.members,
1300
- working.vsr_state.replica_id,
1301
- ) != null);
1302
- }
1303
-
1304
- superblock.working.* = working.*;
1305
- superblock.staging.* = working.*;
1306
-
1307
- // Reset the copies, which may be nonzero due to corruption.
1308
- superblock.working.copy = 0;
1309
- superblock.staging.copy = 0;
1310
-
1311
- const working_checkpoint = &superblock.working.vsr_state.checkpoint;
1312
-
1313
- log.debug(
1314
- "{[replica]?}: " ++
1315
- "{[caller]s}: installed working superblock: checksum={[checksum]x:0>32} " ++
1316
- "sequence={[sequence]} " ++
1317
- "release={[release]} " ++
1318
- "cluster={[cluster]x:0>32} replica_id={[replica_id]} " ++
1319
- "size={[size]} " ++
1320
- "free_set_blocks_acquired_size={[free_set_blocks_acquired_size]} " ++
1321
- "free_set_blocks_released_size={[free_set_blocks_released_size]} " ++
1322
- "client_sessions_size={[client_sessions_size]} " ++
1323
- "checkpoint_id={[checkpoint_id]x:0>32} " ++
1324
- "commit_min_checksum={[commit_min_checksum]x:0>32} " ++
1325
- "commit_min={[commit_min]} " ++
1326
- "commit_max={[commit_max]} log_view={[log_view]} view={[view]} " ++
1327
- "sync_op_min={[sync_op_min]} sync_op_max={[sync_op_max]} " ++
1328
- "manifest_oldest_checksum={[manifest_oldest_checksum]x:0>32} " ++
1329
- "manifest_oldest_address={[manifest_oldest_address]} " ++
1330
- "manifest_newest_checksum={[manifest_newest_checksum]x:0>32} " ++
1331
- "manifest_newest_address={[manifest_newest_address]} " ++
1332
- "manifest_block_count={[manifest_block_count]} " ++
1333
- "snapshots_block_checksum={[snapshots_block_checksum]x:0>32} " ++
1334
- "snapshots_block_address={[snapshots_block_address]}",
1335
- .{
1336
- .replica = superblock.replica_index,
1337
- .caller = @tagName(context.caller),
1338
- .checksum = superblock.working.checksum,
1339
- .sequence = superblock.working.sequence,
1340
- .release = working_checkpoint.release,
1341
- .cluster = superblock.working.cluster,
1342
- .replica_id = superblock.working.vsr_state.replica_id,
1343
- .size = working_checkpoint.storage_size,
1344
- .free_set_blocks_acquired_size = working_checkpoint
1345
- .free_set_blocks_acquired_size,
1346
- .free_set_blocks_released_size = working_checkpoint
1347
- .free_set_blocks_released_size,
1348
- .client_sessions_size = working_checkpoint.client_sessions_size,
1349
- .checkpoint_id = superblock.working.checkpoint_id(),
1350
- .commit_min_checksum = working_checkpoint.header.checksum,
1351
- .commit_min = working_checkpoint.header.op,
1352
- .commit_max = superblock.working.vsr_state.commit_max,
1353
- .sync_op_min = superblock.working.vsr_state.sync_op_min,
1354
- .sync_op_max = superblock.working.vsr_state.sync_op_max,
1355
- .log_view = superblock.working.vsr_state.log_view,
1356
- .view = superblock.working.vsr_state.view,
1357
- .manifest_oldest_checksum = working_checkpoint.manifest_oldest_checksum,
1358
- .manifest_oldest_address = working_checkpoint.manifest_oldest_address,
1359
- .manifest_newest_checksum = working_checkpoint.manifest_newest_checksum,
1360
- .manifest_newest_address = working_checkpoint.manifest_newest_address,
1361
- .manifest_block_count = working_checkpoint.manifest_block_count,
1362
- .snapshots_block_checksum = working_checkpoint.snapshots_block_checksum,
1363
- .snapshots_block_address = working_checkpoint.snapshots_block_address,
1364
- },
1365
- );
1366
- for (superblock.working.view_headers().slice) |*header| {
1367
- log.debug("{?}: {s}: vsr_header: op={} checksum={x:0>32}", .{
1368
- superblock.replica_index,
1369
- @tagName(context.caller),
1370
- header.op,
1371
- header.checksum,
1372
- });
1373
- }
1374
-
1375
- if (superblock.working.vsr_state.checkpoint.storage_size >
1376
- superblock.storage_size_limit)
1377
- {
1378
- vsr.fatal(
1379
- .storage_size_exceeds_limit,
1380
- "data file too large size={} > limit={}, " ++
1381
- "restart the replica increasing '--limit-storage'",
1382
- .{
1383
- superblock.working.vsr_state.checkpoint.storage_size,
1384
- superblock.storage_size_limit,
1385
- },
1386
- );
1387
- }
1388
-
1389
- if (context.caller == .open) {
1390
- if (context.repairs) |_| {
1391
- // We just verified that the repair completed.
1392
- assert(threshold == .verify);
1393
- superblock.release(context);
1394
- } else {
1395
- assert(threshold == .open);
1396
-
1397
- context.repairs = quorum.repairs();
1398
- context.copy = null;
1399
- superblock.repair(context);
1400
- }
1401
- } else {
1402
- // TODO Consider calling TRIM() on Grid's free suffix after checkpointing.
1403
- superblock.release(context);
1404
- }
1405
- } else |err| switch (err) {
1406
- error.Fork => @panic("superblock forked"),
1407
- error.NotFound => @panic("superblock not found"),
1408
- error.QuorumLost => @panic("superblock quorum lost"),
1409
- error.ParentNotConnected => @panic("superblock parent not connected"),
1410
- error.ParentSkipped => @panic("superblock parent superseded"),
1411
- error.VSRStateNotMonotonic => @panic("superblock vsr state not monotonic"),
1412
- }
1413
- }
1414
-
1415
- fn repair(superblock: *SuperBlock, context: *Context) void {
1416
- assert(context.caller == .open);
1417
- assert(context.copy == null);
1418
- assert(superblock.queue_head == context);
1419
-
1420
- if (context.repairs.?.next()) |repair_copy| {
1421
- context.copy = repair_copy;
1422
- log.warn("{?}: repair: copy={}", .{ superblock.replica_index, repair_copy });
1423
-
1424
- superblock.staging.* = superblock.working.*;
1425
- superblock.write_header(context);
1426
- } else {
1427
- superblock.release(context);
1428
- }
1429
- }
1430
-
1431
- fn acquire(superblock: *SuperBlock, context: *Context) void {
1432
- if (superblock.queue_head) |head| {
1433
- // All operations are mutually exclusive with themselves.
1434
- assert(head.caller != context.caller);
1435
- assert(Caller.transitions.get(head.caller).?.contains(context.caller));
1436
- assert(superblock.queue_tail == null);
1437
-
1438
- log.debug("{?}: {s}: enqueued after {s}", .{
1439
- superblock.replica_index,
1440
- @tagName(context.caller),
1441
- @tagName(head.caller),
1442
- });
1443
-
1444
- superblock.queue_tail = context;
1445
- } else {
1446
- assert(superblock.queue_tail == null);
1447
-
1448
- superblock.queue_head = context;
1449
- log.debug("{?}: {s}: started", .{
1450
- superblock.replica_index,
1451
- @tagName(context.caller),
1452
- });
1453
-
1454
- if (Storage == @import("../testing/storage.zig").Storage) {
1455
- // We should have finished all pending superblock io before starting any more.
1456
- superblock.storage.assert_no_pending_reads(.superblock);
1457
- superblock.storage.assert_no_pending_writes(.superblock);
1458
- }
1459
-
1460
- if (context.caller == .open) {
1461
- superblock.read_working(context, .open);
1462
- } else {
1463
- superblock.write_staging(context);
1464
- }
1465
- }
1466
- }
1467
-
1468
- fn release(superblock: *SuperBlock, context: *Context) void {
1469
- assert(superblock.queue_head == context);
1470
-
1471
- log.debug("{?}: {s}: complete", .{
1472
- superblock.replica_index,
1473
- @tagName(context.caller),
1474
- });
1475
-
1476
- if (Storage == @import("../testing/storage.zig").Storage) {
1477
- // We should have finished all pending io by now.
1478
- superblock.storage.assert_no_pending_reads(.superblock);
1479
- superblock.storage.assert_no_pending_writes(.superblock);
1480
- }
1481
-
1482
- switch (context.caller) {
1483
- .format => {},
1484
- .open => {
1485
- assert(!superblock.opened);
1486
- superblock.opened = true;
1487
- superblock.replica_index = vsr.member_index(
1488
- &superblock.working.vsr_state.members,
1489
- superblock.working.vsr_state.replica_id,
1490
- ).?;
1491
- },
1492
- .checkpoint,
1493
- .view_change,
1494
- => {
1495
- assert(stdx.equal_bytes(
1496
- SuperBlockHeader.VSRState,
1497
- &superblock.staging.vsr_state,
1498
- &context.vsr_state.?,
1499
- ));
1500
- assert(stdx.equal_bytes(
1501
- SuperBlockHeader.VSRState,
1502
- &superblock.working.vsr_state,
1503
- &context.vsr_state.?,
1504
- ));
1505
- },
1506
- }
1507
-
1508
- const queue_tail = superblock.queue_tail;
1509
- superblock.queue_head = null;
1510
- superblock.queue_tail = null;
1511
- if (queue_tail) |tail| superblock.acquire(tail);
1512
-
1513
- context.callback(context);
1514
- }
1515
-
1516
- fn assert_bounds(offset: u64, size: u64) void {
1517
- assert(offset + size <= superblock_zone_size);
1518
- }
1519
-
1520
- fn log_context(superblock: *const SuperBlock, context: *const Context) void {
1521
- log.debug("{[replica]?}: {[caller]s}: " ++
1522
- "commit_min={[commit_min_old]}..{[commit_min_new]} " ++
1523
- "commit_max={[commit_max_old]}..{[commit_max_new]} " ++
1524
- "commit_min_checksum={[commit_min_checksum_old]x:0>32}.." ++
1525
- "{[commit_min_checksum_new]x:0>32} " ++
1526
- "log_view={[log_view_old]}..{[log_view_new]} " ++
1527
- "view={[view_old]}..{[view_new]} " ++
1528
- "head={[head_old]x:0>32}..{[head_new]x:0>32}", .{
1529
- .replica = superblock.replica_index,
1530
- .caller = @tagName(context.caller),
1531
-
1532
- .commit_min_old = superblock.staging.vsr_state.checkpoint.header.op,
1533
- .commit_min_new = context.vsr_state.?.checkpoint.header.op,
1534
-
1535
- .commit_max_old = superblock.staging.vsr_state.commit_max,
1536
- .commit_max_new = context.vsr_state.?.commit_max,
1537
-
1538
- .commit_min_checksum_old = superblock.staging.vsr_state.checkpoint.header.checksum,
1539
- .commit_min_checksum_new = context.vsr_state.?.checkpoint.header.checksum,
1540
-
1541
- .log_view_old = superblock.staging.vsr_state.log_view,
1542
- .log_view_new = context.vsr_state.?.log_view,
1543
-
1544
- .view_old = superblock.staging.vsr_state.view,
1545
- .view_new = context.vsr_state.?.view,
1546
-
1547
- .head_old = superblock.staging.view_headers().slice[0].checksum,
1548
- .head_new = if (context.view_headers) |*headers|
1549
- headers.array.get(0).checksum
1550
- else
1551
- 0,
1552
- });
1553
- }
1554
- };
1555
- }
1556
-
1557
- pub const Caller = enum {
1558
- format,
1559
- open,
1560
- checkpoint,
1561
- view_change,
1562
-
1563
- /// Beyond formatting and opening of the superblock, which are mutually exclusive of all
1564
- /// other operations, only the following queue combinations are allowed:
1565
- ///
1566
- /// from state → to states
1567
- const transitions = sets: {
1568
- const Set = std.enums.EnumSet(Caller);
1569
- break :sets std.enums.EnumMap(Caller, Set).init(.{
1570
- .format = Set.init(.{}),
1571
- .open = Set.init(.{}),
1572
- .checkpoint = Set.init(.{ .view_change = true }),
1573
- .view_change = Set.init(.{ .checkpoint = true }),
1574
- });
1575
- };
1576
-
1577
- fn updates_view_headers(caller: Caller) bool {
1578
- return switch (caller) {
1579
- .format => true,
1580
- .open => unreachable,
1581
- .checkpoint => true,
1582
- .view_change => true,
1583
- };
1584
- }
1585
- };
1586
-
1587
- test "SuperBlockHeader" {
1588
- const expect = std.testing.expect;
1589
-
1590
- var a = std.mem.zeroes(SuperBlockHeader);
1591
- a.version = SuperBlockVersion;
1592
- a.release_format = vsr.Release.minimum;
1593
- a.set_checksum();
1594
-
1595
- assert(a.copy == 0);
1596
- try expect(a.valid_checksum());
1597
-
1598
- a.copy += 1;
1599
- try expect(a.valid_checksum());
1600
-
1601
- a.version += 1;
1602
- try expect(!a.valid_checksum());
1603
- }