tigerbeetle 0.0.40 → 0.17.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +0 -25
  3. data/README.md +670 -80
  4. data/docs/migration.md +201 -0
  5. data/sig/tigerbeetle.rbs +271 -0
  6. data/src/ext/tigerbeetle/extconf.rb +47 -0
  7. data/src/ext/tigerbeetle/lib/aarch64-linux-gnu.2.27/libtb_client.so +0 -0
  8. data/src/ext/tigerbeetle/lib/aarch64-linux-musl/libtb_client.so +0 -0
  9. data/src/ext/tigerbeetle/lib/aarch64-macos/libtb_client.dylib +0 -0
  10. data/src/ext/tigerbeetle/lib/x86_64-linux-gnu.2.27/libtb_client.so +0 -0
  11. data/src/ext/tigerbeetle/lib/x86_64-linux-musl/libtb_client.so +0 -0
  12. data/src/ext/tigerbeetle/lib/x86_64-macos/libtb_client.dylib +0 -0
  13. data/src/ext/tigerbeetle/lib/x86_64-windows/tb_client.dll +0 -0
  14. data/src/ext/tigerbeetle/rb_tb_gen.h +458 -0
  15. data/{ext/tb_client/tigerbeetle/src/clients/rust/assets → src/ext/tigerbeetle}/tb_client.h +18 -16
  16. data/src/ext/tigerbeetle/tigerbeetle.c +310 -0
  17. data/src/tigerbeetle/bindings.rb +347 -0
  18. data/src/tigerbeetle/client.rb +129 -0
  19. data/src/tigerbeetle/completion_dispatcher.rb +108 -0
  20. data/src/tigerbeetle/id.rb +40 -0
  21. data/src/tigerbeetle/tb.rb +3 -0
  22. data/src/tigerbeetle/version.rb +3 -0
  23. data/src/tigerbeetle.rb +39 -0
  24. metadata +33 -350
  25. data/CHANGELOG.md +0 -162
  26. data/ext/tb_client/extconf.rb +0 -41
  27. data/ext/tb_client/tigerbeetle/LICENSE +0 -177
  28. data/ext/tb_client/tigerbeetle/build.zig +0 -2296
  29. data/ext/tb_client/tigerbeetle/src/aof.zig +0 -1000
  30. data/ext/tb_client/tigerbeetle/src/build/fetch.zig +0 -112
  31. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +0 -808
  32. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +0 -1283
  33. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +0 -1704
  34. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +0 -341
  35. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +0 -1450
  36. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +0 -1659
  37. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +0 -406
  38. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +0 -1092
  39. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +0 -286
  40. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +0 -158
  41. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +0 -229
  42. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +0 -110
  43. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +0 -386
  44. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +0 -34
  45. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +0 -281
  46. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +0 -312
  47. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +0 -138
  48. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +0 -466
  49. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +0 -157
  50. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +0 -90
  51. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +0 -203
  52. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +0 -79
  53. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +0 -542
  54. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +0 -109
  55. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +0 -86
  56. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +0 -370
  57. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +0 -386
  58. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +0 -167
  59. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +0 -126
  60. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +0 -996
  61. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +0 -748
  62. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +0 -3238
  63. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +0 -1718
  64. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +0 -190
  65. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +0 -104
  66. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +0 -75
  67. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +0 -522
  68. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +0 -267
  69. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +0 -3
  70. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +0 -379
  71. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +0 -131
  72. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +0 -63
  73. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +0 -588
  74. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +0 -73
  75. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +0 -106
  76. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +0 -305
  77. data/ext/tb_client/tigerbeetle/src/config.zig +0 -296
  78. data/ext/tb_client/tigerbeetle/src/constants.zig +0 -790
  79. data/ext/tb_client/tigerbeetle/src/copyhound.zig +0 -202
  80. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +0 -72
  81. data/ext/tb_client/tigerbeetle/src/direction.zig +0 -120
  82. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +0 -158
  83. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +0 -156
  84. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +0 -252
  85. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +0 -313
  86. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +0 -87
  87. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +0 -63
  88. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +0 -47
  89. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +0 -28
  90. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +0 -61
  91. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +0 -169
  92. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +0 -46
  93. data/ext/tb_client/tigerbeetle/src/ewah.zig +0 -445
  94. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +0 -128
  95. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +0 -171
  96. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +0 -179
  97. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +0 -662
  98. data/ext/tb_client/tigerbeetle/src/io/common.zig +0 -155
  99. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +0 -1093
  100. data/ext/tb_client/tigerbeetle/src/io/linux.zig +0 -1880
  101. data/ext/tb_client/tigerbeetle/src/io/test.zig +0 -1005
  102. data/ext/tb_client/tigerbeetle/src/io/windows.zig +0 -1598
  103. data/ext/tb_client/tigerbeetle/src/io.zig +0 -34
  104. data/ext/tb_client/tigerbeetle/src/iops.zig +0 -134
  105. data/ext/tb_client/tigerbeetle/src/list.zig +0 -236
  106. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +0 -848
  107. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +0 -179
  108. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +0 -424
  109. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +0 -420
  110. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +0 -2114
  111. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +0 -185
  112. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +0 -1146
  113. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +0 -1102
  114. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +0 -200
  115. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +0 -1495
  116. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +0 -739
  117. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +0 -166
  118. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +0 -754
  119. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +0 -1294
  120. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +0 -510
  121. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +0 -1241
  122. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -628
  123. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +0 -247
  124. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +0 -116
  125. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +0 -543
  126. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +0 -938
  127. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +0 -293
  128. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +0 -359
  129. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +0 -99
  130. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +0 -17
  131. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +0 -962
  132. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +0 -617
  133. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +0 -84
  134. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +0 -1500
  135. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -149
  136. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -7
  137. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +0 -865
  138. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +0 -607
  139. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +0 -843
  140. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +0 -90
  141. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +0 -40
  142. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +0 -629
  143. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +0 -933
  144. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +0 -534
  145. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +0 -469
  146. data/ext/tb_client/tigerbeetle/src/message_bus.zig +0 -1219
  147. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +0 -936
  148. data/ext/tb_client/tigerbeetle/src/message_pool.zig +0 -343
  149. data/ext/tb_client/tigerbeetle/src/multiversion.zig +0 -2195
  150. data/ext/tb_client/tigerbeetle/src/queue.zig +0 -390
  151. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +0 -201
  152. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +0 -1356
  153. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +0 -496
  154. data/ext/tb_client/tigerbeetle/src/repl.zig +0 -1034
  155. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +0 -973
  156. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +0 -1866
  157. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +0 -304
  158. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +0 -227
  159. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +0 -658
  160. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +0 -466
  161. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +0 -1058
  162. data/ext/tb_client/tigerbeetle/src/scripts.zig +0 -105
  163. data/ext/tb_client/tigerbeetle/src/shell.zig +0 -1195
  164. data/ext/tb_client/tigerbeetle/src/stack.zig +0 -260
  165. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +0 -911
  166. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +0 -2079
  167. data/ext/tb_client/tigerbeetle/src/state_machine.zig +0 -4872
  168. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +0 -288
  169. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +0 -3128
  170. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +0 -82
  171. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +0 -157
  172. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +0 -292
  173. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +0 -65
  174. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +0 -1414
  175. data/ext/tb_client/tigerbeetle/src/stdx/huge_page_allocator.zig +0 -115
  176. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +0 -92
  177. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +0 -677
  178. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +0 -336
  179. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +0 -511
  180. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +0 -112
  181. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +0 -1163
  182. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +0 -142
  183. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +0 -361
  184. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +0 -275
  185. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +0 -295
  186. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +0 -436
  187. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +0 -48
  188. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +0 -402
  189. data/ext/tb_client/tigerbeetle/src/storage.zig +0 -489
  190. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +0 -180
  191. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +0 -146
  192. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +0 -53
  193. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +0 -61
  194. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +0 -76
  195. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +0 -110
  196. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +0 -412
  197. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +0 -331
  198. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -458
  199. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +0 -1198
  200. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +0 -128
  201. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +0 -181
  202. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +0 -144
  203. data/ext/tb_client/tigerbeetle/src/testing/id.zig +0 -97
  204. data/ext/tb_client/tigerbeetle/src/testing/io.zig +0 -317
  205. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +0 -126
  206. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +0 -533
  207. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +0 -154
  208. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +0 -389
  209. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +0 -1247
  210. data/ext/tb_client/tigerbeetle/src/testing/table.zig +0 -249
  211. data/ext/tb_client/tigerbeetle/src/testing/time.zig +0 -98
  212. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +0 -212
  213. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +0 -26
  214. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +0 -579
  215. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +0 -39
  216. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +0 -214
  217. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +0 -34
  218. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +0 -785
  219. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +0 -543
  220. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +0 -181
  221. data/ext/tb_client/tigerbeetle/src/tidy.zig +0 -1449
  222. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +0 -227
  223. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +0 -1069
  224. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +0 -1422
  225. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +0 -1658
  226. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +0 -518
  227. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +0 -36
  228. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +0 -646
  229. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +0 -958
  230. data/ext/tb_client/tigerbeetle/src/time.zig +0 -236
  231. data/ext/tb_client/tigerbeetle/src/trace/event.zig +0 -745
  232. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +0 -462
  233. data/ext/tb_client/tigerbeetle/src/trace.zig +0 -556
  234. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +0 -321
  235. data/ext/tb_client/tigerbeetle/src/vopr.zig +0 -1785
  236. data/ext/tb_client/tigerbeetle/src/vortex.zig +0 -101
  237. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +0 -473
  238. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +0 -208
  239. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +0 -43
  240. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +0 -768
  241. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +0 -532
  242. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +0 -338
  243. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +0 -1019
  244. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +0 -279
  245. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +0 -1381
  246. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +0 -315
  247. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +0 -1460
  248. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +0 -757
  249. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +0 -797
  250. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +0 -2586
  251. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +0 -308
  252. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +0 -1777
  253. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +0 -715
  254. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +0 -185
  255. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +0 -333
  256. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +0 -12356
  257. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +0 -416
  258. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +0 -165
  259. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +0 -2928
  260. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +0 -1075
  261. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +0 -1603
  262. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -484
  263. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +0 -405
  264. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -355
  265. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +0 -29
  266. data/ext/tb_client/tigerbeetle/src/vsr.zig +0 -1727
  267. data/lib/tb_client/shared_lib.rb +0 -66
  268. data/lib/tb_client.rb +0 -282
  269. data/lib/tigerbeetle/account.rb +0 -38
  270. data/lib/tigerbeetle/account_balance.rb +0 -23
  271. data/lib/tigerbeetle/account_filter.rb +0 -31
  272. data/lib/tigerbeetle/atomic_counter.rb +0 -14
  273. data/lib/tigerbeetle/client.rb +0 -214
  274. data/lib/tigerbeetle/converters/account.rb +0 -63
  275. data/lib/tigerbeetle/converters/account_balance.rb +0 -31
  276. data/lib/tigerbeetle/converters/account_filter.rb +0 -32
  277. data/lib/tigerbeetle/converters/base.rb +0 -35
  278. data/lib/tigerbeetle/converters/create_accounts_result.rb +0 -21
  279. data/lib/tigerbeetle/converters/create_transfers_result.rb +0 -21
  280. data/lib/tigerbeetle/converters/query_filter.rb +0 -33
  281. data/lib/tigerbeetle/converters/time.rb +0 -23
  282. data/lib/tigerbeetle/converters/transfer.rb +0 -64
  283. data/lib/tigerbeetle/converters/uint_128.rb +0 -24
  284. data/lib/tigerbeetle/converters.rb +0 -12
  285. data/lib/tigerbeetle/error.rb +0 -4
  286. data/lib/tigerbeetle/id.rb +0 -30
  287. data/lib/tigerbeetle/platforms.rb +0 -9
  288. data/lib/tigerbeetle/query_filter.rb +0 -31
  289. data/lib/tigerbeetle/request.rb +0 -7
  290. data/lib/tigerbeetle/transfer.rb +0 -40
  291. data/lib/tigerbeetle/version.rb +0 -4
  292. data/lib/tigerbeetle.rb +0 -13
  293. data/tigerbeetle.gemspec +0 -60
@@ -1,1460 +0,0 @@
1
- const std = @import("std");
2
- const builtin = @import("builtin");
3
- const assert = std.debug.assert;
4
- const maybe = stdx.maybe;
5
- const mem = std.mem;
6
-
7
- const constants = @import("../constants.zig");
8
- const vsr = @import("../vsr.zig");
9
- const schema = @import("../lsm/schema.zig");
10
-
11
- const SuperBlockType = vsr.SuperBlockType;
12
- const QueueType = @import("../queue.zig").QueueType;
13
- const IOPSType = @import("../iops.zig").IOPSType;
14
- const SetAssociativeCacheType = @import("../lsm/set_associative_cache.zig").SetAssociativeCacheType;
15
- const stdx = @import("stdx");
16
- const GridBlocksMissing = @import("./grid_blocks_missing.zig").GridBlocksMissing;
17
- const Tracer = vsr.trace.Tracer;
18
-
19
- const FreeSet = @import("./free_set.zig").FreeSet;
20
-
21
- const log = stdx.log.scoped(.grid);
22
-
23
- pub const BlockPtr = *align(constants.sector_size) [constants.block_size]u8;
24
- pub const BlockPtrConst = *align(constants.sector_size) const [constants.block_size]u8;
25
-
26
- // Leave this outside GridType so we can call it from modules that don't know about Storage.
27
- pub fn allocate_block(
28
- allocator: mem.Allocator,
29
- ) error{OutOfMemory}!*align(constants.sector_size) [constants.block_size]u8 {
30
- const block = try allocator.alignedAlloc(u8, constants.sector_size, constants.block_size);
31
- @memset(block, 0);
32
- return block[0..constants.block_size];
33
- }
34
-
35
- /// The Grid provides access to on-disk blocks (blobs of `block_size` bytes).
36
- /// Each block is identified by an "address" (`u64`, beginning at 1).
37
- ///
38
- /// Recently/frequently-used blocks are transparently cached in memory.
39
- pub fn GridType(comptime Storage: type) type {
40
- const block_size = constants.block_size;
41
- const SuperBlock = SuperBlockType(Storage);
42
-
43
- return struct {
44
- const Grid = @This();
45
- const CheckpointTrailer = vsr.CheckpointTrailerType(Storage);
46
-
47
- pub const read_iops_max = constants.grid_iops_read_max;
48
- pub const write_iops_max = constants.grid_iops_write_max;
49
-
50
- pub const RepairTable = GridBlocksMissing.RepairTable;
51
- pub const RepairTableResult = GridBlocksMissing.RepairTableResult;
52
- pub const Reservation = @import("./free_set.zig").Reservation;
53
-
54
- // Grid just reuses the Storage's NextTick abstraction for simplicity.
55
- pub const NextTick = Storage.NextTick;
56
-
57
- pub const Write = struct {
58
- callback: *const fn (*Grid.Write) void,
59
- address: u64,
60
- repair: bool,
61
- block: *BlockPtr,
62
- /// The current checkpoint when the write began.
63
- /// Verifies that the checkpoint does not advance during the (non-repair) write.
64
- checkpoint_id: u128,
65
-
66
- /// Link for the Grid.write_queue linked list.
67
- link: QueueType(Write).Link = .{},
68
- };
69
-
70
- const WriteIOP = struct {
71
- grid: *Grid,
72
- completion: Storage.Write,
73
- write: *Write,
74
- };
75
-
76
- const ReadBlockCallback = union(enum) {
77
- /// If the local read fails, report the error.
78
- from_local_storage: *const fn (*Grid.Read, ReadBlockResult) void,
79
- /// If the local read fails, this read will be added to a linked list, which Replica can
80
- /// then interrogate each tick(). The callback passed to this function won't be called
81
- /// until the block has been recovered.
82
- from_local_or_global_storage: *const fn (*Grid.Read, BlockPtrConst) void,
83
- };
84
-
85
- pub const Read = struct {
86
- callback: ReadBlockCallback,
87
- address: u64,
88
- checksum: u128,
89
- /// The current checkpoint when the read began.
90
- /// Used to verify that the checkpoint does not advance while the read is in progress.
91
- checkpoint_id: u128,
92
- checkpoint_durable: bool,
93
-
94
- /// When coherent=true:
95
- /// - the block (address+checksum) is part of the current checkpoint.
96
- /// - the read will complete before the next checkpoint occurs.
97
- /// - callback == .from_local_or_global_storage
98
- /// When coherent=false:
99
- /// - the block (address+checksum) is not necessarily part of the current checkpoint.
100
- /// - the read may complete after a future checkpoint.
101
- /// - callback == .from_local_storage
102
- coherent: bool,
103
- cache_read: bool,
104
- cache_write: bool,
105
- pending: ReadPending = .{},
106
- resolves: QueueType(ReadPending) = QueueType(ReadPending).init(.{ .name = null }),
107
-
108
- grid: *Grid,
109
- next_tick: Grid.NextTick = undefined,
110
-
111
- /// Link for Grid.read_queue/Grid.read_global_queue linked lists.
112
- link: QueueType(Read).Link = .{},
113
- };
114
-
115
- /// Although we distinguish between the reasons why the block is invalid, we only use this
116
- /// info for logging, not logic.
117
- pub const ReadBlockResult = union(enum) {
118
- valid: BlockPtrConst,
119
- /// Checksum of block header is invalid.
120
- invalid_checksum,
121
- /// Checksum of block body is invalid.
122
- invalid_checksum_body,
123
- /// The block header is valid, but its `header.command` is not `block`.
124
- /// (This is possible due to misdirected IO).
125
- unexpected_command,
126
- /// The block is valid, but it is not the block we expected.
127
- unexpected_checksum,
128
- /// The block is valid, and it is the block we expected, but the last sector's padding
129
- /// is corrupt, so we will repair it just to be safe.
130
- invalid_padding,
131
- };
132
-
133
- const ReadPending = struct {
134
- /// Link for Read.resolves linked lists.
135
- link: QueueType(ReadPending).Link = .{},
136
- };
137
-
138
- const ReadIOP = struct {
139
- completion: Storage.Read,
140
- read: *Read,
141
- };
142
-
143
- const cache_interface = struct {
144
- inline fn address_from_address(address: *const u64) u64 {
145
- return address.*;
146
- }
147
-
148
- inline fn hash_address(address: u64) u64 {
149
- assert(address > 0);
150
- return stdx.hash_inline(address);
151
- }
152
- };
153
-
154
- const set_associative_cache_ways = 16;
155
-
156
- pub const Cache = SetAssociativeCacheType(
157
- u64,
158
- u64,
159
- cache_interface.address_from_address,
160
- cache_interface.hash_address,
161
- .{
162
- .ways = set_associative_cache_ways,
163
- // layout.cache_line_size isn't actually used to compute anything. Rather, it's
164
- // used by the SetAssociativeCache to assert() on sub-optimal values. In this case,
165
- // it's better to allow the user to be able to run with a much smaller grid cache
166
- // (256MiB vs 1GiB!) than trying to be completely optimal.
167
- .cache_line_size = 16,
168
- .value_alignment = @alignOf(u64),
169
- },
170
- );
171
-
172
- superblock: *SuperBlock,
173
- trace: *Tracer,
174
- free_set: FreeSet,
175
- free_set_checkpoint_blocks_acquired: CheckpointTrailer,
176
- free_set_checkpoint_blocks_released: CheckpointTrailer,
177
-
178
- blocks_missing: GridBlocksMissing,
179
-
180
- cache: Cache,
181
- /// Each entry in cache has a corresponding block.
182
- cache_blocks: []BlockPtr,
183
-
184
- write_iops: IOPSType(WriteIOP, write_iops_max) = .{},
185
- write_queue: QueueType(Write) = QueueType(Write).init(.{ .name = "grid_write" }),
186
-
187
- // Each read_iops has a corresponding block.
188
- read_iop_blocks: [read_iops_max]BlockPtr,
189
- read_iops: IOPSType(ReadIOP, read_iops_max) = .{},
190
- read_queue: QueueType(Read) = QueueType(Read).init(.{ .name = "grid_read" }),
191
-
192
- // List of Read.pending's which are in `read_queue` but also waiting for a free `read_iops`.
193
- read_pending_queue: QueueType(ReadPending) = QueueType(ReadPending).init(.{
194
- .name = "grid_read_pending",
195
- }),
196
- /// List of `Read`s which are waiting for a block repair from another replica.
197
- /// (Reads in this queue have already failed locally).
198
- ///
199
- /// Invariants:
200
- /// - For each read, read.callback=from_local_or_global_storage.
201
- read_global_queue: QueueType(Read) = QueueType(Read).init(.{ .name = "grid_read_global" }),
202
- // True if there's a read that is resolving callbacks.
203
- // If so, the read cache must not be invalidated.
204
- read_resolving: bool = false,
205
-
206
- callback: union(enum) {
207
- none,
208
- open: *const fn (*Grid) void,
209
- checkpoint: *const fn (*Grid) void,
210
- checkpoint_durable: *const fn (*Grid) void,
211
- cancel: *const fn (*Grid) void,
212
- } = .none,
213
-
214
- canceling_tick_context: NextTick = undefined,
215
-
216
- pub fn init(allocator: mem.Allocator, options: struct {
217
- superblock: *SuperBlock,
218
- trace: *Tracer,
219
- cache_blocks_count: u64 = Cache.value_count_max_multiple,
220
- missing_blocks_max: usize,
221
- missing_tables_max: usize,
222
- blocks_released_prior_checkpoint_durability_max: usize,
223
- }) !Grid {
224
- var free_set = try FreeSet.init(allocator, .{
225
- .grid_size_limit = options.superblock.grid_size_limit(),
226
- .blocks_released_prior_checkpoint_durability_max = options
227
- .blocks_released_prior_checkpoint_durability_max,
228
- });
229
- errdefer free_set.deinit(allocator);
230
-
231
- const free_set_encoded_size_max = free_set.encode_size_max();
232
- var free_set_checkpoint_blocks_acquired =
233
- try CheckpointTrailer.init(allocator, .free_set, free_set_encoded_size_max);
234
- errdefer free_set_checkpoint_blocks_acquired.deinit(allocator);
235
-
236
- var free_set_checkpoint_blocks_released =
237
- try CheckpointTrailer.init(allocator, .free_set, free_set_encoded_size_max);
238
- errdefer free_set_checkpoint_blocks_released.deinit(allocator);
239
-
240
- var blocks_missing = try GridBlocksMissing.init(allocator, .{
241
- .blocks_max = options.missing_blocks_max,
242
- .tables_max = options.missing_tables_max,
243
- });
244
- errdefer blocks_missing.deinit(allocator);
245
-
246
- const cache_blocks = try allocator.alloc(BlockPtr, options.cache_blocks_count);
247
- errdefer allocator.free(cache_blocks);
248
-
249
- for (cache_blocks, 0..) |*cache_block, i| {
250
- errdefer for (cache_blocks[0..i]) |block| allocator.free(block);
251
- cache_block.* = try allocate_block(allocator);
252
- }
253
- errdefer for (cache_blocks) |block| allocator.free(block);
254
-
255
- var cache = try Cache.init(allocator, options.cache_blocks_count, .{ .name = "grid" });
256
- errdefer cache.deinit(allocator);
257
-
258
- var read_iop_blocks: [read_iops_max]BlockPtr = undefined;
259
-
260
- for (&read_iop_blocks, 0..) |*read_iop_block, i| {
261
- errdefer for (read_iop_blocks[0..i]) |block| allocator.free(block);
262
- read_iop_block.* = try allocate_block(allocator);
263
- }
264
- errdefer for (&read_iop_blocks) |block| allocator.free(block);
265
-
266
- return Grid{
267
- .superblock = options.superblock,
268
- .trace = options.trace,
269
- .free_set = free_set,
270
- .free_set_checkpoint_blocks_acquired = free_set_checkpoint_blocks_acquired,
271
- .free_set_checkpoint_blocks_released = free_set_checkpoint_blocks_released,
272
- .blocks_missing = blocks_missing,
273
- .cache = cache,
274
- .cache_blocks = cache_blocks,
275
- .read_iop_blocks = read_iop_blocks,
276
- };
277
- }
278
-
279
- pub fn deinit(grid: *Grid, allocator: mem.Allocator) void {
280
- for (&grid.read_iop_blocks) |block| allocator.free(block);
281
-
282
- for (grid.cache_blocks) |block| allocator.free(block);
283
- allocator.free(grid.cache_blocks);
284
-
285
- grid.cache.deinit(allocator);
286
- grid.blocks_missing.deinit(allocator);
287
-
288
- grid.free_set_checkpoint_blocks_acquired.deinit(allocator);
289
- grid.free_set_checkpoint_blocks_released.deinit(allocator);
290
-
291
- grid.free_set.deinit(allocator);
292
-
293
- grid.* = undefined;
294
- }
295
-
296
- pub fn open(grid: *Grid, callback: *const fn (*Grid) void) void {
297
- assert(grid.callback == .none);
298
-
299
- grid.callback = .{ .open = callback };
300
- grid.free_set_checkpoint_blocks_acquired.open(
301
- grid,
302
- grid.superblock.working.free_set_reference(.blocks_acquired),
303
- open_free_set_callback_blocks_acquired,
304
- );
305
- grid.free_set_checkpoint_blocks_released.open(
306
- grid,
307
- grid.superblock.working.free_set_reference(.blocks_released),
308
- open_free_set_callback_blocks_released,
309
- );
310
- }
311
-
312
- fn open_free_set_callback_blocks_acquired(trailer: *CheckpointTrailer) void {
313
- assert(trailer.callback == .none);
314
- const grid: *Grid = @fieldParentPtr("free_set_checkpoint_blocks_acquired", trailer);
315
- grid.open_free_set_callback();
316
- }
317
-
318
- fn open_free_set_callback_blocks_released(trailer: *CheckpointTrailer) void {
319
- assert(trailer.callback == .none);
320
- const grid: *Grid = @fieldParentPtr("free_set_checkpoint_blocks_released", trailer);
321
- grid.open_free_set_callback();
322
- }
323
-
324
- fn open_free_set_callback(grid: *Grid) void {
325
- assert(grid.free_set_checkpoint_blocks_acquired.callback == .none or
326
- grid.free_set_checkpoint_blocks_released.callback == .none);
327
-
328
- const callback = grid.callback.open;
329
- // May still be reading the CheckpointTrailer for `blocks_acquired`.
330
- if (grid.free_set_checkpoint_blocks_acquired.callback == .open) return;
331
- assert(grid.free_set_checkpoint_blocks_acquired.callback == .none);
332
-
333
- // May still be reading the CheckpointTrailer for `blocks_released`.
334
- if (grid.free_set_checkpoint_blocks_released.callback == .open) return;
335
- assert(grid.free_set_checkpoint_blocks_released.callback == .none);
336
-
337
- {
338
- assert(!grid.free_set.opened);
339
- defer assert(grid.free_set.opened);
340
-
341
- const block_count_encoded_blocks_acquired =
342
- grid.free_set_checkpoint_blocks_acquired.block_count();
343
- const block_count_encoded_blocks_released =
344
- grid.free_set_checkpoint_blocks_released.block_count();
345
- grid.free_set.open(.{
346
- .encoded = .{
347
- .blocks_acquired = grid.free_set_checkpoint_blocks_acquired.decode_chunks(),
348
- .blocks_released = grid.free_set_checkpoint_blocks_released.decode_chunks(),
349
- },
350
- .free_set_block_addresses = .{
351
- .blocks_acquired = grid.free_set_checkpoint_blocks_acquired
352
- .block_addresses[0..block_count_encoded_blocks_acquired],
353
- .blocks_released = grid.free_set_checkpoint_blocks_released
354
- .block_addresses[0..block_count_encoded_blocks_released],
355
- },
356
- });
357
- assert((grid.free_set.count_acquired() > 0) ==
358
- (grid.free_set_checkpoint_blocks_acquired.size > 0));
359
-
360
- // Assert that the highest acquired address is compatible with storage_size.
361
- const storage_size: u64 = storage_size: {
362
- var storage_size = vsr.superblock.data_file_size_min;
363
- if (grid.free_set.highest_address_acquired()) |address| {
364
- assert(address > 0);
365
- assert(grid.free_set_checkpoint_blocks_acquired.size > 0);
366
- maybe(grid.free_set_checkpoint_blocks_released.size == 0);
367
-
368
- storage_size += address * constants.block_size;
369
- } else {
370
- assert(grid.free_set_checkpoint_blocks_acquired.size == 0);
371
- assert(grid.free_set_checkpoint_blocks_released.size == 0);
372
-
373
- assert(grid.free_set.count_released() == 0);
374
- }
375
- break :storage_size storage_size;
376
- };
377
- assert(storage_size == grid.superblock.working.vsr_state.checkpoint.storage_size);
378
-
379
- assert(grid.free_set.count_released() >=
380
- (grid.free_set_checkpoint_blocks_acquired.block_count() +
381
- grid.free_set_checkpoint_blocks_released.block_count()));
382
-
383
- assert(grid.free_set.count_reservations() == 0);
384
- }
385
- grid.callback = .none;
386
- callback(grid);
387
- }
388
-
389
- /// Checkpoint process is delicate:
390
- /// 1. Encode free set.
391
- /// 2. Derive the number of blocks required to store the encoding.
392
- /// 3. Allocate free set blocks for the encoding (in the old checkpoint).
393
- /// 4. Write the free set blocks to disk.
394
- /// 5. Mark the free set's own blocks as released (but not yet free).
395
- ///
396
- /// This function handles step 1, and calls CheckpointTrailer.checkpoint, which handles 2-4.
397
- /// The caller is responsible for calling Grid.mark_checkpoint_not_durable, which handles 5.
398
- pub fn checkpoint(grid: *Grid, callback: *const fn (*Grid) void) void {
399
- assert(grid.callback == .none);
400
- assert(grid.read_global_queue.empty());
401
-
402
- {
403
- assert(grid.free_set.count_reservations() == 0);
404
-
405
- const free_set_encoded = grid.free_set.encode_chunks(
406
- grid.free_set_checkpoint_blocks_acquired.encode_chunks(),
407
- grid.free_set_checkpoint_blocks_released.encode_chunks(),
408
- );
409
-
410
- grid.free_set_checkpoint_blocks_acquired.size =
411
- free_set_encoded.encoded_size_blocks_acquired;
412
- grid.free_set_checkpoint_blocks_released.size =
413
- free_set_encoded.encoded_size_blocks_released;
414
-
415
- assert(grid.free_set_checkpoint_blocks_acquired.size % @sizeOf(FreeSet.Word) == 0);
416
- assert(grid.free_set_checkpoint_blocks_released.size % @sizeOf(FreeSet.Word) == 0);
417
- }
418
-
419
- grid.callback = .{ .checkpoint = callback };
420
- grid.free_set_checkpoint_blocks_acquired
421
- .checkpoint(checkpoint_free_set_blocks_acquired_callback);
422
- grid.free_set_checkpoint_blocks_released
423
- .checkpoint(checkpoint_free_set_blocks_released_callback);
424
- }
425
-
426
- fn checkpoint_free_set_blocks_acquired_callback(trailer: *CheckpointTrailer) void {
427
- assert(trailer.callback == .none);
428
- const grid: *Grid = @fieldParentPtr("free_set_checkpoint_blocks_acquired", trailer);
429
- assert(grid.callback == .checkpoint);
430
-
431
- grid.checkpoint_join();
432
- }
433
-
434
- fn checkpoint_free_set_blocks_released_callback(trailer: *CheckpointTrailer) void {
435
- assert(trailer.callback == .none);
436
- const grid: *Grid = @fieldParentPtr("free_set_checkpoint_blocks_released", trailer);
437
- assert(grid.callback == .checkpoint);
438
-
439
- grid.checkpoint_join();
440
- }
441
-
442
- fn checkpoint_join(grid: *Grid) void {
443
- assert(grid.callback == .checkpoint);
444
- assert(grid.read_global_queue.empty());
445
-
446
- if (grid.free_set_checkpoint_blocks_acquired.callback == .checkpoint) {
447
- return; // Still writing free set `blocks_acquired` bitset.
448
- }
449
- assert(grid.free_set_checkpoint_blocks_acquired.callback == .none);
450
-
451
- if (grid.free_set_checkpoint_blocks_released.callback == .checkpoint) {
452
- return; // Still writing free set `blocks_released` bitset.
453
- }
454
- assert(grid.free_set_checkpoint_blocks_released.callback == .none);
455
-
456
- const callback = grid.callback.checkpoint;
457
- grid.callback = .none;
458
- callback(grid);
459
- }
460
-
461
- /// Mark the current checkpoint as not durable, then release the blocks acquired for the
462
- /// FreeSet checkpoints (to be freed when the *next* checkpoint becomes durable).
463
- ///
464
- /// The ordering is important here, if we were to release these blocks before the checkpoint
465
- /// is marked as not durable, they would erroneously be freed when the *current* checkpoint
466
- /// becomes durable.
467
- pub fn mark_checkpoint_not_durable(grid: *Grid) void {
468
- assert(grid.free_set.checkpoint_durable);
469
- defer assert(!grid.free_set.checkpoint_durable);
470
-
471
- grid.free_set.mark_checkpoint_not_durable();
472
- grid.release(grid.free_set_checkpoint_blocks_acquired
473
- .block_addresses[0..grid.free_set_checkpoint_blocks_acquired.block_count()]);
474
- grid.release(grid.free_set_checkpoint_blocks_released
475
- .block_addresses[0..grid.free_set_checkpoint_blocks_released.block_count()]);
476
- }
477
-
478
- /// Now that the checkpoint is durable on a commit quorum of replicas:
479
- /// 1. Await all pending repair-writes to blocks that are about to be freed.
480
- /// 2. Mark currently released blocks as free and eligible for acquisition.
481
- ///
482
- /// This function handles step 1.
483
- /// The caller is responsible for calling FreeSet.checkpoint which handles 2.
484
- pub fn checkpoint_durable(grid: *Grid, callback: *const fn (*Grid) void) void {
485
- assert(!grid.free_set.checkpoint_durable);
486
- grid.callback = .{ .checkpoint_durable = callback };
487
-
488
- grid.blocks_missing.checkpoint_durable_commence(&grid.free_set);
489
- if (grid.blocks_missing.state.checkpoint_durable.aborting == 0) {
490
- grid.checkpoint_durable_join();
491
- }
492
- }
493
-
494
- fn checkpoint_durable_join(grid: *Grid) void {
495
- assert(grid.callback == .checkpoint_durable);
496
-
497
- // We are still repairing some blocks released during the previous checkpoint interval.
498
- if (!grid.blocks_missing.checkpoint_durable_complete()) {
499
- assert(grid.write_iops.executing() > 0);
500
- return;
501
- }
502
-
503
- var write_queue_iterator = grid.write_queue.iterate();
504
- while (write_queue_iterator.next()) |write| {
505
- assert(write.repair);
506
- assert(!grid.free_set.is_free(write.address));
507
- assert(!grid.free_set.to_be_freed_at_checkpoint_durability(write.address));
508
- }
509
-
510
- var write_iops_iterator = grid.write_iops.iterate();
511
- while (write_iops_iterator.next()) |iop| {
512
- assert(!grid.free_set.is_free(iop.write.address));
513
- assert(!grid.free_set.to_be_freed_at_checkpoint_durability(iop.write.address));
514
- }
515
-
516
- // Now that there are no writes to released blocks, we can safely mark them as free,
517
- // and also mark the checkpoint as durable.
518
- assert(!grid.free_set.checkpoint_durable);
519
- defer assert(grid.free_set.checkpoint_durable);
520
-
521
- grid.free_set.mark_checkpoint_durable();
522
-
523
- const callback = grid.callback.checkpoint_durable;
524
- grid.callback = .none;
525
- callback(grid);
526
- }
527
-
528
- pub fn cancel(grid: *Grid, callback: *const fn (*Grid) void) void {
529
- // grid.open() is cancellable the same way that read_block()/write_block() are.
530
- switch (grid.callback) {
531
- .none => {},
532
- .open => {},
533
- .checkpoint_durable => {},
534
- .checkpoint => unreachable,
535
- .cancel => unreachable,
536
- }
537
-
538
- grid.callback = .{ .cancel = callback };
539
-
540
- grid.blocks_missing.cancel();
541
- grid.read_queue.reset();
542
- grid.read_pending_queue.reset();
543
- grid.read_global_queue.reset();
544
- grid.write_queue.reset();
545
- grid.superblock.storage.reset_next_tick_lsm();
546
- grid.superblock.storage.on_next_tick(
547
- .vsr,
548
- cancel_tick_callback,
549
- &grid.canceling_tick_context,
550
- );
551
- }
552
-
553
- fn cancel_tick_callback(next_tick: *NextTick) void {
554
- const grid: *Grid = @alignCast(@fieldParentPtr("canceling_tick_context", next_tick));
555
- if (grid.callback != .cancel) return;
556
-
557
- assert(grid.read_queue.empty());
558
- assert(grid.read_pending_queue.empty());
559
- assert(grid.read_global_queue.empty());
560
- assert(grid.write_queue.empty());
561
-
562
- grid.cancel_join_callback();
563
- }
564
-
565
- fn cancel_join_callback(grid: *Grid) void {
566
- assert(grid.callback == .cancel);
567
- assert(grid.read_queue.empty());
568
- assert(grid.read_pending_queue.empty());
569
- assert(grid.read_global_queue.empty());
570
- assert(grid.write_queue.empty());
571
-
572
- if (grid.read_iops.executing() == 0 and
573
- grid.write_iops.executing() == 0)
574
- {
575
- const callback = grid.callback.cancel;
576
- grid.callback = .none;
577
-
578
- callback(grid);
579
- }
580
- }
581
-
582
- pub fn on_next_tick(
583
- grid: *Grid,
584
- callback: *const fn (*Grid.NextTick) void,
585
- next_tick: *Grid.NextTick,
586
- ) void {
587
- assert(grid.callback != .cancel);
588
- grid.superblock.storage.on_next_tick(.lsm, callback, next_tick);
589
- }
590
-
591
- /// Aborts if there are not enough free blocks to fill the reservation.
592
- /// Should a use case arise where a null return would be preferred, this can be split
593
- /// into panicking and non-panicking versions.
594
- pub fn reserve(grid: *Grid, blocks_count: usize) Reservation {
595
- assert(grid.callback == .none);
596
- return grid.free_set.reserve(blocks_count) orelse vsr.fatal(
597
- .storage_size_would_exceed_limit,
598
- "data file would become too large size={} + reservation={} > limit={}, " ++
599
- "restart the replica increasing '--limit-storage'",
600
- .{
601
- grid.superblock.working.vsr_state.checkpoint.storage_size,
602
- blocks_count * constants.block_size,
603
- grid.superblock.storage_size_limit,
604
- },
605
- );
606
- }
607
-
608
- /// Forfeit a reservation.
609
- pub fn forfeit(grid: *Grid, reservation: Reservation) void {
610
- assert(grid.callback == .none);
611
- return grid.free_set.forfeit(reservation);
612
- }
613
-
614
- /// Returns a just-allocated block.
615
- /// The caller is responsible for not acquiring more blocks than they reserved.
616
- pub fn acquire(grid: *Grid, reservation: Reservation) u64 {
617
- assert(grid.callback == .none);
618
- return grid.free_set.acquire(reservation).?;
619
- }
620
-
621
- /// This function should be used to release addresses, instead of release()
622
- /// on the free set directly, as this also demotes the address within the block cache.
623
- /// This reduces conflict misses in the block cache, by freeing ways soon after they are
624
- /// released.
625
- ///
626
- /// This does not remove the blocks from the cache — the blocks can be read until the next
627
- /// checkpoint.
628
- ///
629
- /// Asserts that the addresses are not currently being read from or written to.
630
- pub fn release(grid: *Grid, addresses: []const u64) void {
631
- assert(grid.callback == .none);
632
- for (addresses) |address| {
633
- assert(address > 0);
634
-
635
- // It's safe to release an address that is being read from, because the superblock
636
- // will not allow it to be overwritten before the end of the bar.
637
- assert(grid.writing(address, null) != .create);
638
-
639
- grid.cache.demote(address);
640
- grid.free_set.release(address);
641
- }
642
- }
643
-
644
- const Writing = enum { create, repair, not_writing };
645
-
646
- /// If the address is being written to by a non-repair, return `.create`.
647
- /// If the address is being written to by a repair, return `.repair`.
648
- /// Otherwise return `.not_writing`.
649
- ///
650
- /// Assert that the block pointer is not being used for any write if non-null.
651
- pub fn writing(grid: *Grid, address: u64, block: ?BlockPtrConst) Writing {
652
- assert(address > 0);
653
-
654
- var result = Writing.not_writing;
655
- {
656
- var it = grid.write_queue.iterate();
657
- while (it.next()) |queued_write| {
658
- assert(block != queued_write.block.*);
659
- if (address == queued_write.address) {
660
- assert(result == .not_writing);
661
- result = if (queued_write.repair) .repair else .create;
662
- }
663
- }
664
- }
665
- {
666
- var it = grid.write_iops.iterate();
667
- while (it.next()) |iop| {
668
- assert(block != iop.write.block.*);
669
- if (address == iop.write.address) {
670
- assert(result == .not_writing);
671
- result = if (iop.write.repair) .repair else .create;
672
- }
673
- }
674
- }
675
- return result;
676
- }
677
-
678
- /// Assert that the address is not currently being read from (disregarding repairs).
679
- /// Assert that the block pointer is not being used for any read if non-null.
680
- fn assert_not_reading(grid: *Grid, address: u64, block: ?BlockPtrConst) void {
681
- assert(address > 0);
682
-
683
- for ([_]*const QueueType(Read){
684
- &grid.read_queue,
685
- &grid.read_global_queue,
686
- }) |queue| {
687
- var it = queue.iterate();
688
- while (it.next()) |queued_read| {
689
- if (queued_read.coherent) {
690
- assert(address != queued_read.address);
691
- }
692
- }
693
- }
694
- {
695
- var it = grid.read_iops.iterate();
696
- while (it.next()) |iop| {
697
- if (iop.read.coherent) {
698
- assert(address != iop.read.address);
699
- }
700
- const iop_block = grid.read_iop_blocks[grid.read_iops.index(iop)];
701
- assert(block != iop_block);
702
- }
703
- }
704
- }
705
-
706
- pub fn assert_only_repairing(grid: *Grid) void {
707
- assert(grid.callback != .cancel);
708
- assert(grid.read_global_queue.empty());
709
-
710
- var read_queue_iterator = grid.read_queue.iterate();
711
- while (read_queue_iterator.next()) |read| {
712
- // Scrubber reads are independent from LSM operations.
713
- assert(!read.coherent);
714
- }
715
-
716
- var write_queue_iterator = grid.write_queue.iterate();
717
- while (write_queue_iterator.next()) |write| {
718
- assert(write.repair);
719
- assert(!grid.free_set.is_free(write.address));
720
- }
721
-
722
- var write_iops = grid.write_iops.iterate();
723
- while (write_iops.next()) |iop| {
724
- assert(iop.write.repair);
725
- assert(!grid.free_set.is_free(iop.write.address));
726
- }
727
- }
728
-
729
- pub fn fulfill_block(grid: *Grid, block: BlockPtrConst) bool {
730
- assert(grid.superblock.opened);
731
- assert(grid.callback != .cancel);
732
-
733
- const block_header = schema.header_from_block(block);
734
- assert(block_header.cluster == grid.superblock.working.cluster);
735
-
736
- var reads_iterator = grid.read_global_queue.iterate();
737
- while (reads_iterator.next()) |read| {
738
- if (read.checksum == block_header.checksum and
739
- read.address == block_header.address)
740
- {
741
- assert(block_header.release.value <=
742
- grid.superblock.working.vsr_state.checkpoint.release.value);
743
- grid.read_global_queue.remove(read);
744
- grid.read_block_resolve(read, .{ .valid = block });
745
- return true;
746
- }
747
- }
748
- return false;
749
- }
750
-
751
- pub fn repair_block_waiting(grid: *Grid, address: u64, checksum: u128) bool {
752
- assert(grid.superblock.opened);
753
- assert(grid.callback != .cancel);
754
- return grid.blocks_missing.block_waiting(address, checksum);
755
- }
756
-
757
- /// Write a block that should already exist but (maybe) doesn't because of:
758
- /// - a disk fault, or
759
- /// - the block was missed due to state sync.
760
- ///
761
- /// NOTE: This will consume `block` and replace it with a fresh block.
762
- pub fn repair_block(
763
- grid: *Grid,
764
- callback: *const fn (*Grid.Write) void,
765
- write: *Grid.Write,
766
- block: *BlockPtr,
767
- ) void {
768
- const block_header = schema.header_from_block(block.*);
769
- assert(grid.superblock.opened);
770
- assert(grid.callback != .cancel);
771
- assert(grid.writing(block_header.address, block.*) == .not_writing);
772
- assert(grid.blocks_missing.block_waiting(block_header.address, block_header.checksum));
773
- assert(!grid.free_set.is_free(block_header.address));
774
-
775
- grid.blocks_missing.write_commence(block_header.address, block_header.checksum);
776
- grid.write_block(callback, write, block, .repair);
777
- }
778
-
779
- /// Write a block for the first time.
780
- /// NOTE: This will consume `block` and replace it with a fresh block.
781
- pub fn create_block(
782
- grid: *Grid,
783
- callback: *const fn (*Grid.Write) void,
784
- write: *Grid.Write,
785
- block: *BlockPtr,
786
- ) void {
787
- const block_header = schema.header_from_block(block.*);
788
- assert(grid.superblock.opened);
789
- assert(grid.callback == .none or grid.callback == .checkpoint);
790
- assert((grid.callback == .checkpoint) == (block_header.block_type == .free_set));
791
- assert(grid.writing(block_header.address, block.*) == .not_writing);
792
- assert(!grid.blocks_missing.block_waiting(
793
- block_header.address,
794
- block_header.checksum,
795
- ));
796
- assert(!grid.free_set.is_free(block_header.address));
797
- grid.assert_not_reading(block_header.address, block.*);
798
-
799
- grid.write_block(callback, write, block, .create);
800
- }
801
-
802
- /// NOTE: This will consume `block` and replace it with a fresh block.
803
- fn write_block(
804
- grid: *Grid,
805
- callback: *const fn (*Grid.Write) void,
806
- write: *Grid.Write,
807
- block: *BlockPtr,
808
- trigger: enum { create, repair },
809
- ) void {
810
- const header = schema.header_from_block(block.*);
811
- assert(header.cluster == grid.superblock.working.cluster);
812
- assert(header.release.value <=
813
- grid.superblock.working.vsr_state.checkpoint.release.value);
814
-
815
- assert(grid.superblock.opened);
816
- assert(grid.callback != .cancel);
817
- assert(grid.writing(header.address, block.*) == .not_writing);
818
- assert(!grid.free_set.is_free(header.address));
819
- grid.assert_coherent(header.address, header.checksum);
820
-
821
- if (constants.verify) {
822
- for (grid.cache_blocks) |cache_block| {
823
- assert(cache_block != block.*);
824
- }
825
- }
826
-
827
- // Zero sector padding.
828
- @memset(block.*[header.size..vsr.sector_ceil(header.size)], 0);
829
-
830
- write.* = .{
831
- .callback = callback,
832
- .address = header.address,
833
- .repair = trigger == .repair,
834
- .block = block,
835
- .checkpoint_id = grid.superblock.working.checkpoint_id(),
836
- };
837
-
838
- const iop = grid.write_iops.acquire() orelse {
839
- grid.write_queue.push(write);
840
- return;
841
- };
842
-
843
- grid.write_block_with(iop, write);
844
- }
845
-
846
- fn write_block_with(grid: *Grid, iop: *WriteIOP, write: *Write) void {
847
- assert(!grid.free_set.is_free(write.address));
848
-
849
- grid.trace.start(.{ .grid_write = .{ .iop = grid.write_iops.index(iop) } });
850
-
851
- iop.* = .{
852
- .grid = grid,
853
- .completion = undefined,
854
- .write = write,
855
- };
856
-
857
- const write_header = schema.header_from_block(write.block.*);
858
- assert(write_header.size > @sizeOf(vsr.Header));
859
- assert(write_header.size <= constants.block_size);
860
- assert(stdx.zeroed(
861
- write.block.*[write_header.size..vsr.sector_ceil(write_header.size)],
862
- ));
863
-
864
- grid.superblock.storage.write_sectors(
865
- write_block_callback,
866
- &iop.completion,
867
- write.block.*[0..vsr.sector_ceil(write_header.size)],
868
- .grid,
869
- block_offset(write.address),
870
- );
871
- }
872
-
873
- fn write_block_callback(completion: *Storage.Write) void {
874
- const iop: *WriteIOP = @fieldParentPtr("completion", completion);
875
-
876
- // We must copy these values to the stack as they will be overwritten
877
- // when we release the iop and potentially start a queued write.
878
- const grid = iop.grid;
879
- const completed_write = iop.write;
880
-
881
- // We can only update the cache if the Grid is not resolving callbacks with a cache
882
- // block.
883
- assert(!grid.read_resolving);
884
- assert(!grid.free_set.is_free(completed_write.address));
885
-
886
- if (!completed_write.repair) {
887
- assert(grid.superblock.working.checkpoint_id() == completed_write.checkpoint_id);
888
- }
889
-
890
- // Insert the write block into the cache, and give the evicted block to the writer.
891
- const cache_index = grid.cache.upsert(&completed_write.address).index;
892
- const cache_block = &grid.cache_blocks[cache_index];
893
- std.mem.swap(BlockPtr, cache_block, completed_write.block);
894
- // This block content won't be used again. We could overwrite the entire thing, but that
895
- // would be more expensive.
896
- @memset(completed_write.block.*[0..@sizeOf(vsr.Header)], 0);
897
-
898
- const cache_block_header = schema.header_from_block(cache_block.*);
899
- assert(cache_block_header.address == completed_write.address);
900
- grid.assert_coherent(completed_write.address, cache_block_header.checksum);
901
-
902
- grid.trace.stop(.{ .grid_write = .{ .iop = grid.write_iops.index(iop) } });
903
-
904
- if (grid.callback == .cancel) {
905
- assert(grid.write_queue.empty());
906
-
907
- grid.write_iops.release(iop);
908
- grid.cancel_join_callback();
909
- return;
910
- }
911
-
912
- // Start a queued write if possible *before* calling the completed
913
- // write's callback. This ensures that if the callback calls
914
- // Grid.write_block() it doesn't preempt the queue.
915
- //
916
- // (Don't pop from the write queue until after the read-repairs are resolved.
917
- // Otherwise their resolution might complete grid cancellation, but the replica has
918
- // not released its own write iop (via callback).)
919
- if (grid.write_queue.pop()) |queued_write| {
920
- grid.write_block_with(iop, queued_write);
921
- } else {
922
- grid.write_iops.release(iop);
923
- }
924
-
925
- // Precede the write's callback, since the callback takes back ownership of the block.
926
- if (completed_write.repair) grid.blocks_missing.write_complete(cache_block.*);
927
- // This call must come after (logically) releasing the IOP. Otherwise we risk tripping
928
- // assertions forbidding concurrent writes using the same block/address
929
- // if the callback calls write_block().
930
- completed_write.callback(completed_write);
931
-
932
- // We start awaiting pending repairs when the checkpoint becomes durable.
933
- if (grid.callback == .checkpoint_durable) grid.checkpoint_durable_join();
934
- }
935
-
936
- /// Fetch the block synchronously from cache, if possible.
937
- /// The returned block pointer is only valid until the next Grid write.
938
- pub fn read_block_from_cache(
939
- grid: *Grid,
940
- address: u64,
941
- checksum: u128,
942
- options: struct { coherent: bool },
943
- ) ?BlockPtrConst {
944
- assert(grid.superblock.opened);
945
- assert(grid.callback != .cancel);
946
- if (options.coherent) {
947
- assert(grid.writing(address, null) != .create);
948
- assert(!grid.free_set.is_free(address));
949
- grid.assert_coherent(address, checksum);
950
- }
951
-
952
- assert(address > 0);
953
-
954
- const cache_index = grid.cache.get_index(address) orelse return null;
955
- const cache_block = grid.cache_blocks[cache_index];
956
-
957
- const header = schema.header_from_block(cache_block);
958
- assert(header.address == address);
959
- assert(header.cluster == grid.superblock.working.cluster);
960
- assert(header.release.value <=
961
- grid.superblock.working.vsr_state.checkpoint.release.value);
962
-
963
- if (header.checksum == checksum) {
964
- if (constants.verify and
965
- options.coherent and
966
- grid.superblock.working.vsr_state.sync_op_max == 0)
967
- {
968
- grid.verify_read_from_cache(address, cache_block);
969
- }
970
-
971
- return cache_block;
972
- } else {
973
- if (options.coherent) {
974
- assert(grid.superblock.working.vsr_state.sync_op_max > 0);
975
- }
976
-
977
- return null;
978
- }
979
- }
980
-
981
- pub fn read_block(
982
- grid: *Grid,
983
- callback: ReadBlockCallback,
984
- read: *Grid.Read,
985
- address: u64,
986
- checksum: u128,
987
- options: struct {
988
- cache_read: bool,
989
- cache_write: bool,
990
- },
991
- ) void {
992
- assert(grid.superblock.opened);
993
- assert(grid.callback != .cancel);
994
- assert(address > 0);
995
-
996
- switch (callback) {
997
- .from_local_storage => {
998
- maybe(grid.callback == .checkpoint);
999
- // We try to read the block even when it is free — if we recently released it,
1000
- // it might be found on disk anyway.
1001
- maybe(grid.free_set.is_free(address));
1002
- maybe(grid.writing(address, null) == .create);
1003
- },
1004
- .from_local_or_global_storage => {
1005
- assert(grid.callback != .checkpoint);
1006
- assert(!grid.free_set.is_free(address));
1007
- assert(grid.writing(address, null) != .create);
1008
- grid.assert_coherent(address, checksum);
1009
- },
1010
- }
1011
-
1012
- read.* = .{
1013
- .callback = callback,
1014
- .address = address,
1015
- .checksum = checksum,
1016
- .coherent = callback == .from_local_or_global_storage,
1017
- .checkpoint_durable = grid.free_set.checkpoint_durable,
1018
- .cache_read = options.cache_read,
1019
- .cache_write = options.cache_write,
1020
- .checkpoint_id = grid.superblock.working.checkpoint_id(),
1021
- .grid = grid,
1022
- };
1023
-
1024
- if (options.cache_read) {
1025
- grid.on_next_tick(read_block_tick_callback, &read.next_tick);
1026
- } else {
1027
- read_block_tick_callback(&read.next_tick);
1028
- }
1029
- }
1030
-
1031
- fn read_block_tick_callback(next_tick: *Storage.NextTick) void {
1032
- const read: *Grid.Read = @alignCast(@fieldParentPtr("next_tick", next_tick));
1033
- const grid = read.grid;
1034
- assert(grid.superblock.opened);
1035
- assert(grid.callback != .cancel);
1036
- if (read.coherent) {
1037
- assert(!grid.free_set.is_free(read.address));
1038
- assert(grid.writing(read.address, null) != .create);
1039
- }
1040
-
1041
- assert(read.address > 0);
1042
-
1043
- // Check if a read is already processing/recovering and merge with it.
1044
- for ([_]*const QueueType(Read){
1045
- &grid.read_queue,
1046
- &grid.read_global_queue,
1047
- }) |queue| {
1048
- // Don't remote-repair repairs – the block may not belong in our current checkpoint.
1049
- if (read.callback == .from_local_storage) {
1050
- if (queue == &grid.read_global_queue) continue;
1051
- }
1052
-
1053
- var it = queue.iterate();
1054
- while (it.next()) |queued_read| {
1055
- if (queued_read.address == read.address) {
1056
- // TODO check all read options match
1057
- if (queued_read.checksum == read.checksum) {
1058
- queued_read.resolves.push(&read.pending);
1059
- return;
1060
- } else {
1061
- assert(!queued_read.coherent or !read.coherent);
1062
- }
1063
- }
1064
- }
1065
- }
1066
-
1067
- // When Read.cache_read is set, the caller of read_block() is responsible for calling
1068
- // us via next_tick().
1069
- if (read.cache_read) {
1070
- if (grid.read_block_from_cache(
1071
- read.address,
1072
- read.checksum,
1073
- .{ .coherent = read.coherent },
1074
- )) |cache_block| {
1075
- grid.read_block_resolve(read, .{ .valid = cache_block });
1076
- return;
1077
- }
1078
- }
1079
-
1080
- // Become the "root" read that's fetching the block for the given address. The fetch
1081
- // happens asynchronously to avoid stack-overflow and nested cache invalidation.
1082
- grid.read_queue.push(read);
1083
-
1084
- // Grab an IOP to resolve the block from storage.
1085
- // Failure to do so means the read is queued to receive an IOP when one finishes.
1086
- const iop = grid.read_iops.acquire() orelse {
1087
- grid.read_pending_queue.push(&read.pending);
1088
- return;
1089
- };
1090
-
1091
- grid.read_block_with(iop, read);
1092
- }
1093
-
1094
- fn read_block_with(grid: *Grid, iop: *Grid.ReadIOP, read: *Grid.Read) void {
1095
- const address = read.address;
1096
- assert(address > 0);
1097
-
1098
- // We can only update the cache if the Grid is not resolving callbacks with a cache
1099
- // block.
1100
- assert(!grid.read_resolving);
1101
-
1102
- grid.trace.start(.{ .grid_read = .{ .iop = grid.read_iops.index(iop) } });
1103
-
1104
- iop.* = .{
1105
- .completion = undefined,
1106
- .read = read,
1107
- };
1108
- const iop_block = grid.read_iop_blocks[grid.read_iops.index(iop)];
1109
-
1110
- grid.superblock.storage.read_sectors(
1111
- read_block_callback,
1112
- &iop.completion,
1113
- iop_block,
1114
- .grid,
1115
- block_offset(address),
1116
- );
1117
- }
1118
-
1119
- fn read_block_callback(completion: *Storage.Read) void {
1120
- const iop: *ReadIOP = @fieldParentPtr("completion", completion);
1121
- const read = iop.read;
1122
- const grid = read.grid;
1123
- const iop_block = &grid.read_iop_blocks[grid.read_iops.index(iop)];
1124
-
1125
- grid.trace.stop(.{ .grid_read = .{ .iop = grid.read_iops.index(iop) } });
1126
-
1127
- if (grid.callback == .cancel) {
1128
- grid.read_iops.release(iop);
1129
- grid.cancel_join_callback();
1130
- return;
1131
- }
1132
-
1133
- // Insert the block into the cache, and give the evicted block to `iop`.
1134
- const cache_index =
1135
- if (read.cache_write) grid.cache.upsert(&read.address).index else null;
1136
- const block = block: {
1137
- if (read.cache_write) {
1138
- const cache_block = &grid.cache_blocks[cache_index.?];
1139
- std.mem.swap(BlockPtr, iop_block, cache_block);
1140
- // This block content won't be used again. We could overwrite the entire thing,
1141
- // but that would be more expensive.
1142
- @memset(iop_block.*[0..@sizeOf(vsr.Header)], 0);
1143
- break :block cache_block;
1144
- } else {
1145
- break :block iop_block;
1146
- }
1147
- };
1148
-
1149
- // Handoff the iop to a pending read or release it before resolving the callbacks below.
1150
- if (grid.read_pending_queue.pop()) |pending| {
1151
- const queued_read: *Read = @alignCast(@fieldParentPtr("pending", pending));
1152
- grid.read_block_with(iop, queued_read);
1153
- } else {
1154
- grid.read_iops.release(iop);
1155
- }
1156
-
1157
- // Remove the "root" read so that the address is no longer actively reading / locked.
1158
- grid.read_queue.remove(read);
1159
-
1160
- const result = read_block_validate(block.*, .{
1161
- .address = read.address,
1162
- .checksum = read.checksum,
1163
- });
1164
-
1165
- if (result != .valid) {
1166
- const header =
1167
- mem.bytesAsValue(vsr.Header.Block, block.*[0..@sizeOf(vsr.Header)]);
1168
- log.warn(
1169
- "{}: {s}: expected address={} checksum={x:0>32}, " ++
1170
- "found address={} checksum={x:0>32}",
1171
- .{
1172
- grid.superblock.replica_index.?,
1173
- @tagName(result),
1174
- read.address,
1175
- read.checksum,
1176
- header.address,
1177
- header.checksum,
1178
- },
1179
- );
1180
-
1181
- if (read.cache_write) {
1182
- // Don't cache a corrupt or incorrect block.
1183
- const removed = grid.cache.remove(read.address);
1184
- assert(removed != null);
1185
- }
1186
-
1187
- if (constants.verify) grid.verify_read_fault(read);
1188
- }
1189
-
1190
- grid.read_block_resolve(read, result);
1191
- }
1192
-
1193
- fn read_block_validate(block: BlockPtrConst, expect: struct {
1194
- address: u64,
1195
- checksum: u128,
1196
- }) ReadBlockResult {
1197
- const header = mem.bytesAsValue(vsr.Header.Block, block[0..@sizeOf(vsr.Header)]);
1198
-
1199
- if (!header.valid_checksum()) return .invalid_checksum;
1200
- if (header.command != .block) return .unexpected_command;
1201
-
1202
- assert(header.size >= @sizeOf(vsr.Header));
1203
- assert(header.size <= constants.block_size);
1204
-
1205
- const block_body = block[@sizeOf(vsr.Header)..header.size];
1206
- if (!header.valid_checksum_body(block_body)) {
1207
- return .invalid_checksum_body;
1208
- }
1209
-
1210
- if (header.checksum != expect.checksum) return .unexpected_checksum;
1211
-
1212
- if (!stdx.zeroed(block[header.size..vsr.sector_ceil(header.size)])) {
1213
- return .invalid_padding;
1214
- }
1215
-
1216
- assert(header.address == expect.address);
1217
- return .{ .valid = block };
1218
- }
1219
-
1220
- fn read_block_resolve(grid: *Grid, read: *Grid.Read, result: ReadBlockResult) void {
1221
- assert(grid.callback != .cancel);
1222
-
1223
- // Guard to make sure the cache cannot be updated by any read.callbacks() below.
1224
- assert(!grid.read_resolving);
1225
- grid.read_resolving = true;
1226
- defer {
1227
- assert(grid.read_resolving);
1228
- grid.read_resolving = false;
1229
- }
1230
-
1231
- if (read.coherent) {
1232
- assert(!grid.free_set.is_free(read.address));
1233
- assert(read.checkpoint_id == grid.superblock.working.checkpoint_id());
1234
- grid.assert_coherent(read.address, read.checksum);
1235
- }
1236
-
1237
- if (result == .valid) {
1238
- const header = schema.header_from_block(result.valid);
1239
- assert(header.cluster == grid.superblock.working.cluster);
1240
- assert(header.release.value <=
1241
- grid.superblock.working.vsr_state.checkpoint.release.value);
1242
- assert(header.address == read.address);
1243
- assert(header.checksum == read.checksum);
1244
- }
1245
-
1246
- var read_remote_resolves: QueueType(ReadPending) = QueueType(ReadPending).init(.{
1247
- .name = read.resolves.any.name,
1248
- });
1249
-
1250
- // Resolve all reads queued to the address with the block.
1251
- while (read.resolves.pop()) |pending| {
1252
- const pending_read: *Read = @alignCast(@fieldParentPtr("pending", pending));
1253
- assert(pending_read.address == read.address);
1254
- assert(pending_read.checksum == read.checksum);
1255
- if (pending_read.coherent) {
1256
- assert(pending_read.checkpoint_id == grid.superblock.working.checkpoint_id());
1257
- }
1258
-
1259
- switch (pending_read.callback) {
1260
- .from_local_storage => |callback| callback(pending_read, result),
1261
- .from_local_or_global_storage => |callback| {
1262
- if (result == .valid) {
1263
- callback(pending_read, result.valid);
1264
- } else {
1265
- read_remote_resolves.push(&pending_read.pending);
1266
- }
1267
- },
1268
- }
1269
- }
1270
-
1271
- // Then invoke the callback with the cache block (which should be valid for the duration
1272
- // of the callback as any nested Grid calls cannot synchronously update the cache).
1273
- switch (read.callback) {
1274
- .from_local_storage => |callback| callback(read, result),
1275
- .from_local_or_global_storage => |callback| {
1276
- if (result == .valid) {
1277
- callback(read, result.valid);
1278
- } else {
1279
- read_remote_resolves.push(&read.pending);
1280
- }
1281
- },
1282
- }
1283
-
1284
- // On the result of an invalid block, move the "root" read (and all others it
1285
- // resolves) to recovery queue. Future reads on the same address will see the "root"
1286
- // read in the recovery queue and enqueue to it.
1287
- if (read_remote_resolves.pop()) |read_remote_head_pending| {
1288
- const read_remote_head: *Read = @alignCast(
1289
- @fieldParentPtr("pending", read_remote_head_pending),
1290
- );
1291
- assert(read_remote_head.callback == .from_local_or_global_storage);
1292
- assert(read_remote_head.coherent);
1293
-
1294
- log.debug("{}: read_block: fault: address={} checksum={x:0>32}", .{
1295
- grid.superblock.replica_index.?,
1296
- read_remote_head.address,
1297
- read_remote_head.checksum,
1298
- });
1299
-
1300
- read_remote_head.resolves = read_remote_resolves;
1301
- grid.read_global_queue.push(read_remote_head);
1302
-
1303
- if (grid.blocks_missing.repair_blocks_available() > 0) {
1304
- grid.blocks_missing.repair_block(
1305
- read_remote_head.address,
1306
- read_remote_head.checksum,
1307
- );
1308
- }
1309
- }
1310
- }
1311
-
1312
- fn block_offset(address: u64) u64 {
1313
- assert(address > 0);
1314
-
1315
- return (address - 1) * block_size;
1316
- }
1317
-
1318
- fn assert_coherent(grid: *const Grid, address: u64, checksum: u128) void {
1319
- assert(!grid.free_set.is_free(address));
1320
-
1321
- const TestStorage = @import("../testing/storage.zig").Storage;
1322
- if (Storage != TestStorage) return;
1323
-
1324
- if (grid.superblock.storage.options.grid_checker) |checker| {
1325
- checker.assert_coherent(
1326
- &grid.superblock.working.vsr_state.checkpoint,
1327
- grid.free_set.checkpoint_durable,
1328
- address,
1329
- checksum,
1330
- );
1331
-
1332
- checker.assert_coherent(
1333
- &grid.superblock.staging.vsr_state.checkpoint,
1334
- checkpoint_durable: {
1335
- if (grid.superblock.working.checkpoint_id() ==
1336
- grid.superblock.staging.checkpoint_id())
1337
- {
1338
- break :checkpoint_durable grid.free_set.checkpoint_durable;
1339
- } else {
1340
- // Checkpoint is currently being written to the superblock. Pass
1341
- // checkpoint_durable=False as we update free_set.checkpoint_durable
1342
- // only *after* the checkpoint is written to the superblock.
1343
- assert(grid.superblock.staging.parent_checkpoint_id() ==
1344
- grid.superblock.working.checkpoint_id());
1345
- assert(grid.free_set.checkpoint_durable);
1346
- break :checkpoint_durable false;
1347
- }
1348
- },
1349
- address,
1350
- checksum,
1351
- );
1352
- }
1353
- }
1354
-
1355
- fn verify_read_from_cache(
1356
- grid: *const Grid,
1357
- address: u64,
1358
- cached_block: BlockPtrConst,
1359
- ) void {
1360
- comptime assert(constants.verify);
1361
-
1362
- const TestStorage = @import("../testing/storage.zig").Storage;
1363
- if (Storage != TestStorage) return;
1364
-
1365
- const actual_block = grid.superblock.storage.grid_block(address).?;
1366
- const actual_header = schema.header_from_block(actual_block);
1367
- const cached_header = schema.header_from_block(cached_block);
1368
- assert(cached_header.checksum == actual_header.checksum);
1369
-
1370
- assert(std.mem.eql(
1371
- u8,
1372
- cached_block[0..cached_header.size],
1373
- actual_block[0..actual_header.size],
1374
- ));
1375
- }
1376
-
1377
- /// Called when we fail to read a block.
1378
- fn verify_read_fault(grid: *const Grid, read: *const Read) void {
1379
- comptime assert(constants.verify);
1380
-
1381
- const TestStorage = @import("../testing/storage.zig").Storage;
1382
- if (Storage != TestStorage) return;
1383
-
1384
- // Only check coherent reads -- i.e., when we know for certain that the read's
1385
- // address/checksum belongs in our current checkpoint.
1386
- if (!read.coherent) return;
1387
-
1388
- // Check our storage (bypassing faults).
1389
- if (grid.superblock.storage.grid_block(read.address)) |actual_block| {
1390
- const actual_header = schema.header_from_block(actual_block);
1391
- if (actual_header.checksum == read.checksum) {
1392
- // Exact block found. Since the read failed anyway, it must have been a
1393
- // simulated read fault.
1394
- assert(grid.superblock.storage.area_faulty(.{
1395
- .grid = .{ .address = read.address },
1396
- }));
1397
- } else {
1398
- // Different block found -- since this is a coherent read, we must be syncing.
1399
- assert(grid.superblock.working.vsr_state.sync_op_max > 0);
1400
- }
1401
- } else {
1402
- // No block found -- since this is a coherent read, we must by syncing.
1403
- assert(grid.superblock.working.vsr_state.sync_op_max > 0);
1404
- }
1405
- }
1406
-
1407
- /// Mark all blocks in the grid cache as MADV_DONTDUMP. Must be done after transitioning
1408
- /// to static, as the combination of madvise() + mremap() can cause an EFAULT.
1409
- ///
1410
- /// It's OK that some blocks, such as the blocks used by compaction escape this -- this is
1411
- /// not to stop sensitive data from appearing in core dumps, but rather to keep the core
1412
- /// dump size managable even with a large grid cache.
1413
- pub fn madv_dont_dump(grid: *const Grid) !void {
1414
- if (builtin.target.os.tag != .linux) return;
1415
-
1416
- assert(grid.cache_blocks.len > 0);
1417
-
1418
- // Each block could be its own isolated memory mapping, with how things are done
1419
- // using allocate_block(), but it's extremely unlikely. Coalesce them where possible to
1420
- // save on madvise() syscalls.
1421
- var continuous_cache_start = @intFromPtr(grid.cache_blocks[0]);
1422
- var continuous_cache_len = grid.cache_blocks[0].len;
1423
- var madvise_bytes: usize = 0;
1424
- var madvise_calls: usize = 0;
1425
-
1426
- for (grid.cache_blocks[1..]) |cache_block| {
1427
- if (continuous_cache_start + continuous_cache_len == @intFromPtr(cache_block.ptr)) {
1428
- continuous_cache_len += cache_block.len;
1429
- } else {
1430
- try std.posix.madvise(
1431
- @ptrFromInt(continuous_cache_start),
1432
- continuous_cache_len,
1433
- std.posix.MADV.DONTDUMP,
1434
- );
1435
- madvise_bytes += continuous_cache_len;
1436
- madvise_calls += 1;
1437
-
1438
- continuous_cache_start = @intFromPtr(cache_block.ptr);
1439
- continuous_cache_len = cache_block.len;
1440
- }
1441
- }
1442
-
1443
- try std.posix.madvise(
1444
- @ptrFromInt(continuous_cache_start),
1445
- continuous_cache_len,
1446
- std.posix.MADV.DONTDUMP,
1447
- );
1448
- madvise_bytes += continuous_cache_len;
1449
- madvise_calls += 1;
1450
-
1451
- assert(madvise_bytes == constants.block_size * grid.cache_blocks.len);
1452
- assert(madvise_calls <= grid.cache_blocks.len);
1453
-
1454
- log.debug("marked {} bytes as MADV_DONTDUMP with {} calls", .{
1455
- madvise_bytes,
1456
- madvise_calls,
1457
- });
1458
- }
1459
- };
1460
- }