tigerbeetle 0.0.40 → 0.17.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293)
  1. checksums.yaml +4 -4
  2. data/LICENSE +0 -25
  3. data/README.md +670 -80
  4. data/docs/migration.md +201 -0
  5. data/sig/tigerbeetle.rbs +271 -0
  6. data/src/ext/tigerbeetle/extconf.rb +47 -0
  7. data/src/ext/tigerbeetle/lib/aarch64-linux-gnu.2.27/libtb_client.so +0 -0
  8. data/src/ext/tigerbeetle/lib/aarch64-linux-musl/libtb_client.so +0 -0
  9. data/src/ext/tigerbeetle/lib/aarch64-macos/libtb_client.dylib +0 -0
  10. data/src/ext/tigerbeetle/lib/x86_64-linux-gnu.2.27/libtb_client.so +0 -0
  11. data/src/ext/tigerbeetle/lib/x86_64-linux-musl/libtb_client.so +0 -0
  12. data/src/ext/tigerbeetle/lib/x86_64-macos/libtb_client.dylib +0 -0
  13. data/src/ext/tigerbeetle/lib/x86_64-windows/tb_client.dll +0 -0
  14. data/src/ext/tigerbeetle/rb_tb_gen.h +458 -0
  15. data/{ext/tb_client/tigerbeetle/src/clients/rust/assets → src/ext/tigerbeetle}/tb_client.h +18 -16
  16. data/src/ext/tigerbeetle/tigerbeetle.c +310 -0
  17. data/src/tigerbeetle/bindings.rb +347 -0
  18. data/src/tigerbeetle/client.rb +129 -0
  19. data/src/tigerbeetle/completion_dispatcher.rb +108 -0
  20. data/src/tigerbeetle/id.rb +40 -0
  21. data/src/tigerbeetle/tb.rb +3 -0
  22. data/src/tigerbeetle/version.rb +3 -0
  23. data/src/tigerbeetle.rb +39 -0
  24. metadata +33 -350
  25. data/CHANGELOG.md +0 -162
  26. data/ext/tb_client/extconf.rb +0 -41
  27. data/ext/tb_client/tigerbeetle/LICENSE +0 -177
  28. data/ext/tb_client/tigerbeetle/build.zig +0 -2296
  29. data/ext/tb_client/tigerbeetle/src/aof.zig +0 -1000
  30. data/ext/tb_client/tigerbeetle/src/build/fetch.zig +0 -112
  31. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +0 -808
  32. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +0 -1283
  33. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +0 -1704
  34. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +0 -341
  35. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +0 -1450
  36. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +0 -1659
  37. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +0 -406
  38. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +0 -1092
  39. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +0 -286
  40. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +0 -158
  41. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +0 -229
  42. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +0 -110
  43. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +0 -386
  44. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +0 -34
  45. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +0 -281
  46. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +0 -312
  47. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +0 -138
  48. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +0 -466
  49. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +0 -157
  50. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +0 -90
  51. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +0 -203
  52. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +0 -79
  53. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +0 -542
  54. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +0 -109
  55. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +0 -86
  56. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +0 -370
  57. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +0 -386
  58. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +0 -167
  59. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +0 -126
  60. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +0 -996
  61. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +0 -748
  62. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +0 -3238
  63. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +0 -1718
  64. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +0 -190
  65. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +0 -104
  66. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +0 -75
  67. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +0 -522
  68. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +0 -267
  69. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +0 -3
  70. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +0 -379
  71. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +0 -131
  72. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +0 -63
  73. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +0 -588
  74. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +0 -73
  75. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +0 -106
  76. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +0 -305
  77. data/ext/tb_client/tigerbeetle/src/config.zig +0 -296
  78. data/ext/tb_client/tigerbeetle/src/constants.zig +0 -790
  79. data/ext/tb_client/tigerbeetle/src/copyhound.zig +0 -202
  80. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +0 -72
  81. data/ext/tb_client/tigerbeetle/src/direction.zig +0 -120
  82. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +0 -158
  83. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +0 -156
  84. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +0 -252
  85. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +0 -313
  86. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +0 -87
  87. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +0 -63
  88. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +0 -47
  89. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +0 -28
  90. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +0 -61
  91. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +0 -169
  92. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +0 -46
  93. data/ext/tb_client/tigerbeetle/src/ewah.zig +0 -445
  94. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +0 -128
  95. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +0 -171
  96. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +0 -179
  97. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +0 -662
  98. data/ext/tb_client/tigerbeetle/src/io/common.zig +0 -155
  99. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +0 -1093
  100. data/ext/tb_client/tigerbeetle/src/io/linux.zig +0 -1880
  101. data/ext/tb_client/tigerbeetle/src/io/test.zig +0 -1005
  102. data/ext/tb_client/tigerbeetle/src/io/windows.zig +0 -1598
  103. data/ext/tb_client/tigerbeetle/src/io.zig +0 -34
  104. data/ext/tb_client/tigerbeetle/src/iops.zig +0 -134
  105. data/ext/tb_client/tigerbeetle/src/list.zig +0 -236
  106. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +0 -848
  107. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +0 -179
  108. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +0 -424
  109. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +0 -420
  110. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +0 -2114
  111. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +0 -185
  112. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +0 -1146
  113. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +0 -1102
  114. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +0 -200
  115. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +0 -1495
  116. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +0 -739
  117. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +0 -166
  118. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +0 -754
  119. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +0 -1294
  120. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +0 -510
  121. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +0 -1241
  122. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -628
  123. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +0 -247
  124. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +0 -116
  125. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +0 -543
  126. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +0 -938
  127. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +0 -293
  128. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +0 -359
  129. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +0 -99
  130. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +0 -17
  131. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +0 -962
  132. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +0 -617
  133. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +0 -84
  134. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +0 -1500
  135. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -149
  136. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -7
  137. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +0 -865
  138. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +0 -607
  139. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +0 -843
  140. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +0 -90
  141. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +0 -40
  142. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +0 -629
  143. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +0 -933
  144. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +0 -534
  145. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +0 -469
  146. data/ext/tb_client/tigerbeetle/src/message_bus.zig +0 -1219
  147. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +0 -936
  148. data/ext/tb_client/tigerbeetle/src/message_pool.zig +0 -343
  149. data/ext/tb_client/tigerbeetle/src/multiversion.zig +0 -2195
  150. data/ext/tb_client/tigerbeetle/src/queue.zig +0 -390
  151. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +0 -201
  152. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +0 -1356
  153. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +0 -496
  154. data/ext/tb_client/tigerbeetle/src/repl.zig +0 -1034
  155. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +0 -973
  156. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +0 -1866
  157. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +0 -304
  158. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +0 -227
  159. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +0 -658
  160. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +0 -466
  161. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +0 -1058
  162. data/ext/tb_client/tigerbeetle/src/scripts.zig +0 -105
  163. data/ext/tb_client/tigerbeetle/src/shell.zig +0 -1195
  164. data/ext/tb_client/tigerbeetle/src/stack.zig +0 -260
  165. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +0 -911
  166. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +0 -2079
  167. data/ext/tb_client/tigerbeetle/src/state_machine.zig +0 -4872
  168. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +0 -288
  169. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +0 -3128
  170. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +0 -82
  171. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +0 -157
  172. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +0 -292
  173. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +0 -65
  174. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +0 -1414
  175. data/ext/tb_client/tigerbeetle/src/stdx/huge_page_allocator.zig +0 -115
  176. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +0 -92
  177. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +0 -677
  178. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +0 -336
  179. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +0 -511
  180. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +0 -112
  181. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +0 -1163
  182. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +0 -142
  183. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +0 -361
  184. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +0 -275
  185. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +0 -295
  186. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +0 -436
  187. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +0 -48
  188. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +0 -402
  189. data/ext/tb_client/tigerbeetle/src/storage.zig +0 -489
  190. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +0 -180
  191. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +0 -146
  192. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +0 -53
  193. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +0 -61
  194. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +0 -76
  195. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +0 -110
  196. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +0 -412
  197. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +0 -331
  198. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -458
  199. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +0 -1198
  200. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +0 -128
  201. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +0 -181
  202. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +0 -144
  203. data/ext/tb_client/tigerbeetle/src/testing/id.zig +0 -97
  204. data/ext/tb_client/tigerbeetle/src/testing/io.zig +0 -317
  205. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +0 -126
  206. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +0 -533
  207. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +0 -154
  208. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +0 -389
  209. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +0 -1247
  210. data/ext/tb_client/tigerbeetle/src/testing/table.zig +0 -249
  211. data/ext/tb_client/tigerbeetle/src/testing/time.zig +0 -98
  212. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +0 -212
  213. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +0 -26
  214. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +0 -579
  215. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +0 -39
  216. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +0 -214
  217. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +0 -34
  218. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +0 -785
  219. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +0 -543
  220. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +0 -181
  221. data/ext/tb_client/tigerbeetle/src/tidy.zig +0 -1449
  222. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +0 -227
  223. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +0 -1069
  224. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +0 -1422
  225. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +0 -1658
  226. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +0 -518
  227. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +0 -36
  228. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +0 -646
  229. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +0 -958
  230. data/ext/tb_client/tigerbeetle/src/time.zig +0 -236
  231. data/ext/tb_client/tigerbeetle/src/trace/event.zig +0 -745
  232. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +0 -462
  233. data/ext/tb_client/tigerbeetle/src/trace.zig +0 -556
  234. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +0 -321
  235. data/ext/tb_client/tigerbeetle/src/vopr.zig +0 -1785
  236. data/ext/tb_client/tigerbeetle/src/vortex.zig +0 -101
  237. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +0 -473
  238. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +0 -208
  239. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +0 -43
  240. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +0 -768
  241. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +0 -532
  242. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +0 -338
  243. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +0 -1019
  244. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +0 -279
  245. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +0 -1381
  246. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +0 -315
  247. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +0 -1460
  248. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +0 -757
  249. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +0 -797
  250. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +0 -2586
  251. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +0 -308
  252. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +0 -1777
  253. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +0 -715
  254. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +0 -185
  255. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +0 -333
  256. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +0 -12356
  257. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +0 -416
  258. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +0 -165
  259. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +0 -2928
  260. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +0 -1075
  261. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +0 -1603
  262. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -484
  263. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +0 -405
  264. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -355
  265. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +0 -29
  266. data/ext/tb_client/tigerbeetle/src/vsr.zig +0 -1727
  267. data/lib/tb_client/shared_lib.rb +0 -66
  268. data/lib/tb_client.rb +0 -282
  269. data/lib/tigerbeetle/account.rb +0 -38
  270. data/lib/tigerbeetle/account_balance.rb +0 -23
  271. data/lib/tigerbeetle/account_filter.rb +0 -31
  272. data/lib/tigerbeetle/atomic_counter.rb +0 -14
  273. data/lib/tigerbeetle/client.rb +0 -214
  274. data/lib/tigerbeetle/converters/account.rb +0 -63
  275. data/lib/tigerbeetle/converters/account_balance.rb +0 -31
  276. data/lib/tigerbeetle/converters/account_filter.rb +0 -32
  277. data/lib/tigerbeetle/converters/base.rb +0 -35
  278. data/lib/tigerbeetle/converters/create_accounts_result.rb +0 -21
  279. data/lib/tigerbeetle/converters/create_transfers_result.rb +0 -21
  280. data/lib/tigerbeetle/converters/query_filter.rb +0 -33
  281. data/lib/tigerbeetle/converters/time.rb +0 -23
  282. data/lib/tigerbeetle/converters/transfer.rb +0 -64
  283. data/lib/tigerbeetle/converters/uint_128.rb +0 -24
  284. data/lib/tigerbeetle/converters.rb +0 -12
  285. data/lib/tigerbeetle/error.rb +0 -4
  286. data/lib/tigerbeetle/id.rb +0 -30
  287. data/lib/tigerbeetle/platforms.rb +0 -9
  288. data/lib/tigerbeetle/query_filter.rb +0 -31
  289. data/lib/tigerbeetle/request.rb +0 -7
  290. data/lib/tigerbeetle/transfer.rb +0 -40
  291. data/lib/tigerbeetle/version.rb +0 -4
  292. data/lib/tigerbeetle.rb +0 -13
  293. data/tigerbeetle.gemspec +0 -60
@@ -1,797 +0,0 @@
1
- //! Scrub grid blocks.
2
- //!
3
- //! A "data scrubber" is a background task that gradually/incrementally reads the disk and validates
4
- //! what it finds. Its purpose is to discover faults proactively – as early as possible – rather
5
- //! than waiting for them to be discovered by normal database operation (e.g. during compaction).
6
- //!
7
- //! The most common type of disk fault is a latent sector error:
8
- //!
9
- //! - A "latent sector error" is the temporary or permanent inability to access the data of a
10
- //! particular sector. That is, the disk as a whole continues to function, but a small section of
11
- //! data is unavailable.
12
- //! - "Latent" refers to: the error is not discoverable until the sector is actually read.
13
- //! - "An Analysis of Latent Sector Errors in Disk Drives" (2007) found that >60% of latent sector
14
- //! errors were discovered by a scrubber that cycles every 2 weeks.
15
- //!
16
- //! Finding and repairing errors proactively minimizes the risk of cluster data loss due to multiple
17
- //! intersecting faults (analogous to a "double-fault") – a scenario where we fail to read a block,
18
- //! and try to repair the block from another replica, only to discover that the copy of the block on
19
- //! the remote replica's disk is *also* faulty.
20
- //!
21
- //! TODO Accelerate scrubbing rate (at runtime) if faults are detected frequently.
22
- const std = @import("std");
23
- const assert = std.debug.assert;
24
- const maybe = stdx.maybe;
25
- const KiB = stdx.KiB;
26
- const TiB = stdx.TiB;
27
- const log = std.log.scoped(.grid_scrubber);
28
-
29
- const stdx = @import("stdx");
30
- const vsr = @import("../vsr.zig");
31
- const constants = @import("../constants.zig");
32
- const schema = @import("../lsm/schema.zig");
33
- const QueueType = @import("../queue.zig").QueueType;
34
- const IOPSType = @import("../iops.zig").IOPSType;
35
-
36
- const allocate_block = @import("./grid.zig").allocate_block;
37
- const GridType = @import("./grid.zig").GridType;
38
- const BlockPtr = @import("./grid.zig").BlockPtr;
39
- const ForestTableIteratorType = @import("../lsm/forest_table_iterator.zig").ForestTableIteratorType;
40
- const TestStorage = @import("../testing/storage.zig").Storage;
41
-
42
- pub fn GridScrubberType(comptime Forest: type, grid_scrubber_reads_max: comptime_int) type {
43
- return struct {
44
- const GridScrubber = @This();
45
- const Grid = GridType(Forest.Storage);
46
- const WrappingForestTableIterator = WrappingForestTableIteratorType(Forest);
47
- const SuperBlock = vsr.SuperBlockType(Forest.Storage);
48
- const ManifestBlockIterator = ManifestBlockIteratorType(Forest.ManifestLog);
49
- const CheckpointTrailer = vsr.CheckpointTrailerType(Forest.Storage);
50
-
51
- pub const BlockId = struct {
52
- block_checksum: u128,
53
- block_address: u64,
54
- block_type: schema.BlockType,
55
- };
56
-
57
- pub const BlockStatus = enum {
58
- /// If `read.done`: The scrub failed – the block must be repaired.
59
- /// If `!read.done`: The scrub is still in progress. (This is the initial state).
60
- repair,
61
- /// The scrub succeeded.
62
- /// Don't repair the block.
63
- ok,
64
- /// The scrub was aborted (the replica is about to state-sync).
65
- /// Don't repair the block.
66
- canceled,
67
- /// The block was freed by a checkpoint in the time that the read was in progress.
68
- /// Don't repair the block.
69
- ///
70
- /// (At checkpoint, the FreeSet frees blocks released during the preceding
71
- /// checkpoint. We can scrub released blocks, but not free blocks. Setting this flag
72
- /// ensures that GridScrubber doesn't require a read-barrier at checkpoint.)
73
- released,
74
- };
75
-
76
- const Read = struct {
77
- scrubber: *GridScrubber,
78
- read: Grid.Read = undefined,
79
- block_type: schema.BlockType,
80
-
81
- status: BlockStatus,
82
-
83
- /// Whether the read is ready to be released.
84
- done: bool,
85
-
86
- /// For `reads_busy`/`reads_done` queues.
87
- link: QueueType(Read).Link = .{},
88
- };
89
-
90
- superblock: *SuperBlock,
91
- forest: *Forest,
92
- client_sessions_checkpoint: *const CheckpointTrailer,
93
-
94
- reads: IOPSType(Read, grid_scrubber_reads_max) = .{},
95
-
96
- /// A list of reads that are in progress.
97
- reads_busy: QueueType(Read) = QueueType(Read).init(.{ .name = "grid_scrubber_reads_busy" }),
98
- /// A list of reads that are ready to be released.
99
- reads_done: QueueType(Read) = QueueType(Read).init(.{ .name = "grid_scrubber_reads_done" }),
100
-
101
- /// Track the progress through the grid.
102
- ///
103
- /// Every full tour...
104
- /// - ...on an idle replica (i.e. not committing) scrubs every acquired block in the grid.
105
- /// - ...on a non-idle replica scrubs all blocks that survived the entire span of the tour
106
- /// without moving to a different level, but may not scrub blocks that were added during
107
- /// the tour or which moved.
108
- tour: union(enum) {
109
- init,
110
- done,
111
- table_index,
112
- table_value: struct {
113
- index_checksum: u128,
114
- index_address: u64,
115
- /// Points to `tour_index_block` once the index block has been read.
116
- index_block: ?BlockPtr = null,
117
- value_block_index: u32 = 0,
118
- },
119
- /// The manifest log tour iterates manifest blocks in reverse order.
120
- /// (To ensure that manifest compaction doesn't lead to missed blocks.)
121
- manifest_log: struct { iterator: ManifestBlockIterator = .init },
122
- free_set_blocks_acquired: struct { index: u32 = 0 },
123
- free_set_blocks_released: struct { index: u32 = 0 },
124
- client_sessions: struct { index: u32 = 0 },
125
- },
126
-
127
- /// When tour == .init, tour_tables == .{}
128
- /// When tour == .done, tour_tables.next() == null.
129
- tour_tables: ?WrappingForestTableIterator,
130
- /// The "offset" within the LSM from which scrubber table iteration cycles begin/end.
131
- /// This varies between replicas to minimize risk of data loss.
132
- tour_tables_origin: ?WrappingForestTableIterator.Origin,
133
-
134
- /// Contains a table index block when tour=table_value.
135
- tour_index_block: BlockPtr,
136
-
137
- /// These counters reset after every tour cycle.
138
- /// NB: tour_blocks_scrubbed_count will include repeated index block reads.
139
- /// (See read_next_callback() for more detail.)
140
- tour_blocks_scrubbed_count: u64,
141
-
142
- pub fn init(
143
- allocator: std.mem.Allocator,
144
- forest: *Forest,
145
- client_sessions_checkpoint: *const CheckpointTrailer,
146
- ) error{OutOfMemory}!GridScrubber {
147
- const tour_index_block = try allocate_block(allocator);
148
- errdefer allocator.free(tour_index_block);
149
-
150
- return .{
151
- .superblock = forest.grid.superblock,
152
- .forest = forest,
153
- .client_sessions_checkpoint = client_sessions_checkpoint,
154
- .tour = .init,
155
- .tour_tables = null,
156
- .tour_tables_origin = null,
157
- .tour_index_block = tour_index_block,
158
- .tour_blocks_scrubbed_count = 0,
159
- };
160
- }
161
-
162
- pub fn deinit(scrubber: *GridScrubber, allocator: std.mem.Allocator) void {
163
- allocator.free(scrubber.tour_index_block);
164
-
165
- scrubber.* = undefined;
166
- }
167
-
168
- pub fn open(scrubber: *GridScrubber, prng: *stdx.PRNG) void {
169
- // Compute the tour origin exactly once.
170
- if (scrubber.tour_tables_origin != null) {
171
- return;
172
- }
173
-
174
- // Each replica's scrub origin is chosen independently.
175
- // This reduces the chance that the same block across multiple replicas can bitrot
176
- // without being discovered and repaired by a scrubber.
177
- //
178
- // To accomplish this, try to select an origin uniformly across all blocks:
179
- // - Bias towards levels with more tables.
180
- // - Bias towards trees with more blocks per table.
181
- // - (Though, for ease of implementation, the origin is always at the beginning of a
182
- // tree's level, never in the middle.)
183
- assert(scrubber.tour == .init);
184
-
185
- scrubber.tour_tables_origin = .{
186
- .level = 0,
187
- .tree_id = Forest.tree_infos[0].tree_id,
188
- };
189
-
190
- var reservoir = stdx.PRNG.Reservoir.init();
191
-
192
- for (0..constants.lsm_levels) |level| {
193
- inline for (Forest.tree_infos) |tree_info| {
194
- const tree_id = comptime Forest.tree_id_cast(tree_info.tree_id);
195
- const tree = scrubber.forest.tree_for_id_const(tree_id);
196
- const levels = &tree.manifest.levels;
197
- const tree_level_weight = @as(u64, levels[level].tables.len()) *
198
- tree_info.Tree.Table.index.value_block_count_max;
199
- if (tree_level_weight > 0 and reservoir.replace(prng, tree_level_weight)) {
200
- scrubber.tour_tables_origin = .{
201
- .level = @intCast(level),
202
- .tree_id = tree_info.tree_id,
203
- };
204
- }
205
- }
206
- }
207
-
208
- scrubber.tour_tables = WrappingForestTableIterator.init(scrubber.tour_tables_origin.?);
209
-
210
- log.debug("{}: open: tour_tables_origin.level={} tour_tables_origin.tree_id={}", .{
211
- scrubber.superblock.replica_index.?,
212
- scrubber.tour_tables_origin.?.level,
213
- scrubber.tour_tables_origin.?.tree_id,
214
- });
215
- }
216
-
217
- pub fn cancel(scrubber: *GridScrubber) void {
218
- for ([_]QueueType(Read){ scrubber.reads_busy, scrubber.reads_done }) |reads_fifo| {
219
- var reads_iterator = reads_fifo.iterate();
220
- while (reads_iterator.next()) |read| {
221
- read.status = .canceled;
222
- }
223
- }
224
-
225
- if (scrubber.tour == .table_value) {
226
- // Skip scrubbing the table data; the table may not exist when state sync finishes.
227
- scrubber.tour = .table_index;
228
- }
229
- }
230
-
231
- /// Cancel queued reads to blocks that will be freed, now that the current checkpoint is
232
- /// durable. (The read still runs, but the results will be ignored.)
233
- pub fn checkpoint_durable(scrubber: *GridScrubber) void {
234
- assert(scrubber.superblock.opened);
235
- // GridScrubber.checkpoint_durable() is called immediately before
236
- // FreeSet.mark_checkpoint_durable(). All released blocks are about to be freed.
237
- assert(scrubber.forest.grid.callback == .none);
238
-
239
- for ([_]QueueType(Read){ scrubber.reads_busy, scrubber.reads_done }) |reads_fifo| {
240
- var reads_iterator = reads_fifo.iterate();
241
- while (reads_iterator.next()) |read| {
242
- if (read.status == .repair) {
243
- assert(!scrubber.forest.grid.free_set.is_free(read.read.address));
244
- // Use `to_be_freed_at_checkpoint_durability` instead of `is_released`;
245
- // the latter also contains the blocks that will be released when the
246
- // *next* checkpoint becomes durable. We only need to abort scrubbing for
247
- // blocks that are just about to be freed.
248
- if (scrubber.forest.grid.free_set
249
- .to_be_freed_at_checkpoint_durability(read.read.address))
250
- {
251
- read.status = .released;
252
- }
253
- }
254
- }
255
- }
256
-
257
- if (scrubber.tour == .table_value) {
258
- const index_address = scrubber.tour.table_value.index_address;
259
- assert(!scrubber.forest.grid.free_set.is_free(index_address));
260
-
261
- if (scrubber.forest.grid.free_set
262
- .to_be_freed_at_checkpoint_durability(index_address))
263
- {
264
- // Skip scrubbing the table data, since the table is about to be released.
265
- scrubber.tour = .table_index;
266
- }
267
- }
268
- }
269
-
270
/// Starts scrubbing the next block of the tour, if there is both a free `Read` and a block
/// left to visit. Returns `true` iff a new read was started.
pub fn read_next(scrubber: *GridScrubber) bool {
    assert(scrubber.superblock.opened);
    assert(scrubber.forest.grid.callback != .cancel);
    // Every executing read is in exactly one of the two queues, on entry and on exit.
    assert(scrubber.reads_busy.count() + scrubber.reads_done.count() ==
        scrubber.reads.executing());
    defer assert(scrubber.reads_busy.count() + scrubber.reads_done.count() ==
        scrubber.reads.executing());

    if (scrubber.reads.available() == 0) return false;
    const target = scrubber.tour_next() orelse return false;
    scrubber.tour_blocks_scrubbed_count += 1;

    const slot = scrubber.reads.acquire().?;
    assert(!scrubber.reads_busy.contains(slot));
    assert(!scrubber.reads_done.contains(slot));

    slot.* = .{
        .scrubber = scrubber,
        .block_type = target.block_type,
        .status = .repair,
        .done = false,
    };

    log.debug("{}: read_next: address={} checksum={x:0>32} type={s}", .{
        scrubber.superblock.replica_index.?,
        target.block_address,
        target.block_checksum,
        @tagName(target.block_type),
    });

    scrubber.reads_busy.push(slot);

    // Read from local storage only, bypassing the cache in both directions.
    scrubber.forest.grid.read_block(
        .{ .from_local_storage = read_next_callback },
        &slot.read,
        target.block_address,
        target.block_checksum,
        .{ .cache_read = false, .cache_write = false },
    );
    return true;
}
311
-
312
/// Completion callback for reads started by `read_next()`.
/// Records the outcome on the `Read`, moves it from `reads_busy` to `reads_done`, and, when
/// the block is the index block that the tour is currently waiting on, stashes a copy of it
/// in `tour_index_block`.
fn read_next_callback(grid_read: *Grid.Read, result: Grid.ReadBlockResult) void {
    const read: *Read = @fieldParentPtr("read", grid_read);
    const scrubber = read.scrubber;
    assert(scrubber.reads_busy.contains(read));
    assert(!scrubber.reads_done.contains(read));
    assert(!read.done);
    // The status may have changed (e.g. to `.released`) while the read was in flight.
    maybe(read.status != .repair);

    log.debug("{}: read_next_callback: result={s} " ++
        "(address={} checksum={x:0>32} type={s} status={?})", .{
        scrubber.superblock.replica_index.?,
        @tagName(result),
        read.read.address,
        read.read.checksum,
        @tagName(read.block_type),
        read.status,
    });

    // Is this the index block of the table whose value blocks the tour wants to visit next?
    const is_pending_index = read.status == .repair and
        scrubber.tour == .table_value and
        scrubber.tour.table_value.index_block == null and
        scrubber.tour.table_value.index_checksum == read.read.checksum and
        scrubber.tour.table_value.index_address == read.read.address;

    if (is_pending_index) {
        assert(scrubber.tour.table_value.value_block_index == 0);

        switch (result) {
            .valid => |block| {
                stdx.copy_disjoint(.inexact, u8, scrubber.tour_index_block, block);
                scrubber.tour.table_value.index_block = scrubber.tour_index_block;
            },
            else => {
                // Without the index block, the table's value blocks cannot be scrubbed.
                // Wait for it: the index block is re-scrubbed until it is repaired (or until
                // a checkpoint releases it).
                //
                // (The alternative would be to skip the table's value blocks and pick them
                // up during the next cycle, but waiting gives nicer invariants + tests.)
                log.debug("{}: read_next_callback: waiting for index repair " ++
                    "(address={} checksum={x:0>32})", .{
                    scrubber.superblock.replica_index.?,
                    read.read.address,
                    read.read.checksum,
                });
            },
        }
    }

    // A valid block needs no repair. Any other status is left untouched.
    if (result == .valid and read.status == .repair) {
        read.status = .ok;
    }

    read.done = true;
    scrubber.reads_busy.remove(read);
    scrubber.reads_done.push(read);
}
369
-
370
/// Pops the oldest completed read (if any), releases its `Read` back to the pool, and returns
/// the identity of the block that was read together with its final status.
/// Returns `null` when no completed read is queued.
pub fn read_result_next(scrubber: *GridScrubber) ?struct {
    block: BlockId,
    status: BlockStatus,
} {
    // Every executing read is in exactly one of the two queues, on entry and on exit.
    assert(scrubber.reads_busy.count() + scrubber.reads_done.count() ==
        scrubber.reads.executing());
    defer assert(scrubber.reads_busy.count() + scrubber.reads_done.count() ==
        scrubber.reads.executing());

    const finished = scrubber.reads_done.pop() orelse return null;
    // Released only after the result below has been assembled.
    defer scrubber.reads.release(finished);

    assert(finished.done);

    return .{
        .block = .{
            .block_address = finished.read.address,
            .block_checksum = finished.read.checksum,
            .block_type = finished.block_type,
        },
        .status = finished.status,
    };
}
391
-
392
/// Advances the tour by one block and returns that block's id.
///
/// The tour proceeds through its phases in a fixed order: tables (each index block followed by
/// its value blocks), manifest log blocks, the two free set trailers (acquired, then released),
/// and finally the client sessions trailer.
///
/// Returns `null` when the tour is `.done`, or when the trailer that is up next currently has
/// a callback pending.
fn tour_next(scrubber: *GridScrubber) ?BlockId {
    assert(scrubber.superblock.opened);
    assert(scrubber.forest.manifest_log.opened);
    assert(scrubber.tour_tables_origin != null);

    const tour = &scrubber.tour;
    if (tour.* == .init) tour.* = .table_index;

    if (tour.* == .table_value) {
        const index_block = tour.table_value.index_block orelse {
            // There is no index block (yet) if either:
            // - it was corrupt when we just scrubbed it, or
            // - `grid_scrubber_reads > 1`.
            // Keep requesting it until it turns up, or until a checkpoint removes it.
            // (read_next_callback() explains this in more detail.)
            return .{
                .block_address = tour.table_value.index_address,
                .block_checksum = tour.table_value.index_checksum,
                .block_type = .index,
            };
        };

        const index_schema = schema.TableIndex.from(index_block);
        const value_blocks_used = index_schema.value_blocks_used(scrubber.tour_index_block);
        const value_block_index = tour.table_value.value_block_index;

        if (value_block_index < value_blocks_used) {
            tour.table_value.value_block_index = value_block_index + 1;

            const addresses = index_schema.value_addresses_used(scrubber.tour_index_block);
            const checksums = index_schema.value_checksums_used(scrubber.tour_index_block);
            return .{
                .block_address = addresses[value_block_index],
                .block_checksum = checksums[value_block_index].value,
                .block_type = .value,
            };
        }

        // Every value block of this table has been visited; move on to the next table.
        assert(value_block_index == value_blocks_used);
        tour.* = .table_index;
    }

    if (tour.* == .table_index) {
        if (scrubber.tour_tables.?.next(scrubber.forest)) |table_info| {
            if (Forest.Storage == TestStorage) {
                scrubber.superblock.storage.verify_table(
                    table_info.address,
                    table_info.checksum,
                );
            }

            // The table's value blocks follow once its index block has been read.
            tour.* = .{ .table_value = .{
                .index_checksum = table_info.checksum,
                .index_address = table_info.address,
            } };

            return .{
                .block_address = table_info.address,
                .block_checksum = table_info.checksum,
                .block_type = .index,
            };
        }
        tour.* = .{ .manifest_log = .{} };
    }

    if (tour.* == .manifest_log) {
        if (tour.manifest_log.iterator.next(&scrubber.forest.manifest_log)) |reference| {
            return .{
                .block_address = reference.address,
                .block_checksum = reference.checksum,
                .block_type = .manifest,
            };
        }
        tour.* = .{ .free_set_blocks_acquired = .{} };
    }

    if (tour.* == .free_set_blocks_acquired) {
        const trailer = &scrubber.forest.grid.free_set_checkpoint_blocks_acquired;
        if (trailer.callback != .none) return null;

        const index = tour.free_set_blocks_acquired.index;
        if (index < trailer.block_count()) {
            tour.free_set_blocks_acquired.index = index + 1;
            return .{
                .block_address = trailer.block_addresses[index],
                .block_checksum = trailer.block_checksums[index],
                .block_type = .free_set,
            };
        }

        // The index may overshoot: a checkpoint can shrink the trailer's block count while
        // the trailer is being scrubbed.
        maybe(tour.free_set_blocks_acquired.index > trailer.block_count());
        tour.* = .{ .free_set_blocks_released = .{} };
    }

    if (tour.* == .free_set_blocks_released) {
        const trailer = &scrubber.forest.grid.free_set_checkpoint_blocks_released;
        if (trailer.callback != .none) return null;

        const index = tour.free_set_blocks_released.index;
        if (index < trailer.block_count()) {
            tour.free_set_blocks_released.index = index + 1;
            return .{
                .block_address = trailer.block_addresses[index],
                .block_checksum = trailer.block_checksums[index],
                .block_type = .free_set,
            };
        }

        // The index may overshoot: a checkpoint can shrink the trailer's block count while
        // the trailer is being scrubbed.
        maybe(tour.free_set_blocks_released.index > trailer.block_count());
        tour.* = .{ .client_sessions = .{} };
    }

    if (tour.* == .client_sessions) {
        const sessions_trailer = scrubber.client_sessions_checkpoint;
        if (sessions_trailer.callback != .none) return null;

        const index = tour.client_sessions.index;
        if (index < sessions_trailer.block_count()) {
            tour.client_sessions.index = index + 1;
            return .{
                .block_address = sessions_trailer.block_addresses[index],
                .block_checksum = sessions_trailer.block_checksums[index],
                .block_type = .client_sessions,
            };
        }

        // The index may overshoot: a checkpoint can shrink the trailer's block count while
        // the trailer is being scrubbed.
        maybe(tour.client_sessions.index > sessions_trailer.block_count());
        tour.* = .done;
    }

    // This is only the end of the *tour*:
    // some of the reads issued during this cycle may still be in progress.
    log.debug("{}: tour_next: cycle done (toured_blocks={})", .{
        scrubber.superblock.replica_index.?,
        scrubber.tour_blocks_scrubbed_count,
    });

    assert(tour.* == .done);
    return null;
}
544
-
545
/// Resets a finished tour so that the next cycle starts over from the tour's origin.
/// May only be called once the tour is `.done`.
pub fn wrap(scrubber: *GridScrubber) void {
    assert(scrubber.tour == .done);

    const origin = scrubber.tour_tables_origin.?;

    scrubber.tour_blocks_scrubbed_count = 0;
    scrubber.tour_tables = WrappingForestTableIterator.init(origin);
    scrubber.tour = .init;
}
553
- };
554
- }
555
-
556
/// A forest table iterator which begins at an arbitrary `Origin` (level + tree id), runs to the
/// end of the forest, then restarts from the beginning and stops once it is back at the origin.
fn WrappingForestTableIteratorType(comptime Forest: type) type {
    return struct {
        const WrappingForestTableIterator = @This();
        const ForestTableIterator = ForestTableIteratorType(Forest);

        /// Where the iteration started (and where it ends after wrapping).
        origin: Origin,
        /// The underlying, non-wrapping iterator.
        tables: ForestTableIterator,
        /// Whether `tables` has already been restarted from the beginning.
        wrapped: bool,

        pub const Origin = struct {
            level: u6,
            tree_id: u16,
        };

        /// Creates an iterator whose first pass starts at `origin`.
        pub fn init(origin: Origin) WrappingForestTableIterator {
            const start: ForestTableIterator = .{
                .level = origin.level,
                .tree_id = origin.tree_id,
            };
            return .{
                .origin = origin,
                .tables = start,
                .wrapped = false,
            };
        }

        /// Returns the next table, or `null` once the iteration has come full circle.
        pub fn next(
            iterator: *WrappingForestTableIterator,
            forest: *const Forest,
        ) ?schema.ManifestNode.TableInfo {
            if (iterator.tables.next(forest)) |table| {
                // After wrapping, stop as soon as a table at or past the origin shows up.
                const reached_origin = iterator.wrapped and
                    iterator.origin.level <= table.label.level and
                    iterator.origin.tree_id <= table.tree_id;
                return if (reached_origin) null else table;
            }

            // The underlying iterator is exhausted.
            if (iterator.wrapped) return null;

            // First exhaustion: restart from the beginning of the forest.
            // NOTE(review): the first table after wrapping is returned without the origin
            // check above — confirm that this is intended.
            iterator.wrapped = true;
            iterator.tables = .{};
            return iterator.tables.next(forest);
        }
    };
}
605
-
606
/// Iterate over every manifest block address/checksum in the manifest log.
///
/// This iterator is stable across ManifestLog mutation – that is, it is guaranteed to iterate over
/// every manifest block that survives the entire iteration.
///
/// Blocks are visited from the highest index down to index 0.
fn ManifestBlockIteratorType(comptime ManifestLog: type) type {
    return union(enum) {
        const ManifestBlockIterator = @This();

        /// No block has been returned yet.
        init,
        /// The iteration is finished; `next()` keeps returning `null`.
        done,
        state: struct {
            /// The last-known index (within the manifest blocks) of the address/checksum.
            index: u32,
            /// The address/checksum of the most-recently iterated manifest block.
            address: u64,
            checksum: u128,
        },

        /// Returns a reference to the next manifest block to scrub, or `null` (and transitions
        /// to `.done`) when there are no further blocks.
        fn next(
            iterator: *ManifestBlockIterator,
            manifest_log: *const ManifestLog,
        ) ?vsr.BlockReference {
            // Don't scrub the trailing `blocks_closed`; they are not yet flushed to disk.
            const log_block_count: u32 =
                @intCast(manifest_log.log_block_addresses.count - manifest_log.blocks_closed);

            const position: ?u32 = switch (iterator.*) {
                .done => null,
                .init => if (log_block_count == 0) null else log_block_count - 1,
                .state => |state| position: {
                    // `index` may be beyond the limit due to blocks removed by manifest compaction.
                    maybe(state.index >= log_block_count);

                    // The block that we most recently scrubbed may:
                    // - be in the same position, or
                    // - have shifted earlier in the list (due to manifest compaction), or
                    // - have been removed from the list (due to manifest compaction).
                    // Use the block's old position to find its current position.
                    var scan: u32 = @min(state.index, log_block_count -| 1);
                    while (scan > 0) : (scan -= 1) {
                        if (manifest_log.log_block_addresses.get(scan).? == state.address and
                            manifest_log.log_block_checksums.get(scan).? == state.checksum)
                        {
                            // The loop condition guarantees `scan > 0`, so there is always a
                            // predecessor to continue with.
                            break :position scan - 1;
                        }
                    } else {
                        // Index 0 is deliberately not compared: whether the block now sits at
                        // index 0 or is gone entirely, there is nothing left to continue with.
                        break :position null;
                    }
                },
            };

            if (position) |index| {
                // Remember the block by address/checksum as well as by index, so that it can
                // be located again on the next call even if it has moved in the meantime.
                iterator.* = .{ .state = .{
                    .index = index,
                    .address = manifest_log.log_block_addresses.get(index).?,
                    .checksum = manifest_log.log_block_checksums.get(index).?,
                } };

                return .{
                    .address = iterator.state.address,
                    .checksum = iterator.state.checksum,
                };
            } else {
                iterator.* = .done;
                return null;
            }
        }
    };
}
675
-
676
// A model of the probability that bitrot causes the cluster to lose data: that is, that *every*
// copy of *any* block becomes corrupt before the scrubber gets a chance to repair it.
//
// Optimistic assumptions (see the footnotes):
// - Faults on different replicas are independent. ¹
// - Faults are independent (i.e. uncorrelated) in space and time. ²
//
// Pessimistic assumptions:
// - Each sector has only 3 (quorum_replication) copies.
// - Scrub randomization is ignored.
// - The simulated fault rate is far higher than a real disk's. (See `faults_per_year`.)
// - Reads, writes, and repairs caused by workloads other than the scrubber are not modeled.
// - Every block is always full (512KiB).
//
// ¹: To reduce the risk of correlated errors in production, replicas could use different SSD
//    (hardware) models.
//
// ²: SSD faults are not independent (in either time or space).
//    See, for example:
//    - "An In-Depth Study of Correlated Failures in Production SSD-Based Data Centers"
//      (https://www.usenix.org/system/files/fast21-han.pdf)
//    - "Flash Reliability in Production: The Expected and the Unexpected"
//      (https://www.usenix.org/system/files/conference/fast16/fast16-papers-schroeder.pdf)
//    Nevertheless, for the purposes of modeling scrubbing it is a decent approximation, because
//    blocks are large relative to sectors. (Additionally, blocks that are written together are
//    often scrubbed together.)
test "GridScrubber cycle interval" {
    // Parameters:

    // How many years the test is "running" for. The longer it runs, the more likely it is that
    // the cluster experiences data loss.
    const duration_years = 20;

    // The number of days between two scrubs of the same sector.
    // Equivalently: the number of days it takes to scrub the whole data file.
    const cycle_days = 180;

    // The total size of the data file.
    // Because this parameter is independent of the faults/year rate, a larger data file
    // actually *reduces* the likelihood of data loss.
    const data_file_size = 16 * TiB;

    // The expected (average) number of sector faults per year.
    // I can't find any good, recent statistics for faults on SSDs.
    //
    // Most papers express the fault rate as "UBER" (uncorrectable bit errors per total bits read).
    // But "Flash Reliability in Production: The Expected and the Unexpected" §5.1 finds that
    // UBER's underlying assumption – that the number of uncorrectable errors is correlated with
    // the number of bytes read – is false. (That paper only shares "fraction of drives affected
    // by an error", which is too coarse for this model's purposes.)
    //
    // Instead, the parameter is chosen conservatively – greater than the "true" number by at
    // least an order of magnitude.
    const faults_per_year = 10_000;

    // A block consists of multiple sectors, and is corrupt if any one of its sectors is corrupt.
    //
    // Increasing this parameter increases the likelihood of eventual data loss.
    // (Intuitively, a single bitrot within 1GiB is more likely than a single bitrot within 1KiB.)
    const block_bytes = 512 * KiB;

    // The total number of copies of each sector.
    // The cluster is recoverable as long as a sector has fewer than `copies` faults.
    // Set to 3 rather than 6, since 3 is the quorum_replication.
    const copies = 3;

    const sector_bytes = constants.sector_size;

    // Computation:

    const sectors_per_block = @divExact(block_bytes, sector_bytes);
    const sectors_total = @divExact(data_file_size, sector_bytes);
    const blocks_total = @divExact(data_file_size, block_bytes);
    const duration_days = duration_years * 365;
    const cycles_total = stdx.div_ceil(duration_days, cycle_days);
    const faults_per_cycle = stdx.div_ceil(faults_per_year * cycle_days, 365);

    // P(a single fault misses a specific block)
    const sectors_outside_block: f64 = @floatFromInt(sectors_total - sectors_per_block);
    const sectors_total_float: f64 = @floatFromInt(sectors_total);
    const p_fault_misses_block = sectors_outside_block / sectors_total_float;

    // P(a specific block is uncorrupted for an entire cycle)
    // The whole block is corrupt as soon as any one of its sectors is corrupt.
    const faults_per_cycle_float: f64 = @floatFromInt(faults_per_cycle);
    const p_block_healthy_per_cycle =
        std.math.pow(f64, p_fault_misses_block, faults_per_cycle_float);

    const p_block_corrupt_per_cycle = 1.0 - p_block_healthy_per_cycle;

    // P(a specific block is corrupted on all replicas during a single cycle)
    const copies_float: f64 = @floatFromInt(copies);
    const p_cluster_block_corrupt_per_cycle =
        std.math.pow(f64, p_block_corrupt_per_cycle, copies_float);

    // P(a specific block is uncorrupted on at least one replica during a single cycle)
    const p_cluster_block_healthy_per_cycle = 1.0 - p_cluster_block_corrupt_per_cycle;

    // P(a specific block is uncorrupted on at least one replica for all cycles)
    // Each cycle can be considered independently, because we assume that if at least one
    // healthy copy exists at the end of a cycle, then all of the corrupt copies are repaired.
    const cycles_total_float: f64 = @floatFromInt(cycles_total);
    const p_cluster_block_healthy_per_span =
        std.math.pow(f64, p_cluster_block_healthy_per_cycle, cycles_total_float);

    // P(each block is uncorrupted on at least one replica for all cycles)
    const blocks_total_float: f64 = @floatFromInt(blocks_total);
    const p_cluster_blocks_healthy_per_span =
        std.math.pow(f64, p_cluster_block_healthy_per_span, blocks_total_float);

    // P(at some point during all cycles, at least one block is corrupt across all replicas)
    // In other words: P(eventual data loss).
    const p_cluster_blocks_corrupt_per_span = 1.0 - p_cluster_blocks_healthy_per_span;

    const Snap = stdx.Snap;
    const snap = Snap.snap_fn("src");

    try snap(@src(),
        \\4.3582921528e-3
    ).diff_fmt("{e:.10}", .{p_cluster_blocks_corrupt_per_span});
}