tigerbeetle 0.0.34 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/ext/tb_client/extconf.rb +13 -13
  4. data/ext/tb_client/tigerbeetle/LICENSE +177 -0
  5. data/ext/tb_client/tigerbeetle/build.zig +2327 -0
  6. data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
  7. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
  8. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
  9. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
  10. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
  11. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
  12. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
  13. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
  14. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
  15. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
  16. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
  17. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
  18. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
  19. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
  20. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
  21. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
  22. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
  23. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
  24. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
  25. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
  26. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
  27. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
  28. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
  29. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
  30. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
  31. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
  32. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
  33. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
  34. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
  35. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
  36. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
  37. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
  38. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
  39. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
  40. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
  41. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
  42. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
  43. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
  44. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
  45. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
  46. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
  47. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
  48. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
  49. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
  50. data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
  51. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
  52. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
  53. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
  54. data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
  55. data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
  56. data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
  57. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
  58. data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
  59. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
  60. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
  61. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
  62. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
  63. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
  64. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
  65. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
  66. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
  67. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
  68. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
  69. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
  70. data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
  71. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
  72. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
  73. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
  74. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
  75. data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
  76. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
  77. data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
  78. data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
  79. data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
  80. data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
  81. data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
  82. data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
  83. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
  84. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
  85. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
  86. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
  87. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
  88. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
  89. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
  90. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
  91. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
  92. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
  93. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
  94. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
  95. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
  96. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
  97. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
  98. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
  99. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
  100. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
  101. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
  102. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
  103. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
  104. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
  105. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
  106. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
  107. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
  108. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
  109. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
  110. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
  111. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
  112. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
  113. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
  114. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
  115. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
  116. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
  117. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
  118. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
  119. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
  120. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
  121. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
  122. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
  123. data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
  124. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
  125. data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
  126. data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
  127. data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
  128. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
  129. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
  130. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
  131. data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
  132. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
  133. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
  134. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
  135. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
  136. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
  137. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
  138. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
  139. data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
  140. data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
  141. data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
  142. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
  143. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
  144. data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
  145. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
  146. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
  147. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
  148. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
  149. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
  150. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
  151. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
  152. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
  153. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
  154. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
  155. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
  156. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
  157. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
  158. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
  159. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
  160. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
  161. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
  162. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
  163. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
  164. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
  165. data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
  166. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
  167. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
  168. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
  169. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
  170. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
  171. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
  172. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
  173. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
  174. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
  175. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
  176. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
  177. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
  178. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
  179. data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
  180. data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
  181. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
  182. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
  183. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
  184. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
  185. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
  186. data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
  187. data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
  188. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
  189. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
  190. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
  191. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
  192. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
  193. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
  194. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
  195. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
  196. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
  197. data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
  198. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
  199. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
  200. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
  201. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
  202. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
  203. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
  204. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
  205. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
  206. data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
  207. data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
  208. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
  209. data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
  210. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
  211. data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
  212. data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
  213. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
  214. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
  215. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
  216. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
  217. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
  218. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
  219. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
  220. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
  221. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
  222. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
  223. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
  224. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
  225. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
  226. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
  227. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
  228. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
  229. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
  230. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
  231. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
  232. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
  233. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
  234. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
  235. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
  236. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
  237. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
  238. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
  239. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
  240. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
  241. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
  242. data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
  243. data/lib/tb_client/shared_lib.rb +12 -5
  244. data/lib/tigerbeetle/client.rb +1 -1
  245. data/lib/tigerbeetle/platforms.rb +9 -0
  246. data/lib/tigerbeetle/version.rb +2 -2
  247. data/tigerbeetle.gemspec +22 -5
  248. metadata +242 -3
  249. data/ext/tb_client/pkg.tar.gz +0 -0
@@ -0,0 +1,757 @@
1
+ //! Track corrupt/missing grid blocks.
2
+ //!
3
+ //! - The GridBlocksMissing is LSM-aware: it can repair entire tables.
4
+ //! - The GridBlocksMissing is shared by all Trees.
5
+ //! - The GridBlocksMissing is "coherent" – that is, all of the blocks in the queue belong in the
6
+ //! replica's current checkpoint:
7
+ //! - The GridBlocksMissing will not repair freed blocks.
8
+ //! - The GridBlocksMissing will repair released blocks, until they are freed at the checkpoint.
9
+ //! - GridBlocksMissing.sync_table() is called immediately after superblock sync.
10
+ //! - GridBlocksMissing.repair_block() is called by the grid when non-repair reads encounter
11
+ //! corrupt blocks.
12
+ const std = @import("std");
13
+ const assert = std.debug.assert;
14
+ const maybe = stdx.maybe;
15
+
16
+ const constants = @import("../constants.zig");
17
+ const stdx = @import("stdx");
18
+ const schema = @import("../lsm/schema.zig");
19
+ const vsr = @import("../vsr.zig");
20
+
21
+ const QueueType = @import("../queue.zig").QueueType;
22
+ const BlockPtrConst = *align(constants.sector_size) const [constants.block_size]u8;
23
+
24
+ pub const GridBlocksMissing = struct {
25
+ /// A block is removed from the collection when:
26
+ /// - the block's write completes, or
27
+ /// - the block is released and the release is checkpointed, or
28
+ /// - the grid is canceled.
29
+ ///
30
+ /// The map is keyed by block address.
31
+ const FaultyBlocks = std.AutoArrayHashMapUnmanaged(u64, FaultyBlock);
32
+
33
/// The value stored in `FaultyBlocks` for one block address.
const FaultyBlock = struct {
    /// Why the block was enqueued.
    const Cause = union(enum) {
        /// Repair a single block.
        ///
        /// Originates from one of:
        /// - the grid scrubber
        /// - a grid read during prefetch/compaction
        /// - a grid read while opening the grid/forest
        repair,
        /// State syncing the index or a value block of a table.
        /// NB: when a replica decides to sync a block, that block might already be queued for
        /// repair.
        sync: struct {
            /// The table this block belongs to.
            table: *RepairTable,
            block: union(enum) {
                table_index,
                /// The index of the value block within the index block.
                table_value: u32,
            },
        },
    };

    /// Checksum of the wanted block. Re-enqueuing the same address must present the same
    /// checksum.
    checksum: u128,
    cause: Cause,
    /// Transitions:
    /// - Initial state is `waiting`.
    /// - `waiting → writing` when the block arrives and begins to repair.
    /// - `writing → aborting` when checkpoint becomes durable and the (writing) block is to be
    ///   freed.
    state: enum { waiting, writing, aborting } = .waiting,
};
63
+
64
/// Progress record for one table being synced.
pub const RepairTable = struct {
    /// Manifest description of the table; `sync_table()` copies it in from the caller.
    table_info: schema.ManifestNode.TableInfo,
    /// Which value blocks have been received so far.
    ///
    /// Invariants:
    /// - value_blocks_received.count < table_blocks_total
    /// - value_blocks_received.capacity = constants.lsm_table_value_blocks_max
    ///
    /// TODO(Congestion control): This bitset is currently used only for extra validation.
    /// Eventually we should request tables using this + EWAH encoding, instead of
    /// block-by-block.
    value_blocks_received: *std.DynamicBitSetUnmanaged,
    /// Number of the table's blocks written so far, counting the index block.
    ///
    /// Invariants:
    /// - table_blocks_written ≤ table_blocks_total
    table_blocks_written: u32 = 0,
    /// Total number of blocks in the table, counting the index block.
    /// - null: the table is still awaiting its index block.
    /// - non-null: the table is awaiting value blocks.
    table_blocks_total: ?u32 = null,
    /// Intrusive link for the `faulty_tables`/`faulty_tables_free` queues.
    link: QueueType(RepairTable).Link = .{},
};
84
+
85
/// Sizing parameters, fixed at `init()`.
pub const Options = struct {
    /// Lower bound on how many repair_block()'s may be outstanding concurrently.
    blocks_max: usize,
    /// Upper bound on how many sync_table()'s may be outstanding concurrently.
    tables_max: usize,
};
91
+
92
+ options: Options,
93
+
94
+ /// Invariants:
95
+ /// - For every block address in faulty_blocks, ¬free_set.is_free(address).
96
+ faulty_blocks: FaultyBlocks,
97
+ /// Index within `faulty_blocks`, used to cycle through block-repair requests.
98
+ ///
99
+ /// Invariants:
100
+ /// - faulty_blocks.count() > 0 implies faulty_blocks_repair_index < faulty_blocks.count()
101
+ /// - faulty_blocks.count() = 0 implies faulty_blocks_repair_index = faulty_blocks.count()
102
+ faulty_blocks_repair_index: usize = 0,
103
+
104
+ /// On `sync_jump_commence()` and `sync_complete()`, swap this with `faulty_blocks` so that the
105
+ /// (possibly invalid) table blocks don't interfere.
106
+ ///
107
+ /// See state.sync_jump for more information.
108
+ syncing_faulty_blocks: FaultyBlocks,
109
+
110
+ /// Invariants:
111
+ /// - enqueued_blocks_sync + enqueued_blocks_repair =
112
+ /// faulty_blocks.count() + syncing_faulty_blocks.count()
113
+ /// - enqueued_blocks_sync ≤ options.tables_max * constants.lsm_table_value_blocks_max
114
+ enqueued_blocks_repair: usize = 0,
115
+ enqueued_blocks_sync: usize = 0,
116
+
117
+ /// Invariants:
118
+ /// - For every index address in faulty_tables: ¬free_set.is_free(address).
119
+ /// - A given RepairTable is never in both `faulty_tables` and `faulty_tables_free`.
120
+ /// - `faulty_tables` does not contain multiple items with the same underlying table
121
+ /// (address/checksum).
122
+ faulty_tables: QueueType(RepairTable) = QueueType(RepairTable).init(.{
123
+ .name = "grid_missing_blocks_tables",
124
+ }),
125
+ faulty_tables_free: QueueType(RepairTable) = QueueType(RepairTable).init(.{
126
+ .name = "grid_missing_blocks_tables_free",
127
+ }),
128
+
129
+ state: union(enum) {
130
+ repairing,
131
+ /// Set while the replica is syncing its superblock and opening its grid/forest.
132
+ ///
133
+ /// While `state=sync_jump`, only repair single blocks, not tables. Table blocks are
134
+ /// temporarily relegated to syncing_faulty_blocks:
135
+ /// - When state≠sync_jump, faulty_blocks=big and syncing_faulty_blocks=small/unused.
136
+ /// - When state=sync_jump, faulty_blocks=small and syncing_faulty_blocks=big.
137
+ ///
138
+ /// When we finish with state=sync_jump:
139
+ /// - For any table belonging in the new checkpoint: pick up repair where we left off.
140
+ /// - For any table not belonging in the new checkpoint: cancel.
141
+ sync_jump,
142
+ checkpoint_durable: struct {
143
+ /// The number of faulty_blocks with state=aborting.
144
+ aborting: u64,
145
+ },
146
+ } = .repairing,
147
+
148
/// Create an empty queue with both block maps pre-sized, since `enqueue_faulty_block()` inserts
/// with `getOrPutAssumeCapacity`.
///
/// Returns `error.OutOfMemory` if either reservation fails; nothing is leaked in that case.
pub fn init(
    allocator: std.mem.Allocator,
    options: Options,
) error{OutOfMemory}!GridBlocksMissing {
    var blocks: FaultyBlocks = .{};
    errdefer blocks.deinit(allocator);

    var blocks_syncing: FaultyBlocks = .{};
    errdefer blocks_syncing.deinit(allocator);

    // The main map holds single-block repairs plus the blocks of every syncing table.
    const table_blocks_max = options.tables_max * constants.lsm_table_value_blocks_max;
    try blocks.ensureTotalCapacity(allocator, options.blocks_max + table_blocks_max);

    // During state=sync_jump, we only need to sync single blocks, not full tables.
    // (This sounds backwards! But the reason is that state=sync_jump corresponds to grid
    // cancellation + checkpoint replacement, not table/content sync. We repair missing blocks
    // from the free set and checkpoint trailers.)
    try blocks_syncing.ensureTotalCapacity(allocator, options.blocks_max);

    return .{
        .options = options,
        .faulty_blocks = blocks,
        .syncing_faulty_blocks = blocks_syncing,
    };
}
174
+
175
/// Free both block maps, then poison the queue so that any later use is caught.
/// `allocator` must be the allocator that was passed to `init()`.
pub fn deinit(queue: *GridBlocksMissing, allocator: std.mem.Allocator) void {
    const maps = [_]*FaultyBlocks{ &queue.syncing_faulty_blocks, &queue.faulty_blocks };
    for (maps) |map| map.deinit(allocator);

    queue.* = undefined;
}
181
+
182
/// Assert the queue's bookkeeping invariants:
/// - the repair/sync counters agree with the contents of both block maps,
/// - the repair cursor is in range,
/// - every non-aborting sync block refers to a table in `faulty_tables`,
/// - the aborting count matches `state`,
/// - the two maps' capacities are oriented as `state` requires,
/// - no table is in both `faulty_tables` and `faulty_tables_free`.
pub fn verify(queue: *const GridBlocksMissing) void {
    const blocks_total = queue.faulty_blocks.count() + queue.syncing_faulty_blocks.count();
    assert(blocks_total == queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);
    assert(queue.faulty_blocks_repair_index == 0 or
        queue.faulty_blocks_repair_index < queue.faulty_blocks.count());

    // Recount everything from scratch and compare against the cached counters.
    var count_repair: u32 = 0;
    var count_sync: u32 = 0;
    var count_aborting: u32 = 0;
    for ([_]FaultyBlocks{
        queue.faulty_blocks,
        queue.syncing_faulty_blocks,
    }) |blocks| {
        for (blocks.values()) |fault| {
            if (fault.state == .aborting) count_aborting += 1;

            switch (fault.cause) {
                .repair => count_repair += 1,
                .sync => |sync| {
                    count_sync += 1;
                    // These are not exclusive because the replica may reuse a RepairTable while
                    // we are still aborting the old blocks.
                    assert(queue.faulty_tables.contains(sync.table) or
                        fault.state == .aborting);
                },
            }
        }
    }
    assert(queue.enqueued_blocks_repair == count_repair);
    assert(queue.enqueued_blocks_sync == count_sync);
    if (count_sync == 0) assert(queue.faulty_tables.empty());

    switch (queue.state) {
        .checkpoint_durable => |durable| assert(count_aborting == durable.aborting),
        .repairing, .sync_jump => assert(count_aborting == 0),
    }

    // The larger map is `syncing_faulty_blocks` during sync_jump, and `faulty_blocks` otherwise.
    const capacity_syncing = queue.syncing_faulty_blocks.capacity();
    const capacity_faulty = queue.faulty_blocks.capacity();
    assert(capacity_syncing != capacity_faulty);
    if (queue.state == .sync_jump) {
        assert(capacity_syncing > capacity_faulty);
    } else {
        assert(capacity_syncing < capacity_faulty);
        assert(queue.syncing_faulty_blocks.count() == 0);
    }

    var tables_free = queue.faulty_tables_free.iterate();
    while (tables_free.next()) |table_free| {
        assert(!queue.faulty_tables.contains(table_free));
    }
}
236
+
237
/// Return a request for the block at the repair cursor, then advance the cursor (wrapping).
///
/// Note that returning `null` doesn't necessarily indicate that there are no more blocks: it
/// only means the block at the cursor is already `writing` or `aborting`. The cursor advances
/// either way, so a later call may return a request.
///
/// The caller must ensure `faulty_blocks` is non-empty.
pub fn next_request(queue: *GridBlocksMissing) ?vsr.BlockRequest {
    const fault_count = queue.faulty_blocks.count();
    assert(fault_count > 0);
    assert(queue.faulty_blocks_repair_index < fault_count);

    const cursor = queue.faulty_blocks_repair_index;
    queue.faulty_blocks_repair_index = (cursor + 1) % fault_count;

    const fault = queue.faulty_blocks.values()[cursor];
    switch (fault.state) {
        .writing, .aborting => return null,
        .waiting => {},
    }

    return .{
        .block_address = queue.faulty_blocks.keys()[cursor],
        .block_checksum = fault.checksum,
    };
}
257
+
258
/// Take a table off `faulty_tables_free` and clear its received-blocks bitset.
/// Returns `null` when the free queue is empty.
pub fn reclaim_table(queue: *GridBlocksMissing) ?*RepairTable {
    if (queue.faulty_tables_free.pop()) |table| {
        table.value_blocks_received.unsetAll();
        return table;
    }
    return null;
}
263
+
264
/// Count the number of *non-table* block repairs available.
///
/// Outside of state=sync_jump, `tables_max * lsm_table_value_blocks_max` slots of the map's
/// capacity are excluded from the result (presumably held back for table sync; this matches
/// the sizing in `init()`). During sync_jump the whole capacity counts.
pub fn repair_blocks_available(queue: *const GridBlocksMissing) usize {
    assert(queue.faulty_tables.count() <= queue.options.tables_max);
    assert(queue.faulty_blocks.count() + queue.syncing_faulty_blocks.count() ==
        queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);

    const table_blocks_max = queue.options.tables_max * constants.lsm_table_value_blocks_max;
    assert(queue.enqueued_blocks_sync <= table_blocks_max);

    const slots_unused = queue.faulty_blocks.capacity() - queue.enqueued_blocks_repair;
    return switch (queue.state) {
        .sync_jump => slots_unused,
        .repairing, .checkpoint_durable => slots_unused - table_blocks_max,
    };
}
285
+
286
/// Queue a faulty block to request from the cluster and repair.
///
/// The caller must first ensure that `repair_blocks_available() > 0`.
/// Queuing an address that is already present is a no-op.
pub fn repair_block(queue: *GridBlocksMissing, address: u64, checksum: u128) void {
    assert(queue.repair_blocks_available() > 0);
    assert(queue.faulty_tables.count() <= queue.options.tables_max);
    assert(queue.faulty_blocks.count() + queue.syncing_faulty_blocks.count() ==
        queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);

    switch (queue.enqueue_faulty_block(address, checksum, .repair)) {
        .insert, .duplicate => {},
        .replace => assert(queue.state == .sync_jump),
    }
}
300
+
301
/// Begin syncing a table: initialize `table`, push it onto `faulty_tables`, and enqueue the
/// table's index block.
///
/// - `table` and `table_bitset` must not already be in use by a queued table.
/// - `table_bitset` must be empty, with capacity `constants.lsm_table_value_blocks_max`.
///
/// Returns `.duplicate`, leaving `table` and the queue untouched, when a table with the same
/// address is already queued. Otherwise returns `.insert`.
pub fn sync_table(
    queue: *GridBlocksMissing,
    table: *RepairTable,
    table_bitset: *std.DynamicBitSetUnmanaged,
    table_info: *const schema.ManifestNode.TableInfo,
) enum { insert, duplicate } {
    assert(queue.state == .repairing or queue.state == .checkpoint_durable);
    assert(queue.faulty_tables.count() < queue.options.tables_max);
    assert(queue.faulty_blocks.count() + queue.syncing_faulty_blocks.count() ==
        queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);
    assert(table_bitset.capacity() == constants.lsm_table_value_blocks_max);
    assert(table_bitset.count() == 0);

    // Read these before `table` is overwritten below.
    const index_address = table_info.address;
    const index_checksum = table_info.checksum;

    var queued_tables = queue.faulty_tables.iterate();
    while (queued_tables.next()) |queued| {
        assert(queued != table);
        assert(queued.value_blocks_received != table_bitset);

        if (queued.table_info.address != index_address) continue;

        // The ForestTableIterator does not repeat tables *except* when the table was first
        // encountered at level L, and then it was re-encountered having moved to a deeper
        // level (L+1, etc).
        assert(queued.table_info.checksum == index_checksum);
        return .duplicate;
    }

    table.* = .{
        .table_info = table_info.*,
        .value_blocks_received = table_bitset,
    };
    queue.faulty_tables.push(table);

    // The index block may already be queued as a plain repair, hence `.replace` is allowed.
    const enqueue = queue.enqueue_faulty_block(index_address, index_checksum, .{
        .sync = .{ .table = table, .block = .table_index },
    });
    assert(enqueue == .insert or enqueue == .replace);

    return .insert;
}
344
+
345
/// Insert `address` into `faulty_blocks`, or update the entry that is already there.
///
/// Returns:
/// - `.insert`: the address was new; the matching counter was incremented.
/// - `.duplicate`: the address was already queued and `cause` is `.repair`; nothing changed.
/// - `.replace`: the address was already queued as a `.repair` and `cause` is `.sync`; the
///   entry's cause was upgraded to `cause` and the counters adjusted. The payload points at
///   the updated entry.
///
/// An existing entry must carry the same checksum and must not be `aborting`.
/// Capacity is assumed to have been reserved already (see `init()`).
fn enqueue_faulty_block(
    queue: *GridBlocksMissing,
    address: u64,
    checksum: u128,
    cause: FaultyBlock.Cause,
) union(enum) {
    insert,
    replace: *FaultyBlock,
    duplicate,
} {
    assert(queue.faulty_tables.count() <= queue.options.tables_max);
    assert(queue.faulty_blocks.count() + queue.syncing_faulty_blocks.count() ==
        queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);

    // The counters must balance again on every exit path.
    defer assert(queue.faulty_blocks.count() + queue.syncing_faulty_blocks.count() ==
        queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);

    const entry = queue.faulty_blocks.getOrPutAssumeCapacity(address);
    if (!entry.found_existing) {
        switch (cause) {
            .repair => queue.enqueued_blocks_repair += 1,
            .sync => queue.enqueued_blocks_sync += 1,
        }

        entry.value_ptr.* = .{
            .checksum = checksum,
            .cause = cause,
        };
        return .insert;
    }

    const existing = entry.value_ptr;
    assert(existing.checksum == checksum);
    assert(existing.state != .aborting);

    switch (cause) {
        .repair => return .duplicate,
        .sync => {
            // The value block may already have been queued by either the scrubber or a
            // commit/compaction grid read.
            assert(existing.cause == .repair);

            queue.enqueued_blocks_repair -= 1;
            queue.enqueued_blocks_sync += 1;
            existing.cause = cause;
            return .{ .replace = existing };
        },
    }
}
396
+
397
/// Returns true when `repairing_tables()` is true or at least one `.repair` fault is queued.
pub fn repairing_blocks(queue: *const GridBlocksMissing) bool {
    if (queue.repairing_tables()) return true;
    return queue.enqueued_blocks_repair > 0;
}
400
+
401
/// Returns true when at least one `.sync` fault is queued and the queue is not in the
/// `.sync_jump` state.
pub fn repairing_tables(queue: *const GridBlocksMissing) bool {
    if (queue.state == .sync_jump) return false;
    return queue.enqueued_blocks_sync > 0;
}
404
+
405
/// Returns true when `faulty_blocks` holds a fault for `address` whose checksum equals
/// `checksum` and whose state is `.waiting`.
pub fn block_waiting(queue: *const GridBlocksMissing, address: u64, checksum: u128) bool {
    if (queue.faulty_blocks.getIndex(address)) |index| {
        const fault = &queue.faulty_blocks.values()[index];
        if (fault.checksum != checksum) return false;
        return fault.state == .waiting;
    }
    return false;
}
410
+
411
/// Transitions the waiting fault for (`address`, `checksum`) to `.writing`.
///
/// The fault must satisfy `block_waiting(address, checksum)`. When the fault is a synced
/// table value block, the block's bit is also set in the table's `value_blocks_received`.
pub fn write_commence(queue: *GridBlocksMissing, address: u64, checksum: u128) void {
    assert(queue.block_waiting(address, checksum));
    maybe(queue.state == .checkpoint_durable);
    maybe(queue.state == .sync_jump);

    const index = queue.faulty_blocks.getIndex(address).?;
    const fault = &queue.faulty_blocks.values()[index];
    assert(fault.checksum == checksum);
    assert(fault.state == .waiting);
    if (queue.state == .sync_jump) assert(fault.cause == .repair);

    switch (fault.cause) {
        .repair => {},
        .sync => |sync| switch (sync.block) {
            .table_index => {},
            .table_value => |value_index| {
                const table = sync.table;
                assert(table.table_blocks_written < table.table_blocks_total.?);
                assert(!table.value_blocks_received.isSet(value_index));

                // Record the value block as received as soon as its write begins.
                table.value_blocks_received.set(value_index);
            },
        },
    }

    fault.state = .writing;
}
434
+
435
/// Finishes the write of `block`: releases its fault and, for synced blocks, advances the
/// owning table's progress.
///
/// - An `.aborting` fault only decrements the `checkpoint_durable.aborting` counter.
/// - A synced index block queues the table's value blocks (see `enqueue_table_value()`).
/// - Once every block of a table has been written, the table moves from `faulty_tables`
///   to `faulty_tables_free`.
pub fn write_complete(queue: *GridBlocksMissing, block: BlockPtrConst) void {
    const header = schema.header_from_block(block);
    const index = queue.faulty_blocks.getIndex(header.address).?;
    const address = queue.faulty_blocks.keys()[index];
    // Copy the fault by value: release_fault() below removes the entry from the map.
    const fault: FaultyBlock = queue.faulty_blocks.values()[index];
    assert(address == header.address);
    assert(fault.checksum == header.checksum);
    assert(fault.state == .aborting or fault.state == .writing);
    if (queue.state == .sync_jump) assert(fault.cause == .repair);

    queue.release_fault(index);

    if (fault.state == .aborting) {
        queue.state.checkpoint_durable.aborting -= 1;
        return;
    }

    const sync = switch (fault.cause) {
        .repair => {
            maybe(queue.state == .sync_jump);
            return;
        },
        .sync => |sync_cause| sync_cause,
    };
    const table = sync.table;

    assert(queue.state != .sync_jump);
    switch (sync.block) {
        .table_index => {
            assert(table.value_blocks_received.count() == 0);

            // The value blocks are queued now, when the index write ends, rather than
            // when it begins. That lets a `repair_block()` be converted into a
            // `sync_table()` even after the former's write is already in progress.
            queue.enqueue_table_value(table, block);
        },
        .table_value => |value_index| {
            assert(table.value_blocks_received.isSet(value_index));
        },
    }

    // By now the index block has been received, so the table's block total is known.
    assert(table.table_blocks_total != null);
    assert(table.table_blocks_written < table.table_blocks_total.?);
    assert(table.value_blocks_received.count() <= table.table_blocks_total.? - 1);

    table.table_blocks_written += 1;
    if (table.table_blocks_written == table.table_blocks_total.?) {
        queue.faulty_tables.remove(table);
        queue.faulty_tables_free.push(table);
    }
}
488
+
489
/// Queues every value block referenced by `index_block` as a `.sync` fault of `table`.
///
/// Sets `table.table_blocks_total` to the number of used value blocks plus one (for the
/// index block itself). A value block whose existing repair write is already in flight
/// is marked as received immediately.
fn enqueue_table_value(
    queue: *GridBlocksMissing,
    table: *RepairTable,
    index_block: BlockPtrConst,
) void {
    assert(queue.state != .sync_jump);
    assert(queue.faulty_blocks.count() ==
        queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);
    assert(table.table_blocks_total == null);
    assert(table.table_blocks_written == 0);
    assert(table.value_blocks_received.count() == 0);

    const index_schema = schema.TableIndex.from(index_block);
    const header = schema.header_from_block(index_block);
    assert(header.address == table.table_info.address);
    assert(header.checksum == table.table_info.checksum);
    assert(header.block_type == .index);

    table.table_blocks_total = index_schema.value_blocks_used(index_block) + 1;

    for (
        index_schema.value_addresses_used(index_block),
        index_schema.value_checksums_used(index_block),
        0..,
    ) |value_address, value_checksum, value_index| {
        const enqueue = queue.enqueue_faulty_block(
            value_address,
            value_checksum.value,
            .{ .sync = .{
                .table = table,
                .block = .{ .table_value = @intCast(value_index) },
            } },
        );

        switch (enqueue) {
            .insert => {},
            .replace => |fault| {
                // The replaced repair fault is already being written, so
                // write_commence() will not run again for it; record the block here.
                if (fault.state == .writing) {
                    table.value_blocks_received.set(value_index);
                }
            },
            .duplicate => unreachable,
        }
    }
}
529
+
530
/// Removes the fault at `fault_index` from `faulty_blocks` (swap-remove) and decrements
/// the counter that matches its cause. Resets `faulty_blocks_repair_index` to zero if the
/// removal leaves it equal to the new count.
fn release_fault(queue: *GridBlocksMissing, fault_index: usize) void {
    assert(queue.faulty_blocks_repair_index < queue.faulty_blocks.count());

    const counter = switch (queue.faulty_blocks.values()[fault_index].cause) {
        .repair => &queue.enqueued_blocks_repair,
        .sync => &queue.enqueued_blocks_sync,
    };
    counter.* -= 1;

    queue.faulty_blocks.swapRemoveAt(fault_index);

    const count_after = queue.faulty_blocks.count();
    if (queue.faulty_blocks_repair_index == count_after) {
        queue.faulty_blocks_repair_index = 0;
    }
}
544
+
545
/// Reverts every `.writing` fault in `faulty_blocks` to `.waiting`.
///
/// For synced table value blocks this also clears the bit that `write_commence()` set in
/// the table's `value_blocks_received`. No fault may be `.aborting` when this is called.
pub fn cancel(queue: *GridBlocksMissing) void {
    queue.verify();
    defer queue.verify();

    for (queue.faulty_blocks.values()) |*fault| {
        assert(fault.state != .aborting);
        if (fault.state != .writing) continue;

        // Due to Grid.cancel() this write may not actually take place.
        fault.state = .waiting;

        switch (fault.cause) {
            .repair => {},
            .sync => |sync| switch (sync.block) {
                .table_index => {},
                .table_value => |value_index| {
                    const received = sync.table.value_blocks_received;
                    assert(received.isSet(value_index));
                    received.unset(value_index);
                },
            },
        }
    }
}
568
+
569
/// During state sync, already-queued missing blocks are cancelled in two stages:
/// 1. Here (called immediately after grid.cancel()), single-block (`.repair`) faults are
///    dropped.
/// 2. Later (in sync_complete()), once the state machine is opened with the new checkpoint,
///    tables that did not survive into the new checkpoint are cleaned up.
pub fn sync_jump_commence(queue: *GridBlocksMissing) void {
    queue.verify();
    defer if (constants.verify) queue.verify();
    // The replica may call sync_jump_commence() more than once without an intervening
    // sync_complete(), if it syncs several checkpoints without successfully opening the
    // state machine.
    assert(queue.state == .repairing or queue.state == .sync_jump);

    // Drop the "single" blocks: once the sync finishes there is no easy way to check
    // whether they are still valid.
    var index: usize = 0;
    while (index < queue.faulty_blocks.count()) {
        const fault = &queue.faulty_blocks.values()[index];
        assert(fault.state == .waiting);

        if (fault.cause == .repair) {
            // release_fault() swap-removes, so a different entry now occupies `index`:
            // do not advance.
            queue.release_fault(index);
        } else {
            assert(queue.state == .repairing);
            index += 1;
        }
    }
    assert(queue.enqueued_blocks_repair == 0);

    if (queue.state == .repairing) {
        queue.state = .sync_jump;

        // Set the remaining (sync) faults aside in `syncing_faulty_blocks`.
        assert(queue.syncing_faulty_blocks.count() == 0);
        std.mem.swap(FaultyBlocks, &queue.faulty_blocks, &queue.syncing_faulty_blocks);
        queue.faulty_blocks_repair_index = 0;
    }
    assert(queue.faulty_blocks.count() == 0);
    assert(queue.syncing_faulty_blocks.count() == queue.enqueued_blocks_sync);
}
606
+
607
/// Cancels the repair of `tables`, which do not belong in the new (sync target) checkpoint,
/// and then completes the sync jump via `sync_complete()`.
/// (Unlike at checkpoint, the free set alone cannot tell us which blocks to discard.)
pub fn sync_tables_cancel(
    queue: *GridBlocksMissing,
    tables: []const *RepairTable,
    free_set: *const vsr.FreeSet,
) void {
    queue.verify();
    defer if (constants.verify) queue.verify();

    assert(queue.state == .sync_jump);

    for (tables) |table| {
        const table_is_free = queue.faulty_tables_free.contains(table);
        assert(queue.faulty_tables.contains(table) != table_is_free);

        // Already cancelled or completed; it just has not been reclaimed yet.
        if (queue.faulty_tables_free.contains(table)) continue;

        var removed: u32 = 0;
        var index: usize = 0;
        while (index < queue.syncing_faulty_blocks.count()) {
            const fault = &queue.syncing_faulty_blocks.values()[index];
            assert(fault.state != .aborting);

            const owned_by_table = switch (fault.cause) {
                .repair => false,
                .sync => |sync| owned: {
                    assert(fault.state == .waiting);
                    break :owned sync.table == table;
                },
            };

            if (owned_by_table) {
                // Swap-remove puts a different entry at `index`: do not advance.
                removed += 1;
                queue.enqueued_blocks_sync -= 1;
                queue.syncing_faulty_blocks.swapRemoveAt(index);
            } else {
                index += 1;
            }
        }
        // If the index block has not arrived yet, only the index block itself was queued.
        assert(removed == (table.table_blocks_total orelse 1) - table.table_blocks_written);
        assert(queue.faulty_blocks.count() + queue.syncing_faulty_blocks.count() ==
            queue.enqueued_blocks_sync + queue.enqueued_blocks_repair);

        queue.faulty_tables.remove(table);
        queue.faulty_tables_free.push(table);
    }
    queue.sync_complete(free_set);
}
655
+
656
/// Leaves the `.sync_jump` state: swaps the set-aside sync faults back into `faulty_blocks`
/// and merges in any repair faults that were queued while the jump was in progress.
fn sync_complete(queue: *GridBlocksMissing, free_set: *const vsr.FreeSet) void {
    queue.verify();
    defer if (constants.verify) queue.verify();

    assert(queue.state == .sync_jump);
    assert(free_set.opened);

    queue.state = .repairing;
    std.mem.swap(FaultyBlocks, &queue.faulty_blocks, &queue.syncing_faulty_blocks);

    // After the swap, `syncing_faulty_blocks` holds the block repairs for faults incurred
    // since `sync_jump_commence()`. Move them back into `faulty_blocks`.
    while (queue.syncing_faulty_blocks.pop()) |leftover| {
        assert(leftover.value.cause == .repair);

        const slot = queue.faulty_blocks.getOrPutAssumeCapacity(leftover.key);
        assert(!slot.found_existing);
        slot.value_ptr.* = leftover.value;
    }

    // No queued fault may refer to a block that is free in the new checkpoint.
    for (queue.faulty_blocks.keys()) |address| {
        assert(!free_set.is_free(address));
    }
}
681
+
682
/// Now that the current checkpoint is durable, aborts queued repairs of blocks that are
/// about to be freed, and enters the `.checkpoint_durable` state.
///
/// `.waiting` faults for such blocks are released immediately. `.writing` faults are
/// marked `.aborting` and counted, so that `checkpoint_durable_complete()` can wait for
/// them. Tables whose index block is about to be freed are moved to `faulty_tables_free`.
pub fn checkpoint_durable_commence(
    queue: *GridBlocksMissing,
    free_set: *const vsr.FreeSet,
) void {
    queue.verify();
    defer if (constants.verify) queue.verify();

    assert(queue.state == .repairing);
    assert(queue.faulty_blocks.count() ==
        queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);
    assert(free_set.opened);

    var aborting: usize = 0;

    var index: usize = 0;
    while (index < queue.faulty_blocks.count()) {
        const address = queue.faulty_blocks.keys()[index];
        const fault = &queue.faulty_blocks.values()[index];
        assert(!free_set.is_free(address));
        assert(fault.state != .aborting);

        // `to_be_freed_at_checkpoint_durability` is used rather than `is_released`: the
        // latter also covers blocks that are only released when the *next* checkpoint
        // becomes durable.
        if (!free_set.to_be_freed_at_checkpoint_durability(address)) {
            index += 1;
            continue;
        }

        switch (fault.state) {
            .waiting => {
                // release_fault() swap-removes, so a different entry now occupies
                // `index`: do not advance.
                queue.release_fault(index);
            },
            .writing => {
                fault.state = .aborting;
                aborting += 1;
                index += 1;
            },
            .aborting => unreachable,
        }
    }

    // Rebuild `faulty_tables` with only the tables that survive the checkpoint.
    var tables_retained: QueueType(RepairTable) = QueueType(RepairTable).init(.{
        .name = queue.faulty_tables.any.name,
    });
    while (queue.faulty_tables.pop()) |table| {
        const table_address = table.table_info.address;
        assert(!free_set.is_free(table_address));

        if (free_set.to_be_freed_at_checkpoint_durability(table_address)) {
            queue.faulty_tables_free.push(table);
        } else {
            tables_retained.push(table);
        }
    }
    queue.faulty_tables = tables_retained;

    queue.state = .{ .checkpoint_durable = .{ .aborting = aborting } };
}
736
+
737
/// Returns `true`, and switches back to `.repairing`, once every in-flight write to a
/// block staged for release has finished (that is, no `.aborting` faults remain).
/// Returns `false` while any are still pending.
/// (All other writes can safely complete after the checkpoint.)
pub fn checkpoint_durable_complete(queue: *GridBlocksMissing) bool {
    queue.verify();
    assert(queue.state == .checkpoint_durable);
    assert(queue.faulty_blocks.count() ==
        queue.enqueued_blocks_repair + queue.enqueued_blocks_sync);

    if (queue.state.checkpoint_durable.aborting != 0) return false;

    queue.state = .repairing;

    // With the counter at zero, no fault may still be marked as aborting.
    for (queue.faulty_blocks.values()) |*fault| {
        assert(fault.state != .aborting);
    }

    return true;
}
757
+ };