tigerbeetle 0.0.40 → 0.17.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +0 -25
  3. data/README.md +670 -80
  4. data/docs/migration.md +201 -0
  5. data/sig/tigerbeetle.rbs +271 -0
  6. data/src/ext/tigerbeetle/extconf.rb +47 -0
  7. data/src/ext/tigerbeetle/lib/aarch64-linux-gnu.2.27/libtb_client.so +0 -0
  8. data/src/ext/tigerbeetle/lib/aarch64-linux-musl/libtb_client.so +0 -0
  9. data/src/ext/tigerbeetle/lib/aarch64-macos/libtb_client.dylib +0 -0
  10. data/src/ext/tigerbeetle/lib/x86_64-linux-gnu.2.27/libtb_client.so +0 -0
  11. data/src/ext/tigerbeetle/lib/x86_64-linux-musl/libtb_client.so +0 -0
  12. data/src/ext/tigerbeetle/lib/x86_64-macos/libtb_client.dylib +0 -0
  13. data/src/ext/tigerbeetle/lib/x86_64-windows/tb_client.dll +0 -0
  14. data/src/ext/tigerbeetle/rb_tb_gen.h +458 -0
  15. data/{ext/tb_client/tigerbeetle/src/clients/rust/assets → src/ext/tigerbeetle}/tb_client.h +18 -16
  16. data/src/ext/tigerbeetle/tigerbeetle.c +310 -0
  17. data/src/tigerbeetle/bindings.rb +347 -0
  18. data/src/tigerbeetle/client.rb +129 -0
  19. data/src/tigerbeetle/completion_dispatcher.rb +108 -0
  20. data/src/tigerbeetle/id.rb +40 -0
  21. data/src/tigerbeetle/tb.rb +3 -0
  22. data/src/tigerbeetle/version.rb +3 -0
  23. data/src/tigerbeetle.rb +39 -0
  24. metadata +33 -350
  25. data/CHANGELOG.md +0 -162
  26. data/ext/tb_client/extconf.rb +0 -41
  27. data/ext/tb_client/tigerbeetle/LICENSE +0 -177
  28. data/ext/tb_client/tigerbeetle/build.zig +0 -2296
  29. data/ext/tb_client/tigerbeetle/src/aof.zig +0 -1000
  30. data/ext/tb_client/tigerbeetle/src/build/fetch.zig +0 -112
  31. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +0 -808
  32. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +0 -1283
  33. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +0 -1704
  34. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +0 -341
  35. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +0 -1450
  36. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +0 -1659
  37. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +0 -406
  38. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +0 -1092
  39. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +0 -286
  40. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +0 -158
  41. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +0 -229
  42. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +0 -110
  43. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +0 -386
  44. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +0 -34
  45. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +0 -281
  46. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +0 -312
  47. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +0 -138
  48. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +0 -466
  49. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +0 -157
  50. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +0 -90
  51. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +0 -203
  52. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +0 -79
  53. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +0 -542
  54. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +0 -109
  55. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +0 -86
  56. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +0 -370
  57. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +0 -386
  58. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +0 -167
  59. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +0 -126
  60. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +0 -996
  61. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +0 -748
  62. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +0 -3238
  63. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +0 -1718
  64. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +0 -190
  65. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +0 -104
  66. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +0 -75
  67. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +0 -522
  68. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +0 -267
  69. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +0 -3
  70. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +0 -379
  71. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +0 -131
  72. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +0 -63
  73. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +0 -588
  74. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +0 -73
  75. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +0 -106
  76. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +0 -305
  77. data/ext/tb_client/tigerbeetle/src/config.zig +0 -296
  78. data/ext/tb_client/tigerbeetle/src/constants.zig +0 -790
  79. data/ext/tb_client/tigerbeetle/src/copyhound.zig +0 -202
  80. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +0 -72
  81. data/ext/tb_client/tigerbeetle/src/direction.zig +0 -120
  82. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +0 -158
  83. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +0 -156
  84. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +0 -252
  85. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +0 -313
  86. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +0 -87
  87. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +0 -63
  88. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +0 -47
  89. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +0 -28
  90. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +0 -61
  91. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +0 -169
  92. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +0 -46
  93. data/ext/tb_client/tigerbeetle/src/ewah.zig +0 -445
  94. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +0 -128
  95. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +0 -171
  96. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +0 -179
  97. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +0 -662
  98. data/ext/tb_client/tigerbeetle/src/io/common.zig +0 -155
  99. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +0 -1093
  100. data/ext/tb_client/tigerbeetle/src/io/linux.zig +0 -1880
  101. data/ext/tb_client/tigerbeetle/src/io/test.zig +0 -1005
  102. data/ext/tb_client/tigerbeetle/src/io/windows.zig +0 -1598
  103. data/ext/tb_client/tigerbeetle/src/io.zig +0 -34
  104. data/ext/tb_client/tigerbeetle/src/iops.zig +0 -134
  105. data/ext/tb_client/tigerbeetle/src/list.zig +0 -236
  106. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +0 -848
  107. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +0 -179
  108. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +0 -424
  109. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +0 -420
  110. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +0 -2114
  111. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +0 -185
  112. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +0 -1146
  113. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +0 -1102
  114. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +0 -200
  115. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +0 -1495
  116. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +0 -739
  117. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +0 -166
  118. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +0 -754
  119. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +0 -1294
  120. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +0 -510
  121. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +0 -1241
  122. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -628
  123. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +0 -247
  124. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +0 -116
  125. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +0 -543
  126. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +0 -938
  127. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +0 -293
  128. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +0 -359
  129. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +0 -99
  130. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +0 -17
  131. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +0 -962
  132. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +0 -617
  133. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +0 -84
  134. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +0 -1500
  135. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -149
  136. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -7
  137. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +0 -865
  138. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +0 -607
  139. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +0 -843
  140. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +0 -90
  141. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +0 -40
  142. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +0 -629
  143. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +0 -933
  144. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +0 -534
  145. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +0 -469
  146. data/ext/tb_client/tigerbeetle/src/message_bus.zig +0 -1219
  147. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +0 -936
  148. data/ext/tb_client/tigerbeetle/src/message_pool.zig +0 -343
  149. data/ext/tb_client/tigerbeetle/src/multiversion.zig +0 -2195
  150. data/ext/tb_client/tigerbeetle/src/queue.zig +0 -390
  151. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +0 -201
  152. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +0 -1356
  153. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +0 -496
  154. data/ext/tb_client/tigerbeetle/src/repl.zig +0 -1034
  155. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +0 -973
  156. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +0 -1866
  157. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +0 -304
  158. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +0 -227
  159. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +0 -658
  160. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +0 -466
  161. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +0 -1058
  162. data/ext/tb_client/tigerbeetle/src/scripts.zig +0 -105
  163. data/ext/tb_client/tigerbeetle/src/shell.zig +0 -1195
  164. data/ext/tb_client/tigerbeetle/src/stack.zig +0 -260
  165. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +0 -911
  166. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +0 -2079
  167. data/ext/tb_client/tigerbeetle/src/state_machine.zig +0 -4872
  168. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +0 -288
  169. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +0 -3128
  170. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +0 -82
  171. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +0 -157
  172. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +0 -292
  173. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +0 -65
  174. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +0 -1414
  175. data/ext/tb_client/tigerbeetle/src/stdx/huge_page_allocator.zig +0 -115
  176. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +0 -92
  177. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +0 -677
  178. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +0 -336
  179. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +0 -511
  180. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +0 -112
  181. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +0 -1163
  182. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +0 -142
  183. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +0 -361
  184. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +0 -275
  185. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +0 -295
  186. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +0 -436
  187. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +0 -48
  188. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +0 -402
  189. data/ext/tb_client/tigerbeetle/src/storage.zig +0 -489
  190. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +0 -180
  191. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +0 -146
  192. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +0 -53
  193. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +0 -61
  194. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +0 -76
  195. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +0 -110
  196. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +0 -412
  197. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +0 -331
  198. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -458
  199. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +0 -1198
  200. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +0 -128
  201. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +0 -181
  202. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +0 -144
  203. data/ext/tb_client/tigerbeetle/src/testing/id.zig +0 -97
  204. data/ext/tb_client/tigerbeetle/src/testing/io.zig +0 -317
  205. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +0 -126
  206. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +0 -533
  207. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +0 -154
  208. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +0 -389
  209. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +0 -1247
  210. data/ext/tb_client/tigerbeetle/src/testing/table.zig +0 -249
  211. data/ext/tb_client/tigerbeetle/src/testing/time.zig +0 -98
  212. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +0 -212
  213. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +0 -26
  214. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +0 -579
  215. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +0 -39
  216. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +0 -214
  217. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +0 -34
  218. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +0 -785
  219. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +0 -543
  220. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +0 -181
  221. data/ext/tb_client/tigerbeetle/src/tidy.zig +0 -1449
  222. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +0 -227
  223. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +0 -1069
  224. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +0 -1422
  225. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +0 -1658
  226. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +0 -518
  227. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +0 -36
  228. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +0 -646
  229. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +0 -958
  230. data/ext/tb_client/tigerbeetle/src/time.zig +0 -236
  231. data/ext/tb_client/tigerbeetle/src/trace/event.zig +0 -745
  232. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +0 -462
  233. data/ext/tb_client/tigerbeetle/src/trace.zig +0 -556
  234. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +0 -321
  235. data/ext/tb_client/tigerbeetle/src/vopr.zig +0 -1785
  236. data/ext/tb_client/tigerbeetle/src/vortex.zig +0 -101
  237. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +0 -473
  238. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +0 -208
  239. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +0 -43
  240. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +0 -768
  241. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +0 -532
  242. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +0 -338
  243. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +0 -1019
  244. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +0 -279
  245. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +0 -1381
  246. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +0 -315
  247. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +0 -1460
  248. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +0 -757
  249. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +0 -797
  250. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +0 -2586
  251. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +0 -308
  252. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +0 -1777
  253. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +0 -715
  254. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +0 -185
  255. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +0 -333
  256. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +0 -12356
  257. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +0 -416
  258. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +0 -165
  259. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +0 -2928
  260. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +0 -1075
  261. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +0 -1603
  262. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -484
  263. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +0 -405
  264. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -355
  265. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +0 -29
  266. data/ext/tb_client/tigerbeetle/src/vsr.zig +0 -1727
  267. data/lib/tb_client/shared_lib.rb +0 -66
  268. data/lib/tb_client.rb +0 -282
  269. data/lib/tigerbeetle/account.rb +0 -38
  270. data/lib/tigerbeetle/account_balance.rb +0 -23
  271. data/lib/tigerbeetle/account_filter.rb +0 -31
  272. data/lib/tigerbeetle/atomic_counter.rb +0 -14
  273. data/lib/tigerbeetle/client.rb +0 -214
  274. data/lib/tigerbeetle/converters/account.rb +0 -63
  275. data/lib/tigerbeetle/converters/account_balance.rb +0 -31
  276. data/lib/tigerbeetle/converters/account_filter.rb +0 -32
  277. data/lib/tigerbeetle/converters/base.rb +0 -35
  278. data/lib/tigerbeetle/converters/create_accounts_result.rb +0 -21
  279. data/lib/tigerbeetle/converters/create_transfers_result.rb +0 -21
  280. data/lib/tigerbeetle/converters/query_filter.rb +0 -33
  281. data/lib/tigerbeetle/converters/time.rb +0 -23
  282. data/lib/tigerbeetle/converters/transfer.rb +0 -64
  283. data/lib/tigerbeetle/converters/uint_128.rb +0 -24
  284. data/lib/tigerbeetle/converters.rb +0 -12
  285. data/lib/tigerbeetle/error.rb +0 -4
  286. data/lib/tigerbeetle/id.rb +0 -30
  287. data/lib/tigerbeetle/platforms.rb +0 -9
  288. data/lib/tigerbeetle/query_filter.rb +0 -31
  289. data/lib/tigerbeetle/request.rb +0 -7
  290. data/lib/tigerbeetle/transfer.rb +0 -40
  291. data/lib/tigerbeetle/version.rb +0 -4
  292. data/lib/tigerbeetle.rb +0 -13
  293. data/tigerbeetle.gemspec +0 -60
@@ -1,1241 +0,0 @@
1
- //! Maintains a durable manifest log of the latest TableInfos for every LSM tree's in-memory
2
- //! manifest.
3
- //!
4
- //! Invariants:
5
- //!
6
- //! * Checkpointing the manifest log must flush all buffered log blocks.
7
- //!
8
- //! * Opening the manifest log must emit only the latest TableInfos to be inserted.
9
- //!
10
- //! * The latest version of a table must never be dropped from the log through a compaction, unless
11
- //! the table was removed.
12
- //!
13
- //! * Removes that are recorded in a log block must also queue that log block for compaction.
14
- //!
15
- //! * Compaction must compact partially full blocks, even where it must rewrite all entries to the
16
- //! tail end of the log.
17
- //!
18
- //! * If a remove is dropped from the log, then all prior inserts/updates must already have been
19
- //! dropped.
20
-
21
- const std = @import("std");
22
- const assert = std.debug.assert;
23
- const mem = std.mem;
24
- const maybe = stdx.maybe;
25
-
26
- const log = std.log.scoped(.manifest_log);
27
-
28
- const constants = @import("../constants.zig");
29
- const vsr = @import("../vsr.zig");
30
- const stdx = @import("stdx");
31
-
32
- const SuperBlockType = vsr.SuperBlockType;
33
- const GridType = @import("../vsr/grid.zig").GridType;
34
- const BlockPtr = @import("../vsr/grid.zig").BlockPtr;
35
- const BlockPtrConst = @import("../vsr/grid.zig").BlockPtrConst;
36
- const allocate_block = @import("../vsr/grid.zig").allocate_block;
37
- const compaction = @import("compaction.zig");
38
- const RingBufferType = stdx.RingBufferType;
39
- const schema = @import("schema.zig");
40
- const TableInfo = schema.ManifestNode.TableInfo;
41
- const BlockReference = vsr.BlockReference;
42
-
43
/// A `ManifestNode` schema whose `entry_count` is pinned at `entry_count_max`, i.e. the layout
/// of a manifest block with every entry slot in use.
const block_builder_schema: schema.ManifestNode = .{
    .entry_count = schema.ManifestNode.entry_count_max,
};
46
-
47
- pub fn ManifestLogType(comptime Storage: type) type {
48
- return struct {
49
- const ManifestLog = @This();
50
-
51
- const SuperBlock = SuperBlockType(Storage);
52
- const Grid = GridType(Storage);
53
-
54
- pub const Callback = *const fn (manifest_log: *ManifestLog) void;
55
-
56
- pub const OpenEvent = *const fn (manifest_log: *ManifestLog, table: *const TableInfo) void;
57
-
58
/// Bundles a `Grid.Write` with a pointer to a manifest log.
// NOTE(review): presumably the back-pointer lets a write completion find its owning log —
// confirm against the write callbacks, which are outside this view.
const Write = struct {
    /// The manifest log associated with this write slot.
    manifest_log: *ManifestLog,
    /// Grid write state; undefined until it is assigned.
    write: Grid.Write = undefined,
};
62
-
63
/// Hash map from a `u64` key to the `TableExtent` recorded for that key.
const TableExtents = std.AutoHashMapUnmanaged(u64, TableExtent);
/// Hash set of `u64` keys (the value type is `void`).
const TablesRemoved = std.AutoHashMapUnmanaged(u64, void);

/// Position of one entry inside the manifest log: which block, and which slot in that block.
pub const TableExtent = struct {
    /// Manifest block address.
    block: u64,
    /// Index within the manifest block Label/TableInfo arrays.
    entry: u32,
};
70
-
71
- superblock: *SuperBlock,
72
- grid: *Grid,
73
- pace: *const Pace,
74
-
75
- forest_table_count_max: u32,
76
-
77
- grid_reservation: ?Grid.Reservation = null,
78
-
79
- /// The number of blocks (remaining) to compact during the current half-bar.
80
- compact_blocks: ?u32 = null,
81
-
82
- /// This is a struct-of-arrays of `BlockReference`s.
83
- /// It includes:
84
- /// - blocks that are written
85
- /// - blocks that have closed, but not yet flushed
86
- /// - blocks that are being flushed
87
- ///
88
- /// Entries are ordered from oldest to newest.
89
- log_block_checksums: RingBufferType(u128, .slice),
90
- log_block_addresses: RingBufferType(u64, .slice),
91
-
92
- /// The head block accumulates a full block, to be written at the next flush.
93
- /// The remaining blocks must accommodate all further appends.
94
- blocks: RingBufferType(BlockPtr, .slice),
95
-
96
- /// The number of blocks that have been appended to, filled up, and then closed.
97
- blocks_closed: u8 = 0,
98
-
99
- /// The number of entries in the open block.
100
- ///
101
- /// Invariants:
102
- /// - When `entry_count = 0`, there is no open block.
103
- /// - `entry_count < entry_count_max`. When `entry_count` reaches the maximum, the open
104
- /// block is closed, and `entry_count` resets to 0.
105
- entry_count: u32 = 0,
106
-
107
- opened: bool = false,
108
- open_event: OpenEvent = undefined,
109
-
110
- /// Set for the duration of `open` and `compact`.
111
- reading: bool = false,
112
- read: Grid.Read = undefined,
113
- read_callback: ?Callback = null,
114
-
115
- /// Set for the duration of `flush` and `checkpoint`.
116
- writing: bool = false,
117
- writes: []Write,
118
- writes_pending: usize = 0,
119
- write_callback: ?Callback = null,
120
-
121
- next_tick: Grid.NextTick = undefined,
122
-
123
- /// A map from table address to the manifest block and entry that is the latest extent
124
- /// version. Used to determine whether a table should be dropped in a compaction.
125
- table_extents: TableExtents,
126
-
127
- /// For a particular table in the manifest, the sequence of events is:
128
- ///
129
- /// insert(0|1), update(0+), remove(0|1)
130
- ///
131
- /// During open(), manifest entries are processed in reverse-chronological order.
132
- ///
133
- /// This hash-set tracks tables that have been removed but whose corresponding "insert" has
134
- /// not yet been encountered. Given that the maximum number of tables in the forest at any
135
- /// given moment is `forest_table_count_max`, there are likewise at most
136
- /// `forest_table_count_max` "unpaired" removes to track.
137
- // TODO(Optimization) This memory (~35MiB) is only needed during open() – maybe borrow it
138
- // from the grid cache or node pool instead so that we don't pay for it during normal
139
- // operation.
140
- tables_removed: TablesRemoved,
141
-
142
/// Initializes `manifest_log` in place and allocates all of its buffers.
///
/// Allocation order: the two log-block reference rings, the ring of block pointers, each block
/// in that ring, the `writes` slice, then the `table_extents` and `tables_removed` maps.
/// When an allocation fails, everything acquired before it is released and the error is
/// returned to the caller.
pub fn init(
    manifest_log: *ManifestLog,
    allocator: mem.Allocator,
    grid: *Grid,
    compaction_pace: *const Pace,
) !void {
    const ChecksumRing = RingBufferType(u128, .slice);
    const AddressRing = RingBufferType(u64, .slice);
    const BlockRing = RingBufferType(BlockPtr, .slice);

    // Every heap-backed field starts out undefined and is filled in, one at a time, below.
    manifest_log.* = .{
        .superblock = grid.superblock,
        .grid = grid,
        .forest_table_count_max = compaction_pace.tables_max,
        .pace = compaction_pace,
        .log_block_checksums = undefined,
        .log_block_addresses = undefined,
        .blocks = undefined,
        .writes = undefined,
        .table_extents = undefined,
        .tables_removed = undefined,
    };

    // Log every field of the pace configuration, prefixed by the replica index.
    inline for (std.meta.fields(Pace)) |field| {
        log.debug("{?}: Manifest.Pace.{s} = {d}", .{
            grid.superblock.replica_index,
            field.name,
            @field(compaction_pace, field.name),
        });
    }

    const log_blocks_max = compaction_pace.log_blocks_max;

    manifest_log.log_block_checksums = try ChecksumRing.init(allocator, log_blocks_max);
    errdefer manifest_log.log_block_checksums.deinit(allocator);

    manifest_log.log_block_addresses = try AddressRing.init(allocator, log_blocks_max);
    errdefer manifest_log.log_block_addresses.deinit(allocator);

    // The upper-bound of manifest blocks we must buffer.
    //
    // `blocks` must have sufficient capacity for:
    // - a leftover open block from the previous ops (+1 block)
    // - table updates copied from a half bar of manifest compactions
    // - table updates from a half bar of table compactions
    const buffer_blocks_max = 1 +
        compaction_pace.half_bar_compact_blocks_max +
        compaction_pace.half_bar_append_blocks_max;
    assert(buffer_blocks_max >= 3);

    // TODO RingBuffer for .slice should be extended to take care of alignment:
    manifest_log.blocks = try BlockRing.init(allocator, buffer_blocks_max);
    errdefer manifest_log.blocks.deinit(allocator);

    // `blocks_allocated` counts the slots that hold a live block, so this single errdefer frees
    // exactly those blocks whether the failure happens mid-loop or in a later allocation.
    var blocks_allocated: usize = 0;
    errdefer for (manifest_log.blocks.buffer[0..blocks_allocated]) |block| allocator.free(block);
    while (blocks_allocated < manifest_log.blocks.buffer.len) : (blocks_allocated += 1) {
        manifest_log.blocks.buffer[blocks_allocated] = try allocate_block(allocator);
    }

    manifest_log.writes = try allocator.alloc(Write, buffer_blocks_max);
    errdefer allocator.free(manifest_log.writes);
    @memset(manifest_log.writes, undefined);

    manifest_log.table_extents = .{};
    try manifest_log.table_extents.ensureTotalCapacity(
        allocator,
        // Allocate space for one additional table, so that the code can still use
        // `getOrPutAssumeCapacity` while making it easier to check if the limit has been
        // exceeded to error with a friendly message.
        manifest_log.forest_table_count_max + 1,
    );
    errdefer manifest_log.table_extents.deinit(allocator);

    manifest_log.tables_removed = .{};
    try manifest_log.tables_removed.ensureTotalCapacity(
        allocator,
        manifest_log.forest_table_count_max,
    );
    errdefer manifest_log.tables_removed.deinit(allocator);
}
219
-
220
/// Releases every buffer owned by the manifest log: both hash maps, the `writes` slice, each
/// block together with the ring that holds the block pointers, and the two log-block
/// reference rings.
pub fn deinit(manifest_log: *ManifestLog, allocator: mem.Allocator) void {
    manifest_log.tables_removed.deinit(allocator);
    manifest_log.table_extents.deinit(allocator);

    allocator.free(manifest_log.writes);

    // Free the blocks themselves before the ring buffer that stores their pointers.
    for (manifest_log.blocks.buffer) |block_ptr| {
        allocator.free(block_ptr);
    }
    manifest_log.blocks.deinit(allocator);

    manifest_log.log_block_addresses.deinit(allocator);
    manifest_log.log_block_checksums.deinit(allocator);
}
229
-
230
/// Returns the manifest log to its initial field values while keeping its existing allocations.
///
/// The rings and maps are emptied, every block buffer is zeroed, and all remaining fields fall
/// back to their declared defaults. Nothing is freed or reallocated.
pub fn reset(manifest_log: *ManifestLog) void {
    assert(manifest_log.log_block_checksums.count ==
        manifest_log.log_block_addresses.count);

    manifest_log.grid.trace.cancel(.compact_manifest);

    // Empty the containers in place so that their backing memory can be carried over.
    manifest_log.log_block_checksums.clear();
    manifest_log.log_block_addresses.clear();
    for (manifest_log.blocks.buffer) |block_ptr| {
        @memset(block_ptr, 0);
    }
    manifest_log.table_extents.clearRetainingCapacity();
    manifest_log.tables_removed.clearRetainingCapacity();

    // Capture everything that survives the reset before the struct is overwritten.
    const superblock_ptr = manifest_log.superblock;
    const grid_ptr = manifest_log.grid;
    const pace_ptr = manifest_log.pace;
    const checksums_ring = manifest_log.log_block_checksums;
    const addresses_ring = manifest_log.log_block_addresses;
    const blocks_buffer = manifest_log.blocks.buffer;
    const writes_slice = manifest_log.writes;
    const extents_map = manifest_log.table_extents;
    const removed_map = manifest_log.tables_removed;

    manifest_log.* = .{
        .superblock = superblock_ptr,
        .grid = grid_ptr,
        .forest_table_count_max = pace_ptr.tables_max,
        .pace = pace_ptr,
        .log_block_checksums = checksums_ring,
        .log_block_addresses = addresses_ring,
        // Only the backing buffer is kept; the ring's other fields take their defaults.
        .blocks = .{ .buffer = blocks_buffer },
        .writes = writes_slice,
        .table_extents = extents_map,
        .tables_removed = removed_map,
    };
}
255
-
256
- /// Opens the manifest log.
257
- /// Reads the manifest blocks in reverse order and passes extent table inserts to event().
258
- /// Therefore, only the latest version of a table will be emitted by event() for insertion
259
- /// into the in-memory manifest. Older versions of a table in older manifest blocks will not
260
- /// be emitted, as an optimization to not replay all table mutations.
261
- /// `ManifestLog.table_extents` is used to track the latest version of a table.
262
- // TODO(Optimization): Accumulate tables unordered, then sort all at once to splice into the
263
- // ManifestLevels' SegmentedArrays. (Constructing SegmentedArrays by repeated inserts is
264
- // expensive.)
265
pub fn open(manifest_log: *ManifestLog, event: OpenEvent, callback: Callback) void {
    // The log must be idle and must not have been opened yet.
    assert(!manifest_log.opened);
    assert(!manifest_log.reading);
    assert(!manifest_log.writing);
    assert(manifest_log.read_callback == null);

    // No log state may have accumulated before opening.
    assert(manifest_log.log_block_checksums.count == 0);
    assert(manifest_log.log_block_addresses.count == 0);
    assert(manifest_log.blocks.count == 0);
    assert(manifest_log.blocks_closed == 0);
    assert(manifest_log.entry_count == 0);
    assert(manifest_log.table_extents.count() == 0);
    assert(manifest_log.tables_removed.count() == 0);

    manifest_log.open_event = event;
    manifest_log.reading = true;
    manifest_log.read_callback = callback;

    const references = manifest_log.superblock.working.manifest_references();
    assert(references.block_count <= manifest_log.log_block_checksums.buffer.len);

    if (references.empty()) {
        // There are no manifest blocks to read, so completion is deferred to the next tick
        // instead of happening inside this call.
        manifest_log.grid.on_next_tick(open_next_tick_callback, &manifest_log.next_tick);
        return;
    }

    // Begin reading at the newest manifest block.
    const newest_block: BlockReference = .{
        .checksum = references.newest_checksum,
        .address = references.newest_address,
    };
    manifest_log.open_read_block(newest_block);
}
295
-
296
- fn open_next_tick_callback(next_tick: *Grid.NextTick) void {
297
- const manifest_log: *ManifestLog = @alignCast(@fieldParentPtr("next_tick", next_tick));
298
- assert(!manifest_log.opened);
299
- assert(manifest_log.reading);
300
- assert(!manifest_log.writing);
301
-
302
- assert(manifest_log.log_block_checksums.count == 0);
303
- assert(manifest_log.log_block_addresses.count == 0);
304
- assert(manifest_log.table_extents.count() == 0);
305
- assert(manifest_log.tables_removed.count() == 0);
306
- assert(manifest_log.superblock.working.manifest_references().empty());
307
-
308
- manifest_log.open_done();
309
- }
310
-
311
- fn open_read_block(manifest_log: *ManifestLog, block_reference: BlockReference) void {
312
- assert(!manifest_log.opened);
313
- assert(manifest_log.reading);
314
- assert(manifest_log.read_callback != null);
315
- assert(!manifest_log.writing);
316
- assert(manifest_log.write_callback == null);
317
- assert(manifest_log.table_extents.count() <= manifest_log.forest_table_count_max);
318
- assert(manifest_log.tables_removed.count() <= manifest_log.forest_table_count_max);
319
- assert(manifest_log.log_block_checksums.count <
320
- manifest_log.log_block_checksums.buffer.len);
321
- assert(manifest_log.log_block_checksums.count ==
322
- manifest_log.log_block_addresses.count);
323
- assert(manifest_log.log_block_checksums.count <
324
- manifest_log.superblock.working.vsr_state.checkpoint.manifest_block_count);
325
- assert(manifest_log.blocks.count == 0);
326
- assert(manifest_log.blocks_closed == 0);
327
- assert(manifest_log.entry_count == 0);
328
- assert(block_reference.address > 0);
329
-
330
- if (constants.verify) {
331
- // The manifest block list has no cycles.
332
- var address_iterator = manifest_log.log_block_addresses.iterator();
333
- while (address_iterator.next()) |address| {
334
- assert(address != block_reference.address);
335
- }
336
- }
337
-
338
- manifest_log.log_block_checksums.push_head_assume_capacity(block_reference.checksum);
339
- manifest_log.log_block_addresses.push_head_assume_capacity(block_reference.address);
340
-
341
- manifest_log.grid.read_block(
342
- .{ .from_local_or_global_storage = open_read_block_callback },
343
- &manifest_log.read,
344
- block_reference.address,
345
- block_reference.checksum,
346
- .{ .cache_read = true, .cache_write = true },
347
- );
348
- }
349
-
350
- fn open_read_block_callback(read: *Grid.Read, block: BlockPtrConst) void {
351
- const manifest_log: *ManifestLog = @fieldParentPtr("read", read);
352
- assert(!manifest_log.opened);
353
- assert(manifest_log.reading);
354
- assert(!manifest_log.writing);
355
- assert(manifest_log.log_block_addresses.count > 0);
356
- assert(manifest_log.log_block_checksums.count > 0);
357
- assert(!manifest_log.superblock.working.manifest_references().empty());
358
-
359
- const block_checksum = manifest_log.log_block_checksums.head().?;
360
- const block_address = manifest_log.log_block_addresses.head().?;
361
- verify_block(block, block_checksum, block_address);
362
-
363
- const block_schema = schema.ManifestNode.from(block);
364
- const tables_used = block_schema.tables_const(block);
365
- assert(block_schema.entry_count > 0);
366
- assert(block_schema.entry_count <= schema.ManifestNode.entry_count_max);
367
-
368
- var entry = block_schema.entry_count;
369
- while (entry > 0) {
370
- entry -= 1;
371
-
372
- const table = &tables_used[entry];
373
- assert(table.label.event != .reserved);
374
- assert(table.address > 0);
375
-
376
- if (table.label.event == .remove) {
377
- const table_removed =
378
- manifest_log.tables_removed.fetchPutAssumeCapacity(table.address, {});
379
- assert(table_removed == null);
380
- } else {
381
- if (manifest_log.tables_removed.get(table.address)) |_| {
382
- if (table.label.event == .insert) {
383
- assert(manifest_log.tables_removed.remove(table.address));
384
- }
385
- } else {
386
- const extent =
387
- manifest_log.table_extents.getOrPutAssumeCapacity(table.address);
388
- if (!extent.found_existing) {
389
- manifest_log.check_tables_count();
390
- extent.value_ptr.* = .{ .block = block_address, .entry = entry };
391
- manifest_log.open_event(manifest_log, table);
392
- }
393
- }
394
- }
395
- }
396
-
397
- log.debug("{}: opened: checksum={x:0>32} address={} entries={}", .{
398
- manifest_log.superblock.replica_index.?,
399
- block_checksum,
400
- block_address,
401
- block_schema.entry_count,
402
- });
403
-
404
- const checkpoint_state = &manifest_log.superblock.working.vsr_state.checkpoint;
405
- if (checkpoint_state.manifest_oldest_address == block_address) {
406
- // When we find the oldest block, stop iterating the linked list – any more blocks
407
- // have already been compacted away.
408
- assert(checkpoint_state.manifest_oldest_checksum == block_checksum);
409
-
410
- manifest_log.open_done();
411
- } else {
412
- const block_reference_previous = schema.ManifestNode.previous(block).?;
413
-
414
- manifest_log.open_read_block(.{
415
- .checksum = block_reference_previous.checksum,
416
- .address = block_reference_previous.address,
417
- });
418
- }
419
- }
420
-
421
- fn open_done(manifest_log: *ManifestLog) void {
422
- assert(!manifest_log.opened);
423
- assert(manifest_log.reading);
424
- assert(manifest_log.read_callback != null);
425
- assert(!manifest_log.writing);
426
- assert(manifest_log.write_callback == null);
427
- assert(manifest_log.table_extents.count() <= manifest_log.forest_table_count_max);
428
- assert(manifest_log.tables_removed.count() <= manifest_log.forest_table_count_max);
429
- assert(manifest_log.log_block_checksums.count ==
430
- manifest_log.log_block_addresses.count);
431
- assert(manifest_log.log_block_checksums.count ==
432
- manifest_log.superblock.working.vsr_state.checkpoint.manifest_block_count);
433
- assert(manifest_log.blocks.count == 0);
434
- assert(manifest_log.blocks_closed == 0);
435
- assert(manifest_log.entry_count == 0);
436
-
437
- log.debug("{}: open_done: opened block_count={} table_count={}", .{
438
- manifest_log.superblock.replica_index.?,
439
- manifest_log.log_block_checksums.count,
440
- manifest_log.table_extents.count(),
441
- });
442
-
443
- const callback = manifest_log.read_callback.?;
444
- manifest_log.opened = true;
445
- manifest_log.open_event = undefined;
446
- manifest_log.reading = false;
447
- manifest_log.read_callback = null;
448
-
449
- callback(manifest_log);
450
- }
451
-
452
- /// Appends an insert/update/remove of a table to a level.
453
- ///
454
- /// A move is only recorded as an update, there is no remove from the previous level, since
455
- /// this is safer (no potential to get the event order wrong) and reduces fragmentation.
456
- pub fn append(manifest_log: *ManifestLog, table: *const TableInfo) void {
457
- maybe(manifest_log.opened);
458
- maybe(manifest_log.reading);
459
- assert(!manifest_log.writing);
460
-
461
- switch (table.label.event) {
462
- .reserved => unreachable,
463
- .insert => assert(manifest_log.table_extents.get(table.address) == null),
464
- // For updates + removes, the table must have previously been inserted into the log:
465
- .update => assert(manifest_log.table_extents.get(table.address) != null),
466
- .remove => assert(manifest_log.table_extents.get(table.address) != null),
467
- }
468
-
469
- manifest_log.append_internal(table);
470
- }
471
-
472
- /// The table extent must be updated immediately when appending, without delay.
473
- /// Otherwise, ManifestLog.compact() may append a stale version over the latest.
474
- ///
475
- /// append_internal() is used for both:
476
- /// - External appends, e.g. events created due to table compaction.
477
- /// - Internal appends, e.g. events recycled by manifest compaction.
478
- fn append_internal(manifest_log: *ManifestLog, table: *const TableInfo) void {
479
- assert(manifest_log.opened);
480
- assert(!manifest_log.writing);
481
- maybe(manifest_log.reading);
482
- assert(manifest_log.grid_reservation != null);
483
- assert(table.label.level < constants.lsm_levels);
484
- assert(table.address > 0);
485
- assert(table.snapshot_min > 0);
486
- assert(table.snapshot_max > table.snapshot_min);
487
-
488
- if (manifest_log.entry_count == 0) {
489
- assert(manifest_log.blocks.count == manifest_log.blocks_closed);
490
- manifest_log.acquire_block();
491
- } else if (manifest_log.entry_count > 0) {
492
- assert(manifest_log.blocks.count > 0);
493
- }
494
-
495
- assert(manifest_log.entry_count < schema.ManifestNode.entry_count_max);
496
- assert(manifest_log.blocks.count - manifest_log.blocks_closed == 1);
497
-
498
- log.debug(
499
- "{}: {s}: level={} tree={} checksum={x:0>32} address={} snapshot={}..{}",
500
- .{
501
- manifest_log.superblock.replica_index.?,
502
- @tagName(table.label.event),
503
- table.label.level,
504
- table.tree_id,
505
- table.checksum,
506
- table.address,
507
- table.snapshot_min,
508
- table.snapshot_max,
509
- },
510
- );
511
-
512
- const block: BlockPtr = manifest_log.blocks.tail().?;
513
- const entry = manifest_log.entry_count;
514
- block_builder_schema.tables(block)[entry] = table.*;
515
-
516
- const block_header =
517
- mem.bytesAsValue(vsr.Header.Block, block[0..@sizeOf(vsr.Header)]);
518
- const block_address = block_header.address;
519
-
520
- switch (table.label.event) {
521
- .reserved => unreachable,
522
- .insert,
523
- .update,
524
- => {
525
- const extent = manifest_log.table_extents.getOrPutAssumeCapacity(table.address);
526
- if (!extent.found_existing) {
527
- assert(table.label.event == .insert);
528
-
529
- // When inserting, check that the insertion didn't cause the number of
530
- // tables to exceed `forest_table_count_max`.
531
- manifest_log.check_tables_count();
532
- } else {
533
- maybe(table.label.event == .insert); // (Compaction.)
534
- }
535
- extent.value_ptr.* = .{ .block = block_address, .entry = entry };
536
- },
537
- .remove => assert(manifest_log.table_extents.remove(table.address)),
538
- }
539
-
540
- manifest_log.entry_count += 1;
541
- if (manifest_log.entry_count == schema.ManifestNode.entry_count_max) {
542
- manifest_log.close_block();
543
- assert(manifest_log.entry_count == 0);
544
- }
545
- }
546
-
547
- fn check_tables_count(manifest_log: *ManifestLog) void {
548
- const tables_count = manifest_log.table_extents.count();
549
- if (tables_count > manifest_log.forest_table_count_max) {
550
- vsr.fatal(
551
- .forest_tables_count_would_exceed_limit,
552
- "forest_tables_count would exceed limit " ++
553
- "(tables_count={} tables_max={}) - " ++
554
- "please contact the team directly who will be able to assist",
555
- .{ tables_count, manifest_log.forest_table_count_max },
556
- );
557
- }
558
- }
559
-
560
- fn flush(manifest_log: *ManifestLog, callback: Callback) void {
561
- assert(manifest_log.opened);
562
- assert(!manifest_log.reading);
563
- assert(!manifest_log.writing);
564
- assert(manifest_log.write_callback == null);
565
-
566
- log.debug("{}: flush: writing {} block(s)", .{
567
- manifest_log.superblock.replica_index.?,
568
- manifest_log.blocks_closed,
569
- });
570
-
571
- manifest_log.writing = true;
572
- manifest_log.write_callback = callback;
573
-
574
- for (0..manifest_log.blocks_closed) |_| manifest_log.write_block();
575
- assert(manifest_log.blocks_closed == manifest_log.writes_pending);
576
-
577
- if (manifest_log.writes_pending == 0) {
578
- manifest_log.grid.on_next_tick(flush_next_tick_callback, &manifest_log.next_tick);
579
- }
580
- }
581
-
582
- fn flush_next_tick_callback(next_tick: *Grid.NextTick) void {
583
- const manifest_log: *ManifestLog = @alignCast(@fieldParentPtr("next_tick", next_tick));
584
- assert(manifest_log.writing);
585
-
586
- manifest_log.flush_done();
587
- }
588
-
589
- fn flush_done(manifest_log: *ManifestLog) void {
590
- assert(manifest_log.writing);
591
- assert(manifest_log.write_callback != null);
592
- assert(manifest_log.blocks_closed == 0);
593
-
594
- const callback = manifest_log.write_callback.?;
595
- manifest_log.write_callback = null;
596
- manifest_log.writing = false;
597
- callback(manifest_log);
598
- }
599
-
600
- fn write_block(manifest_log: *ManifestLog) void {
601
- assert(manifest_log.opened);
602
- assert(manifest_log.writing);
603
- assert(manifest_log.blocks_closed > 0);
604
- assert(manifest_log.blocks_closed <= manifest_log.blocks.count);
605
- assert(manifest_log.writes_pending < manifest_log.blocks_closed);
606
-
607
- const block_index = manifest_log.writes_pending;
608
- const block = manifest_log.blocks.get_ptr(block_index).?;
609
- verify_block(block.*, null, null);
610
-
611
- const block_schema = schema.ManifestNode.from(block.*);
612
- assert(block_schema.entry_count > 0);
613
-
614
- const header = schema.header_from_block(block.*);
615
- assert(header.address > 0);
616
-
617
- if (block_index == manifest_log.blocks_closed - 1) {
618
- // This might be the last block of a checkpoint, which can be a partial block.
619
- assert(block_schema.entry_count <= schema.ManifestNode.entry_count_max);
620
- } else {
621
- assert(block_schema.entry_count == schema.ManifestNode.entry_count_max);
622
- }
623
-
624
- log.debug("{}: write_block: checksum={x:0>32} address={} entries={}", .{
625
- manifest_log.superblock.replica_index.?,
626
- header.checksum,
627
- header.address,
628
- block_schema.entry_count,
629
- });
630
-
631
- const write = &manifest_log.writes[block_index];
632
- write.* = .{ .manifest_log = manifest_log };
633
-
634
- manifest_log.writes_pending += 1;
635
- manifest_log.grid.create_block(write_block_callback, &write.write, block);
636
- }
637
-
638
- fn write_block_callback(grid_write: *Grid.Write) void {
639
- const write: *Write = @fieldParentPtr("write", grid_write);
640
- const manifest_log = write.manifest_log;
641
- assert(manifest_log.opened);
642
- assert(manifest_log.writing);
643
- assert(manifest_log.blocks_closed <= manifest_log.blocks.count);
644
-
645
- manifest_log.writes_pending -= 1;
646
-
647
- if (manifest_log.writes_pending == 0) {
648
- for (0..manifest_log.blocks_closed) |_| manifest_log.blocks.advance_head();
649
- manifest_log.blocks_closed = 0;
650
-
651
- if (manifest_log.blocks.count == 0) {
652
- assert(manifest_log.entry_count == 0);
653
- } else {
654
- assert(manifest_log.blocks.count == 1);
655
- assert(manifest_log.entry_count < schema.ManifestNode.entry_count_max);
656
- }
657
-
658
- manifest_log.flush_done();
659
- }
660
- }
661
-
662
- /// `compact` does not close a partial block; that is only necessary during `checkpoint`.
663
- ///
664
- /// The (production) block size is large, so the number of blocks compacted per half-bar is
665
- /// relatively small (e.g. ~4). We read them in sequence rather than parallel to spread the
666
- /// work more evenly across the half-bar's beats.
667
- // TODO Make sure block reservation cannot fail — before compaction begins verify that
668
- // enough free blocks are available for all reservations.
669
- pub fn compact(manifest_log: *ManifestLog, callback: Callback, op: u64) void {
670
- assert(manifest_log.opened);
671
- assert(!manifest_log.reading);
672
- assert(!manifest_log.writing);
673
- assert(manifest_log.read_callback == null);
674
- assert(manifest_log.write_callback == null);
675
- assert(manifest_log.grid_reservation == null);
676
- assert(manifest_log.blocks.count ==
677
- manifest_log.blocks_closed + @intFromBool(manifest_log.entry_count > 0));
678
- assert(manifest_log.compact_blocks == null);
679
- assert(op >= constants.lsm_compaction_ops);
680
- assert(!manifest_log.superblock.working.vsr_state.op_compacted(op));
681
-
682
- // TODO: Currently manifest compaction is hardcoded to run on the last beat of each
683
- // half-bar.
684
- // This is because otherwise it would mess with our grid reserve / forfeit ordering,
685
- // since we now reserve / forfeit per beat.
686
- assert((op + 1) % @divExact(constants.lsm_compaction_ops, 2) == 0);
687
-
688
- manifest_log.grid.trace.start(.compact_manifest);
689
-
690
- manifest_log.compact_blocks = @min(
691
- manifest_log.pace.half_bar_compact_blocks(.{
692
- .log_blocks_count = @intCast(manifest_log.log_block_checksums.count),
693
- .tables_count = manifest_log.table_extents.count(),
694
- }),
695
- // Never compact closed blocks. (They haven't even been written yet.)
696
- manifest_log.log_block_checksums.count - manifest_log.blocks_closed,
697
- );
698
- assert(manifest_log.compact_blocks.? <= manifest_log.pace.half_bar_compact_blocks_max);
699
-
700
- manifest_log.grid_reservation = manifest_log.grid.reserve(
701
- manifest_log.compact_blocks.? +
702
- manifest_log.pace.half_bar_append_blocks_max,
703
- );
704
-
705
- manifest_log.read_callback = callback;
706
- manifest_log.flush(compact_next_block);
707
- }
708
-
709
- fn compact_next_block(manifest_log: *ManifestLog) void {
710
- assert(manifest_log.opened);
711
- assert(!manifest_log.reading);
712
- assert(!manifest_log.writing);
713
- assert(manifest_log.read_callback != null);
714
- assert(manifest_log.grid_reservation != null);
715
-
716
- const compact_blocks = manifest_log.compact_blocks.?;
717
- if (compact_blocks == 0) {
718
- manifest_log.compact_done_callback();
719
- } else {
720
- const oldest_checksum = manifest_log.log_block_checksums.head().?;
721
- const oldest_address = manifest_log.log_block_addresses.head().?;
722
- assert(oldest_address > 0);
723
-
724
- manifest_log.compact_blocks.? -= 1;
725
- manifest_log.reading = true;
726
- manifest_log.grid.read_block(
727
- .{ .from_local_or_global_storage = compact_read_block_callback },
728
- &manifest_log.read,
729
- oldest_address,
730
- oldest_checksum,
731
- .{ .cache_read = true, .cache_write = true },
732
- );
733
- }
734
- }
735
-
736
- fn compact_read_block_callback(read: *Grid.Read, block: BlockPtrConst) void {
737
- const manifest_log: *ManifestLog = @fieldParentPtr("read", read);
738
- assert(manifest_log.opened);
739
- assert(manifest_log.reading);
740
- assert(!manifest_log.writing);
741
- assert(manifest_log.read_callback != null);
742
- assert(manifest_log.grid_reservation != null);
743
-
744
- const oldest_checksum = manifest_log.log_block_checksums.pop().?;
745
- const oldest_address = manifest_log.log_block_addresses.pop().?;
746
- verify_block(block, oldest_checksum, oldest_address);
747
-
748
- const block_schema = schema.ManifestNode.from(block);
749
- assert(block_schema.entry_count > 0);
750
- assert(block_schema.entry_count <= schema.ManifestNode.entry_count_max);
751
-
752
- var frees: u32 = 0;
753
- for (
754
- block_schema.tables_const(block),
755
- 0..block_schema.entry_count,
756
- ) |*table, entry_index| {
757
- const entry: u32 = @intCast(entry_index);
758
- switch (table.label.event) {
759
- .reserved => unreachable,
760
- // Append the table, updating the table extent:
761
- .insert,
762
- .update,
763
- => {
764
- // Update the extent if the table is the latest version.
765
- // We must iterate entries in forward order to drop the extent here.
766
- // Otherwise, stale versions earlier in the block may reappear.
767
- if (std.meta.eql(
768
- manifest_log.table_extents.get(table.address),
769
- .{ .block = oldest_address, .entry = entry },
770
- )) {
771
- // Append the table, updating the table extent:
772
- manifest_log.append_internal(table);
773
- } else {
774
- // Either:
775
- // - This is not the latest insert for this table, so it can be dropped.
776
- // - The table was removed some time after this insert.
777
- frees += 1;
778
- }
779
- },
780
- // Since we compact oldest blocks first, we know that we have already
781
- // compacted all inserts that were eclipsed by this remove, so this remove
782
- // can now be safely dropped.
783
- .remove => frees += 1,
784
- }
785
- }
786
-
787
- log.debug("{}: compacted: checksum={x:0>32} address={} free={}/{}", .{
788
- manifest_log.superblock.replica_index.?,
789
- oldest_checksum,
790
- oldest_address,
791
- frees,
792
- block_schema.entry_count,
793
- });
794
-
795
- // Blocks are compacted in sequence – not skipped, even if no entries will be freed.
796
- // (That should be rare though, since blocks are large.)
797
- // This is necessary to update the block's "previous block" pointer in the header.
798
- maybe(frees == 0);
799
- assert(manifest_log.blocks_closed <= manifest_log.pace.half_bar_compact_blocks_max);
800
-
801
- manifest_log.grid.release(&.{oldest_address});
802
- manifest_log.reading = false;
803
-
804
- manifest_log.compact_next_block();
805
- }
806
-
807
- fn compact_done_callback(manifest_log: *ManifestLog) void {
808
- assert(manifest_log.opened);
809
- assert(!manifest_log.reading);
810
- assert(!manifest_log.writing);
811
- assert(manifest_log.blocks_closed <= manifest_log.pace.half_bar_compact_blocks_max);
812
- assert(manifest_log.read_callback != null);
813
- assert(manifest_log.grid_reservation != null);
814
- assert(manifest_log.compact_blocks.? == 0);
815
-
816
- manifest_log.grid.trace.stop(.compact_manifest);
817
-
818
- const callback = manifest_log.read_callback.?;
819
- manifest_log.read_callback = null;
820
- manifest_log.compact_blocks = null;
821
-
822
- callback(manifest_log);
823
- }
824
-
825
- pub fn compact_end(manifest_log: *ManifestLog) void {
826
- assert(manifest_log.opened);
827
- assert(!manifest_log.reading);
828
- assert(!manifest_log.writing);
829
- assert(manifest_log.read_callback == null);
830
- assert(manifest_log.write_callback == null);
831
-
832
- if (manifest_log.grid_reservation) |reservation| {
833
- manifest_log.grid.forfeit(reservation);
834
- manifest_log.grid_reservation = null;
835
- } else {
836
- // Compaction was skipped for this half-bar.
837
- assert(manifest_log.entry_count == 0);
838
- assert(manifest_log.blocks.count == 0);
839
- assert(manifest_log.blocks_closed == 0);
840
- }
841
- }
842
-
843
- pub fn checkpoint(manifest_log: *ManifestLog, callback: Callback) void {
844
- assert(manifest_log.opened);
845
- assert(!manifest_log.reading);
846
- assert(!manifest_log.writing);
847
- assert(manifest_log.write_callback == null);
848
- assert(manifest_log.grid_reservation == null);
849
-
850
- if (manifest_log.entry_count > 0) {
851
- manifest_log.close_block();
852
- assert(manifest_log.entry_count == 0);
853
- assert(manifest_log.blocks_closed > 0);
854
- }
855
- assert(manifest_log.blocks_closed == manifest_log.blocks.count);
856
-
857
- manifest_log.flush(callback);
858
- }
859
-
860
- pub fn checkpoint_references(
861
- manifest_log: *const ManifestLog,
862
- ) vsr.SuperBlockManifestReferences {
863
- assert(manifest_log.opened);
864
- assert(!manifest_log.reading);
865
- assert(!manifest_log.writing);
866
- assert(manifest_log.write_callback == null);
867
- assert(manifest_log.grid_reservation == null);
868
- assert(manifest_log.log_block_checksums.count ==
869
- manifest_log.log_block_addresses.count);
870
- assert(manifest_log.blocks.count == 0);
871
- assert(manifest_log.blocks_closed == 0);
872
- assert(manifest_log.entry_count == 0);
873
-
874
- if (manifest_log.log_block_addresses.count == 0) {
875
- return std.mem.zeroes(vsr.SuperBlockManifestReferences);
876
- } else {
877
- return .{
878
- .oldest_checksum = manifest_log.log_block_checksums.head().?,
879
- .oldest_address = manifest_log.log_block_addresses.head().?,
880
- .newest_checksum = manifest_log.log_block_checksums.tail().?,
881
- .newest_address = manifest_log.log_block_addresses.tail().?,
882
- .block_count = @intCast(manifest_log.log_block_addresses.count),
883
- };
884
- }
885
- }
886
-
887
- fn acquire_block(manifest_log: *ManifestLog) void {
888
- assert(manifest_log.opened);
889
- maybe(manifest_log.reading);
890
- assert(!manifest_log.writing);
891
- assert(manifest_log.entry_count == 0);
892
- assert(manifest_log.log_block_checksums.count ==
893
- manifest_log.log_block_addresses.count);
894
- assert(manifest_log.blocks.count == manifest_log.blocks_closed);
895
- assert(!manifest_log.blocks.full());
896
-
897
- manifest_log.blocks.advance_tail();
898
-
899
- const block: BlockPtr = manifest_log.blocks.tail().?;
900
- // The ManifestLog acquires block addresses eagerly here, rather than deferring until
901
- // close_block(). This is because the open block's address must be inserted into
902
- // `table_extents` at the same time the entry is appended to the open block.
903
- const block_address = manifest_log.grid.acquire(manifest_log.grid_reservation.?);
904
-
905
- const header = mem.bytesAsValue(vsr.Header.Block, block[0..@sizeOf(vsr.Header)]);
906
- header.* = .{
907
- .cluster = manifest_log.superblock.working.cluster,
908
- .address = block_address,
909
- .snapshot = 0, // TODO(snapshots): Set this properly; it is useful for debugging.
910
- .size = undefined,
911
- .command = .block,
912
- .release = manifest_log.superblock.working.vsr_state.checkpoint.release,
913
- .metadata_bytes = undefined, // Set by close_block().
914
- .block_type = .manifest,
915
- };
916
- }
917
-
918
- fn close_block(manifest_log: *ManifestLog) void {
919
- assert(manifest_log.opened);
920
- maybe(manifest_log.reading);
921
- assert(!manifest_log.writing);
922
- assert(manifest_log.blocks.count == manifest_log.blocks_closed + 1);
923
- assert(manifest_log.log_block_checksums.count <
924
- manifest_log.log_block_checksums.buffer.len);
925
-
926
- const block: BlockPtr = manifest_log.blocks.tail().?;
927
- const entry_count = manifest_log.entry_count;
928
- assert(entry_count > 0);
929
- assert(entry_count <= schema.ManifestNode.entry_count_max);
930
-
931
- const block_schema = schema.ManifestNode{ .entry_count = entry_count };
932
- const header = mem.bytesAsValue(vsr.Header.Block, block[0..@sizeOf(vsr.Header)]);
933
- assert(header.cluster == manifest_log.superblock.working.cluster);
934
- assert(header.command == .block);
935
- assert(header.address > 0);
936
- header.size = block_schema.size();
937
-
938
- const newest_checksum = manifest_log.log_block_checksums.tail() orelse 0;
939
- const newest_address = manifest_log.log_block_addresses.tail() orelse 0;
940
- header.metadata_bytes = @bitCast(schema.ManifestNode.Metadata{
941
- .previous_manifest_block_checksum = newest_checksum,
942
- .previous_manifest_block_address = newest_address,
943
- .entry_count = entry_count,
944
- });
945
-
946
- // Zero padding:
947
- @memset(block[header.size..], 0);
948
-
949
- header.set_checksum_body(block[@sizeOf(vsr.Header)..header.size]);
950
- header.set_checksum();
951
- verify_block(block, null, null);
952
-
953
- manifest_log.log_block_checksums.push_assume_capacity(header.checksum);
954
- manifest_log.log_block_addresses.push_assume_capacity(header.address);
955
-
956
- log.debug("{}: close_block: checksum={x:0>32} address={} entries={}/{}", .{
957
- manifest_log.superblock.replica_index.?,
958
- header.checksum,
959
- header.address,
960
- entry_count,
961
- schema.ManifestNode.entry_count_max,
962
- });
963
-
964
- manifest_log.blocks_closed += 1;
965
- manifest_log.entry_count = 0;
966
- assert(manifest_log.blocks.count == manifest_log.blocks_closed);
967
- }
968
-
969
- fn verify_block(block: BlockPtrConst, checksum: ?u128, address: ?u64) void {
970
- {
971
- const frame = std.mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
972
- assert(frame.valid_checksum());
973
- assert(frame.valid_checksum_body(block[@sizeOf(vsr.Header)..frame.size]));
974
- }
975
-
976
- const header = schema.header_from_block(block);
977
- assert(header.block_type == .manifest);
978
-
979
- assert(address == null or header.address == address.?);
980
- assert(checksum == null or header.checksum == checksum.?);
981
-
982
- const block_schema = schema.ManifestNode.from(block);
983
- assert(block_schema.entry_count > 0);
984
- assert(block_schema.entry_count <= schema.ManifestNode.entry_count_max);
985
- }
986
- };
987
- }
988
-
989
- /// The goals of manifest log compaction are (in no particular order):
990
- ///
991
- /// 1. Free enough manifest blocks such that there are always enough free slots in the manifest
992
- /// log checksums/addresses ring buffers to accommodate the appends by table compaction.
993
- /// 2. Shrink the manifest log: A smaller manifest means that fewer blocks need to be replayed
994
- /// during recovery, or repaired during state sync.
995
- /// 3. Don't shrink the manifest too much: The more manifest compaction work is deferred, the more
996
- /// "efficient" compaction is. Put another way: deferring manifest compaction means that more
997
- /// entries are freed per block compacted.
998
- /// 4. Spread compaction work evenly between half-bars, to avoid latency spikes.
999
- ///
1000
- /// To address goal 1, we must (on average) "remove" as many blocks from the manifest log as we add.
1001
- /// But when we compact a block, only a subset of its entries can be freed/dropped – the remainder
1002
- /// must be re-appended to the manifest log.
1003
- ///
1004
- /// The upper-bound number of manifest blocks is related to the rate at which we compact blocks.
1005
- /// Put simply, the more compaction work we do, the smaller the upper bound.
1006
- ///
1007
- ///
1008
- /// To reason about this relation mathematically, and compute the upper-bound number of manifest
1009
- /// blocks in terms of the compaction rate:
1010
- ///
1011
- /// - Let `A` be the maximum number of manifest blocks that may be created by any single half-bar
1012
- /// due to appends via table compaction. (In other words, `A` does not count manifest compaction.)
1013
- /// - Let `T` be the minimum number of manifest blocks to hold `table_count_max` tables (inserts).
1014
- /// - Let `C` be the maximum number of manifest blocks to compact (i.e. read) during any half-bar.
1015
- /// - In the worst case, compacting a block frees no entries.
1016
- /// - (Then `C` is also the worst-case number of manifest blocks *written* due to manifest
1017
- /// compaction during each half-bar.)
1018
- ///
1019
- /// Suppose that at a certain point in time `t₀`, there are `M₀` manifest blocks total.
1020
- ///
1021
- /// If we compact at least `C` manifest blocks for each of `⌈M₀/C⌉` half-bars, then any of the
1022
- /// initial `M₀` manifest blocks that required compaction at time `t₀` have been compacted.
1023
- /// In the worst case (where all of those `M₀` blocks were full of live entries) we now have as
1024
- /// many as `M₁ = min(M₀,T) + A×⌈M₀/C⌉` manifest blocks:
1025
- ///
1026
- /// - `min(M₀,T)`: After compacting the original `M₀` blocks, we may produce as many as `M₀`
1027
- /// blocks (if no entries were freed). But if there are more than `T` blocks then some *must* be
1028
- /// dropped, since `T` is the upper-bound of a fully-compacted manifest.
1029
- /// - `⌈M₀/C⌉` is the number of half-bars that it takes to compact the initial `M₀` manifest
1030
- /// blocks.
1031
- /// - `A×⌈M₀/C⌉` is the maximum number of manifest blocks produced by table compaction while
1032
- /// compacting the original `M₀` manifest blocks.
1033
- ///
1034
- /// If we cycle again, starting with `M₁` manifest blocks this time, then at the end of the cycle
1035
- /// there are at most `M₂ = min(M₁,T) + A×⌈M₁/C⌉` manifest blocks.
1036
- ///
1037
- /// To generalize, at the beginning of any cycle `c`, the maximum number of manifest blocks
1038
- /// (`MC(c)`) is:
1039
- ///
1040
- /// MC(c) = min(T, MC(c-1)) + A×⌈MC(c-1)/C⌉
1041
- ///
1042
- ///
1043
- /// However, *within* a cycle the manifest block count may "burst" temporarily beyond this limit.
1044
- /// We compact chronologically. If the blocks early in the manifest have no/few free entries, we
1045
- /// must still compact them anyway, shifting their entries from the prefix of the log to its suffix.
1046
- /// During that time, the table-compact appends still occur, so the net manifest log size grows.
1047
- ///
1048
- /// The lower-bound for the number of blocks freed (`F(k)`) in terms of the number of blocks
1049
- /// compacted (`k`) is:
1050
- ///
1051
- /// F(k) ≥ max(0, k - T)
1052
- ///
1053
- /// In other words:
1054
- /// - After compacting `T` or fewer blocks, we may not have freed any whole blocks.
1055
- /// - After compacting `T+1` blocks, we must have freed at least 1 whole block.
1056
- /// - After compacting `T+2` blocks, we must have freed at least 2 whole blocks.
1057
- /// - Etc.
1058
- ///
1059
- /// Then the upper-bound number of manifest blocks (`MB(b)`) at any half-bar boundary (`b`) is:
1060
- ///
1061
- /// MB(b) = min(T, MB(b-1)) + A×⌈MB(b-1)/C⌉ + A×⌈(T+1)/C⌉
1062
- ///
1063
- /// As `b` approaches infinity, this recurrence relation converges (iff `C > A`) to the absolute
1064
- /// upper-bound number of manifest blocks.
1065
- ///
1066
- /// As `C` increases (relative to `A`), the manifest block upper-bound decreases, but the amount of
1067
- /// compaction work performed increases.
1068
- ///
1069
- /// If, during every half-bar in which the manifest log contains at least `MC(∞)` blocks, we compact
1070
- /// at least `C` blocks, then the total size of the manifest log will never exceed `MB(∞)` blocks.
1071
- ///
1072
- /// NOTE: Both the algorithm above and the implementation below make several simplifications:
1073
- ///
1074
- /// - The calculation is performed at the granularity of blocks, not entries. In particular, this
1075
- /// means that "A" might in truth be fractional, but we would round up. For example, if "A" is
1076
- /// 2.1, for the purposes of the upper-bound it is 3. Because `C` is computed (below) as
1077
- /// "A + compact_extra_blocks", the result is that we perform more compaction (relative to
1078
- /// appends) than the block-granular constants indicate.
1079
- /// As a result, we overestimate the upper-bound (or, equivalently, perform compaction more
1080
- /// quickly than strictly necessary).
1081
- /// - The calculation does *not* consider the "padding" appends into a partial block written
1082
- /// during a checkpoint. This oversight is masked because "A" is overestimated (see previous
1083
- /// bullet).
1084
- ///
1085
- pub const Pace = struct {
1086
- /// "A":
1087
- /// The maximum number of manifest blocks appended during a single half-bar by table appends.
1088
- ///
1089
- /// This counts:
1090
- /// - Input tables are updated in the manifest (snapshot_max is reduced).
1091
- /// - Input tables are removed from the manifest (if not held by a persistent snapshot).
1092
- /// - Output tables are inserted into the manifest.
1093
- /// This does *not* count:
1094
- /// - Manifest log compaction.
1095
- /// - Releasing persistent snapshots.
1096
- half_bar_append_blocks_max: u32,
1097
-
1098
- /// "C":
1099
- /// The maximum number of manifest blocks to compact (i.e. read) during a single half-bar.
1100
- half_bar_compact_blocks_max: u32,
1101
-
1102
- /// "T":
1103
- /// The maximum number of blocks in a fully-compacted manifest.
1104
- /// (Exposed by the struct only for the purpose of logging.)
1105
- log_blocks_full_max: u64,
1106
-
1107
- /// "limit of MC(c) as c approaches ∞"
1108
- log_blocks_cycle_max: u64,
1109
- /// "limit of MB(b) as b approaches ∞"
1110
- log_blocks_max: u64,
1111
- /// Copy of `options.tables_max` passed to `init`; `half_bar_compact_blocks` asserts `tables_count` never exceeds it.
1112
- tables_max: u32,
1113
- // Debugging aid: set `log_pace` to `true` to dump every field of a sample `Pace` via `@compileLog` (which fails the build).
1114
- comptime {
1115
- const log_pace = false;
1116
- if (log_pace) {
1117
- const pace = Pace.init(.{
1118
- .tree_count = 24,
1119
- .tables_max = 2_300_000,
1120
- .compact_extra_blocks = constants.lsm_manifest_compact_extra_blocks,
1121
- });
1122
-
1123
- for (std.meta.fields(Pace)) |pace_field| {
1124
- @compileLog(std.fmt.comptimePrint("ManifestLog.Pace.{s} = {d}", .{
1125
- pace_field.name,
1126
- @field(pace, pace_field.name),
1127
- }));
1128
- }
1129
- }
1130
- }
1131
- /// Derives "A", "C", "T" and the limits of the `MC`/`MB` recurrences from `options`; panics if `MC` does not converge.
1132
- pub fn init(options: struct {
1133
- tree_count: u32,
1134
- tables_max: u32,
1135
- compact_extra_blocks: u32,
1136
- }) Pace {
1137
- assert(options.tree_count > 0);
1138
- assert(options.tables_max > 0);
1139
- assert(options.tables_max > options.tree_count);
1140
- assert(options.compact_extra_blocks > 0);
1141
-
1142
- const block_entries_max = schema.ManifestNode.entry_count_max;
1143
-
1144
- const half_bar_append_entries_max = options.tree_count *
1145
- stdx.div_ceil(constants.lsm_levels, 2) * // Maximum number of compactions/half-bar.
1146
- (compaction.compaction_tables_input_max + // Update snapshot_max.
1147
- compaction.compaction_tables_input_max + // Remove.
1148
- compaction.compaction_tables_output_max); // Insert.
1149
-
1150
- // "A":
1151
- const half_bar_append_blocks_max =
1152
- stdx.div_ceil(half_bar_append_entries_max, block_entries_max);
1153
-
1154
- const half_bar_compact_blocks_extra = options.compact_extra_blocks;
1155
- assert(half_bar_compact_blocks_extra > 0);
1156
-
1157
- // "C":
1158
- const half_bar_compact_blocks_max =
1159
- half_bar_append_blocks_max + half_bar_compact_blocks_extra;
1160
- assert(half_bar_compact_blocks_max > half_bar_append_blocks_max);
1161
-
1162
- // "T":
1163
- const log_blocks_full_max = stdx.div_ceil(options.tables_max, block_entries_max);
1164
- assert(log_blocks_full_max > 0);
1165
-
1166
- // "limit of MC(c) as c approaches ∞":
1167
- // Working out this recurrence relation's limit with a closed-form solution is complicated.
1168
- // Just compute the limit iteratively instead. (1024 is an arbitrary safety counter.)
1169
- var log_blocks_before: u32 = 0;
1170
- const log_blocks_cycle_max = for (0..1024) |_| {
1171
- const log_blocks_after =
1172
- log_blocks_full_max +
1173
- half_bar_append_blocks_max *
1174
- stdx.div_ceil(log_blocks_before, half_bar_compact_blocks_max);
1175
-
1176
- if (log_blocks_before == log_blocks_after) {
1177
- break log_blocks_after;
1178
- }
1179
- log_blocks_before = log_blocks_after;
1180
- } else {
1181
- // If the value does not converge within the given number of steps,
1182
- // constants.lsm_manifest_compact_extra_blocks should probably be raised.
1183
- @panic("ManifestLog.Pace.log_blocks_cycle_max: no convergence");
1184
- };
1185
- // Burst term "A×⌈(T+1)/C⌉": "A" blocks for each of the ⌈(T+1)/C⌉ half-bars needed to compact `T+1` blocks.
1186
- const log_blocks_burst_max = half_bar_append_blocks_max *
1187
- stdx.div_ceil(log_blocks_full_max + 1, half_bar_compact_blocks_max);
1188
-
1189
- // "limit of MB(b) as b approaches ∞":
1190
- const log_blocks_max = log_blocks_cycle_max + log_blocks_burst_max;
1191
-
1192
- assert(log_blocks_cycle_max > log_blocks_full_max);
1193
- assert(log_blocks_cycle_max < log_blocks_max);
1194
-
1195
- return .{
1196
- .half_bar_append_blocks_max = half_bar_append_blocks_max,
1197
- .half_bar_compact_blocks_max = half_bar_compact_blocks_max,
1198
- .log_blocks_full_max = log_blocks_full_max,
1199
- .log_blocks_max = log_blocks_max,
1200
- .log_blocks_cycle_max = log_blocks_cycle_max,
1201
- .tables_max = options.tables_max,
1202
- };
1203
- }
1204
- /// Returns how many manifest blocks to compact in a half-bar; never more than `half_bar_compact_blocks_max`.
1205
- fn half_bar_compact_blocks(pace: Pace, options: struct {
1206
- /// The number of manifest blocks that *currently* exist.
1207
- log_blocks_count: u32,
1208
- /// The number of live tables.
1209
- tables_count: u32,
1210
- }) u32 {
1211
- assert(options.tables_count <= pace.tables_max);
1212
-
1213
- // Pretend we have an extra half_bar_append_blocks_max blocks so that we always switch to
1214
- // the maximum compaction rate before we exceed the cycle-max.
1215
- if (pace.log_blocks_cycle_max <=
1216
- options.log_blocks_count + pace.half_bar_append_blocks_max)
1217
- {
1218
- return pace.half_bar_compact_blocks_max;
1219
- }
1220
-
1221
- // We have enough free manifest blocks that we could go a whole "cycle" without
1222
- // compacting any. It doesn't strictly matter how much compaction we do in this case, so
1223
- // just try to pace the work evenly, maintaining a constant load factor with respect to
1224
- // the cycle-max.
1225
-
1226
- // Our "target" block count extrapolates a log block count from our table count and the
1227
- // log's maximum load factor.
1228
- const log_blocks_target = @max(1, @divFloor(
1229
- pace.log_blocks_cycle_max * options.tables_count,
1230
- pace.tables_max,
1231
- ));
1232
-
1233
- return @min(
1234
- pace.half_bar_compact_blocks_max,
1235
- @divFloor(
1236
- pace.half_bar_compact_blocks_max * options.log_blocks_count,
1237
- log_blocks_target,
1238
- ),
1239
- );
1240
- }
1241
- };