tigerbeetle 0.0.36 → 0.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/tb_client/extconf.rb +13 -13
  4. data/ext/tb_client/tigerbeetle/LICENSE +177 -0
  5. data/ext/tb_client/tigerbeetle/build.zig +2327 -0
  6. data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
  7. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
  8. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
  9. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
  10. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
  11. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
  12. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
  13. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
  14. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1092 -0
  15. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
  16. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
  17. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
  18. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
  19. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
  20. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
  21. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
  22. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
  23. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
  24. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
  25. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
  26. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
  27. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
  28. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
  29. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
  30. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
  31. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
  32. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
  33. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
  34. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
  35. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
  36. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
  37. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
  38. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
  39. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
  40. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
  41. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
  42. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
  43. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
  44. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
  45. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
  46. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
  47. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
  48. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
  49. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
  50. data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
  51. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
  52. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
  53. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
  54. data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
  55. data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
  56. data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
  57. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
  58. data/ext/tb_client/tigerbeetle/src/direction.zig +120 -0
  59. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
  60. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
  61. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
  62. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
  63. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
  64. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
  65. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
  66. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
  67. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
  68. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
  69. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
  70. data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
  71. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
  72. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
  73. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
  74. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
  75. data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
  76. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
  77. data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
  78. data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
  79. data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
  80. data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
  81. data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
  82. data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
  83. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
  84. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
  85. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
  86. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
  87. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
  88. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
  89. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
  90. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
  91. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
  92. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
  93. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
  94. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
  95. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
  96. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
  97. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
  98. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
  99. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
  100. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
  101. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
  102. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
  103. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
  104. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
  105. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +359 -0
  106. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
  107. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
  108. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +962 -0
  109. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
  110. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
  111. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
  112. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
  113. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
  114. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
  115. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
  116. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
  117. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +90 -0
  118. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
  119. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
  120. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
  121. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +534 -0
  122. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
  123. data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
  124. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
  125. data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
  126. data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
  127. data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
  128. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
  129. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
  130. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
  131. data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
  132. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
  133. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
  134. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
  135. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
  136. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
  137. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
  138. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
  139. data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
  140. data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
  141. data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
  142. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
  143. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
  144. data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
  145. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
  146. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
  147. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
  148. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
  149. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
  150. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
  151. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
  152. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
  153. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
  154. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
  155. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
  156. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
  157. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
  158. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
  159. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
  160. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
  161. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
  162. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
  163. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
  164. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
  165. data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
  166. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
  167. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
  168. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
  169. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
  170. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
  171. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
  172. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
  173. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
  174. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
  175. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
  176. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
  177. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
  178. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
  179. data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
  180. data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
  181. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
  182. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
  183. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
  184. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
  185. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
  186. data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
  187. data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
  188. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
  189. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
  190. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
  191. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
  192. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
  193. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
  194. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
  195. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
  196. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
  197. data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
  198. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
  199. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
  200. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
  201. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
  202. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
  203. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
  204. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
  205. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
  206. data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
  207. data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
  208. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
  209. data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
  210. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
  211. data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
  212. data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
  213. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
  214. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
  215. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
  216. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
  217. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
  218. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
  219. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
  220. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
  221. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
  222. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
  223. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
  224. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
  225. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
  226. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
  227. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
  228. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
  229. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
  230. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
  231. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
  232. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
  233. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
  234. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
  235. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2928 -0
  236. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
  237. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
  238. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
  239. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
  240. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
  241. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
  242. data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
  243. data/lib/tb_client/shared_lib.rb +12 -5
  244. data/lib/tigerbeetle/platforms.rb +9 -0
  245. data/lib/tigerbeetle/version.rb +2 -2
  246. data/tigerbeetle.gemspec +22 -5
  247. metadata +242 -3
  248. data/ext/tb_client/pkg.tar.gz +0 -0
@@ -0,0 +1,1092 @@
1
+ const std = @import("std");
2
+ const builtin = @import("builtin");
3
+ const assert = std.debug.assert;
4
+
5
+ const log = std.log.scoped(.tb_client_context);
6
+
7
+ const vsr = @import("../tb_client.zig").vsr;
8
+
9
+ const constants = vsr.constants;
10
+ const stdx = vsr.stdx;
11
+ const maybe = stdx.maybe;
12
+ const Header = vsr.Header;
13
+
14
+ const MultiBatchDecoder = vsr.multi_batch.MultiBatchDecoder;
15
+ const MultiBatchEncoder = vsr.multi_batch.MultiBatchEncoder;
16
+
17
+ const IO = vsr.io.IO;
18
+ const TimeOS = vsr.time.TimeOS;
19
+ const message_pool = vsr.message_pool;
20
+
21
+ const MessagePool = message_pool.MessagePool;
22
+ const Message = MessagePool.Message;
23
+ const Packet = @import("packet.zig").Packet;
24
+ const Signal = @import("signal.zig").Signal;
25
+
26
+ const KiB = stdx.KiB;
27
+
28
+ const io_thread_stack_size = 512 * KiB;
29
+
30
+ pub const InitParameters = extern struct { // Out-parameter type of `ClientInterface.init_parameters`.
31
+ cluster_id: u128,
32
+ client_id: u128,
33
+ addresses_ptr: [*]const u8, // Presumably paired with `addresses_len` as pointer + length; confirm against the implementation.
34
+ addresses_len: u64,
35
+ };
36
+
37
+ /// Thread-safe client interface allocated by the user.
38
+ /// Contains the `VTable` with function pointers to the StateMachine-specific implementation
39
+ /// and the synchronization status.
40
+ /// Safe to call from multiple threads, even after `deinit` is called: once `deinit` has
+ /// cleared the context pointer, every method returns `Error.ClientInvalid`.
41
+ pub const ClientInterface = extern struct {
42
+ pub const Error = error{ClientInvalid};
43
+ pub const VTable = struct { // Implementation hooks; each receives the opaque context pointer as its first argument.
44
+ submit_fn: *const fn (*anyopaque, *Packet.Extern) void,
45
+ completion_context_fn: *const fn (*anyopaque) usize,
46
+ deinit_fn: *const fn (*anyopaque) void,
47
+ init_parameters_fn: *const fn (*anyopaque, *InitParameters) void,
48
+ };
49
+
50
+ /// Magic number used as a tag, preventing the use of uninitialized pointers.
51
+ const beetle: u64 = 0xBEE71E;
52
+
53
+ // Since the client interface is an intrusive struct allocated by the user,
54
+ // it is exported as an opaque `[_]u64` array.
55
+ // An `extern union` is used to ensure a platform-independent size for pointer fields,
56
+ // avoiding the need for different versions of `tb_client.h` on 32-bit targets.
57
+
58
+ context: extern union {
59
+ ptr: ?*anyopaque, // Set to `null` by `deinit`; the other methods treat `null` as `Error.ClientInvalid`.
60
+ int_ptr: u64,
61
+ },
62
+ vtable: extern union {
63
+ ptr: *const VTable,
64
+ int_ptr: u64,
65
+ },
66
+ locker: Locker,
67
+ reserved: u32, // Written as 0 by `init` and asserted to be 0 by every method.
68
+ magic_number: u64, // Must equal `beetle` for any method to proceed.
69
+
70
+ pub fn init(self: *ClientInterface, context: *anyopaque, vtable: *const VTable) void { // Stores `context` and `vtable`; does not tag the interface.
71
+ self.* = .{
72
+ .context = .{ .ptr = context },
73
+ .vtable = .{ .ptr = vtable },
74
+ .locker = .{},
75
+ .reserved = 0,
76
+ .magic_number = 0, // Not `beetle`: all methods return `Error.ClientInvalid` until the caller sets the tag.
77
+ };
78
+ }
79
+
80
+ pub fn submit(client: *ClientInterface, packet: *Packet.Extern) Error!void { // Forwards `packet` to `vtable.submit_fn` while holding the lock.
81
+ if (client.magic_number != beetle) return Error.ClientInvalid;
82
+ assert(client.reserved == 0);
83
+
84
+ client.locker.lock();
85
+ defer client.locker.unlock();
86
+
87
+ const context = client.context.ptr orelse return Error.ClientInvalid; // `null` after `deinit`.
88
+ client.vtable.ptr.submit_fn(context, packet);
89
+ }
90
+
91
+ pub fn completion_context(client: *ClientInterface) Error!usize { // Returns the value of `vtable.completion_context_fn`, called under the lock.
92
+ if (client.magic_number != beetle) return Error.ClientInvalid;
93
+ assert(client.reserved == 0);
94
+
95
+ client.locker.lock();
96
+ defer client.locker.unlock();
97
+
98
+ const context = client.context.ptr orelse return Error.ClientInvalid;
99
+ return client.vtable.ptr.completion_context_fn(context);
100
+ }
101
+
102
+ pub fn deinit(client: *ClientInterface) Error!void { // Clears the context under the lock, then calls `vtable.deinit_fn` outside the lock.
103
+ if (client.magic_number != beetle) return Error.ClientInvalid;
104
+ assert(client.reserved == 0);
105
+
106
+ const context: *anyopaque = context: {
107
+ client.locker.lock();
108
+ defer client.locker.unlock();
109
+
110
+ if (client.context.ptr == null) return Error.ClientInvalid; // Already deinitialized.
111
+
112
+ defer client.context.ptr = null; // Runs on block exit, after the `break` operand below has been evaluated.
113
+
114
+ break :context client.context.ptr.?;
115
+ };
116
+ client.vtable.ptr.deinit_fn(context);
117
+ }
118
+
119
+ pub fn init_parameters(client: *ClientInterface, out_parameters: *InitParameters) Error!void { // Passes `out_parameters` to `vtable.init_parameters_fn` under the lock.
120
+ if (client.magic_number != beetle) return Error.ClientInvalid;
121
+ assert(client.reserved == 0);
122
+
123
+ client.locker.lock();
124
+ defer client.locker.unlock();
125
+
126
+ const context = client.context.ptr orelse return Error.ClientInvalid;
127
+ return client.vtable.ptr.init_parameters_fn(context, out_parameters);
128
+ }
129
+
130
+ comptime { // Compile-time check of the struct's size and alignment.
131
+ assert(@sizeOf(ClientInterface) == 32);
132
+ assert(@alignOf(ClientInterface) == 8);
133
+ }
134
+ };
135
+
136
+ /// The function pointer called by the IO thread when a request is completed or fails.
137
+ /// The memory referenced by `result` is only valid for the duration of this callback.
138
+ /// `result` is `null` for unsuccessful requests. See `packet.status` for more details.
139
+ pub const CompletionCallback = *const fn (
140
+ context: usize,
141
+ packet: *Packet.Extern,
142
+ timestamp: u64,
143
+ result: ?[*]const u8,
144
+ result_size: u32,
145
+ ) callconv(.c) void;
146
+
147
+ pub const InitError = std.mem.Allocator.Error || error{ // Error set returned by `ContextType(...).init`.
148
+ Unexpected,
149
+ AddressInvalid, // `init` maps every address/port parse error other than `AddressLimitExceeded` to this.
150
+ AddressLimitExceeded,
151
+ SystemResources, // `init` maps the IO fd-quota error and the thread-spawn resource errors to this.
152
+ NetworkSubsystemFailed,
153
+ };
154
+
155
+ /// Implements a `ClientInterface` with specialized `vsr.Client` and `StateMachine` types.
156
+ pub fn ContextType(
157
+ comptime Client: type,
158
+ ) type {
159
+ return struct {
160
+ const Context = @This();
161
+ const GPA = std.heap.GeneralPurposeAllocator(.{
162
+ .thread_safe = true,
163
+ });
164
+
165
+ const Operation = Client.Operation;
166
+ const allowed_operations = [_]Operation{
167
+ .create_accounts,
168
+ .create_transfers,
169
+ .lookup_accounts,
170
+ .lookup_transfers,
171
+ .get_account_transfers,
172
+ .get_account_balances,
173
+ .query_accounts,
174
+ .query_transfers,
175
+ .get_change_events,
176
+ };
177
+
178
+ const UserData = extern struct {
179
+ self: *Context,
180
+ packet: *Packet,
181
+
182
+ comptime {
183
+ assert(@sizeOf(UserData) == @sizeOf(u128));
184
+ }
185
+ };
186
+
187
+ const PacketError = error{
188
+ TooMuchData,
189
+ ClientShutdown,
190
+ ClientEvicted,
191
+ ClientReleaseTooLow,
192
+ ClientReleaseTooHigh,
193
+ InvalidOperation,
194
+ InvalidDataSize,
195
+ };
196
+
197
+ gpa: GPA,
198
+ time_os: TimeOS,
199
+ client_id: u128,
200
+ cluster_id: u128,
201
+ addresses_copy: []const u8,
202
+
203
+ addresses: stdx.BoundedArrayType(std.net.Address, constants.replicas_max),
204
+ io: IO,
205
+ message_pool: MessagePool,
206
+ client: Client,
207
+ batch_size_limit: ?u32,
208
+
209
+ completion_callback: CompletionCallback,
210
+ completion_context: usize,
211
+
212
+ interface: *ClientInterface,
213
+ submitted: Packet.Queue,
214
+ pending: Packet.Queue,
215
+
216
+ signal: Signal,
217
+ eviction_reason: ?vsr.Header.Eviction.Reason,
218
+ thread: std.Thread,
219
+
220
+ previous_request_instant: ?stdx.Instant = null,
221
+ previous_request_latency: ?stdx.Duration = null,
222
+
223
+ pub fn init( // Allocates a `Context`, sets up IO, message pool, client and signal, spawns the IO thread, then tags `client_out`.
224
+ root_allocator: std.mem.Allocator,
225
+ client_out: *ClientInterface,
226
+ cluster_id: u128,
227
+ addresses: []const u8,
228
+ completion_ctx: usize,
229
+ completion_callback: CompletionCallback,
230
+ ) InitError!void {
231
+ var context: *Context = context: {
232
+ // Wrap the root allocator - usually heap.c_allocator when built as a library - in
233
+ // a GPA to keep maximum compatibility while gaining the extra safety. As a library,
234
+ // libtbclient is running inside another process's address space.
235
+ var gpa = GPA{
236
+ .backing_allocator = root_allocator,
237
+ };
238
+ errdefer assert(gpa.deinit() == .ok);
239
+
240
+ const context = try gpa.allocator().create(Context);
241
+
242
+ // Moving the GPA is safe, since we don't have any live reference to `allocator`.
243
+ context.gpa = gpa;
244
+
245
+ break :context context;
246
+ };
247
+
248
+ errdefer { // On any later failure: copy the GPA out of the context, destroy the context with it, then deinit the copy.
249
+ var gpa: GPA = context.gpa;
250
+ gpa.allocator().destroy(context);
251
+ assert(gpa.deinit() == .ok);
252
+ }
253
+
254
+ const allocator = context.gpa.allocator();
255
+ context.client_id = stdx.unique_u128();
256
+ context.cluster_id = cluster_id;
257
+ context.addresses_copy = try allocator.dupe(u8, addresses);
258
+ errdefer allocator.free(context.addresses_copy);
259
+
260
+ context.time_os = .{};
261
+ const time = context.time_os.time();
262
+
263
+ log.debug("{}: init: parsing vsr addresses: {s}", .{ context.client_id, addresses });
264
+ context.addresses = .{};
265
+ const addresses_parsed = vsr.parse_addresses(
266
+ addresses,
267
+ context.addresses.unused_capacity_slice(),
268
+ ) catch |err| return switch (err) { // Collapse the parser's error set into the two address errors of `InitError`.
269
+ error.AddressLimitExceeded => error.AddressLimitExceeded,
270
+ error.AddressHasMoreThanOneColon,
271
+ error.AddressHasTrailingComma,
272
+ error.AddressInvalid,
273
+ error.PortInvalid,
274
+ error.PortOverflow,
275
+ => error.AddressInvalid,
276
+ };
277
+ assert(addresses_parsed.len > 0);
278
+ assert(addresses_parsed.len <= constants.replicas_max);
279
+ context.addresses.resize(addresses_parsed.len) catch unreachable; // Presumably cannot fail given the bound asserted above; confirm against `BoundedArrayType.resize`.
280
+
281
+ log.debug("{}: init: initializing IO", .{context.client_id});
282
+ context.io = IO.init(32, 0) catch |err| {
283
+ log.err("{}: failed to initialize IO: {s}", .{
284
+ context.client_id,
285
+ @errorName(err),
286
+ });
287
+ return switch (err) {
288
+ error.ProcessFdQuotaExceeded => error.SystemResources,
289
+ error.Unexpected => error.Unexpected,
290
+ else => unreachable, // NOTE(review): assumes `IO.init(32, 0)` can only fail with the two errors above; confirm.
291
+ };
292
+ };
293
+ errdefer context.io.deinit();
294
+
295
+ log.debug("{}: init: initializing MessagePool", .{context.client_id});
296
+ context.message_pool = try MessagePool.init(allocator, .client);
297
+ errdefer context.message_pool.deinit(allocator);
298
+
299
+ log.debug("{}: init: initializing client (cluster_id={x:0>32}, addresses={any})", .{
300
+ context.client_id,
301
+ cluster_id,
302
+ context.addresses.const_slice(),
303
+ });
304
+ context.client = Client.init(
305
+ allocator,
306
+ time,
307
+ &context.message_pool,
308
+ .{
309
+ .id = context.client_id,
310
+ .cluster = cluster_id,
311
+ .replica_count = context.addresses.count_as(u8),
312
+ .aof_recovery = false,
313
+ .message_bus_options = .{
314
+ .configuration = context.addresses.const_slice(),
315
+ .io = &context.io,
316
+ .trace = null,
317
+ },
318
+ .eviction_callback = client_eviction_callback,
319
+ },
320
+ ) catch |err| {
321
+ log.err("{}: failed to initialize Client: {s}", .{
322
+ context.client_id,
323
+ @errorName(err),
324
+ });
325
+ return switch (err) {
326
+ error.OutOfMemory => error.OutOfMemory,
327
+ };
328
+ };
329
+ errdefer context.client.deinit(allocator);
330
+
331
+ ClientInterface.init(client_out, context, comptime &.{ // Leaves `client_out.magic_number` at 0; the tag is written at the end of this function.
332
+ .submit_fn = &vtable_submit_fn,
333
+ .completion_context_fn = &vtable_completion_context_fn,
334
+ .deinit_fn = &vtable_deinit_fn,
335
+ .init_parameters_fn = &vtable_init_parameters_fn,
336
+ });
337
+ context.interface = client_out;
338
+ context.submitted = Packet.Queue.init(.{
339
+ .name = null,
340
+ .verify_push = builtin.is_test,
341
+ });
342
+ context.pending = Packet.Queue.init(.{
343
+ .name = null,
344
+ .verify_push = builtin.is_test,
345
+ });
346
+ context.completion_context = completion_ctx;
347
+ context.completion_callback = completion_callback;
348
+ context.eviction_reason = null;
349
+
350
+ log.debug("{}: init: initializing signal", .{context.client_id});
351
+ try context.signal.init(&context.io, Context.signal_notify_callback);
352
+ errdefer context.signal.deinit();
353
+
354
+ context.batch_size_limit = null;
355
+ context.client.register(client_register_callback, @intFromPtr(context)); // The context pointer is the register request's user data.
356
+
357
+ log.debug("{}: init: spawning thread", .{context.client_id});
358
+ context.thread = std.Thread.spawn(
359
+ .{ .stack_size = io_thread_stack_size },
360
+ Context.io_thread,
361
+ .{context},
362
+ ) catch |err| {
363
+ log.err("{}: failed to spawn thread: {s}", .{
364
+ context.client_id,
365
+ @errorName(err),
366
+ });
367
+ return switch (err) {
368
+ error.Unexpected => error.Unexpected,
369
+ error.OutOfMemory => error.OutOfMemory,
370
+ error.SystemResources,
371
+ error.ThreadQuotaExceeded,
372
+ error.LockedMemoryLimitExceeded,
373
+ => error.SystemResources,
374
+ };
375
+ };
376
+
377
+ // Setting `magic_number` tags the interface as initialized.
378
+ // Writing it at the end so that if `init` fails part-way through and the
379
+ // user doesn't handle the error before using it, we'll still be able to validate.
380
+ client_out.magic_number = ClientInterface.beetle;
381
+ }
382
+
383
+ fn tick(self: *Context) void { // Ticks the client, unless an eviction reason has been recorded.
384
+ if (self.eviction_reason == null) {
385
+ self.client.tick();
386
+ }
387
+ }
388
+
389
+ fn io_thread(self: *Context) void { // Entry point of the thread spawned by `init`.
390
+ while (self.signal.status() != .stopped) { // Tick, then run IO for one tick interval, until the signal reports `.stopped`.
391
+ self.tick();
392
+ self.io.run_for_ns(constants.tick_ms * std.time.ns_per_ms) catch |err| {
393
+ log.err("{}: IO.run() failed: {s}", .{
394
+ self.client_id,
395
+ @errorName(err),
396
+ });
397
+ @panic("IO.run() failed");
398
+ };
399
+ }
400
+
401
+ self.cancel_request_inflight(); // Stopped: cancel the inflight request first, then drain both queues.
402
+
403
+ while (self.pending.pop()) |packet| {
404
+ packet.assert_phase(.pending);
405
+ self.packet_cancel(packet);
406
+ }
407
+
408
+ // The submitted queue is no longer accessible to user threads,
409
+ // so synchronization is not required here.
410
+ while (self.submitted.pop()) |packet| {
411
+ packet.assert_phase(.submitted);
412
+ self.packet_cancel(packet);
413
+ }
414
+
415
+ self.io.cancel_all();
416
+ self.signal.deinit(); // From here on: tear down what `init` created (signal, client, message pool, IO).
417
+ self.client.deinit(self.gpa.allocator());
418
+ self.message_pool.deinit(self.gpa.allocator());
419
+ self.io.deinit();
420
+ }
421
+
422
+ /// Cancel the current inflight request (and the entire batched linked list of packets),
423
+ /// as it will no longer receive a reply.
424
+ fn cancel_request_inflight(self: *Context) void {
425
+ if (self.client.request_inflight) |*inflight| {
426
+ if (inflight.message.header.operation != .register) { // `init` registers with the context pointer as user data, not a `UserData`, so there is no packet to cancel.
427
+ const packet: *Packet = @as(UserData, @bitCast(inflight.user_data)).packet;
428
+ packet.assert_phase(.sent);
429
+ self.packet_cancel(packet);
430
+ }
431
+ }
432
+ }
433
+
434
+ /// Calls the user callback when a packet (the entire batched linked list of packets)
435
+ /// is canceled due to the client being either evicted or shut down.
436
+ fn packet_cancel(self: *Context, packet_list: *Packet) void {
437
+ assert(packet_list.link.next == null);
438
+ assert(packet_list.phase != .complete);
439
+ packet_list.assert_phase(packet_list.phase);
440
+
441
+ const result = if (self.eviction_reason) |reason| switch (reason) { // Pick the error reported to every packet in the list.
442
+ .reserved => unreachable,
443
+ .client_release_too_low => error.ClientReleaseTooLow,
444
+ .client_release_too_high => error.ClientReleaseTooHigh,
445
+ else => error.ClientEvicted,
446
+ } else result: {
447
+ assert(self.signal.status() != .running); // Without an eviction reason, cancellation is only expected once the signal has left `.running`.
448
+ break :result error.ClientShutdown;
449
+ };
450
+
451
+ var it: ?*Packet = packet_list;
452
+ while (it) |batched| {
453
+ if (batched != packet_list) batched.assert_phase(.batched); // Only the head may be in a phase other than `.batched`.
454
+ it = batched.multi_batch_next; // Read the next link before notifying; presumably the packet must not be touched afterwards (confirm).
455
+ self.notify_completion(batched, result);
456
+ }
457
+ }
458
+
459
/// Validates a submitted packet and decides what happens to it next.
///
/// The packet ends up in exactly one of these states:
/// - cancelled (client evicted or no longer running),
/// - completed with `InvalidOperation`, `InvalidDataSize` or `TooMuchData`,
/// - sent right away (nothing is in flight),
/// - batched onto a pending packet of the same operation that still has room,
/// - pushed onto the pending queue as a new batch root.
fn packet_enqueue(self: *Context, packet: *Packet) void {
    assert(self.batch_size_limit != null);
    packet.assert_phase(.submitted);

    if (self.eviction_reason != null) {
        self.packet_cancel(packet);
        return;
    }

    const operation: Operation = operation_from_int(packet.operation) orelse {
        self.notify_completion(packet, error.InvalidOperation);
        return;
    };

    // The payload must fit into one request, and the largest number of results
    // it may produce must fit into one reply.
    const Sizes = struct {
        event_size: u32,
        result_size: u32,
        event_count: u32,
        result_count_expected: u32,
    };
    const batch: Sizes = sizes: {
        const size_limit = self.batch_size_limit.?;

        const event_size: u32 = operation.event_size();
        assert(event_size > 0);

        const result_size: u32 = operation.result_size();
        assert(result_size > 0);

        const payload: []const u8 = packet.slice();
        assert(payload.len == packet.data_size);
        maybe(payload.len == 0);
        if (payload.len % event_size != 0) {
            self.notify_completion(packet, error.InvalidDataSize);
            return;
        }

        const event_count: u32 = @intCast(@divExact(payload.len, event_size));
        const event_max: u32 = operation.event_max(size_limit);
        if (event_count > event_max) {
            self.notify_completion(packet, error.TooMuchData);
            return;
        }

        const result_max: u32 = operation.result_max(size_limit);
        const result_count_expected: u32 = operation.result_count_expected(payload);
        if (result_count_expected > result_max) {
            self.notify_completion(packet, error.TooMuchData);
            return;
        }

        break :sizes .{
            .event_size = event_size,
            .result_size = result_size,
            .event_count = event_count,
            .result_count_expected = result_count_expected,
        };
    };
    assert(packet.data_size == batch.event_count * batch.event_size);
    maybe(batch.event_count == 0);
    maybe(batch.result_count_expected == 0);

    // A client that was shut down must not put another packet in flight.
    if (self.signal.status() != .running) {
        self.packet_cancel(packet);
        return;
    }

    // With nothing in flight there is nothing to wait for: send immediately.
    if (self.client.request_inflight == null) {
        assert(self.pending.count() == 0);
        packet.phase = .pending;
        packet.multi_batch_time_monotonic = self.client.time.monotonic().ns;
        packet.multi_batch_count = 1;
        packet.multi_batch_event_count = @intCast(batch.event_count);
        packet.multi_batch_result_count_expected = @intCast(batch.result_count_expected);
        self.packet_send(packet);
        return;
    }

    // Look for a pending root of the same operation that can absorb this packet.
    var roots = self.pending.iterate();
    while (roots.next()) |root| {
        root.assert_phase(.pending);

        if (root.operation != packet.operation) continue;

        const batch_count_next = root.multi_batch_count + 1;

        // Would the request still fit with this packet's events added?
        const request_size: u32 = size: {
            const trailer_size = vsr.multi_batch.trailer_total_size(.{
                .element_size = batch.event_size,
                .batch_count = batch_count_next,
            });
            const events_total: u32 = batch.event_count +
                root.multi_batch_event_count;
            break :size (events_total * batch.event_size) + trailer_size;
        };
        if (request_size > self.batch_size_limit.?) continue;

        // Would the reply still fit with this packet's maximum expected results added?
        const reply_size_expected: u32 = size: {
            const trailer_size = vsr.multi_batch.trailer_total_size(.{
                .element_size = batch.result_size,
                .batch_count = batch_count_next,
            });
            const results_total: u32 = batch.result_count_expected +
                root.multi_batch_result_count_expected;
            break :size (results_total * batch.result_size) + trailer_size;
        };
        if (reply_size_expected > constants.message_body_size_max) continue;

        // Append the packet to the root's singly linked batch list.
        packet.phase = .batched;
        if (root.multi_batch_next != null) {
            assert(root.multi_batch_tail != null);
            assert(root.multi_batch_count > 1);
            root.multi_batch_tail.?.multi_batch_next = packet;
        } else {
            assert(root.multi_batch_tail == null);
            assert(root.multi_batch_count == 1);
            root.multi_batch_next = packet;
        }
        root.multi_batch_tail = packet;

        root.multi_batch_count += 1;
        root.multi_batch_event_count += @intCast(batch.event_count);
        root.multi_batch_result_count_expected += @intCast(batch.result_count_expected);
        return;
    }

    // No pending root could take it: the packet becomes a root of its own.
    packet.phase = .pending;
    packet.multi_batch_time_monotonic = self.client.time.monotonic().ns;
    packet.multi_batch_count = 1;
    packet.multi_batch_event_count = @intCast(batch.event_count);
    packet.multi_batch_result_count_expected = @intCast(batch.result_count_expected);
    self.pending.push(packet);
}
588
+
589
/// Builds a request message from a pending packet (and every packet batched onto it)
/// and hands it to the VSR client.
/// Always called by the io thread.
///
/// If the client is no longer running, the whole list is cancelled instead.
fn packet_send(self: *Context, packet_list: *Packet) void {
    assert(self.batch_size_limit != null);
    assert(self.client.request_inflight == null);
    packet_list.assert_phase(.pending);

    // On shutdown nothing is sent; the root and its batched packets are cancelled.
    if (self.signal.status() != .running) {
        self.packet_cancel(packet_list);
        return;
    }

    const message = self.client.get_message().build(.request);
    defer {
        self.client.release_message(message.base());
        packet_list.assert_phase(.sent);
    }

    const operation: Operation = operation_from_int(packet_list.operation).?;
    const event_size: u32 = operation.event_size();
    const body = message.buffer[@sizeOf(Header)..];

    const request_size: u32 = if (!operation.is_multi_batch()) single: {
        // Operations without multi-batch support carry exactly one packet, copied as-is.
        assert(packet_list.multi_batch_next == null);
        const source: []const u8 = packet_list.slice();
        stdx.copy_disjoint(.inexact, u8, body, source);
        break :single @intCast(source.len);
    } else multi: {
        assert(operation.is_multi_batch());

        var encoder = MultiBatchEncoder.init(body, .{
            .element_size = event_size,
        });

        // Encode each packet of the list as one batch of the multi-batch request.
        var events_total: u16 = 0;
        var cursor: ?*Packet = packet_list;
        while (cursor) |current| : (cursor = current.multi_batch_next) {
            if (current != packet_list) current.assert_phase(.batched);

            const source: []const u8 = current.slice();
            const target = encoder.writable().?;
            assert(target.len >= source.len);
            stdx.copy_disjoint(.exact, u8, target[0..source.len], source);
            encoder.add(@intCast(source.len));

            const events_count: u16 = @intCast(@divExact(source.len, event_size));
            events_total += events_count;
        }
        assert(events_total == packet_list.multi_batch_event_count);
        assert(encoder.batch_count == packet_list.multi_batch_count);

        // The reply must have room for the maximum expected number of results.
        const result_size: u32 = operation.result_size();
        const trailer_size = vsr.multi_batch.trailer_total_size(.{
            .element_size = result_size,
            .batch_count = packet_list.multi_batch_count,
        });
        const reply_size_max: u32 = (result_size *
            packet_list.multi_batch_result_count_expected) + trailer_size;
        assert(reply_size_max % result_size == 0);
        assert(reply_size_max <= constants.message_body_size_max);

        break :multi encoder.finish();
    };
    assert(request_size % event_size == 0);
    assert(request_size <= self.batch_size_limit.?);

    // Fill in the request header; the latency of the previous request is sent along,
    // clamped to what fits in a `u32`.
    const latency_previous =
        self.previous_request_latency orelse stdx.Duration{ .ns = 0 };
    message.header.* = .{
        .release = self.client.release,
        .client = self.client.id,
        .request = 0, // Set by client.raw_request.
        .cluster = self.client.cluster,
        .command = .request,
        .operation = operation.to_vsr(),
        .size = @sizeOf(vsr.Header) + request_size,
        .previous_request_latency = @intCast(@min(
            latency_previous.ns,
            std.math.maxInt(u32),
        )),
    };

    assert((self.previous_request_instant == null) ==
        (self.previous_request_latency == null));
    self.previous_request_instant = .{ .ns = packet_list.multi_batch_time_monotonic };

    packet_list.phase = .sent;
    const user_data = UserData{
        .self = self,
        .packet = packet_list,
    };
    self.client.raw_request(
        Context.client_result_callback,
        @bitCast(user_data),
        message.ref(),
    );
    assert(message.header.request != 0);
}
698
+
699
/// Signal handler: moves packets from the submitted queue into the client.
///
/// Packets are dequeued under the interface lock and enqueued one at a time until either
/// the submitted queue is empty, one more packet has landed in the pending queue, or a
/// fixed iteration limit is hit. If packets remain, the signal is notified again so the
/// rest is handled on a later callback.
fn signal_notify_callback(signal: *Signal) void {
    const self: *Context = @alignCast(@fieldParentPtr("signal", signal));
    assert(self.signal.status() != .stopped);

    // Requests are held back until registration has completed.
    if (self.batch_size_limit == null) {
        assert(self.client.request_inflight != null);
        assert(self.client.request_inflight.?.message.header.operation == .register);
        return;
    }

    // Keep the IO thread responsive under heavy load:
    // only process as many packets as the next pending request needs.
    const pending_count_before = self.pending.count();
    const safety_limit = 8 * 1024; // Bounds the loop when packets keep being invalid.
    var iteration: usize = 0;
    while (iteration < safety_limit) : (iteration += 1) {
        const packet: *Packet = pop: {
            self.interface.locker.lock();
            defer self.interface.locker.unlock();

            break :pop self.submitted.pop() orelse return;
        };
        self.packet_enqueue(packet);

        // `pending.count` does not grow when the packet:
        // - was invalid,
        // - was sent immediately because nothing was in flight, or
        // - was batched onto a previously enqueued packet.
        if (self.pending.count() > pending_count_before) break;
    }

    // Leave the remaining packets for later so that
    // the IO thread stays free to process completions.
    const has_more: bool = has_more: {
        self.interface.locker.lock();
        defer self.interface.locker.unlock();

        break :has_more !self.submitted.empty();
    };
    if (has_more) self.signal.notify();
}
743
+
744
/// Called when client registration completes.
/// Stores the effective batch size limit and processes packets submitted in the meantime.
/// `user_data` carries the address of the `Context`.
fn client_register_callback(user_data: u128, result: *const vsr.RegisterResult) void {
    const context_address: usize = @intCast(user_data);
    const self: *Context = @ptrFromInt(context_address);
    assert(self.client.request_inflight == null);
    assert(self.batch_size_limit == null);
    assert(result.batch_size_limit > 0);

    // The limit reported on registration may exceed this client's own message size limit;
    // use whichever is smaller.
    maybe(constants.message_body_size_max < result.batch_size_limit);
    self.batch_size_limit = @min(result.batch_size_limit, constants.message_body_size_max);

    // Packets may have been submitted while registration was in progress.
    signal_notify_callback(&self.signal);
}
757
+
758
/// Called when the client is evicted.
/// Records the eviction reason and stops the signal.
fn client_eviction_callback(client: *Client, eviction: *const Message.Eviction) void {
    const self: *Context = @fieldParentPtr("client", client);
    assert(self.eviction_reason == null);

    const reason = eviction.header.reason;
    log.debug("{}: client_eviction_callback: reason={?s} reason_int={}", .{
        self.client_id,
        std.enums.tagName(vsr.Header.Eviction.Reason, reason),
        @intFromEnum(reason),
    });

    // An evicted client accepts no further requests, so it can be deinitialized safely.
    // Stopping the signal ends the IO thread's loop, and the IO thread deinitializes the
    // client before it exits (see `io_thread`).
    self.eviction_reason = reason;
    self.signal.stop();
}
774
+
775
/// Called when a reply arrives for the request sent by `packet_send`.
///
/// Records the request latency, sends the next pending packet (if any), and then completes
/// the replied packet. For multi-batch operations the reply is split so that each batched
/// packet is completed with its own slice.
fn client_result_callback(
    raw_user_data: u128,
    operation_vsr: vsr.Operation,
    timestamp: u64,
    reply: []const u8,
) void {
    const user_data: UserData = @bitCast(raw_user_data);
    const self: *Context = user_data.self;
    const packet_list: *Packet = user_data.packet;
    const operation = operation_vsr.cast(Client.Operation);
    assert(packet_list.operation == @intFromEnum(operation));
    assert(timestamp > 0);
    packet_list.assert_phase(.sent);

    const now = self.client.time.monotonic();
    self.previous_request_latency = now.duration_since(self.previous_request_instant.?);

    // This request is done, so the next pending packet can go out. `packet_send` may
    // cancel a packet instead of sending it; keep going until a request is in flight
    // or the pending queue is empty.
    assert(self.client.request_inflight == null);
    while (self.client.request_inflight == null) {
        const packet_next = self.pending.pop() orelse break;
        self.packet_send(packet_next);
    }

    // Only operations listed in `allowed_operations` may ever reach this callback;
    // this also guards against having sent an unsupported operation.
    assert(operation_from_int(@intFromEnum(operation)) != null);

    if (!operation.is_multi_batch()) {
        assert(packet_list.multi_batch_next == null);
        self.notify_completion(packet_list, .{
            .timestamp = timestamp,
            .reply = reply,
        });
        return;
    }
    assert(operation.is_multi_batch());

    const result_size: u32 = operation.result_size();
    assert(result_size > 0);
    var decoder = MultiBatchDecoder.init(reply, .{
        .element_size = result_size,
    }) catch unreachable;
    assert(packet_list.multi_batch_count == decoder.batch_count());

    // Copied up front: the root packet is no longer valid once its callback has run.
    const results_expected: u16 = packet_list.multi_batch_result_count_expected;
    var results_actual: u16 = 0;

    var cursor: ?*Packet = packet_list;
    while (cursor) |current| {
        if (current != packet_list) current.assert_phase(.batched);
        assert(current.operation == @intFromEnum(operation));

        // NB: `current` must not be touched after `notify_completion`,
        // so the link is read first.
        cursor = current.multi_batch_next;

        const current_reply: []const u8 = decoder.pop().?;
        results_actual += @intCast(@divExact(current_reply.len, result_size));
        self.notify_completion(current, .{
            .timestamp = timestamp,
            .reply = current_reply,
        });
    }
    assert(decoder.pop() == null);
    assert(results_actual <= results_expected);
}
845
+
846
/// Marks `packet` as complete and invokes the user's completion callback.
///
/// On success the callback receives the reply's timestamp, pointer and length.
/// On error the packet's `status` is set from the error and the callback receives a zero
/// timestamp, a null reply and a zero length.
fn notify_completion(
    self: *Context,
    packet: *Packet,
    completion: PacketError!struct {
        timestamp: u64,
        reply: []const u8,
    },
) void {
    if (completion) |result| {
        // The packet completed normally.
        assert(packet.status == .ok);
        packet.phase = .complete;
        self.completion_callback(
            self.completion_context,
            packet.cast(),
            result.timestamp,
            result.reply.ptr,
            @intCast(result.reply.len),
        );
    } else |err| {
        // The packet completed with an error.
        packet.status = switch (err) {
            error.TooMuchData => .too_much_data,
            error.ClientEvicted => .client_evicted,
            error.ClientReleaseTooLow => .client_release_too_low,
            error.ClientReleaseTooHigh => .client_release_too_high,
            error.ClientShutdown => .client_shutdown,
            error.InvalidOperation => .invalid_operation,
            error.InvalidDataSize => .invalid_data_size,
        };
        assert(packet.status != .ok);
        packet.phase = .complete;
        self.completion_callback(
            self.completion_context,
            packet.cast(),
            0,
            null,
            0,
        );
    }
}
889
+
890
+ // VTable functions called by `ClientInterface`, which are thread-safe.
891
+
892
/// VTable entry: accepts a packet from the user.
///
/// The packet is reinitialized in place, then either pushed onto the submitted queue
/// (followed by a signal notification) or, if the client was evicted, cancelled directly.
fn vtable_submit_fn(context: *anyopaque, packet_extern: *Packet.Extern) void {
    const self: *Context = @ptrCast(@alignCast(context));

    // The caller allocates the packet, which enables elastic memory management built on
    // intrusive linked lists. Some of Packet's fields are effectively private, though;
    // they are initialized here so that defaults need not cross the FFI boundary.
    const packet: *Packet = packet_extern.cast();
    packet.* = .{
        .user_data = packet_extern.user_data,
        .operation = packet_extern.operation,
        .data_size = packet_extern.data_size,
        .data = packet_extern.data,
        .user_tag = packet_extern.user_tag,
        .status = .ok,
        .link = .{},
        .multi_batch_time_monotonic = 0,
        .multi_batch_next = null,
        .multi_batch_tail = null,
        .multi_batch_count = 0,
        .multi_batch_event_count = 0,
        .multi_batch_result_count_expected = 0,
        .phase = .submitted,
    };

    if (self.eviction_reason != null) {
        // Eviction stops the IO thread, so nobody would process the packet: cancel it.
        assert(self.signal.status() != .running);
        self.packet_cancel(packet);
        return;
    }

    // Queue the packet and wake the IO thread, which processes it asynchronously.
    assert(self.signal.status() == .running);
    self.submitted.push(packet);
    self.signal.notify();
}
927
+
928
/// VTable entry: returns the completion context value stored in the `Context`.
fn vtable_completion_context_fn(context: *anyopaque) usize {
    const self: *Context = @ptrCast(@alignCast(context));
    const completion_context = self.completion_context;
    return completion_context;
}
932
+
933
/// VTable entry: shuts the client down and frees the `Context`.
///
/// Stops the signal, waits for the IO thread to exit, then releases the copied addresses
/// and the `Context` allocation itself.
fn vtable_deinit_fn(context: *anyopaque) void {
    const self: *Context = @ptrCast(@alignCast(context));

    self.signal.stop();
    self.thread.join();

    // The IO thread drains both queues before it exits (see `io_thread`).
    assert(self.submitted.pop() == null);
    assert(self.pending.pop() == null);

    self.gpa.allocator().free(self.addresses_copy);

    // NB: `self` owns the allocator that allocated it. Move the allocator out first,
    // destroy `self` through the copy, and only then deinitialize the copy.
    var gpa_moved: GPA = self.gpa;
    gpa_moved.allocator().destroy(self);
    assert(gpa_moved.deinit() == .ok);
}
949
+
950
/// VTable entry: writes the cluster id, client id and copied addresses into `out_parameters`.
/// Only valid while the signal is `.running`.
fn vtable_init_parameters_fn(context: *anyopaque, out_parameters: *InitParameters) void {
    const self: *Context = @ptrCast(@alignCast(context));
    assert(self.signal.status() == .running);

    const addresses = self.addresses_copy;
    out_parameters.cluster_id = self.cluster_id;
    out_parameters.client_id = self.client_id;
    out_parameters.addresses_ptr = addresses.ptr;
    out_parameters.addresses_len = addresses.len;
}
959
+
960
/// Maps a raw operation code to the matching entry of `allowed_operations`.
/// Returns `null` when no allowed operation has that code.
fn operation_from_int(op: u8) ?Operation {
    inline for (allowed_operations) |candidate| {
        if (@intFromEnum(candidate) == op) return candidate;
    }
    return null;
}
968
+ };
969
+ }
970
+
971
/// A mutex with an `extern` layout, built on `std.Thread.Futex`.
/// Vendored from `std.Thread.Mutex.FutexImpl`.
const Locker = extern struct {
    const Futex = std.Thread.Futex;
    const unlocked: u32 = 0b00;
    const locked: u32 = 0b01;
    // Includes the `locked` bit, which the x86 path in `try_lock` relies on.
    const contended: u32 = 0b11;

    state: std.atomic.Value(u32) = std.atomic.Value(u32).init(unlocked),

    /// Acquires the lock, blocking on the futex while another thread holds it.
    fn lock(self: *Locker) void {
        if (self.try_lock()) return;
        self.lock_slow();
    }

    /// Attempts to acquire the lock without blocking; returns whether it was acquired.
    fn try_lock(self: *Locker) bool {
        // x86 uses `lock bts` rather than `lock cmpxchg`:
        // - both appear to mark the cache-line as modified anyway: https://stackoverflow.com/a/63350048.
        // - `lock bts` is the smaller instruction, which helps inlining.
        if (comptime builtin.target.cpu.arch.isX86()) {
            const locked_bit = @ctz(locked);
            const bit_before = self.state.bitSet(locked_bit, .acquire);
            return bit_before == 0;
        }

        // The acquire ordering makes taking the lock happen before the critical section,
        // and the previous holder's critical section happen before we take the lock.
        const observed = self.state.cmpxchgWeak(unlocked, locked, .acquire, .monotonic);
        return observed == null;
    }

    /// Contended path of `lock`: marks the state as `contended` and sleeps on the futex
    /// until the lock was observed as `unlocked`.
    fn lock_slow(self: *Locker) void {
        @branchHint(.cold);

        // If the state is already known to be contended, wait before swapping:
        // an atomic swap always stores, which would needlessly mark the cache-line
        // as modified.
        if (self.state.load(.monotonic) == contended) {
            Futex.wait(&self.state, contended);
        }

        // Acquire the lock while also telling the current holder that threads are waiting.
        //
        // After sleeping on the Futex, the mutex must be acquired with `contended`
        // rather than `locked`. Otherwise threads still sleeping on the Futex would not
        // see the state change in unlock and could deadlock.
        // The cost is that the last unlocker sees `contended` and issues one unnecessary
        // Futex wake, which is still better than waking every waiter on each unlock.
        //
        // The acquire ordering makes taking the lock happen before the critical section,
        // and the previous holder's critical section happen before we take the lock.
        while (true) {
            if (self.state.swap(contended, .acquire) == unlocked) return;
            Futex.wait(&self.state, contended);
        }
    }

    /// Releases the lock and wakes one waiter if the lock was contended.
    fn unlock(self: *Locker) void {
        // A waiter acquires with `contended` instead of `locked`, which guarantees that
        // it in turn wakes another thread on the next unlock().
        //
        // The release ordering makes our critical section happen before we let go of the
        // lock, and before the next holder takes it.
        const previous = self.state.swap(unlocked, .release);
        assert(previous != unlocked);

        if (previous == contended) Futex.wake(&self.state, 1);
    }
};
1044
+
1045
+ const testing = std.testing;
1046
test "Locker: smoke test" {
    var locker: Locker = .{};

    // A fresh lock can be taken once; a second attempt fails until it is released.
    const acquired_first = locker.try_lock();
    try testing.expect(acquired_first);
    const acquired_second = locker.try_lock();
    try testing.expect(!acquired_second);
    locker.unlock();

    // The blocking `lock` excludes `try_lock` in the same way.
    locker.lock();
    const acquired_while_locked = locker.try_lock();
    try testing.expect(!acquired_while_locked);
    locker.unlock();
}
1057
+
1058
// Several threads increment a shared counter under the lock until it reaches `increments`.
// If the lock did not provide mutual exclusion, the unsynchronized `counter` accesses
// would race.
test "Locker: contended" {
    const threads_count = 4;
    const increments = 1000;

    const State = struct {
        locker: Locker = .{},
        counter: u32 = 0,
    };

    const Runner = struct {
        thread: std.Thread = undefined,
        state: *State,

        fn run(self: *@This()) void {
            while (true) {
                self.state.locker.lock();
                defer self.state.locker.unlock();

                if (self.state.counter == increments) break;
                self.state.counter += 1;
            }
        }
    };

    var state = State{};
    var runners: [threads_count]Runner = undefined;
    {
        // Track how many threads were actually started. If a later `spawn` fails, the
        // threads already running are still joined before this frame (which owns `state`
        // and `runners`) goes away.
        var spawned: usize = 0;
        defer {
            for (runners[0..spawned]) |*runner| runner.thread.join();
        }

        for (&runners) |*runner| {
            runner.* = .{ .state = &state };
            runner.thread = try std.Thread.spawn(.{}, Runner.run, .{runner});
            spawned += 1;
        }
    }

    // `expectEqual` takes the expected value first.
    try testing.expectEqual(@as(u32, increments), state.counter);
}