tigerbeetle 0.0.34 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/ext/tb_client/extconf.rb +13 -13
  4. data/ext/tb_client/tigerbeetle/LICENSE +177 -0
  5. data/ext/tb_client/tigerbeetle/build.zig +2327 -0
  6. data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
  7. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
  8. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
  9. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
  10. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
  11. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
  12. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
  13. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
  14. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
  15. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
  16. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
  17. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
  18. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
  19. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
  20. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
  21. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
  22. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
  23. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
  24. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
  25. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
  26. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
  27. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
  28. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
  29. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
  30. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
  31. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
  32. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
  33. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
  34. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
  35. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
  36. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
  37. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
  38. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
  39. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
  40. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
  41. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
  42. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
  43. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
  44. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
  45. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
  46. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
  47. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
  48. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
  49. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
  50. data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
  51. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
  52. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
  53. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
  54. data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
  55. data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
  56. data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
  57. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
  58. data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
  59. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
  60. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
  61. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
  62. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
  63. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
  64. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
  65. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
  66. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
  67. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
  68. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
  69. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
  70. data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
  71. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
  72. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
  73. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
  74. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
  75. data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
  76. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
  77. data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
  78. data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
  79. data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
  80. data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
  81. data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
  82. data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
  83. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
  84. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
  85. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
  86. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
  87. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
  88. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
  89. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
  90. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
  91. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
  92. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
  93. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
  94. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
  95. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
  96. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
  97. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
  98. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
  99. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
  100. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
  101. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
  102. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
  103. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
  104. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
  105. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
  106. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
  107. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
  108. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
  109. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
  110. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
  111. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
  112. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
  113. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
  114. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
  115. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
  116. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
  117. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
  118. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
  119. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
  120. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
  121. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
  122. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
  123. data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
  124. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
  125. data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
  126. data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
  127. data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
  128. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
  129. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
  130. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
  131. data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
  132. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
  133. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
  134. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
  135. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
  136. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
  137. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
  138. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
  139. data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
  140. data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
  141. data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
  142. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
  143. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
  144. data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
  145. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
  146. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
  147. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
  148. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
  149. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
  150. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
  151. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
  152. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
  153. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
  154. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
  155. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
  156. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
  157. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
  158. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
  159. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
  160. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
  161. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
  162. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
  163. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
  164. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
  165. data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
  166. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
  167. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
  168. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
  169. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
  170. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
  171. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
  172. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
  173. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
  174. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
  175. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
  176. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
  177. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
  178. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
  179. data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
  180. data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
  181. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
  182. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
  183. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
  184. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
  185. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
  186. data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
  187. data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
  188. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
  189. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
  190. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
  191. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
  192. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
  193. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
  194. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
  195. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
  196. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
  197. data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
  198. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
  199. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
  200. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
  201. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
  202. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
  203. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
  204. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
  205. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
  206. data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
  207. data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
  208. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
  209. data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
  210. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
  211. data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
  212. data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
  213. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
  214. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
  215. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
  216. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
  217. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
  218. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
  219. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
  220. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
  221. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
  222. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
  223. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
  224. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
  225. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
  226. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
  227. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
  228. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
  229. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
  230. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
  231. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
  232. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
  233. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
  234. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
  235. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
  236. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
  237. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
  238. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
  239. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
  240. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
  241. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
  242. data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
  243. data/lib/tb_client/shared_lib.rb +12 -5
  244. data/lib/tigerbeetle/client.rb +1 -1
  245. data/lib/tigerbeetle/platforms.rb +9 -0
  246. data/lib/tigerbeetle/version.rb +2 -2
  247. data/tigerbeetle.gemspec +22 -5
  248. metadata +242 -3
  249. data/ext/tb_client/pkg.tar.gz +0 -0
@@ -0,0 +1,1880 @@
1
+ const std = @import("std");
2
+ const assert = std.debug.assert;
3
+ const os = std.os;
4
+ const posix = std.posix;
5
+ const linux = os.linux;
6
+ const IO_Uring = linux.IoUring;
7
+ const io_uring_cqe = linux.io_uring_cqe;
8
+ const io_uring_sqe = linux.io_uring_sqe;
9
+ const log = std.log.scoped(.io);
10
+
11
+ const constants = @import("../constants.zig");
12
+ const stdx = @import("stdx");
13
+ const common = @import("./common.zig");
14
+ const QueueType = @import("../queue.zig").QueueType;
15
+ const buffer_limit = @import("../io.zig").buffer_limit;
16
+ const DirectIO = @import("../io.zig").DirectIO;
17
+ const DoublyLinkedListType = @import("../list.zig").DoublyLinkedListType;
18
+ const parse_dirty_semver = stdx.parse_dirty_semver;
19
+ const maybe = stdx.maybe;
20
+
21
+ pub const IO = struct {
22
+ pub const TCPOptions = common.TCPOptions;
23
+ pub const ListenOptions = common.ListenOptions;
24
+ const CompletionList = DoublyLinkedListType(Completion, .awaiting_back, .awaiting_next);
25
+
26
+ ring: IO_Uring,
27
+
28
+ /// Operations not yet submitted to the kernel and waiting on available space in the
29
+ /// submission queue.
30
+ unqueued: QueueType(Completion) = QueueType(Completion).init(.{ .name = "io_unqueued" }),
31
+
32
+ /// Completions that are ready to have their callbacks run.
33
+ completed: QueueType(Completion) = QueueType(Completion).init(.{ .name = "io_completed" }),
34
+
35
+ // TODO Track these as metrics:
36
+ ios_queued: u32 = 0,
37
+ ios_in_kernel: u32 = 0,
38
+
39
+ /// The head of a doubly-linked list of all operations that are:
40
+ /// - in the submission queue, or
41
+ /// - in the kernel, or
42
+ /// - in the completion queue, or
43
+ /// - in the `completed` list (excluding zero-duration timeouts).
44
+ awaiting: CompletionList = .{},
45
+
46
+ // This is the completion that performs the cancellation.
47
+ // This is *not* the completion that is being canceled.
48
+ cancel_completion: Completion = undefined,
49
+
50
+ cancel_all_status: union(enum) {
51
+ // Not canceling.
52
+ inactive,
53
+ // Waiting to start canceling the next awaiting operation.
54
+ next,
55
+ // The target's cancellation SQE is queued; waiting for the cancellation's completion.
56
+ queued: struct { target: *Completion },
57
+ // Currently canceling the target operation.
58
+ wait: struct { target: *Completion },
59
+ // All operations have been canceled.
60
+ done,
61
+ } = .inactive,
62
+
63
+ pub fn init(entries: u12, flags: u32) !IO { // Returns an `IO` whose ring is `IO_Uring.init(entries, flags)`.
64
+ // Detect the Linux kernel version (from `uname`) to ensure that we support all io_uring ops used.
65
+ const uts = posix.uname();
66
+ const version = try parse_dirty_semver(&uts.release);
67
+ if (version.order(std.SemanticVersion{ .major = 5, .minor = 5, .patch = 0 }) == .lt) {
68
+ @panic("Linux kernel 5.5 or greater is required for io_uring OP_ACCEPT");
69
+ }
70
+
71
+ errdefer |err| switch (err) { // Declared after the `try` above, so it only logs hints for errors returned below.
72
+ error.SystemOutdated => {
73
+ log.err("io_uring is not available", .{});
74
+ log.err("likely cause: the syscall is disabled by seccomp", .{});
75
+ },
76
+ error.PermissionDenied => {
77
+ log.err("io_uring is not available", .{});
78
+ log.err("likely cause: the syscall is disabled by sysctl, " ++
79
+ "try 'sysctl -w kernel.io_uring_disabled=0'", .{});
80
+ },
81
+ else => {},
82
+ };
83
+
84
+ return IO{ .ring = try IO_Uring.init(entries, flags) };
85
+ }
86
+
87
+ pub fn deinit(self: *IO) void { // Deinitializes the underlying ring (`self.ring`).
88
+ self.ring.deinit();
89
+ }
90
+
91
+ /// Pass all queued submissions to the kernel and peek for completions (with a `wait_nr` of 0).
92
+ pub fn run(self: *IO) !void {
93
+ assert(self.cancel_all_status != .done);
94
+
95
+ // We assume that all timeouts submitted by `run_for_ns()` will be reaped by `run_for_ns()`
96
+ // and that `run()` and `run_for_ns()` cannot be run concurrently.
97
+ // Therefore `timeouts` here will never be decremented and `etime` will always be false.
98
+ var timeouts: usize = 0;
99
+ var etime = false;
100
+
101
+ try self.flush(0, &timeouts, &etime);
102
+ assert(etime == false);
103
+
104
+ // Flush any SQEs that were queued while running completion callbacks in `flush()`:
105
+ // This is an optimization to avoid delaying submissions until the next call to `run()`.
106
+ // At the same time, we do not flush any ready CQEs since SQEs may complete synchronously.
107
+ // We guard against an io_uring_enter() syscall if we know we do not have any queued SQEs.
108
+ // We cannot use `self.ring.sq_ready()` here since this counts flushed and unflushed SQEs.
109
+ const queued = self.ring.sq.sqe_tail -% self.ring.sq.sqe_head;
110
+ if (queued > 0) {
111
+ try self.flush_submissions(0, &timeouts, &etime);
112
+ assert(etime == false);
113
+ }
114
+ }
115
+
116
+ /// Pass all queued submissions to the kernel and run for `nanoseconds`.
117
+ /// The `nanoseconds` argument is a u63 to allow coercion to the i64 used
118
+ /// in the kernel_timespec struct.
119
+ pub fn run_for_ns(self: *IO, nanoseconds: u63) !void {
120
+ assert(self.cancel_all_status != .done);
121
+
122
+ // We must use the same clock source used by io_uring (CLOCK_MONOTONIC) since we specify the
123
+ // timeout below as an absolute value. Otherwise, we may deadlock if the clock sources are
124
+ // dramatically different. Any kernel that supports io_uring will support CLOCK_MONOTONIC.
125
+ const current_ts = posix.clock_gettime(posix.CLOCK.MONOTONIC) catch unreachable;
126
+ // The absolute CLOCK_MONOTONIC time after which we may return from this function:
127
+ const timeout_ts: os.linux.kernel_timespec = .{
128
+ .sec = current_ts.sec,
129
+ .nsec = current_ts.nsec + nanoseconds, // NOTE(review): not normalized into `sec`, so this may exceed one second; confirm the kernel accepts that.
130
+ };
131
+ var timeouts: usize = 0;
132
+ var etime = false;
133
+ while (!etime) {
134
+ const timeout_sqe = self.ring.get_sqe() catch blk: {
135
+ // The submission queue is full, so flush submissions to make space:
136
+ try self.flush_submissions(0, &timeouts, &etime);
137
+ break :blk self.ring.get_sqe() catch unreachable;
138
+ };
139
+ // Submit an absolute timeout that will be canceled if any other SQE completes first:
140
+ timeout_sqe.prep_timeout(&timeout_ts, 1, os.linux.IORING_TIMEOUT_ABS);
141
+ timeout_sqe.user_data = 0; // `flush_completions()` recognizes timeout CQEs by `user_data == 0`.
142
+ timeouts += 1;
143
+
144
+ // We don't really want to count this timeout as an io,
145
+ // but it's tricky to track separately.
146
+ self.ios_queued += 1;
147
+
148
+ // The amount of time this call will block is bounded by the timeout we just submitted:
149
+ try self.flush(1, &timeouts, &etime);
150
+ }
151
+ // Reap any remaining timeouts, which reference the timespec in the current stack frame.
152
+ // The busy loop here is required to avoid a potential deadlock, as the kernel determines
153
+ // when the timeouts are pushed to the completion queue, not us.
154
+ while (timeouts > 0) _ = try self.flush_completions(0, &timeouts, &etime);
155
+ }
156
+
157
+ fn flush(self: *IO, wait_nr: u32, timeouts: *usize, etime: *bool) !void { // Submits SQEs, reaps CQEs, retries `unqueued`, then runs ready completions.
158
+ // Flush any queued SQEs and reuse the same syscall to wait for completions if required:
159
+ try self.flush_submissions(wait_nr, timeouts, etime);
160
+ // We can now just peek for any CQEs without waiting and without another syscall:
161
+ try self.flush_completions(0, timeouts, etime);
162
+
163
+ // The SQE array is empty from flush_submissions(). Fill it up with unqueued completions.
164
+ // This runs before `self.completed` is flushed below to prevent new IO from reserving SQE
165
+ // slots and potentially starving those in `self.unqueued`.
166
+ // Loop over a copy to avoid an infinite loop of `enqueue()` re-adding to `self.unqueued`.
167
+ {
168
+ var copy = self.unqueued;
169
+ self.unqueued.reset();
170
+ while (copy.pop()) |completion| self.enqueue(completion);
171
+ }
172
+
173
+ // Run completions only after all completions have been flushed:
174
+ // Loop until all completions are processed. Calls to complete() may queue more work
175
+ // and extend the duration of the loop, but this is fine as it 1) executes completions
176
+ // that become ready without going through another syscall from flush_submissions() and
177
+ // 2) potentially queues more SQEs to take more advantage of the next flush_submissions().
178
+ while (self.completed.pop()) |completion| {
179
+ if (completion.operation == .timeout and
180
+ completion.operation.timeout.timespec.sec == 0 and
181
+ completion.operation.timeout.timespec.nsec == 0)
182
+ {
183
+ // Zero-duration timeouts are a special case, and aren't listed in `awaiting`.
184
+ maybe(self.awaiting.empty());
185
+ assert(completion.result == -@as(i32, @intFromEnum(posix.E.TIME)));
186
+ assert(completion.awaiting_back == null);
187
+ assert(completion.awaiting_next == null);
188
+ } else {
189
+ assert(!self.awaiting.empty());
190
+ self.awaiting.remove(completion);
191
+ }
192
+
193
+ switch (self.cancel_all_status) { // While `cancel_all()` is active, only the cancel completion's callback is run.
194
+ .inactive => completion.complete(),
195
+ .next => {},
196
+ .queued => if (completion.operation == .cancel) completion.complete(),
197
+ .wait => |wait| if (wait.target == completion) {
198
+ self.cancel_all_status = .next;
199
+ },
200
+ .done => unreachable,
201
+ }
202
+ }
203
+
204
+ // At this point, `unqueued` may hold completions that either 1) didn't get an SQE while
205
+ // `unqueued` was being popped above, or 2) were started by completion.complete(). These
206
+ // unqueued completions get priority in acquiring SQEs on the next flush().
207
+ }
208
+
209
+ fn flush_completions(self: *IO, wait_nr: u32, timeouts: *usize, etime: *bool) !void { // Copies CQEs off the ring: timeout CQEs update `timeouts`/`etime`, all others are pushed onto `self.completed`.
210
+ var cqes: [256]io_uring_cqe = undefined;
211
+ var wait_remaining = wait_nr;
212
+ while (true) {
213
+ // Guard against waiting indefinitely (if there are too few requests inflight),
214
+ // especially if this is not the first time round the loop:
215
+ const completed = self.ring.copy_cqes(&cqes, wait_remaining) catch |err| switch (err) {
216
+ error.SignalInterrupt => continue,
217
+ else => return err,
218
+ };
219
+ if (completed > wait_remaining) wait_remaining = 0 else wait_remaining -= completed; // Clamped at zero to avoid unsigned underflow.
220
+ for (cqes[0..completed]) |cqe| {
221
+ self.ios_in_kernel -= 1;
222
+
223
+ if (cqe.user_data == 0) {
224
+ timeouts.* -= 1;
225
+ // We are only done if the timeout submitted was completed due to time, not if
226
+ // it was completed due to the completion of an event, in which case `cqe.res`
227
+ // would be 0. It is possible for multiple timeout operations to complete at the
228
+ // same time if the nanoseconds value passed to `run_for_ns()` is very short.
229
+ if (-cqe.res == @intFromEnum(posix.E.TIME)) etime.* = true;
230
+ continue;
231
+ }
232
+ const completion: *Completion = @ptrFromInt(cqe.user_data);
233
+ completion.result = cqe.res;
234
+ // We do not run the completion here (instead appending to a linked list) to avoid:
235
+ // * recursion through `flush_submissions()` and `flush_completions()`,
236
+ // * unbounded stack usage, and
237
+ // * confusing stack traces.
238
+ self.completed.push(completion);
239
+ }
240
+
241
+ if (completed < cqes.len) break; // Fewer CQEs than the buffer holds were copied, so stop looping.
242
+ }
243
+ }
244
+
245
+ fn flush_submissions(self: *IO, wait_nr: u32, timeouts: *usize, etime: *bool) !void { // Calls `submit_and_wait(wait_nr)` until it succeeds or returns an unhandled error.
246
+ while (true) {
247
+ const submitted = self.ring.submit_and_wait(wait_nr) catch |err| switch (err) {
248
+ error.SignalInterrupt => continue,
249
+ // Wait for some completions and then try again:
250
+ // See https://github.com/axboe/liburing/issues/281 re: error.SystemResources.
251
+ // Be careful also that copy_cqes() will flush before entering to wait (it does):
252
+ // https://github.com/axboe/liburing/commit/35c199c48dfd54ad46b96e386882e7ac341314c5
253
+ error.CompletionQueueOvercommitted, error.SystemResources => {
254
+ try self.flush_completions(1, timeouts, etime);
255
+ continue;
256
+ },
257
+ else => return err,
258
+ };
259
+
260
+ self.ios_queued -= submitted; // Submitted SQEs move from the queued count to the in-kernel count.
261
+ self.ios_in_kernel += submitted;
262
+
263
+ break;
264
+ }
265
+ }
266
+
267
+ fn enqueue(self: *IO, completion: *Completion) void { // Preps an SQE for `completion`, or parks it on `self.unqueued` if the submission queue is full.
268
+ switch (self.cancel_all_status) {
269
+ .inactive => {},
270
+ .queued => assert(completion.operation == .cancel), // During `cancel_all()` only the cancel operation may be enqueued.
271
+ else => unreachable, // Enqueueing while `.next`, `.wait` or `.done` is a bug.
272
+ }
273
+
274
+ const sqe = self.ring.get_sqe() catch |err| switch (err) {
275
+ error.SubmissionQueueFull => {
276
+ self.unqueued.push(completion);
277
+ return;
278
+ },
279
+ };
280
+ completion.prep(sqe);
281
+
282
+ self.awaiting.push(completion);
283
+ self.ios_queued += 1;
284
+ }
285
+
286
+ /// `cancel_all()` should be invoked at most once, before any of the memory owned by read/recv buffers
287
+ /// is freed (so that lingering async operations do not write to them).
288
+ ///
289
+ /// After this function is invoked:
290
+ /// - No more completion callbacks will be called.
291
+ /// - No more IO may be submitted.
292
+ ///
293
+ /// This function doesn't return until either:
294
+ /// - All events submitted to io_uring have completed.
295
+ /// (They may complete with `error.Canceled`).
296
+ /// - Or, an io_uring error occurs.
297
+ ///
298
+ /// TODO(Linux):
299
+ /// - Linux kernel ≥5.19 supports the IORING_ASYNC_CANCEL_ALL and IORING_ASYNC_CANCEL_ANY flags,
300
+ /// which would allow all events to be cancelled simultaneously with a single "cancel"
301
+ /// operation, without IO needing to maintain the `awaiting` doubly-linked list and the `next`
302
+ /// cancellation stage.
303
+ /// - Linux kernel ≥6.0 supports `io_uring_register_sync_cancel` which would remove the `queued`
304
+ /// cancellation stage.
305
+ pub fn cancel_all(self: *IO) void {
306
+ assert(self.cancel_all_status == .inactive); // Enforces the "at most once" contract: the status ends as `.done`.
307
+
308
+ // Even if we return early due to an io_uring error, IO won't allow more operations.
309
+ defer self.cancel_all_status = .done;
310
+
311
+ self.cancel_all_status = .next;
312
+
313
+ // Discard any operations that haven't started yet.
314
+ while (self.unqueued.pop()) |_| {}
315
+
316
+ while (self.awaiting.tail) |target| { // Cancel one awaiting operation per iteration, starting from the list's tail.
317
+ assert(!self.awaiting.empty());
318
+ assert(self.cancel_all_status == .next);
319
+ assert(target.operation != .cancel);
320
+
321
+ self.cancel_all_status = .{ .queued = .{ .target = target } };
322
+
323
+ self.cancel(
324
+ *IO,
325
+ self,
326
+ cancel_all_callback,
327
+ .{
328
+ .completion = &self.cancel_completion,
329
+ .target = target,
330
+ },
331
+ );
332
+
333
+ while (self.cancel_all_status == .queued or self.cancel_all_status == .wait) { // Drive the ring until `flush()` or the callback resets the status to `.next`.
334
+ self.run_for_ns(constants.tick_ms * std.time.ns_per_ms) catch |err| {
335
+ std.debug.panic("IO.cancel_all: run_for_ns error: {}", .{err});
336
+ };
337
+ }
338
+ assert(self.cancel_all_status == .next);
339
+ }
340
+ assert(self.awaiting.empty());
341
+ assert(self.ios_queued == 0);
342
+ assert(self.ios_in_kernel == 0);
343
+ }
344
+
345
/// Completion callback for the cancel request issued by `cancel_all`.
/// Moves `cancel_all_status` from `.queued` to `.next` when the target was not running,
/// or to `.wait` (for the same target) otherwise.
fn cancel_all_callback(self: *IO, completion: *Completion, result: CancelError!void) void {
    assert(self.cancel_all_status == .queued);
    assert(completion == &self.cancel_completion);
    assert(completion.operation == .cancel);

    const target = self.cancel_all_status.queued.target;
    assert(completion.operation.cancel.target == target);

    const target_not_running = if (result) |_| false else |err| switch (err) {
        error.NotRunning => true,
        error.NotInterruptable => false,
        error.Unexpected => unreachable,
    };

    if (target_not_running) {
        self.cancel_all_status = .next;
    } else {
        // Wait for the target operation to complete or abort.
        self.cancel_all_status = .{ .wait = .{ .target = target } };
    }
}
361
+
362
/// Errors delivered to `cancel` callbacks.
pub const CancelError = error{
    /// Reported for `ALREADY`: the target is too far along to be canceled.
    NotInterruptable,
    /// Reported for `NOENT`: no matching operation is queued.
    NotRunning,
} || posix.UnexpectedError;
366
+
367
/// Queues a request to cancel `options.target`.
/// `options.completion` is overwritten and used to track the cancel request itself;
/// `callback` is invoked with the outcome once that request completes.
pub fn cancel(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: CancelError!void,
    ) void,
    options: struct {
        completion: *Completion,
        target: *Completion,
    },
) void {
    const cancel_completion = options.completion;
    const operation: Operation = .{ .cancel = .{ .target = options.target } };

    cancel_completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, CancelError!void, callback),
        .operation = operation,
    };

    self.enqueue(cancel_completion);
}
390
+
391
+ /// This struct holds the data needed for a single io_uring operation.
392
+ pub const Completion = struct {
393
+ io: *IO,
394
+ result: i32 = undefined,
395
+ link: QueueType(Completion).Link = .{},
396
+ operation: Operation,
397
+ context: ?*anyopaque,
398
+ callback: *const fn (
399
+ context: ?*anyopaque,
400
+ completion: *Completion,
401
+ result: *const anyopaque,
402
+ ) void,
403
+
404
+ /// Used by the `IO.awaiting` doubly-linked list.
405
+ awaiting_back: ?*Completion = null,
406
+ awaiting_next: ?*Completion = null,
407
+
408
/// Fills `sqe` with the io_uring request described by `completion.operation`, then stores the
/// completion's address in `sqe.user_data`.
fn prep(completion: *Completion, sqe: *io_uring_sqe) void {
    switch (completion.operation) {
        .cancel => |cancel_op| sqe.prep_cancel(@intFromPtr(cancel_op.target), 0),
        // Captured by pointer: addresses of fields stored inside the completion are passed on.
        .accept => |*accept_op| sqe.prep_accept(
            accept_op.socket,
            &accept_op.address,
            &accept_op.address_size,
            posix.SOCK.CLOEXEC,
        ),
        .close => |close_op| sqe.prep_close(close_op.fd),
        // Captured by pointer: the address stored inside the completion is passed on.
        .connect => |*connect_op| sqe.prep_connect(
            connect_op.socket,
            &connect_op.address.any,
            connect_op.address.getOsSockLen(),
        ),
        .fsync => |fsync_op| sqe.prep_fsync(fsync_op.fd, fsync_op.flags),
        .openat => |open_op| sqe.prep_openat(
            open_op.dir_fd,
            open_op.file_path,
            open_op.flags,
            open_op.mode,
        ),
        .read => |read_op| {
            const clamped = read_op.buffer[0..buffer_limit(read_op.buffer.len)];
            sqe.prep_read(read_op.fd, clamped, read_op.offset);
        },
        .recv => |recv_op| sqe.prep_recv(recv_op.socket, recv_op.buffer, 0),
        .send => |send_op| sqe.prep_send(send_op.socket, send_op.buffer, posix.MSG.NOSIGNAL),
        .statx => |statx_op| sqe.prep_statx(
            statx_op.dir_fd,
            statx_op.file_path,
            statx_op.flags,
            statx_op.mask,
            statx_op.statxbuf,
        ),
        // Captured by pointer: the timespec stored inside the completion is passed on.
        .timeout => |*timeout_op| sqe.prep_timeout(&timeout_op.timespec, 0, 0),
        .write => |write_op| {
            const clamped = write_op.buffer[0..buffer_limit(write_op.buffer.len)];
            sqe.prep_write(write_op.fd, clamped, write_op.offset);
        },
    }
    // NOTE(review): kept after the switch; presumably the `prep_*` helpers reinitialize the
    // entry (including `user_data`) — confirm before reordering.
    sqe.user_data = @intFromPtr(completion);
}
477
+
478
/// Converts the raw result code in `completion.result` into the typed error union of the
/// completed operation and invokes `completion.callback` with a pointer to that result.
///
/// The result lives on this function's stack, so the pointer handed to the callback is only
/// valid for the duration of the call.
///
/// For most operations an `INTR` result (and additionally `AGAIN` for reads) re-enqueues the
/// completion and returns without invoking the callback.
fn complete(completion: *Completion) void {
    switch (completion.operation) {
        .cancel => {
            const result: CancelError!void = result: {
                if (completion.result < 0) {
                    break :result switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        // No operation matching the completion is queued, so there is
                        // nothing to cancel.
                        .NOENT => error.NotRunning,
                        // The operation is far enough along that it cannot be canceled.
                        // It should complete soon.
                        .ALREADY => error.NotInterruptable,
                        // SQE is invalid.
                        .INVAL => unreachable,
                        else => |errno| stdx.unexpected_errno("cancel", errno),
                    };
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .accept => {
            const result: AcceptError!socket_t = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR => {
                            completion.io.enqueue(completion);
                            return;
                        },
                        .AGAIN => error.WouldBlock,
                        .BADF => error.FileDescriptorInvalid,
                        .CONNABORTED => error.ConnectionAborted,
                        .FAULT => unreachable,
                        .INVAL => error.SocketNotListening,
                        .MFILE => error.ProcessFdQuotaExceeded,
                        .NFILE => error.SystemFdQuotaExceeded,
                        .NOBUFS => error.SystemResources,
                        .NOMEM => error.SystemResources,
                        .NOTSOCK => error.FileDescriptorNotASocket,
                        .OPNOTSUPP => error.OperationNotSupported,
                        .PERM => error.PermissionDenied,
                        .PROTO => error.ProtocolFailure,
                        else => |errno| stdx.unexpected_errno("accept", errno),
                    };
                    break :blk err;
                } else {
                    break :blk @intCast(completion.result);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .close => {
            const result: CloseError!void = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        // A success, see https://github.com/ziglang/zig/issues/2425.
                        .INTR => {},
                        .BADF => error.FileDescriptorInvalid,
                        .DQUOT => error.DiskQuota,
                        .IO => error.InputOutput,
                        .NOSPC => error.NoSpaceLeft,
                        else => |errno| stdx.unexpected_errno("close", errno),
                    };
                    break :blk err;
                } else {
                    assert(completion.result == 0);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .connect => {
            const result: ConnectError!void = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR => {
                            completion.io.enqueue(completion);
                            return;
                        },
                        .ACCES => error.AccessDenied,
                        .ADDRINUSE => error.AddressInUse,
                        .ADDRNOTAVAIL => error.AddressNotAvailable,
                        .AFNOSUPPORT => error.AddressFamilyNotSupported,
                        .AGAIN, .INPROGRESS => error.WouldBlock,
                        .ALREADY => error.OpenAlreadyInProgress,
                        .BADF => error.FileDescriptorInvalid,
                        .CANCELED => error.Canceled,
                        .CONNREFUSED => error.ConnectionRefused,
                        .CONNRESET => error.ConnectionResetByPeer,
                        .FAULT => unreachable,
                        .ISCONN => error.AlreadyConnected,
                        .NETUNREACH => error.NetworkUnreachable,
                        .HOSTUNREACH => error.HostUnreachable,
                        .NOENT => error.FileNotFound,
                        .NOTSOCK => error.FileDescriptorNotASocket,
                        .PERM => error.PermissionDenied,
                        .PROTOTYPE => error.ProtocolNotSupported,
                        .TIMEDOUT => error.ConnectionTimedOut,
                        else => |errno| stdx.unexpected_errno("connect", errno),
                    };
                    break :blk err;
                } else {
                    assert(completion.result == 0);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .fsync => {
            // Must be `FsyncError!void`: `fsync` erases its callback for exactly that type,
            // so the pointer passed below is reinterpreted as `*const FsyncError!void`.
            const result: FsyncError!void = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR => {
                            completion.io.enqueue(completion);
                            return;
                        },
                        .BADF => error.FileDescriptorInvalid,
                        .IO => error.InputOutput,
                        .INVAL => unreachable,
                        else => |errno| stdx.unexpected_errno("fsync", errno),
                    };
                    break :blk err;
                } else {
                    assert(completion.result == 0);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .openat => {
            const result: OpenatError!fd_t = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR => {
                            completion.io.enqueue(completion);
                            return;
                        },
                        .FAULT => unreachable,
                        .INVAL => unreachable,
                        .BADF => unreachable,
                        .ACCES => error.AccessDenied,
                        .FBIG => error.FileTooBig,
                        .OVERFLOW => error.FileTooBig,
                        .ISDIR => error.IsDir,
                        .LOOP => error.SymLinkLoop,
                        .MFILE => error.ProcessFdQuotaExceeded,
                        .NAMETOOLONG => error.NameTooLong,
                        .NFILE => error.SystemFdQuotaExceeded,
                        .NODEV => error.NoDevice,
                        .NOENT => error.FileNotFound,
                        .NOMEM => error.SystemResources,
                        .NOSPC => error.NoSpaceLeft,
                        .NOTDIR => error.NotDir,
                        .PERM => error.AccessDenied,
                        .EXIST => error.PathAlreadyExists,
                        .BUSY => error.DeviceBusy,
                        .OPNOTSUPP => error.FileLocksNotSupported,
                        .AGAIN => error.WouldBlock,
                        .TXTBSY => error.FileBusy,
                        else => |errno| stdx.unexpected_errno("openat", errno),
                    };
                    break :blk err;
                } else {
                    break :blk @intCast(completion.result);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .read => {
            const result: ReadError!usize = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR, .AGAIN => {
                            // Some file systems, like XFS, can return EAGAIN even when
                            // reading from a blocking file without flags like RWF_NOWAIT.
                            completion.io.enqueue(completion);
                            return;
                        },
                        .BADF => error.NotOpenForReading,
                        .CONNRESET => error.ConnectionResetByPeer,
                        .FAULT => unreachable,
                        .INVAL => error.Alignment,
                        .IO => error.InputOutput,
                        .ISDIR => error.IsDir,
                        .NOBUFS => error.SystemResources,
                        .NOMEM => error.SystemResources,
                        .NXIO => error.Unseekable,
                        .OVERFLOW => error.Unseekable,
                        .SPIPE => error.Unseekable,
                        .TIMEDOUT => error.ConnectionTimedOut,
                        else => |errno| stdx.unexpected_errno("read", errno),
                    };
                    break :blk err;
                } else {
                    break :blk @intCast(completion.result);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .recv => {
            const result: RecvError!usize = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR => {
                            completion.io.enqueue(completion);
                            return;
                        },
                        .AGAIN => error.WouldBlock,
                        .BADF => error.FileDescriptorInvalid,
                        .CANCELED => error.Canceled,
                        .CONNREFUSED => error.ConnectionRefused,
                        .FAULT => unreachable,
                        .INVAL => unreachable,
                        .NOMEM => error.SystemResources,
                        .NOTCONN => error.SocketNotConnected,
                        .NOTSOCK => error.FileDescriptorNotASocket,
                        .CONNRESET => error.ConnectionResetByPeer,
                        .TIMEDOUT => error.ConnectionTimedOut,
                        .OPNOTSUPP => error.OperationNotSupported,
                        else => |errno| stdx.unexpected_errno("recv", errno),
                    };
                    break :blk err;
                } else {
                    break :blk @intCast(completion.result);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .send => {
            const result: SendError!usize = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR => {
                            completion.io.enqueue(completion);
                            return;
                        },
                        .ACCES => error.AccessDenied,
                        .AGAIN => error.WouldBlock,
                        .ALREADY => error.FastOpenAlreadyInProgress,
                        .AFNOSUPPORT => error.AddressFamilyNotSupported,
                        .BADF => error.FileDescriptorInvalid,
                        // Can happen when send()'ing to a UDP socket.
                        .CONNREFUSED => error.ConnectionRefused,
                        .CONNRESET => error.ConnectionResetByPeer,
                        .DESTADDRREQ => unreachable,
                        .FAULT => unreachable,
                        .INVAL => unreachable,
                        .ISCONN => unreachable,
                        .MSGSIZE => error.MessageTooBig,
                        .NOBUFS => error.SystemResources,
                        .NOMEM => error.SystemResources,
                        .NOTCONN => error.SocketNotConnected,
                        .NOTSOCK => error.FileDescriptorNotASocket,
                        .OPNOTSUPP => error.OperationNotSupported,
                        .PIPE => error.BrokenPipe,
                        .TIMEDOUT => error.ConnectionTimedOut,
                        .CANCELED => error.Canceled,
                        else => |errno| stdx.unexpected_errno("send", errno),
                    };
                    break :blk err;
                } else {
                    break :blk @intCast(completion.result);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .statx => {
            const result: StatxError!void = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR => {
                            completion.io.enqueue(completion);
                            return;
                        },
                        .FAULT => unreachable,
                        .INVAL => unreachable,
                        .BADF => unreachable,
                        .ACCES => error.AccessDenied,
                        .LOOP => error.SymLinkLoop,
                        .NAMETOOLONG => error.NameTooLong,
                        .NOENT => error.FileNotFound,
                        .NOMEM => error.SystemResources,
                        .NOTDIR => error.NotDir,
                        else => |errno| stdx.unexpected_errno("statx", errno),
                    };
                    break :blk err;
                } else {
                    assert(completion.result == 0);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
        .timeout => {
            // A timeout always finishes with a negative code; `TIME` is the success case.
            assert(completion.result < 0);
            const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                .INTR => {
                    completion.io.enqueue(completion);
                    return;
                },
                .CANCELED => error.Canceled,
                .TIME => {}, // A success.
                else => |errno| stdx.unexpected_errno("timeout", errno),
            };
            const result: TimeoutError!void = err;
            completion.callback(completion.context, completion, &result);
        },
        .write => {
            const result: WriteError!usize = blk: {
                if (completion.result < 0) {
                    const err = switch (@as(posix.E, @enumFromInt(-completion.result))) {
                        .INTR => {
                            completion.io.enqueue(completion);
                            return;
                        },
                        .AGAIN => error.WouldBlock,
                        .BADF => error.NotOpenForWriting,
                        .DESTADDRREQ => error.NotConnected,
                        .DQUOT => error.DiskQuota,
                        .FAULT => unreachable,
                        .FBIG => error.FileTooBig,
                        .INVAL => error.Alignment,
                        .IO => error.InputOutput,
                        .NOSPC => error.NoSpaceLeft,
                        .NXIO => error.Unseekable,
                        .OVERFLOW => error.Unseekable,
                        .PERM => error.AccessDenied,
                        .PIPE => error.BrokenPipe,
                        .SPIPE => error.Unseekable,
                        else => |errno| stdx.unexpected_errno("write", errno),
                    };
                    break :blk err;
                } else {
                    break :blk @intCast(completion.result);
                }
            };
            completion.callback(completion.context, completion, &result);
        },
    }
}
813
+ };
814
+
815
/// The operations that can be submitted, each variant carrying that operation's arguments.
const Operation = union(enum) {
    /// Cancel the operation tracked by `target`.
    cancel: struct {
        target: *Completion,
    },
    /// Accept on `socket`; `address` and `address_size` are passed by pointer when the
    /// request is prepared.
    accept: struct {
        socket: socket_t,
        address: posix.sockaddr = undefined,
        address_size: posix.socklen_t = @sizeOf(posix.sockaddr),
    },
    /// Close `fd`.
    close: struct {
        fd: fd_t,
    },
    /// Connect `socket` to `address`.
    connect: struct {
        socket: socket_t,
        address: std.net.Address,
    },
    /// Sync `fd` using the given fsync `flags`.
    fsync: struct {
        fd: fd_t,
        flags: u32,
    },
    /// Open `file_path` relative to `dir_fd`.
    openat: struct {
        dir_fd: fd_t,
        file_path: [*:0]const u8,
        flags: posix.O,
        mode: posix.mode_t,
    },
    /// Read from `fd` at `offset` into `buffer`.
    read: struct {
        fd: fd_t,
        buffer: []u8,
        offset: u64,
    },
    /// Receive from `socket` into `buffer`.
    recv: struct {
        socket: socket_t,
        buffer: []u8,
    },
    /// Send `buffer` on `socket`.
    send: struct {
        socket: socket_t,
        buffer: []const u8,
    },
    /// Stat `file_path` relative to `dir_fd`, writing into `statxbuf`.
    statx: struct {
        dir_fd: fd_t,
        file_path: [*:0]const u8,
        flags: u32,
        mask: u32,
        statxbuf: *std.os.linux.Statx,
    },
    /// Timeout described by `timespec`, which is passed by pointer when the request is
    /// prepared.
    timeout: struct {
        timespec: os.linux.kernel_timespec,
    },
    /// Write `buffer` to `fd` at `offset`.
    write: struct {
        fd: fd_t,
        buffer: []const u8,
        offset: u64,
    },
};
871
+
872
/// Errors delivered to `accept` callbacks.
pub const AcceptError = error{
    ConnectionAborted,
    FileDescriptorInvalid,
    FileDescriptorNotASocket,
    OperationNotSupported,
    PermissionDenied,
    ProcessFdQuotaExceeded,
    ProtocolFailure,
    SocketNotListening,
    SystemFdQuotaExceeded,
    SystemResources,
    WouldBlock,
} || posix.UnexpectedError;
885
+
886
/// Queues an asynchronous accept on `socket`.
/// `completion` is overwritten; `callback` later receives the accepted socket or an
/// `AcceptError`.
pub fn accept(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: AcceptError!socket_t,
    ) void,
    completion: *Completion,
    socket: socket_t,
) void {
    const operation: Operation = .{
        .accept = .{
            .socket = socket,
            .address = undefined,
            .address_size = @sizeOf(posix.sockaddr),
        },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, AcceptError!socket_t, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
912
+
913
/// Errors delivered to `close` callbacks.
pub const CloseError = error{
    DiskQuota,
    FileDescriptorInvalid,
    InputOutput,
    NoSpaceLeft,
} || posix.UnexpectedError;
919
+
920
/// Queues an asynchronous close of `fd`.
/// `completion` is overwritten; `callback` later receives the outcome.
pub fn close(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: CloseError!void,
    ) void,
    completion: *Completion,
    fd: fd_t,
) void {
    const operation: Operation = .{ .close = .{ .fd = fd } };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, CloseError!void, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
942
+
943
/// Errors delivered to `connect` callbacks.
pub const ConnectError = error{
    AccessDenied,
    AddressFamilyNotSupported,
    AddressInUse,
    AddressNotAvailable,
    AlreadyConnected,
    Canceled,
    ConnectionRefused,
    ConnectionResetByPeer,
    ConnectionTimedOut,
    FileDescriptorInvalid,
    FileDescriptorNotASocket,
    FileNotFound,
    HostUnreachable,
    NetworkUnreachable,
    OpenAlreadyInProgress,
    PermissionDenied,
    ProtocolNotSupported,
    SystemResources,
    WouldBlock,
} || posix.UnexpectedError;
964
+
965
/// Queues an asynchronous connect of `socket` to `address`.
/// `completion` is overwritten (the address is copied into it); `callback` later receives the
/// outcome.
pub fn connect(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: ConnectError!void,
    ) void,
    completion: *Completion,
    socket: socket_t,
    address: std.net.Address,
) void {
    const operation: Operation = .{
        .connect = .{ .socket = socket, .address = address },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, ConnectError!void, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
991
+
992
/// Errors delivered to `fsync` callbacks.
pub const FsyncError = error{
    InputOutput,
    FileDescriptorInvalid,
} || posix.UnexpectedError;
996
+
997
/// Queues an asynchronous fsync of `fd`, always with the `IORING_FSYNC_DATASYNC` flag.
/// `completion` is overwritten; `callback` later receives the outcome.
pub fn fsync(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: FsyncError!void,
    ) void,
    completion: *Completion,
    fd: fd_t,
) void {
    const operation: Operation = .{
        .fsync = .{ .fd = fd, .flags = os.linux.IORING_FSYNC_DATASYNC },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, FsyncError!void, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
1022
+
1023
+ pub const OpenatError = posix.OpenError || posix.UnexpectedError;
1024
+
1025
/// Queues an asynchronous open of `file_path` relative to `dir_fd`.
/// `CLOEXEC` is always added to the caller's `flags`.
/// `completion` is overwritten; `callback` later receives the new descriptor or an
/// `OpenatError`.
pub fn openat(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: OpenatError!fd_t,
    ) void,
    completion: *Completion,
    dir_fd: fd_t,
    file_path: [*:0]const u8,
    flags: posix.O,
    mode: posix.mode_t,
) void {
    var flags_cloexec = flags;
    flags_cloexec.CLOEXEC = true;

    const operation: Operation = .{
        .openat = .{
            .dir_fd = dir_fd,
            .file_path = file_path,
            .flags = flags_cloexec,
            .mode = mode,
        },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, OpenatError!fd_t, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
1058
+
1059
/// Errors delivered to `read` callbacks.
pub const ReadError = error{
    Alignment,
    ConnectionResetByPeer,
    ConnectionTimedOut,
    InputOutput,
    IsDir,
    NotOpenForReading,
    SystemResources,
    Unseekable,
    WouldBlock,
} || posix.UnexpectedError;
1070
+
1071
/// Queues an asynchronous read from `fd` at `offset` into `buffer`.
/// `completion` is overwritten; `callback` later receives the non-negative result of the read
/// or a `ReadError`.
pub fn read(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: ReadError!usize,
    ) void,
    completion: *Completion,
    fd: fd_t,
    buffer: []u8,
    offset: u64,
) void {
    const operation: Operation = .{
        .read = .{ .fd = fd, .buffer = buffer, .offset = offset },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, ReadError!usize, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
1099
+
1100
/// Errors delivered to `recv` callbacks.
pub const RecvError = error{
    Canceled,
    ConnectionRefused,
    ConnectionResetByPeer,
    ConnectionTimedOut,
    FileDescriptorInvalid,
    FileDescriptorNotASocket,
    OperationNotSupported,
    SocketNotConnected,
    SystemResources,
    WouldBlock,
} || posix.UnexpectedError;
1112
+
1113
/// Queues an asynchronous receive from `socket` into `buffer`.
/// `completion` is overwritten; `callback` later receives the non-negative result of the
/// receive or a `RecvError`.
pub fn recv(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: RecvError!usize,
    ) void,
    completion: *Completion,
    socket: socket_t,
    buffer: []u8,
) void {
    const operation: Operation = .{
        .recv = .{ .socket = socket, .buffer = buffer },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, RecvError!usize, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
1139
+
1140
/// Errors delivered to `send` callbacks.
pub const SendError = error{
    AccessDenied,
    AddressFamilyNotSupported,
    BrokenPipe,
    Canceled,
    ConnectionRefused,
    ConnectionResetByPeer,
    ConnectionTimedOut,
    FastOpenAlreadyInProgress,
    FileDescriptorInvalid,
    FileDescriptorNotASocket,
    MessageTooBig,
    OperationNotSupported,
    SocketNotConnected,
    SystemResources,
    WouldBlock,
} || posix.UnexpectedError;
1157
+
1158
/// Queues an asynchronous send of `buffer` on `socket`.
/// `completion` is overwritten; `callback` later receives the non-negative result of the send
/// or a `SendError`.
pub fn send(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: SendError!usize,
    ) void,
    completion: *Completion,
    socket: socket_t,
    buffer: []const u8,
) void {
    const operation: Operation = .{
        .send = .{ .socket = socket, .buffer = buffer },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, SendError!usize, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
1184
+
1185
/// Best-effort synchronous, non-blocking transfer of `buffer` to the kernel.
/// Returns the value reported by `sendto` on success, or `null` on any failure
/// (including `WouldBlock`), in which case the caller should fall back to `send`.
pub fn send_now(self: *IO, socket: socket_t, buffer: []const u8) ?usize {
    _ = self;

    const flags = posix.MSG.DONTWAIT | posix.MSG.NOSIGNAL;

    // posix.send is a thin wrapper around posix.sendto() that assumes the socket is connected
    // and has an `unreachable` on eg NetworkUnreachable and a few others. Trying to check this
    // before using the socket is race prone, so rather use sendto() directly to correctly
    // handle those cases.
    //
    // Every error, `WouldBlock` included, maps to `null`: to avoid duplicating error handling,
    // the caller is forced to fall back to the normal send.
    return posix.sendto(socket, buffer, flags, null, 0) catch return null;
}
1204
+
1205
/// Errors delivered to `statx` callbacks.
pub const StatxError = error{
    FileNotFound,
    NameTooLong,
    NotDir,
    SymLinkLoop,
} || std.fs.File.StatError || posix.UnexpectedError;
1211
+
1212
/// Queues an asynchronous statx of `file_path` relative to `dir_fd`, writing into `statxbuf`.
/// `completion` is overwritten; `callback` later receives the outcome.
pub fn statx(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: StatxError!void,
    ) void,
    completion: *Completion,
    dir_fd: fd_t,
    file_path: [*:0]const u8,
    flags: u32,
    mask: u32,
    statxbuf: *std.os.linux.Statx,
) void {
    const operation: Operation = .{
        .statx = .{
            .dir_fd = dir_fd,
            .file_path = file_path,
            .flags = flags,
            .mask = mask,
            .statxbuf = statxbuf,
        },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, StatxError!void, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
1244
+
1245
+ pub const TimeoutError = error{Canceled} || posix.UnexpectedError;
1246
+
1247
/// Arms a timeout of `nanoseconds`; `callback` is invoked once it completes.
/// `completion` is overwritten.
///
/// A zero timeout acts as a yield: the completion is marked as already expired (`TIME`) and
/// pushed straight onto the `completed` queue instead of being enqueued.
pub fn timeout(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: TimeoutError!void,
    ) void,
    completion: *Completion,
    nanoseconds: u63,
) void {
    const operation: Operation = .{
        .timeout = .{
            .timespec = .{ .sec = 0, .nsec = nanoseconds },
        },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, TimeoutError!void, callback),
        .operation = operation,
    };

    if (nanoseconds > 0) {
        self.enqueue(completion);
        return;
    }

    // Special case a zero timeout as a yield.
    completion.result = -@as(i32, @intFromEnum(posix.E.TIME));
    self.completed.push(completion);
}
1279
+
1280
/// Errors delivered to `write` callbacks.
pub const WriteError = error{
    AccessDenied,
    Alignment,
    BrokenPipe,
    DiskQuota,
    FileTooBig,
    InputOutput,
    NoSpaceLeft,
    NotConnected,
    NotOpenForWriting,
    Unseekable,
    WouldBlock,
} || posix.UnexpectedError;
1293
+
1294
/// Queues an asynchronous write of `buffer` to `fd` at `offset`.
/// `completion` is overwritten; `callback` later receives the non-negative result of the
/// write or a `WriteError`.
pub fn write(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: WriteError!usize,
    ) void,
    completion: *Completion,
    fd: fd_t,
    buffer: []const u8,
    offset: u64,
) void {
    const operation: Operation = .{
        .write = .{ .fd = fd, .buffer = buffer, .offset = offset },
    };

    completion.* = .{
        .io = self,
        .context = context,
        .callback = erase_types(Context, WriteError!usize, callback),
        .operation = operation,
    };
    self.enqueue(completion);
}
1322
+
1323
+ pub const Event = posix.fd_t;
1324
+ pub const INVALID_EVENT: Event = -1;
1325
+
1326
/// Creates a new eventfd (close-on-exec, initial counter value zero) to be used as an `Event`.
///
/// Returns `error.SystemResources` when the system is out of resources or a file descriptor
/// quota is exceeded, and `error.Unexpected` for anything else.
pub fn open_event(self: *IO) !Event {
    _ = self;

    // eventfd initialized with no (zero) previous write value.
    const event_fd = posix.eventfd(0, linux.EFD.CLOEXEC) catch |err| switch (err) {
        error.SystemResources,
        error.SystemFdQuotaExceeded,
        error.ProcessFdQuotaExceeded,
        => return error.SystemResources,
        error.Unexpected => return error.Unexpected,
    };
    assert(event_fd != INVALID_EVENT);
    // Nothing below can fail today; this guards against leaking the descriptor if a fallible
    // step is added later. Uses `posix.close`, the same call `close_event` uses.
    errdefer posix.close(event_fd);

    return event_fd;
}
1342
+
1343
/// Queues an 8-byte read on `event` using `completion`; once the read completes, `on_event`
/// is called with that completion.
pub fn event_listen(
    self: *IO,
    event: Event,
    completion: *Completion,
    comptime on_event: fn (*Completion) void,
) void {
    assert(event != INVALID_EVENT);

    const Handler = struct {
        // Container-level scratch word that receives the value read; it is never inspected.
        var scratch: u64 = undefined;

        fn on_read(
            _: *@This(),
            inner: *Completion,
            result: ReadError!usize,
        ) void {
            const read_len = result catch unreachable; // eventfd reads should not fail.
            assert(read_len == @sizeOf(u64));
            on_event(inner);
        }
    };

    const scratch_bytes = std.mem.asBytes(&Handler.scratch);
    self.read(
        *Handler,
        undefined, // The context pointer is ignored by `on_read`.
        Handler.on_read,
        completion,
        event,
        scratch_bytes,
        0, // eventfd reads must always start from 0 offset.
    );
}
1375
+
1376
/// Signals `event` by synchronously writing the 64-bit value 1 to it.
/// `completion` is unused here.
pub fn event_trigger(self: *IO, event: Event, completion: *Completion) void {
    _ = self;
    _ = completion;
    assert(event != INVALID_EVENT);

    const increment: u64 = 1;
    const written = posix.write(event, std.mem.asBytes(&increment)) catch unreachable;
    assert(written == @sizeOf(u64));
}
1385
+
1386
/// Closes the file descriptor behind `event`, which must not be `INVALID_EVENT`.
pub fn close_event(self: *IO, event: Event) void {
    _ = self;
    assert(event != INVALID_EVENT);
    posix.close(event);
}
1392
+
1393
+ pub const socket_t = posix.socket_t;
1394
+
1395
/// Creates a close-on-exec TCP stream socket for `family` and applies `options` to it.
/// The socket can be used for async operations with the IO instance.
/// If applying the options fails, the socket is closed before the error is returned.
pub fn open_socket_tcp(self: *IO, family: u32, options: TCPOptions) !socket_t {
    const socket_type = posix.SOCK.STREAM | posix.SOCK.CLOEXEC;
    const socket_fd = try posix.socket(family, socket_type, posix.IPPROTO.TCP);
    errdefer self.close_socket(socket_fd);

    try common.tcp_options(socket_fd, options);
    return socket_fd;
}
1407
+
1408
/// Creates a close-on-exec UDP datagram socket for `family`.
/// The socket can be used for async operations with the IO instance.
pub fn open_socket_udp(self: *IO, family: u32) !socket_t {
    _ = self;
    // Uses the `posix` alias throughout, matching `open_socket_tcp`.
    return try posix.socket(
        family,
        posix.SOCK.DGRAM | posix.SOCK.CLOEXEC,
        posix.IPPROTO.UDP,
    );
}
1417
+
1418
/// Synchronously closes `socket`, which was opened by the IO instance.
pub fn close_socket(self: *IO, socket: socket_t) void {
    _ = self;
    const socket_fd = socket;
    posix.close(socket_fd);
}
1423
+
1424
/// Starts listening on the TCP socket `fd` at `address`, delegating to `common.listen`.
/// Returns the socket's resolved address, which may be more specific than the input address
/// (e.g., when listening on port 0).
pub fn listen(
    _: *IO,
    fd: socket_t,
    address: std.net.Address,
    options: ListenOptions,
) !std.net.Address {
    const resolved_address = try common.listen(fd, address, options);
    return resolved_address;
}
1435
+
1436
/// Shuts down `socket` in the direction(s) given by `how`; a direct call to `posix.shutdown`.
pub fn shutdown(_: *IO, socket: socket_t, how: posix.ShutdownHow) posix.ShutdownError!void {
    try posix.shutdown(socket, how);
}
1439
+
1440
/// Opens `dir_path` read-only with close-on-exec set and returns its file descriptor.
pub fn open_dir(dir_path: []const u8) !fd_t {
    const flags: posix.O = .{ .ACCMODE = .RDONLY, .CLOEXEC = true };
    return posix.open(dir_path, flags, 0);
}
1444
+
1445
+ pub const fd_t = posix.fd_t;
1446
+ pub const INVALID_FILE: fd_t = -1;
1447
+
1448
/// Why the data file is being opened:
/// - `format`: a regular file is created (and must not already exist); a block device must
///   have an all-zero superblock zone.
/// - `open`: an existing path is opened read-write.
/// - `inspect`: an existing path is opened read-only, and a held lock is tolerated.
pub const OpenDataFilePurpose = enum { format, open, inspect };
/// Opens or creates a data file (a regular file or a block device):
/// - For reading and writing (read-only when `purpose` is `.inspect`).
/// - For Direct I/O (if possible in development mode, but required in production mode).
/// - Obtains an advisory exclusive lock to the file descriptor.
/// - Allocates the file contiguously on disk if this is supported by the file system.
/// - Ensures that the file data (and file inode in the parent directory) is durable on disk.
///   The caller is responsible for ensuring that the parent directory inode is durable.
/// - Verifies that the file (or block device) is at least `size` bytes before returning.
///
/// `relative_path` must be non-empty and relative to `dir_fd`; `size` must be a multiple of
/// the sector size. Unrecoverable conditions (missing path when not formatting, unsupported
/// Direct I/O when required, a lock held by another process, a too-small or non-empty device)
/// panic rather than return an error.
pub fn open_data_file(
    self: *IO,
    dir_fd: fd_t,
    relative_path: []const u8,
    size: u64,
    purpose: OpenDataFilePurpose,
    direct_io: DirectIO,
) !fd_t {
    _ = self;

    assert(relative_path.len > 0);
    assert(size % constants.sector_size == 0);
    // Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
    assert(!std.fs.path.isAbsolute(relative_path));

    var flags: posix.O = .{
        .CLOEXEC = true,
        .ACCMODE = if (purpose == .inspect) .RDONLY else .RDWR,
        .DSYNC = true,
    };
    var mode: posix.mode_t = 0;

    // Decide up front whether the path is a regular file or a block device, since the two
    // need different open flags.
    const kind: enum { file, block_device } = blk: {
        const stat = posix.fstatat(
            dir_fd,
            relative_path,
            0,
        ) catch |err| switch (err) {
            error.FileNotFound => {
                if (purpose == .format) {
                    // It's impossible to distinguish creating a new file and opening a new
                    // block device with the current API. So if it's possible that we should
                    // create a file we try that instead of failing here.
                    break :blk .file;
                } else {
                    @panic("Path does not exist.");
                }
            },
            else => |err_| return err_,
        };
        if (posix.S.ISBLK(stat.mode)) {
            break :blk .block_device;
        } else {
            if (!posix.S.ISREG(stat.mode)) {
                @panic("file path does not point to block device or regular file.");
            }
            break :blk .file;
        }
    };

    // This is not strictly necessary on 64bit systems but it's harmless.
    // This will avoid errors with handling large files on certain configurations
    // of 32bit kernels. In all other cases, it's a noop.
    // See: <https://github.com/torvalds/linux/blob/ab27740f76654ed58dd32ac0ba0031c18a6dea3b/fs/open.c#L1602>
    if (@hasField(posix.O, "LARGEFILE")) flags.LARGEFILE = true;

    switch (kind) {
        .block_device => {
            if (direct_io != .direct_io_disabled) {
                // Block devices should always support Direct IO.
                flags.DIRECT = true;
                // Use O_EXCL when opening as a block device to obtain an advisory exclusive
                // lock. Normally, you can't do this for files you don't create, but for
                // block devices this guarantees:
                // - that there are no mounts using this block device
                // - that no new mounts can use this block device while we have it open
                //
                // However it doesn't prevent other processes with root from opening without
                // O_EXCL and writing (mount is just a special case that always checks O_EXCL).
                //
                // This should be stronger than flock(2) locks, which work on a separate system.
                // The relevant kernel code (as of v6.7) is here:
                // <https://github.com/torvalds/linux/blob/7da71072e1d6967c0482abcbb5991ffb5953fdf2/block/bdev.c#L932>
                flags.EXCL = true;
            }
            log.info("opening block device \"{s}\"...", .{relative_path});
        },
        .file => {
            var direct_io_supported = false;
            const dir_on_tmpfs = try fs_is_tmpfs(dir_fd);

            if (dir_on_tmpfs) {
                log.warn(
                    "tmpfs is not durable, and your data will be lost on reboot",
                    .{},
                );
            }

            // Special case. tmpfs doesn't support Direct I/O. Normally we would panic
            // here (see below) but being able to benchmark production workloads
            // on tmpfs is very useful for removing disk speed from the equation.
            if (direct_io != .direct_io_disabled and !dir_on_tmpfs) {
                direct_io_supported = try fs_supports_direct_io(dir_fd);
                if (direct_io_supported) {
                    flags.DIRECT = true;
                } else if (direct_io == .direct_io_optional) {
                    log.warn("This file system does not support Direct I/O.", .{});
                } else {
                    assert(direct_io == .direct_io_required);
                    // We require Direct I/O for safety to handle fsync failure correctly, and
                    // therefore panic in production if it is not supported.
                    log.err("This file system does not support Direct I/O.", .{});
                    log.err("TigerBeetle uses Direct I/O to bypass the kernel page cache, " ++
                        "to ensure that data is durable when writes complete.", .{});
                    log.err("If this is a production replica, Direct I/O is required.", .{});
                    log.err("If this is a development/testing replica, " ++
                        "re-run with --development set to bypass this error.", .{});
                    @panic("file system does not support Direct I/O");
                }
            }

            switch (purpose) {
                .format => {
                    // O_CREAT | O_EXCL: formatting must create the file, never reuse one.
                    flags.CREAT = true;
                    flags.EXCL = true;
                    mode = 0o666;
                    log.info("creating \"{s}\"...", .{relative_path});
                },
                .open, .inspect => {
                    log.info("opening \"{s}\"...", .{relative_path});
                },
            }
        },
    }

    // This is critical as we rely on O_DSYNC for fsync() whenever we write to the file:
    assert(flags.DSYNC);

    const fd = try posix.openat(dir_fd, relative_path, flags, mode);
    // TODO Return a proper error message when the path exists or does not exist (init/start).
    errdefer posix.close(fd);

    {
        // Make sure we're getting the type of file descriptor we expect.
        const stat = try posix.fstat(fd);
        switch (kind) {
            .file => assert(posix.S.ISREG(stat.mode)),
            .block_device => assert(posix.S.ISBLK(stat.mode)),
        }
    }

    // Obtain an advisory exclusive lock that works only if all processes actually use flock().
    // LOCK_NB means that we want to fail the lock without waiting if another process has it.
    //
    // This is wrapped inside a retry loop with a sleep because of the interaction between
    // io_uring semantics and flock: flocks are held per fd, but io_uring will keep a reference
    // to the fd alive even once a process has been terminated, until all async operations have
    // been completed.
    //
    // This means that when killing and starting a tigerbeetle process in an automated way, you
    // can see "another process holds the data file lock" errors, even though the process really
    // has terminated.
    const lock_acquired = blk: {
        // Two attempts that each sleep 50ms on contention, then one final attempt below.
        for (0..2) |_| {
            posix.flock(fd, posix.LOCK.EX | posix.LOCK.NB) catch |err| switch (err) {
                error.WouldBlock => {
                    std.time.sleep(50 * std.time.ns_per_ms);
                    continue;
                },
                else => return err,
            };
            break :blk true;
        } else {
            posix.flock(fd, posix.LOCK.EX | posix.LOCK.NB) catch |err| switch (err) {
                error.WouldBlock => break :blk false,
                else => return err,
            };
            break :blk true;
        }
    };

    if (purpose == .inspect) {
        assert(flags.ACCMODE == .RDONLY);
        maybe(lock_acquired);

        if (!lock_acquired) {
            log.warn(
                "another process holds the data file lock - results may be inconsistent",
                .{},
            );
        }
    } else if (!lock_acquired) {
        @panic("another process holds the data file lock");
    }

    assert(flags.ACCMODE == .RDONLY or lock_acquired);

    // Ask the file system to allocate contiguous sectors for the file (if possible):
    // If the file system does not support `fallocate()`, then this could mean more seeks or a
    // panic if we run out of disk space (ENOSPC).
    if (purpose == .format and kind == .file) {
        log.info("allocating {}...", .{std.fmt.fmtIntSizeBin(size)});
        fs_allocate(fd, size) catch |err| switch (err) {
            error.OperationNotSupported => {
                log.warn("file system does not support fallocate(), an ENOSPC will panic", .{});
                log.info("allocating by writing to the last sector " ++
                    "of the file instead...", .{});

                const sector_size = constants.sector_size;
                const sector: [sector_size]u8 align(sector_size) = @splat(0);

                // Handle partial writes where the physical sector is
                // less than a logical sector:
                const write_offset = size - sector.len;
                var written: usize = 0;
                while (written < sector.len) {
                    written += try posix.pwrite(fd, sector[written..], write_offset + written);
                }
            },
            else => |e| return e,
        };
    }

    // The best fsync strategy is always to fsync before reading because this prevents us from
    // making decisions on data that was never durably written by a previously crashed process.
    // We therefore always fsync when we open the path, also to wait for any pending O_DSYNC.
    // Thanks to Alex Miller from FoundationDB for diving into our source and pointing this out.
    try posix.fsync(fd);

    // We fsync the parent directory to ensure that the file inode is durably written.
    // The caller is responsible for the parent directory inode stored under the grandparent.
    // We always do this when opening because we don't know if this was done before crashing.
    try posix.fsync(dir_fd);

    switch (kind) {
        .file => {
            if ((try posix.fstat(fd)).size < size) {
                @panic("data file inode size was truncated or corrupted");
            }
        },
        .block_device => {
            // The request number is defined by the kernel in terms of `size_t`, hence `usize`
            // here, but the value written back is always a 64-bit byte count. The destination
            // is therefore a `u64` so that the kernel never writes past it on 32-bit targets.
            const BLKGETSIZE64 = os.linux.IOCTL.IOR(0x12, 114, usize);
            var block_device_size: u64 = 0;

            switch (os.linux.E.init(os.linux.ioctl(
                fd,
                BLKGETSIZE64,
                @intFromPtr(&block_device_size),
            ))) {
                .SUCCESS => {},

                // These are the only errors that are supposed to be possible from ioctl(2).
                .BADF => return error.InvalidFileDescriptor,
                .NOTTY => return error.BadRequest,
                .FAULT => return error.InvalidAddress,
                else => |err| return stdx.unexpected_errno("open_file:ioctl", err),
            }

            if (block_device_size < size) {
                std.debug.panic(
                    "The block device used is too small ({} available/{} needed).",
                    .{
                        std.fmt.fmtIntSizeBin(block_device_size),
                        std.fmt.fmtIntSizeBin(size),
                    },
                );
            }

            if (purpose == .format) {
                // Check that the first superblock_zone_size bytes are 0.
                // - It'll ensure that the block device is not directly TigerBeetle.
                // - It'll be very likely to catch any cases where there's an existing
                //   other filesystem.
                // - In the case of there being a partition table (eg, two partitions,
                //   one starting at 0MiB, one at 1024MiB) and the operator tries to format
                //   the raw disk (/dev/sda) while a partition later is
                //   TigerBeetle (/dev/sda2) it'll be blocked by the MBR/GPT existing.
                const superblock_zone_size =
                    @import("../vsr/superblock.zig").superblock_zone_size;
                var read_buf: [superblock_zone_size]u8 align(constants.sector_size) = undefined;

                // We can do this without worrying about retrying partial reads because on
                // linux, read(2) on block devices can not be interrupted by signals.
                // See signal(7).
                assert(superblock_zone_size == try posix.read(fd, &read_buf));
                if (!std.mem.allEqual(u8, &read_buf, 0)) {
                    std.debug.panic(
                        "Superblock on block device not empty. " ++
                            "If this is the correct block device to use, " ++
                            "please zero the first {} using a tool like dd.",
                        .{std.fmt.fmtIntSizeBin(superblock_zone_size)},
                    );
                }
                // Reset position in the block device to compensate for read(2).
                try posix.lseek_CUR(fd, -superblock_zone_size);
                assert(try posix.lseek_CUR_get(fd) == 0);
            }
        },
    }

    return fd;
}
1749
+
1750
/// Detects whether the underlying file system for a given directory fd is tmpfs. This is used
/// to relax our Direct I/O check - running on tmpfs for benchmarking is useful.
///
/// The check compares the `f_type` reported by `fstatfs` against `stdx.TmpfsMagic`.
/// The call is retried on EINTR; any other failure is returned via `stdx.unexpected_errno`.
fn fs_is_tmpfs(dir_fd: fd_t) !bool {
    var info: stdx.StatFs = undefined;

    while (true) {
        const errno = os.linux.E.init(stdx.fstatfs(dir_fd, &info));
        if (errno == .INTR) continue;
        if (errno != .SUCCESS) return stdx.unexpected_errno("fs_is_tmpfs", errno);

        return info.f_type == stdx.TmpfsMagic;
    }
}
1766
+
1767
/// Detects whether the underlying file system for a given directory fd supports Direct I/O.
/// Not all Linux file systems support `O_DIRECT`, e.g. a shared macOS volume.
///
/// Works by creating a randomly named probe file inside the directory and then trying to
/// re-open it with `O_DIRECT`: success means supported, EINVAL means unsupported. The probe
/// file is deleted (best effort) and its descriptor closed before returning.
fn fs_supports_direct_io(dir_fd: fd_t) !bool {
    // Targets whose open flags have no DIRECT bit cannot support Direct I/O at all.
    if (!@hasField(posix.O, "DIRECT")) return false;

    // Random hex suffix so that concurrent probes in the same directory do not collide.
    var cookie: [16]u8 = @splat('0');
    _ = stdx.array_print(16, &cookie, "{0x}", .{std.crypto.random.int(u64)});

    const probe_path: [:0]const u8 = "fs_supports_direct_io-" ++ cookie ++ "";
    const probe_dir = std.fs.Dir{ .fd = dir_fd };
    const create_flags: posix.O = .{ .CLOEXEC = true, .CREAT = true, .TRUNC = true };
    const probe_fd = try posix.openatZ(dir_fd, probe_path, create_flags, 0o666);
    defer posix.close(probe_fd);
    defer probe_dir.deleteFile(probe_path) catch {};

    const direct_flags: posix.O = .{ .CLOEXEC = true, .ACCMODE = .RDONLY, .DIRECT = true };
    while (true) {
        const rc = os.linux.openat(dir_fd, probe_path, direct_flags, 0);
        switch (os.linux.E.init(rc)) {
            .SUCCESS => {
                // On success `rc` is the new descriptor; it was only needed as a probe.
                posix.close(@intCast(rc));
                return true;
            },
            .INTR => continue,
            .INVAL => return false,
            else => |errno| return stdx.unexpected_errno("fs_supports_direct_io", errno),
        }
    }
}
1796
+
1797
/// Allocates `size` bytes for `fd`, starting at offset 0, using fallocate() in its default
/// mode (mode 0).
///
/// The call is retried on EINTR. When the file system does not support fallocate(), this
/// returns `error.OperationNotSupported`; this function itself performs no fallback, so any
/// alternative allocation strategy is up to the caller. Other errno values are mapped to
/// named errors, with anything unlisted reported through `stdx.unexpected_errno`.
fn fs_allocate(fd: fd_t, size: u64) !void {
    const default_mode: i32 = 0;
    const start: i64 = 0;
    const byte_count: i64 = @intCast(size);

    while (true) {
        const errno = os.linux.E.init(os.linux.fallocate(fd, default_mode, start, byte_count));
        return switch (errno) {
            .SUCCESS => {},
            .INTR => continue,
            .BADF => error.FileDescriptorInvalid,
            .FBIG => error.FileTooBig,
            .INVAL => error.ArgumentsInvalid,
            .IO => error.InputOutput,
            .NODEV => error.NoDevice,
            .NOSPC => error.NoSpaceLeft,
            .NOSYS => error.SystemOutdated,
            .OPNOTSUPP => error.OperationNotSupported,
            .PERM => error.PermissionDenied,
            .SPIPE => error.Unseekable,
            .TXTBSY => error.FileBusy,
            else => |unexpected| stdx.unexpected_errno("fs_allocate", unexpected),
        };
    }
}
1824
+
1825
/// Error set returned by `aof_blocking_pread_all`; an alias of `posix.PReadError`.
pub const PReadError = posix.PReadError;

/// Forwards `buffer` to `common.aof_blocking_write_all` for `fd`.
/// The IO instance is unused.
pub fn aof_blocking_write_all(self: *IO, fd: fd_t, buffer: []const u8) posix.WriteError!void {
    _ = self;
    try common.aof_blocking_write_all(fd, buffer);
}
1830
+
1831
/// Forwards to `common.aof_blocking_pread_all`, reading from `fd` at `offset` into `buffer`,
/// and returns the `usize` it reports. The IO instance is unused.
pub fn aof_blocking_pread_all(self: *IO, fd: fd_t, buffer: []u8, offset: u64) PReadError!usize {
    _ = self;
    const bytes_read = try common.aof_blocking_pread_all(fd, buffer, offset);
    return bytes_read;
}
1834
+
1835
/// Closes `fd` via `common.aof_blocking_close`. The IO instance is unused.
pub fn aof_blocking_close(self: *IO, fd: fd_t) void {
    _ = self;
    common.aof_blocking_close(fd);
}
1838
+
1839
/// Returns file metadata for `path` via `common.aof_blocking_stat`. The IO instance is unused.
pub fn aof_blocking_stat(self: *IO, path: []const u8) std.fs.Dir.StatFileError!std.fs.File.Stat {
    _ = self;
    const stat = try common.aof_blocking_stat(path);
    return stat;
}
1842
+
1843
/// Returns file metadata for the open descriptor `fd` via `common.aof_blocking_fstat`.
/// The IO instance is unused.
pub fn aof_blocking_fstat(self: *IO, fd: fd_t) std.fs.Dir.StatError!std.fs.File.Stat {
    _ = self;
    const stat = try common.aof_blocking_fstat(fd);
    return stat;
}
1846
+
1847
/// Opens the AOF file at `path`, which may be absolute or relative.
///
/// The path is split into its parent directory (defaulting to "." when there is none) and
/// its base name. The directory is opened read-only, handed to `common.aof_blocking_open`
/// together with the base name, and closed again before returning.
pub fn aof_blocking_open(io: *IO, path: []const u8) !fd_t {
    stdx.maybe(std.fs.path.isAbsolute(path));

    const parent_path = std.fs.path.dirname(path) orelse ".";
    const file_name = std.fs.path.basename(path);

    const parent_fd = try IO.open_dir(parent_path);
    defer io.aof_blocking_close(parent_fd);

    return common.aof_blocking_open(parent_fd, file_name);
}
1858
+
1859
/// Wraps a typed `callback` in a function with a type-erased signature.
///
/// The returned function takes the context as `?*anyopaque` and the result as
/// `*const anyopaque`, casts them back to `Context` and `*const Result`, and invokes
/// `callback` with the context, the completion, and the dereferenced result.
fn erase_types(
    comptime Context: type,
    comptime Result: type,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: Result,
    ) void,
) *const fn (?*anyopaque, *Completion, *const anyopaque) void {
    const Erased = struct {
        fn call(
            opaque_context: ?*anyopaque,
            completion: *Completion,
            opaque_result: *const anyopaque,
        ) void {
            const typed_context: Context = @ptrCast(@alignCast(opaque_context));
            const typed_result: *const Result = @ptrCast(@alignCast(opaque_result));
            callback(typed_context, completion, typed_result.*);
        }
    };
    return &Erased.call;
}
1880
+ };