tigerbeetle 0.0.34 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/ext/tb_client/extconf.rb +13 -13
  4. data/ext/tb_client/tigerbeetle/LICENSE +177 -0
  5. data/ext/tb_client/tigerbeetle/build.zig +2327 -0
  6. data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
  7. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
  8. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
  9. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
  10. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
  11. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
  12. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
  13. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
  14. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
  15. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
  16. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
  17. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
  18. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
  19. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
  20. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
  21. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
  22. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
  23. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
  24. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
  25. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
  26. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
  27. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
  28. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
  29. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
  30. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
  31. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
  32. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
  33. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
  34. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
  35. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
  36. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
  37. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
  38. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
  39. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
  40. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
  41. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
  42. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
  43. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
  44. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
  45. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
  46. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
  47. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
  48. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
  49. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
  50. data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
  51. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
  52. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
  53. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
  54. data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
  55. data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
  56. data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
  57. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
  58. data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
  59. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
  60. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
  61. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
  62. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
  63. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
  64. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
  65. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
  66. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
  67. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
  68. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
  69. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
  70. data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
  71. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
  72. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
  73. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
  74. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
  75. data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
  76. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
  77. data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
  78. data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
  79. data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
  80. data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
  81. data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
  82. data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
  83. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
  84. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
  85. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
  86. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
  87. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
  88. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
  89. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
  90. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
  91. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
  92. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
  93. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
  94. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
  95. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
  96. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
  97. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
  98. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
  99. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
  100. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
  101. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
  102. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
  103. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
  104. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
  105. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
  106. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
  107. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
  108. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
  109. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
  110. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
  111. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
  112. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
  113. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
  114. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
  115. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
  116. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
  117. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
  118. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
  119. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
  120. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
  121. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
  122. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
  123. data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
  124. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
  125. data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
  126. data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
  127. data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
  128. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
  129. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
  130. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
  131. data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
  132. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
  133. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
  134. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
  135. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
  136. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
  137. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
  138. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
  139. data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
  140. data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
  141. data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
  142. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
  143. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
  144. data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
  145. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
  146. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
  147. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
  148. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
  149. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
  150. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
  151. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
  152. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
  153. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
  154. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
  155. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
  156. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
  157. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
  158. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
  159. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
  160. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
  161. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
  162. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
  163. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
  164. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
  165. data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
  166. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
  167. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
  168. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
  169. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
  170. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
  171. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
  172. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
  173. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
  174. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
  175. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
  176. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
  177. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
  178. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
  179. data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
  180. data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
  181. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
  182. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
  183. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
  184. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
  185. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
  186. data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
  187. data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
  188. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
  189. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
  190. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
  191. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
  192. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
  193. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
  194. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
  195. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
  196. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
  197. data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
  198. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
  199. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
  200. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
  201. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
  202. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
  203. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
  204. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
  205. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
  206. data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
  207. data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
  208. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
  209. data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
  210. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
  211. data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
  212. data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
  213. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
  214. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
  215. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
  216. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
  217. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
  218. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
  219. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
  220. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
  221. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
  222. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
  223. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
  224. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
  225. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
  226. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
  227. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
  228. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
  229. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
  230. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
  231. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
  232. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
  233. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
  234. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
  235. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
  236. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
  237. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
  238. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
  239. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
  240. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
  241. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
  242. data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
  243. data/lib/tb_client/shared_lib.rb +12 -5
  244. data/lib/tigerbeetle/client.rb +1 -1
  245. data/lib/tigerbeetle/platforms.rb +9 -0
  246. data/lib/tigerbeetle/version.rb +2 -2
  247. data/tigerbeetle.gemspec +22 -5
  248. metadata +242 -3
  249. data/ext/tb_client/pkg.tar.gz +0 -0
@@ -0,0 +1,1093 @@
1
+ const std = @import("std");
2
+ const posix = std.posix;
3
+ const mem = std.mem;
4
+ const assert = std.debug.assert;
5
+ const log = std.log.scoped(.io);
6
+
7
+ const stdx = @import("stdx");
8
+ const constants = @import("../constants.zig");
9
+ const common = @import("./common.zig");
10
+ const QueueType = @import("../queue.zig").QueueType;
11
+ const TimeOS = @import("../time.zig").TimeOS;
12
+ const buffer_limit = @import("../io.zig").buffer_limit;
13
+ const DirectIO = @import("../io.zig").DirectIO;
14
+
15
+ pub const IO = struct {
16
+ pub const TCPOptions = common.TCPOptions;
17
+ pub const ListenOptions = common.ListenOptions;
18
+
19
+ kq: fd_t,
20
+ event_id: Event = 0,
21
+ time_os: TimeOS = .{},
22
+ io_inflight: usize = 0,
23
+ timeouts: QueueType(Completion) = QueueType(Completion).init(.{ .name = "io_timeouts" }),
24
+ completed: QueueType(Completion) = QueueType(Completion).init(.{ .name = "io_completed" }),
25
+ io_pending: QueueType(Completion) = QueueType(Completion).init(.{ .name = "io_pending" }),
26
+
27
/// Creates an `IO` instance backed by a newly opened kqueue descriptor.
/// `entries` and `flags` are accepted but not used by this implementation.
/// Returns whatever error `posix.kqueue()` reports.
pub fn init(entries: u12, flags: u32) !IO {
    // Neither argument influences this backend.
    _ = flags;
    _ = entries;

    const queue_fd = try posix.kqueue();
    assert(queue_fd >= 0);
    return .{ .kq = queue_fd };
}
35
+
36
/// Closes the kqueue descriptor and marks the instance as torn down by storing -1 in `kq`.
pub fn deinit(self: *IO) void {
    assert(self.kq >= 0);
    // The descriptor is invalidated only after it has been closed.
    defer self.kq = -1;
    posix.close(self.kq);
}
41
+
42
/// Hands every queued submission to the kernel and peeks for completions
/// without waiting for them.
pub fn run(self: *IO) !void {
    const wait_for_completions = false;
    try self.flush(wait_for_completions);
}
46
+
47
/// Hands every queued submission to the kernel and keeps flushing until a timeout of
/// `nanoseconds` has fired.
/// `nanoseconds` is a u63 so that it can always be coerced to a signed 64-bit value.
pub fn run_for_ns(self: *IO, nanoseconds: u63) !void {
    const Deadline = struct {
        /// Timeout callback: records that the deadline has passed.
        fn fired(flag: *bool, _: *Completion, result: TimeoutError!void) void {
            result catch unreachable;
            flag.* = true;
        }
    };

    var deadline_reached = false;
    var deadline_completion: Completion = undefined;

    // Register the timeout whose callback flips `deadline_reached` and ends the loop below.
    self.timeout(*bool, &deadline_reached, Deadline.fired, &deadline_completion, nanoseconds);

    // The address of `deadline_reached` escapes into the timeout above, so the flag is
    // re-read on every iteration rather than cached.
    while (!deadline_reached) try self.flush(true);
}
81
+
82
/// Runs one pass of the event loop: expires timeouts, hands pending IO to kevent(),
/// collects the events kevent() reports, and finally runs the callback of every completed
/// operation. When `wait_for_completions` is set and there is nothing to submit or
/// complete, kevent() waits until the next timeout is due instead of polling.
fn flush(self: *IO, wait_for_completions: bool) !void {
    var events: [256]posix.Kevent = undefined;

    // Expired timeouts are moved onto `completed` here; pending IO is converted into
    // change events that are submitted through kevent() below.
    const next_timeout = self.flush_timeouts();
    const change_events = self.flush_io(&events);
    const nothing_completed = self.completed.empty();

    poll: {
        // kevent() is only needed to submit change events or to look for completions.
        if (change_events == 0 and !nothing_completed) break :poll;

        // An all-zero timespec makes kevent() a non-blocking poll.
        var ts = std.mem.zeroes(posix.timespec);

        // With nothing to submit (and therefore, given the guard above, nothing completed)
        // we wait rather than poll. kevent() is never given a null timeout because:
        // - run() is non-blocking (wait_for_completions = false)
        // - run_for_ns() always submits a timeout
        if (change_events == 0) {
            if (wait_for_completions) {
                const timeout_ns = next_timeout orelse @panic("kevent() blocking forever");
                ts.nsec = @intCast(timeout_ns % std.time.ns_per_s);
                ts.sec = @intCast(timeout_ns / std.time.ns_per_s);
            } else if (self.io_inflight == 0) {
                return;
            }
        }

        const new_events = try posix.kevent(
            self.kq,
            events[0..change_events],
            &events,
            &ts,
        );

        // Account for the submitted events only once kevent() has accepted them,
        // then subtract the ones that have just been reported back.
        self.io_inflight = self.io_inflight + change_events - new_events;

        for (events[0..new_events]) |*event| {
            const ready: *Completion = @ptrFromInt(event.udata);
            assert(ready.link.next == null);
            self.completed.push(ready);
        }
    }

    // Take a copy of the completed queue and reset the original before running callbacks.
    var ready_queue = self.completed;
    self.completed.reset();
    while (ready_queue.pop()) |completion| {
        (completion.callback)(self, completion);
    }
}
134
+
135
/// Pops completions off `io_pending` and writes one one-shot kevent registration per
/// completion into `events`, stopping when either the queue or the slice is exhausted.
/// Returns the number of entries of `events` that were filled in.
fn flush_io(self: *IO, events: []posix.Kevent) usize {
    var filled: usize = 0;
    while (filled < events.len) : (filled += 1) {
        const pending = self.io_pending.pop() orelse break;

        // Which descriptor to watch, and whether to wait for readability or writability.
        const target: struct { fd: c_int, filter: c_int } = switch (pending.operation) {
            .accept => |op| .{ .fd = op.socket, .filter = posix.system.EVFILT.READ },
            .connect => |op| .{ .fd = op.socket, .filter = posix.system.EVFILT.WRITE },
            .read => |op| .{ .fd = op.fd, .filter = posix.system.EVFILT.READ },
            .write => |op| .{ .fd = op.fd, .filter = posix.system.EVFILT.WRITE },
            .recv => |op| .{ .fd = op.socket, .filter = posix.system.EVFILT.READ },
            .send => |op| .{ .fd = op.socket, .filter = posix.system.EVFILT.WRITE },
            else => @panic("invalid completion operation queued for io"),
        };

        events[filled] = .{
            .ident = @as(u32, @intCast(target.fd)),
            .filter = @as(i16, @intCast(target.filter)),
            .flags = posix.system.EV.ADD | posix.system.EV.ENABLE | posix.system.EV.ONESHOT,
            .fflags = 0,
            .data = 0,
            // The completion pointer travels with the event and is recovered in flush().
            .udata = @intFromPtr(pending),
        };
    }
    return filled;
}
160
+
161
/// Moves every expired timeout from `timeouts` onto `completed`.
/// Returns the number of nanoseconds until the earliest timeout that is still pending,
/// or null when no timeout remains.
fn flush_timeouts(self: *IO) ?u64 {
    var soonest: ?u64 = null;
    var pending_timeouts = self.timeouts.iterate();
    while (pending_timeouts.next()) |completion| {
        // NOTE: `now` could be read once before the loop, but monotonic() should be cheap.
        const now = self.time_os.time().monotonic().ns;
        const expires = completion.operation.timeout.expires;

        if (expires <= now) {
            // NOTE: with a doubly-linked list remove() could be O(1) here,
            // since the previous Completion is known.
            self.timeouts.remove(completion);
            self.completed.push(completion);
        } else {
            const remaining_ns = expires - now;
            soonest = if (soonest) |current| @min(current, remaining_ns) else remaining_ns;
        }
    }
    return soonest;
}
187
+
188
/// State for a single IO operation, owned by the caller and linked into the IO queues.
pub const Completion = struct {
    /// Intrusive queue link; flush() asserts that `next` is null before re-queuing.
    link: QueueType(Completion).Link = .{},
    /// Opaque caller context, cast back to its concrete type when the callback runs.
    context: ?*anyopaque,
    /// Type-erased entry point invoked by flush() once the completion is dequeued.
    callback: *const fn (*IO, *Completion) void,
    /// The operation to perform together with its arguments.
    operation: Operation,
};
195
+
196
/// Arguments for each kind of operation a `Completion` can carry.
const Operation = union(enum) {
    /// Accept a connection on the listening `socket`.
    accept: struct {
        socket: socket_t,
    },
    /// Close `fd`.
    close: struct {
        fd: fd_t,
    },
    /// Connect `socket` to `address`; `initiated` is set after the first attempt.
    connect: struct {
        socket: socket_t,
        address: std.net.Address,
        initiated: bool,
    },
    /// Sync `fd`.
    fsync: struct {
        fd: fd_t,
    },
    /// Read up to `len` bytes into `buf` from `fd` at `offset`.
    read: struct {
        fd: fd_t,
        buf: [*]u8,
        len: u32,
        offset: u64,
    },
    /// Receive up to `len` bytes into `buf` from `socket`.
    recv: struct {
        socket: socket_t,
        buf: [*]u8,
        len: u32,
    },
    /// Send up to `len` bytes from `buf` over `socket`.
    send: struct {
        socket: socket_t,
        buf: [*]const u8,
        len: u32,
    },
    /// Fires once the monotonic clock reaches `expires` (nanoseconds).
    timeout: struct {
        expires: u64,
    },
    /// Write up to `len` bytes from `buf` to `fd` at `offset`.
    write: struct {
        fd: fd_t,
        buf: [*]const u8,
        len: u32,
        offset: u64,
    },
};
237
+
238
/// Initializes `completion` for the operation selected by `operation_tag` and enqueues it.
///
/// - `context` / `callback`: the caller's context and the function that receives the result.
/// - `operation_data`: the payload stored in `completion.operation`.
/// - `OperationImpl`: a type exposing `do_operation(op)`, which performs the operation.
///
/// Timeouts are pushed onto `timeouts`; every other operation is pushed onto `completed`.
fn submit(
    self: *IO,
    context: anytype,
    comptime callback: anytype,
    completion: *Completion,
    comptime operation_tag: std.meta.Tag(Operation),
    operation_data: std.meta.TagPayload(Operation, operation_tag),
    comptime OperationImpl: type,
) void {
    // Only these operations are checked for error.WouldBlock and requeued onto io_pending.
    const retryable = comptime switch (operation_tag) {
        .accept, .connect, .read, .write, .send, .recv => true,
        else => false,
    };

    const Runner = struct {
        /// Type-erased callback stored in the completion.
        fn run(io: *IO, target: *Completion) void {
            // Perform the actual operation.
            const payload = &@field(target.operation, @tagName(operation_tag));
            const result = OperationImpl.do_operation(payload);

            if (retryable) {
                if (result) |_| {} else |err| switch (err) {
                    error.WouldBlock => {
                        // Not ready yet: hand the completion to kevent() and try again later.
                        target.link = .{};
                        io.io_pending.push(target);
                        return;
                    },
                    else => {},
                }
            }

            // Deliver the result to the caller.
            callback(
                @ptrCast(@alignCast(target.context)),
                target,
                result,
            );
        }
    };

    completion.* = .{
        .link = .{},
        .context = context,
        .callback = Runner.run,
        .operation = @unionInit(Operation, @tagName(operation_tag), operation_data),
    };

    const queue = if (operation_tag == .timeout) &self.timeouts else &self.completed;
    queue.push(completion);
}
289
+
290
/// Placeholder: currently does nothing.
pub fn cancel_all(self: *IO) void {
    // TODO Cancel in-flight async IO and wait for all completions.
    _ = self;
}
293
+
294
/// Errors a cancel callback can be handed.
pub const CancelError = error{
    NotRunning,
    NotInterruptable,
} || posix.UnexpectedError;

/// Not implemented for this backend: always panics. The signature is kept so that callers
/// can be written against it; every argument is discarded.
pub fn cancel(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: CancelError!void,
    ) void,
    options: struct {
        completion: *Completion,
        target: *Completion,
    },
) void {
    _ = self;
    _ = context;
    _ = callback;
    _ = options;
    @panic("cancelation is not supported on darwin");
}
315
+
316
/// Errors an accept callback can be handed.
pub const AcceptError = posix.AcceptError || posix.SetSockOptError;

/// Queues an accept on the listening `socket`. `callback` receives the accepted socket,
/// which is opened non-blocking and close-on-exec with SO_NOSIGPIPE enabled, or the error.
pub fn accept(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: AcceptError!socket_t,
    ) void,
    completion: *Completion,
    socket: socket_t,
) void {
    const AcceptImpl = struct {
        fn do_operation(op: anytype) AcceptError!socket_t {
            const accepted = try posix.accept(
                op.socket,
                null,
                null,
                posix.SOCK.NONBLOCK | posix.SOCK.CLOEXEC,
            );
            // Do not leak the new socket if configuring it fails.
            errdefer posix.close(accepted);

            // Darwin has no posix.MSG_NOSIGNAL to suppress SIGPIPE on socket send().
            // The SO_NOSIGPIPE socket option achieves the same for every send().
            const enable: c_int = 1;
            posix.setsockopt(
                accepted,
                posix.SOL.SOCKET,
                posix.SO.NOSIGPIPE,
                &mem.toBytes(enable),
            ) catch |err| return switch (err) {
                error.TimeoutTooBig => unreachable,
                error.PermissionDenied => error.NetworkSubsystemFailed,
                error.AlreadyConnected => error.NetworkSubsystemFailed,
                error.InvalidProtocolOption => error.ProtocolFailure,
                else => |e| e,
            };

            return accepted;
        }
    };

    self.submit(context, callback, completion, .accept, .{ .socket = socket }, AcceptImpl);
}
369
+
370
/// Errors a close callback can be handed.
pub const CloseError = error{
    FileDescriptorInvalid,
    DiskQuota,
    InputOutput,
    NoSpaceLeft,
} || posix.UnexpectedError;

/// Queues a close of `fd`. `callback` receives the outcome.
pub fn close(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: CloseError!void,
    ) void,
    completion: *Completion,
    fd: fd_t,
) void {
    const CloseImpl = struct {
        fn do_operation(op: anytype) CloseError!void {
            const errno = posix.errno(posix.system.close(op.fd));
            switch (errno) {
                // INTR counts as success, see https://github.com/ziglang/zig/issues/2425.
                .SUCCESS, .INTR => return,
                .BADF => return error.FileDescriptorInvalid,
                .IO => return error.InputOutput,
                else => return stdx.unexpected_errno("close", errno),
            }
        }
    };

    self.submit(context, callback, completion, .close, .{ .fd = fd }, CloseImpl);
}
410
+
411
/// Errors a connect callback can be handed.
pub const ConnectError = posix.ConnectError;

/// Queues a connect of `socket` to `address`. `callback` receives the outcome.
pub fn connect(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: ConnectError!void,
    ) void,
    completion: *Completion,
    socket: socket_t,
    address: std.net.Address,
) void {
    const ConnectImpl = struct {
        fn do_operation(op: anytype) ConnectError!void {
            // Whatever the outcome, the next attempt must not call connect() again.
            defer op.initiated = true;

            // After being rescheduled through io_pending, calling connect() again would
            // give EISCONN. Instead, read the socket error to learn whether the
            // connection was established.
            if (op.initiated) return posix.getsockoptError(op.socket);

            return posix.connect(
                op.socket,
                &op.address.any,
                op.address.getOsSockLen(),
            );
        }
    };

    self.submit(
        context,
        callback,
        completion,
        .connect,
        .{
            .socket = socket,
            .address = address,
            .initiated = false,
        },
        ConnectImpl,
    );
}
455
+
456
/// Errors an fsync callback can be handed.
pub const FsyncError = posix.SyncError || posix.UnexpectedError;

/// Queues a sync of `fd` via `fs_sync`. `callback` receives the outcome.
pub fn fsync(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: FsyncError!void,
    ) void,
    completion: *Completion,
    fd: fd_t,
) void {
    const FsyncImpl = struct {
        fn do_operation(op: anytype) FsyncError!void {
            return fs_sync(op.fd);
        }
    };

    self.submit(context, callback, completion, .fsync, .{ .fd = fd }, FsyncImpl);
}
484
+ }
485
+
486
/// `posix.OpenError` widened with `posix.UnexpectedError`.
pub const OpenatError = posix.OpenError || posix.UnexpectedError;

/// Errors that a `read` completion may report to its callback.
pub const ReadError = posix.UnexpectedError || error{
    Alignment,
    ConnectionResetByPeer,
    ConnectionTimedOut,
    InputOutput,
    IsDir,
    NotOpenForReading,
    SystemResources,
    Unseekable,
    WouldBlock,
};
499
+
500
/// Queues a positional read from `fd` at `offset` into `buffer` through `submit`.
/// The requested length is `buffer_limit(buffer.len)` narrowed to `u32`.
/// `callback` is handed `context`, `completion` and either the byte count or a `ReadError`.
pub fn read(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: ReadError!usize,
    ) void,
    completion: *Completion,
    fd: fd_t,
    buffer: []u8,
    offset: u64,
) void {
    const Impl = struct {
        fn do_operation(op: anytype) ReadError!usize {
            while (true) {
                const rc = posix.system.pread(
                    op.fd,
                    op.buf,
                    op.len,
                    @bitCast(op.offset),
                );
                switch (posix.errno(rc)) {
                    .SUCCESS => return @intCast(rc),
                    // Interrupted by a signal: simply issue the pread again.
                    .INTR => continue,
                    .AGAIN => return error.WouldBlock,
                    .BADF => return error.NotOpenForReading,
                    .CONNRESET => return error.ConnectionResetByPeer,
                    .FAULT => unreachable,
                    .INVAL => return error.Alignment,
                    .IO => return error.InputOutput,
                    .ISDIR => return error.IsDir,
                    .NOBUFS, .NOMEM => return error.SystemResources,
                    .NXIO, .OVERFLOW, .SPIPE => return error.Unseekable,
                    .TIMEDOUT => return error.ConnectionTimedOut,
                    else => |err| return stdx.unexpected_errno("read", err),
                }
            }
        }
    };

    self.submit(
        context,
        callback,
        completion,
        .read,
        .{
            .fd = fd,
            .buf = buffer.ptr,
            .len = @as(u32, @intCast(buffer_limit(buffer.len))),
            .offset = offset,
        },
        Impl,
    );
}
557
+
558
/// Errors that a `recv` completion may report to its callback.
pub const RecvError = posix.RecvFromError;

/// Queues a receive on `socket` into `buffer` through `submit`.
/// The requested length is `buffer_limit(buffer.len)` narrowed to `u32`.
/// `callback` is handed `context`, `completion` and either the byte count or a `RecvError`.
pub fn recv(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: RecvError!usize,
    ) void,
    completion: *Completion,
    socket: socket_t,
    buffer: []u8,
) void {
    const Impl = struct {
        fn do_operation(op: anytype) RecvError!usize {
            const target = op.buf[0..op.len];
            return posix.recv(op.socket, target, 0);
        }
    };

    self.submit(
        context,
        callback,
        completion,
        .recv,
        .{
            .socket = socket,
            .buf = buffer.ptr,
            .len = @as(u32, @intCast(buffer_limit(buffer.len))),
        },
        Impl,
    );
}
590
+
591
/// Errors that a `send` completion may report to its callback:
/// `posix.SendError` widened with `ConnectionRefused`.
pub const SendError = error{ConnectionRefused} || posix.SendError;

/// Queues a send of `buffer` on `socket` through `submit`.
/// The requested length is `buffer_limit(buffer.len)` narrowed to `u32`.
/// `callback` is handed `context`, `completion` and either the byte count or a `SendError`.
pub fn send(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: SendError!usize,
    ) void,
    completion: *Completion,
    socket: socket_t,
    buffer: []const u8,
) void {
    const Impl = struct {
        fn do_operation(op: anytype) SendError!usize {
            // `posix.sendto` is used rather than `posix.send` because UDP sockets
            // may return `ConnectionRefused`.
            // https://github.com/ziglang/zig/issues/20219
            // https://github.com/ziglang/zig/pull/20223
            const payload = op.buf[0..op.len];
            return posix.sendto(op.socket, payload, 0, null, 0) catch |err| switch (err) {
                // These sendto-specific errors are treated as impossible here
                // (no destination address is passed).
                error.AddressFamilyNotSupported,
                error.SymLinkLoop,
                error.NameTooLong,
                error.FileNotFound,
                error.NotDir,
                error.NetworkUnreachable,
                error.AddressNotAvailable,
                error.SocketNotConnected,
                error.UnreachableAddress,
                => unreachable,
                else => |e| return e,
            };
        }
    };

    self.submit(
        context,
        callback,
        completion,
        .send,
        .{
            .socket = socket,
            .buf = buffer.ptr,
            .len = @as(u32, @intCast(buffer_limit(buffer.len))),
        },
        Impl,
    );
}
644
+
645
/// Best-effort non-blocking synchronous send.
/// This implementation has no support for it and always returns `null`.
pub fn send_now(_: *IO, _: socket_t, _: []const u8) ?usize {
    return null;
}
648
+
649
/// Errors that a `timeout` completion may report to its callback.
pub const TimeoutError = error{Canceled} || posix.UnexpectedError;

/// Queues a timeout that expires `nanoseconds` after the current monotonic time.
/// A zero timeout is special-cased as a yield: `completion` is pushed directly onto
/// `self.completed` with a callback wrapper that reports success.
pub fn timeout(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: TimeoutError!void,
    ) void,
    completion: *Completion,
    nanoseconds: u63,
) void {
    if (nanoseconds > 0) {
        const Impl = struct {
            fn do_operation(_: anytype) TimeoutError!void {
                // Timeouts don't have errors for now.
                return;
            }
        };

        self.submit(
            context,
            callback,
            completion,
            .timeout,
            .{
                .expires = self.time_os.time().monotonic().ns + nanoseconds,
            },
            Impl,
        );
        return;
    }

    // Zero timeout: treat it as a yield and skip `submit` entirely.
    const Yield = struct {
        fn on_complete(_: *IO, yielded: *Completion) void {
            const yielded_context: Context = @ptrCast(@alignCast(yielded.context));
            callback(yielded_context, yielded, {});
        }
    };

    completion.* = .{
        .link = .{},
        .context = context,
        .operation = undefined,
        .callback = Yield.on_complete,
    };
    self.completed.push(completion);
}
697
+
698
/// Errors that a `write` completion may report to its callback.
pub const WriteError = posix.PWriteError;

/// Queues a positional write of `buffer` to `fd` at `offset` through `submit`.
/// The requested length is `buffer_limit(buffer.len)` narrowed to `u32`.
/// After a successful write the operation also calls `fs_sync` on `fd`.
/// `callback` is handed `context`, `completion` and either the byte count or a `WriteError`.
pub fn write(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: WriteError!usize,
    ) void,
    completion: *Completion,
    fd: fd_t,
    buffer: []const u8,
    offset: u64,
) void {
    self.submit(
        context,
        callback,
        completion,
        .write,
        .{
            .fd = fd,
            .buf = buffer.ptr,
            .len = @as(u32, @intCast(buffer_limit(buffer.len))),
            .offset = offset,
        },
        struct {
            fn do_operation(op: anytype) WriteError!usize {
                // In the current implementation, Darwin file IO (namely, the posix.pwrite
                // below) is _synchronous_, so it's safe to call fs_sync after it has
                // completed.
                //
                // Return a write failure immediately: there is nothing new to flush, and
                // syncing anyway could replace the write error with a sync error.
                const written = try posix.pwrite(op.fd, op.buf[0..op.len], op.offset);
                try fs_sync(op.fd);

                return written;
            }
        },
    );
}
738
+
739
/// Identifier of a user event registered with the kqueue.
pub const Event = usize;
/// Reserved identifier that never names a registered event.
pub const INVALID_EVENT: Event = 0;

/// Registers a new EVFILT.USER event with the kqueue and returns its identifier.
/// Identifiers are produced by incrementing `self.event_id`.
pub fn open_event(self: *IO) !Event {
    self.event_id += 1;
    const event = self.event_id;
    assert(event != INVALID_EVENT);

    var changes = mem.zeroes([1]posix.Kevent);
    changes[0].ident = event;
    changes[0].filter = posix.system.EVFILT.USER;
    changes[0].flags = posix.system.EV.ADD | posix.system.EV.ENABLE | posix.system.EV.CLEAR;

    // Only submit the change; the empty event list means nothing is polled.
    const polled = posix.kevent(self.kq, &changes, changes[0..0], null) catch |err| {
        switch (err) {
            error.AccessDenied => unreachable, // EVFILT_USER is allowed for every user.
            error.EventNotFound => unreachable, // Not modifying or deleting an existing one.
            error.ProcessNotFound => unreachable, // Not monitoring a process.
            error.Overflow, error.SystemResources => return error.SystemResources,
        }
    };
    assert(polled == 0);

    return event;
}
764
+
765
/// Prepares `completion` so that its callback forwards to `on_event`, and counts it as
/// in flight by incrementing `self.io_inflight`.
/// `event` is only checked against `INVALID_EVENT` here.
pub fn event_listen(
    self: *IO,
    event: Event,
    completion: *Completion,
    comptime on_event: fn (*Completion) void,
) void {
    assert(event != INVALID_EVENT);

    const Forward = struct {
        fn on_complete(_: *IO, completion_inner: *Completion) void {
            on_event(completion_inner);
        }
    };

    completion.* = .{
        .link = .{},
        .context = null,
        .operation = undefined,
        .callback = Forward.on_complete,
    };
    self.io_inflight += 1;
}
785
+
786
/// Triggers the user event `event` (NOTE.TRIGGER), attaching the address of `completion`
/// as the kevent's `udata`.
pub fn event_trigger(self: *IO, event: Event, completion: *Completion) void {
    assert(event != INVALID_EVENT);

    var changes = mem.zeroes([1]posix.Kevent);
    changes[0].ident = event;
    changes[0].filter = posix.system.EVFILT.USER;
    changes[0].fflags = posix.system.NOTE.TRIGGER;
    changes[0].udata = @intFromPtr(completion);

    // Only submit the change; the empty event list means nothing is polled.
    const polled: usize = posix.kevent(self.kq, &changes, changes[0..0], null) catch unreachable;
    assert(polled == 0);
}
798
+
799
/// Removes the user event `event` from the kqueue (EV.DELETE).
pub fn close_event(self: *IO, event: Event) void {
    assert(event != INVALID_EVENT);

    var changes = mem.zeroes([1]posix.Kevent);
    changes[0].ident = event;
    changes[0].filter = posix.system.EVFILT.USER;
    changes[0].flags = posix.system.EV.DELETE;
    changes[0].udata = 0; // Not needed for EV_DELETE.

    // Only submit the change; the empty event list means nothing is polled.
    const polled = posix.kevent(self.kq, &changes, changes[0..0], null) catch unreachable;
    assert(polled == 0);
}
811
+
812
pub const socket_t = posix.socket_t;

/// Creates a TCP socket that can be used for async operations with the IO instance.
/// `options` are applied with `common.tcp_options`; the socket is closed again if that fails.
pub fn open_socket_tcp(self: *IO, family: u32, options: TCPOptions) !socket_t {
    const sock_type = posix.SOCK.STREAM | posix.SOCK.NONBLOCK;
    const sock_fd = try self.open_socket(family, sock_type, posix.IPPROTO.TCP);
    errdefer self.close_socket(sock_fd);

    try common.tcp_options(sock_fd, options);
    return sock_fd;
}
826
+
827
/// Creates a UDP socket that can be used for async operations with the IO instance.
pub fn open_socket_udp(self: *IO, family: u32) !socket_t {
    const sock_type = posix.SOCK.DGRAM | posix.SOCK.NONBLOCK;
    return try self.open_socket(family, sock_type, posix.IPPROTO.UDP);
}
835
+
836
/// Shared helper behind `open_socket_tcp` and `open_socket_udp`.
/// Creates a socket with SOCK.NONBLOCK added to `sock_type`, then sets FD_CLOEXEC and
/// SO.NOSIGPIPE on it. The socket is closed again if either of those steps fails.
fn open_socket(self: *IO, family: u32, sock_type: u32, protocol: u32) !socket_t {
    const sock_fd = try posix.socket(family, sock_type | posix.SOCK.NONBLOCK, protocol);
    errdefer self.close_socket(sock_fd);

    // Darwin doesn't support SOCK_CLOEXEC, so the flag is set with fcntl() instead.
    _ = try posix.fcntl(sock_fd, posix.F.SETFD, posix.FD_CLOEXEC);

    // Darwin doesn't support posix.MSG_NOSIGNAL either; SIGPIPE is avoided with a socket
    // option instead.
    try common.setsockopt(sock_fd, posix.SOL.SOCKET, posix.SO.NOSIGPIPE, 1);

    return sock_fd;
}
851
+
852
/// Closes a socket opened by the IO instance.
pub fn close_socket(_: *IO, socket: socket_t) void {
    posix.close(socket);
}
857
+
858
/// Listens on the given TCP socket by delegating to `common.listen`.
/// Returns the socket's resolved address, which might be more specific
/// than the input address (e.g., when listening on port 0).
pub fn listen(
    _: *IO,
    fd: socket_t,
    address: std.net.Address,
    options: ListenOptions,
) !std.net.Address {
    const resolved = try common.listen(fd, address, options);
    return resolved;
}
869
+
870
/// Shuts down `socket` in the direction(s) given by `how`; forwards to `posix.shutdown`.
pub fn shutdown(
    _: *IO,
    socket: socket_t,
    how: posix.ShutdownHow,
) posix.ShutdownError!void {
    return posix.shutdown(socket, how);
}
873
+
874
/// Opens a directory with read only access (and CLOEXEC set).
pub fn open_dir(dir_path: []const u8) !fd_t {
    const flags: posix.O = .{ .CLOEXEC = true, .ACCMODE = .RDONLY };
    return posix.open(dir_path, flags, 0);
}
878
+
879
pub const fd_t = posix.fd_t;
pub const INVALID_FILE: fd_t = -1;

/// Why a data file is being opened: `format` creates it, `open` and `inspect` open an
/// existing one (`inspect` read-only).
pub const OpenDataFilePurpose = enum { format, open, inspect };

/// Opens or creates a journal file:
/// - Read-only for `.inspect`, otherwise for reading and writing.
/// - With the page cache disabled via F_NOCACHE unless `direct_io` is `.direct_io_disabled`.
/// - Obtains an advisory exclusive lock to the file descriptor. If another process holds the
///   lock this panics, except for `.inspect`, which only logs a warning.
/// - For `.format`, allocates the file contiguously on disk if the file system supports it.
/// - Ensures that the file data (and file inode in the parent directory) is durable on disk.
///   The caller is responsible for ensuring that the parent directory inode is durable.
/// - Panics if the file is smaller than `size` before returning.
pub fn open_data_file(
    self: *IO,
    dir_fd: fd_t,
    relative_path: []const u8,
    size: u64,
    purpose: OpenDataFilePurpose,
    direct_io: DirectIO,
) !fd_t {
    _ = self;

    assert(relative_path.len > 0);
    assert(size % constants.sector_size == 0);

    // TODO Use O_EXCL when opening as a block device to obtain a mandatory exclusive lock.
    // This is much stronger than an advisory exclusive lock, and is required on some platforms.

    // Normally, O_DSYNC enables us to omit fsync() calls in the data plane, since we sync to
    // the disk on every write, but that's not the case for Darwin:
    // https://x.com/TigerBeetleDB/status/1536628729031581697
    // To work around this, fs_sync() is explicitly called after writing in do_operation.
    var flags: posix.O = .{
        .CLOEXEC = true,
        .ACCMODE = switch (purpose) {
            .inspect => .RDONLY,
            .format, .open => .RDWR,
        },
        .DSYNC = true,
    };

    // Permission bits only matter when the file is created.
    const mode: posix.mode_t = if (purpose == .format) 0o666 else 0;

    // TODO Document this and investigate whether this is in fact correct to set here.
    if (@hasField(posix.O, "LARGEFILE")) flags.LARGEFILE = true;

    switch (purpose) {
        .format => {
            // Create the file, and fail if it already exists.
            flags.CREAT = true;
            flags.EXCL = true;
            log.info("creating \"{s}\"...", .{relative_path});
        },
        .open, .inspect => log.info("opening \"{s}\"...", .{relative_path}),
    }

    // This is critical as we rely on O_DSYNC for fsync() whenever we write to the file:
    assert(flags.DSYNC);

    // Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
    assert(!std.fs.path.isAbsolute(relative_path));
    const fd = try posix.openat(dir_fd, relative_path, flags, mode);
    // TODO Return a proper error message when the path exists or does not exist (init/start).
    errdefer posix.close(fd);

    // TODO Check that the file is actually a file.

    // On darwin assume that Direct I/O is always supported.
    // Use F_NOCACHE to disable the page cache as O_DIRECT doesn't exist.
    if (direct_io != .direct_io_disabled) {
        _ = try posix.fcntl(fd, posix.F.NOCACHE, 1);
    }

    // Obtain an advisory exclusive lock that works only if all processes actually use flock().
    // LOCK_NB means that we want to fail the lock without waiting if another process has it.
    posix.flock(fd, posix.LOCK.EX | posix.LOCK.NB) catch |err| switch (err) {
        error.WouldBlock => {
            if (purpose != .inspect) {
                @panic("another process holds the data file lock");
            }
            log.warn(
                "another process holds the data file lock - results may be inconsistent",
                .{},
            );
        },
        else => return err,
    };

    // Ask the file system to allocate contiguous sectors for the file (if possible):
    // If the file system does not support `fallocate()`, then this could mean more seeks or a
    // panic if we run out of disk space (ENOSPC).
    if (purpose == .format) {
        try fs_allocate(fd, size);
    }

    // The best fsync strategy is always to fsync before reading because this prevents us from
    // making decisions on data that was never durably written by a previously crashed process.
    // We therefore always fsync when we open the path, also to wait for any pending O_DSYNC.
    // Thanks to Alex Miller from FoundationDB for diving into our source and pointing this out.
    try fs_sync(fd);

    // We fsync the parent directory to ensure that the file inode is durably written.
    // The caller is responsible for the parent directory inode stored under the grandparent.
    // We always do this when opening because we don't know if this was done before crashing.
    try fs_sync(dir_fd);

    // TODO Document that `size` is now `data_file_size_min` from `main.zig`.
    const file_stat = try posix.fstat(fd);
    if (file_stat.size < size) {
        @panic("data file inode size was truncated or corrupted");
    }

    return fd;
}
988
+
989
/// Darwin's fsync() syscall does not flush past the disk cache. We must use F_FULLFSYNC
/// instead. If the F_FULLFSYNC fcntl fails, this falls back to `posix.fsync`.
/// https://twitter.com/TigerBeetleDB/status/1422491736224436225
fn fs_sync(fd: fd_t) !void {
    // TODO: This is of dubious safety - it's _not_ safe to fall back on posix.fsync unless it's
    // known at startup that the disk (eg, an external disk on a Mac) doesn't support
    // F_FULLFSYNC.
    if (posix.fcntl(fd, posix.F.FULLFSYNC, 1)) |_| {
        return;
    } else |_| {
        return posix.fsync(fd);
    }
}
998
+
999
/// Preallocates `size` bytes for `fd` with the F_PREALLOCATE fcntl, first asking for
/// contiguous space and then retrying without that requirement, and finally sets the file
/// size with ftruncate().
fn fs_allocate(fd: fd_t, size: u64) !void {
    log.info("allocating {}...", .{std.fmt.fmtIntSizeBin(size)});

    // Darwin doesn't have fallocate() but we can simulate it using fcntl()s.
    //
    // https://stackoverflow.com/a/11497568
    // https://api.kde.org/frameworks/kcoreaddons/html/posix__fallocate__mac_8h_source.html
    // http://hg.mozilla.org/mozilla-central/file/3d846420a907/xpcom/glue/FileUtils.cpp#l61

    const F_ALLOCATECONTIG = 0x2; // Allocate contiguous space.
    const F_ALLOCATEALL = 0x4; // Allocate all or nothing.
    const F_PEOFPOSMODE = 3; // Allocate from the physical end of file.
    const fstore_t = extern struct {
        fst_flags: c_uint,
        fst_posmode: c_int,
        fst_offset: posix.off_t,
        fst_length: posix.off_t,
        fst_bytesalloc: posix.off_t,
    };

    var store = fstore_t{
        .fst_flags = F_ALLOCATECONTIG | F_ALLOCATEALL,
        .fst_posmode = F_PEOFPOSMODE,
        .fst_offset = 0,
        .fst_length = @intCast(size),
        .fst_bytesalloc = 0,
    };

    // Try to pre-allocate contiguous space and fall back to default non-contiguous.
    const rc = attempt: {
        const contiguous = posix.system.fcntl(fd, posix.F.PREALLOCATE, @intFromPtr(&store));
        if (posix.errno(contiguous) == .SUCCESS) break :attempt contiguous;

        store.fst_flags = F_ALLOCATEALL;
        break :attempt posix.system.fcntl(fd, posix.F.PREALLOCATE, @intFromPtr(&store));
    };

    switch (posix.errno(rc)) {
        .SUCCESS => {},
        .BADF => return error.FileDescriptorInvalid,
        .INVAL => return error.ArgumentsInvalid, // for F_PREALLOCATE (offset invalid)
        .OVERFLOW => return error.FileTooBig,

        // Only reported for other fcntl commands:
        // ACCES: F_SETLK or F_SETSIZE or F_WRITEBOOTSTRAP
        // DEADLK, INTR: F_SETLKW
        // MFILE: F_DUPFD or F_DUPED
        // NOLCK: F_SETLK or F_SETLKW
        // SRCH: F_SETOWN
        .ACCES, .DEADLK, .INTR, .MFILE, .NOLCK, .SRCH => unreachable,

        // Not reported but need same error union.
        .OPNOTSUPP => return error.OperationNotSupported,
        else => |errno| return stdx.unexpected_errno("fs_allocate", errno),
    }

    // Now actually perform the allocation.
    return posix.ftruncate(fd, size) catch |err| switch (err) {
        error.AccessDenied => error.PermissionDenied,
        else => |e| e,
    };
}
1059
+
1060
pub const PReadError = posix.PReadError;

/// Writes `buffer` to `fd`; forwards to `common.aof_blocking_write_all`.
pub fn aof_blocking_write_all(
    _: *IO,
    fd: fd_t,
    buffer: []const u8,
) posix.WriteError!void {
    return common.aof_blocking_write_all(fd, buffer);
}
1065
+
1066
/// Reads from `fd` at `offset` into `buffer`; forwards to `common.aof_blocking_pread_all`
/// and returns its result.
pub fn aof_blocking_pread_all(
    _: *IO,
    fd: fd_t,
    buffer: []u8,
    offset: u64,
) PReadError!usize {
    return common.aof_blocking_pread_all(fd, buffer, offset);
}
1069
+
1070
/// Closes `fd`; forwards to `common.aof_blocking_close`.
pub fn aof_blocking_close(_: *IO, fd: fd_t) void {
    common.aof_blocking_close(fd);
}
1073
+
1074
/// Stats the file at `path`; forwards to `common.aof_blocking_stat`.
pub fn aof_blocking_stat(
    _: *IO,
    path: []const u8,
) std.fs.Dir.StatFileError!std.fs.File.Stat {
    return common.aof_blocking_stat(path);
}
1077
+
1078
/// Stats the open descriptor `fd`; forwards to `common.aof_blocking_fstat`.
pub fn aof_blocking_fstat(
    _: *IO,
    fd: fd_t,
) std.fs.Dir.StatError!std.fs.File.Stat {
    return common.aof_blocking_fstat(fd);
}
1081
+
1082
/// Opens the file at `path`, which may be absolute or relative.
/// The parent directory (or "." when `path` has no directory component) is opened first,
/// handed to `common.aof_blocking_open` together with the file's base name, and closed
/// again before returning.
pub fn aof_blocking_open(io: *IO, path: []const u8) !fd_t {
    stdx.maybe(std.fs.path.isAbsolute(path));

    const parent_fd = try IO.open_dir(std.fs.path.dirname(path) orelse ".");
    defer io.aof_blocking_close(parent_fd);

    return common.aof_blocking_open(parent_fd, std.fs.path.basename(path));
}
1093
+ };