tigerbeetle 0.0.36 → 0.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/tb_client/extconf.rb +13 -13
  4. data/ext/tb_client/tigerbeetle/LICENSE +177 -0
  5. data/ext/tb_client/tigerbeetle/build.zig +2327 -0
  6. data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
  7. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
  8. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
  9. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
  10. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
  11. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
  12. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
  13. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
  14. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1092 -0
  15. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
  16. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
  17. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
  18. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
  19. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
  20. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
  21. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
  22. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
  23. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
  24. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
  25. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
  26. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
  27. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
  28. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
  29. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
  30. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
  31. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
  32. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
  33. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
  34. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
  35. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
  36. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
  37. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
  38. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
  39. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
  40. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
  41. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
  42. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
  43. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
  44. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
  45. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
  46. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
  47. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
  48. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
  49. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
  50. data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
  51. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
  52. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
  53. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
  54. data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
  55. data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
  56. data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
  57. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
  58. data/ext/tb_client/tigerbeetle/src/direction.zig +120 -0
  59. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
  60. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
  61. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
  62. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
  63. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
  64. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
  65. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
  66. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
  67. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
  68. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
  69. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
  70. data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
  71. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
  72. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
  73. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
  74. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
  75. data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
  76. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
  77. data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
  78. data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
  79. data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
  80. data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
  81. data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
  82. data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
  83. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
  84. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
  85. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
  86. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
  87. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
  88. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
  89. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
  90. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
  91. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
  92. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
  93. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
  94. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
  95. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
  96. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
  97. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
  98. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
  99. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
  100. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
  101. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
  102. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
  103. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
  104. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
  105. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +359 -0
  106. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
  107. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
  108. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +962 -0
  109. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
  110. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
  111. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
  112. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
  113. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
  114. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
  115. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
  116. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
  117. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +90 -0
  118. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
  119. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
  120. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
  121. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +534 -0
  122. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
  123. data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
  124. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
  125. data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
  126. data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
  127. data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
  128. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
  129. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
  130. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
  131. data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
  132. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
  133. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
  134. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
  135. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
  136. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
  137. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
  138. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
  139. data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
  140. data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
  141. data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
  142. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
  143. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
  144. data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
  145. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
  146. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
  147. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
  148. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
  149. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
  150. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
  151. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
  152. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
  153. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
  154. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
  155. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
  156. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
  157. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
  158. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
  159. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
  160. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
  161. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
  162. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
  163. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
  164. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
  165. data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
  166. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
  167. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
  168. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
  169. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
  170. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
  171. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
  172. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
  173. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
  174. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
  175. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
  176. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
  177. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
  178. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
  179. data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
  180. data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
  181. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
  182. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
  183. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
  184. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
  185. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
  186. data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
  187. data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
  188. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
  189. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
  190. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
  191. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
  192. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
  193. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
  194. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
  195. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
  196. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
  197. data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
  198. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
  199. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
  200. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
  201. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
  202. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
  203. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
  204. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
  205. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
  206. data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
  207. data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
  208. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
  209. data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
  210. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
  211. data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
  212. data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
  213. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
  214. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
  215. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
  216. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
  217. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
  218. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
  219. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
  220. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
  221. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
  222. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
  223. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
  224. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
  225. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
  226. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
  227. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
  228. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
  229. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
  230. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
  231. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
  232. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
  233. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
  234. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
  235. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2928 -0
  236. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
  237. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
  238. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
  239. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
  240. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
  241. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
  242. data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
  243. data/lib/tb_client/shared_lib.rb +12 -5
  244. data/lib/tigerbeetle/platforms.rb +9 -0
  245. data/lib/tigerbeetle/version.rb +2 -2
  246. data/tigerbeetle.gemspec +22 -5
  247. metadata +242 -3
  248. data/ext/tb_client/pkg.tar.gz +0 -0
@@ -0,0 +1,1598 @@
1
+ const std = @import("std");
2
+ const stdx = @import("stdx");
3
+ const os = std.os;
4
+ const posix = std.posix;
5
+ const assert = std.debug.assert;
6
+ const log = std.log.scoped(.io);
7
+ const constants = @import("../constants.zig");
8
+ const common = @import("./common.zig");
9
+
10
+ const QueueType = @import("../queue.zig").QueueType;
11
+ const TimeOS = @import("../time.zig").TimeOS;
12
+ const buffer_limit = @import("../io.zig").buffer_limit;
13
+ const DirectIO = @import("../io.zig").DirectIO;
14
+
15
+ pub const IO = struct {
16
/// TCP socket options, re-exported from the platform-independent `common` module.
pub const TCPOptions = common.TCPOptions;
/// Listen options, re-exported from the platform-independent `common` module.
pub const ListenOptions = common.ListenOptions;

/// Handle of the I/O completion port created in `init` and closed in `deinit`.
iocp: os.windows.HANDLE,
/// Clock used by `flush_timeouts` to read the monotonic time.
time_os: TimeOS = .{},
/// Number of overlapped operations handed to the OS whose completion events
/// have not been dequeued from the completion port yet.
io_pending: usize = 0,
/// Timeout completions waiting for their deadline.
timeouts: QueueType(Completion) = QueueType(Completion).init(.{ .name = "io_timeouts" }),
/// Completions whose callback wrapper should be invoked by the next `flush`.
completed: QueueType(Completion) = QueueType(Completion).init(.{ .name = "io_completed" }),
24
+
25
/// Starts Winsock and creates the I/O completion port backing this IO instance.
///
/// `entries` and `flags` are accepted but ignored by this implementation.
/// If creating the completion port fails, Winsock is cleaned up again before
/// the error is returned.
pub fn init(entries: u12, flags: u32) !IO {
    _ = entries;
    _ = flags;

    // Winsock must be started before any socket call; `deinit` undoes this.
    _ = try os.windows.WSAStartup(2, 2);
    errdefer os.windows.WSACleanup() catch unreachable;

    // No existing file handle and no existing port: ask for a brand new port.
    const completion_port = try os.windows.CreateIoCompletionPort(
        os.windows.INVALID_HANDLE_VALUE,
        null,
        0,
        0,
    );

    return .{ .iocp = completion_port };
}
40
+
41
/// Closes the completion port and shuts Winsock down.
///
/// The stored handle is overwritten with `INVALID_HANDLE_VALUE`, so calling
/// `deinit` a second time trips the assertion.
pub fn deinit(self: *IO) void {
    const port = self.iocp;
    assert(port != os.windows.INVALID_HANDLE_VALUE);

    os.windows.CloseHandle(port);
    self.iocp = os.windows.INVALID_HANDLE_VALUE;

    os.windows.WSACleanup() catch unreachable;
}
48
+
49
/// Performs a single non-blocking `flush` pass.
pub fn run(self: *IO) !void {
    try self.flush(.non_blocking);
}
52
+
53
/// Keeps flushing in blocking mode until a timeout of `nanoseconds` has fired.
///
/// A stack-allocated timeout completion is submitted first; its callback flips
/// a local flag, which ends the loop.
pub fn run_for_ns(self: *IO, nanoseconds: u63) !void {
    const Waiter = struct {
        fn on_timeout(
            expired: *bool,
            completion: *Completion,
            result: TimeoutError!void,
        ) void {
            _ = completion;
            // The timeout is never expected to fail here.
            result catch unreachable;
            expired.* = true;
        }
    };

    var timed_out = false;
    var completion: Completion = undefined;
    self.timeout(*bool, &timed_out, Waiter.on_timeout, &completion, nanoseconds);

    while (!timed_out) try self.flush(.blocking);
}
74
+
75
/// Selects how long `flush` may wait on the completion port.
const FlushMode = enum {
    /// Wait for IO for as long as the nearest pending timeout allows.
    blocking,
    /// Poll for IO with a zero timeout.
    non_blocking,
};
79
+
80
/// Moves expired timeouts and finished overlapped IO onto the `completed`
/// queue, then invokes the callback wrapper of every completion on it.
///
/// The completion port is only polled when nothing is ready yet and at least
/// one overlapped operation is in flight. In `.blocking` mode the wait is
/// bounded by the nearest timeout (a pending timeout is required, otherwise
/// this panics); in `.non_blocking` mode the poll returns immediately.
fn flush(self: *IO, mode: FlushMode) !void {
    if (self.completed.empty()) {
        // Compute how long to poll by flushing timeout completions.
        // NOTE: this may push to completed queue.
        var timeout_ms: ?os.windows.DWORD = null;
        if (self.flush_timeouts()) |expires_ns| {
            // 0ns expires should have been completed not returned.
            assert(expires_ns != 0);
            // Round up to the next whole millisecond. Rounding to nearest would
            // turn a remaining time below half a millisecond into a 0ms wait,
            // making a blocking flush spin until the deadline passes.
            const expires_ms = std.math.divCeil(
                u64,
                expires_ns,
                std.time.ns_per_ms,
            ) catch unreachable;
            // Saturating cast to DWORD milliseconds.
            const expires = std.math.cast(os.windows.DWORD, expires_ms) orelse
                std.math.maxInt(os.windows.DWORD);
            // Max DWORD is reserved for INFINITE so cap the cast at max - 1.
            timeout_ms = if (expires == os.windows.INFINITE) expires - 1 else expires;
        }

        // Poll for IO iff there's IO pending and flush_timeouts() found no ready completions.
        if (self.io_pending > 0 and self.completed.empty()) {
            // In blocking mode, we're always waiting at least until the timeout by run_for_ns.
            // In non-blocking mode, we shouldn't wait at all.
            const io_timeout = switch (mode) {
                .blocking => timeout_ms orelse @panic("IO.flush blocking unbounded"),
                .non_blocking => 0,
            };

            var events: [64]os.windows.OVERLAPPED_ENTRY = undefined;
            const num_events: u32 = os.windows.GetQueuedCompletionStatusEx(
                self.iocp,
                &events,
                io_timeout,
                false, // Non-alertable wait.
            ) catch |err| switch (err) {
                error.Timeout => 0,
                error.Aborted => unreachable,
                else => |e| return e,
            };

            assert(self.io_pending >= num_events);
            self.io_pending -= num_events;

            for (events[0..num_events]) |event| {
                // Each OVERLAPPED handed to the OS is the `raw` field of a
                // `Completion.Overlapped`, which points back at its completion.
                const raw_overlapped = event.lpOverlapped;
                const overlapped: *Completion.Overlapped = @fieldParentPtr(
                    "raw",
                    raw_overlapped,
                );
                const completion = overlapped.completion;
                completion.link = .{};
                self.completed.push(completion);
            }
        }
    }

    // Dequeue and invoke all the completions currently ready.
    // Must read all `completions` before invoking the callbacks
    // as the callbacks could potentially submit more completions.
    var completed = self.completed;
    self.completed.reset();
    while (completed.pop()) |completion| {
        (completion.callback)(Completion.Context{
            .io = self,
            .completion = completion,
        });
    }
}
146
+
147
/// Moves every expired timeout onto the `completed` queue.
///
/// Returns the number of nanoseconds until the nearest timeout that is still
/// waiting, or null when no timeout remains queued. The clock is read at most
/// once, and only if at least one timeout is queued.
fn flush_timeouts(self: *IO) ?u64 {
    var nearest_ns: ?u64 = null;
    var cached_now: ?u64 = null;

    var it = self.timeouts.iterate();
    while (it.next()) |completion| {
        // Sample the clock lazily on the first queued timeout, then reuse it.
        const now = cached_now orelse self.time_os.time().monotonic().ns;
        cached_now = now;

        const deadline = completion.operation.timeout.deadline;

        // Expired: hand the completion over to the completed queue.
        if (now >= deadline) {
            self.timeouts.remove(completion);
            self.completed.push(completion);
            continue;
        }

        // Still waiting: keep track of the smallest remaining time.
        const remaining = deadline - now;
        nearest_ns = if (nearest_ns) |best| @min(remaining, best) else remaining;
    }

    return nearest_ns;
}
176
+
177
/// State for one in-flight IO operation: the queue link, the caller's context,
/// the callback wrapper, and the per-operation data.
pub const Completion = struct {
    /// Intrusive link used by the IO's `timeouts` and `completed` queues.
    link: QueueType(Completion).Link,
    /// Type-erased caller context; `submit`'s wrapper casts it back.
    context: ?*anyopaque,
    /// Wrapper generated by `submit`; `flush` calls it for ready completions.
    callback: *const fn (Context) void,
    /// Which operation this is, together with its arguments and state.
    operation: Operation,

    /// Argument passed to `callback`.
    const Context = struct {
        io: *IO,
        completion: *Completion,
    };

    /// An OVERLAPPED plus a back-pointer to its owning completion.
    /// `flush` recovers this struct from the OVERLAPPED pointer in a
    /// completion port event via `@fieldParentPtr("raw", ...)`.
    const Overlapped = struct {
        raw: os.windows.OVERLAPPED,
        completion: *Completion,
    };

    /// Operation data shared by `send` and `recv`.
    const Transfer = struct {
        socket: socket_t,
        buf: os.windows.ws2_32.WSABUF,
        overlapped: Overlapped,
        // NOTE(review): presumably set once the overlapped call has been
        // started, as `connect` does with its own `pending` flag; confirm.
        pending: bool,
    };

    const Operation = union(enum) {
        accept: struct {
            overlapped: Overlapped,
            listen_socket: socket_t,
            /// Null until the accept has been started.
            client_socket: ?socket_t,
            /// Address output buffer handed to AcceptEx: two slots of
            /// `@sizeOf(std.net.Address) + 16` bytes each.
            addr_buffer: [(@sizeOf(std.net.Address) + 16) * 2]u8 align(4),
        },
        connect: struct {
            socket: socket_t,
            address: std.net.Address,
            overlapped: Overlapped,
            /// True once the connect has been started and is being polled.
            pending: bool,
        },
        fsync: struct {
            fd: fd_t,
        },
        send: Transfer,
        recv: Transfer,
        read: struct {
            fd: fd_t,
            buf: [*]u8,
            len: u32,
            offset: u64,
            overlapped: Overlapped,
            pending: bool,
        },
        write: struct {
            fd: fd_t,
            buf: [*]const u8,
            len: u32,
            offset: u64,
            overlapped: Overlapped,
            pending: bool,
        },
        close: struct {
            fd: fd_t,
        },
        timeout: struct {
            /// Monotonic time in nanoseconds; `flush_timeouts` completes the
            /// timeout once the clock reaches it.
            deadline: u64,
        },
        event: Overlapped,
    };
};
244
+
245
/// Initializes `completion` for the operation `op_tag` and queues it.
///
/// Timeouts go onto the `timeouts` queue; everything else goes straight onto
/// the `completed` queue, so the operation itself is attempted during the next
/// `flush` by the generated wrapper. `OperationImpl.do_operation(ctx, op)`
/// performs (or polls) the operation. For overlapped operations, a result of
/// `error.WouldBlock` means the completion port will report the completion
/// later, so the user callback is not invoked yet.
fn submit(
    self: *IO,
    context: anytype,
    comptime callback: anytype,
    completion: *Completion,
    comptime op_tag: std.meta.Tag(Completion.Operation),
    op_data: std.meta.TagPayload(Completion.Operation, op_tag),
    comptime OperationImpl: type,
) void {
    const Wrapper = struct {
        fn on_complete(ctx: Completion.Context) void {
            // Run the operation against its own slot in the union.
            const op = &@field(ctx.completion.operation, @tagName(op_tag));
            const result = OperationImpl.do_operation(ctx, op);

            // Only the overlapped operations can report WouldBlock.
            switch (op_tag) {
                .accept, .read, .recv, .connect, .write, .send => {
                    if (result) |_| {} else |err| switch (err) {
                        error.WouldBlock => {
                            // `flush` decrements this when the event is dequeued.
                            ctx.io.io_pending += 1;
                            return;
                        },
                        else => {},
                    }
                },
                else => {},
            }

            // Finished, successfully or not: report to the caller.
            callback(
                @ptrCast(@alignCast(ctx.completion.context)),
                ctx.completion,
                result,
            );
        }
    };

    completion.* = .{
        .link = .{},
        .context = @ptrCast(context),
        .callback = Wrapper.on_complete,
        .operation = @unionInit(Completion.Operation, @tagName(op_tag), op_data),
    };

    switch (op_tag) {
        .timeout => self.timeouts.push(completion),
        else => self.completed.push(completion),
    }
}
297
+
298
/// Currently a no-op on Windows; see the TODO below.
pub fn cancel_all(_: *IO) void {
    // TODO Cancel in-flight async IO and wait for all completions.
}
301
+
302
/// Errors a cancel callback may receive.
pub const CancelError = error{
    NotRunning,
    NotInterruptable,
} || posix.UnexpectedError;
306
+
307
/// Cancellation of a single operation is not implemented on Windows:
/// calling this always panics. All parameters are ignored.
pub fn cancel(
    _: *IO,
    comptime Context: type,
    _: Context,
    comptime _: fn (
        context: Context,
        completion: *Completion,
        result: CancelError!void,
    ) void,
    _: struct {
        completion: *Completion,
        target: *Completion,
    },
) void {
    @panic("cancelation is not supported on windows");
}
323
+
324
/// Errors an `accept` callback may receive.
pub const AcceptError = posix.AcceptError || posix.SetSockOptError;
325
+
326
/// Asynchronously accepts one connection on the listening socket `socket`.
///
/// `callback` receives the accepted client socket or an `AcceptError`.
/// The operation runs in two phases inside `do_operation`: the first call
/// creates the client socket and starts an overlapped AcceptEx; once the
/// completion port reports the event, the second call fetches the result.
/// On any error other than `error.WouldBlock` the client socket is closed.
pub fn accept(
    self: *IO,
    comptime Context: type,
    context: Context,
    comptime callback: fn (
        context: Context,
        completion: *Completion,
        result: AcceptError!socket_t,
    ) void,
    completion: *Completion,
    socket: socket_t,
) void {
    self.submit(
        context,
        callback,
        completion,
        .accept,
        .{
            .overlapped = undefined,
            .listen_socket = socket,
            .client_socket = null,
            .addr_buffer = undefined,
        },
        struct {
            fn do_operation(
                ctx: Completion.Context,
                op: anytype,
            ) AcceptError!socket_t {
                var flags: os.windows.DWORD = undefined;
                var transferred: os.windows.DWORD = undefined;

                const rc = if (op.client_socket == null) blk: {
                    // When first called, the client_socket is invalid so we start the op.
                    // Create the socket that will be used for accept.
                    op.client_socket = ctx.io.open_socket(
                        posix.AF.INET,
                        posix.SOCK.STREAM,
                        posix.IPPROTO.TCP,
                    ) catch |err| switch (err) {
                        error.AddressFamilyNotSupported => unreachable,
                        error.ProtocolNotSupported => unreachable,
                        else => |e| return e,
                    };

                    var sync_bytes_read: os.windows.DWORD = undefined;
                    op.overlapped = .{
                        .raw = std.mem.zeroes(os.windows.OVERLAPPED),
                        .completion = ctx.completion,
                    };

                    // Start the asynchronous accept with the created socket.
                    // The receive length is 0, so no data is read as part of the accept.
                    break :blk os.windows.ws2_32.AcceptEx(
                        op.listen_socket,
                        op.client_socket.?,
                        &op.addr_buffer,
                        0,
                        @sizeOf(std.net.Address) + 16,
                        @sizeOf(std.net.Address) + 16,
                        &sync_bytes_read,
                        &op.overlapped.raw,
                    );
                } else blk: {
                    // Called after accept was started, so get the result.
                    break :blk os.windows.ws2_32.WSAGetOverlappedResult(
                        op.listen_socket,
                        &op.overlapped.raw,
                        &transferred,
                        os.windows.FALSE, // Don't wait.
                        &flags,
                    );
                };

                // Return the socket if we succeed in accepting.
                if (rc != os.windows.FALSE) {
                    // Enables getsockopt, setsockopt, getsockname, getpeername.
                    // SO_UPDATE_ACCEPT_CONTEXT expects the listening socket handle
                    // as its option value. The result is deliberately ignored:
                    // the connection itself has already been accepted.
                    _ = os.windows.ws2_32.setsockopt(
                        op.client_socket.?,
                        os.windows.ws2_32.SOL.SOCKET,
                        os.windows.ws2_32.SO.UPDATE_ACCEPT_CONTEXT,
                        @ptrCast(&op.listen_socket),
                        @sizeOf(socket_t),
                    );

                    return op.client_socket.?;
                }

                // Destroy the client_socket we created if we get a non WouldBlock error.
                errdefer |err| switch (err) {
                    error.WouldBlock => {},
                    else => {
                        ctx.io.close_socket(op.client_socket.?);
                        op.client_socket = null;
                    },
                };

                return switch (os.windows.ws2_32.WSAGetLastError()) {
                    .WSA_IO_PENDING, .WSAEWOULDBLOCK, .WSA_IO_INCOMPLETE => error.WouldBlock,
                    .WSANOTINITIALISED => unreachable, // WSAStartup() was called.
                    .WSAENETDOWN => unreachable, // WinSock error.
                    .WSAENOTSOCK => error.FileDescriptorNotASocket,
                    .WSAEOPNOTSUPP => error.OperationNotSupported,
                    .WSA_INVALID_HANDLE => unreachable, // We don't use hEvent in OVERLAPPED.
                    .WSAEFAULT, .WSA_INVALID_PARAMETER => unreachable, // Params should be ok.
                    .WSAECONNRESET => error.ConnectionAborted,
                    .WSAEMFILE => unreachable, // We create our own descriptor so its available.
                    .WSAENOBUFS => error.SystemResources,
                    .WSAEINTR, .WSAEINPROGRESS => unreachable, // No blocking calls.
                    else => |err| os.windows.unexpectedWSAError(err),
                };
            }
        },
    );
}
439
+
440
/// Errors a close callback may receive.
pub const CloseError = error{
    FileDescriptorInvalid,
    DiskQuota,
    InputOutput,
    NoSpaceLeft,
} || posix.UnexpectedError;
446
+
447
/// Errors a `connect` callback may receive.
pub const ConnectError = posix.ConnectError || error{FileDescriptorNotASocket};
448
+
449
+ pub fn connect(
450
+ self: *IO,
451
+ comptime Context: type,
452
+ context: Context,
453
+ comptime callback: fn (
454
+ context: Context,
455
+ completion: *Completion,
456
+ result: ConnectError!void,
457
+ ) void,
458
+ completion: *Completion,
459
+ socket: socket_t,
460
+ address: std.net.Address,
461
+ ) void {
462
+ self.submit(
463
+ context,
464
+ callback,
465
+ completion,
466
+ .connect,
467
+ .{
468
+ .socket = socket,
469
+ .address = address,
470
+ .overlapped = undefined,
471
+ .pending = false,
472
+ },
473
+ struct {
474
+ fn do_operation(ctx: Completion.Context, op: anytype) ConnectError!void {
475
+ var flags: os.windows.DWORD = undefined;
476
+ var transferred: os.windows.DWORD = undefined;
477
+
478
+ const rc = blk: {
479
+ // Poll for the result if we've already started the connect op.
480
+ if (op.pending) {
481
+ break :blk os.windows.ws2_32.WSAGetOverlappedResult(
482
+ op.socket,
483
+ &op.overlapped.raw,
484
+ &transferred,
485
+ os.windows.FALSE, // Don't wait.
486
+ &flags,
487
+ );
488
+ }
489
+
490
+ // ConnectEx requires the socket to be initially bound (INADDR_ANY).
491
+ const inaddr_any: [4]u8 = @splat(0);
492
+ const bind_addr = std.net.Address.initIp4(inaddr_any, 0);
493
+ posix.bind(
494
+ op.socket,
495
+ &bind_addr.any,
496
+ bind_addr.getOsSockLen(),
497
+ ) catch |err| switch (err) {
498
+ error.AccessDenied => unreachable,
499
+ error.SymLinkLoop => unreachable,
500
+ error.NameTooLong => unreachable,
501
+ error.NotDir => unreachable,
502
+ error.ReadOnlyFileSystem => unreachable,
503
+ error.NetworkSubsystemFailed => unreachable,
504
+ error.AlreadyBound => unreachable,
505
+ else => |e| return e,
506
+ };
507
+
508
+ const LPFN_CONNECTEX = *const fn (
509
+ Socket: os.windows.ws2_32.SOCKET,
510
+ SockAddr: *const os.windows.ws2_32.sockaddr,
511
+ SockLen: posix.socklen_t,
512
+ SendBuf: ?*const anyopaque,
513
+ SendBufLen: os.windows.DWORD,
514
+ BytesSent: *os.windows.DWORD,
515
+ Overlapped: *os.windows.OVERLAPPED,
516
+ ) callconv(os.windows.WINAPI) os.windows.BOOL;
517
+
518
+ // Find the ConnectEx function by dynamically looking it up on the socket.
519
+ // TODO: use `os.windows.loadWinsockExtensionFunction` once the function
520
+ // pointer is no longer required to be comptime.
521
+ var connect_ex: LPFN_CONNECTEX = undefined;
522
+ var num_bytes: os.windows.DWORD = undefined;
523
+ const guid = os.windows.ws2_32.WSAID_CONNECTEX;
524
+ const socket_error = os.windows.ws2_32.SOCKET_ERROR;
525
+ switch (os.windows.ws2_32.WSAIoctl(
526
+ op.socket,
527
+ os.windows.ws2_32.SIO_GET_EXTENSION_FUNCTION_POINTER,
528
+ @ptrCast(&guid),
529
+ @sizeOf(os.windows.GUID),
530
+ @ptrCast(&connect_ex),
531
+ @sizeOf(LPFN_CONNECTEX),
532
+ &num_bytes,
533
+ null,
534
+ null,
535
+ )) {
536
+ socket_error => switch (os.windows.ws2_32.WSAGetLastError()) {
537
+ .WSAEOPNOTSUPP => unreachable,
538
+ .WSAENOTSOCK => unreachable,
539
+ else => |err| return os.windows.unexpectedWSAError(err),
540
+ },
541
+ else => assert(num_bytes == @sizeOf(LPFN_CONNECTEX)),
542
+ }
543
+
544
+ op.pending = true;
545
+ op.overlapped = .{
546
+ .raw = std.mem.zeroes(os.windows.OVERLAPPED),
547
+ .completion = ctx.completion,
548
+ };
549
+
550
+ // Start the connect operation.
551
+ break :blk (connect_ex)(
552
+ op.socket,
553
+ &op.address.any,
554
+ op.address.getOsSockLen(),
555
+ null,
556
+ 0,
557
+ &transferred,
558
+ &op.overlapped.raw,
559
+ );
560
+ };
561
+
562
+ // Return if we succeeded in connecting.
563
+ if (rc != os.windows.FALSE) {
564
+ // Enables getsockopt, setsockopt, getsockname, getpeername.
565
+ _ = os.windows.ws2_32.setsockopt(
566
+ op.socket,
567
+ os.windows.ws2_32.SOL.SOCKET,
568
+ os.windows.ws2_32.SO.UPDATE_CONNECT_CONTEXT,
569
+ null,
570
+ 0,
571
+ );
572
+
573
+ return;
574
+ }
575
+
576
+ return switch (os.windows.ws2_32.WSAGetLastError()) {
577
+ .WSA_IO_PENDING, .WSAEWOULDBLOCK, .WSA_IO_INCOMPLETE => error.WouldBlock,
578
+ .WSAEALREADY => error.WouldBlock,
579
+ .WSANOTINITIALISED => unreachable, // WSAStartup() was called.
580
+ .WSAENETDOWN => unreachable, // Network subsystem is down.
581
+ .WSAEADDRNOTAVAIL => error.AddressNotAvailable,
582
+ .WSAEAFNOSUPPORT => error.AddressFamilyNotSupported,
583
+ .WSAECONNREFUSED => error.ConnectionRefused,
584
+ .WSAEFAULT => unreachable, // All addresses should be valid.
585
+ .WSAEINVAL => unreachable, // Invalid socket type.
586
+ .WSAEHOSTUNREACH, .WSAENETUNREACH => error.NetworkUnreachable,
587
+ .WSAENOBUFS => error.SystemResources,
588
+ .WSAENOTSOCK => unreachable, // Socket is not bound or is listening.
589
+ .WSAETIMEDOUT => error.ConnectionTimedOut,
590
+ .WSA_INVALID_HANDLE => unreachable, // We don't use hEvent in OVERLAPPED.
591
+ else => |err| os.windows.unexpectedWSAError(err),
592
+ };
593
+ }
594
+ },
595
+ );
596
+ }
597
+
598
+ /// Errors that `fsync()` reports to its callback.
+ pub const FsyncError = posix.SyncError || posix.UnexpectedError;
599
+
600
+ /// Submits an fsync of `fd`; `callback` receives `FsyncError!void`.
+ /// The operation itself is a direct call to `posix.fsync`.
+ pub fn fsync(
601
+ self: *IO,
602
+ comptime Context: type,
603
+ context: Context,
604
+ comptime callback: fn (
605
+ context: Context,
606
+ completion: *Completion,
607
+ result: FsyncError!void,
608
+ ) void,
609
+ completion: *Completion,
610
+ fd: fd_t,
611
+ ) void {
612
+ self.submit(
613
+ context,
614
+ callback,
615
+ completion,
616
+ .fsync,
617
+ .{
618
+ .fd = fd,
619
+ },
620
+ struct {
621
+ fn do_operation(ctx: Completion.Context, op: anytype) FsyncError!void {
622
+ _ = ctx;
623
+ return posix.fsync(op.fd);
624
+ }
625
+ },
626
+ );
627
+ }
628
+
629
+ /// Errors that `send()` reports to its callback.
+ pub const SendError = posix.SendError;
630
+
631
+ /// Submits an overlapped send of `buffer` on `socket`.
+ /// `callback` receives the number of bytes transferred, or a `SendError`.
+ /// The length handed to WSASend is `buffer_limit(buffer.len)`, so a single send may cover
+ /// less than the whole buffer (NOTE(review): `buffer_limit` is defined outside this view).
+ pub fn send(
632
+ self: *IO,
633
+ comptime Context: type,
634
+ context: Context,
635
+ comptime callback: fn (
636
+ context: Context,
637
+ completion: *Completion,
638
+ result: SendError!usize,
639
+ ) void,
640
+ completion: *Completion,
641
+ socket: socket_t,
642
+ buffer: []const u8,
643
+ ) void {
644
+ const transfer = Completion.Transfer{
645
+ .socket = socket,
646
+ .buf = os.windows.ws2_32.WSABUF{
647
+ .len = @intCast(buffer_limit(buffer.len)),
648
+ // WSABUF holds a mutable pointer, hence the @constCast of the const buffer.
+ .buf = @constCast(buffer.ptr),
649
+ },
650
+ .overlapped = undefined,
651
+ .pending = false,
652
+ };
653
+
654
+ self.submit(
655
+ context,
656
+ callback,
657
+ completion,
658
+ .send,
659
+ transfer,
660
+ struct {
661
+ fn do_operation(ctx: Completion.Context, op: anytype) SendError!usize {
662
+ var flags: os.windows.DWORD = undefined;
663
+ var transferred: os.windows.DWORD = undefined;
664
+
665
+ const rc = blk: {
666
+ // Poll for the result if we've already started the send op.
667
+ if (op.pending) {
668
+ break :blk os.windows.ws2_32.WSAGetOverlappedResult(
669
+ op.socket,
670
+ &op.overlapped.raw,
671
+ &transferred,
672
+ os.windows.FALSE, // Don't wait.
673
+ &flags,
674
+ );
675
+ }
676
+
677
+ op.pending = true;
678
+ op.overlapped = .{
679
+ .raw = std.mem.zeroes(os.windows.OVERLAPPED),
680
+ .completion = ctx.completion,
681
+ };
682
+
683
+ // Start the send operation.
+ // The WSASend return code is normalized to a BOOL so that both paths of this
+ // block share the success check below.
684
+ break :blk switch (os.windows.ws2_32.WSASend(
685
+ op.socket,
686
+ @ptrCast(&op.buf),
687
+ 1, // One buffer.
688
+ &transferred,
689
+ 0, // No flags.
690
+ &op.overlapped.raw,
691
+ null,
692
+ )) {
693
+ os.windows.ws2_32.SOCKET_ERROR => @as(
694
+ os.windows.BOOL,
695
+ os.windows.FALSE,
696
+ ),
697
+ 0 => os.windows.TRUE,
698
+ else => unreachable,
699
+ };
700
+ };
701
+
702
+ // Return bytes transferred on success.
703
+ if (rc != os.windows.FALSE)
704
+ return transferred;
705
+
706
+ return switch (os.windows.ws2_32.WSAGetLastError()) {
707
+ .WSA_IO_PENDING, .WSAEWOULDBLOCK, .WSA_IO_INCOMPLETE => error.WouldBlock,
708
+ .WSANOTINITIALISED => unreachable, // WSAStartup() was called
709
+ .WSA_INVALID_HANDLE => unreachable, // We don't use OVERLAPPED.hEvent
710
+ .WSA_INVALID_PARAMETER => unreachable, // Parameters are fine.
711
+ .WSAECONNABORTED => error.ConnectionResetByPeer,
712
+ .WSAECONNRESET => error.ConnectionResetByPeer,
713
+ .WSAEFAULT => unreachable, // Invalid buffer.
714
+ .WSAEINTR => unreachable, // This is non blocking.
715
+ .WSAEINPROGRESS => unreachable, // This is non blocking.
716
+ .WSAEINVAL => unreachable, // Invalid socket type.
717
+ .WSAEMSGSIZE => error.MessageTooBig,
718
+ .WSAENETDOWN => error.NetworkSubsystemFailed,
719
+ .WSAENETRESET => error.ConnectionResetByPeer,
720
+ .WSAENOBUFS => error.SystemResources,
721
+ .WSAENOTCONN => error.FileDescriptorNotASocket,
722
+ .WSAEOPNOTSUPP => unreachable, // We don't use MSG_OOB or MSG_PARTIAL.
723
+ .WSAESHUTDOWN => error.BrokenPipe,
724
+ .WSA_OPERATION_ABORTED => unreachable, // Operation was cancelled.
725
+ else => |err| os.windows.unexpectedWSAError(err),
726
+ };
727
+ }
728
+ },
729
+ );
730
+ }
731
+
732
+ /// Synchronous best-effort send. This implementation ignores all arguments and always
+ /// returns null.
+ pub fn send_now(_: *IO, _: socket_t, _: []const u8) ?usize {
733
+ return null; // No support for best-effort non-blocking synchronous send.
734
+ }
735
+
736
+ /// Errors that `recv()` reports to its callback.
+ pub const RecvError = posix.RecvFromError;
737
+
738
+ /// Submits an overlapped receive on `socket` into `buffer`.
+ /// `callback` receives the number of bytes transferred, or a `RecvError`.
+ /// The length handed to WSARecv is `buffer_limit(buffer.len)`
+ /// (NOTE(review): `buffer_limit` is defined outside this view).
+ pub fn recv(
739
+ self: *IO,
740
+ comptime Context: type,
741
+ context: Context,
742
+ comptime callback: fn (
743
+ context: Context,
744
+ completion: *Completion,
745
+ result: RecvError!usize,
746
+ ) void,
747
+ completion: *Completion,
748
+ socket: socket_t,
749
+ buffer: []u8,
750
+ ) void {
751
+ const transfer = Completion.Transfer{
752
+ .socket = socket,
753
+ .buf = os.windows.ws2_32.WSABUF{
754
+ .len = @intCast(buffer_limit(buffer.len)),
755
+ .buf = buffer.ptr,
756
+ },
757
+ .overlapped = undefined,
758
+ .pending = false,
759
+ };
760
+
761
+ self.submit(
762
+ context,
763
+ callback,
764
+ completion,
765
+ .recv,
766
+ transfer,
767
+ struct {
768
+ fn do_operation(ctx: Completion.Context, op: anytype) RecvError!usize {
769
+ var flags: os.windows.DWORD = 0; // Used both as input and output.
770
+ var transferred: os.windows.DWORD = undefined;
771
+
772
+ const rc = blk: {
773
+ // Poll for the result if we've already started the recv op.
774
+ if (op.pending) {
775
+ break :blk os.windows.ws2_32.WSAGetOverlappedResult(
776
+ op.socket,
777
+ &op.overlapped.raw,
778
+ &transferred,
779
+ os.windows.FALSE, // Don't wait.
780
+ &flags,
781
+ );
782
+ }
783
+
784
+ op.pending = true;
785
+ op.overlapped = .{
786
+ .raw = std.mem.zeroes(os.windows.OVERLAPPED),
787
+ .completion = ctx.completion,
788
+ };
789
+
790
+ // Start the recv operation.
+ // The WSARecv return code is normalized to a BOOL so that both paths of this
+ // block share the success check below.
791
+ break :blk switch (os.windows.ws2_32.WSARecv(
792
+ op.socket,
793
+ @ptrCast(&op.buf),
794
+ 1, // one buffer
795
+ &transferred,
796
+ &flags,
797
+ &op.overlapped.raw,
798
+ null,
799
+ )) {
800
+ os.windows.ws2_32.SOCKET_ERROR => @as(
801
+ os.windows.BOOL,
802
+ os.windows.FALSE,
803
+ ),
804
+ 0 => os.windows.TRUE,
805
+ else => unreachable,
806
+ };
807
+ };
808
+
809
+ // Return bytes received on success.
810
+ if (rc != os.windows.FALSE)
811
+ return transferred;
812
+
813
+ return switch (os.windows.ws2_32.WSAGetLastError()) {
814
+ .WSA_IO_PENDING, .WSAEWOULDBLOCK, .WSA_IO_INCOMPLETE => error.WouldBlock,
815
+ .WSANOTINITIALISED => unreachable, // WSAStartup() was called
816
+ .WSA_INVALID_HANDLE => unreachable, // We don't use OVERLAPPED.hEvent.
817
+ .WSA_INVALID_PARAMETER => unreachable, // Parameters are fine.
818
+ .WSAECONNABORTED => error.ConnectionRefused,
819
+ .WSAECONNRESET => error.ConnectionResetByPeer,
820
+ .WSAEDISCON => unreachable, // We only use stream sockets.
821
+ .WSAEFAULT => unreachable, // Invalid buffer.
822
+ .WSAEINTR => unreachable, // This is non blocking.
823
+ .WSAEINPROGRESS => unreachable, // This is non blocking.
824
+ .WSAEINVAL => unreachable, // Invalid socket type
825
+ .WSAEMSGSIZE => error.MessageTooBig,
826
+ .WSAENETDOWN => error.NetworkSubsystemFailed,
827
+ .WSAENETRESET => error.ConnectionResetByPeer,
828
+ .WSAENOTCONN => error.SocketNotConnected,
829
+ .WSAEOPNOTSUPP => unreachable, // We don't use MSG_OOB or MSG_PARTIAL.
830
+ .WSAESHUTDOWN => error.SocketNotConnected,
831
+ .WSAETIMEDOUT => error.ConnectionRefused,
832
+ .WSA_OPERATION_ABORTED => unreachable, // Operation was cancelled.
833
+ else => |err| os.windows.unexpectedWSAError(err),
834
+ };
835
+ }
836
+ },
837
+ );
838
+ }
839
+
840
+ pub const OpenatError = posix.OpenError || posix.UnexpectedError;
841
+
842
+ /// Shared body of the overlapped file `read` and `write` operations.
+ /// `overlapped_fn` is invoked as `(fd, buf, len, &transferred, &overlapped)`; in this file it
+ /// is `kernel32.ReadFile` or `kernel32.WriteFile`.
+ /// The first call starts the operation at `op.offset`; once `op.pending` is set, calls only
+ /// poll `GetOverlappedResult`. Returns the number of bytes transferred.
+ fn do_file_io(ctx: Completion.Context, op: anytype, comptime overlapped_fn: anytype) !usize {
843
+ var transferred: os.windows.DWORD = undefined;
844
+ const rc = blk: {
845
+ // Poll result if already started.
846
+ if (op.pending) break :blk os.windows.kernel32.GetOverlappedResult(
847
+ op.fd,
848
+ &op.overlapped.raw,
849
+ &transferred,
850
+ os.windows.FALSE, // Don't wait here.
851
+ );
852
+
853
+ // Start the operation.
854
+ op.pending = true;
855
+ op.overlapped = .{
856
+ .raw = .{
857
+ .Internal = 0,
858
+ .InternalHigh = 0,
859
+ .DUMMYUNIONNAME = .{
860
+ .DUMMYSTRUCTNAME = .{
861
+ // The 64-bit file offset is split into its low and high 32-bit halves.
+ .Offset = @truncate(op.offset),
862
+ .OffsetHigh = @truncate(op.offset >> 32),
863
+ },
864
+ },
865
+ .hEvent = null,
866
+ },
867
+ .completion = ctx.completion,
868
+ };
869
+ break :blk overlapped_fn(op.fd, op.buf, op.len, &transferred, &op.overlapped.raw);
870
+ };
871
+
872
+ // Operation completed successfully.
873
+ if (rc != os.windows.FALSE) {
874
+ return transferred;
875
+ }
876
+
877
+ return switch (os.windows.kernel32.GetLastError()) {
878
+ .IO_PENDING => error.WouldBlock,
879
+ .INVALID_USER_BUFFER, .NOT_ENOUGH_MEMORY => error.SystemResources,
880
+ .NOT_ENOUGH_QUOTA => error.SystemResources,
881
+ .OPERATION_ABORTED => unreachable, // overlapped_fn() doesn't get cancelled.
882
+ // ReadFile and WriteFile don't allow partial IO (acting more like readAll/writeAll)
883
+ // so assume the offset is correct and simulate partial IO by returning 0 bytes moved.
884
+ .HANDLE_EOF => return 0,
885
+ else => |err| return os.windows.unexpectedError(err),
886
+ };
887
+ }
888
+
889
+ /// Errors that `read()` reports to its callback.
+ pub const ReadError = error{
890
+ WouldBlock,
891
+ NotOpenForReading,
892
+ ConnectionResetByPeer,
893
+ Alignment,
894
+ InputOutput,
895
+ IsDir,
896
+ SystemResources,
897
+ Unseekable,
898
+ ConnectionTimedOut,
899
+ } || posix.UnexpectedError;
900
+
901
+ /// Submits an overlapped read from `fd` at byte `offset` into `buffer`.
+ /// `callback` receives the number of bytes read, or a `ReadError`.
+ /// The requested length is `buffer_limit(buffer.len)` cast to u32.
+ pub fn read(
902
+ self: *IO,
903
+ comptime Context: type,
904
+ context: Context,
905
+ comptime callback: fn (
906
+ context: Context,
907
+ completion: *Completion,
908
+ result: ReadError!usize,
909
+ ) void,
910
+ completion: *Completion,
911
+ fd: fd_t,
912
+ buffer: []u8,
913
+ offset: u64,
914
+ ) void {
915
+ self.submit(
916
+ context,
917
+ callback,
918
+ completion,
919
+ .read,
920
+ .{
921
+ .fd = fd,
922
+ .buf = buffer.ptr,
923
+ .len = @as(u32, @intCast(buffer_limit(buffer.len))),
924
+ .offset = offset,
925
+ .overlapped = undefined,
926
+ .pending = false,
927
+ },
928
+ struct {
929
+ fn do_operation(ctx: Completion.Context, op: anytype) ReadError!usize {
930
+ return do_file_io(ctx, op, os.windows.kernel32.ReadFile);
931
+ }
932
+ },
933
+ );
934
+ }
935
+
936
+ /// Errors that `write()` reports to its callback.
+ pub const WriteError = posix.PWriteError;
937
+
938
+ /// Submits an overlapped write of `buffer` to `fd` at byte `offset`.
+ /// `callback` receives the number of bytes written, or a `WriteError`.
+ /// The requested length is `buffer_limit(buffer.len)` cast to u32.
+ pub fn write(
939
+ self: *IO,
940
+ comptime Context: type,
941
+ context: Context,
942
+ comptime callback: fn (
943
+ context: Context,
944
+ completion: *Completion,
945
+ result: WriteError!usize,
946
+ ) void,
947
+ completion: *Completion,
948
+ fd: fd_t,
949
+ buffer: []const u8,
950
+ offset: u64,
951
+ ) void {
952
+ self.submit(
953
+ context,
954
+ callback,
955
+ completion,
956
+ .write,
957
+ .{
958
+ .fd = fd,
959
+ .buf = buffer.ptr,
960
+ .len = @as(u32, @intCast(buffer_limit(buffer.len))),
961
+ .offset = offset,
962
+ .overlapped = undefined,
963
+ .pending = false,
964
+ },
965
+ struct {
966
+ fn do_operation(ctx: Completion.Context, op: anytype) WriteError!usize {
967
+ return do_file_io(ctx, op, os.windows.kernel32.WriteFile);
968
+ }
969
+ },
970
+ );
971
+ }
972
+
973
+ /// Submits a close of `fd`; `callback` receives `CloseError!void`.
+ /// `fd` may be a file handle or a socket: it is probed with `getsockoptError` and closed
+ /// with `CloseHandle` or `close_socket` accordingly.
+ pub fn close(
974
+ self: *IO,
975
+ comptime Context: type,
976
+ context: Context,
977
+ comptime callback: fn (
978
+ context: Context,
979
+ completion: *Completion,
980
+ result: CloseError!void,
981
+ ) void,
982
+ completion: *Completion,
983
+ fd: fd_t,
984
+ ) void {
985
+ self.submit(
986
+ context,
987
+ callback,
988
+ completion,
989
+ .close,
990
+ .{ .fd = fd },
991
+ struct {
992
+ fn do_operation(ctx: Completion.Context, op: anytype) CloseError!void {
993
+ // Check if the fd is a SOCKET by seeing if getsockopt() returns ENOTSOCK
994
+ // https://stackoverflow.com/a/50981652
995
+ const socket: socket_t = @ptrCast(op.fd);
996
+ getsockoptError(socket) catch |err| switch (err) {
997
+ error.FileDescriptorNotASocket => return os.windows.CloseHandle(op.fd),
998
+ // Any other error still means the handle is a socket; fall through.
+ else => {},
999
+ };
1000
+
1001
+ ctx.io.close_socket(socket);
1002
+ }
1003
+ },
1004
+ );
1005
+ }
1006
+
1007
+ /// Errors that `timeout()` reports to its callback.
+ pub const TimeoutError = error{Canceled} || posix.UnexpectedError;
1008
+
1009
+ /// Submits a timeout of `nanoseconds`; `callback` receives `TimeoutError!void`.
+ /// A zero timeout bypasses `submit`: the completion is pushed straight onto `self.completed`
+ /// and its callback is invoked with a successful result.
+ /// Otherwise a `.timeout` operation is submitted whose deadline is the current monotonic
+ /// time plus `nanoseconds`.
+ pub fn timeout(
1010
+ self: *IO,
1011
+ comptime Context: type,
1012
+ context: Context,
1013
+ comptime callback: fn (
1014
+ context: Context,
1015
+ completion: *Completion,
1016
+ result: TimeoutError!void,
1017
+ ) void,
1018
+ completion: *Completion,
1019
+ nanoseconds: u63,
1020
+ ) void {
1021
+ // Special case a zero timeout as a yield.
1022
+ if (nanoseconds == 0) {
1023
+ completion.* = .{
1024
+ .link = .{},
1025
+ .context = @ptrCast(context),
1026
+ // No operation state is needed for the yield; the callback never reads it.
+ .operation = undefined,
1027
+ .callback = struct {
1028
+ fn on_complete(ctx: Completion.Context) void {
1029
+ const _context: Context = @ptrCast(@alignCast(ctx.completion.context));
1030
+ callback(_context, ctx.completion, {});
1031
+ }
1032
+ }.on_complete,
1033
+ };
1034
+
1035
+ self.completed.push(completion);
1036
+ return;
1037
+ }
1038
+
1039
+ self.submit(
1040
+ context,
1041
+ callback,
1042
+ completion,
1043
+ .timeout,
1044
+ .{ .deadline = self.time_os.time().monotonic().ns + nanoseconds },
1045
+ struct {
1046
+ // The operation has no work of its own; it always succeeds.
+ // NOTE(review): deadline handling presumably lives in `submit`/the event loop,
+ // which is outside this view.
+ fn do_operation(ctx: Completion.Context, op: anytype) TimeoutError!void {
1047
+ _ = ctx;
1048
+ _ = op;
1049
+ return;
1050
+ }
1051
+ },
1052
+ );
1053
+ }
1054
+
1055
+ /// Event identifier. Only two values exist: `INVALID_EVENT` (0) and the valid value 1.
+ pub const Event = u1;
1056
+ pub const INVALID_EVENT: Event = 0;
1057
+
1058
+ /// Returns the single valid event value (`INVALID_EVENT + 1`); `self` is unused.
+ pub fn open_event(
1059
+ self: *IO,
1060
+ ) !Event {
1061
+ _ = self;
1062
+ // Events on Windows don't need an identifier,
1063
+ // they're handled just by the OVERLAPPED structure.
1064
+ return INVALID_EVENT + 1;
1065
+ }
1066
+
1067
+ /// Arms `completion` to run `on_event` when it is completed.
+ /// Initializes `completion` with a zeroed OVERLAPPED that points back at the completion,
+ /// and increments `self.io_pending`.
+ pub fn event_listen(
1068
+ self: *IO,
1069
+ event: Event,
1070
+ completion: *Completion,
1071
+ comptime on_event: fn (*Completion) void,
1072
+ ) void {
1073
+ assert(event != INVALID_EVENT);
1074
+ completion.* = .{
1075
+ .link = .{},
1076
+ .context = null,
1077
+ .operation = .{
1078
+ .event = .{
1079
+ .raw = std.mem.zeroes(os.windows.OVERLAPPED),
1080
+ .completion = completion,
1081
+ },
1082
+ },
1083
+ .callback = struct {
1084
+ fn on_complete(ctx: Completion.Context) void {
1085
+ on_event(ctx.completion);
1086
+ }
1087
+ }.on_complete,
1088
+ };
1089
+
1090
+ // Conceptually start listening by bumping the io_pending count.
1091
+ self.io_pending += 1;
1092
+ }
1093
+
1094
+ /// Posts the event OVERLAPPED of `completion` to the completion port `self.iocp`.
+ /// A failure of `PostQueuedCompletionStatus` is treated as unreachable.
+ pub fn event_trigger(self: *IO, event: Event, completion: *Completion) void {
1095
+ assert(event != INVALID_EVENT);
1096
+ os.windows.PostQueuedCompletionStatus(
1097
+ self.iocp,
1098
+ // The byte count and completion key arguments are left undefined.
+ undefined,
1099
+ undefined,
1100
+ &completion.operation.event.raw,
1101
+ ) catch unreachable;
1102
+ }
1103
+
1104
+ /// Counterpart of `open_event`. Only asserts that `event` is valid; `self` is unused.
+ pub fn close_event(self: *IO, event: Event) void {
1105
+ _ = self;
1106
+ // Nothing to close as events are just intrusive OVERLAPPED structs.
1107
+ assert(event != INVALID_EVENT);
1108
+ }
1109
+
1110
+ pub const socket_t = posix.socket_t;
1111
+
1112
+ /// Creates a TCP socket that can be used for async operations with the IO instance.
+ /// `options` are applied via `common.tcp_options`; if that fails the socket is closed
+ /// before the error is returned.
1113
+ pub fn open_socket_tcp(self: *IO, family: u32, options: TCPOptions) !socket_t {
1114
+ const socket = try self.open_socket(
1115
+ @bitCast(family),
1116
+ posix.SOCK.STREAM,
1117
+ posix.IPPROTO.TCP,
1118
+ );
1119
+ errdefer self.close_socket(socket);
1120
+
1121
+ try common.tcp_options(socket, options);
1122
+ return socket;
1123
+ }
1124
+
1125
+ /// Creates a UDP socket that can be used for async operations with the IO instance.
+ /// Unlike `open_socket_tcp`, no socket options are applied.
1126
+ pub fn open_socket_udp(self: *IO, family: u32) !socket_t {
1127
+ return try self.open_socket(
1128
+ @bitCast(family),
1129
+ posix.SOCK.DGRAM,
1130
+ posix.IPPROTO.UDP,
1131
+ );
1132
+ }
1133
+
1134
+ /// Creates an overlapped, non-inheritable socket with `WSASocketW` and registers it with
+ /// the IO instance via `register_handle`. The socket is closed again if registration fails.
+ fn open_socket(self: *IO, family: u32, sock_type: i32, protocol: i32) !socket_t {
1135
+ // Equivalent to SOCK_NONBLOCK | SOCK_CLOEXEC.
1136
+ const socket_flags: os.windows.DWORD =
1137
+ os.windows.ws2_32.WSA_FLAG_OVERLAPPED |
1138
+ os.windows.ws2_32.WSA_FLAG_NO_HANDLE_INHERIT;
1139
+
1140
+ const socket = try os.windows.WSASocketW(
1141
+ @bitCast(family),
1142
+ sock_type,
1143
+ protocol,
1144
+ null,
1145
+ 0,
1146
+ socket_flags,
1147
+ );
1148
+ errdefer self.close_socket(socket);
1149
+
1150
+ try self.register_handle(@ptrCast(socket));
1151
+ return socket;
1152
+ }
1153
+
1154
+ /// Register the IO handle for overlapped operations.
+ /// Associates `handle` with the completion port `self.iocp`, then sets the handle's
+ /// completion notification modes.
1155
+ fn register_handle(self: *IO, handle: os.windows.HANDLE) !void {
1156
+ const iocp_handle = try os.windows.CreateIoCompletionPort(handle, self.iocp, 0, 0);
1157
+ assert(iocp_handle == self.iocp);
1158
+
1159
+ // Ensure that synchronous IO completion doesn't queue an unneeded overlapped
1160
+ // and that the event for the handle (WaitForSingleObject) doesn't need to be set.
1161
+ var mode: os.windows.BYTE = 0;
1162
+ mode |= os.windows.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS;
1163
+ mode |= os.windows.FILE_SKIP_SET_EVENT_ON_HANDLE;
1164
+ try os.windows.SetFileCompletionNotificationModes(handle, mode);
1165
+ }
1166
+
1167
+ /// Closes a socket opened by the IO instance.
+ /// Delegates to `posix.close`; `self` is unused.
1168
+ pub fn close_socket(self: *IO, socket: socket_t) void {
1169
+ _ = self;
1170
+ posix.close(socket);
1171
+ }
1172
+
1173
+ /// Listen on the given TCP socket.
1174
+ /// Returns socket resolved address, which might be more specific
1175
+ /// than the input address (e.g., listening on port 0).
+ /// Thin wrapper around `common.listen`; the IO instance is unused.
1176
+ pub fn listen(
1177
+ _: *IO,
1178
+ fd: socket_t,
1179
+ address: std.net.Address,
1180
+ options: ListenOptions,
1181
+ ) !std.net.Address {
1182
+ return common.listen(fd, address, options);
1183
+ }
1184
+
1185
+ /// Shuts down `socket` in the direction(s) given by `how`.
+ /// Thin wrapper around `posix.shutdown`; the IO instance is unused.
+ pub fn shutdown(_: *IO, socket: socket_t, how: posix.ShutdownHow) posix.ShutdownError!void {
1186
+ return posix.shutdown(socket, how);
1187
+ }
1188
+
1189
+ /// Opens a directory with read only access.
+ /// `dir_path` is resolved relative to the current working directory (`std.fs.cwd()`).
+ /// Returns the raw directory handle; the caller owns it.
1190
+ pub fn open_dir(dir_path: []const u8) !fd_t {
1191
+ const dir = try std.fs.cwd().openDir(dir_path, .{});
1192
+ return dir.fd;
1193
+ }
1194
+
1195
+ pub const fd_t = posix.fd_t;
1196
+ pub const INVALID_FILE = os.windows.INVALID_HANDLE_VALUE;
1197
+
1198
+ /// Opens (or, for `.format`, exclusively creates) `relative_path` under `dir_handle` and
+ /// registers the resulting handle with the IO instance for overlapped operations.
+ /// - `.format` uses FILE_CREATE; `.open` and `.inspect` use OPEN_EXISTING.
+ /// - `.inspect` opens without write access.
+ /// - The file is opened with no sharing, unbuffered and write-through.
+ fn open_file_handle(
1199
+ self: *IO,
1200
+ dir_handle: fd_t,
1201
+ relative_path: []const u8,
1202
+ purpose: enum { format, open, inspect },
1203
+ ) !fd_t {
1204
+ const path_w = try os.windows.sliceToPrefixedFileW(dir_handle, relative_path);
1205
+
1206
+ // FILE_CREATE = O_CREAT | O_EXCL
1207
+ var creation_disposition: os.windows.DWORD = 0;
1208
+ switch (purpose) {
1209
+ .format => {
1210
+ creation_disposition = os.windows.FILE_CREATE;
1211
+ log.info("creating \"{s}\"...", .{relative_path});
1212
+ },
1213
+ .open, .inspect => {
1214
+ creation_disposition = os.windows.OPEN_EXISTING;
1215
+ log.info("opening \"{s}\"...", .{relative_path});
1216
+ },
1217
+ }
1218
+
1219
+ // O_EXCL
1220
+ const shared_mode: os.windows.DWORD = 0;
1221
+
1222
+ // O_RDWR
1223
+ // Zig's mask seems wonky; according to
1224
+ // https://learn.microsoft.com/en-us/windows/win32/api/winternl/nf-winternl-ntcreatefile
1225
+ // FILE_GENERIC_READ should include SYNCHRONIZE but it does not.
1226
+ var access_mask: os.windows.DWORD = 0;
1227
+ access_mask |= os.windows.SYNCHRONIZE;
1228
+ access_mask |= os.windows.GENERIC_READ;
1229
+
1230
+ if (purpose != .inspect) {
1231
+ access_mask |= os.windows.GENERIC_WRITE;
1232
+ }
1233
+
1234
+ // O_DIRECT | O_DSYNC
1235
+ // NB: These are NtDll flags, not to be confused with the Win32 style flags that are
1236
+ // similar but different (!).
1237
+ var attributes: os.windows.DWORD = 0;
1238
+ attributes |= os.windows.FILE_NO_INTERMEDIATE_BUFFERING;
1239
+ attributes |= os.windows.FILE_WRITE_THROUGH;
1240
+
1241
+ // This is critical as we rely on O_DSYNC for fsync() whenever we write to the file:
1242
+ assert((attributes & os.windows.FILE_WRITE_THROUGH) > 0);
1243
+
1244
+ // It's a little confusing, but with NtCreateFile, which is what windows_open_file uses
1245
+ // under the hood, not specifying anything gets you a file capable of overlapped IO.
1246
+ // FILE_FLAG_OVERLAPPED and co belong to the higher level kernel32 API.
1247
+ const handle = try windows_open_file(path_w.span(), .{
1248
+ .access_mask = access_mask,
1249
+ .dir = dir_handle,
1250
+ .sa = null,
1251
+ .share_access = shared_mode,
1252
+ .creation = creation_disposition,
1253
+ .filter = .file_only,
1254
+ .follow_symlinks = false,
1255
+ }, attributes);
1256
+
1257
+ // NOTE(review): `windows_open_file` already reports failures through `try` above, so
+ // whether this sentinel check can ever trigger depends on its implementation, which is
+ // outside this view.
+ if (handle == os.windows.INVALID_HANDLE_VALUE) {
1258
+ return switch (os.windows.kernel32.GetLastError()) {
1259
+ .FILE_NOT_FOUND => error.FileNotFound,
1260
+ .SHARING_VIOLATION, .ACCESS_DENIED => error.AccessDenied,
1261
+ else => |err| {
1262
+ return os.windows.unexpectedError(err);
1263
+ },
1264
+ };
1265
+ }
1266
+
1267
+ errdefer os.windows.CloseHandle(handle);
1268
+
1269
+ // Register the file with the IO handle for overlapped operations.
1270
+ try self.register_handle(handle);
1271
+
1272
+ return handle;
1273
+ }
1274
+
1275
+ /// Why a data file is being opened; see `open_data_file`.
+ pub const OpenDataFilePurpose = enum { format, open, inspect };
1276
+ /// Opens or creates a journal file:
1277
+ /// - For reading and writing.
1278
+ /// - For Direct I/O (required on windows).
1279
+ /// - Obtains an advisory exclusive lock to the file descriptor.
1280
+ /// - Allocates the file contiguously on disk if this is supported by the file system.
1281
+ /// - Ensures that the file data is durable on disk.
1282
+ /// The caller is responsible for ensuring that the parent directory inode is durable.
1283
+ /// - Verifies that the file size matches the expected file size before returning.
+ ///
+ /// Behavior by `purpose`:
+ /// - `.format` creates the file and allocates `size` bytes.
+ /// - `.open` opens an existing file; a lock held by another process is a panic.
+ /// - `.inspect` opens without write access, only warns when the lock is held elsewhere,
+ ///   and skips the fsync.
1284
+ pub fn open_data_file(
1285
+ self: *IO,
1286
+ dir_handle: fd_t,
1287
+ relative_path: []const u8,
1288
+ size: u64,
1289
+ purpose: OpenDataFilePurpose,
1290
+ direct_io: DirectIO,
1291
+ ) !fd_t {
1292
+ assert(relative_path.len > 0);
1293
+ assert(size % constants.sector_size == 0);
1294
+ // On windows, assume that Direct IO is always available.
1295
+ _ = direct_io;
1296
+
1297
+ // The switch translates `OpenDataFilePurpose` into the anonymous enum parameter of
+ // `open_file_handle`.
+ const handle = switch (purpose) {
1298
+ .format => try self.open_file_handle(dir_handle, relative_path, .format),
1299
+ .open => try self.open_file_handle(dir_handle, relative_path, .open),
1300
+ .inspect => try self.open_file_handle(
1301
+ dir_handle,
1302
+ relative_path,
1303
+ .inspect,
1304
+ ),
1305
+ };
1306
+ errdefer os.windows.CloseHandle(handle);
1307
+
1308
+ // Obtain an advisory exclusive lock
1309
+ // even when we haven't given shared access to other processes.
1310
+ fs_lock(handle, size) catch |err| switch (err) {
1311
+ error.WouldBlock => {
1312
+ if (purpose == .inspect) {
1313
+ log.warn(
1314
+ "another process holds the data file lock - results may be inconsistent",
1315
+ .{},
1316
+ );
1317
+ } else {
1318
+ @panic("another process holds the data file lock");
1319
+ }
1320
+ },
1321
+ else => return err,
1322
+ };
1323
+
1324
+ // Ask the file system to allocate contiguous sectors for the file (if possible):
1325
+ if (purpose == .format) {
1326
+ log.info("allocating {}...", .{std.fmt.fmtIntSizeBin(size)});
1327
+ fs_allocate(handle, size) catch {
1328
+ log.warn("file system failed to preallocate the file memory", .{});
1329
+ log.info("allocating by writing to the last sector of the file instead...", .{});
1330
+
1331
+ const sector_size = constants.sector_size;
1332
+ const sector: [sector_size]u8 align(sector_size) = @splat(0);
1333
+
1334
+ // Handle partial writes where the physical sector is less than a logical sector:
+ // NOTE(review): `size - sector.len` assumes `size >= sector_size`; the assert above
+ // only checks that `size` is a multiple of the sector size — confirm callers never
+ // format with `size == 0`.
1335
+ const write_offset = size - sector.len;
1336
+ var written: usize = 0;
1337
+ while (written < sector.len) {
1338
+ written += try posix.pwrite(
1339
+ handle,
1340
+ sector[written..],
1341
+ write_offset + written,
1342
+ );
1343
+ }
1344
+ };
1345
+ }
1346
+
1347
+ // The best fsync strategy is always to fsync before reading because this prevents us from
1348
+ // making decisions on data that was never durably written by a previously crashed process.
1349
+ // We therefore always fsync when we open the path, also to wait for any pending O_DSYNC.
1350
+ // Thanks to Alex Miller from FoundationDB for diving into our source and pointing this out.
1351
+ if (purpose != .inspect) {
1352
+ try posix.fsync(handle);
1353
+ }
1354
+
1355
+ // We cannot fsync the directory handle on Windows.
1356
+ // We have no way to open a directory with write access.
1357
+ //
1358
+ // try posix.fsync(dir_handle);
1359
+
1360
+ // Only a file smaller than `size` is rejected here; a larger file passes this check.
+ const file_size = try os.windows.GetFileSizeEx(handle);
1361
+ if (file_size < size) @panic("data file inode size was truncated or corrupted");
1362
+
1363
+ return handle;
1364
+ }
1365
+
1366
+ /// Takes an exclusive, non-blocking lock over the first `size` bytes of `handle`.
+ /// Returns `error.WouldBlock` when the range cannot be locked right now, and an unexpected
+ /// error for any other failure.
+ fn fs_lock(handle: fd_t, size: u64) !void {
1367
+ // TODO: Look into using SetFileIoOverlappedRange() for better unbuffered async IO perf
1368
+ // NOTE: Requires SeLockMemoryPrivilege.
1369
+
1370
+ // hEvent = null
1371
+ // Offset & OffsetHigh = 0
1372
+ var lock_overlapped = std.mem.zeroes(os.windows.OVERLAPPED);
1373
+
1374
+ // LOCK_EX | LOCK_NB
1375
+ var lock_flags: os.windows.DWORD = 0;
1376
+ lock_flags |= stdx.windows.LOCKFILE_EXCLUSIVE_LOCK;
1377
+ lock_flags |= stdx.windows.LOCKFILE_FAIL_IMMEDIATELY;
1378
+
1379
+ const locked = stdx.windows.LockFileEx(
1380
+ handle,
1381
+ lock_flags,
1382
+ 0, // Reserved param is always zero.
1383
+ @as(u32, @truncate(size)), // Low bits of size.
1384
+ @as(u32, @truncate(size >> 32)), // High bits of size.
1385
+ &lock_overlapped,
1386
+ );
1387
+
1388
+ if (locked == os.windows.FALSE) {
1389
+ return switch (os.windows.kernel32.GetLastError()) {
1390
+ .IO_PENDING => error.WouldBlock,
+ // With LOCKFILE_FAIL_IMMEDIATELY, a range that is already locked is reported as
+ // ERROR_LOCK_VIOLATION rather than ERROR_IO_PENDING, so treat it as contention too.
+ .LOCK_VIOLATION => error.WouldBlock,
1391
+ else => |err| os.windows.unexpectedError(err),
1392
+ };
1393
+ }
1394
+ }
1395
+
1396
+ /// Sets the size of the file behind `handle` to `size` bytes by seeking to `size` from the
+ /// start of the file and then marking that position as the end of file.
+ /// Note that this moves the handle's file pointer.
+ fn fs_allocate(handle: fd_t, size: u64) !void {
1397
+ // TODO: Look into using SetFileValidData() instead
1398
+ // NOTE: Requires SE_MANAGE_VOLUME_NAME privilege
1399
+
1400
+ // Move the file pointer to the start + size.
1401
+ const seeked = os.windows.kernel32.SetFilePointerEx(
1402
+ handle,
1403
+ @intCast(size),
1404
+ null, // No reference to new file pointer.
1405
+ os.windows.FILE_BEGIN,
1406
+ );
1407
+
1408
+ if (seeked == os.windows.FALSE) {
1409
+ return switch (os.windows.kernel32.GetLastError()) {
1410
+ .INVALID_HANDLE => unreachable,
1411
+ .INVALID_PARAMETER => unreachable,
1412
+ else => |err| os.windows.unexpectedError(err),
1413
+ };
1414
+ }
1415
+
1416
+ // Mark the moved file pointer (start + size) as the physical EOF.
1417
+ const allocated = stdx.windows.SetEndOfFile(handle);
1418
+ if (allocated == os.windows.FALSE) {
1419
+ const err = os.windows.kernel32.GetLastError();
1420
+ return os.windows.unexpectedError(err);
1421
+ }
1422
+ }
1423
+
1424
+ pub const PReadError = posix.PReadError;
1425
+
1426
+ /// Thin wrapper around `common.aof_blocking_write_all`; the IO instance is unused.
+ pub fn aof_blocking_write_all(_: *IO, fd: fd_t, buffer: []const u8) posix.WriteError!void {
1427
+ return common.aof_blocking_write_all(fd, buffer);
1428
+ }
1429
+
1430
+ /// Thin wrapper around `common.aof_blocking_pread_all`; the IO instance is unused.
+ pub fn aof_blocking_pread_all(_: *IO, fd: fd_t, buffer: []u8, offset: u64) PReadError!usize {
1431
+ return common.aof_blocking_pread_all(fd, buffer, offset);
1432
+ }
1433
+
1434
+ /// Thin wrapper around `common.aof_blocking_close`; the IO instance is unused.
+ pub fn aof_blocking_close(_: *IO, fd: fd_t) void {
1435
+ return common.aof_blocking_close(fd);
1436
+ }
1437
+
1438
+ /// Thin wrapper around `common.aof_blocking_stat`; the IO instance is unused.
+ pub fn aof_blocking_stat(_: *IO, path: []const u8) std.fs.Dir.StatFileError!std.fs.File.Stat {
1439
+ return common.aof_blocking_stat(path);
1440
+ }
1441
+
1442
+ /// Thin wrapper around `common.aof_blocking_fstat`; the IO instance is unused.
+ pub fn aof_blocking_fstat(_: *IO, fd: fd_t) std.fs.Dir.StatError!std.fs.File.Stat {
1443
+ return common.aof_blocking_fstat(fd);
1444
+ }
1445
+
1446
+ /// Opens the file at `path` via `common.aof_blocking_open`.
+ /// `path` may be absolute or relative. Its parent directory (or "." when there is none) is
+ /// opened for the duration of the call and closed again before returning.
+ pub fn aof_blocking_open(io: *IO, path: []const u8) !fd_t {
1447
+ stdx.maybe(std.fs.path.isAbsolute(path));
1448
+
1449
+ const dir_path = std.fs.path.dirname(path) orelse ".";
1450
+ const dir_fd = try IO.open_dir(dir_path);
1451
+ defer io.aof_blocking_close(dir_fd);
1452
+
1453
+ const file_path = std.fs.path.basename(path);
1454
+
1455
+ return common.aof_blocking_open(dir_fd, file_path);
1456
+ }
1457
+ };
1458
+
1459
+ // TODO: use posix.getsockoptError when fixed for windows in stdlib.
+ /// Reads the `SO_ERROR` option of `socket` and maps it to an `IO.ConnectError`.
+ /// Returns normally when the option reads as zero.
+ /// Also used by `IO.close` to detect whether a handle is a socket
+ /// (`error.FileDescriptorNotASocket`).
1460
+ fn getsockoptError(socket: posix.socket_t) IO.ConnectError!void {
1461
+ var err_code: u32 = undefined;
1462
+ var size: i32 = @sizeOf(u32);
1463
+ const rc = os.windows.ws2_32.getsockopt(
1464
+ socket,
1465
+ posix.SOL.SOCKET,
1466
+ posix.SO.ERROR,
1467
+ std.mem.asBytes(&err_code),
1468
+ &size,
1469
+ );
1470
+
1471
+ // A non-zero return means the getsockopt call itself failed.
+ if (rc != 0) {
1472
+ switch (os.windows.ws2_32.WSAGetLastError()) {
1473
+ .WSAENETDOWN => return error.NetworkUnreachable,
1474
+ .WSANOTINITIALISED => unreachable, // WSAStartup() was never called.
1475
+
1476
+ // The address pointed to by optval or optlen is not in a valid part of the process
1477
+ // address space.
1478
+ .WSAEFAULT => unreachable,
1479
+
1480
+ .WSAEINVAL => unreachable, // The level parameter is unknown or invalid.
1481
+ .WSAENOPROTOOPT => unreachable, // The option is unknown at the level indicated.
1482
+ .WSAENOTSOCK => return error.FileDescriptorNotASocket,
1483
+ else => |err| return os.windows.unexpectedWSAError(err),
1484
+ }
1485
+ }
1486
+
1487
+ assert(size == 4);
1488
+ if (err_code == 0)
1489
+ return;
1490
+
1491
+ // The option value is interpreted as a Winsock error code; the cast asserts it fits in u16.
+ const ws_err: os.windows.ws2_32.WinsockError = @enumFromInt(@as(u16, @intCast(err_code)));
1492
+ return switch (ws_err) {
1493
+ .WSAEACCES => error.PermissionDenied,
1494
+ .WSAEADDRINUSE => error.AddressInUse,
1495
+ .WSAEADDRNOTAVAIL => error.AddressNotAvailable,
1496
+ .WSAEAFNOSUPPORT => error.AddressFamilyNotSupported,
1497
+ .WSAEALREADY => error.ConnectionPending,
1498
+ .WSAEBADF => unreachable,
1499
+ .WSAECONNREFUSED => error.ConnectionRefused,
1500
+ .WSAEFAULT => unreachable,
1501
+ .WSAEISCONN => unreachable, // error.AlreadyConnected,
1502
+ .WSAENETUNREACH => error.NetworkUnreachable,
1503
+ .WSAENOTSOCK => error.FileDescriptorNotASocket,
1504
+ .WSAEPROTOTYPE => unreachable,
1505
+ .WSAETIMEDOUT => error.ConnectionTimedOut,
1506
+ .WSAECONNRESET => error.ConnectionResetByPeer,
1507
+ else => |e| os.windows.unexpectedWSAError(e),
1508
+ };
1509
+ }
1510
+
1511
+ // Vendor std.os.windows.OpenFile so we can set file attributes. Add it as a parameter after
1512
+ // `options`, so we don't have to vendor that struct too.
1513
+ pub fn windows_open_file( // Opens `sub_path_w` via NtCreateFile; returns the HANDLE or an os.windows.OpenError.
1514
+ sub_path_w: []const u16,
1515
+ options: os.windows.OpenFileOptions,
1516
+ file_flags: os.windows.ULONG, // OR-ed with the computed `flags` in the NtCreateFile call below.
1517
+ ) os.windows.OpenError!os.windows.HANDLE {
1518
+ if (std.mem.eql(u16, sub_path_w, &[_]u16{'.'}) and options.filter == .file_only) { // "." is rejected as a directory when only files are wanted.
1519
+ return error.IsDir;
1520
+ }
1521
+ if (std.mem.eql(u16, sub_path_w, &[_]u16{ '.', '.' }) and options.filter == .file_only) { // Same for "..".
1522
+ return error.IsDir;
1523
+ }
1524
+
1525
+ var result: os.windows.HANDLE = undefined; // Written by NtCreateFile; returned on .SUCCESS.
1526
+
1527
+ const path_len_bytes = std.math.cast(u16, sub_path_w.len * 2) orelse return error.NameTooLong; // Two bytes per u16 unit; the byte count must fit in a u16.
1528
+ var nt_name = os.windows.UNICODE_STRING{
1529
+ .Length = path_len_bytes,
1530
+ .MaximumLength = path_len_bytes,
1531
+ .Buffer = @constCast(sub_path_w.ptr), // Drops const from the caller's slice pointer to fit the field.
1532
+ };
1533
+ var attr = os.windows.OBJECT_ATTRIBUTES{
1534
+ .Length = @sizeOf(os.windows.OBJECT_ATTRIBUTES),
1535
+ .RootDirectory = if (std.fs.path.isAbsoluteWindowsWTF16(sub_path_w)) null else options.dir, // Absolute paths ignore options.dir.
1536
+ .Attributes = 0, // Note we do not use OBJ_CASE_INSENSITIVE here.
1537
+ .ObjectName = &nt_name,
1538
+ .SecurityDescriptor = if (options.sa) |ptr| ptr.lpSecurityDescriptor else null,
1539
+ .SecurityQualityOfService = null,
1540
+ };
1541
+ var io: os.windows.IO_STATUS_BLOCK = undefined; // Passed to NtCreateFile; not read afterwards in this function.
1542
+ const file_or_dir_flag: os.windows.ULONG = switch (options.filter) {
1543
+ .file_only => os.windows.FILE_NON_DIRECTORY_FILE,
1544
+ .dir_only => os.windows.FILE_DIRECTORY_FILE,
1545
+ .any => 0,
1546
+ };
1547
+ // This code is changed slightly from Zig's stdlib: there, options.follow_symlinks enforces
1548
+ // FILE_SYNCHRONOUS_IO_NONALERT which stops overlapped IO.
1549
+ assert(!options.follow_symlinks);
1550
+ const flags: os.windows.ULONG = file_or_dir_flag | os.windows.FILE_OPEN_REPARSE_POINT; // The reparse-point flag is always set, consistent with the assert above.
1551
+
1552
+ while (true) { // Retries only on DELETE_PENDING; every other status leaves the loop.
1553
+ const rc = os.windows.ntdll.NtCreateFile(
1554
+ &result,
1555
+ options.access_mask,
1556
+ &attr,
1557
+ &io,
1558
+ null, // AllocationSize
1559
+ os.windows.FILE_ATTRIBUTE_NORMAL,
1560
+ options.share_access,
1561
+ options.creation,
1562
+ flags | file_flags, // CreateOptions: computed flags plus the caller's extras.
1563
+ null, // EaBuffer
1564
+ 0, // EaLength
1565
+ );
1566
+ switch (rc) {
1567
+ .SUCCESS => return result,
1568
+ .OBJECT_NAME_INVALID => unreachable,
1569
+ .OBJECT_NAME_NOT_FOUND => return error.FileNotFound,
1570
+ .OBJECT_PATH_NOT_FOUND => return error.FileNotFound,
1571
+ .BAD_NETWORK_PATH => return error.NetworkNotFound, // \\server was not found.
1572
+ // \\server was found but \\server\share wasn't.
1573
+ .BAD_NETWORK_NAME => return error.NetworkNotFound,
1574
+ .NO_MEDIA_IN_DEVICE => return error.NoDevice,
1575
+ .INVALID_PARAMETER => unreachable,
1576
+ .SHARING_VIOLATION => return error.AccessDenied,
1577
+ .ACCESS_DENIED => return error.AccessDenied,
1578
+ .PIPE_BUSY => return error.PipeBusy,
1579
+ .OBJECT_PATH_SYNTAX_BAD => unreachable,
1580
+ .OBJECT_NAME_COLLISION => return error.PathAlreadyExists,
1581
+ .FILE_IS_A_DIRECTORY => return error.IsDir,
1582
+ .NOT_A_DIRECTORY => return error.NotDir,
1583
+ .USER_MAPPED_FILE => return error.AccessDenied,
1584
+ .INVALID_HANDLE => unreachable,
1585
+ .DELETE_PENDING => {
1586
+ // This error means that there *was* a file in this location on
1587
+ // the file system, but it was deleted. However, the OS is not
1588
+ // finished with the deletion operation, and so this CreateFile
1589
+ // call has failed. There is not really a sane way to handle
1590
+ // this other than retrying the creation after the OS finishes
1591
+ // the deletion.
1592
+ std.time.sleep(std.time.ns_per_ms); // Wait one millisecond before retrying.
1593
+ continue;
1594
+ },
1595
+ else => return os.windows.unexpectedStatus(rc),
1596
+ }
1597
+ }
1598
+ }