uringmachine 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +1 -1
  3. data/.gitignore +1 -0
  4. data/.gitmodules +0 -3
  5. data/CHANGELOG.md +17 -0
  6. data/Gemfile +12 -1
  7. data/README.md +266 -112
  8. data/Rakefile +8 -0
  9. data/TODO.md +40 -17
  10. data/benchmark/bm_io_pipe.rb +43 -1
  11. data/benchmark/bm_io_socketpair.rb +32 -2
  12. data/benchmark/bm_mutex_io.rb +47 -5
  13. data/benchmark/chart_bm_io_pipe_x.png +0 -0
  14. data/benchmark/common.rb +163 -17
  15. data/benchmark/http_parse.rb +9 -9
  16. data/benchmark/http_server_accept_queue.rb +104 -0
  17. data/benchmark/http_server_multi_accept.rb +93 -0
  18. data/benchmark/http_server_multi_ractor.rb +99 -0
  19. data/benchmark/http_server_single_thread.rb +80 -0
  20. data/benchmark/ips_io_pipe.rb +146 -0
  21. data/benchmark/openssl.rb +77 -0
  22. data/benchmark/openssl_socketpair.rb +112 -0
  23. data/benchmark/sqlite.rb +1 -1
  24. data/docs/design/buffer_pool.md +183 -0
  25. data/docs/um_api.md +91 -0
  26. data/examples/fiber_scheduler_file_io.rb +34 -0
  27. data/examples/fiber_scheduler_file_io_async.rb +33 -0
  28. data/ext/um/extconf.rb +15 -0
  29. data/ext/um/um.c +83 -50
  30. data/ext/um/um.h +18 -3
  31. data/ext/um/um_async_op_class.c +31 -0
  32. data/ext/um/um_class.c +759 -30
  33. data/ext/um/um_const.c +31 -0
  34. data/ext/um/um_mutex_class.c +12 -0
  35. data/ext/um/um_queue_class.c +16 -0
  36. data/ext/um/um_sidecar.c +106 -0
  37. data/ext/um/um_ssl.c +109 -0
  38. data/ext/um/um_stream.c +40 -8
  39. data/ext/um/um_stream_class.c +14 -0
  40. data/ext/um/um_utils.c +3 -4
  41. data/grant-2025/interim-report.md +130 -0
  42. data/grant-2025/journal.md +166 -2
  43. data/grant-2025/tasks.md +32 -20
  44. data/lib/uringmachine/dns_resolver.rb +38 -0
  45. data/lib/uringmachine/fiber_scheduler.rb +42 -32
  46. data/lib/uringmachine/version.rb +1 -1
  47. data/lib/uringmachine.rb +105 -7
  48. data/test/helper.rb +23 -3
  49. data/test/test_fiber.rb +16 -0
  50. data/test/test_fiber_scheduler.rb +221 -72
  51. data/test/test_ssl.rb +85 -0
  52. data/test/test_stream.rb +27 -0
  53. data/test/test_um.rb +250 -26
  54. data/uringmachine.gemspec +1 -7
  55. data/vendor/liburing/examples/send-zerocopy.c +43 -31
  56. data/vendor/liburing/examples/zcrx.c +260 -69
  57. data/vendor/liburing/liburing.spec +1 -1
  58. data/vendor/liburing/src/include/liburing/io_uring.h +12 -0
  59. data/vendor/liburing/src/include/liburing.h +3 -2
  60. data/vendor/liburing/src/liburing-ffi.map +4 -0
  61. data/vendor/liburing/src/liburing.map +4 -0
  62. data/vendor/liburing/src/queue.c +12 -0
  63. data/vendor/liburing/src/register.c +1 -0
  64. data/vendor/liburing/src/setup.c +15 -7
  65. data/vendor/liburing/test/Makefile +8 -4
  66. data/vendor/liburing/test/conn-unreach.c +1 -1
  67. data/vendor/liburing/test/epwait.c +32 -6
  68. data/vendor/liburing/test/io-wq-exit.c +131 -0
  69. data/vendor/liburing/test/iowait.c +1 -1
  70. data/vendor/liburing/test/min-timeout.c +3 -1
  71. data/vendor/liburing/test/open-close.c +39 -0
  72. data/vendor/liburing/test/poll-update-trigger.c +85 -0
  73. data/vendor/liburing/test/recvsend_bundle.c +14 -11
  74. data/vendor/liburing/test/sendzc-bug.c +146 -0
  75. data/vendor/liburing/test/sqe-mixed-nop.c +151 -7
  76. data/vendor/liburing/test/test.h +2 -0
  77. data/vendor/liburing/test/timestamp-bug.c +135 -0
  78. data/vendor/liburing/test/timestamp.c +5 -0
  79. data/vendor/liburing/test/vec-regbuf.c +136 -1
  80. metadata +50 -284
  81. data/vendor/libressl/.github/scripts/changelog.sh +0 -74
  82. data/vendor/libressl/.github/workflows/android.yml +0 -35
  83. data/vendor/libressl/.github/workflows/cifuzz.yml +0 -33
  84. data/vendor/libressl/.github/workflows/cmake-config.yml +0 -98
  85. data/vendor/libressl/.github/workflows/coverity.yml +0 -69
  86. data/vendor/libressl/.github/workflows/emscripten.yml +0 -71
  87. data/vendor/libressl/.github/workflows/fedora-rawhide.yml +0 -39
  88. data/vendor/libressl/.github/workflows/freebsd.yml +0 -71
  89. data/vendor/libressl/.github/workflows/linux.yml +0 -71
  90. data/vendor/libressl/.github/workflows/macos.yml +0 -37
  91. data/vendor/libressl/.github/workflows/release.yml +0 -81
  92. data/vendor/libressl/.github/workflows/rust-openssl.yml +0 -47
  93. data/vendor/libressl/.github/workflows/solaris.yml +0 -37
  94. data/vendor/libressl/.github/workflows/windows.yml +0 -70
  95. data/vendor/libressl/.gitignore +0 -333
  96. data/vendor/libressl/CMakeLists.txt +0 -581
  97. data/vendor/libressl/COPYING +0 -133
  98. data/vendor/libressl/ChangeLog +0 -3280
  99. data/vendor/libressl/FindLibreSSL.cmake +0 -232
  100. data/vendor/libressl/LibreSSLConfig.cmake.in +0 -36
  101. data/vendor/libressl/Makefile.am +0 -60
  102. data/vendor/libressl/Makefile.am.common +0 -20
  103. data/vendor/libressl/OPENBSD_BRANCH +0 -1
  104. data/vendor/libressl/README.md +0 -238
  105. data/vendor/libressl/README.mingw.md +0 -43
  106. data/vendor/libressl/apps/CMakeLists.txt +0 -18
  107. data/vendor/libressl/apps/Makefile.am +0 -5
  108. data/vendor/libressl/apps/nc/CMakeLists.txt +0 -67
  109. data/vendor/libressl/apps/nc/Makefile.am +0 -64
  110. data/vendor/libressl/apps/nc/compat/accept4.c +0 -17
  111. data/vendor/libressl/apps/nc/compat/readpassphrase.c +0 -205
  112. data/vendor/libressl/apps/nc/compat/socket.c +0 -29
  113. data/vendor/libressl/apps/nc/compat/sys/socket.h +0 -30
  114. data/vendor/libressl/apps/ocspcheck/CMakeLists.txt +0 -44
  115. data/vendor/libressl/apps/ocspcheck/Makefile.am +0 -45
  116. data/vendor/libressl/apps/ocspcheck/compat/.gitignore +0 -0
  117. data/vendor/libressl/apps/openssl/CMakeLists.txt +0 -97
  118. data/vendor/libressl/apps/openssl/Makefile.am +0 -108
  119. data/vendor/libressl/apps/openssl/apps_win.c +0 -138
  120. data/vendor/libressl/apps/openssl/certhash_win.c +0 -13
  121. data/vendor/libressl/apps/openssl/compat/clock_gettime_osx.c +0 -26
  122. data/vendor/libressl/apps/openssl/compat/poll_win.c +0 -329
  123. data/vendor/libressl/appveyor.yml +0 -53
  124. data/vendor/libressl/autogen.sh +0 -15
  125. data/vendor/libressl/check-release.sh +0 -86
  126. data/vendor/libressl/cmake_export_symbol.cmake +0 -71
  127. data/vendor/libressl/cmake_uninstall.cmake.in +0 -36
  128. data/vendor/libressl/config +0 -17
  129. data/vendor/libressl/configure.ac +0 -165
  130. data/vendor/libressl/crypto/CMakeLists.txt +0 -863
  131. data/vendor/libressl/crypto/Makefile.am +0 -962
  132. data/vendor/libressl/crypto/Makefile.am.arc4random +0 -46
  133. data/vendor/libressl/crypto/Makefile.am.elf-mips +0 -14
  134. data/vendor/libressl/crypto/Makefile.am.elf-mips64 +0 -14
  135. data/vendor/libressl/crypto/Makefile.am.elf-x86_64 +0 -35
  136. data/vendor/libressl/crypto/Makefile.am.macosx-x86_64 +0 -35
  137. data/vendor/libressl/crypto/Makefile.am.masm-x86_64 +0 -22
  138. data/vendor/libressl/crypto/Makefile.am.mingw64-x86_64 +0 -23
  139. data/vendor/libressl/crypto/arch/aarch64/crypto_cpu_caps_darwin.c +0 -60
  140. data/vendor/libressl/crypto/arch/aarch64/crypto_cpu_caps_linux.c +0 -62
  141. data/vendor/libressl/crypto/arch/aarch64/crypto_cpu_caps_none.c +0 -26
  142. data/vendor/libressl/crypto/arch/aarch64/crypto_cpu_caps_windows.c +0 -36
  143. data/vendor/libressl/crypto/arch/loongarch64/crypto_arch.h +0 -21
  144. data/vendor/libressl/crypto/arch/mips/crypto_arch.h +0 -21
  145. data/vendor/libressl/crypto/bn/arch/loongarch64/bn_arch.h +0 -23
  146. data/vendor/libressl/crypto/bn/arch/mips/bn_arch.h +0 -24
  147. data/vendor/libressl/crypto/compat/.gitignore +0 -31
  148. data/vendor/libressl/crypto/compat/arc4random.h +0 -41
  149. data/vendor/libressl/crypto/compat/b_win.c +0 -55
  150. data/vendor/libressl/crypto/compat/bsd-asprintf.c +0 -96
  151. data/vendor/libressl/crypto/compat/crypto_lock_win.c +0 -56
  152. data/vendor/libressl/crypto/compat/explicit_bzero_win.c +0 -13
  153. data/vendor/libressl/crypto/compat/freezero.c +0 -32
  154. data/vendor/libressl/crypto/compat/getdelim.c +0 -78
  155. data/vendor/libressl/crypto/compat/getline.c +0 -40
  156. data/vendor/libressl/crypto/compat/getopt_long.c +0 -528
  157. data/vendor/libressl/crypto/compat/getpagesize.c +0 -18
  158. data/vendor/libressl/crypto/compat/getprogname_linux.c +0 -23
  159. data/vendor/libressl/crypto/compat/getprogname_unimpl.c +0 -7
  160. data/vendor/libressl/crypto/compat/getprogname_windows.c +0 -13
  161. data/vendor/libressl/crypto/compat/posix_win.c +0 -296
  162. data/vendor/libressl/crypto/compat/syslog_r.c +0 -19
  163. data/vendor/libressl/crypto/compat/ui_openssl_win.c +0 -334
  164. data/vendor/libressl/dist.sh +0 -22
  165. data/vendor/libressl/gen-coverage-report.sh +0 -58
  166. data/vendor/libressl/gen-openbsd-tags.sh +0 -20
  167. data/vendor/libressl/include/CMakeLists.txt +0 -61
  168. data/vendor/libressl/include/Makefile.am +0 -79
  169. data/vendor/libressl/include/arch/loongarch64/opensslconf.h +0 -150
  170. data/vendor/libressl/include/arch/mips/opensslconf.h +0 -150
  171. data/vendor/libressl/include/compat/arpa/inet.h +0 -15
  172. data/vendor/libressl/include/compat/arpa/nameser.h +0 -25
  173. data/vendor/libressl/include/compat/cet.h +0 -19
  174. data/vendor/libressl/include/compat/dirent.h +0 -17
  175. data/vendor/libressl/include/compat/dirent_msvc.h +0 -611
  176. data/vendor/libressl/include/compat/endian.h +0 -161
  177. data/vendor/libressl/include/compat/err.h +0 -95
  178. data/vendor/libressl/include/compat/fcntl.h +0 -32
  179. data/vendor/libressl/include/compat/getopt.h +0 -50
  180. data/vendor/libressl/include/compat/limits.h +0 -25
  181. data/vendor/libressl/include/compat/netdb.h +0 -10
  182. data/vendor/libressl/include/compat/netinet/in.h +0 -19
  183. data/vendor/libressl/include/compat/netinet/ip.h +0 -49
  184. data/vendor/libressl/include/compat/netinet/tcp.h +0 -10
  185. data/vendor/libressl/include/compat/poll.h +0 -63
  186. data/vendor/libressl/include/compat/pthread.h +0 -122
  187. data/vendor/libressl/include/compat/readpassphrase.h +0 -44
  188. data/vendor/libressl/include/compat/resolv.h +0 -24
  189. data/vendor/libressl/include/compat/stdint.h +0 -31
  190. data/vendor/libressl/include/compat/stdio.h +0 -65
  191. data/vendor/libressl/include/compat/stdlib.h +0 -57
  192. data/vendor/libressl/include/compat/string.h +0 -98
  193. data/vendor/libressl/include/compat/sys/_null.h +0 -18
  194. data/vendor/libressl/include/compat/sys/ioctl.h +0 -11
  195. data/vendor/libressl/include/compat/sys/mman.h +0 -19
  196. data/vendor/libressl/include/compat/sys/param.h +0 -15
  197. data/vendor/libressl/include/compat/sys/queue.h +0 -536
  198. data/vendor/libressl/include/compat/sys/select.h +0 -10
  199. data/vendor/libressl/include/compat/sys/socket.h +0 -18
  200. data/vendor/libressl/include/compat/sys/stat.h +0 -129
  201. data/vendor/libressl/include/compat/sys/time.h +0 -37
  202. data/vendor/libressl/include/compat/sys/tree.h +0 -1006
  203. data/vendor/libressl/include/compat/sys/types.h +0 -69
  204. data/vendor/libressl/include/compat/sys/uio.h +0 -17
  205. data/vendor/libressl/include/compat/syslog.h +0 -38
  206. data/vendor/libressl/include/compat/time.h +0 -59
  207. data/vendor/libressl/include/compat/unistd.h +0 -83
  208. data/vendor/libressl/include/compat/win32netcompat.h +0 -57
  209. data/vendor/libressl/include/openssl/Makefile.am.tpl +0 -45
  210. data/vendor/libressl/libcrypto.pc.in +0 -28
  211. data/vendor/libressl/libressl.pub +0 -2
  212. data/vendor/libressl/libssl.pc.in +0 -28
  213. data/vendor/libressl/libtls.pc.in +0 -28
  214. data/vendor/libressl/m4/ax_add_fortify_source.m4 +0 -80
  215. data/vendor/libressl/m4/ax_check_compile_flag.m4 +0 -53
  216. data/vendor/libressl/m4/check-hardening-options.m4 +0 -110
  217. data/vendor/libressl/m4/check-libc.m4 +0 -189
  218. data/vendor/libressl/m4/check-os-options.m4 +0 -181
  219. data/vendor/libressl/m4/disable-compiler-warnings.m4 +0 -44
  220. data/vendor/libressl/man/CMakeLists.txt +0 -26
  221. data/vendor/libressl/man/links +0 -2780
  222. data/vendor/libressl/man/update_links.sh +0 -25
  223. data/vendor/libressl/openssl.pc.in +0 -11
  224. data/vendor/libressl/patches/bn_shift.patch +0 -34
  225. data/vendor/libressl/patches/crypto_arch.h.patch +0 -34
  226. data/vendor/libressl/patches/crypto_namespace.h.patch +0 -22
  227. data/vendor/libressl/patches/netcat.c.patch +0 -178
  228. data/vendor/libressl/patches/openssl.c.patch +0 -12
  229. data/vendor/libressl/patches/opensslfeatures.h.patch +0 -49
  230. data/vendor/libressl/patches/patch-amd64-crypto-cpu-caps.c.patch +0 -20
  231. data/vendor/libressl/patches/patch-i386-crypto-cpu-caps.c.patch +0 -20
  232. data/vendor/libressl/patches/speed.c.patch +0 -114
  233. data/vendor/libressl/patches/ssl_namespace.h.patch +0 -21
  234. data/vendor/libressl/patches/tls.h.patch +0 -16
  235. data/vendor/libressl/patches/tls_config.c.patch +0 -15
  236. data/vendor/libressl/patches/win32_amd64_bn_arch.h.patch +0 -28
  237. data/vendor/libressl/patches/windows_headers.patch +0 -80
  238. data/vendor/libressl/scripts/config.guess +0 -1774
  239. data/vendor/libressl/scripts/config.sub +0 -1907
  240. data/vendor/libressl/scripts/i686-w64-mingw32.cmake +0 -9
  241. data/vendor/libressl/scripts/test +0 -210
  242. data/vendor/libressl/scripts/wrap-compiler-for-flag-check +0 -31
  243. data/vendor/libressl/scripts/x86_64-w64-mingw32.cmake +0 -9
  244. data/vendor/libressl/ssl/CMakeLists.txt +0 -183
  245. data/vendor/libressl/ssl/Makefile.am +0 -187
  246. data/vendor/libressl/tests/CMakeLists.txt +0 -970
  247. data/vendor/libressl/tests/Makefile.am +0 -944
  248. data/vendor/libressl/tests/aeadtest.sh +0 -30
  249. data/vendor/libressl/tests/arc4randomforktest.sh +0 -21
  250. data/vendor/libressl/tests/asn1time_small.test +0 -10
  251. data/vendor/libressl/tests/cmake/CMakeLists.txt +0 -52
  252. data/vendor/libressl/tests/cmake/crypto.c +0 -7
  253. data/vendor/libressl/tests/cmake/ssl.c +0 -6
  254. data/vendor/libressl/tests/cmake/tls.c +0 -6
  255. data/vendor/libressl/tests/compat/pipe2.c +0 -186
  256. data/vendor/libressl/tests/dtlstest.sh +0 -28
  257. data/vendor/libressl/tests/evptest.sh +0 -22
  258. data/vendor/libressl/tests/keypairtest.sh +0 -27
  259. data/vendor/libressl/tests/mlkem_tests.sh +0 -39
  260. data/vendor/libressl/tests/ocsptest.bat +0 -25
  261. data/vendor/libressl/tests/ocsptest.sh +0 -23
  262. data/vendor/libressl/tests/openssl.cnf +0 -29
  263. data/vendor/libressl/tests/optionstest.c +0 -381
  264. data/vendor/libressl/tests/pidwraptest.c +0 -85
  265. data/vendor/libressl/tests/pidwraptest.sh +0 -26
  266. data/vendor/libressl/tests/quictest.bat +0 -27
  267. data/vendor/libressl/tests/quictest.sh +0 -30
  268. data/vendor/libressl/tests/renegotiation_test.bat +0 -27
  269. data/vendor/libressl/tests/renegotiation_test.sh +0 -30
  270. data/vendor/libressl/tests/rfc5280time_small.test +0 -10
  271. data/vendor/libressl/tests/servertest.bat +0 -27
  272. data/vendor/libressl/tests/servertest.sh +0 -30
  273. data/vendor/libressl/tests/shutdowntest.bat +0 -27
  274. data/vendor/libressl/tests/shutdowntest.sh +0 -30
  275. data/vendor/libressl/tests/ssltest.bat +0 -32
  276. data/vendor/libressl/tests/ssltest.sh +0 -48
  277. data/vendor/libressl/tests/testdsa.bat +0 -47
  278. data/vendor/libressl/tests/testdsa.sh +0 -57
  279. data/vendor/libressl/tests/testenc.bat +0 -85
  280. data/vendor/libressl/tests/testenc.sh +0 -93
  281. data/vendor/libressl/tests/testrsa.bat +0 -47
  282. data/vendor/libressl/tests/testrsa.sh +0 -57
  283. data/vendor/libressl/tests/testssl.bat +0 -171
  284. data/vendor/libressl/tests/tlstest.bat +0 -27
  285. data/vendor/libressl/tests/tlstest.sh +0 -28
  286. data/vendor/libressl/tls/CMakeLists.txt +0 -125
  287. data/vendor/libressl/tls/Makefile.am +0 -76
  288. data/vendor/libressl/tls/compat/ftruncate.c +0 -17
  289. data/vendor/libressl/tls/compat/pread.c +0 -29
  290. data/vendor/libressl/tls/compat/pwrite.c +0 -29
  291. data/vendor/libressl/update.sh +0 -460
@@ -455,16 +455,180 @@ Ruby I/O layer. Some interesting warts in the Ruby `IO` implementation:
455
455
  [Extralite](https://github.com/digital-fabric/extralite/)): normally, using an
456
456
  actor interface, or protected by a mutex. I'll try to follow up with a
457
457
  benchmark measuring concurrent access to SQLite DBs, similar to the PG one.
458
-
458
+
459
459
  Another interesting benchmark I found was one for resolving DNS addresses
460
460
  using Ruby's builtin `Addrinfo` API, the bundled `resolv` gem, and a basic DNS
461
461
  resolver included in UringMachine (I totally forgot I made one). Here too, I'd
462
462
  like to add a benchmark to measure how these different solutions do in a
463
463
  highly concurrent scenario.
464
-
464
+
465
465
  - Thanks to one of these old benchmarks I made a change that more than doubled
466
466
  the performance of `UM#snooze`. What this method does is it adds the current
467
467
  fiber to the end of the runqueue, and yields control to the next fiber in the
468
468
  runqueue, or to process available CQE's. This method is useful for testing,
469
469
  but also for yielding control periodically when performing CPU-bound work, in
470
470
  order to keep the application responsive and improve latency.
471
+
472
+ # 2025-12-14
473
+
474
+ - Changed how `struct um_op`s are allocated. This struct is used to represent
475
+ any io_uring operation. It is also used to represent runqueue entries. Now,
476
+ for most I/O operations, this struct is stack-allocated. But when a new fiber
477
+ is scheduled, or when using the `#timeout` or any of the `#xxx_async` methods,
478
+ like `#close_async` or `#write_async`, we need to use a heap-allocated
479
+ `um_op`, because we don't control its lifetime. In order to minimize
480
+ allocations, once a `um_op` is done with (it's been pulled out of the
481
+ runqueue, or its corresponding CQE has been processed), it is put on a
482
+ freelist in order to be reused when needed. Previously, when the freelist was
483
+ empty, UringMachine would just allocate a new one using `malloc`. Now
484
+ UringMachine allocates an array of 256 structs at once and puts all of them on
485
+ the freelist.
486
+ - Implemented the vectorized versions of `#write` and `#send`, so now one can
487
+ use `#writev` and `#sendv` to send multiple buffers at once. This could be
488
+ very useful for situations like sending an HTTP response, which is made of a
489
+ headers part and a body part. Also, `#writev` and `#sendv` are guaranteed to
490
+ write/send the entirety of the given buffers, unlike `#write` and `#send`
491
+ which can do partial write/send (for `#send` you can specify the
492
+ `UM::MSG_WAITALL` flag) to guarantee a complete send.
493
+ - With the new built-in `Set` class and its new [C
494
+ API](https://github.com/ruby/ruby/pull/13735), I've switched the internal
495
+ `pending_fibers` holding fibers waiting for an operation to complete, from a
496
+ hash to a set.
497
+
498
+ # 2025-12-15
499
+
500
+ - Working more with benchmarks, it has occurred to me that with the current
501
+ design of UringMachine, whenever we check for I/O completions (which is also
502
+ the moment when we make I/O submissions to the kernel), we leave some
503
+ performance on the table. This is because when we call `io_uring_submit` or
504
+ `io_uring_wait_cqes`, we make a blocking system call (namely,
505
+ `io_uring_enter`), and correspondingly we release the GVL.
506
+
507
+ What this means is that while we're waiting for the system call to return, the
508
+ GVL is available for another Ruby thread to do CPU-bound work. Normally when
509
+ there's a discussion about concurrency in Ruby, there's this dichotomy: it's
510
+ either threads or fibers. But as described above, even when using fibers and
511
+ io_uring for concurrent I/O, we still need to enter the kernel periodically in
512
+ order to submit operations and process completions. So this is an opportunity
513
+ to yield the GVL to a different thread, which can run some Ruby code while the
514
+ first thread is waiting for the system call to return.
515
+
516
+ With that in mind, I modified the benchmark code to see what would happen if
517
+ we run two UringMachine instances on two separate threads. The results are
518
+ quite interesting: splitting the workload between two UringMachine instances
519
+ running on separate threads, we get a marked improvement in performance.
520
+ Depending on the benchmark, we get even better performance if we increase the
521
+ thread count to 4.
522
+
523
+ But, as we increase the thread count, we eventually hit diminishing returns
524
+ and risk actually having worse performance than with just a single thread. So,
525
+ at least for the workloads I tested (including a very primitive HTTP/1.1
526
+ server), the sweet spot is between 2 and 4 threads.
527
+
528
+ One thing I have noticed though, is that while the pure UM version (i.e. using
529
+ the UM low-level API) gets a boost from running on multiple threads, the UM
530
+ fiber scheduler actually can perform worse. This is also the case for the
531
+ Async fiber scheduler, so this might have to do with the fact that the Ruby IO
532
+ class does a lot of work behind the scenes, including locking write mutexes
533
+ and other stuff that's done when the IO is closed. This is still to be
534
+ investigated...
535
+
536
+ # 2025-12-16
537
+
538
+ - Added `UM#accept_into_queue`, which accepts incoming socket connections in a
539
+ loop and pushes them to the given queue.
540
+
541
+ - Improved error handling in the fiber scheduler, and added more tests. There
542
+ are now about 4.2KLoC of test code, with 255 test cases and 780 assertions. And
543
+ that's without all the tests that depend on the
544
+ [`rb_process_new`](https://github.com/ruby/ruby/pull/15213) API, the PR for
545
+ which is currently still not merged.
546
+
547
+ - Added a test mode to UringMachine that affects runqueue processing, without
548
+ impacting performance under normal conditions.
549
+
550
+ # 2025-12-17
551
+
552
+ - I noticed that the fiber scheduler `#io_write` was not being called on
553
+ `IO#flush` or when closing an IO with buffered writes. So any time the IO
554
+ write buffer needs to be flushed, instead of calling the `#io_write` hook, the
555
+ Ruby I/O layer would just run this on a worker thread by calling the
556
+ `#blocking_operation_wait` hook. I've made a
557
+ [PR](https://github.com/ruby/ruby/pull/15609) to fix this.
558
+
559
+ # 2025-12-18
560
+
561
+ - Added a [PR](https://github.com/ruby/ruby/pull/15629) to update Ruby NEWS with
562
+ changes to the FiberScheduler interface.
563
+
564
+ - I did some more verification work on the fiber scheduler implementation. I
565
+ added more tests and improved error handling in read/write hooks.
566
+
567
+ - Made some small changes to fiber scheduling. I added a test mode which peeks
568
+ at CQEs on each snooze, in order to facilitate testing.
569
+
570
+ # 2025-12-20
571
+
572
+ - Did some more work on benchmarks, and added provisional GVL time measurement.
573
+
574
+ - Implemented sidecar mode - the basic idea is that UringMachine starts an
575
+ auxiliary thread that loops entering the kernel with a call to
576
+ `io_uring_enter` in order to make CQEs available. On return from the system
577
+ call, it signals through a futex that ready CQEs can be processed.
578
+
579
+ On fiber switch, the next fiber to run is shifted from the runqueue. If the
580
+ runqueue is empty, the UringMachine will wait for the signal, and then process
581
+ all CQEs. The idea is that in a single threaded environment, under high enough
582
+ I/O load, we don't need to release the GVL in order to process ready CQEs,
583
+ and thus we can better saturate the CPU.
584
+
585
+ # 2025-12-26
586
+
587
+ - Finished up the sidecar mode implementation. I did some preliminary benchmarks
588
+ and this mode does provide a small performance benefit, depending on the
589
+ context. But for the moment, I consider this mode experimental.
590
+
591
+ # 2026-01-07
592
+
593
+ - In the last week I've been working on implementing a buffer pool
594
+ with automatic buffer management. I've been contemplating the design for a
595
+ few weeks already, and after the vacation have decided the idea is solid enough
596
+ for me to start writing some code. But let me back up and explain what I'm
597
+ trying to achieve.
598
+
599
+ The io_uring interface includes a facility for setting up buffer rings. The
600
+ idea is that the application provides buffers to the kernel, which uses those
601
+ buffers for reading or receiving repeatedly from an fd, letting the
602
+ application know with each CQE which buffer was used and with how much data.
603
+ This is particularly useful when dealing with bursts of incoming data.
604
+
605
+ The application initiates multishot read/recv operations on each connection,
606
+ and the kernel has at its disposal a pool of application-provided buffers
607
+ it can use whenever a chunk of data is read / received. So the kernel consumes
608
+ those buffers as needed, and fills them with data when it becomes available.
609
+ Those data will be processed by the application at some later time when it's
610
+ ready to process CQEs. The application will then add the consumed buffers back
611
+ to the buffer ring, making them available to the kernel again.
612
+
613
+ Multiple buffer rings may be registered by the application, each with a set
614
+ maximum number of buffers and with a buffer group id (`bgid`). The buffers
615
+ added to a buffer ring may be of any size. Each buffer in a buffer ring also
616
+ has an id (`bid`). So buffers are identified by the tuple `[bgid, bid]`. When
617
+ submitting a multishot read/recv operation, we indicate the buffer group id
618
+ (`bgid`), letting the kernel know which buffer ring to use. The kernel then
619
+ generates CQEs (completion queue entries) which contain the id of the buffer
620
+ that contains the data (`bid`). Crucially, a single buffer ring may be used in
621
+ multiple concurrent multishot read/recv operations on different fd's.
622
+
623
+ In addition, on recent kernels io_uring is capable of partially consuming
624
+ buffers, which prevents wasting buffer space. When a buffer ring is set up for
625
+ [partial buffer
626
+ consumption](https://www.man7.org/linux/man-pages/man3/io_uring_setup_buf_ring.3.html),
627
+ each CQE relating to a multishot read/recv operation will also have a flag
628
+ telling the application [whether the buffer will be further
629
+ used](https://www.man7.org/linux/man-pages/man3/io_uring_prep_recv.3.html)
630
+ beyond the amount of data readily available. Each completion of a given buffer
631
+ ID will continue where the previous one left off. So it's great that buffer
632
+ space can be used fully by the kernel, but the application is required to keep
633
+ track of a "cursor" for each buffer.
634
+
data/grant-2025/tasks.md CHANGED
@@ -12,29 +12,39 @@
12
12
  https://unixism.net/loti/tutorial/sq_poll.html
13
13
  - [v] Add `UM.socketpair`
14
14
 
15
- - [ ] Add more metrics
15
+ - [v] Add more metrics
16
16
  - [v] runqueue depth
17
17
  - [v] number of pending fibers
18
18
  - [v] ops: transient count, free count
19
19
  - [v] total fiber switches, total waiting for CQEs
20
- - [ ] watermark: ops_pending, ops_unsubmitted, ops_runqueue, ops_free, ops_transient
21
- (only in profile mode)
22
- - [ ] Performance tuning parameters
23
- - [ ] max fiber switches before processing CQEs
24
- - [ ] max fiber switches before submitting unsubmitted SQEs
25
- - [ ] measure switches since last submitting / last CQE processing
26
-
27
- - [ ] Better buffer management buffer rings
20
+
21
+ - [v] Make writev automatically complete partial writes
22
+
23
+ - [v] Add inotify API
24
+
25
+ https://www.man7.org/linux/man-pages/man7/inotify.7.html
26
+
27
+ - [ ] Better buffer management
28
28
  - [v] Add `UM#sendv` method (see below)
29
29
  - [v] Benchmark `#sendv` vs `#send_bundle` (in concurrent situation)
30
+ - [v] Support for `IO::Buffer`?
30
31
  - [ ] Benchmark `#read_each` vs `#read` (in concurrent situation)
31
- - [ ] Support for `IO::Buffer`? How's the API gonna look like?
32
- - [ ] Some higher-level abstraction for managing a *pool* of buffer rings
33
-
34
- - [ ] Add some way to measure fiber CPU time.
35
- https://github.com/socketry/async/issues/428
36
-
37
- - [ ] UringMachine Fiber::Scheduler implementation
32
+ - [ ] Implement automatic buffer pool:
33
+ - [ ] Automatic buffer allocation, registration, and management.
34
+ - [ ] Support for partial buffer consumption.
35
+ - [ ] Data processing through a rewritten stream implementation.
36
+
37
+ - [v] Sidecar mode
38
+ - [v] Convert `UM#initialize` to take kwargs
39
+ - [v] `:size` - SQ entries
40
+ - [v] `:sqpoll` - sqpoll mode
41
+ - [v] `:sidecar` - sidecar mode
42
+ - [v] Sidecar implementation
43
+ - [v] sidecar thread
44
+ - [v] futex handling
45
+ - [v] submission logic
46
+
47
+ - [v] UringMachine Fiber::Scheduler implementation
38
48
  - [v] Check how scheduler interacts with `fork`.
39
49
  - [v] Implement `process_wait` (with `rb_process_status_new`)
40
50
  - [v] Implement `fiber_interrupt` hook
@@ -97,7 +107,7 @@
97
107
  - [v] pipes: multiple pairs of fibers - reader / writer
98
108
  - [v] sockets: echo server + many clients
99
109
 
100
- - [ ] Benchmarks
110
+ - [v] Benchmarks
101
111
  - [v] UM queue / Ruby queue (threads) / Ruby queue with UM fiber scheduler
102
112
 
103
113
  N groups where each group has M producers and O consumers accessing the same queue.
@@ -124,10 +134,12 @@
124
134
  - [v] hook for close
125
135
  - [ ] hooks for send/recv/sendmsg/recvmsg
126
136
 
127
- - [ ] SSL
128
- - [ ] openssl gem: custom BIO?
137
+ - [v] SSL
138
+ - [v] setup custom BIO
139
+ - [v] SSL read/write methods
129
140
 
130
- - curl: https://github.com/curl/curl/blob/5f4cd4c689c822ce957bb415076f0c78e5f474b5/lib/vtls/openssl.c#L786-L803
141
+ - [v] RDoc
142
+ - [v] Rewrite README to show some examples
131
143
 
132
144
  - [ ] UringMachine website
133
145
  - [ ] domain: uringmachine.dev
@@ -3,7 +3,13 @@
3
3
  require 'resolv'
4
4
 
5
5
  class UringMachine
6
+ # A basic DNS resolver implementation.
6
7
  class DNSResolver
8
+
9
+ # Initializes the DNS resolver.
10
+ #
11
+ # @param machine [UringMachine] UringMachine instance
12
+ # @return [void]
7
13
  def initialize(machine)
8
14
  @machine = machine
9
15
  @requests = UM::Queue.new
@@ -12,11 +18,19 @@ class UringMachine
12
18
  @last_id = 0
13
19
  end
14
20
 
21
+ # Resolves the given hostname.
22
+ #
23
+ # @param hostname [String] hostname
24
+ # @param type [Symbol] DNS record type
25
+ # @return [String] IP address
15
26
  def resolve(hostname, type)
16
27
  @machine.push(@requests, [hostname, type, Fiber.current])
17
28
  @machine.yield
18
29
  end
19
30
 
31
+ private
32
+
33
+ # Handles resolve requests as they come.
20
34
  def handle_requests_loop
21
35
  while true
22
36
  hostname, type, fiber = @machine.shift(@requests)
@@ -25,6 +39,9 @@ class UringMachine
25
39
  end
26
40
  end
27
41
 
42
+ # Returns an array of nameservers.
43
+ #
44
+ # @return [Array<String>] name servers
28
45
  def get_nameservers
29
46
  nameservers = []
30
47
  IO.readlines('/etc/resolv.conf').each do |line|
@@ -35,10 +52,16 @@ class UringMachine
35
52
  nameservers
36
53
  end
37
54
 
55
+ # Returns a DNS socket fd connected to a name server.
56
+ #
57
+ # @return [Integer] fd
38
58
  def socket_fd
39
59
  @socket_fd ||= prepare_socket
40
60
  end
41
61
 
62
+ # Prepares a socket fd connected to a name server.
63
+ #
64
+ # @return [Integer] fd
42
65
  def prepare_socket
43
66
  fd = @machine.socket(UM::AF_INET, UM::SOCK_DGRAM, 0, 0)
44
67
  @machine.bind(fd, '0.0.0.0', 0)
@@ -46,6 +69,12 @@ class UringMachine
46
69
  fd
47
70
  end
48
71
 
72
+ # Resolves a DNS query.
73
+ #
74
+ # @param hostname [String] hostname
75
+ # @param type [Symbol] DNS record type
76
+ # @param try_count [Integer] retry counter
77
+ # @return [Array<String>] array of addresses
49
78
  def do_resolve(hostname, type, try_count = 0)
50
79
  fd = socket_fd
51
80
  req = prepare_request_packet(hostname, type)
@@ -67,6 +96,11 @@ class UringMachine
67
96
  addrs
68
97
  end
69
98
 
99
+ # Prepares a request packet.
100
+ #
101
+ # @param hostname [String] hostname
102
+ # @param type [Symbol] DNS record type
103
+ # @return [Resolv::DNS::Message]
70
104
  def prepare_request_packet(hostname, type)
71
105
  msg = Resolv::DNS::Message.new
72
106
  msg.id = (@last_id += 1)
@@ -75,6 +109,10 @@ class UringMachine
75
109
  msg
76
110
  end
77
111
 
112
+ # Returns the message type class.
113
+ #
114
+ # @param type [Symbol] DNS record type
115
+ # @return [Class] message type class
78
116
  def msg_type(type)
79
117
  # TODO: add support for other types
80
118
  Resolv::DNS::Resource::IN::A
@@ -5,6 +5,7 @@ require 'etc'
5
5
  require 'uringmachine'
6
6
 
7
7
  class UringMachine
8
+
8
9
  # Implements a worker thread pool for running blocking operations. Worker
9
10
  # threads are started as needed. Worker thread count is limited to the number
10
11
  # of CPU cores available.
@@ -12,11 +13,13 @@ class UringMachine
12
13
 
13
14
  # Initializes a new worker pool.
14
15
  #
16
+ # @param max_workers [Integer] maximum worker thread count
15
17
  # @return [void]
16
- def initialize
18
+ def initialize(max_workers = Etc.nprocessors)
19
+ @max_workers = max_workers
17
20
  @pending_count = 0
18
21
  @worker_count = 0
19
- @max_workers = Etc.nprocessors
22
+
20
23
  @worker_mutex = UM::Mutex.new
21
24
  @job_queue = UM::Queue.new
22
25
  @workers = []
@@ -52,7 +55,7 @@ class UringMachine
52
55
 
53
56
  # @return [void]
54
57
  def run_worker_thread
55
- machine = UM.new(4)
58
+ machine = UM.new(size: 4)
56
59
  loop do
57
60
  q, op = machine.shift(@job_queue)
58
61
  @pending_count += 1
@@ -70,13 +73,13 @@ class UringMachine
70
73
  end
71
74
  end
72
75
 
73
- # UringMachine::FiberScheduler implements the Fiber::Scheduler interface for
74
- # creating fiber-based concurrent applications in Ruby, in tight integration
75
- # with the standard Ruby I/O and locking APIs.
76
+ # Implements the `Fiber::Scheduler` interface for creating fiber-based
77
+ # concurrent applications in Ruby, in tight integration with the standard Ruby
78
+ # I/O and locking APIs.
76
79
  class FiberScheduler
77
80
 
78
81
  # The blocking operation thread pool is shared by all fiber schedulers.
79
- @@blocking_operation_thread_pool = BlockingOperationThreadPool.new
82
+ DEFAULT_THREAD_POOL = BlockingOperationThreadPool.new
80
83
 
81
84
  # UringMachine instance associated with scheduler.
82
85
  attr_reader :machine
@@ -92,8 +95,9 @@ class UringMachine
92
95
  #
93
96
  # @param machine [UringMachine, nil] UringMachine instance
94
97
  # @return [void]
95
- def initialize(machine = nil)
98
+ def initialize(machine = nil, thread_pool = DEFAULT_THREAD_POOL)
96
99
  @machine = machine || UM.new
100
+ @thread_pool = thread_pool
97
101
  @fiber_map = ObjectSpace::WeakMap.new
98
102
  @thread = Thread.current
99
103
  end
@@ -107,7 +111,8 @@ class UringMachine
107
111
  # the fiber map, scheduled on the scheduler machine, and started before this
108
112
  # method returns (by calling snooze).
109
113
  #
110
- # @param block [Proc] fiber block @return [Fiber]
114
+ # @param block [Proc] fiber block
115
+ # @return [Fiber]
111
116
  def fiber(&block)
112
117
  fiber = Fiber.new(blocking: false) { @machine.run(fiber, &block) }
113
118
 
@@ -145,7 +150,7 @@ class UringMachine
145
150
  # @param op [callable] blocking operation
146
151
  # @return [void]
147
152
  def blocking_operation_wait(op)
148
- @@blocking_operation_thread_pool.process(@machine, op)
153
+ @thread_pool.process(@machine, op)
149
154
  end
150
155
 
151
156
  # Blocks the current fiber by yielding to the machine. This hook is called
@@ -188,7 +193,6 @@ class UringMachine
188
193
  # Yields to the next runnable fiber.
189
194
  def yield
190
195
  @machine.snooze
191
- # @machine.yield
192
196
  end
193
197
 
194
198
  # Waits for the given io to become ready.
@@ -196,9 +200,8 @@ class UringMachine
196
200
  # @param io [IO] IO object
197
201
  # @param events [Number] readiness bitmask
198
202
  # @param timeout [Number, nil] optional timeout
199
- # @param return
203
+ # @return [void]
200
204
  def io_wait(io, events, timeout = nil)
201
- # p(io_wait: io, events:)
202
205
  timeout ||= io.timeout
203
206
  if timeout
204
207
  @machine.timeout(timeout, Timeout::Error) {
@@ -243,7 +246,7 @@ class UringMachine
243
246
  length = buffer.size if length == 0
244
247
 
245
248
  if (timeout = io.timeout)
246
- @machine.timeout(timeout, Timeout::Error) do
249
+ @machine.timeout(timeout, Timeout::Error) do
247
250
  @machine.read(io.fileno, buffer, length, offset)
248
251
  rescue Errno::EINTR
249
252
  retry
@@ -253,6 +256,8 @@ class UringMachine
253
256
  end
254
257
  rescue Errno::EINTR
255
258
  retry
259
+ rescue Errno => e
260
+ -e.errno
256
261
  end
257
262
 
258
263
  # Reads from the given IO at the given file offset
@@ -267,7 +272,7 @@ class UringMachine
267
272
  length = buffer.size if length == 0
268
273
 
269
274
  if (timeout = io.timeout)
270
- @machine.timeout(timeout, Timeout::Error) do
275
+ @machine.timeout(timeout, Timeout::Error) do
271
276
  @machine.read(io.fileno, buffer, length, offset, from)
272
277
  rescue Errno::EINTR
273
278
  retry
@@ -277,6 +282,8 @@ class UringMachine
277
282
  end
278
283
  rescue Errno::EINTR
279
284
  retry
285
+ rescue Errno => e
286
+ -e.errno
280
287
  end
281
288
 
282
289
  # Writes to the given IO.
@@ -287,12 +294,11 @@ class UringMachine
287
294
  # @param offset [Integer] write offset
288
295
  # @return [Integer] bytes written
289
296
  def io_write(io, buffer, length, offset)
290
- # p(io_write: io, length:, offset:, timeout: io.timeout)
291
297
  length = buffer.size if length == 0
292
298
  buffer = buffer.slice(offset) if offset > 0
293
299
 
294
300
  if (timeout = io.timeout)
295
- @machine.timeout(timeout, Timeout::Error) do
301
+ @machine.timeout(timeout, Timeout::Error) do
296
302
  @machine.write(io.fileno, buffer, length)
297
303
  rescue Errno::EINTR
298
304
  retry
@@ -302,23 +308,24 @@ class UringMachine
302
308
  end
303
309
  rescue Errno::EINTR
304
310
  retry
311
+ rescue Errno => e
312
+ -e.errno
305
313
  end
306
314
 
307
315
  # Writes to the given IO at the given file offset.
308
316
  #
309
317
  # @param io [IO] IO object
310
318
  # @param buffer [IO::Buffer] write buffer
311
- # @param length [Integer] file offset
319
+ # @param from [Integer] file offset
312
320
  # @param length [Integer] write length
313
321
  # @param offset [Integer] buffer offset
314
322
  # @return [Integer] bytes written
315
323
  def io_pwrite(io, buffer, from, length, offset)
316
- # p(io_pwrite: io, from:, length:, offset:, timeout: io.timeout)
317
324
  length = buffer.size if length == 0
318
325
  buffer = buffer.slice(offset) if offset > 0
319
326
 
320
327
  if (timeout = io.timeout)
321
- @machine.timeout(timeout, Timeout::Error) do
328
+ @machine.timeout(timeout, Timeout::Error) do
322
329
  @machine.write(io.fileno, buffer, length, from)
323
330
  rescue Errno::EINTR
324
331
  retry
@@ -328,6 +335,8 @@ class UringMachine
328
335
  end
329
336
  rescue Errno::EINTR
330
337
  retry
338
+ rescue Errno => e
339
+ -e.errno
331
340
  end
332
341
 
333
342
  # Closes the given fd.
@@ -335,8 +344,9 @@ class UringMachine
335
344
  # @param fd [Integer] file descriptor
336
345
  # @return [Integer] file descriptor
337
346
  def io_close(fd)
338
- # p(io_close: fd)
339
347
  @machine.close_async(fd)
348
+ rescue Errno => e
349
+ -e.errno
340
350
  end
341
351
 
342
352
  if UM.method_defined?(:waitid_status)
@@ -366,17 +376,17 @@ class UringMachine
366
376
  #
367
377
  # @param hostname [String] hostname to resolve
368
378
  # @return [Array<Addrinfo>] array of resolved addresses
369
- def address_resolve(hostname)
370
- Resolv.getaddresses(hostname)
371
- end
372
-
373
- # Run the given block with a timeout.
374
- #
375
- # @param duration [Number] timeout duration
376
- # @param exception [Class] exception Class
377
- # @param message [String] exception message
378
- # @param block [Proc] block to run
379
- # @return [any] block return value
379
+ def address_resolve(hostname)
380
+ Resolv.getaddresses(hostname)
381
+ end
382
+
383
+ # Run the given block with a timeout.
384
+ #
385
+ # @param duration [Number] timeout duration
386
+ # @param exception [Class] exception Class
387
+ # @param message [String] exception message
388
+ # @param block [Proc] block to run
389
+ # @return [any] block return value
380
390
  def timeout_after(duration, exception, message, &block)
381
391
  @machine.timeout(duration, exception, &block)
382
392
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class UringMachine
4
- VERSION = '0.23.1'
4
+ VERSION = '0.25.0'
5
5
  end