uringmachine 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +3 -4
  3. data/.rubocop.yml +2 -0
  4. data/CHANGELOG.md +34 -0
  5. data/TODO.md +132 -26
  6. data/benchmark/README.md +173 -0
  7. data/benchmark/bm_io_pipe.rb +70 -0
  8. data/benchmark/bm_io_socketpair.rb +71 -0
  9. data/benchmark/bm_mutex_cpu.rb +57 -0
  10. data/benchmark/bm_mutex_io.rb +64 -0
  11. data/benchmark/bm_pg_client.rb +109 -0
  12. data/benchmark/bm_queue.rb +76 -0
  13. data/benchmark/chart.png +0 -0
  14. data/benchmark/common.rb +135 -0
  15. data/benchmark/dns_client.rb +47 -0
  16. data/{examples/bm_http_parse.rb → benchmark/http_parse.rb} +1 -1
  17. data/benchmark/run_bm.rb +8 -0
  18. data/benchmark/sqlite.rb +108 -0
  19. data/{examples/bm_write.rb → benchmark/write.rb} +6 -3
  20. data/ext/um/extconf.rb +1 -1
  21. data/ext/um/um.c +404 -95
  22. data/ext/um/um.h +77 -24
  23. data/ext/um/um_async_op.c +2 -2
  24. data/ext/um/um_class.c +168 -18
  25. data/ext/um/um_op.c +43 -0
  26. data/ext/um/um_sync.c +10 -16
  27. data/ext/um/um_utils.c +16 -0
  28. data/grant-2025/journal.md +242 -1
  29. data/grant-2025/tasks.md +136 -41
  30. data/lib/uringmachine/actor.rb +8 -0
  31. data/lib/uringmachine/dns_resolver.rb +1 -2
  32. data/lib/uringmachine/fiber_scheduler.rb +283 -110
  33. data/lib/uringmachine/version.rb +1 -1
  34. data/lib/uringmachine.rb +32 -3
  35. data/test/helper.rb +7 -18
  36. data/test/test_actor.rb +12 -3
  37. data/test/test_async_op.rb +10 -10
  38. data/test/test_fiber.rb +84 -1
  39. data/test/test_fiber_scheduler.rb +1425 -20
  40. data/test/test_um.rb +565 -113
  41. data/uringmachine.gemspec +6 -5
  42. data/vendor/liburing/src/include/liburing/io_uring.h +1 -0
  43. data/vendor/liburing/src/include/liburing.h +13 -0
  44. data/vendor/liburing/src/liburing-ffi.map +1 -0
  45. data/vendor/liburing/test/bind-listen.c +175 -13
  46. data/vendor/liburing/test/read-write.c +4 -4
  47. data/vendor/liburing/test/ringbuf-read.c +4 -4
  48. data/vendor/liburing/test/send_recv.c +8 -7
  49. metadata +50 -28
  50. data/examples/bm_fileno.rb +0 -33
  51. data/examples/bm_queue.rb +0 -110
  52. data/examples/bm_side_running.rb +0 -83
  53. data/examples/bm_sqlite.rb +0 -89
  54. data/examples/dns_client.rb +0 -12
  55. /data/{examples/bm_mutex.rb → benchmark/mutex.rb} +0 -0
  56. /data/{examples/bm_mutex_single.rb → benchmark/mutex_single.rb} +0 -0
  57. /data/{examples/bm_send.rb → benchmark/send.rb} +0 -0
  58. /data/{examples/bm_snooze.rb → benchmark/snooze.rb} +0 -0
data/ext/um/um_utils.c CHANGED
@@ -2,6 +2,7 @@
  #include <sys/mman.h>
  #include <stdlib.h>
  #include <ruby/io/buffer.h>
+ #include <time.h>

  inline struct __kernel_timespec um_double_to_timespec(double value) {
    double integral;
@@ -16,6 +17,21 @@ inline double um_timestamp_to_double(__s64 tv_sec, __u32 tv_nsec) {
    return (double)tv_sec + ((double)tv_nsec) / 1000000000;
  }

+ inline double um_get_time_cpu() {
+   struct timespec ts;
+   if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts)) return -1.0;
+
+   return um_timestamp_to_double(ts.tv_sec, ts.tv_nsec);
+ }
+
+ inline double um_get_time_monotonic() {
+   struct timespec ts;
+   if (clock_gettime(CLOCK_MONOTONIC, &ts)) return -1.0;
+
+   return um_timestamp_to_double(ts.tv_sec, ts.tv_nsec);
+ }
+
+
  #define RAISE_EXCEPTION(e) rb_funcall(e, ID_invoke, 0);

  inline int um_value_is_exception_p(VALUE v) {
data/grant-2025/journal.md CHANGED
@@ -205,7 +205,8 @@ Ruby I/O layer. Some interesting warts in the Ruby `IO` implementation:
  I'll see if I can prepare a PR for that to be merged for the Ruby 4.0 release.

  For the time being, I've added a `#post_fork` method to the UM fiber scheduler
- which should be manually called after a fork. (commit 2c7877385869c6acbdd8354e2b2909cff448651b)
+ which should be manually called after a fork. (commit
+ 2c7877385869c6acbdd8354e2b2909cff448651b)

  - Added two new low-level APIs for waiting on processes, instead of
    `UM#waitpid`, using the io_uring version of `waitid`. The vanilla version
@@ -227,3 +228,243 @@ Ruby I/O layer. Some interesting warts in the Ruby `IO` implementation:
    ...
    pid2, status = machine.waitid(P_PIDFD, fd, UM::WEXITED)
    ```
+
+ # 2025-11-28
+
+ - On Samuel's suggestion, I've submitted a
+   [PR](https://github.com/ruby/ruby/pull/15342) for adding a
+   `Fiber::Scheduler#process_fork` hook that is automatically invoked after a
+   fork. This continues the `#post_fork` work. I still have a lot to learn
+   about working with the Ruby core code, but I'm really excited about the
+   possibility of this PR (and the [previous
+   one](https://github.com/ruby/ruby/pull/15213) as well) getting merged in
+   time for the Ruby 4.0 release.
+ - Added a bunch of tests for `UM::FiberScheduler`: socket I/O, file I/O,
+   mutex, queue, waiting for threads. In the process I discovered a lot of
+   things that can be improved in the way Ruby invokes the fiber scheduler.
+
+   - For regular files, Ruby assumes file I/O can never be non-blocking (or
+     async), and thus invokes the `#blocking_operation_wait` hook in order to
+     perform the I/O in a separate thread. With io_uring, of course, file I/O
+     *is* asynchronous.
+   - For sockets there are no specialized hooks, like `#socket_send` etc.
+     Instead, Ruby makes the socket fd's non-blocking and invokes `#io_wait`
+     to wait for the socket to be ready.
+
+   I find it interesting how io_uring breaks a lot of assumptions about how
+   I/O should be done.
+
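The two code paths described above, sketched with plain-Ruby fallbacks (illustrative only - this is not the UM implementation, and not a complete scheduler):

```ruby
class SketchScheduler
  # Ruby calls this for sockets it has switched to non-blocking mode; an
  # io_uring-based scheduler would submit a poll SQE here instead.
  def io_wait(io, events, timeout)
    readable = (events & IO::READABLE).nonzero? ? [io] : nil
    writable = (events & IO::WRITABLE).nonzero? ? [io] : nil
    IO.select(readable, writable, nil, timeout) ? events : false
  end

  # Ruby calls this for regular-file I/O, which it assumes is always blocking;
  # `work` is the operation to run off the event loop.
  def blocking_operation_wait(work)
    Thread.new(&work).join
  end
end
```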
+ # 2025-12-03
+
+ - Samuel and I continued discussing the behavior of the fiber scheduler after
+   a fork. After talking it through, we decided the best course of action
+   would be to remove the fiber scheduler after a fork, rather than to
+   introduce a `process_fork` hook. This is the safer choice, since a
+   scheduler risks carrying over some of its state across a fork, leading to
+   unexpected behavior.
+
+   Another problem I uncovered is that if a fork is done from a non-blocking
+   fiber, the main fiber of the forked process (which "inherits" the forking
+   fiber) stays in non-blocking mode, which may also lead to unexpected
+   behavior, since the main fiber of every Ruby thread should be in blocking
+   mode.
+
+   So I submitted a new [PR](https://github.com/ruby/ruby/pull/15385) that
+   corrects these two problems.
+
+ - I mapped the remaining missing hooks in the UringMachine fiber scheduler
+   implementation, and made the tests more robust by checking that the
+   different scheduler hooks were actually being called.
+
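One possible shape for such a verification wrapper (a sketch; the actual test helper may differ):

```ruby
# Delegates every hook to a real scheduler while recording hook names, so a
# test can assert which hooks were actually invoked.
class RecordingScheduler
  attr_reader :calls

  def initialize(inner)
    @inner = inner
    @calls = []
  end

  def respond_to_missing?(sym, include_private = false)
    @inner.respond_to?(sym, include_private)
  end

  def method_missing(sym, *args, &blk)
    @calls << sym
    @inner.send(sym, *args, &blk)
  end
end

# recorder = RecordingScheduler.new(scheduler)  # any working Fiber::Scheduler
# Fiber.set_scheduler(recorder)
# ... exercise some I/O ...
# assert_includes recorder.calls, :io_wait
```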
+ - Continued implementing the missing fiber scheduler hooks:
+   `#fiber_interrupt`, `#address_resolve`, `#timeout_after`. For the most
+   part, they were simple to implement; I probably spent more time figuring
+   out how to test them than implementing them. Most of the hooks are just a
+   few lines of code - many are a single line calling into the relevant
+   UringMachine low-level API.
+
+ - Implemented the `#io_select` hook, which involved implementing a low-level
+   `UM#select` method. This method took some effort to implement, since it
+   needs to handle an arbitrary number of file descriptors to check for
+   readiness. We need to create a separate SQE for each fd we want to poll.
+   When one or more CQEs arrive for polled fd's, we also need to cancel all
+   poll operations that have not completed.
+
+   Since in many cases `IO.select` is called with just a single IO, I also
+   added a special-case implementation of `UM#select` that specifically
+   handles a single fd.
+
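At the Ruby level, this is what the hook serves (a sketch; it assumes a UM scheduler has already been installed with `Fiber.set_scheduler`):

```ruby
Fiber.schedule do
  r, w = IO.pipe
  w << "ready"
  # IO.select is routed through #io_select, which submits one poll SQE per
  # fd and cancels the leftover polls once any of them completes.
  readable, = IO.select([r], nil, nil, 1)
  p readable # => [r]
end
```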
+ # 2025-12-04
+
+ - Implemented a worker pool for performing blocking operations in the
+   scheduler. Up until now, each scheduler started its own worker thread for
+   performing blocking operations for use in the `#blocking_operation_wait`
+   hook. The new implementation uses a worker thread pool shared by all
+   schedulers, with the worker count limited to the CPU count. Workers are
+   started when needed.
+
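The shared-pool idea in miniature, using plain threads and queues rather than UM's internals (names are illustrative):

```ruby
require "etc"

# One process-wide job queue; workers are started lazily, up to the CPU count.
class BlockingPool
  @queue = Queue.new
  @workers = []
  @lock = Mutex.new

  def self.submit(&work)
    done = Queue.new
    @queue << [work, done]
    maybe_spawn_worker
    result, error = done.pop
    error ? raise(error) : result
  end

  def self.maybe_spawn_worker
    @lock.synchronize do
      return if @workers.size >= Etc.nprocessors

      @workers << Thread.new do
        loop do
          work, done = @queue.pop
          begin
            done << [work.call, nil]
          rescue => e
            done << [nil, e]
          end
        end
      end
    end
  end
end

p BlockingPool.submit { File.read("/etc/hostname") }
```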
+   I also added an optional `entries` argument to set the SQE and CQE buffer
+   sizes when starting a new `UringMachine` instance. The default size is
+   4096 SQE entries (liburing by default makes the CQE buffer twice the size
+   of the SQE buffer). The blocking-operation worker threads specify a value
+   of 4, since they only use their UringMachine instance for popping jobs off
+   the job queue and pushing the blocking operation result back to the
+   scheduler.
+
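Concretely (the positional form is taken from the task note in tasks.md below):

```ruby
machine = UM.new     # default ring size: 4096 SQEs (CQ ring is double that)
worker  = UM.new(4)  # tiny ring for a worker that only pops jobs and pushes results
```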
+ - Added support for a `file_offset` argument in `UM#read` and `UM#write` in
+   preparation for implementing the `#io_pread` and `#io_pwrite` hooks. The
+   `UM#write_async` API, which permits writing to a file descriptor without
+   waiting for the operation to complete, got support for specifying `length`
+   and `file_offset` arguments as well. In addition, `UM#write` and
+   `UM#write_async` got short-circuit logic for writes with a length of 0.
+
+ - Added support for specifying a buffer offset in the `#io_read` and
+   `#io_write` hooks.
+
+ - Added support for timeouts in the `#block`, `#io_read` and `#io_write`
+   hooks.
+
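For reference, the positioned I/O these hooks serve; with a scheduler installed, `IO#pread` and `IO#pwrite` are routed through `#io_pread`/`#io_pwrite`:

```ruby
File.open("example.bin", "w+b") do |f|
  f.write("\0" * 128)     # grow the file to 128 bytes
  f.pwrite("hello", 100)  # write at offset 100; the file cursor is unaffected
  p f.pread(5, 100)       # => "hello", again without moving the cursor
end
```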
+ # 2025-12-05
+
+ - I found and fixed a problem with how `futex_wake` was done in the
+   low-level UringMachine code handling mutexes and queues. This fixed a
+   deadlock in the scheduler background worker pool, where clients of the
+   pool were not properly woken after the submitted operation was done.
+
+ - I finished work on the `#io_pread` and `#io_pwrite` hooks. Unfortunately,
+   the test for `#io_pwrite` consistently hangs (not on `IO#pwrite` itself,
+   but on closing the file.) With Samuel's help, hopefully we'll find a
+   solution...
+
+ - With those two last hooks, the fiber scheduler implementation is now
+   feature complete! While I have written test cases for the different fiber
+   scheduler hooks, I'd like to add more tests - especially tests that
+   exercise multiple hooks, tests with high concurrency, and integration
+   tests that check how the fiber scheduler plays with Ruby APIs like
+   `Net::HTTP` and the `socket` API in general.
+
+ # 2025-12-06
+
+ - Samuel found the issue with pwrite (it turns out the `#io_pwrite` hook was
+   being invoked with the GVL released), and [fixed
+   it](https://github.com/ruby/ruby/pull/15428). So now `#pwrite` works
+   correctly with a fiber scheduler!
+
+ - I followed Samuel's suggestion and incorporated some debug logging into
+   the extension code interfacing with liburing, in order to facilitate
+   debugging when issues are encountered.
+
+ - Added support for [SQPOLL
+   mode](https://unixism.net/loti/tutorial/sq_poll.html) when setting up a
+   UringMachine instance. It's not clear to me what the performance
+   implications are, but I'll try to make some time to check this against
+   [TP2](https://github.com/noteflakes/tp2), a UringMachine-based web server
+   I'm currently using in a bunch of projects.
+
+ # 2025-12-07
+
+ - I started looking at getting `#io_close` to work, and found that Samuel
+   had already done the work - the code was already there, but commented out.
+   Samuel explained that it had proved impossible to get it to work due to
+   the complexity of the `IO#close` implementation, and indeed, when I tried
+   it myself, I saw that it was just not possible, given the way the IO state
+   is managed when an IO is closed. I then had the idea that maybe we could
+   pass the underlying fd, instead of the IO object itself, to the
+   `#io_close` hook. I tried it, and indeed it worked without any problems.
+   The only issue is that this breaks the convention where the different
+   `io_xxx` hooks take an IO as their first argument. Nevertheless, I
+   suggested this idea to Samuel, and he gladly accepted once he saw this is
+   the only way we can make this hook work. Samuel then proceeded to prepare
+   a [PR](https://github.com/ruby/ruby/pull/15434) and merge it.
+
+ - Added the `#io_close` hook to the UringMachine fiber scheduler, as well as
+   a `#yield` hook for dealing with thread interrupts, in response to another
+   [PR](https://github.com/ruby/ruby/pull/14700) by Samuel. I also added
+   missing docs for the different methods in the fiber scheduler.
+
+ # 2025-12-08
+
+ - Wrote a bunch of benchmarks for different scenarios, comparing threads vs.
+   the fiber scheduler vs. the low-level UM implementation. The
+   [results](https://github.com/digital-fabric/uringmachine/blob/main/benchmark/README.md)
+   show the promise of UringMachine and of its fiber scheduler. What is great
+   about the fiber scheduler interface is that it provides a significant
+   boost in I/O-bound scenarios, with almost no change to the source code
+   (basically, you just need to replace `Thread.new` with `Fiber.schedule`).
+
+   These results, though preliminary, seem to validate the approach I took
+   with UringMachine - implementing a low-level API and tying it into the
+   entire Ruby ecosystem by way of the fiber scheduler interface.
+
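The migration mentioned above, in sketch form (`clients` and `handle_client` are placeholders; the scheduler constructor arity is an assumption):

```ruby
# threads
clients.map { |c| Thread.new { handle_client(c) } }.each(&:join)

# fiber scheduler: same shape, one fiber per client
Fiber.set_scheduler(UM::FiberScheduler.new)
clients.each { |c| Fiber.schedule { handle_client(c) } }
```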
+ - Spent the rest of the day writing lots of tests for the fiber scheduler. I
+   tried to cover the entire `IO` API - both class and instance methods. I
+   also wrote some "integration" tests - different scenarios, not unlike
+   those in the benchmarks, that exercise the different hooks in the fiber
+   scheduler.
+
+ - Added some new APIs to help with testing: `UM#await_fibers` is a method
+   for waiting for one or more fibers to terminate. Unlike `UM#join`, it
+   doesn't return the return values of the given fibers; it just waits for
+   them to terminate. Another new API is `UM.socketpair`, which is like
+   `Socket.socketpair` except it returns raw fd's.
+
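A hedged usage sketch of the two helpers; the exact `UM#read`/`UM#write`/`UM#await_fibers` argument shapes are assumptions based on the descriptions above:

```ruby
machine = UM.new
fd1, fd2 = UM.socketpair  # raw fds, not IO objects

writer = machine.spin { machine.write(fd1, "ping") }
reader = machine.spin { p machine.read(fd2, +"", 4) }  # read up to 4 bytes

machine.await_fibers(writer, reader)  # waits for termination only; unlike
                                      # UM#join, no return values are collected
```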
+ - Fixed a tricky bug that caused an occasional segmentation fault while
+   running benchmarks. Some fibers waiting for an operation to complete were
+   garbage collected because there was no reference to them anywhere. I fixed
+   this by adding a map of pending fibers at the C-extension level, with
+   pending fibers added and removed from it automatically. I also added a
+   check for leaked fibers at the end of each test, to make sure the
+   UringMachine instance does not hold onto fibers that have terminated.
+
+ # 2025-12-09
+
+ - Added the Async fiber scheduler to the different benchmarks. Also added an
+   SQPOLL mode to the benchmarks, and a PG client benchmark.
+
+ - Fixed some small issues in the UM fiber scheduler and in the UM low-level
+   API implementation.
+
+ # 2025-12-10
+
+ - Refactored the benchmarks, abstracting away all the common code into a
+   common class that is then used by the different benchmarks. I also added a
+   test for Async with an epoll selector. I'll try to find some time in the
+   coming days to update the results in the repo.
+
+ - Added and streamlined metrics that indicate the following:
+
+   - The ring size
+   - Total number of ops
+   - Total number of fiber switches
+   - Total number of waits for CQEs
+   - Current number of pending ops
+   - Current number of unsubmitted ops
+   - Current size of the runqueue
+   - Current number of transient ops
+   - Current number of free ops
+
+   I also added some basic time measurements:
+
+   - Total CPU time
+   - Total time spent waiting for CQEs
+
+   These are off by default, but can be enabled by calling `UM#profile(true)`.
+   I'd like to do a lot more with profiling, like measuring the CPU time
+   spent on each fiber, but I'm a bit apprehensive about the performance
+   costs involved: getting the `CLOCK_THREAD_CPUTIME_ID` clock is relatively
+   slow, and managing this for each fiber means getting and setting a couple
+   of instance variables, which can *really* slow things down. On top of
+   that, I'm not sure this is really needed.
+
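A quick way to gauge the clock cost in question (plain Ruby, unrelated to UM's profiling code):

```ruby
require "benchmark"

n = 1_000_000
Benchmark.bm(24) do |x|
  x.report("CLOCK_MONOTONIC") do
    n.times { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  end
  x.report("CLOCK_THREAD_CPUTIME_ID") do
    n.times { Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID) }
  end
end
```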
+ - I went through some old benchmarks, reorganized them, and got rid of some
+   that were no longer relevant. There were some really interesting ones,
+   such as a benchmark measuring the cost of different ways of accessing an
+   SQLite DB (using
+   [Extralite](https://github.com/digital-fabric/extralite/)): directly,
+   through an actor interface, or protected by a mutex. I'll try to follow up
+   with a benchmark measuring concurrent access to SQLite DBs, similar to the
+   PG one.
+
+   Another interesting benchmark I found was one for resolving DNS addresses
+   using Ruby's builtin `Addrinfo` API, the bundled `resolv` gem, and a basic
+   DNS resolver included in UringMachine (I totally forgot I made one). Here
+   too, I'd like to add a benchmark measuring how these different solutions
+   do in a highly concurrent scenario.
+
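For reference, the two stdlib approaches that benchmark compares (the UM resolver's own API is not shown here):

```ruby
require "resolv"
require "socket"

# builtin Addrinfo API (getaddrinfo under the hood)
p Addrinfo.getaddrinfo("example.com", nil, :INET, :STREAM).map(&:ip_address)

# bundled resolv gem (pure-Ruby resolver)
p Resolv::DNS.new.getaddresses("example.com").map(&:to_s)
```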
465
+ - Thanks to one of these old benchmarks I made a change that more than doubled
466
+ the performance of `UM#snooze`. What this method does is it adds the current
467
+ fiber to the end of the runqueue, and yields control to the next fiber in the
468
+ runqueue, or to process available CQE's. This method is useful for testing,
469
+ but also for yielding control periodically when performing CPU-bound work, in
470
+ order to keep the application responsive and improve latency.
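The pattern this enables during CPU-bound work (a sketch; `items` and `crunch` are placeholders):

```ruby
items.each_slice(1024) do |chunk|
  chunk.each { |item| crunch(item) }
  machine.snooze  # yield to other fibers / CQE processing between chunks
end
```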
data/grant-2025/tasks.md CHANGED
@@ -1,44 +1,119 @@
  - [v] io-event
-
    - [v] Make PR to use io_uring_prep_waitid for kernel version >= 6.7

-   - https://github.com/socketry/io-event/blob/44666dc92ac3e093ca6ce3ab47052b808a58a325/ext/io/event/selector/uring.c#L460
-   - https://github.com/digital-fabric/uringmachine/blob/d5505d7fd94b800c848d186e17585e03ad9af6f2/ext/um/um.c#L697-L713
+ - [ ] UringMachine low-level API
+   - [v] Add support for IO::Buffer in UM API.
+   - [v] Add `UM::Error` class to be used instead of RuntimeError
+   - [v] Add optional ring size argument to `UM.new` (for example, the
+     worker thread for the scheduler `blocking_operation_wait` hook does not
+     need a lot of depth, so you can basically do `UM.new(4)`)
+   - [v] Add debugging code suggested by Samuel
+   - [v] Add support for SQPOLL
+     https://unixism.net/loti/tutorial/sq_poll.html
+   - [v] Add `UM.socketpair`
+
+   - [ ] Add more metrics
+     - [v] runqueue depth
+     - [v] number of pending fibers
+     - [v] ops: transient count, free count
+     - [v] total fiber switches, total waits for CQEs
+     - [ ] watermarks: ops_pending, ops_unsubmitted, ops_runqueue, ops_free, ops_transient
+       (only in profile mode)
+   - [ ] Performance tuning parameters
+     - [ ] max fiber switches before processing CQEs
+     - [ ] max fiber switches before submitting unsubmitted SQEs
+     - [ ] measure switches since last submission / last CQE processing
+
+   - [ ] Add support for using IO::Buffer in association with io_uring
+     registered buffers / buffer rings
+   - [ ] Add some way to measure fiber CPU time.
+     https://github.com/socketry/async/issues/428
+
+ - [ ] UringMachine Fiber::Scheduler implementation
+   - [v] Check how scheduler interacts with `fork`.
+   - [v] Implement `process_wait` (with `rb_process_status_new`)
+   - [v] Implement `fiber_interrupt` hook
+   - [v] Add `#address_resolve` hook with same impl as Async:
+     https://github.com/socketry/async/blob/ea8b0725042b63667ea781d4d011786ca3658256/lib/async/scheduler.rb#L285-L296
+   - [v] Implement other hooks:
+     - [v] `#timeout_after`
+       https://github.com/socketry/async/blob/ea8b0725042b63667ea781d4d011786ca3658256/lib/async/scheduler.rb#L631-L644
+     - [v] `#io_pread`
+     - [v] `#io_pwrite`
+     - [v] `#io_select`
+   - [v] Add timeout handling in different I/O hooks
+   - [v] Experiment more with fork:
+     - [v] what happens to schedulers on other threads (those that don't make it post-fork)
+       - do they get GC'd?
+       - do they get closed (`#scheduler_close` called)?
+       - are they freed cleanly (at least for UM)?
+
+       ```ruby
+       class S
+         def respond_to?(sym) = true
+       end
+       o = S.new
+       ObjectSpace.define_finalizer(o, ->(*){ puts 'scheduler finalized' })
+       t1 = Thread.new { Fiber.set_scheduler(o); sleep }
+       t2 = Thread.new {
+         fork { p(t1:, t2:) }
+         GC.start
+       }
+
+       # output:
+       # scheduler finalized
+       ```
+
+       So, apparently there's no problem!
+   - [v] Implement multi-thread worker pool for `blocking_operation_wait`:
+     a single thread pool at class level, shared by all schedulers, with the
+     worker count according to the CPU count
+   - [v] Test working with non-blocking files; it should be fine, and we shouldn't need to reset `O_NONBLOCK`.
+   - [v] Implement timeouts (how do timeouts interact with blocking ops?)
+   - [v] Implement `#yield` hook (https://github.com/ruby/ruby/pull/14700)
+   - [v] Finish documentation for the `FiberScheduler` class
+   - [v] Implement `#io_close` hook
+
+   - [v] tests:
+     - [v] Wrap the scheduler interface such that we can verify that specific
+       hooks were called. Add asserts for called hooks for all tests.
+     - [v] Sockets (only io_wait)
+     - [v] Files
+     - [v] Mutex / Queue
+     - [v] Thread.join
+     - [v] Process.wait
+     - [v] fork
+     - [v] system / exec / etc.
+     - [v] popen
+     - [v] "Integration tests"
+       - [v] IO - all methods!
+       - [v] queue: multiple concurrent readers / writers
+       - [v] net/http test: ad-hoc HTTP/1.1 server + `Net::HTTP` client
+       - [v] pipes: multiple pairs of fibers - reader / writer
+       - [v] sockets: echo server + many clients

- - [ ] UringMachine
-   - [v] Add support for IO::Buffer in UM API. (How can we detect an IO::Buffer object?)
-     https://docs.ruby-lang.org/capi/en/master/d8/d36/group__object.html#gab1b70414d07e7de585f47ee50a64a86c
+ - [ ] Benchmarks
+   - [v] UM queue / Ruby queue (threads) / Ruby queue with UM fiber scheduler

-   - [v] Add `UM::Error` class to be used instead of RuntimeError
+     N groups, where each group has M producers and O consumers accessing the same queue.

-   - [ ] Do batch allocation for `struct um_op`, so they'll be adjacent
-   - [ ] Add optional buffer depth argument to `UM.new` (for example, a the
-     worker thread for the scheduler `blocking_operation_wait` hook does not need
-     a lot of depth, so you can basically do `UM.new(4)`)
+   - [v] UM mutex / Ruby mutex (threads) / Ruby mutex with UM fiber scheduler

- - [ ] Add support for using IO::Buffer in association with io_uring registered buffers / buffer rings
+     - [v] N groups, where each group has M fibers locking the same mutex and
+       performing CPU-bound work
+     - [v] N groups, where each group has M fibers locking the same mutex and
+       performing IO-bound work (write to a file)

- - [ ] FiberScheduler implementation
-
-   - [v] Check how scheduler interacts with `fork`.
-   - [v] Implement `process_wait` (with `rb_process_status_new`)
-   - [ ] Implement timeouts (how do timeouts interact with blocking ops?)
-   - [ ] Implement address resolution hook
-   - [ ] Add tests:
-     - [ ] Sockets
-     - [ ] Files
-     - [ ] Mutex / Queue
-     - [ ] Thread.join
-     - [ ] Process.wait
-     - [ ] fork
-     - [ ] system / exec / etc.
-     - [ ] popen
+   - [v] Pipe IO raw UM / Ruby threaded / Ruby with UM fiber scheduler
+
+     N groups, where each group has a reader / writer pair on a pipe
+
+   - [v] Socket IO (with socketpair) raw UM / Ruby threaded / Ruby with UM fiber scheduler
+
+     N groups, where each group has a reader / writer pair on a socketpair
+
+   - [v] Postgres test

- - [ ] Benchmarks
-   - [ ] UM queue / Ruby queue (threads) / Ruby queue with UM fiber scheduler
-   - [ ] UM mutex / Ruby mutex (threads) / Ruby mutex with UM fiber scheduler
-   - [ ] Pipe IO raw UM / Ruby threaded / Ruby with UM fiber scheduler
-   - [ ] Socket IO (with socketpair) raw UM / Ruby threaded / Ruby with UM fiber scheduler
    - [ ] Measure CPU (thread) time usage for above examples

    - run each version 1M times
@@ -49,18 +124,38 @@
      cpu_time = Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID)
      ```

-   - my hunch is we'll be able to show with io_uring real_time is less,
-     while cpu_time is more. But it's just a hunch.
-
-
- - https://github.com/ruby/ruby/blob/master/doc/fiber.md
- - https://github.com/ruby/ruby/blob/master/test/fiber/scheduler.rb
- - https://github.com/socketry/async/blob/main/context/getting-started.md
- - https://github.com/socketry/async/blob/main/context/scheduler.md
- - https://github.com/socketry/async/blob/main/lib/async/scheduler.rb#L28
+ - [ ] Ruby Fiber::Scheduler interface
+   - [v] Make a PR for resetting the scheduler and resetting the fiber non-blocking flag.
+   - [v] hook for close
+   - [ ] hooks for send/recv/sendmsg/recvmsg

  - [ ] SSL
    - [ ] openssl gem: custom BIO?

    - curl: https://github.com/curl/curl/blob/5f4cd4c689c822ce957bb415076f0c78e5f474b5/lib/vtls/openssl.c#L786-L803

+ - [ ] UringMachine website
+   - [ ] domain: uringmachine.dev
+   - [ ] logo: ???
+   - [ ] docs (similar to papercraft docs)
+
+ - [ ] Uma - web server
+   - [ ] child process workers
+   - [ ] reforking (following https://github.com/Shopify/pitchfork)
+     see also: https://byroot.github.io/ruby/performance/2025/03/04/the-pitchfork-story.html
+     - Monitor worker memory usage - how much is shared
+     - Choose the worker with the highest served-request count as the "mold" for the next generation
+     - Perform GC out of band, preferably when there are no active requests
+       https://railsatscale.com/2024-10-23-next-generation-oob-gc/
+     - When a worker is promoted to "mold", it:
+       - Stops `accept`ing requests
+       - When finally idle, calls `Process.warmup`
+       - Starts replacing sibling workers with forked workers
+     see also: https://www.youtube.com/watch?v=kAW5O2dkSU8
+   - [ ] Each worker is single-threaded (except for auxiliary threads)
+   - [ ] Rack 3.0-compatible
+     see: https://github.com/socketry/protocol-rack
+   - [ ] Rails integration (Railtie)
+     see: https://github.com/socketry/falcon
+   - [ ] Benchmarks
+     - [ ] Add to the TechEmpower benchmarks
data/lib/uringmachine/actor.rb CHANGED
@@ -24,6 +24,8 @@ class UringMachine
    end

    class Actor < Fiber
+     class Stop < UM::Error; end
+
      def run(machine, target, mailbox)
        @machine = machine
        @target = target
@@ -31,6 +33,8 @@ class UringMachine
        while (msg = machine.shift(mailbox))
          process_message(msg)
        end
+     rescue Stop
+       # stopped
      ensure
        @target.teardown if @target.respond_to?(:teardown)
      end
@@ -45,6 +49,10 @@ class UringMachine
        @machine.shift response_mailbox
      end

+     def stop
+       @machine.schedule(self, Stop.new)
+     end
+
      private

      def process_message(msg)
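A hedged reading of the new stop flow (actor construction is outside this diff): scheduling a fiber with an exception value raises that exception inside the fiber when it is next resumed, so `#stop` unwinds the mailbox loop while still running the `ensure` teardown.

```ruby
# assuming `actor` is a running UM::Actor:
actor.stop  # raises Actor::Stop inside the actor's shift loop; the rescue
            # clause swallows it, and #teardown is still invoked via ensure
```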
data/lib/uringmachine/dns_resolver.rb CHANGED
@@ -10,7 +10,6 @@ class UringMachine
      @nameservers = get_nameservers
      @fiber = @machine.spin { handle_requests_loop }
      @last_id = 0
-     @cache = {}
    end

    def resolve(hostname, type)
@@ -59,7 +58,7 @@ class UringMachine
      msg = Resolv::DNS::Message.decode buf
      addrs = []
      msg.each_answer do |name, ttl, data|
-       p [name, ttl, data]
+       # p [name, ttl, data]
        if data.kind_of?(Resolv::DNS::Resource::IN::A) ||
           data.kind_of?(Resolv::DNS::Resource::IN::AAAA)
          addrs << data.address.to_s