uringmachine 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +2 -0
  3. data/CHANGELOG.md +14 -0
  4. data/TODO.md +144 -0
  5. data/benchmark/README.md +173 -0
  6. data/benchmark/bm_io_pipe.rb +70 -0
  7. data/benchmark/bm_io_socketpair.rb +71 -0
  8. data/benchmark/bm_mutex_cpu.rb +57 -0
  9. data/benchmark/bm_mutex_io.rb +64 -0
  10. data/benchmark/bm_pg_client.rb +109 -0
  11. data/benchmark/bm_queue.rb +76 -0
  12. data/benchmark/chart.png +0 -0
  13. data/benchmark/common.rb +135 -0
  14. data/benchmark/dns_client.rb +47 -0
  15. data/{examples/bm_http_parse.rb → benchmark/http_parse.rb} +1 -1
  16. data/benchmark/run_bm.rb +8 -0
  17. data/benchmark/sqlite.rb +108 -0
  18. data/{examples/bm_write.rb → benchmark/write.rb} +4 -4
  19. data/ext/um/um.c +189 -100
  20. data/ext/um/um.h +36 -10
  21. data/ext/um/um_async_op.c +1 -1
  22. data/ext/um/um_class.c +87 -13
  23. data/ext/um/um_op.c +6 -0
  24. data/ext/um/um_sync.c +2 -2
  25. data/ext/um/um_utils.c +16 -0
  26. data/grant-2025/journal.md +118 -1
  27. data/grant-2025/tasks.md +48 -22
  28. data/lib/uringmachine/actor.rb +8 -0
  29. data/lib/uringmachine/dns_resolver.rb +1 -2
  30. data/lib/uringmachine/fiber_scheduler.rb +127 -81
  31. data/lib/uringmachine/version.rb +1 -1
  32. data/lib/uringmachine.rb +32 -3
  33. data/test/helper.rb +7 -18
  34. data/test/test_actor.rb +12 -3
  35. data/test/test_async_op.rb +10 -10
  36. data/test/test_fiber.rb +84 -1
  37. data/test/test_fiber_scheduler.rb +950 -47
  38. data/test/test_um.rb +297 -120
  39. data/uringmachine.gemspec +2 -1
  40. metadata +38 -16
  41. data/examples/bm_fileno.rb +0 -33
  42. data/examples/bm_queue.rb +0 -111
  43. data/examples/bm_side_running.rb +0 -83
  44. data/examples/bm_sqlite.rb +0 -89
  45. data/examples/dns_client.rb +0 -12
  46. /data/{examples/bm_mutex.rb → benchmark/mutex.rb} +0 -0
  47. /data/{examples/bm_mutex_single.rb → benchmark/mutex_single.rb} +0 -0
  48. /data/{examples/bm_send.rb → benchmark/send.rb} +0 -0
  49. /data/{examples/bm_snooze.rb → benchmark/snooze.rb} +0 -0
data/ext/um/um.h CHANGED
@@ -4,7 +4,6 @@
4
4
  #include <ruby.h>
5
5
  #include <liburing.h>
6
6
 
7
-
8
7
  // debugging
9
8
  enum {
10
9
  // set to 1 to enable debug logging
@@ -18,6 +17,10 @@ enum {
18
17
  #define TRACE_FREE(ptr) //printf("Free %p %s:%d\n", ptr, __FILE__, __LINE__)
19
18
  #define DEBUG_MARK(machine, markv, msg) \
20
19
  if (machine->mark == markv) printf("%s\n", msg);
20
+ #define DEBUG_PRINTF(...) \
21
+ if (DEBUG) fprintf(stderr, __VA_ARGS__)
22
+
23
+ #define SYM_DEF(name) SYM_#name = ID2SYM(rb_intern("#name"))
21
24
 
22
25
  // branching
23
26
  #ifndef unlikely
@@ -32,6 +35,7 @@ enum {
32
35
  (TYPE(buffer) == RUBY_T_DATA) && rb_obj_is_instance_of(buffer, rb_cIOBuffer)
33
36
 
34
37
  enum um_op_kind {
38
+ OP_UNDEFINED,
35
39
  OP_TIMEOUT,
36
40
  OP_SCHEDULE,
37
41
 
@@ -121,6 +125,22 @@ struct buf_ring_descriptor {
121
125
  void *buf_base;
122
126
  };
123
127
 
128
+ struct um_metrics {
129
+ ulong total_ops; // total ops submitted
130
+ ulong total_switches; // total fiber switches
131
+ ulong total_waits; // total number of CQE waits
132
+
133
+ uint ops_pending; // number of pending ops
134
+ uint ops_unsubmitted; // number of unsubmitted
135
+ uint ops_runqueue; // number of ops in runqueue
136
+ uint ops_free; // number of ops in freelist
137
+ uint ops_transient; // number of ops in transient list
138
+
139
+ double time_total_wait; // total CPU time waiting for CQEs
140
+ double time_last_cpu; // last seen time stamp
141
+ double time_first_cpu; // last seen time stamp
142
+ };
143
+
124
144
  #define BUFFER_RING_MAX_COUNT 10
125
145
 
126
146
  struct um {
@@ -133,17 +153,19 @@ struct um {
133
153
  uint ring_initialized; // is the ring initialized successfully
134
154
  uint mark; // used to mark instances for debugging
135
155
 
136
- uint unsubmitted_count; // number of unsubmitted SQEs pending
137
- uint pending_count; // number of pending operations (i.e. not yet completed)
156
+ struct um_metrics metrics;
157
+ int profile_mode;
158
+
138
159
  uint buffer_ring_count; // number of registered buffer rings
139
- ulong total_op_count; // total number of operations submitted since ring was initialized
140
-
141
- uint entries; // number of entries in SQ
160
+
161
+ uint size; // size of SQ
142
162
  uint sqpoll_mode; // SQPOLL mode enabled
143
163
 
144
164
  struct buf_ring_descriptor buffer_rings[BUFFER_RING_MAX_COUNT];
145
165
 
146
- struct um_op *transient_head;
166
+ struct um_op *transient_head; // list of pending transient ops
167
+ VALUE pending_fibers; // hash containing pending fibers
168
+
147
169
  struct um_op *runqueue_head;
148
170
  struct um_op *runqueue_tail;
149
171
 
@@ -203,9 +225,11 @@ extern VALUE cAsyncOp;
203
225
  extern VALUE eStreamRESPError;
204
226
 
205
227
  struct um *um_get_machine(VALUE self);
206
- void um_setup(VALUE self, struct um *machine, uint entries, uint sqpoll_timeout_msec);
228
+ void um_setup(VALUE self, struct um *machine, uint size, uint sqpoll_timeout_msec);
207
229
  void um_teardown(struct um *machine);
208
230
 
231
+ VALUE um_metrics(struct um *machine, struct um_metrics *metrics);
232
+
209
233
  const char * um_op_kind_name(enum um_op_kind kind);
210
234
  struct um_op *um_op_alloc(struct um *machine);
211
235
  void um_op_free(struct um *machine, struct um_op *op);
@@ -227,6 +251,8 @@ void um_free_buffer_linked_list(struct um *machine);
227
251
 
228
252
  struct __kernel_timespec um_double_to_timespec(double value);
229
253
  double um_timestamp_to_double(__s64 tv_sec, __u32 tv_nsec);
254
+ double um_get_time_cpu();
255
+ double um_get_time_monotonic();
230
256
  int um_value_is_exception_p(VALUE v);
231
257
  VALUE um_raise_exception(VALUE v);
232
258
 
@@ -244,8 +270,8 @@ void um_add_strings_to_buffer_ring(struct um *machine, int bgid, VALUE strings);
244
270
  struct io_uring_sqe *um_get_sqe(struct um *machine, struct um_op *op);
245
271
 
246
272
  uint um_submit(struct um *machine);
247
- VALUE um_fiber_switch(struct um *machine);
248
- VALUE um_await(struct um *machine);
273
+ VALUE um_yield(struct um *machine);
274
+ VALUE um_switch(struct um *machine);
249
275
  VALUE um_wakeup(struct um *machine);
250
276
  void um_cancel_op(struct um *machine, struct um_op *op);
251
277
  void um_cancel_and_wait(struct um *machine, struct um_op *op);
data/ext/um/um_async_op.c CHANGED
@@ -26,7 +26,7 @@ VALUE um_async_op_await(struct um_async_op *async_op) {
26
26
  RB_OBJ_WRITE(async_op->machine->self, &async_op->op->fiber, rb_fiber_current());
27
27
  async_op->op->flags &= ~OP_F_ASYNC;
28
28
 
29
- VALUE ret = um_fiber_switch(async_op->machine);
29
+ VALUE ret = um_switch(async_op->machine);
30
30
  if (!um_op_completed_p(async_op->op))
31
31
  um_cancel_and_wait(async_op->machine, async_op->op);
32
32
 
data/ext/um/um_class.c CHANGED
@@ -3,15 +3,29 @@
3
3
  #include <ruby/io.h>
4
4
  #include <sys/syscall.h>
5
5
  #include <unistd.h>
6
+ #include <sys/socket.h>
6
7
 
7
8
  VALUE cUM;
8
9
  VALUE eUMError;
9
10
 
11
+ VALUE SYM_size;
12
+ VALUE SYM_total_ops;
13
+ VALUE SYM_total_switches;
14
+ VALUE SYM_total_waits;
15
+ VALUE SYM_ops_pending;
16
+ VALUE SYM_ops_unsubmitted;
17
+ VALUE SYM_ops_runqueue;
18
+ VALUE SYM_ops_free;
19
+ VALUE SYM_ops_transient;
20
+ VALUE SYM_time_total_cpu;
21
+ VALUE SYM_time_total_wait;
22
+
10
23
  static ID id_fileno;
11
24
 
12
25
  static void UM_mark(void *ptr) {
13
26
  struct um *machine = ptr;
14
27
  rb_gc_mark_movable(machine->self);
28
+ rb_gc_mark_movable(machine->pending_fibers);
15
29
 
16
30
  um_op_list_mark(machine, machine->transient_head);
17
31
  um_op_list_mark(machine, machine->runqueue_head);
@@ -20,6 +34,7 @@ static void UM_mark(void *ptr) {
20
34
  static void UM_compact(void *ptr) {
21
35
  struct um *machine = ptr;
22
36
  machine->self = rb_gc_location(machine->self);
37
+ machine->pending_fibers = rb_gc_location(machine->pending_fibers);
23
38
 
24
39
  um_op_list_compact(machine, machine->transient_head);
25
40
  um_op_list_compact(machine, machine->runqueue_head);
@@ -90,9 +105,9 @@ VALUE UM_setup_buffer_ring(VALUE self, VALUE size, VALUE count) {
90
105
  return INT2NUM(bgid);
91
106
  }
92
107
 
93
- VALUE UM_entries(VALUE self) {
108
+ VALUE UM_size(VALUE self) {
94
109
  struct um *machine = um_get_machine(self);
95
- return UINT2NUM(machine->entries);
110
+ return UINT2NUM(machine->size);
96
111
  }
97
112
 
98
113
  VALUE UM_mark_m(VALUE self, VALUE mark) {
@@ -101,25 +116,51 @@ VALUE UM_mark_m(VALUE self, VALUE mark) {
101
116
  return self;
102
117
  }
103
118
 
104
- VALUE UM_pending_count(VALUE self) {
119
+ VALUE UM_metrics(VALUE self) {
105
120
  struct um *machine = um_get_machine(self);
106
- return UINT2NUM(machine->pending_count);
121
+ return um_metrics(machine, &machine->metrics);
107
122
  }
108
123
 
109
- VALUE UM_total_op_count(VALUE self) {
124
+ VALUE UM_profile_p(VALUE self) {
110
125
  struct um *machine = um_get_machine(self);
111
- return UINT2NUM(machine->total_op_count);
126
+ return machine->profile_mode ? Qtrue : Qfalse;
127
+ }
128
+
129
+ VALUE UM_profile_set(VALUE self, VALUE value) {
130
+ struct um *machine = um_get_machine(self);
131
+ machine->profile_mode = RTEST(value);
132
+ if (machine->profile_mode) {
133
+ machine->metrics.time_total_wait = 0.0;
134
+ machine->metrics.time_last_cpu = machine->metrics.time_first_cpu = um_get_time_cpu();
135
+ }
136
+ return value;
112
137
  }
113
138
 
114
139
  VALUE UM_snooze(VALUE self) {
115
140
  struct um *machine = um_get_machine(self);
116
141
  um_schedule(machine, rb_fiber_current(), Qnil);
117
- return um_await(machine);
142
+
143
+ // the current fiber is already scheduled, and the runqueue is GC-marked, so
144
+ // we can safely call um_switch, which is faster than calling um_yield.
145
+ VALUE ret = um_switch(machine);
146
+ RAISE_IF_EXCEPTION(ret);
147
+ return ret;
118
148
  }
119
149
 
120
150
  VALUE UM_yield(VALUE self) {
121
151
  struct um *machine = um_get_machine(self);
122
- return um_await(machine);
152
+
153
+ VALUE ret = um_yield(machine);
154
+ RAISE_IF_EXCEPTION(ret);
155
+ return ret;
156
+ }
157
+
158
+ VALUE UM_switch(VALUE self) {
159
+ struct um *machine = um_get_machine(self);
160
+
161
+ VALUE ret = um_switch(machine);
162
+ RAISE_IF_EXCEPTION(ret);
163
+ return ret;
123
164
  }
124
165
 
125
166
  VALUE UM_wakeup(VALUE self) {
@@ -133,6 +174,11 @@ VALUE UM_submit(VALUE self) {
133
174
  return UINT2NUM(ret);
134
175
  }
135
176
 
177
+ VALUE UM_pending_fibers(VALUE self) {
178
+ struct um *machine = um_get_machine(self);
179
+ return machine->pending_fibers;
180
+ }
181
+
136
182
  VALUE UM_schedule(VALUE self, VALUE fiber, VALUE value) {
137
183
  struct um *machine = um_get_machine(self);
138
184
  um_schedule(machine, fiber, value);
@@ -428,6 +474,17 @@ VALUE UM_pipe(VALUE self) {
428
474
  return rb_ary_new_from_args(2, INT2NUM(fds[0]), INT2NUM(fds[1]));
429
475
  }
430
476
 
477
+ VALUE UM_socketpair(VALUE self, VALUE domain, VALUE type, VALUE protocol) {
478
+ int fds[2];
479
+ int ret = socketpair(NUM2INT(domain), NUM2INT(type), NUM2INT(protocol), fds);
480
+ if (ret) {
481
+ int e = errno;
482
+ rb_syserr_fail(e, strerror(e));
483
+ }
484
+
485
+ return rb_ary_new_from_args(2, INT2NUM(fds[0]), INT2NUM(fds[1]));
486
+ }
487
+
431
488
  VALUE UM_pidfd_open(VALUE self, VALUE pid) {
432
489
  int fd = syscall(SYS_pidfd_open, NUM2INT(pid), 0);
433
490
  if (fd == -1) {
@@ -483,7 +540,7 @@ VALUE UM_kernel_version(VALUE self) {
483
540
  }
484
541
 
485
542
  VALUE UM_debug(VALUE self, VALUE str) {
486
- printf("%s\n", StringValueCStr(str));
543
+ fprintf(stderr, "%s\n", StringValueCStr(str));
487
544
  return Qnil;
488
545
  }
489
546
 
@@ -494,13 +551,16 @@ void Init_UM(void) {
494
551
  rb_define_alloc_func(cUM, UM_allocate);
495
552
 
496
553
  rb_define_method(cUM, "initialize", UM_initialize, -1);
497
- rb_define_method(cUM, "entries", UM_entries, 0);
554
+ rb_define_method(cUM, "size", UM_size, 0);
498
555
  rb_define_method(cUM, "mark", UM_mark_m, 1);
499
- rb_define_method(cUM, "pending_count", UM_pending_count, 0);
500
- rb_define_method(cUM, "total_op_count", UM_total_op_count, 0);
556
+ rb_define_method(cUM, "metrics", UM_metrics, 0);
557
+ rb_define_method(cUM, "profile?", UM_profile_p, 0);
558
+ rb_define_method(cUM, "profile", UM_profile_set, 1);
559
+
501
560
  rb_define_method(cUM, "setup_buffer_ring", UM_setup_buffer_ring, 2);
502
561
 
503
562
  rb_define_singleton_method(cUM, "pipe", UM_pipe, 0);
563
+ rb_define_singleton_method(cUM, "socketpair", UM_socketpair, 3);
504
564
  rb_define_singleton_method(cUM, "pidfd_open", UM_pidfd_open, 1);
505
565
  rb_define_singleton_method(cUM, "pidfd_send_signal", UM_pidfd_send_signal, 2);
506
566
 
@@ -513,8 +573,10 @@ void Init_UM(void) {
513
573
  rb_define_method(cUM, "snooze", UM_snooze, 0);
514
574
  rb_define_method(cUM, "timeout", UM_timeout, 2);
515
575
  rb_define_method(cUM, "yield", UM_yield, 0);
576
+ rb_define_method(cUM, "switch", UM_switch, 0);
516
577
  rb_define_method(cUM, "wakeup", UM_wakeup, 0);
517
578
  rb_define_method(cUM, "submit", UM_submit, 0);
579
+ rb_define_method(cUM, "pending_fibers", UM_pending_fibers, 0);
518
580
 
519
581
  rb_define_method(cUM, "close", UM_close, 1);
520
582
  rb_define_method(cUM, "close_async", UM_close_async, 1);
@@ -530,7 +592,7 @@ void Init_UM(void) {
530
592
  rb_define_method(cUM, "poll", UM_poll, 2);
531
593
  rb_define_method(cUM, "select", UM_select, 3);
532
594
  rb_define_method(cUM, "waitid", UM_waitid, 3);
533
-
595
+
534
596
  #ifdef HAVE_RB_PROCESS_STATUS_NEW
535
597
  rb_define_method(cUM, "waitid_status", UM_waitid_status, 3);
536
598
  #endif
@@ -562,5 +624,17 @@ void Init_UM(void) {
562
624
 
563
625
  um_define_net_constants(cUM);
564
626
 
627
+ SYM_size = ID2SYM(rb_intern("size"));
628
+ SYM_total_ops = ID2SYM(rb_intern("total_ops"));
629
+ SYM_total_switches = ID2SYM(rb_intern("total_switches"));
630
+ SYM_total_waits = ID2SYM(rb_intern("total_waits"));
631
+ SYM_ops_pending = ID2SYM(rb_intern("ops_pending"));
632
+ SYM_ops_unsubmitted = ID2SYM(rb_intern("ops_unsubmitted"));
633
+ SYM_ops_runqueue = ID2SYM(rb_intern("ops_runqueue"));
634
+ SYM_ops_free = ID2SYM(rb_intern("ops_free"));
635
+ SYM_ops_transient = ID2SYM(rb_intern("ops_transient"));
636
+ SYM_time_total_cpu = ID2SYM(rb_intern("time_total_cpu"));
637
+ SYM_time_total_wait = ID2SYM(rb_intern("time_total_wait"));
638
+
565
639
  id_fileno = rb_intern_const("fileno");
566
640
  }
data/ext/um/um_op.c CHANGED
@@ -50,6 +50,7 @@ inline void um_op_transient_add(struct um *machine, struct um_op *op) {
50
50
  machine->transient_head->prev = op;
51
51
  }
52
52
  machine->transient_head = op;
53
+ machine->metrics.ops_transient++;
53
54
  }
54
55
 
55
56
  inline void um_op_transient_remove(struct um *machine, struct um_op *op) {
@@ -60,6 +61,7 @@ inline void um_op_transient_remove(struct um *machine, struct um_op *op) {
60
61
 
61
62
  if (machine->transient_head == op)
62
63
  machine->transient_head = op->next;
64
+ machine->metrics.ops_transient--;
63
65
  }
64
66
 
65
67
  inline void um_runqueue_push(struct um *machine, struct um_op *op) {
@@ -71,6 +73,7 @@ inline void um_runqueue_push(struct um *machine, struct um_op *op) {
71
73
  else
72
74
  machine->runqueue_head = machine->runqueue_tail = op;
73
75
  op->next = NULL;
76
+ machine->metrics.ops_runqueue++;
74
77
  }
75
78
 
76
79
  inline struct um_op *um_runqueue_shift(struct um *machine) {
@@ -80,6 +83,7 @@ inline struct um_op *um_runqueue_shift(struct um *machine) {
80
83
  machine->runqueue_head = op->next;
81
84
  if (!machine->runqueue_head)
82
85
  machine->runqueue_tail = NULL;
86
+ machine->metrics.ops_runqueue--;
83
87
  return op;
84
88
  }
85
89
 
@@ -152,6 +156,7 @@ inline struct um_op *um_op_alloc(struct um *machine) {
152
156
  if (machine->op_freelist) {
153
157
  struct um_op *op = machine->op_freelist;
154
158
  machine->op_freelist = op->next;
159
+ machine->metrics.ops_free--;
155
160
  return op;
156
161
  }
157
162
  return malloc(sizeof(struct um_op));
@@ -160,4 +165,5 @@ inline struct um_op *um_op_alloc(struct um *machine) {
160
165
  inline void um_op_free(struct um *machine, struct um_op *op) {
161
166
  op->next = machine->op_freelist;
162
167
  machine->op_freelist = op;
168
+ machine->metrics.ops_free++;
163
169
  }
data/ext/um/um_sync.c CHANGED
@@ -13,7 +13,7 @@ void um_futex_wait(struct um *machine, uint32_t *futex, uint32_t value) {
13
13
  sqe, (uint32_t *)futex, value, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0
14
14
  );
15
15
 
16
- VALUE ret = um_fiber_switch(machine);
16
+ VALUE ret = um_yield(machine);
17
17
  if (!um_op_completed_p(&op))
18
18
  um_cancel_and_wait(machine, &op);
19
19
  else {
@@ -33,7 +33,7 @@ void um_futex_wake(struct um *machine, uint32_t *futex, uint32_t num_waiters) {
33
33
  sqe, (uint32_t *)futex, num_waiters, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0
34
34
  );
35
35
 
36
- VALUE ret = um_fiber_switch(machine);
36
+ VALUE ret = um_yield(machine);
37
37
  um_check_completion(machine, &op);
38
38
 
39
39
  RAISE_IF_EXCEPTION(ret);
data/ext/um/um_utils.c CHANGED
@@ -2,6 +2,7 @@
2
2
  #include <sys/mman.h>
3
3
  #include <stdlib.h>
4
4
  #include <ruby/io/buffer.h>
5
+ #include <time.h>
5
6
 
6
7
  inline struct __kernel_timespec um_double_to_timespec(double value) {
7
8
  double integral;
@@ -16,6 +17,21 @@ inline double um_timestamp_to_double(__s64 tv_sec, __u32 tv_nsec) {
16
17
  return (double)tv_sec + ((double)tv_nsec) / 1000000000;
17
18
  }
18
19
 
20
+ inline double um_get_time_cpu() {
21
+ struct timespec ts;
22
+ if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts)) return -1.0;
23
+
24
+ return um_timestamp_to_double(ts.tv_sec, ts.tv_nsec);
25
+ }
26
+
27
+ inline double um_get_time_monotonic() {
28
+ struct timespec ts;
29
+ if (clock_gettime(CLOCK_MONOTONIC, &ts)) return -1.0;
30
+
31
+ return um_timestamp_to_double(ts.tv_sec, ts.tv_nsec);
32
+ }
33
+
34
+
19
35
  #define RAISE_EXCEPTION(e) rb_funcall(e, ID_invoke, 0);
20
36
 
21
37
  inline int um_value_is_exception_p(VALUE v) {
@@ -339,7 +339,10 @@ Ruby I/O layer. Some interesting warts in the Ruby `IO` implementation:
339
339
 
340
340
  # 2025-12-06
341
341
 
342
- - Samuel has found the issue with pwrite (it turns out the the `#io_pwrite` hook was being invoked with the GVL released), and [fixed it](https://github.com/ruby/ruby/pull/15428). So now `#pwrite` works correctly with a fiber scheduler!
342
+ - Samuel has found the issue with pwrite (it turns out that the `#io_pwrite` hook
343
+ was being invoked with the GVL released), and [fixed
344
+ it](https://github.com/ruby/ruby/pull/15428). So now `#pwrite` works correctly
345
+ with a fiber scheduler!
343
346
 
344
347
  - I followed Samuel's suggestion and incorporated some debug logging into the
345
348
  extension code interfacing with liburing, in order to facilitate debugging
@@ -351,3 +354,117 @@ Ruby I/O layer. Some interesting warts in the Ruby `IO` implementation:
351
354
  implications of that, but I'll try to make some time to check this against
352
355
  [TP2](https://github.com/noteflakes/tp2), a UringMachine-based web server I'm
353
356
  currently using in a bunch of projects.
357
+
358
+ # 2025-12-07
359
+
360
+ - I started looking at getting `#io_close` to work, and found out that Samuel
361
+ has already done the work, that is the code was already there, but was
362
+ commented out. Samuel explained that it was impossible to get it to work due
363
+ to the complexity of the implementation of `IO#close`, and indeed when I tried
364
+ it myself I saw that in fact it was just not possible the way the IO state is
365
+ managed when an IO is closed. I then had the idea that maybe we could pass the
366
+ underlying fd instead of the IO object itself to the `#io_close` hook. I tried
367
+ it and indeed it worked without any problems. The only issue is that this
368
+ breaks the convention where the different `io_xxx` hooks take an io as their
369
+ first argument. Nevertheless, I suggested this idea to Samuel and gladly he
370
+ accepted when he saw this is the only way we can make this hook work. Samuel then
371
+ proceeded to prepare a [PR](https://github.com/ruby/ruby/pull/15434) and merge
372
+ it.
373
+
374
+ - Added the `#io_close` hook to the UringMachine fiber scheduler, as well as a
375
+ `#yield` hook for dealing with thread interrupts in response to another
376
+ [PR](https://github.com/ruby/ruby/pull/14700) by Samuel. I also added missing
377
+ docs for the different methods in the fiber scheduler.
378
+
379
+ # 2025-12-08
380
+
381
+ - Wrote a bunch of benchmarks for different scenarios comparing threads vs fiber
382
+ scheduler vs low-level UM implementation. The
383
+ [results](https://github.com/digital-fabric/uringmachine/blob/main/benchmark/README.md)
384
+ show the promise of UringMachine and of its fiber scheduler. What is great
385
+ about the fiber scheduler interface is that it provides a significant boost to
386
+ I/O-bound scenarios, with almost no change to the source code (basically, you
387
+ just need to replace `Thread.new` with `Fiber.schedule`).
388
+
389
+ These results, though preliminary, seem to validate the approach I took with
390
+ UringMachine - implementing a low-level API and tying it to the entire Ruby
391
+ ecosystem by way of the fiber scheduler interface.
392
+
393
+ - Spent the rest of the day writing lots of tests for the fiber scheduler. I
394
+ tried to cover the entire `IO` API - both class- and instance methods. I also
395
+ wrote some "integration" tests - different scenarios not unlike those in the
396
+ benchmarks, which exercise the different hooks in the fiber scheduler.
397
+
398
+ - Added some new APIs to help with testing: `UM#await_fibers` is a method for
399
+ waiting for one or more fibers to terminate. Unlike `UM#join`, it doesn't
400
+ return the return values of the given fibers, it just waits for them to
401
+ terminate. Another new API is `UM.socketpair`, which is like
402
+ `Socket.socketpair` except it returns raw fd's.
403
+
404
+ - Fixed a tricky bug that caused an occasional segmentation fault while running
405
+ benchmarks. Some fibers waiting for an operation to complete were garbage
406
+ collected because there was no reference to them anywhere. I fixed this by
407
+ adding a map of pending fibers at the C-extension level and adding and
408
+ removing pending fibers from it automatically. I also added checking for
409
+ leaking fibers at the end of each test, so the UringMachine instance will not
410
+ hold onto fibers that have terminated.
411
+
412
+ # 2025-12-09
413
+
414
+ - Added the Async fiber scheduler to the different benchmarks. Also added an
415
+ SQPOLL mode to the benchmarks. Added a PG client benchmark.
416
+
417
+ - Fixed some small issues in the UM fiber scheduler and in the UM low-level API
418
+ implementation.
419
+
420
+ # 2025-12-10
421
+
422
+ - Refactored the benchmarks, abstracting away all the common code into a
423
+ common class that is then used in the different benchmarks. I also added a
424
+ test for Async with an epoll selector. I'll try to find some time in the
425
+ coming days to update the results in the repo.
426
+
427
+ - Added and streamlined metrics that indicate the following:
428
+
429
+ - The ring size
430
+ - Total number of ops
431
+ - Total number of fiber switches
432
+ - Total number of waits for CQEs
433
+ - Current number of pending ops
434
+ - Current number of unsubmitted ops
435
+ - Current size of runqueue
436
+ - Current number of transient ops
437
+ - Current number of free ops
438
+
439
+ I also added some basic time measurements:
440
+
441
+ - Total CPU time
442
+ - Total time spent waiting for CQEs
443
+
444
+ These are off by default, but can be enabled by calling `UM#profile(true)`.
445
+ I'd like to do a lot more with profiling, like measuring the CPU time spent on
446
+ each fiber, but I'm a bit apprehensive of the performance costs involved, as
447
+ getting the `CLOCK_THREAD_CPUTIME_ID` clock is relatively slow, and then
448
+ managing this for each fiber means getting and setting a couple of instance
449
+ variables, which can *really* slow things down. On top of that, I'm not that
450
+ sure this is really needed.
451
+
452
+ - I went through some old benchmarks, reorganized them, and got rid of some that
453
+ were irrelevant. There were some really interesting ones: a benchmark
454
+ measuring the cost of different ways of accessing an SQLite DB (using
455
+ [Extralite](https://github.com/digital-fabric/extralite/)): normally, using an
456
+ actor interface, or protected by a mutex. I'll try to follow up with a
457
+ benchmark measuring concurrent access to SQLite DBs, similar to the PG one.
458
+
459
+ Another interesting benchmark I found was one for resolving DNS addresses
460
+ using Ruby's builtin `Addrinfo` API, the bundled `resolv` gem, and a basic DNS
461
+ resolver included in UringMachine (I totally forgot I made one). Here too, I'd
462
+ like to add a benchmark to measure how these different solutions do in a
463
+ highly concurrent scenario.
464
+
465
+ - Thanks to one of these old benchmarks I made a change that more than doubled
466
+ the performance of `UM#snooze`. What this method does is it adds the current
467
+ fiber to the end of the runqueue, and yields control to the next fiber in the
468
+ runqueue, or to process available CQE's. This method is useful for testing,
469
+ but also for yielding control periodically when performing CPU-bound work, in
470
+ order to keep the application responsive and improve latency.
data/grant-2025/tasks.md CHANGED
@@ -5,16 +5,27 @@
5
5
  - [v] Add support for IO::Buffer in UM API.
6
6
  - [v] Add `UM::Error` class to be used instead of RuntimeError
7
7
  - [v] Add optional ring size argument to `UM.new` (for example, the
8
- worker thread for the scheduler `blocking_operation_wait` hook does not need
9
- a lot of depth, so you can basically do `UM.new(4)`)
8
+ worker thread for the scheduler `blocking_operation_wait` hook does not need
9
+ a lot of depth, so you can basically do `UM.new(4)`)
10
10
  - [v] Add debugging code suggested by Samuel
11
11
  - [v] Add support for SQPOLL
12
12
  https://unixism.net/loti/tutorial/sq_poll.html
13
+ - [v] Add `UM.socketpair`
14
+
15
+ - [ ] Add more metrics
16
+ - [v] runqueue depth
17
+ - [v] number of pending fibers
18
+ - [v] ops: transient count, free count
19
+ - [v] total fiber switches, total waiting for CQEs
20
+ - [ ] watermark: ops_pending, ops_unsubmitted, ops_runqueue, ops_free, ops_transient
21
+ (only in profile mode)
22
+ - [ ] Performance tuning parameters
23
+ - [ ] max fiber switches before processing CQEs
24
+ - [ ] max fiber switches before submitting unsubmitted SQEs
25
+ - [ ] measure switches since last submitting / last CQE processing
13
26
 
14
27
  - [ ] Add support for using IO::Buffer in association with io_uring registered
15
28
  buffers / buffer rings
16
- - [ ] Set `IOSQE_CQE_SKIP_SUCCESS` flag for `#close_async` and `#write_async`
17
- - [ ] In `UM#spin` always start fibers as non-blocking.
18
29
  - [ ] Add some way to measure fiber CPU time.
19
30
  https://github.com/socketry/async/issues/428
20
31
 
@@ -59,8 +70,9 @@
59
70
  With worker count according to CPU count
60
71
  - [v] Test working with non-blocking files, it should be fine, and we shouldn't need to reset `O_NONBLOCK`.
61
72
  - [v] Implement timeouts (how do timeouts interact with blocking ops?)
62
- - [ ] Implement `#yield` hook (https://github.com/ruby/ruby/pull/14700)
63
- - [ ] Finish documentation for the `FiberScheduler` class.
73
+ - [v] Implement `#yield` hook (https://github.com/ruby/ruby/pull/14700)
74
+ - [v] Finish documentation for the `FiberScheduler` class
75
+ - [v] Implement `#io_close` hook
64
76
 
65
77
  - [v] tests:
66
78
  - [v] Wrap the scheduler interface such that we can verify that specific
@@ -73,17 +85,35 @@
73
85
  - [v] fork
74
86
  - [v] system / exec / etc.
75
87
  - [v] popen
76
- - [ ] "Integration tests"
77
- - [ ] queue: multiple concurrent readers / writers
78
- - [ ] net/http test: ad-hoc HTTP/1.1 server + `Net::HTTP` client
79
- - [ ] sockets: echo server + many clients
80
- - [ ] IO - all methods!
88
+ - [v] "Integration tests"
89
+ - [v] IO - all methods!
90
+ - [v] queue: multiple concurrent readers / writers
91
+ - [v] net/http test: ad-hoc HTTP/1.1 server + `Net::HTTP` client
92
+ - [v] pipes: multiple pairs of fibers - reader / writer
93
+ - [v] sockets: echo server + many clients
81
94
 
82
95
  - [ ] Benchmarks
83
- - [ ] UM queue / Ruby queue (threads) / Ruby queue with UM fiber scheduler
84
- - [ ] UM mutex / Ruby mutex (threads) / Ruby mutex with UM fiber scheduler
85
- - [ ] Pipe IO raw UM / Ruby threaded / Ruby with UM fiber scheduler
86
- - [ ] Socket IO (with socketpair) raw UM / Ruby threaded / Ruby with UM fiber scheduler
96
+ - [v] UM queue / Ruby queue (threads) / Ruby queue with UM fiber scheduler
97
+
98
+ N groups where each group has M producers and O consumers accessing the same queue.
99
+
100
+ - [v] UM mutex / Ruby mutex (threads) / Ruby mutex with UM fiber scheduler
101
+
102
+ - [v] N groups where each group has M fibers locking the same mutex and
103
+ performing CPU-bound work
104
+ - [v] N groups where each group has M fibers locking the same mutex and
105
+ performing IO-bound work (write to a file)
106
+
107
+ - [v] Pipe IO raw UM / Ruby threaded / Ruby with UM fiber scheduler
108
+
109
+ N groups where each group has a pair of reader / writer to a pipe
110
+
111
+ - [v] Socket IO (with socketpair) raw UM / Ruby threaded / Ruby with UM fiber scheduler
112
+
113
+ N groups where each group has a pair of reader / writer to a socketpair
114
+
115
+ - [v] Postgres test
116
+
87
117
  - [ ] Measure CPU (thread) time usage for above examples
88
118
 
89
119
  - run each version 1M times
@@ -94,14 +124,10 @@
94
124
  cpu_time = Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID)
95
125
  ```
96
126
 
97
- - my hunch is we'll be able to show with io_uring real_time is less,
98
- while cpu_time is more. But it's just a hunch.
99
-
100
127
  - [ ] Ruby Fiber::Scheduler interface
101
- - [ ] Make a PR for resetting the scheduler and resetting the fiber non-blocking flag.
102
- - [ ] Missing hook for close
103
- - [ ] Missing hooks for send/recv/sendmsg/recvmsg
104
- - [ ] Writes to a file (including `IO.write`) do not invoke `#io_write` (because writes to files cannot be non-blocking?) Instead, `blocking_operation_wait` is invoked.
128
+ - [v] Make a PR for resetting the scheduler and resetting the fiber non-blocking flag.
129
+ - [v] hook for close
130
+ - [ ] hooks for send/recv/sendmsg/recvmsg
105
131
 
106
132
  - [ ] SSL
107
133
  - [ ] openssl gem: custom BIO?
@@ -24,6 +24,8 @@ class UringMachine
24
24
  end
25
25
 
26
26
  class Actor < Fiber
27
+ class Stop < UM::Error; end
28
+
27
29
  def run(machine, target, mailbox)
28
30
  @machine = machine
29
31
  @target = target
@@ -31,6 +33,8 @@ class UringMachine
31
33
  while (msg = machine.shift(mailbox))
32
34
  process_message(msg)
33
35
  end
36
+ rescue Stop
37
+ # stopped
34
38
  ensure
35
39
  @target.teardown if @target.respond_to?(:teardown)
36
40
  end
@@ -45,6 +49,10 @@ class UringMachine
45
49
  @machine.shift response_mailbox
46
50
  end
47
51
 
52
+ def stop
53
+ @machine.schedule(self, Stop.new)
54
+ end
55
+
48
56
  private
49
57
 
50
58
  def process_message(msg)