uringmachine 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +14 -0
- data/TODO.md +144 -0
- data/benchmark/README.md +173 -0
- data/benchmark/bm_io_pipe.rb +70 -0
- data/benchmark/bm_io_socketpair.rb +71 -0
- data/benchmark/bm_mutex_cpu.rb +57 -0
- data/benchmark/bm_mutex_io.rb +64 -0
- data/benchmark/bm_pg_client.rb +109 -0
- data/benchmark/bm_queue.rb +76 -0
- data/benchmark/chart.png +0 -0
- data/benchmark/common.rb +135 -0
- data/benchmark/dns_client.rb +47 -0
- data/{examples/bm_http_parse.rb → benchmark/http_parse.rb} +1 -1
- data/benchmark/run_bm.rb +8 -0
- data/benchmark/sqlite.rb +108 -0
- data/{examples/bm_write.rb → benchmark/write.rb} +4 -4
- data/ext/um/um.c +189 -100
- data/ext/um/um.h +36 -10
- data/ext/um/um_async_op.c +1 -1
- data/ext/um/um_class.c +87 -13
- data/ext/um/um_op.c +6 -0
- data/ext/um/um_sync.c +2 -2
- data/ext/um/um_utils.c +16 -0
- data/grant-2025/journal.md +118 -1
- data/grant-2025/tasks.md +48 -22
- data/lib/uringmachine/actor.rb +8 -0
- data/lib/uringmachine/dns_resolver.rb +1 -2
- data/lib/uringmachine/fiber_scheduler.rb +127 -81
- data/lib/uringmachine/version.rb +1 -1
- data/lib/uringmachine.rb +32 -3
- data/test/helper.rb +7 -18
- data/test/test_actor.rb +12 -3
- data/test/test_async_op.rb +10 -10
- data/test/test_fiber.rb +84 -1
- data/test/test_fiber_scheduler.rb +950 -47
- data/test/test_um.rb +297 -120
- data/uringmachine.gemspec +2 -1
- metadata +38 -16
- data/examples/bm_fileno.rb +0 -33
- data/examples/bm_queue.rb +0 -111
- data/examples/bm_side_running.rb +0 -83
- data/examples/bm_sqlite.rb +0 -89
- data/examples/dns_client.rb +0 -12
- /data/{examples/bm_mutex.rb → benchmark/mutex.rb} +0 -0
- /data/{examples/bm_mutex_single.rb → benchmark/mutex_single.rb} +0 -0
- /data/{examples/bm_send.rb → benchmark/send.rb} +0 -0
- /data/{examples/bm_snooze.rb → benchmark/snooze.rb} +0 -0
data/ext/um/um.h
CHANGED
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
#include <ruby.h>
|
|
5
5
|
#include <liburing.h>
|
|
6
6
|
|
|
7
|
-
|
|
8
7
|
// debugging
|
|
9
8
|
enum {
|
|
10
9
|
// set to 1 to enable debug logging
|
|
@@ -18,6 +17,10 @@ enum {
|
|
|
18
17
|
#define TRACE_FREE(ptr) //printf("Free %p %s:%d\n", ptr, __FILE__, __LINE__)
|
|
19
18
|
#define DEBUG_MARK(machine, markv, msg) \
|
|
20
19
|
if (machine->mark == markv) printf("%s\n", msg);
|
|
20
|
+
#define DEBUG_PRINTF(...) \
|
|
21
|
+
if (DEBUG) fprintf(stderr, __VA_ARGS__)
|
|
22
|
+
|
|
23
|
+
#define SYM_DEF(name) SYM_##name = ID2SYM(rb_intern(#name))
|
|
21
24
|
|
|
22
25
|
// branching
|
|
23
26
|
#ifndef unlikely
|
|
@@ -32,6 +35,7 @@ enum {
|
|
|
32
35
|
(TYPE(buffer) == RUBY_T_DATA) && rb_obj_is_instance_of(buffer, rb_cIOBuffer)
|
|
33
36
|
|
|
34
37
|
enum um_op_kind {
|
|
38
|
+
OP_UNDEFINED,
|
|
35
39
|
OP_TIMEOUT,
|
|
36
40
|
OP_SCHEDULE,
|
|
37
41
|
|
|
@@ -121,6 +125,22 @@ struct buf_ring_descriptor {
|
|
|
121
125
|
void *buf_base;
|
|
122
126
|
};
|
|
123
127
|
|
|
128
|
+
struct um_metrics {
|
|
129
|
+
ulong total_ops; // total ops submitted
|
|
130
|
+
ulong total_switches; // total fiber switches
|
|
131
|
+
ulong total_waits; // total number of CQE waits
|
|
132
|
+
|
|
133
|
+
uint ops_pending; // number of pending ops
|
|
134
|
+
uint ops_unsubmitted; // number of unsubmitted
|
|
135
|
+
uint ops_runqueue; // number of ops in runqueue
|
|
136
|
+
uint ops_free; // number of ops in freelist
|
|
137
|
+
uint ops_transient; // number of ops in transient list
|
|
138
|
+
|
|
139
|
+
double time_total_wait; // total CPU time waiting for CQEs
|
|
140
|
+
double time_last_cpu; // last seen time stamp
|
|
141
|
+
double time_first_cpu; // first seen time stamp
|
|
142
|
+
};
|
|
143
|
+
|
|
124
144
|
#define BUFFER_RING_MAX_COUNT 10
|
|
125
145
|
|
|
126
146
|
struct um {
|
|
@@ -133,17 +153,19 @@ struct um {
|
|
|
133
153
|
uint ring_initialized; // is the ring initialized successfully
|
|
134
154
|
uint mark; // used to mark instances for debugging
|
|
135
155
|
|
|
136
|
-
|
|
137
|
-
|
|
156
|
+
struct um_metrics metrics;
|
|
157
|
+
int profile_mode;
|
|
158
|
+
|
|
138
159
|
uint buffer_ring_count; // number of registered buffer rings
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
uint entries; // number of entries in SQ
|
|
160
|
+
|
|
161
|
+
uint size; // size of SQ
|
|
142
162
|
uint sqpoll_mode; // SQPOLL mode enabled
|
|
143
163
|
|
|
144
164
|
struct buf_ring_descriptor buffer_rings[BUFFER_RING_MAX_COUNT];
|
|
145
165
|
|
|
146
|
-
struct um_op *transient_head;
|
|
166
|
+
struct um_op *transient_head; // list of pending transient ops
|
|
167
|
+
VALUE pending_fibers; // hash containing pending fibers
|
|
168
|
+
|
|
147
169
|
struct um_op *runqueue_head;
|
|
148
170
|
struct um_op *runqueue_tail;
|
|
149
171
|
|
|
@@ -203,9 +225,11 @@ extern VALUE cAsyncOp;
|
|
|
203
225
|
extern VALUE eStreamRESPError;
|
|
204
226
|
|
|
205
227
|
struct um *um_get_machine(VALUE self);
|
|
206
|
-
void um_setup(VALUE self, struct um *machine, uint
|
|
228
|
+
void um_setup(VALUE self, struct um *machine, uint size, uint sqpoll_timeout_msec);
|
|
207
229
|
void um_teardown(struct um *machine);
|
|
208
230
|
|
|
231
|
+
VALUE um_metrics(struct um *machine, struct um_metrics *metrics);
|
|
232
|
+
|
|
209
233
|
const char * um_op_kind_name(enum um_op_kind kind);
|
|
210
234
|
struct um_op *um_op_alloc(struct um *machine);
|
|
211
235
|
void um_op_free(struct um *machine, struct um_op *op);
|
|
@@ -227,6 +251,8 @@ void um_free_buffer_linked_list(struct um *machine);
|
|
|
227
251
|
|
|
228
252
|
struct __kernel_timespec um_double_to_timespec(double value);
|
|
229
253
|
double um_timestamp_to_double(__s64 tv_sec, __u32 tv_nsec);
|
|
254
|
+
double um_get_time_cpu();
|
|
255
|
+
double um_get_time_monotonic();
|
|
230
256
|
int um_value_is_exception_p(VALUE v);
|
|
231
257
|
VALUE um_raise_exception(VALUE v);
|
|
232
258
|
|
|
@@ -244,8 +270,8 @@ void um_add_strings_to_buffer_ring(struct um *machine, int bgid, VALUE strings);
|
|
|
244
270
|
struct io_uring_sqe *um_get_sqe(struct um *machine, struct um_op *op);
|
|
245
271
|
|
|
246
272
|
uint um_submit(struct um *machine);
|
|
247
|
-
VALUE
|
|
248
|
-
VALUE
|
|
273
|
+
VALUE um_yield(struct um *machine);
|
|
274
|
+
VALUE um_switch(struct um *machine);
|
|
249
275
|
VALUE um_wakeup(struct um *machine);
|
|
250
276
|
void um_cancel_op(struct um *machine, struct um_op *op);
|
|
251
277
|
void um_cancel_and_wait(struct um *machine, struct um_op *op);
|
data/ext/um/um_async_op.c
CHANGED
|
@@ -26,7 +26,7 @@ VALUE um_async_op_await(struct um_async_op *async_op) {
|
|
|
26
26
|
RB_OBJ_WRITE(async_op->machine->self, &async_op->op->fiber, rb_fiber_current());
|
|
27
27
|
async_op->op->flags &= ~OP_F_ASYNC;
|
|
28
28
|
|
|
29
|
-
VALUE ret =
|
|
29
|
+
VALUE ret = um_switch(async_op->machine);
|
|
30
30
|
if (!um_op_completed_p(async_op->op))
|
|
31
31
|
um_cancel_and_wait(async_op->machine, async_op->op);
|
|
32
32
|
|
data/ext/um/um_class.c
CHANGED
|
@@ -3,15 +3,29 @@
|
|
|
3
3
|
#include <ruby/io.h>
|
|
4
4
|
#include <sys/syscall.h>
|
|
5
5
|
#include <unistd.h>
|
|
6
|
+
#include <sys/socket.h>
|
|
6
7
|
|
|
7
8
|
VALUE cUM;
|
|
8
9
|
VALUE eUMError;
|
|
9
10
|
|
|
11
|
+
VALUE SYM_size;
|
|
12
|
+
VALUE SYM_total_ops;
|
|
13
|
+
VALUE SYM_total_switches;
|
|
14
|
+
VALUE SYM_total_waits;
|
|
15
|
+
VALUE SYM_ops_pending;
|
|
16
|
+
VALUE SYM_ops_unsubmitted;
|
|
17
|
+
VALUE SYM_ops_runqueue;
|
|
18
|
+
VALUE SYM_ops_free;
|
|
19
|
+
VALUE SYM_ops_transient;
|
|
20
|
+
VALUE SYM_time_total_cpu;
|
|
21
|
+
VALUE SYM_time_total_wait;
|
|
22
|
+
|
|
10
23
|
static ID id_fileno;
|
|
11
24
|
|
|
12
25
|
static void UM_mark(void *ptr) {
|
|
13
26
|
struct um *machine = ptr;
|
|
14
27
|
rb_gc_mark_movable(machine->self);
|
|
28
|
+
rb_gc_mark_movable(machine->pending_fibers);
|
|
15
29
|
|
|
16
30
|
um_op_list_mark(machine, machine->transient_head);
|
|
17
31
|
um_op_list_mark(machine, machine->runqueue_head);
|
|
@@ -20,6 +34,7 @@ static void UM_mark(void *ptr) {
|
|
|
20
34
|
static void UM_compact(void *ptr) {
|
|
21
35
|
struct um *machine = ptr;
|
|
22
36
|
machine->self = rb_gc_location(machine->self);
|
|
37
|
+
machine->pending_fibers = rb_gc_location(machine->pending_fibers);
|
|
23
38
|
|
|
24
39
|
um_op_list_compact(machine, machine->transient_head);
|
|
25
40
|
um_op_list_compact(machine, machine->runqueue_head);
|
|
@@ -90,9 +105,9 @@ VALUE UM_setup_buffer_ring(VALUE self, VALUE size, VALUE count) {
|
|
|
90
105
|
return INT2NUM(bgid);
|
|
91
106
|
}
|
|
92
107
|
|
|
93
|
-
VALUE
|
|
108
|
+
VALUE UM_size(VALUE self) {
|
|
94
109
|
struct um *machine = um_get_machine(self);
|
|
95
|
-
return UINT2NUM(machine->
|
|
110
|
+
return UINT2NUM(machine->size);
|
|
96
111
|
}
|
|
97
112
|
|
|
98
113
|
VALUE UM_mark_m(VALUE self, VALUE mark) {
|
|
@@ -101,25 +116,51 @@ VALUE UM_mark_m(VALUE self, VALUE mark) {
|
|
|
101
116
|
return self;
|
|
102
117
|
}
|
|
103
118
|
|
|
104
|
-
VALUE
|
|
119
|
+
VALUE UM_metrics(VALUE self) {
|
|
105
120
|
struct um *machine = um_get_machine(self);
|
|
106
|
-
return
|
|
121
|
+
return um_metrics(machine, &machine->metrics);
|
|
107
122
|
}
|
|
108
123
|
|
|
109
|
-
VALUE
|
|
124
|
+
VALUE UM_profile_p(VALUE self) {
|
|
110
125
|
struct um *machine = um_get_machine(self);
|
|
111
|
-
return
|
|
126
|
+
return machine->profile_mode ? Qtrue : Qfalse;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
VALUE UM_profile_set(VALUE self, VALUE value) {
|
|
130
|
+
struct um *machine = um_get_machine(self);
|
|
131
|
+
machine->profile_mode = RTEST(value);
|
|
132
|
+
if (machine->profile_mode) {
|
|
133
|
+
machine->metrics.time_total_wait = 0.0;
|
|
134
|
+
machine->metrics.time_last_cpu = machine->metrics.time_first_cpu = um_get_time_cpu();
|
|
135
|
+
}
|
|
136
|
+
return value;
|
|
112
137
|
}
|
|
113
138
|
|
|
114
139
|
VALUE UM_snooze(VALUE self) {
|
|
115
140
|
struct um *machine = um_get_machine(self);
|
|
116
141
|
um_schedule(machine, rb_fiber_current(), Qnil);
|
|
117
|
-
|
|
142
|
+
|
|
143
|
+
// the current fiber is already scheduled, and the runqueue is GC-marked, so
|
|
144
|
+
// we can safely call um_switch, which is faster than calling um_yield.
|
|
145
|
+
VALUE ret = um_switch(machine);
|
|
146
|
+
RAISE_IF_EXCEPTION(ret);
|
|
147
|
+
return ret;
|
|
118
148
|
}
|
|
119
149
|
|
|
120
150
|
VALUE UM_yield(VALUE self) {
|
|
121
151
|
struct um *machine = um_get_machine(self);
|
|
122
|
-
|
|
152
|
+
|
|
153
|
+
VALUE ret = um_yield(machine);
|
|
154
|
+
RAISE_IF_EXCEPTION(ret);
|
|
155
|
+
return ret;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
VALUE UM_switch(VALUE self) {
|
|
159
|
+
struct um *machine = um_get_machine(self);
|
|
160
|
+
|
|
161
|
+
VALUE ret = um_switch(machine);
|
|
162
|
+
RAISE_IF_EXCEPTION(ret);
|
|
163
|
+
return ret;
|
|
123
164
|
}
|
|
124
165
|
|
|
125
166
|
VALUE UM_wakeup(VALUE self) {
|
|
@@ -133,6 +174,11 @@ VALUE UM_submit(VALUE self) {
|
|
|
133
174
|
return UINT2NUM(ret);
|
|
134
175
|
}
|
|
135
176
|
|
|
177
|
+
VALUE UM_pending_fibers(VALUE self) {
|
|
178
|
+
struct um *machine = um_get_machine(self);
|
|
179
|
+
return machine->pending_fibers;
|
|
180
|
+
}
|
|
181
|
+
|
|
136
182
|
VALUE UM_schedule(VALUE self, VALUE fiber, VALUE value) {
|
|
137
183
|
struct um *machine = um_get_machine(self);
|
|
138
184
|
um_schedule(machine, fiber, value);
|
|
@@ -428,6 +474,17 @@ VALUE UM_pipe(VALUE self) {
|
|
|
428
474
|
return rb_ary_new_from_args(2, INT2NUM(fds[0]), INT2NUM(fds[1]));
|
|
429
475
|
}
|
|
430
476
|
|
|
477
|
+
VALUE UM_socketpair(VALUE self, VALUE domain, VALUE type, VALUE protocol) {
|
|
478
|
+
int fds[2];
|
|
479
|
+
int ret = socketpair(NUM2INT(domain), NUM2INT(type), NUM2INT(protocol), fds);
|
|
480
|
+
if (ret) {
|
|
481
|
+
int e = errno;
|
|
482
|
+
rb_syserr_fail(e, strerror(e));
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
return rb_ary_new_from_args(2, INT2NUM(fds[0]), INT2NUM(fds[1]));
|
|
486
|
+
}
|
|
487
|
+
|
|
431
488
|
VALUE UM_pidfd_open(VALUE self, VALUE pid) {
|
|
432
489
|
int fd = syscall(SYS_pidfd_open, NUM2INT(pid), 0);
|
|
433
490
|
if (fd == -1) {
|
|
@@ -483,7 +540,7 @@ VALUE UM_kernel_version(VALUE self) {
|
|
|
483
540
|
}
|
|
484
541
|
|
|
485
542
|
VALUE UM_debug(VALUE self, VALUE str) {
|
|
486
|
-
|
|
543
|
+
fprintf(stderr, "%s\n", StringValueCStr(str));
|
|
487
544
|
return Qnil;
|
|
488
545
|
}
|
|
489
546
|
|
|
@@ -494,13 +551,16 @@ void Init_UM(void) {
|
|
|
494
551
|
rb_define_alloc_func(cUM, UM_allocate);
|
|
495
552
|
|
|
496
553
|
rb_define_method(cUM, "initialize", UM_initialize, -1);
|
|
497
|
-
rb_define_method(cUM, "
|
|
554
|
+
rb_define_method(cUM, "size", UM_size, 0);
|
|
498
555
|
rb_define_method(cUM, "mark", UM_mark_m, 1);
|
|
499
|
-
rb_define_method(cUM, "
|
|
500
|
-
rb_define_method(cUM, "
|
|
556
|
+
rb_define_method(cUM, "metrics", UM_metrics, 0);
|
|
557
|
+
rb_define_method(cUM, "profile?", UM_profile_p, 0);
|
|
558
|
+
rb_define_method(cUM, "profile", UM_profile_set, 1);
|
|
559
|
+
|
|
501
560
|
rb_define_method(cUM, "setup_buffer_ring", UM_setup_buffer_ring, 2);
|
|
502
561
|
|
|
503
562
|
rb_define_singleton_method(cUM, "pipe", UM_pipe, 0);
|
|
563
|
+
rb_define_singleton_method(cUM, "socketpair", UM_socketpair, 3);
|
|
504
564
|
rb_define_singleton_method(cUM, "pidfd_open", UM_pidfd_open, 1);
|
|
505
565
|
rb_define_singleton_method(cUM, "pidfd_send_signal", UM_pidfd_send_signal, 2);
|
|
506
566
|
|
|
@@ -513,8 +573,10 @@ void Init_UM(void) {
|
|
|
513
573
|
rb_define_method(cUM, "snooze", UM_snooze, 0);
|
|
514
574
|
rb_define_method(cUM, "timeout", UM_timeout, 2);
|
|
515
575
|
rb_define_method(cUM, "yield", UM_yield, 0);
|
|
576
|
+
rb_define_method(cUM, "switch", UM_switch, 0);
|
|
516
577
|
rb_define_method(cUM, "wakeup", UM_wakeup, 0);
|
|
517
578
|
rb_define_method(cUM, "submit", UM_submit, 0);
|
|
579
|
+
rb_define_method(cUM, "pending_fibers", UM_pending_fibers, 0);
|
|
518
580
|
|
|
519
581
|
rb_define_method(cUM, "close", UM_close, 1);
|
|
520
582
|
rb_define_method(cUM, "close_async", UM_close_async, 1);
|
|
@@ -530,7 +592,7 @@ void Init_UM(void) {
|
|
|
530
592
|
rb_define_method(cUM, "poll", UM_poll, 2);
|
|
531
593
|
rb_define_method(cUM, "select", UM_select, 3);
|
|
532
594
|
rb_define_method(cUM, "waitid", UM_waitid, 3);
|
|
533
|
-
|
|
595
|
+
|
|
534
596
|
#ifdef HAVE_RB_PROCESS_STATUS_NEW
|
|
535
597
|
rb_define_method(cUM, "waitid_status", UM_waitid_status, 3);
|
|
536
598
|
#endif
|
|
@@ -562,5 +624,17 @@ void Init_UM(void) {
|
|
|
562
624
|
|
|
563
625
|
um_define_net_constants(cUM);
|
|
564
626
|
|
|
627
|
+
SYM_size = ID2SYM(rb_intern("size"));
|
|
628
|
+
SYM_total_ops = ID2SYM(rb_intern("total_ops"));
|
|
629
|
+
SYM_total_switches = ID2SYM(rb_intern("total_switches"));
|
|
630
|
+
SYM_total_waits = ID2SYM(rb_intern("total_waits"));
|
|
631
|
+
SYM_ops_pending = ID2SYM(rb_intern("ops_pending"));
|
|
632
|
+
SYM_ops_unsubmitted = ID2SYM(rb_intern("ops_unsubmitted"));
|
|
633
|
+
SYM_ops_runqueue = ID2SYM(rb_intern("ops_runqueue"));
|
|
634
|
+
SYM_ops_free = ID2SYM(rb_intern("ops_free"));
|
|
635
|
+
SYM_ops_transient = ID2SYM(rb_intern("ops_transient"));
|
|
636
|
+
SYM_time_total_cpu = ID2SYM(rb_intern("time_total_cpu"));
|
|
637
|
+
SYM_time_total_wait = ID2SYM(rb_intern("time_total_wait"));
|
|
638
|
+
|
|
565
639
|
id_fileno = rb_intern_const("fileno");
|
|
566
640
|
}
|
data/ext/um/um_op.c
CHANGED
|
@@ -50,6 +50,7 @@ inline void um_op_transient_add(struct um *machine, struct um_op *op) {
|
|
|
50
50
|
machine->transient_head->prev = op;
|
|
51
51
|
}
|
|
52
52
|
machine->transient_head = op;
|
|
53
|
+
machine->metrics.ops_transient++;
|
|
53
54
|
}
|
|
54
55
|
|
|
55
56
|
inline void um_op_transient_remove(struct um *machine, struct um_op *op) {
|
|
@@ -60,6 +61,7 @@ inline void um_op_transient_remove(struct um *machine, struct um_op *op) {
|
|
|
60
61
|
|
|
61
62
|
if (machine->transient_head == op)
|
|
62
63
|
machine->transient_head = op->next;
|
|
64
|
+
machine->metrics.ops_transient--;
|
|
63
65
|
}
|
|
64
66
|
|
|
65
67
|
inline void um_runqueue_push(struct um *machine, struct um_op *op) {
|
|
@@ -71,6 +73,7 @@ inline void um_runqueue_push(struct um *machine, struct um_op *op) {
|
|
|
71
73
|
else
|
|
72
74
|
machine->runqueue_head = machine->runqueue_tail = op;
|
|
73
75
|
op->next = NULL;
|
|
76
|
+
machine->metrics.ops_runqueue++;
|
|
74
77
|
}
|
|
75
78
|
|
|
76
79
|
inline struct um_op *um_runqueue_shift(struct um *machine) {
|
|
@@ -80,6 +83,7 @@ inline struct um_op *um_runqueue_shift(struct um *machine) {
|
|
|
80
83
|
machine->runqueue_head = op->next;
|
|
81
84
|
if (!machine->runqueue_head)
|
|
82
85
|
machine->runqueue_tail = NULL;
|
|
86
|
+
machine->metrics.ops_runqueue--;
|
|
83
87
|
return op;
|
|
84
88
|
}
|
|
85
89
|
|
|
@@ -152,6 +156,7 @@ inline struct um_op *um_op_alloc(struct um *machine) {
|
|
|
152
156
|
if (machine->op_freelist) {
|
|
153
157
|
struct um_op *op = machine->op_freelist;
|
|
154
158
|
machine->op_freelist = op->next;
|
|
159
|
+
machine->metrics.ops_free--;
|
|
155
160
|
return op;
|
|
156
161
|
}
|
|
157
162
|
return malloc(sizeof(struct um_op));
|
|
@@ -160,4 +165,5 @@ inline struct um_op *um_op_alloc(struct um *machine) {
|
|
|
160
165
|
inline void um_op_free(struct um *machine, struct um_op *op) {
|
|
161
166
|
op->next = machine->op_freelist;
|
|
162
167
|
machine->op_freelist = op;
|
|
168
|
+
machine->metrics.ops_free++;
|
|
163
169
|
}
|
data/ext/um/um_sync.c
CHANGED
|
@@ -13,7 +13,7 @@ void um_futex_wait(struct um *machine, uint32_t *futex, uint32_t value) {
|
|
|
13
13
|
sqe, (uint32_t *)futex, value, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0
|
|
14
14
|
);
|
|
15
15
|
|
|
16
|
-
VALUE ret =
|
|
16
|
+
VALUE ret = um_yield(machine);
|
|
17
17
|
if (!um_op_completed_p(&op))
|
|
18
18
|
um_cancel_and_wait(machine, &op);
|
|
19
19
|
else {
|
|
@@ -33,7 +33,7 @@ void um_futex_wake(struct um *machine, uint32_t *futex, uint32_t num_waiters) {
|
|
|
33
33
|
sqe, (uint32_t *)futex, num_waiters, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0
|
|
34
34
|
);
|
|
35
35
|
|
|
36
|
-
VALUE ret =
|
|
36
|
+
VALUE ret = um_yield(machine);
|
|
37
37
|
um_check_completion(machine, &op);
|
|
38
38
|
|
|
39
39
|
RAISE_IF_EXCEPTION(ret);
|
data/ext/um/um_utils.c
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#include <sys/mman.h>
|
|
3
3
|
#include <stdlib.h>
|
|
4
4
|
#include <ruby/io/buffer.h>
|
|
5
|
+
#include <time.h>
|
|
5
6
|
|
|
6
7
|
inline struct __kernel_timespec um_double_to_timespec(double value) {
|
|
7
8
|
double integral;
|
|
@@ -16,6 +17,21 @@ inline double um_timestamp_to_double(__s64 tv_sec, __u32 tv_nsec) {
|
|
|
16
17
|
return (double)tv_sec + ((double)tv_nsec) / 1000000000;
|
|
17
18
|
}
|
|
18
19
|
|
|
20
|
+
inline double um_get_time_cpu() {
|
|
21
|
+
struct timespec ts;
|
|
22
|
+
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts)) return -1.0;
|
|
23
|
+
|
|
24
|
+
return um_timestamp_to_double(ts.tv_sec, ts.tv_nsec);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
inline double um_get_time_monotonic() {
|
|
28
|
+
struct timespec ts;
|
|
29
|
+
if (clock_gettime(CLOCK_MONOTONIC, &ts)) return -1.0;
|
|
30
|
+
|
|
31
|
+
return um_timestamp_to_double(ts.tv_sec, ts.tv_nsec);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
|
|
19
35
|
#define RAISE_EXCEPTION(e) rb_funcall(e, ID_invoke, 0);
|
|
20
36
|
|
|
21
37
|
inline int um_value_is_exception_p(VALUE v) {
|
data/grant-2025/journal.md
CHANGED
|
@@ -339,7 +339,10 @@ Ruby I/O layer. Some interesting warts in the Ruby `IO` implementation:
|
|
|
339
339
|
|
|
340
340
|
# 2025-12-06
|
|
341
341
|
|
|
342
|
-
- Samuel has found the issue with pwrite (it turns out the the `#io_pwrite` hook
|
|
342
|
+
- Samuel has found the issue with pwrite (it turns out that the `#io_pwrite` hook
|
|
343
|
+
was being invoked with the GVL released), and [fixed
|
|
344
|
+
it](https://github.com/ruby/ruby/pull/15428). So now `#pwrite` works correctly
|
|
345
|
+
with a fiber scheduler!
|
|
343
346
|
|
|
344
347
|
- I followed Samuel's suggestion and incorporated some debug logging into the
|
|
345
348
|
extension code interfacing with liburing, in order to facilitate debugging
|
|
@@ -351,3 +354,117 @@ Ruby I/O layer. Some interesting warts in the Ruby `IO` implementation:
|
|
|
351
354
|
implications of that, but I'll try to make some time to check this against
|
|
352
355
|
[TP2](https://github.com/noteflakes/tp2), a UringMachine-based web server I'm
|
|
353
356
|
currently using in a bunch of projects.
|
|
357
|
+
|
|
358
|
+
# 2025-12-07
|
|
359
|
+
|
|
360
|
+
- I started looking at getting `#io_close` to work, and found out that Samuel
|
|
361
|
+
has already done the work, that is the code was already there, but was
|
|
362
|
+
commented out. Samuel explained that it was impossible to get it to work due
|
|
363
|
+
to the complexity of the implementation of `IO#close`, and indeed when I tried
|
|
364
|
+
it myself I saw that in fact it was just not possible the way the IO state is
|
|
365
|
+
managed when an IO is closed. I then had the idea that maybe we could pass the
|
|
366
|
+
underlying fd instead of the IO object itself to the `#io_close` hook. I tried
|
|
367
|
+
it and indeed it worked without any problems. The only issue is that this
|
|
368
|
+
breaks the convention where the different `io_xxx` hooks take an io as their
|
|
369
|
+
first argument. Nevertheless, I suggested this idea to Samuel and gladly he
|
|
370
|
+
accepted when he saw this is the only way we can make this hook work. Samuel then
|
|
371
|
+
proceeded to prepare a [PR](https://github.com/ruby/ruby/pull/15434) and merge
|
|
372
|
+
it.
|
|
373
|
+
|
|
374
|
+
- Added the `#io_close` hook to the UringMachine fiber scheduler, as well as a
|
|
375
|
+
`#yield` hook for dealing with thread interrupts in response to another
|
|
376
|
+
[PR](https://github.com/ruby/ruby/pull/14700) by Samuel. I also added missing
|
|
377
|
+
docs for the different methods in the fiber scheduler.
|
|
378
|
+
|
|
379
|
+
# 2025-12-08
|
|
380
|
+
|
|
381
|
+
- Wrote a bunch of benchmarks for different scenarios comparing threads vs fiber
|
|
382
|
+
scheduler vs low-level UM implementation. The
|
|
383
|
+
[results](https://github.com/digital-fabric/uringmachine/blob/main/benchmark/README.md)
|
|
384
|
+
show the promise of UringMachine and of its fiber scheduler. What is great
|
|
385
|
+
about the fiber scheduler interface is that it provides a significant boost to
|
|
386
|
+
I/O-bound scenarios, with almost no change to the source code (basically, you
|
|
387
|
+
just need to replace `Thread.new` with `Fiber.schedule`).
|
|
388
|
+
|
|
389
|
+
These results, though preliminary, seem to validate the approach I took with
|
|
390
|
+
UringMachine - implementing a low-level API and tying it to the entire Ruby
|
|
391
|
+
ecosystem by way of the fiber scheduler interface.
|
|
392
|
+
|
|
393
|
+
- Spent the rest of the day writing lots of tests for the fiber scheduler. I
|
|
394
|
+
tried to cover the entire `IO` API - both class- and instance methods. I also
|
|
395
|
+
wrote some "integration" tests - different scenarios not unlike those in the
|
|
396
|
+
benchmarks, which exercise the different hooks in the fiber scheduler.
|
|
397
|
+
|
|
398
|
+
- Added some new APIs to help with testing: `UM#await_fibers` is a method for
|
|
399
|
+
waiting for one or more fibers to terminate. Unlike `UM#join`, it doesn't
|
|
400
|
+
return the return values of the given fibers, it just waits for them to
|
|
401
|
+
terminate. Another new API is `UM.socketpair`, which is like
|
|
402
|
+
`Socket.socketpair` except it returns raw fd's.
|
|
403
|
+
|
|
404
|
+
- Fixed a tricky bug that caused an occasional segmentation fault while running
|
|
405
|
+
benchmarks. Some fibers waiting for an operation to complete were garbage
|
|
406
|
+
collected because there was no reference to them anywhere. I fixed this by
|
|
407
|
+
adding a map of pending fibers at the C-extension level and adding and
|
|
408
|
+
removing pending fibers from it automatically. I also added checking for
|
|
409
|
+
leaking fibers at the end of each test, so the UringMachine instance will not
|
|
410
|
+
hold onto fibers that have terminated.
|
|
411
|
+
|
|
412
|
+
# 2025-12-09
|
|
413
|
+
|
|
414
|
+
- Added the Async fiber scheduler to the different benchmarks. Also added an
|
|
415
|
+
SQPOLL mode to the benchmarks. Added a PG client benchmark.
|
|
416
|
+
|
|
417
|
+
- Fixed some small issues in the UM fiber scheduler and in the UM low-level API
|
|
418
|
+
implementation.
|
|
419
|
+
|
|
420
|
+
# 2025-12-10
|
|
421
|
+
|
|
422
|
+
- Refactored the benchmarks, abstracting away all the common code into a
|
|
423
|
+
common class that is then used in the different benchmarks. I also added a
|
|
424
|
+
test for Async with an epoll selector. I'll try to find some time in the
|
|
425
|
+
coming days to update the results in the repo.
|
|
426
|
+
|
|
427
|
+
- Added and streamlined metrics that indicate the following:
|
|
428
|
+
|
|
429
|
+
- The ring size
|
|
430
|
+
- Total number of ops
|
|
431
|
+
- Total number of fiber switches
|
|
432
|
+
- Total number of waits for CQEs
|
|
433
|
+
- Current number of pending ops
|
|
434
|
+
- Current number of unsubmitted ops
|
|
435
|
+
- Current size of runqueue
|
|
436
|
+
- Current number of transient ops
|
|
437
|
+
- Current number of free ops
|
|
438
|
+
|
|
439
|
+
I also added some basic time measurements:
|
|
440
|
+
|
|
441
|
+
- Total CPU time
|
|
442
|
+
- Total time spent waiting for CQEs
|
|
443
|
+
|
|
444
|
+
These are off by default, but can be enabled by calling `UM#profile(true)`.
|
|
445
|
+
I'd like to do a lot more with profiling, like measuring the CPU time spent on
|
|
446
|
+
each fiber, but I'm a bit apprehensive of the performance costs involved, as
|
|
447
|
+
getting the `CLOCK_THREAD_CPUTIME_ID` clock is relatively slow, and then
|
|
448
|
+
managing this for each fiber means getting and setting a couple of instance
|
|
449
|
+
variables, which can *really* slow things down. On top of that, I'm not that
|
|
450
|
+
sure this is really needed.
|
|
451
|
+
|
|
452
|
+
- I went through some old benchmarks, reorganized them, got rid of some that
|
|
453
|
+
were irrelevant. There were some really interesting ones: a benchmark
|
|
454
|
+
measuring the cost of different ways of accessing an SQLite DB (using
|
|
455
|
+
[Extralite](https://github.com/digital-fabric/extralite/)): normally, using an
|
|
456
|
+
actor interface, or protected by a mutex. I'll try to follow up with a
|
|
457
|
+
benchmark measuring concurrent access to SQLite DBs, similar to the PG one.
|
|
458
|
+
|
|
459
|
+
Another interesting benchmark I found was one for resolving DNS addresses
|
|
460
|
+
using Ruby's builtin `Addrinfo` API, the bundled `resolv` gem, and a basic DNS
|
|
461
|
+
resolver included in UringMachine (I totally forgot I made one). Here too, I'd
|
|
462
|
+
like to add a benchmark to measure how these different solutions do in a
|
|
463
|
+
highly concurrent scenario.
|
|
464
|
+
|
|
465
|
+
- Thanks to one of these old benchmarks I made a change that more than doubled
|
|
466
|
+
the performance of `UM#snooze`. What this method does is it adds the current
|
|
467
|
+
fiber to the end of the runqueue, and yields control to the next fiber in the
|
|
468
|
+
runqueue, or to process available CQE's. This method is useful for testing,
|
|
469
|
+
but also for yielding control periodically when performing CPU-bound work, in
|
|
470
|
+
order to keep the application responsive and improve latency.
|
data/grant-2025/tasks.md
CHANGED
|
@@ -5,16 +5,27 @@
|
|
|
5
5
|
- [v] Add support for IO::Buffer in UM API.
|
|
6
6
|
- [v] Add `UM::Error` class to be used instead of RuntimeError
|
|
7
7
|
- [v] Add optional ring size argument to `UM.new` (for example, the
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
worker thread for the scheduler `blocking_operation_wait` hook does not need
|
|
9
|
+
a lot of depth, so you can basically do `UM.new(4)`)
|
|
10
10
|
- [v] Add debugging code suggested by Samuel
|
|
11
11
|
- [v] Add support for SQPOLL
|
|
12
12
|
https://unixism.net/loti/tutorial/sq_poll.html
|
|
13
|
+
- [v] Add `UM.socketpair`
|
|
14
|
+
|
|
15
|
+
- [ ] Add more metrics
|
|
16
|
+
- [v] runqueue depth
|
|
17
|
+
- [v] number of pending fibers
|
|
18
|
+
- [v] ops: transient count, free count
|
|
19
|
+
- [v] total fiber switches, total waiting for CQEs
|
|
20
|
+
- [ ] watermark: ops_pending, ops_unsubmitted, ops_runqueue, ops_free, ops_transient
|
|
21
|
+
(only in profile mode)
|
|
22
|
+
- [ ] Performance tuning parameters
|
|
23
|
+
- [ ] max fiber switches before processing CQEs
|
|
24
|
+
- [ ] max fiber switches before submitting unsubmitted SQEs
|
|
25
|
+
- [ ] measure switches since last submitting / last CQE processing
|
|
13
26
|
|
|
14
27
|
- [ ] Add support for using IO::Buffer in association with io_uring registered
|
|
15
28
|
buffers / buffer rings
|
|
16
|
-
- [ ] Set `IOSQE_CQE_SKIP_SUCCESS` flag for `#close_async` and `#write_async`
|
|
17
|
-
- [ ] In `UM#spin` always start fibers as non-blocking.
|
|
18
29
|
- [ ] Add some way to measure fiber CPU time.
|
|
19
30
|
https://github.com/socketry/async/issues/428
|
|
20
31
|
|
|
@@ -59,8 +70,9 @@
|
|
|
59
70
|
With worker count according to CPU count
|
|
60
71
|
- [v] Test working with non-blocking files, it should be fine, and we shouldn't need to reset `O_NONBLOCK`.
|
|
61
72
|
- [v] Implement timeouts (how do timeouts interact with blocking ops?)
|
|
62
|
-
- [
|
|
63
|
-
- [
|
|
73
|
+
- [v] Implement `#yield` hook (https://github.com/ruby/ruby/pull/14700)
|
|
74
|
+
- [v] Finish documentation for the `FiberScheduler` class
|
|
75
|
+
- [v] Implement `#io_close` hook
|
|
64
76
|
|
|
65
77
|
- [v] tests:
|
|
66
78
|
- [v] Wrap the scheduler interface such that we can verify that specific
|
|
@@ -73,17 +85,35 @@
|
|
|
73
85
|
- [v] fork
|
|
74
86
|
- [v] system / exec / etc.
|
|
75
87
|
- [v] popen
|
|
76
|
-
- [
|
|
77
|
-
- [
|
|
78
|
-
- [
|
|
79
|
-
- [
|
|
80
|
-
- [
|
|
88
|
+
- [v] "Integration tests"
|
|
89
|
+
- [v] IO - all methods!
|
|
90
|
+
- [v] queue: multiple concurrent readers / writers
|
|
91
|
+
- [v] net/http test: ad-hoc HTTP/1.1 server + `Net::HTTP` client
|
|
92
|
+
- [v] pipes: multiple pairs of fibers - reader / writer
|
|
93
|
+
- [v] sockets: echo server + many clients
|
|
81
94
|
|
|
82
95
|
- [ ] Benchmarks
|
|
83
|
-
- [
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
96
|
+
- [v] UM queue / Ruby queue (threads) / Ruby queue with UM fiber scheduler
|
|
97
|
+
|
|
98
|
+
N groups where each group has M producers and O consumers accessing the same queue.
|
|
99
|
+
|
|
100
|
+
- [v] UM mutex / Ruby mutex (threads) / Ruby mutex with UM fiber scheduler
|
|
101
|
+
|
|
102
|
+
- [v] N groups where each group has M fibers locking the same mutex and
|
|
103
|
+
performing CPU-bound work
|
|
104
|
+
- [v] N groups where each group has M fibers locking the same mutex and
|
|
105
|
+
performing IO-bound work (write to a file)
|
|
106
|
+
|
|
107
|
+
- [v] Pipe IO raw UM / Ruby threaded / Ruby with UM fiber scheduler
|
|
108
|
+
|
|
109
|
+
N groups where each group has a pair of reader / writer to a pipe
|
|
110
|
+
|
|
111
|
+
- [v] Socket IO (with socketpair) raw UM / Ruby threaded / Ruby with UM fiber scheduler
|
|
112
|
+
|
|
113
|
+
N groups where each group has a pair of reader / writer to a socketpair
|
|
114
|
+
|
|
115
|
+
- [v] Postgres test
|
|
116
|
+
|
|
87
117
|
- [ ] Measure CPU (thread) time usage for above examples
|
|
88
118
|
|
|
89
119
|
- run each version 1M times
|
|
@@ -94,14 +124,10 @@
|
|
|
94
124
|
cpu_time = Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID)
|
|
95
125
|
```
|
|
96
126
|
|
|
97
|
-
- my hunch is we'll be able to show with io_uring real_time is less,
|
|
98
|
-
while cpu_time is more. But it's just a hunch.
|
|
99
|
-
|
|
100
127
|
- [ ] Ruby Fiber::Scheduler interface
|
|
101
|
-
- [
|
|
102
|
-
- [
|
|
103
|
-
- [ ]
|
|
104
|
-
- [ ] Writes to a file (including `IO.write`) do not invoke `#io_write` (because writes to files cannot be non-blocking?) Instead, `blocking_operation_wait` is invoked.
|
|
128
|
+
- [v] Make a PR for resetting the scheduler and resetting the fiber non-blocking flag.
|
|
129
|
+
- [v] hook for close
|
|
130
|
+
- [ ] hooks for send/recv/sendmsg/recvmsg
|
|
105
131
|
|
|
106
132
|
- [ ] SSL
|
|
107
133
|
- [ ] openssl gem: custom BIO?
|
data/lib/uringmachine/actor.rb
CHANGED
|
@@ -24,6 +24,8 @@ class UringMachine
|
|
|
24
24
|
end
|
|
25
25
|
|
|
26
26
|
class Actor < Fiber
|
|
27
|
+
class Stop < UM::Error; end
|
|
28
|
+
|
|
27
29
|
def run(machine, target, mailbox)
|
|
28
30
|
@machine = machine
|
|
29
31
|
@target = target
|
|
@@ -31,6 +33,8 @@ class UringMachine
|
|
|
31
33
|
while (msg = machine.shift(mailbox))
|
|
32
34
|
process_message(msg)
|
|
33
35
|
end
|
|
36
|
+
rescue Stop
|
|
37
|
+
# stopped
|
|
34
38
|
ensure
|
|
35
39
|
@target.teardown if @target.respond_to?(:teardown)
|
|
36
40
|
end
|
|
@@ -45,6 +49,10 @@ class UringMachine
|
|
|
45
49
|
@machine.shift response_mailbox
|
|
46
50
|
end
|
|
47
51
|
|
|
52
|
+
def stop
|
|
53
|
+
@machine.schedule(self, Stop.new)
|
|
54
|
+
end
|
|
55
|
+
|
|
48
56
|
private
|
|
49
57
|
|
|
50
58
|
def process_message(msg)
|