uringmachine 0.20.0 → 0.21.0
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +3 -4
- data/CHANGELOG.md +20 -0
- data/TODO.md +0 -38
- data/examples/bm_queue.rb +2 -1
- data/examples/bm_write.rb +4 -1
- data/ext/um/extconf.rb +1 -1
- data/ext/um/um.c +269 -49
- data/ext/um/um.h +48 -21
- data/ext/um/um_async_op.c +1 -1
- data/ext/um/um_class.c +89 -13
- data/ext/um/um_op.c +37 -0
- data/ext/um/um_sync.c +8 -14
- data/grant-2025/journal.md +125 -1
- data/grant-2025/tasks.md +102 -33
- data/lib/uringmachine/fiber_scheduler.rb +191 -64
- data/lib/uringmachine/version.rb +1 -1
- data/test/test_fiber_scheduler.rb +519 -17
- data/test/test_um.rb +298 -23
- data/uringmachine.gemspec +5 -5
- data/vendor/liburing/src/include/liburing/io_uring.h +1 -0
- data/vendor/liburing/src/include/liburing.h +13 -0
- data/vendor/liburing/src/liburing-ffi.map +1 -0
- data/vendor/liburing/test/bind-listen.c +175 -13
- data/vendor/liburing/test/read-write.c +4 -4
- data/vendor/liburing/test/ringbuf-read.c +4 -4
- data/vendor/liburing/test/send_recv.c +8 -7
- metadata +15 -15
data/ext/um/um.h
CHANGED
@@ -4,12 +4,20 @@
 #include <ruby.h>
 #include <liburing.h>
 
+
 // debugging
+enum {
+  // set to 1 to enable debug logging
+  DEBUG = 0
+};
+
 #define OBJ_ID(obj) (NUM2LONG(rb_funcall(obj, rb_intern("object_id"), 0)))
 #define INSPECT(str, obj) { printf(str); VALUE s = rb_funcall(obj, rb_intern("inspect"), 0); printf(": %s\n", StringValueCStr(s)); }
 #define CALLER() rb_funcall(rb_mKernel, rb_intern("caller"), 0)
 #define TRACE_CALLER() INSPECT("caller: ", CALLER())
 #define TRACE_FREE(ptr) //printf("Free %p %s:%d\n", ptr, __FILE__, __LINE__)
+#define DEBUG_MARK(machine, markv, msg) \
+  if (machine->mark == markv) printf("%s\n", msg);
 
 // branching
 #ifndef unlikely
@@ -23,7 +31,7 @@
 #define IO_BUFFER_P(buffer) \
   (TYPE(buffer) == RUBY_T_DATA) && rb_obj_is_instance_of(buffer, rb_cIOBuffer)
 
-enum op_kind {
+enum um_op_kind {
   OP_TIMEOUT,
   OP_SCHEDULE,
@@ -62,12 +70,17 @@ enum op_kind {
   OP_SLEEP_MULTISHOT
 };
 
-#define OP_F_COMPLETED (1U << 0)
-#define OP_F_TRANSIENT (1U << 1)
-#define OP_F_ASYNC (1U << 2)
-#define
-#define
-#define
+#define OP_F_COMPLETED (1U << 0) // op is completed (set on each CQE for multishot ops)
+#define OP_F_TRANSIENT (1U << 1) // op is heap allocated
+#define OP_F_ASYNC (1U << 2) // op belongs to an AsyncOp
+#define OP_F_CANCELED (1U << 3) // op is cancelled
+#define OP_F_IGNORE_CANCELED (1U << 4) // CQE with -ECANCEL should be ignored
+#define OP_F_MULTISHOT (1U << 5) // op is multishot
+#define OP_F_FREE_ON_COMPLETE (1U << 6) // op should be freed on receiving CQE
+#define OP_F_RUNQUEUE_SKIP (1U << 7) // runqueue entry should be skipped
+#define OP_F_SELECT_POLLIN (1U << 8) // select POLLIN
+#define OP_F_SELECT_POLLOUT (1U << 9) // select POLLOUT
+#define OP_F_SELECT_POLLPRI (1U << 10) // select POLLPRI
 
 struct um_op_result {
   __s32 res;
@@ -79,8 +92,8 @@ struct um_op {
   struct um_op *prev;
   struct um_op *next;
 
-  enum op_kind kind;
-
+  enum um_op_kind kind;
+  uint flags;
 
   VALUE fiber;
   VALUE value;
@@ -88,7 +101,7 @@ struct um_op {
 
   struct um_op_result result;
   struct um_op_result *multishot_result_tail;
-
+  uint multishot_result_count;
 
   struct __kernel_timespec ts; // used for timeout operation
 };
@@ -117,12 +130,18 @@ struct um {
 
   struct io_uring ring;
 
-
-
-
+  uint ring_initialized; // is the ring initialized successfully
+  uint mark; // used to mark instances for debugging
+
+  uint unsubmitted_count; // number of unsubmitted SQEs pending
+  uint pending_count; // number of pending operations (i.e. not yet completed)
+  uint buffer_ring_count; // number of registered buffer rings
+  ulong total_op_count; // total number of operations submitted since ring was initialized
+
+  uint entries; // number of entries in SQ
+  uint sqpoll_mode; // SQPOLL mode enabled
 
   struct buf_ring_descriptor buffer_rings[BUFFER_RING_MAX_COUNT];
-  unsigned int buffer_ring_count;
 
   struct um_op *transient_head;
   struct um_op *runqueue_head;
@@ -184,9 +203,10 @@ extern VALUE cAsyncOp;
 extern VALUE eStreamRESPError;
 
 struct um *um_get_machine(VALUE self);
-void um_setup(VALUE self, struct um *machine);
+void um_setup(VALUE self, struct um *machine, uint entries, uint sqpoll_timeout_msec);
 void um_teardown(struct um *machine);
 
+const char * um_op_kind_name(enum um_op_kind kind);
 struct um_op *um_op_alloc(struct um *machine);
 void um_op_free(struct um *machine, struct um_op *op);
 void um_op_clear(struct um *machine, struct um_op *op);
@@ -210,9 +230,9 @@ double um_timestamp_to_double(__s64 tv_sec, __u32 tv_nsec);
 int um_value_is_exception_p(VALUE v);
 VALUE um_raise_exception(VALUE v);
 
-#define RAISE_IF_EXCEPTION(v) if (um_value_is_exception_p(v)) { um_raise_exception(v); }
+#define RAISE_IF_EXCEPTION(v) if (unlikely(um_value_is_exception_p(v))) { um_raise_exception(v); }
 
-void um_prep_op(struct um *machine, struct um_op *op, enum op_kind kind, unsigned flags);
+void um_prep_op(struct um *machine, struct um_op *op, enum um_op_kind kind, unsigned flags);
 void um_raise_on_error_result(int result);
 void um_get_buffer_bytes_for_writing(VALUE buffer, const void **base, size_t *size);
 void * um_prepare_read_buffer(VALUE buffer, ssize_t len, ssize_t ofs);
@@ -223,9 +243,11 @@ void um_add_strings_to_buffer_ring(struct um *machine, int bgid, VALUE strings);
 
 struct io_uring_sqe *um_get_sqe(struct um *machine, struct um_op *op);
 
+uint um_submit(struct um *machine);
 VALUE um_fiber_switch(struct um *machine);
 VALUE um_await(struct um *machine);
-
+VALUE um_wakeup(struct um *machine);
+void um_cancel_op(struct um *machine, struct um_op *op);
 void um_cancel_and_wait(struct um *machine, struct um_op *op);
 int um_check_completion(struct um *machine, struct um_op *op);
 
@@ -236,17 +258,22 @@ VALUE um_timeout(struct um *machine, VALUE interval, VALUE class);
 
 VALUE um_sleep(struct um *machine, double duration);
 VALUE um_periodically(struct um *machine, double interval);
-VALUE um_read(struct um *machine, int fd, VALUE buffer, size_t maxlen, ssize_t buffer_offset);
+VALUE um_read(struct um *machine, int fd, VALUE buffer, size_t maxlen, ssize_t buffer_offset, __u64 file_offset);
 size_t um_read_raw(struct um *machine, int fd, char *buffer, size_t maxlen);
 VALUE um_read_each(struct um *machine, int fd, int bgid);
-VALUE um_write(struct um *machine, int fd, VALUE buffer, size_t len);
-VALUE um_write_async(struct um *machine, int fd, VALUE buffer);
+VALUE um_write(struct um *machine, int fd, VALUE buffer, size_t len, __u64 file_offset);
+VALUE um_write_async(struct um *machine, int fd, VALUE buffer, size_t len, __u64 file_offset);
 VALUE um_close(struct um *machine, int fd);
 VALUE um_close_async(struct um *machine, int fd);
 VALUE um_open(struct um *machine, VALUE pathname, int flags, int mode);
 VALUE um_poll(struct um *machine, int fd, unsigned mask);
+VALUE um_select(struct um *machine, VALUE rfds, VALUE wfds, VALUE efds);
 VALUE um_waitid(struct um *machine, int idtype, int id, int options);
+
+#ifdef HAVE_RB_PROCESS_STATUS_NEW
 VALUE um_waitid_status(struct um *machine, int idtype, int id, int options);
+#endif
+
 VALUE um_statx(struct um *machine, int dirfd, VALUE path, int flags, unsigned int mask);
 
 VALUE um_accept(struct um *machine, int fd);
data/ext/um/um_async_op.c
CHANGED
data/ext/um/um_class.c
CHANGED
@@ -54,9 +54,33 @@ inline struct um *um_get_machine(VALUE self) {
   return um;
 }
 
-VALUE UM_initialize(VALUE self) {
+static inline uint get_sqpoll_timeout_msec(VALUE sqpoll_timeout) {
+  switch (TYPE(sqpoll_timeout)) {
+    case T_NIL:
+    case T_FALSE:
+      return 0;
+    case T_FLOAT:
+      return (uint)(NUM2DBL(sqpoll_timeout) * 1000);
+    case T_FIXNUM:
+      return NUM2UINT(sqpoll_timeout) * 1000;
+    case T_TRUE:
+      return 1000;
+    default:
+      rb_raise(eUMError, "Invalid sqpoll_timeout value");
+  }
+}
+
+VALUE UM_initialize(int argc, VALUE *argv, VALUE self) {
   struct um *machine = RTYPEDDATA_DATA(self);
-  um_setup(self, machine);
+  VALUE entries;
+  VALUE sqpoll_timeout;
+  rb_scan_args(argc, argv, "02", &entries, &sqpoll_timeout);
+
+  uint entries_i = NIL_P(entries) ? 0 : NUM2UINT(entries);
+  uint sqpoll_timeout_msec = get_sqpoll_timeout_msec(sqpoll_timeout);
+
+
+  um_setup(self, machine, entries_i, sqpoll_timeout_msec);
   return self;
 }
 
@@ -66,9 +90,25 @@ VALUE UM_setup_buffer_ring(VALUE self, VALUE size, VALUE count) {
   return INT2NUM(bgid);
 }
 
+VALUE UM_entries(VALUE self) {
+  struct um *machine = um_get_machine(self);
+  return UINT2NUM(machine->entries);
+}
+
+VALUE UM_mark_m(VALUE self, VALUE mark) {
+  struct um *machine = um_get_machine(self);
+  machine->mark = NUM2UINT(mark);
+  return self;
+}
+
 VALUE UM_pending_count(VALUE self) {
   struct um *machine = um_get_machine(self);
-  return
+  return UINT2NUM(machine->pending_count);
+}
+
+VALUE UM_total_op_count(VALUE self) {
+  struct um *machine = um_get_machine(self);
+  return UINT2NUM(machine->total_op_count);
 }
 
 VALUE UM_snooze(VALUE self) {
@@ -82,6 +122,17 @@ VALUE UM_yield(VALUE self) {
   return um_await(machine);
 }
 
+VALUE UM_wakeup(VALUE self) {
+  struct um *machine = um_get_machine(self);
+  return um_wakeup(machine);
+}
+
+VALUE UM_submit(VALUE self) {
+  struct um *machine = um_get_machine(self);
+  uint ret = um_submit(machine);
+  return UINT2NUM(ret);
+}
+
 VALUE UM_schedule(VALUE self, VALUE fiber, VALUE value) {
   struct um *machine = um_get_machine(self);
   um_schedule(machine, fiber, value);
@@ -109,12 +160,14 @@ VALUE UM_read(int argc, VALUE *argv, VALUE self) {
   VALUE buffer;
   VALUE maxlen;
   VALUE buffer_offset;
-  rb_scan_args(argc, argv, "22", &fd, &buffer, &maxlen, &buffer_offset);
+  VALUE file_offset;
+  rb_scan_args(argc, argv, "23", &fd, &buffer, &maxlen, &buffer_offset, &file_offset);
 
   ssize_t maxlen_i = NIL_P(maxlen) ? -1 : NUM2INT(maxlen);
   ssize_t buffer_offset_i = NIL_P(buffer_offset) ? 0 : NUM2INT(buffer_offset);
+  __u64 file_offset_i = NIL_P(file_offset) ? (__u64)-1 : NUM2UINT(file_offset);
 
-  return um_read(machine, NUM2INT(fd), buffer, maxlen_i, buffer_offset_i);
+  return um_read(machine, NUM2INT(fd), buffer, maxlen_i, buffer_offset_i, file_offset_i);
 }
 
 VALUE UM_read_each(VALUE self, VALUE fd, VALUE bgid) {
@@ -127,15 +180,27 @@ VALUE UM_write(int argc, VALUE *argv, VALUE self) {
   VALUE fd;
   VALUE buffer;
   VALUE len;
-  rb_scan_args(argc, argv, "21", &fd, &buffer, &len);
+  VALUE file_offset;
+  rb_scan_args(argc, argv, "22", &fd, &buffer, &len, &file_offset);
+
+  size_t len_i = NIL_P(len) ? (size_t)-1 : NUM2UINT(len);
+  __u64 file_offset_i = NIL_P(file_offset) ? (__u64)-1 : NUM2UINT(file_offset);
 
-
-  return um_write(machine, NUM2INT(fd), buffer, bytes);
+  return um_write(machine, NUM2INT(fd), buffer, len_i, file_offset_i);
 }
 
-VALUE UM_write_async(VALUE self, VALUE fd, VALUE buffer) {
+VALUE UM_write_async(int argc, VALUE *argv, VALUE self) {
   struct um *machine = um_get_machine(self);
-
+  VALUE fd;
+  VALUE buffer;
+  VALUE len;
+  VALUE file_offset;
+  rb_scan_args(argc, argv, "22", &fd, &buffer, &len, &file_offset);
+
+  size_t len_i = NIL_P(len) ? (size_t)-1 : NUM2UINT(len);
+  __u64 file_offset_i = NIL_P(file_offset) ? (__u64)-1 : NUM2UINT(file_offset);
+
+  return um_write_async(machine, NUM2INT(fd), buffer, len_i, file_offset_i);
 }
 
 VALUE UM_statx(VALUE self, VALUE dirfd, VALUE path, VALUE flags, VALUE mask) {
@@ -330,6 +395,11 @@ VALUE UM_poll(VALUE self, VALUE fd, VALUE mask) {
   return um_poll(machine, NUM2INT(fd), NUM2UINT(mask));
 }
 
+VALUE UM_select(VALUE self, VALUE rfds, VALUE wfds, VALUE efds) {
+  struct um *machine = um_get_machine(self);
+  return um_select(machine, rfds, wfds, efds);
+}
+
 VALUE UM_waitid(VALUE self, VALUE idtype, VALUE id, VALUE options) {
   struct um *machine = um_get_machine(self);
   return um_waitid(machine, NUM2INT(idtype), NUM2INT(id), NUM2INT(options));
@@ -423,8 +493,11 @@ void Init_UM(void) {
   cUM = rb_define_class("UringMachine", rb_cObject);
   rb_define_alloc_func(cUM, UM_allocate);
 
-  rb_define_method(cUM, "initialize", UM_initialize, 0);
+  rb_define_method(cUM, "initialize", UM_initialize, -1);
+  rb_define_method(cUM, "entries", UM_entries, 0);
+  rb_define_method(cUM, "mark", UM_mark_m, 1);
   rb_define_method(cUM, "pending_count", UM_pending_count, 0);
+  rb_define_method(cUM, "total_op_count", UM_total_op_count, 0);
   rb_define_method(cUM, "setup_buffer_ring", UM_setup_buffer_ring, 2);
 
   rb_define_singleton_method(cUM, "pipe", UM_pipe, 0);
@@ -436,11 +509,12 @@ void Init_UM(void) {
   rb_define_singleton_method(cUM, "kernel_version", UM_kernel_version, 0);
   rb_define_singleton_method(cUM, "debug", UM_debug, 1);
 
-
   rb_define_method(cUM, "schedule", UM_schedule, 2);
   rb_define_method(cUM, "snooze", UM_snooze, 0);
   rb_define_method(cUM, "timeout", UM_timeout, 2);
   rb_define_method(cUM, "yield", UM_yield, 0);
+  rb_define_method(cUM, "wakeup", UM_wakeup, 0);
+  rb_define_method(cUM, "submit", UM_submit, 0);
 
   rb_define_method(cUM, "close", UM_close, 1);
   rb_define_method(cUM, "close_async", UM_close_async, 1);
@@ -450,11 +524,13 @@ void Init_UM(void) {
   rb_define_method(cUM, "sleep", UM_sleep, 1);
   rb_define_method(cUM, "periodically", UM_periodically, 1);
   rb_define_method(cUM, "write", UM_write, -1);
-  rb_define_method(cUM, "write_async", UM_write_async, 2);
+  rb_define_method(cUM, "write_async", UM_write_async, -1);
   rb_define_method(cUM, "statx", UM_statx, 4);
 
   rb_define_method(cUM, "poll", UM_poll, 2);
+  rb_define_method(cUM, "select", UM_select, 3);
   rb_define_method(cUM, "waitid", UM_waitid, 3);
+
 #ifdef HAVE_RB_PROCESS_STATUS_NEW
   rb_define_method(cUM, "waitid_status", UM_waitid_status, 3);
 #endif
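Taken together, these changes extend the Ruby-facing API: `initialize` becomes variadic, and `entries`, `mark`, `total_op_count`, `wakeup`, `submit`, and `select` are newly exposed. A minimal sketch of the new surface, inferred from the signatures above (the exact semantics of `wakeup` are not spelled out in this diff and are an assumption here):

```ruby
machine = UringMachine.new(1024)  # optional args: entries, sqpoll_timeout

machine.entries          #=> 1024  SQ size (liburing sizes the CQ at double this)
machine.pending_count    #=> 0     operations submitted but not yet completed
machine.total_op_count   #=> 0     operations submitted since the ring was initialized

machine.mark(1)  # tag this instance so DEBUG_MARK-based debug logging can single it out
machine.submit   # flush unsubmitted SQEs to the kernel; returns the number submitted
machine.wakeup   # presumably interrupts a machine blocked waiting for CQEs
```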
data/ext/um/um_op.c
CHANGED
@@ -1,5 +1,42 @@
 #include "um.h"
 
+const char * um_op_kind_name(enum um_op_kind kind) {
+  switch (kind) {
+    case OP_TIMEOUT:           return "OP_TIMEOUT";
+    case OP_SCHEDULE:          return "OP_SCHEDULE";
+    case OP_SLEEP:             return "OP_SLEEP";
+    case OP_OPEN:              return "OP_OPEN";
+    case OP_READ:              return "OP_READ";
+    case OP_WRITE:             return "OP_WRITE";
+    case OP_WRITE_ASYNC:       return "OP_WRITE_ASYNC";
+    case OP_CLOSE:             return "OP_CLOSE";
+    case OP_CLOSE_ASYNC:       return "OP_CLOSE_ASYNC";
+    case OP_STATX:             return "OP_STATX";
+    case OP_ACCEPT:            return "OP_ACCEPT";
+    case OP_RECV:              return "OP_RECV";
+    case OP_SEND:              return "OP_SEND";
+    case OP_SEND_BUNDLE:       return "OP_SEND_BUNDLE";
+    case OP_SOCKET:            return "OP_SOCKET";
+    case OP_CONNECT:           return "OP_CONNECT";
+    case OP_BIND:              return "OP_BIND";
+    case OP_LISTEN:            return "OP_LISTEN";
+    case OP_GETSOCKOPT:        return "OP_GETSOCKOPT";
+    case OP_SETSOCKOPT:        return "OP_SETSOCKOPT";
+    case OP_SHUTDOWN:          return "OP_SHUTDOWN";
+    case OP_SHUTDOWN_ASYNC:    return "OP_SHUTDOWN_ASYNC";
+    case OP_POLL:              return "OP_POLL";
+    case OP_WAITID:            return "OP_WAITID";
+    case OP_FUTEX_WAIT:        return "OP_FUTEX_WAIT";
+    case OP_FUTEX_WAKE:        return "OP_FUTEX_WAKE";
+    case OP_ACCEPT_MULTISHOT:  return "OP_ACCEPT_MULTISHOT";
+    case OP_READ_MULTISHOT:    return "OP_READ_MULTISHOT";
+    case OP_RECV_MULTISHOT:    return "OP_RECV_MULTISHOT";
+    case OP_TIMEOUT_MULTISHOT: return "OP_TIMEOUT_MULTISHOT";
+    case OP_SLEEP_MULTISHOT:   return "OP_SLEEP_MULTISHOT";
+    default:                   return "UNKNOWN_OP_KIND";
+  }
+}
+
 inline void um_op_clear(struct um *machine, struct um_op *op) {
   memset(op, 0, sizeof(struct um_op));
   op->fiber = Qnil;
data/ext/um/um_sync.c
CHANGED
@@ -4,13 +4,13 @@
 
 #define FUTEX2_SIZE_U32 0x02
 
-
+// The value argument is the current (known) futex value.
+void um_futex_wait(struct um *machine, uint32_t *futex, uint32_t value) {
   struct um_op op;
   um_prep_op(machine, &op, OP_FUTEX_WAIT, 0);
   struct io_uring_sqe *sqe = um_get_sqe(machine, &op);
   io_uring_prep_futex_wait(
-    sqe, (uint32_t *)futex, value, FUTEX_BITSET_MATCH_ANY,
-    FUTEX2_SIZE_U32, 0
+    sqe, (uint32_t *)futex, value, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0
   );
 
   VALUE ret = um_fiber_switch(machine);
@@ -29,10 +29,8 @@ void um_futex_wake(struct um *machine, uint32_t *futex, uint32_t num_waiters) {
   struct um_op op;
   um_prep_op(machine, &op, OP_FUTEX_WAKE, 0);
   struct io_uring_sqe *sqe = um_get_sqe(machine, &op);
-  // submit futex_wait
   io_uring_prep_futex_wake(
-    sqe, (uint32_t *)futex, num_waiters, FUTEX_BITSET_MATCH_ANY,
-    FUTEX2_SIZE_U32, 0
+    sqe, (uint32_t *)futex, num_waiters, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0
   );
 
   VALUE ret = um_fiber_switch(machine);
@@ -45,12 +43,11 @@ void um_futex_wake(struct um *machine, uint32_t *futex, uint32_t num_waiters) {
 void um_futex_wake_transient(struct um *machine, uint32_t *futex, uint32_t num_waiters) {
   struct io_uring_sqe *sqe = um_get_sqe(machine, NULL);
   io_uring_prep_futex_wake(
-    sqe, (uint32_t *)futex, num_waiters, FUTEX_BITSET_MATCH_ANY,
-    FUTEX2_SIZE_U32, 0
+    sqe, (uint32_t *)futex, num_waiters, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0
   );
+  um_submit(machine);
 }
 
-
 #define MUTEX_LOCKED 1
 #define MUTEX_UNLOCKED 0
 
@@ -210,7 +207,6 @@ static inline VALUE um_queue_add(struct um *machine, struct um_queue *queue, VALUE value) {
   else queue_add_tail(queue, value);
 
   queue->count++;
-
   queue->state = QUEUE_READY;
   if (queue->num_waiters)
     um_futex_wake_transient(machine, &queue->state, 1);
@@ -241,10 +237,8 @@ VALUE um_queue_remove_start(VALUE arg) {
     um_futex_wait(ctx->machine, &ctx->queue->state, QUEUE_EMPTY);
   }
 
-
-
-  if (!ctx->queue->tail)
-    um_raise_internal_error("Internal error: queue should be in ready state!");
+  assert(ctx->queue->state == QUEUE_READY);
+  assert(ctx->queue->tail);
 
   ctx->queue->count--;
   return (ctx->op == QUEUE_POP ? queue_remove_tail : queue_remove_head)(ctx->queue);
data/grant-2025/journal.md
CHANGED
@@ -205,7 +205,8 @@ Ruby I/O layer. Some interesting warts in the Ruby `IO` implementation:
 I'll see if I can prepare a PR for that to be merged for the Ruby 4.0 release.
 
 For the time being, I've added a `#post_fork` method to the UM fiber scheduler
-which should be manually called after a fork. (commit
+which should be manually called after a fork. (commit
+2c7877385869c6acbdd8354e2b2909cff448651b)
 
 - Added two new low-level APIs for waiting on processes, instead of
   `UM#waitpid`, using the io_uring version of `waitid`. The vanilla version
@@ -227,3 +228,126 @@
   ...
   pid2, status = machine.waitid(P_PIDFD, fd, UM::WEXITED)
   ```
+
+# 2025-11-28
+
+- On Samuel's suggestion, I've submitted a
+  [PR](https://github.com/ruby/ruby/pull/15342) for adding a
+  `Fiber::Scheduler#process_fork` hook that is automatically invoked after a
+  fork. This is a continuation of the `#post_fork` method. I still have a lot
+  to learn about working with the Ruby core code, but I'm really excited about
+  the possibility of this PR (and the [previous
+  one](https://github.com/ruby/ruby/pull/15213) as well) getting merged in time
+  for the Ruby 4.0 release.
+- Added a bunch of tests for `UM::FiberScheduler`: socket I/O, file I/O, mutex,
+  queue, waiting for threads. In the process I discovered a lot of things that
+  can be improved in the way Ruby invokes the fiber scheduler.
+
+  - For regular files, Ruby assumes file I/O can never be non-blocking (or
+    async), and thus invokes the `#blocking_operation_wait` hook in order to
+    perform the I/O in a separate thread. With io_uring, of course, file I/O
+    *is* asynchronous.
+  - For sockets there are no specialized hooks, like `#socket_send` etc.
+    Instead, Ruby makes the socket fds non-blocking and invokes `#io_wait` to
+    wait for the socket to be ready.
+
+  I find it interesting how io_uring breaks a lot of assumptions about how I/O
+  should be done.
+
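For reference, the two code paths described above map to two different scheduler hooks. A sketch of their shapes, following the `Fiber::Scheduler` interface (illustrative only, not the actual `UM::FiberScheduler` implementation; `SketchScheduler` is a hypothetical class):

```ruby
class SketchScheduler
  # Regular files: Ruby assumes the operation can block and hands the whole
  # operation to the scheduler, which is expected to run it off-thread.
  def blocking_operation_wait(work)
    Thread.new { work.call }.value
  end

  # Sockets: Ruby puts the fd in non-blocking mode and asks the scheduler to
  # park the current fiber until the fd is ready for the requested events.
  def io_wait(io, events, timeout)
    # e.g. submit a POLL SQE for io.fileno and switch fibers until its CQE arrives
  end
end
```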
+# 2025-12-03
+
+- Samuel and I continued discussing the behavior of the fiber scheduler after a
+  fork. After talking it through, we decided the best course of action would be
+  to remove the fiber scheduler after a fork, rather than to introduce a
+  `process_fork` hook. This is the safer choice, since a scheduler risks carrying
+  over some of its state across a fork, leading to unexpected behavior.
+
+  Another problem I uncovered is that if a fork is done from a non-blocking
+  fiber, the main fiber of the forked process (which "inherits" the forking
+  fiber) stays in non-blocking mode, which may also lead to unexpected behavior,
+  since the main fiber of all Ruby threads should be in blocking mode.
+
+  So I submitted a new [PR](https://github.com/ruby/ruby/pull/15385) that
+  corrects these two problems.
+
+- I mapped the remaining missing hooks in the UringMachine fiber scheduler
+  implementation, and made the tests more robust by checking that the different
+  scheduler hooks were actually being called.
+
+- Continued implementing the missing fiber scheduler hooks: `#fiber_interrupt`,
+  `#address_resolve`, `#timeout_after`. For the most part, they were simple to
+  implement. I probably spent most of my time figuring out how to test them,
+  rather than implementing them. Most of the hooks involve just a few lines of
+  code, with many of them consisting of a single line calling into the
+  relevant UringMachine low-level API.
+
+- Implemented the `#io_select` hook, which involved implementing a low-level
+  `UM#select` method. This method took some effort to implement, since it needs
+  to handle an arbitrary number of file descriptors to check for readiness. We
+  need to create a separate SQE for each fd we want to poll. When one or more
+  CQEs arrive for polled fds, we also need to cancel all poll operations that
+  have not completed.
+
+  Since in many cases `IO.select` is called with just a single IO, I also added
+  a special-case implementation of `UM#select` that specifically handles a
+  single fd.
+
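For illustration, here is roughly how the new `UM#select` might be driven, assuming (per the `UM_select` signature above) that it takes three arrays of file descriptors and, like `IO.select`, returns the ready subsets; `UM.pipe` returning a pair of raw fds is also an assumption:

```ruby
machine = UringMachine.new
r, w = UM.pipe  # assumed: returns a pair of raw fds

machine.write(w, "ping")

# One POLL SQE is created per fd; once a CQE arrives, any remaining
# poll operations are cancelled (see the journal entry above).
readable, _writable, _exceptional = machine.select([r], [w], [])
```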
+# 2025-12-04
+
+- Implemented a worker pool for performing blocking operations in the scheduler.
+  Up until now, each scheduler started its own worker thread for performing
+  blocking operations for use in the `#blocking_operation_wait` hook. The new
+  implementation uses a worker thread pool shared by all schedulers, with the
+  worker count limited to the CPU count. Workers are started when needed.
+
+  I also added an optional `entries` argument to set the SQE and CQE buffer
+  sizes when starting a new `UringMachine` instance. The default size is 4096
+  SQE entries (liburing by default makes the CQE buffer size double that of the
+  SQE buffer). The blocking operations worker threads specify a value of 4, since
+  they only use their UringMachine instance for popping jobs off the job queue
+  and pushing the blocking operation result back to the scheduler.
+
+- Added support for a `file_offset` argument in `UM#read` and `UM#write`, in
+  preparation for implementing the `#io_pread` and `#io_pwrite` hooks (see the
+  sketch after this entry). The `UM#write_async` API, which permits writing to
+  a file descriptor without waiting for the operation to complete, got support
+  for specifying `length` and `file_offset` arguments as well. In addition,
+  `UM#write` and `UM#write_async` got short-circuit logic for writes with a
+  length of 0.
+
+- Added support for specifying a buffer offset in the `#io_read` and `#io_write`
+  hooks.
+
+- Added support for timeouts in the `#block`, `#io_read` and `#io_write` hooks.
+
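A sketch of the offset-aware calls, following the argument order in the `UM_read` and `UM_write` bindings above (`fd, buffer, maxlen, buffer_offset, file_offset` for reads; `fd, buffer, len, file_offset` for writes). When `file_offset` is omitted it defaults to `(__u64)-1`, which io_uring interprets as "use the current file position":

```ruby
machine = UringMachine.new
f = File.open("data.bin", "w+")

# pwrite(2)-style: write 5 bytes at absolute file offset 100.
machine.write(f.fileno, "hello", 5, 100)

# pread(2)-style: read 5 bytes from file offset 100 into buf at buffer offset 0
# (assuming UM#read fills the given buffer in place).
buf = +""
machine.read(f.fileno, buf, 5, 0, 100)
buf #=> "hello"

f.close
```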
+# 2025-12-05
+
+- I found and fixed a problem with how `futex_wake` was done in the low-level
+  UringMachine code handling mutexes and queues. This fixed a deadlock in the
+  scheduler background worker pool, where clients of the pool were not properly
+  woken after the submitted operation was done.
+
+- I finished work on the `#io_pread` and `#io_pwrite` hooks. Unfortunately, the
+  test for `#io_pwrite` consistently hangs (not on `IO#pwrite` itself, but
+  rather on closing the file). With Samuel's help, hopefully we'll find a
+  solution...
+
+- With those two last hooks, the fiber scheduler implementation is now feature
+  complete! While I have written test cases for the different fiber scheduler
+  hooks, I'd like to add more tests - especially tests that exercise multiple
+  hooks, tests with high concurrency, and integration tests that check how the
+  fiber scheduler plays with Ruby APIs like `Net::HTTP` and the `socket` API in
+  general.
+
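With a scheduler installed, `IO#pread` and `IO#pwrite` route through these hooks. A sketch, assuming `UM::FiberScheduler` can be constructed without arguments (its constructor is not shown in this diff):

```ruby
Fiber.set_scheduler(UM::FiberScheduler.new)

Fiber.schedule do
  File.open("data.bin", "w+") do |f|
    f.pwrite("hello", 100)  # routed through the #io_pwrite hook
    f.pread(5, 100)         # routed through #io_pread; returns "hello"
  end
end
```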
+# 2025-12-06
+
+- Samuel has found the issue with pwrite (it turns out the `#io_pwrite` hook
+  was being invoked with the GVL released), and [fixed
+  it](https://github.com/ruby/ruby/pull/15428). So now `#pwrite` works
+  correctly with a fiber scheduler!
+
+- I followed Samuel's suggestion and incorporated some debug logging into the
+  extension code interfacing with liburing, in order to facilitate debugging
+  when issues are encountered.
+
+- Added support for [SQPOLL
+  mode](https://unixism.net/loti/tutorial/sq_poll.html) when setting up a
+  UringMachine instance. It's not clear to me what the performance implications
+  of that are, but I'll try to make some time to check this against
+  [TP2](https://github.com/noteflakes/tp2), a UringMachine-based web server I'm
+  currently using in a bunch of projects.
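Going by `get_sqpoll_timeout_msec` in the um_class.c diff above, SQPOLL is controlled by the second argument to `UringMachine.new`: `nil` or `false` disables it, `true` maps to a 1000 ms kernel-thread idle timeout, and a numeric value is taken as seconds. A sketch:

```ruby
machine = UringMachine.new             # defaults: 4096 entries, no SQPOLL
worker  = UringMachine.new(4)          # tiny ring, as used by the blocking-op workers
sqpoll  = UringMachine.new(4096, 0.1)  # SQPOLL with a 100 ms idle timeout
```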