uringmachine 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +3 -4
- data/CHANGELOG.md +20 -0
- data/TODO.md +0 -38
- data/examples/bm_queue.rb +2 -1
- data/examples/bm_write.rb +4 -1
- data/ext/um/extconf.rb +1 -1
- data/ext/um/um.c +269 -49
- data/ext/um/um.h +48 -21
- data/ext/um/um_async_op.c +1 -1
- data/ext/um/um_class.c +89 -13
- data/ext/um/um_op.c +37 -0
- data/ext/um/um_sync.c +8 -14
- data/grant-2025/journal.md +125 -1
- data/grant-2025/tasks.md +102 -33
- data/lib/uringmachine/fiber_scheduler.rb +191 -64
- data/lib/uringmachine/version.rb +1 -1
- data/test/test_fiber_scheduler.rb +519 -17
- data/test/test_um.rb +298 -23
- data/uringmachine.gemspec +5 -5
- data/vendor/liburing/src/include/liburing/io_uring.h +1 -0
- data/vendor/liburing/src/include/liburing.h +13 -0
- data/vendor/liburing/src/liburing-ffi.map +1 -0
- data/vendor/liburing/test/bind-listen.c +175 -13
- data/vendor/liburing/test/read-write.c +4 -4
- data/vendor/liburing/test/ringbuf-read.c +4 -4
- data/vendor/liburing/test/send_recv.c +8 -7
- metadata +15 -15
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e14941110d09a575728da68cb038a02334d151f0d384d88ad67a34b21a49d234
+  data.tar.gz: 158299106b117a973163ae480ea4e9827a0cb88eb76b2323da04fd631e8916ca
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f71a5b6b6740fea281df8880511b424a0c759e876e17f79a6b661453416d13c2d9d65e0c53ddee5df066ca92472571ffae8c0e9a4ca4fd745d02705a8645965d
+  data.tar.gz: baa86b9bc0009c69bfe894aae3489d43ed28d1fca44a24a9ddc7cc1625a54de6c17f65731c1a707af561942174f9584554a183a67f8428d5855e799c274db0b6
data/.github/workflows/test.yml
CHANGED

@@ -11,9 +11,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        # macos-latest uses arm64, macos-13 uses x86
         os: [ubuntu-latest]
-        ruby: ['
+        ruby: ['head']

     name: ${{matrix.os}}, ${{matrix.ruby}}

@@ -32,5 +31,5 @@ jobs:
     - name: Compile C-extension
       run: bundle exec rake compile
     - name: Run tests
-
-      run: bundle exec rake test
+      run: bundle exec ruby test/run.rb --verbose
+      # run: bundle exec rake test
data/CHANGELOG.md
CHANGED

@@ -1,3 +1,23 @@
+# 0.21.0 2025-12-06
+
+- Add `UM#submit`
+- Update liburing
+- Do not release GVL in um_submit if SQ does not need entering the kernel
+- Fix compilation when rb_process_status_new is not available
+- Fix um_futex_wake_transient to submit SQE, fix futex_wait usage
+- Add debug logging for key io_uring interactions
+- Add UM#mark and DEBUG_MARK for debugging specific UM instances
+- Short-circuit zero-length writes
+- Add optional file_offset argument to #read, #write. Add optional len and file_offset
+  arguments to #write_async
+- Add support for specifying SQPOLL mode and SQ idle timeout in `UM#initialize`
+- Add support for specifying number of SQ entries in `UM#initialize`
+- Implement global worker pool for blocking operations in fiber scheduler
+- Finish implementing all fiber scheduler hooks
+- Add `UM#select`
+- Add `UM#wakeup`
+- Add `UM#total_op_count`
+
 # 0.20.0 2025-11-26

 - Add `UM.pidfd_open`, `UM.pidfd_send_signal` methods
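The positional I/O entries above map onto the C signatures in the um.c diff below (um_read gains a trailing __u64 file_offset; um_write and um_write_async gain len and file_offset). A hypothetical Ruby-level sketch of pread/pwrite-style usage; the argument order is an assumption inferred from those C signatures, not the gem's documented API:

```ruby
# Hypothetical sketch: argument order inferred from um_read/um_write below.
require 'uringmachine'

machine = UM.new
fd = IO.sysopen('/tmp/um_offsets', 'w+')

machine.write(fd, 'hello world')  # write at offset 0 (the default file_offset)
machine.write(fd, 'WORLD', 5, 6)  # assumed: len = 5, file_offset = 6 (pwrite-style)

buf = +''
machine.read(fd, buf, 5, 0, 6)    # assumed: maxlen, buffer_offset, file_offset (pread-style)
buf                               #=> "WORLD"
```

Per the "short-circuit zero-length writes" entry, a write whose effective length is zero (empty buffer, or len clamped to zero) returns 0 without ever touching the ring.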
data/TODO.md
CHANGED

@@ -1,43 +1,5 @@
 ## immediate

-- make a reproducer for segfault on timeout, spin lots of fibers where a timeout
-  wraps a #shift call (from an empty queue).
-- see also: https://mensfeld.pl/2025/11/ruby-ffi-gc-bug-hash-becomes-string/
-
-  Analysis:
-
-  - The segfault is related to timeouts
-  - Looking at process_runqueue_op (um.c):
-
-  ```c
-  inline VALUE process_runqueue_op(struct um *machine, struct um_op *op) {
-    VALUE fiber = op->fiber;
-    VALUE value = op->value;
-
-    // on timeout, the op flags are changed to turn on OP_F_TRANSIENT
-    if (unlikely(op->flags & OP_F_TRANSIENT))
-      // here the op is freed, so the value is not visible to the GC anymore
-      um_op_free(machine, op);
-
-    // if a GC occurs here, we risk a segfault
-
-    // value is used
-    return rb_fiber_transfer(fiber, 1, &value);
-  }
-  ```
-
-  - So, a possible solution is to put a `RB_GC_GUARD` after the `return`.
-  - But first, I want to be able to reproduce it. We can start by setting
-    `GC.stress = true` on tests and see if we segfault.
-
-## FiberScheduler implementation
-
-Some resources:
-
-- https://github.com/socketry/async/blob/main/context/getting-started.md
-- https://github.com/socketry/async/blob/main/context/scheduler.md
-- https://github.com/socketry/async/blob/main/lib/async/scheduler.rb#L28
-
 ## useful concurrency tools

 - debounce
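The analysis removed above still describes a useful reproduction technique. A minimal sketch of the idea, assuming queue/timeout API names (UM::Queue, UM#spin, UM#timeout, UM#shift) inferred from the surrounding text rather than confirmed:

```ruby
# Sketch of the reproduction idea from the removed TODO entry: many fibers,
# each wrapping #shift on an empty queue in a timeout, with GC.stress on so
# that any VALUE invisible to the GC gets collected (and crashes) quickly.
# All API names here are assumptions.
require 'uringmachine'

machine = UM.new
queue = UM::Queue.new
GC.stress = true

100.times do
  machine.spin do
    machine.timeout(0.001, RuntimeError) { machine.shift(queue) }
  rescue RuntimeError
    # expected: the timeout fires while the fiber waits on the empty queue
  end
end
machine.sleep(0.05)
GC.stress = false
```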
data/examples/bm_queue.rb
CHANGED

@@ -5,11 +5,12 @@ require 'bundler/inline'
 gemfile do
   source 'https://rubygems.org'
   gem 'uringmachine', path: '..'
+  gem 'benchmark'
   gem 'benchmark-ips'
 end

 require 'benchmark/ips'
-
+require_relative '../lib/uringmachine'

 COUNT = 1000
 NUM_PRODUCERS = 8
data/examples/bm_write.rb
CHANGED

@@ -11,7 +11,7 @@ end
 require 'benchmark'
 require 'uringmachine'

-ITERATIONS =
+ITERATIONS = 10000
 BUF = ('*' * 8192).freeze
 FN = '/tmp/bm_write'

@@ -47,6 +47,9 @@ ensure
   fio.close
 end

+run_io_write(1)
+run_um_write(1)
+
 Benchmark.bm do |x|
   [1, 2, 4, 8].each do |c|
     x.report("IO (#{c} threads)") { run_io_write(c) }
data/ext/um/extconf.rb
CHANGED

@@ -49,7 +49,7 @@ if !find_library('uring', nil, File.join(liburing_path, 'src'))
   raise "Couldn't find liburing.a"
 end

-have_func("rb_process_status_new")
+have_func("&rb_process_status_new")

 $defs << "-DUM_KERNEL_VERSION=#{config[:kernel_version]}"
 $defs << '-DHAVE_IO_URING_PREP_BIND' if config[:prep_bind]
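The added & makes a real difference here: have_func compiles and links a small test program, and the plain form emits a call to the named function. rb_process_status_new has no prototype in Ruby's public headers, so on compilers that reject implicit function declarations the plain check fails even when the symbol exists. The &-prefixed form is the mkmf idiom for taking the function's address instead, which links against the symbol without needing a declaration:

```ruby
# mkmf sketch of the two forms (the '&' idiom is standard mkmf behavior)
require 'mkmf'

# have_func("rb_process_status_new")  # compiles a *call*; needs a prototype
have_func("&rb_process_status_new")   # takes the *address*; links without one
# on success, defines HAVE_RB_PROCESS_STATUS_NEW, which um.c tests below
```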
data/ext/um/um.c
CHANGED

@@ -1,25 +1,34 @@
-#include <float.h>
 #include "um.h"
+#include <float.h>
 #include <ruby/thread.h>
+#include <assert.h>
+#include <poll.h>
+
+#define DEFAULT_ENTRIES 4096
+
+inline void prepare_io_uring_params(struct io_uring_params *params, uint sqpoll_timeout_msec) {
+  memset(params, 0, sizeof(struct io_uring_params));
+  params->flags = IORING_SETUP_SUBMIT_ALL;
+  if (sqpoll_timeout_msec) {
+    params->flags |= IORING_SETUP_SQPOLL;
+    params->sq_thread_idle = sqpoll_timeout_msec;
+  }
+  else
+    params->flags |= IORING_SETUP_COOP_TASKRUN;
+}

-void um_setup(VALUE self, struct um *machine) {
+void um_setup(VALUE self, struct um *machine, uint entries, uint sqpoll_timeout_msec) {
   memset(machine, 0, sizeof(struct um));

   RB_OBJ_WRITE(self, &machine->self, self);

-
-
+  machine->entries = (entries > 0) ? entries : DEFAULT_ENTRIES;
+  machine->sqpoll_mode = !!sqpoll_timeout_msec;

-
-
-
-
-  // if ENOMEM is returned, try with half as many entries
-  if (unlikely(ret == -ENOMEM && prepared_limit > 64))
-    prepared_limit = prepared_limit / 2;
-  else
-    rb_syserr_fail(-ret, strerror(-ret));
-}
+  struct io_uring_params params;
+  prepare_io_uring_params(&params, sqpoll_timeout_msec);
+  int ret = io_uring_queue_init_params(machine->entries, &machine->ring, &params);
+  if (ret) rb_syserr_fail(-ret, strerror(-ret));
   machine->ring_initialized = 1;
 }
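This hunk replaces the old retry-on-ENOMEM setup with explicit ring parameters: the caller now picks the number of SQ entries (defaulting to 4096) and, optionally, SQPOLL mode with an idle timeout; without SQPOLL, IORING_SETUP_COOP_TASKRUN is used instead. A hypothetical sketch of how these reach Ruby via the UM#initialize options mentioned in the changelog; the keyword names are assumptions:

```ruby
# Hypothetical sketch: keyword names are assumptions, not the documented API.
default_machine = UM.new                            # 4096 entries, no SQPOLL
small_machine   = UM.new(entries: 256)              # smaller SQ/CQ rings
sqpoll_machine  = UM.new(sqpoll_timeout_msec: 100)  # kernel SQ polling thread
                                                    # that idles after 100 ms
```

With SQPOLL enabled, a kernel thread polls the submission queue, so userspace can often skip the io_uring_enter syscall entirely; the sq_ring_needs_enter check in the next hunks builds on exactly that.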
@@ -39,6 +48,11 @@ inline void um_teardown(struct um *machine) {
 }

 inline struct io_uring_sqe *um_get_sqe(struct um *machine, struct um_op *op) {
+  if (DEBUG) fprintf(stderr, "-> %p um_get_sqe: op->kind=%s unsubmitted=%d pending=%d total=%lu\n",
+    &machine->ring, op ? um_op_kind_name(op->kind) : "NULL", machine->unsubmitted_count,
+    machine->pending_count, machine->total_op_count
+  );
+
   struct io_uring_sqe *sqe;
   sqe = io_uring_get_sqe(&machine->ring);
   if (likely(sqe)) goto done;
@@ -57,22 +71,78 @@
   sqe->user_data = (long long)op;
   sqe->flags = 0;
   machine->unsubmitted_count++;
-  if (op) machine->pending_count++;
+  if (op) {
+    machine->pending_count++;
+    machine->total_op_count++;
+  }
   return sqe;
 }

+struct um_submit_ctx {
+  struct um *machine;
+  int result;
+};
+
+// adapted from liburing/src/queue.c
+static inline bool sq_ring_needs_enter(struct um *machine) {
+  if (machine->sqpoll_mode) {
+    io_uring_smp_mb();
+    if (unlikely(IO_URING_READ_ONCE(*machine->ring.sq.kflags) & IORING_SQ_NEED_WAKEUP))
+      return true;
+  }
+  return true;
+}
+
+void *um_submit_without_gvl(void *ptr) {
+  struct um_submit_ctx *ctx = ptr;
+  ctx->result = io_uring_submit(&ctx->machine->ring);
+  return NULL;
+}
+
+inline uint um_submit(struct um *machine) {
+  if (DEBUG) fprintf(stderr, "-> %p um_submit: unsubmitted=%d pending=%d total=%lu\n",
+    &machine->ring, machine->unsubmitted_count, machine->pending_count, machine->total_op_count
+  );
+  if (!machine->unsubmitted_count) {
+    if (DEBUG) fprintf(stderr, "<- %p um_submit: no unsubmitted SQEs, early return\n",
+      &machine->ring
+    );
+    return 0;
+  }
+
+  struct um_submit_ctx ctx = { .machine = machine };
+  if (sq_ring_needs_enter(machine))
+    rb_thread_call_without_gvl(um_submit_without_gvl, (void *)&ctx, RUBY_UBF_IO, 0);
+  else
+    ctx.result = io_uring_submit(&machine->ring);
+
+  if (DEBUG) fprintf(stderr, "<- %p um_submit: result=%d\n",
+    &machine->ring, ctx.result
+  );
+
+  if (ctx.result < 0)
+    rb_syserr_fail(-ctx.result, strerror(-ctx.result));
+
+  machine->unsubmitted_count = 0;
+  return ctx.result;
+}
+
 static inline void um_process_cqe(struct um *machine, struct io_uring_cqe *cqe) {
   struct um_op *op = (struct um_op *)cqe->user_data;
+  if (DEBUG) {
+    if (op) fprintf(stderr, "<- %p um_process_cqe: op %p kind %s flags %d cqe_res %d cqe_flags %d pending %d\n",
+      &machine->ring, op, um_op_kind_name(op->kind), op->flags, cqe->res, cqe->flags, machine->pending_count
+    );
+    else fprintf(stderr, "<- %p um_process_cqe: op NULL cqe_res %d cqe_flags %d pending %d\n",
+      &machine->ring, cqe->res, cqe->flags, machine->pending_count
+    );
+  }
   if (unlikely(!op)) return;

+
   if (!(cqe->flags & IORING_CQE_F_MORE))
     machine->pending_count--;

-  // printf(
-  //   ":process_cqe op %p kind %d flags %d cqe_res %d cqe_flags %d pending %d\n",
-  //   op, op->kind, op->flags, cqe->res, cqe->flags, machine->pending_count
-  // );
-
   if (op->flags & OP_F_FREE_ON_COMPLETE) {
     if (op->flags & OP_F_TRANSIENT)
       um_op_transient_remove(machine, op);
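um_submit above is the backend of the new UM#submit: it flushes accumulated SQEs, and per the changelog it only drops the GVL when submission actually has to enter the kernel (sq_ring_needs_enter, adapted from liburing's queue.c). A hypothetical batching sketch; whether #write_async defers kernel entry until an explicit #submit or the next wait is inferred from this code, not from documented behavior:

```ruby
# Hypothetical sketch: queue several async writes, then flush the SQ once.
machine = UM.new
fd = IO.sysopen('/tmp/um_batch', 'w')

10.times do |i|
  machine.write_async(fd, "line #{i}\n")  # prepares an SQE; does not block
end
machine.submit  # one flush for all queued SQEs; um_submit returns the count
```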
@@ -81,9 +151,10 @@ static inline void um_process_cqe(struct um *machine, struct io_uring_cqe *cqe)
     return;
   }

+  op->flags |= OP_F_COMPLETED;
   if (unlikely((cqe->res == -ECANCELED) && (op->flags & OP_F_IGNORE_CANCELED))) return;
+  if (unlikely(op->flags & OP_F_CANCELED)) return;

-  op->flags |= OP_F_COMPLETED;
   if (op->flags & OP_F_TRANSIENT)
     um_op_transient_remove(machine, op);

@@ -108,6 +179,10 @@ static inline int cq_ring_needs_flush(struct io_uring *ring) {
 }

 static inline int um_process_ready_cqes(struct um *machine) {
+  if (DEBUG) fprintf(stderr, "-> %p um_process_ready_cqes: unsubmitted=%d pending=%d total=%lu\n",
+    &machine->ring, machine->unsubmitted_count, machine->pending_count, machine->total_op_count
+  );
+
   unsigned total_count = 0;
 iterate:
   bool overflow_checked = false;
@@ -124,12 +199,21 @@ iterate:
   if (overflow_checked) goto done;

   if (cq_ring_needs_flush(&machine->ring)) {
-
+    if (DEBUG) fprintf(stderr, "-> %p io_uring_enter\n", &machine->ring);
+    int ret = io_uring_enter(machine->ring.ring_fd, 0, 0, IORING_ENTER_GETEVENTS, NULL);
+    if (DEBUG) fprintf(stderr, "<- %p io_uring_enter: result=%d\n", &machine->ring, ret);
+    if (ret < 0)
+      rb_syserr_fail(-ret, strerror(-ret));
+
     overflow_checked = true;
     goto iterate;
   }

 done:
+  if (DEBUG) fprintf(stderr, "<- %p um_process_ready_cqes: total_processed=%u\n",
+    &machine->ring, total_count
+  );
+
   return total_count;
 }

@@ -143,18 +227,33 @@ struct wait_for_cqe_ctx {
 void *um_wait_for_cqe_without_gvl(void *ptr) {
   struct wait_for_cqe_ctx *ctx = ptr;
   if (ctx->machine->unsubmitted_count) {
-
+    if (DEBUG) fprintf(stderr, "-> %p io_uring_submit_and_wait_timeout: unsubmitted=%d pending=%d total=%lu\n",
+      &ctx->machine->ring, ctx->machine->unsubmitted_count, ctx->machine->pending_count,
+      ctx->machine->total_op_count
+    );

     // Attn: The io_uring_submit_and_wait_timeout will not return -EINTR if
     // interrupted with a signal. We can detect this by testing ctx->cqe for
     // NULL.
     //
     // https://github.com/axboe/liburing/issues/1280
-    int
-    ctx->
+    int ret = io_uring_submit_and_wait_timeout(&ctx->machine->ring, &ctx->cqe, ctx->wait_nr, NULL, NULL);
+    ctx->machine->unsubmitted_count = 0;
+    if (DEBUG) fprintf(stderr, "<- %p io_uring_submit_and_wait_timeout: result=%d\n",
+      &ctx->machine->ring, ret
+    );
+    ctx->result = (ret > 0 && !ctx->cqe) ? -EINTR : ret;
   }
-  else
+  else {
+    if (DEBUG) fprintf(stderr, "-> %p io_uring_wait_cqes: unsubmitted=%d pending=%d total=%lu\n",
+      &ctx->machine->ring, ctx->machine->unsubmitted_count, ctx->machine->pending_count,
+      ctx->machine->total_op_count
+    );
     ctx->result = io_uring_wait_cqes(&ctx->machine->ring, &ctx->cqe, ctx->wait_nr, NULL, NULL);
+    if (DEBUG) fprintf(stderr, "<- %p io_uring_wait_cqes: result=%d\n",
+      &ctx->machine->ring, ctx->result
+    );
+  }
   return NULL;
 }

@@ -201,9 +300,14 @@ inline VALUE process_runqueue_op(struct um *machine, struct um_op *op) {
 }

 inline VALUE um_fiber_switch(struct um *machine) {
+  if (DEBUG) fprintf(stderr, "-> %p um_fiber_switch: unsubmitted=%d pending=%d total=%lu\n",
+    &machine->ring, machine->unsubmitted_count, machine->pending_count, machine->total_op_count
+  );
   while (true) {
     struct um_op *op = um_runqueue_shift(machine);
     if (op) {
+      if (unlikely(op->flags & OP_F_RUNQUEUE_SKIP)) continue;
+
       // in case of a snooze, we need to prevent a situation where completions
       // are not processed because the runqueue is never empty. Theoretically,
       // we can still have a situation where multiple fibers are all doing a
@@ -231,16 +335,15 @@ inline VALUE um_fiber_switch(struct um *machine) {
   }
 }

-void
+void um_cancel_op(struct um *machine, struct um_op *op) {
   struct io_uring_sqe *sqe = um_get_sqe(machine, NULL);
   io_uring_prep_cancel64(sqe, (long long)op, 0);
 }

 inline void um_cancel_and_wait(struct um *machine, struct um_op *op) {
-
-  while (
+  um_cancel_op(machine, op);
+  while (!um_op_completed_p(op)) {
     um_fiber_switch(machine);
-    if (um_op_completed_p(op)) break;
   }
 }

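um_wakeup below posts a NOP SQE and submits it immediately, so a completion arrives and any thread parked in the CQE wait path returns. A hypothetical cross-thread sketch (UM#wakeup is confirmed by the changelog; the scenario is illustrative):

```ruby
# Hypothetical sketch: nudge a machine blocked waiting for completions so its
# event loop runs again and can notice work scheduled from another thread.
machine = UM.new

Thread.new do
  sleep 0.1
  machine.wakeup  # NOP SQE; the blocked io_uring wait returns with its CQE
end

machine.sleep(1)  # the wakeup only spins the wait loop once early; the sleep
                  # itself still completes on its own one-second timer
```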
@@ -261,7 +364,14 @@ inline VALUE um_await(struct um *machine) {
   return ret;
 }

-
+VALUE um_wakeup(struct um *machine) {
+  struct io_uring_sqe *sqe = um_get_sqe(machine, NULL);
+  io_uring_prep_nop(sqe);
+  io_uring_submit(&machine->ring);
+  return Qnil;
+}
+
+inline void um_prep_op(struct um *machine, struct um_op *op, enum um_op_kind kind, unsigned flags) {
   memset(op, 0, sizeof(struct um_op));
   op->kind = kind;
   op->flags = flags;
@@ -299,7 +409,7 @@ VALUE um_timeout_complete(VALUE arg) {
   struct op_ctx *ctx = (struct op_ctx *)arg;

   if (!um_op_completed_p(ctx->op)) {
-
+    um_cancel_op(ctx->machine, ctx->op);
     ctx->op->flags |= OP_F_TRANSIENT | OP_F_IGNORE_CANCELED;
     um_op_transient_add(ctx->machine, ctx->op);
   }
@@ -353,12 +463,12 @@ VALUE um_sleep(struct um *machine, double duration) {
   return ret;
 }

-VALUE um_read(struct um *machine, int fd, VALUE buffer, size_t maxlen, ssize_t buffer_offset) {
+VALUE um_read(struct um *machine, int fd, VALUE buffer, size_t maxlen, ssize_t buffer_offset, __u64 file_offset) {
   struct um_op op;
   um_prep_op(machine, &op, OP_READ, 0);
   struct io_uring_sqe *sqe = um_get_sqe(machine, &op);
   void *ptr = um_prepare_read_buffer(buffer, maxlen, buffer_offset);
-  io_uring_prep_read(sqe, fd, ptr, maxlen,
+  io_uring_prep_read(sqe, fd, ptr, maxlen, file_offset);

   VALUE ret = um_fiber_switch(machine);
   if (um_check_completion(machine, &op)) {
@@ -389,17 +499,18 @@ size_t um_read_raw(struct um *machine, int fd, char *buffer, size_t maxlen) {
   return 0;
 }

-VALUE um_write(struct um *machine, int fd, VALUE buffer, size_t len) {
-  struct um_op op;
-  um_prep_op(machine, &op, OP_WRITE, 0);
-  struct io_uring_sqe *sqe = um_get_sqe(machine, &op);
-
+VALUE um_write(struct um *machine, int fd, VALUE buffer, size_t len, __u64 file_offset) {
   const void *base;
   size_t size;
   um_get_buffer_bytes_for_writing(buffer, &base, &size);
   if ((len == (size_t)-1) || (len > size)) len = size;
+  if (unlikely(!len)) return INT2NUM(0);
+
+  struct um_op op;
+  um_prep_op(machine, &op, OP_WRITE, 0);
+  struct io_uring_sqe *sqe = um_get_sqe(machine, &op);

-  io_uring_prep_write(sqe, fd, base, len,
+  io_uring_prep_write(sqe, fd, base, len, file_offset);

   VALUE ret = um_fiber_switch(machine);
   if (um_check_completion(machine, &op))
@@ -410,19 +521,22 @@ VALUE um_write(struct um *machine, int fd, VALUE buffer, size_t len) {
   return ret;
 }

-VALUE um_write_async(struct um *machine, int fd, VALUE buffer) {
+VALUE um_write_async(struct um *machine, int fd, VALUE buffer, size_t len, __u64 file_offset) {
+  const void *base;
+  size_t size;
+  um_get_buffer_bytes_for_writing(buffer, &base, &size);
+  if ((len == (size_t)-1) || (len > size)) len = size;
+  if (unlikely(!len)) return INT2NUM(0);
+
   struct um_op *op = um_op_alloc(machine);
   um_prep_op(machine, op, OP_WRITE_ASYNC, OP_F_TRANSIENT | OP_F_FREE_ON_COMPLETE);
   RB_OBJ_WRITE(machine->self, &op->fiber, Qnil);
   RB_OBJ_WRITE(machine->self, &op->value, buffer);
   RB_OBJ_WRITE(machine->self, &op->async_op, Qnil);

-  const void *base;
-  size_t size;
-  um_get_buffer_bytes_for_writing(buffer, &base, &size);

   struct io_uring_sqe *sqe = um_get_sqe(machine, op);
-  io_uring_prep_write(sqe, fd, base,
+  io_uring_prep_write(sqe, fd, base, len, file_offset);
   um_op_transient_add(machine, op);

   return buffer;
@@ -686,6 +800,114 @@ VALUE um_poll(struct um *machine, int fd, unsigned mask) {
   return ret;
 }

+static inline void prepare_select_poll_ops(struct um *machine, uint *idx, struct um_op *ops, VALUE fds, uint len, uint flags, uint event) {
+  for (uint i = 0; i < len; i++) {
+    struct um_op *op = ops + ((*idx)++);
+    um_prep_op(machine, op, OP_POLL, flags | OP_F_IGNORE_CANCELED);
+    struct io_uring_sqe *sqe = um_get_sqe(machine, op);
+    VALUE fd = rb_ary_entry(fds, i);
+    RB_OBJ_WRITE(machine->self, &op->value, fd);
+    io_uring_prep_poll_add(sqe, NUM2INT(fd), event);
+  }
+}
+
+VALUE um_select_single(struct um *machine, VALUE rfds, VALUE wfds, VALUE efds, uint rfds_len, uint wfds_len, uint efds_len) {
+  struct um_op op;
+  uint idx = 0;
+  if (rfds_len)
+    prepare_select_poll_ops(machine, &idx, &op, rfds, rfds_len, OP_F_SELECT_POLLIN, POLLIN);
+  else if (wfds_len)
+    prepare_select_poll_ops(machine, &idx, &op, wfds, wfds_len, OP_F_SELECT_POLLOUT, POLLOUT);
+  else if (efds_len)
+    prepare_select_poll_ops(machine, &idx, &op, efds, efds_len, OP_F_SELECT_POLLPRI, POLLPRI);
+  assert(idx == 1);
+
+  VALUE ret = um_fiber_switch(machine);
+  um_check_completion(machine, &op);
+  RAISE_IF_EXCEPTION(ret);
+
+  if (op.flags & OP_F_SELECT_POLLIN)
+    return rb_ary_new3(3, rb_ary_new3(1, ret), rb_ary_new(), rb_ary_new());
+  else if (op.flags & OP_F_SELECT_POLLOUT)
+    return rb_ary_new3(3, rb_ary_new(), rb_ary_new3(1, ret), rb_ary_new());
+  else
+    return rb_ary_new3(3, rb_ary_new(), rb_ary_new(), rb_ary_new3(1, ret));

+  RB_GC_GUARD(ret);
+}
+
+VALUE um_select(struct um *machine, VALUE rfds, VALUE wfds, VALUE efds) {
+  uint rfds_len = RARRAY_LEN(rfds);
+  uint wfds_len = RARRAY_LEN(wfds);
+  uint efds_len = RARRAY_LEN(efds);
+  uint total_len = rfds_len + wfds_len + efds_len;
+  if (total_len == 1)
+    return um_select_single(machine, rfds, wfds, efds, rfds_len, wfds_len, efds_len);
+
+  if (unlikely(!total_len))
+    return rb_ary_new3(3, rb_ary_new(), rb_ary_new(), rb_ary_new());
+
+  struct um_op *ops = malloc(sizeof(struct um_op) * total_len);
+  uint idx = 0;
+  prepare_select_poll_ops(machine, &idx, ops, rfds, rfds_len, OP_F_SELECT_POLLIN, POLLIN);
+  prepare_select_poll_ops(machine, &idx, ops, wfds, wfds_len, OP_F_SELECT_POLLOUT, POLLOUT);
+  prepare_select_poll_ops(machine, &idx, ops, efds, efds_len, OP_F_SELECT_POLLPRI, POLLPRI);
+  assert(idx == total_len);
+
+  VALUE ret = um_fiber_switch(machine);
+  if (unlikely(um_value_is_exception_p(ret))) {
+    free(ops);
+    um_raise_exception(ret);
+  }
+
+  VALUE rfds_out = rb_ary_new();
+  VALUE wfds_out = rb_ary_new();
+  VALUE efds_out = rb_ary_new();
+
+  int error_code = 0;
+  uint pending = total_len;
+  for (uint i = 0; i < total_len; i++) {
+    if (um_op_completed_p(&ops[i])) {
+      ops[i].flags |= OP_F_RUNQUEUE_SKIP;
+      pending--;
+
+      if (unlikely((ops[i].result.res < 0) && !error_code)) {
+        error_code = ops[i].result.res;
+      }
+      else {
+        if (ops[i].flags & OP_F_SELECT_POLLIN) rb_ary_push(rfds_out, ops[i].value);
+        if (ops[i].flags & OP_F_SELECT_POLLOUT) rb_ary_push(wfds_out, ops[i].value);
+        if (ops[i].flags & OP_F_SELECT_POLLPRI) rb_ary_push(efds_out, ops[i].value);
+      }
+    }
+    else {
+      ops[i].flags |= OP_F_CANCELED;
+      um_cancel_op(machine, &ops[i]);
+    }
+  }
+
+  while (pending) {
+    um_wait_for_and_process_ready_cqes(machine, 0);
+
+    for (uint i = 0; i < total_len; i++) {
+      struct um_op *op = ops + i;
+      if (op->flags & OP_F_CANCELED && um_op_completed_p(op)) {
+        pending--;
+      }
+    }
+  }
+  free(ops);
+
+  if (error_code)
+    um_raise_on_error_result(error_code);
+
+  return rb_ary_new3(3, rfds_out, wfds_out, efds_out);
+
+  RB_GC_GUARD(rfds_out);
+  RB_GC_GUARD(wfds_out);
+  RB_GC_GUARD(efds_out);
+}
+
 VALUE um_waitid(struct um *machine, int idtype, int id, int options) {
   struct um_op op;
   um_prep_op(machine, &op, OP_WAITID, 0);
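um_select implements readiness selection with one POLL op per descriptor: wait for the first completion, harvest everything that finished, then cancel the rest (marking them OP_F_CANCELED, and OP_F_RUNQUEUE_SKIP on completed ones so um_fiber_switch drops their stale runqueue entries) and wait for the cancellations to drain before freeing the ops array. At the Ruby level this presumably mirrors IO.select's triple-array shape; a hypothetical sketch, with the fd-array interface inferred from um_select's arguments and return value above:

```ruby
# Hypothetical sketch: UM#select with fd arrays, mirroring IO.select's shape.
machine = UM.new
r, w = IO.pipe

readable, writable, priority = machine.select([r.fileno], [w.fileno], [])
# the pipe's write end is immediately writable; the read end only becomes
# readable after something is written:
#   readable == [], writable == [w.fileno], priority == []
```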
@@ -706,8 +928,8 @@ VALUE um_waitid(struct um *machine, int idtype, int id, int options) {
   );
 }

-VALUE um_waitid_status(struct um *machine, int idtype, int id, int options) {
 #ifdef HAVE_RB_PROCESS_STATUS_NEW
+VALUE um_waitid_status(struct um *machine, int idtype, int id, int options) {
   struct um_op op;
   um_prep_op(machine, &op, OP_WAITID, 0);
   struct io_uring_sqe *sqe = um_get_sqe(machine, &op);

@@ -723,10 +945,8 @@ VALUE um_waitid_status(struct um *machine, int idtype, int id, int options) {
   RB_GC_GUARD(ret);

   return rb_process_status_new(infop.si_pid, (infop.si_status & 0xff) << 8, 0);
-#else
-  rb_raise(rb_eNotImpError, "Missing rb_process_status_new");
-#endif
 }
+#endif

 #define hash_set(h, sym, v) rb_hash_aset(h, ID2SYM(rb_intern(sym)), v)
