uringmachine 0.28.3 → 0.29.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -1
- data/TODO.md +29 -35
- data/benchmark/common.rb +6 -6
- data/benchmark/gets.rb +49 -0
- data/benchmark/gets_concurrent.rb +122 -0
- data/benchmark/{read_each.rb → output.rb} +27 -24
- data/docs/design/buffer_pool.md +35 -0
- data/docs/um_api.md +2 -0
- data/ext/um/extconf.rb +6 -5
- data/ext/um/um.c +50 -16
- data/ext/um/um.h +102 -32
- data/ext/um/um_buffer_pool.c +248 -0
- data/ext/um/um_class.c +28 -16
- data/ext/um/um_op.c +29 -13
- data/ext/um/um_ssl.c +24 -27
- data/ext/um/um_stream.c +382 -150
- data/ext/um/um_stream_class.c +119 -63
- data/ext/um/um_utils.c +6 -6
- data/grant-2025/tasks.md +13 -7
- data/lib/uringmachine/fiber_scheduler.rb +36 -10
- data/lib/uringmachine/version.rb +1 -1
- data/lib/uringmachine.rb +60 -19
- data/test/helper.rb +4 -0
- data/test/test_fiber.rb +93 -12
- data/test/test_fiber_scheduler.rb +8 -50
- data/test/test_stream.rb +466 -124
- data/test/test_um.rb +133 -49
- metadata +6 -4
- data/ext/um/um_buffer.c +0 -49
data/ext/um/um.h
CHANGED
|
@@ -77,16 +77,24 @@ enum um_op_kind {
|
|
|
77
77
|
OP_TIMEOUT_MULTISHOT,
|
|
78
78
|
};
|
|
79
79
|
|
|
80
|
+
enum um_stream_mode {
|
|
81
|
+
STREAM_BP_READ,
|
|
82
|
+
STREAM_BP_RECV,
|
|
83
|
+
STREAM_SSL,
|
|
84
|
+
STREAM_STRING,
|
|
85
|
+
STREAM_IO_BUFFER
|
|
86
|
+
};
|
|
80
87
|
|
|
81
88
|
#define OP_F_CQE_SEEN (1U << 0) // CQE has been seen
|
|
82
89
|
#define OP_F_CQE_DONE (1U << 1) // CQE has been seen and operation is done
|
|
83
90
|
#define OP_F_SCHEDULED (1U << 2) // op is on runqueue
|
|
84
91
|
#define OP_F_CANCELED (1U << 3) // op is cancelled (disregard CQE results)
|
|
85
92
|
#define OP_F_MULTISHOT (1U << 4) // op is multishot
|
|
86
|
-
#define OP_F_ASYNC (1U << 5) // op is async (
|
|
87
|
-
#define OP_F_TRANSIENT (1U << 6) // op is on transient list (for GC
|
|
93
|
+
#define OP_F_ASYNC (1U << 5) // op is async (don't schedule fiber)
|
|
94
|
+
#define OP_F_TRANSIENT (1U << 6) // op is on transient list (for GC marking)
|
|
88
95
|
#define OP_F_FREE_IOVECS (1U << 7) // op->iovecs should be freed on release
|
|
89
96
|
#define OP_F_SKIP (1U << 8) // op should be skipped when pulled from runqueue
|
|
97
|
+
#define OP_F_BUFFER_POOL (1U << 9) // multishot op using buffer pool
|
|
90
98
|
|
|
91
99
|
#define OP_F_SELECT_POLLIN (1U << 13) // select POLLIN
|
|
92
100
|
#define OP_F_SELECT_POLLOUT (1U << 14) // select POLLOUT
|
|
@@ -101,9 +109,34 @@ enum um_op_kind {
|
|
|
101
109
|
#define OP_TRANSIENT_P(op) ((op)->flags & OP_F_TRANSIENT)
|
|
102
110
|
#define OP_SKIP_P(op) ((op)->flags & OP_F_SKIP)
|
|
103
111
|
|
|
112
|
+
#define BP_BGID 0xF00B
|
|
113
|
+
#define BP_BR_ENTRIES 1024
|
|
114
|
+
#define BP_INITIAL_BUFFER_SIZE (1U << 14) // 16KB
|
|
115
|
+
#define BP_INITIAL_COMMIT_LEVEL (BP_INITIAL_BUFFER_SIZE * 16) // 256KB
|
|
116
|
+
#define BP_MAX_BUFFER_SIZE (1U << 20) // 1MB
|
|
117
|
+
#define BP_MAX_COMMIT_LEVEL (BP_MAX_BUFFER_SIZE * BP_BR_ENTRIES) // 1GB
|
|
118
|
+
#define BP_AVAIL_BID_BITMAP_WORDS (BP_BR_ENTRIES / 64)
|
|
119
|
+
|
|
120
|
+
struct um_buffer {
|
|
121
|
+
size_t len;
|
|
122
|
+
size_t pos;
|
|
123
|
+
uint ref_count;
|
|
124
|
+
struct um_buffer *next;
|
|
125
|
+
char buf[];
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
struct um_segment {
|
|
129
|
+
char *ptr;
|
|
130
|
+
size_t len;
|
|
131
|
+
|
|
132
|
+
struct um_buffer *buffer;
|
|
133
|
+
struct um_segment *next;
|
|
134
|
+
};
|
|
135
|
+
|
|
104
136
|
struct um_op_result {
|
|
105
137
|
__s32 res;
|
|
106
138
|
__u32 flags;
|
|
139
|
+
struct um_segment *segment;
|
|
107
140
|
struct um_op_result *next;
|
|
108
141
|
};
|
|
109
142
|
|
|
@@ -128,15 +161,10 @@ struct um_op {
|
|
|
128
161
|
struct iovec *iovecs; // used for vectorized write/send
|
|
129
162
|
siginfo_t siginfo; // used for waitid
|
|
130
163
|
int int_value; // used for getsockopt
|
|
164
|
+
size_t bp_commit_level; // buffer pool commit threshold
|
|
131
165
|
};
|
|
132
166
|
};
|
|
133
167
|
|
|
134
|
-
struct um_buffer {
|
|
135
|
-
struct um_buffer *next;
|
|
136
|
-
void *ptr;
|
|
137
|
-
long len;
|
|
138
|
-
};
|
|
139
|
-
|
|
140
168
|
struct buf_ring_descriptor {
|
|
141
169
|
struct io_uring_buf_ring *br;
|
|
142
170
|
size_t br_size;
|
|
@@ -147,19 +175,25 @@ struct buf_ring_descriptor {
|
|
|
147
175
|
};
|
|
148
176
|
|
|
149
177
|
struct um_metrics {
|
|
150
|
-
ulong total_ops;
|
|
151
|
-
ulong total_switches;
|
|
152
|
-
ulong total_waits;
|
|
153
|
-
|
|
154
|
-
uint ops_pending;
|
|
155
|
-
uint ops_unsubmitted;
|
|
156
|
-
uint ops_runqueue;
|
|
157
|
-
uint ops_free;
|
|
158
|
-
uint ops_transient;
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
178
|
+
ulong total_ops; // total ops submitted
|
|
179
|
+
ulong total_switches; // total fiber switches
|
|
180
|
+
ulong total_waits; // total number of CQE waits
|
|
181
|
+
|
|
182
|
+
uint ops_pending; // number of pending ops
|
|
183
|
+
uint ops_unsubmitted; // number of unsubmitted
|
|
184
|
+
uint ops_runqueue; // number of ops in runqueue
|
|
185
|
+
uint ops_free; // number of ops in freelist
|
|
186
|
+
uint ops_transient; // number of ops in transient list
|
|
187
|
+
|
|
188
|
+
uint buffers_allocated; // number of allocated buffers
|
|
189
|
+
uint buffers_free; // number of available buffers
|
|
190
|
+
uint segments_free; // free segments
|
|
191
|
+
size_t buffer_space_allocated; // total allocated buffer space
|
|
192
|
+
size_t buffer_space_commited; // commited buffer space
|
|
193
|
+
|
|
194
|
+
double time_total_wait; // total CPU time waiting for CQEs
|
|
195
|
+
double time_last_cpu; // last seen time stamp
|
|
196
|
+
double time_first_cpu; // first seen time stamp
|
|
163
197
|
};
|
|
164
198
|
|
|
165
199
|
#define BUFFER_RING_MAX_COUNT 10
|
|
@@ -167,8 +201,6 @@ struct um_metrics {
|
|
|
167
201
|
struct um {
|
|
168
202
|
VALUE self;
|
|
169
203
|
|
|
170
|
-
struct um_buffer *buffer_freelist;
|
|
171
|
-
|
|
172
204
|
struct io_uring ring;
|
|
173
205
|
|
|
174
206
|
uint ring_initialized; // is the ring initialized successfully
|
|
@@ -196,6 +228,17 @@ struct um {
|
|
|
196
228
|
|
|
197
229
|
struct um_op *op_freelist;
|
|
198
230
|
struct um_op_result *result_freelist;
|
|
231
|
+
struct um_segment *segment_freelist;
|
|
232
|
+
|
|
233
|
+
struct io_uring_buf_ring *bp_br;
|
|
234
|
+
size_t bp_buffer_size;
|
|
235
|
+
size_t bp_commit_level;
|
|
236
|
+
struct um_buffer **bp_commited_buffers;
|
|
237
|
+
uint64_t bp_avail_bid_bitmap[BP_AVAIL_BID_BITMAP_WORDS];
|
|
238
|
+
|
|
239
|
+
struct um_buffer *bp_buffer_freelist;
|
|
240
|
+
uint bp_buffer_count;
|
|
241
|
+
size_t bp_total_commited;
|
|
199
242
|
};
|
|
200
243
|
|
|
201
244
|
struct um_mutex {
|
|
@@ -227,11 +270,21 @@ struct um_async_op {
|
|
|
227
270
|
};
|
|
228
271
|
|
|
229
272
|
struct um_stream {
|
|
273
|
+
VALUE self;
|
|
230
274
|
struct um *machine;
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
275
|
+
|
|
276
|
+
enum um_stream_mode mode;
|
|
277
|
+
union {
|
|
278
|
+
int fd;
|
|
279
|
+
VALUE target;
|
|
280
|
+
};
|
|
281
|
+
|
|
282
|
+
struct um_buffer *working_buffer;
|
|
283
|
+
struct um_op *op;
|
|
284
|
+
struct um_segment *head;
|
|
285
|
+
struct um_segment *tail;
|
|
286
|
+
size_t pending_len;
|
|
287
|
+
size_t pos;
|
|
235
288
|
int eof;
|
|
236
289
|
};
|
|
237
290
|
|
|
@@ -271,12 +324,15 @@ void um_op_list_compact(struct um *machine, struct um_op *head);
|
|
|
271
324
|
void um_op_multishot_results_push(struct um *machine, struct um_op *op, __s32 res, __u32 flags);
|
|
272
325
|
void um_op_multishot_results_clear(struct um *machine, struct um_op *op);
|
|
273
326
|
|
|
327
|
+
struct um_segment *um_segment_alloc(struct um *machine);
|
|
328
|
+
void um_segment_free(struct um *machine, struct um_segment *segment);
|
|
329
|
+
|
|
274
330
|
void um_runqueue_push(struct um *machine, struct um_op *op);
|
|
275
331
|
struct um_op *um_runqueue_shift(struct um *machine);
|
|
276
332
|
|
|
277
333
|
struct um_buffer *um_buffer_checkout(struct um *machine, int len);
|
|
278
|
-
void
|
|
279
|
-
void
|
|
334
|
+
void bp_buffer_checkin(struct um *machine, struct um_buffer *buffer);
|
|
335
|
+
void bp_discard_buffer_freelist(struct um *machine);
|
|
280
336
|
|
|
281
337
|
struct __kernel_timespec um_double_to_timespec(double value);
|
|
282
338
|
double um_timestamp_to_double(__s64 tv_sec, __u32 tv_nsec);
|
|
@@ -375,9 +431,11 @@ VALUE um_queue_pop(struct um *machine, struct um_queue *queue);
|
|
|
375
431
|
VALUE um_queue_unshift(struct um *machine, struct um_queue *queue, VALUE value);
|
|
376
432
|
VALUE um_queue_shift(struct um *machine, struct um_queue *queue);
|
|
377
433
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
VALUE
|
|
434
|
+
void stream_teardown(struct um_stream *stream);
|
|
435
|
+
void stream_clear(struct um_stream *stream);
|
|
436
|
+
VALUE stream_get_line(struct um_stream *stream, VALUE buf, size_t maxlen);
|
|
437
|
+
VALUE stream_get_string(struct um_stream *stream, VALUE out_buffer, ssize_t len, size_t inc, int safe_inc);
|
|
438
|
+
void stream_skip(struct um_stream *stream, size_t inc, int safe_inc);
|
|
381
439
|
VALUE resp_decode(struct um_stream *stream, VALUE out_buffer);
|
|
382
440
|
void resp_encode(struct um_write_buffer *buf, VALUE obj);
|
|
383
441
|
void resp_encode_cmd(struct um_write_buffer *buf, int argc, VALUE *argv);
|
|
@@ -396,6 +454,18 @@ void um_sidecar_signal_wake(struct um *machine);
|
|
|
396
454
|
|
|
397
455
|
void um_ssl_set_bio(struct um *machine, VALUE ssl_obj);
|
|
398
456
|
int um_ssl_read(struct um *machine, VALUE ssl, VALUE buf, int maxlen);
|
|
457
|
+
int um_ssl_read_raw(struct um *machine, VALUE ssl_obj, char *ptr, int maxlen);
|
|
399
458
|
int um_ssl_write(struct um *machine, VALUE ssl, VALUE buf, int len);
|
|
400
459
|
|
|
460
|
+
void bp_setup(struct um *machine);
|
|
461
|
+
void bp_teardown(struct um *machine);
|
|
462
|
+
|
|
463
|
+
struct um_segment *bp_get_op_result_segment(struct um *machine, struct um_op *op, __s32 res, __u32 flags);
|
|
464
|
+
void um_segment_checkin(struct um *machine, struct um_segment *segment);
|
|
465
|
+
void bp_handle_enobufs(struct um *machine);
|
|
466
|
+
void bp_ensure_commit_level(struct um *machine);
|
|
467
|
+
struct um_buffer *bp_buffer_checkout(struct um *machine);
|
|
468
|
+
void bp_buffer_checkin(struct um *machine, struct um_buffer *buffer);
|
|
469
|
+
struct um_segment *bp_buffer_consume(struct um *machine, struct um_buffer *buffer, size_t len);
|
|
470
|
+
|
|
401
471
|
#endif // UM_H
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
#include <string.h>
|
|
2
|
+
#include "um.h"
|
|
3
|
+
|
|
4
|
+
#define UM_SEGMENT_ALLOC_BATCH_SIZE 256
|
|
5
|
+
|
|
6
|
+
inline struct um_buffer *buffer_alloc(struct um *machine) {
|
|
7
|
+
struct um_buffer *buffer = malloc(sizeof(struct um_buffer) + machine->bp_buffer_size);
|
|
8
|
+
if (unlikely(!buffer)) {
|
|
9
|
+
fprintf(stderr, "!ENOMEM!\n");
|
|
10
|
+
exit(1);
|
|
11
|
+
rb_syserr_fail(errno, strerror(errno));
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
machine->metrics.buffers_allocated += 1;
|
|
15
|
+
machine->metrics.buffer_space_allocated += machine->bp_buffer_size;
|
|
16
|
+
buffer->len = machine->bp_buffer_size;
|
|
17
|
+
buffer->ref_count = 0;
|
|
18
|
+
return buffer;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
inline struct um_buffer *bp_buffer_checkout(struct um *machine) {
|
|
22
|
+
struct um_buffer *buffer = machine->bp_buffer_freelist;
|
|
23
|
+
if (buffer) {
|
|
24
|
+
struct um_buffer *next = buffer->next;
|
|
25
|
+
machine->bp_buffer_freelist = next;
|
|
26
|
+
machine->metrics.buffers_free--;
|
|
27
|
+
}
|
|
28
|
+
else
|
|
29
|
+
buffer = buffer_alloc(machine);
|
|
30
|
+
|
|
31
|
+
buffer->ref_count++;
|
|
32
|
+
buffer->pos = 0;
|
|
33
|
+
buffer->next = NULL;
|
|
34
|
+
return buffer;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
inline void buffer_free(struct um *machine, struct um_buffer *buffer) {
|
|
38
|
+
if (unlikely(!machine->ring_initialized || buffer->len != machine->bp_buffer_size)) {
|
|
39
|
+
// The machine is being shut down or working buffer size has changed, so the
|
|
40
|
+
// buffer can be freed.
|
|
41
|
+
machine->metrics.buffers_allocated -= 1;
|
|
42
|
+
machine->metrics.buffer_space_allocated -= buffer->len;
|
|
43
|
+
free(buffer);
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
// otherwise, keep it on the freelist
|
|
47
|
+
buffer->next = machine->bp_buffer_freelist;
|
|
48
|
+
machine->bp_buffer_freelist = buffer;
|
|
49
|
+
machine->metrics.buffers_free++;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
inline void bp_buffer_checkin(struct um *machine, struct um_buffer *buffer) {
|
|
54
|
+
assert(buffer->ref_count > 0);
|
|
55
|
+
buffer->ref_count--;
|
|
56
|
+
if (!buffer->ref_count) buffer_free(machine, buffer);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
inline void bp_discard_buffer_freelist(struct um *machine) {
|
|
60
|
+
while (machine->bp_buffer_freelist) {
|
|
61
|
+
struct um_buffer *buffer = machine->bp_buffer_freelist;
|
|
62
|
+
struct um_buffer *next = buffer->next;
|
|
63
|
+
|
|
64
|
+
machine->metrics.buffers_allocated -= 1;
|
|
65
|
+
machine->metrics.buffer_space_allocated -= buffer->len;
|
|
66
|
+
|
|
67
|
+
free(buffer);
|
|
68
|
+
machine->bp_buffer_freelist = next;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
inline void bp_setup(struct um *machine) {
|
|
73
|
+
int ret;
|
|
74
|
+
machine->bp_br = io_uring_setup_buf_ring(&machine->ring, BP_BR_ENTRIES, BP_BGID, IOU_PBUF_RING_INC, &ret);
|
|
75
|
+
if (unlikely(!machine->bp_br)) rb_syserr_fail(ret, strerror(ret));
|
|
76
|
+
|
|
77
|
+
machine->bp_buffer_size = BP_INITIAL_BUFFER_SIZE;
|
|
78
|
+
machine->bp_commit_level = BP_INITIAL_COMMIT_LEVEL;
|
|
79
|
+
machine->bp_commited_buffers = malloc(sizeof(struct um_buffer) * BP_BR_ENTRIES);
|
|
80
|
+
memset(machine->bp_commited_buffers, 0, sizeof(struct um_buffer) * BP_BR_ENTRIES);
|
|
81
|
+
memset(machine->bp_avail_bid_bitmap, 0xFF, sizeof(machine->bp_avail_bid_bitmap));
|
|
82
|
+
|
|
83
|
+
machine->bp_buffer_freelist = NULL;
|
|
84
|
+
machine->bp_buffer_count = 0;
|
|
85
|
+
machine->bp_total_commited = 0;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
inline void bp_teardown(struct um *machine) {
|
|
89
|
+
bp_discard_buffer_freelist(machine);
|
|
90
|
+
for (int i = 0; i < BP_BR_ENTRIES; i++) {
|
|
91
|
+
struct um_buffer *buffer = machine->bp_commited_buffers[i];
|
|
92
|
+
if (buffer) bp_buffer_checkin(machine, buffer);
|
|
93
|
+
}
|
|
94
|
+
free(machine->bp_commited_buffers);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Returns the index of the lowest set bit in a bitmap made of `word_count`
// 64-bit words (bit 0 of word 0 is index 0), or -1 if no bit is set.
//
// The previous version was missing the terminating `;` on its scanning loop,
// so the "not found" check became the loop body: an all-zero bitmap fell
// through and read one word past the end of the array instead of returning -1.
//
// Declared static because the helper is local to this file; a plain `inline`
// definition does not by itself provide an external definition to link to.
static inline int bitmap_find_first_set_bit(uint64_t *bitmap, size_t word_count) {
  size_t word = 0;
  while (word < word_count && !bitmap[word])
    word++;

  if (word == word_count) return -1;

  // bitmap[word] is non-zero here, so counting trailing zeros is well defined
  // and gives the 0-based position of the lowest set bit.
  return (int)(word * 64) + __builtin_ctzll(bitmap[word]);
}
|
|
105
|
+
|
|
106
|
+
// Sets bit `idx` in a bitmap of 64-bit words (bit 0 of word 0 is index 0).
// `idx` is expected to be non-negative and within the bitmap.
//
// The mask is built from a 64-bit constant: the previous `1U << bit` shifted a
// 32-bit value, which is undefined for bit positions 32..63 and cannot address
// the upper half of a word.
//
// Declared static because the helper is local to this file.
static inline void bitmap_set(uint64_t *bitmap, int idx) {
  unsigned int word = (unsigned int)idx / 64;
  unsigned int bit = (unsigned int)idx % 64;
  bitmap[word] |= UINT64_C(1) << bit;
}
|
|
111
|
+
|
|
112
|
+
// Clears bit `idx` in a bitmap of 64-bit words (bit 0 of word 0 is index 0).
// `idx` is expected to be non-negative and within the bitmap.
//
// The mask is built from a 64-bit constant: the previous `~(1U << bit)` was a
// 32-bit value that got zero-extended before the AND, so every call also
// cleared bits 32..63 of the word (and the shift itself is undefined for bit
// positions 32..63).
//
// Declared static because the helper is local to this file.
static inline void bitmap_unset(uint64_t *bitmap, int idx) {
  unsigned int word = (unsigned int)idx / 64;
  unsigned int bit = (unsigned int)idx % 64;
  bitmap[word] &= ~(UINT64_C(1) << bit);
}
|
|
117
|
+
|
|
118
|
+
inline int get_available_bid(struct um *machine) {
|
|
119
|
+
return bitmap_find_first_set_bit(machine->bp_avail_bid_bitmap, BP_AVAIL_BID_BITMAP_WORDS);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Finds an available bid, checks out a buffer and commits it for kernel usage.
|
|
123
|
+
// Returns true if successful.
|
|
124
|
+
static inline int commit_buffer(struct um *machine, int added) {
|
|
125
|
+
static int buf_mask;
|
|
126
|
+
if (!buf_mask) buf_mask = io_uring_buf_ring_mask(BP_BR_ENTRIES);
|
|
127
|
+
|
|
128
|
+
int bid = get_available_bid(machine);
|
|
129
|
+
if (bid < 0) return false;
|
|
130
|
+
|
|
131
|
+
bitmap_unset(machine->bp_avail_bid_bitmap, bid);
|
|
132
|
+
struct um_buffer *buffer = bp_buffer_checkout(machine);
|
|
133
|
+
assert(buffer->ref_count == 0);
|
|
134
|
+
buffer->ref_count = 1;
|
|
135
|
+
|
|
136
|
+
machine->bp_buffer_count++;
|
|
137
|
+
machine->bp_commited_buffers[bid] = buffer;
|
|
138
|
+
machine->bp_total_commited += buffer->len;
|
|
139
|
+
machine->metrics.buffer_space_commited += buffer->len;
|
|
140
|
+
|
|
141
|
+
io_uring_buf_ring_add(machine->bp_br, buffer->buf, buffer->len, bid, buf_mask, added);
|
|
142
|
+
return true;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// Removes buffer from bid slot, the buffer having already been entirely
|
|
146
|
+
// consumed by the kernel.
|
|
147
|
+
inline void uncommit_buffer(struct um *machine, struct um_op *op, struct um_buffer *buffer, int bid) {
|
|
148
|
+
machine->bp_buffer_count--;
|
|
149
|
+
machine->bp_commited_buffers[bid] = NULL;
|
|
150
|
+
bitmap_set(machine->bp_avail_bid_bitmap, bid);
|
|
151
|
+
bp_buffer_checkin(machine, buffer);
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
inline struct um_buffer *get_buffer(struct um *machine, int bid) {
|
|
155
|
+
struct um_buffer *buffer = machine->bp_commited_buffers[bid];
|
|
156
|
+
assert(buffer);
|
|
157
|
+
return buffer;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
inline int should_commit_more_p(struct um *machine) {
|
|
161
|
+
return (machine->bp_buffer_count < BP_BR_ENTRIES) &&
|
|
162
|
+
(machine->bp_total_commited < machine->bp_commit_level);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
inline void bp_ensure_commit_level(struct um *machine) {
|
|
166
|
+
if (machine->bp_total_commited > (machine->bp_commit_level / 2))
|
|
167
|
+
return;
|
|
168
|
+
|
|
169
|
+
int added = 0;
|
|
170
|
+
while (should_commit_more_p(machine)) {
|
|
171
|
+
if (likely(commit_buffer(machine, added))) added++;
|
|
172
|
+
}
|
|
173
|
+
if (added) io_uring_buf_ring_advance(machine->bp_br, added);
|
|
174
|
+
|
|
175
|
+
// if we get to this point, there's nothing more we can do because we used up
|
|
176
|
+
// all buffer ring entries. We need to wait for the kernel to consume buffers
|
|
177
|
+
// in order to put in more. When a ENOBUFS error is received,
|
|
178
|
+
// bp_handle_enobufs is called and among other things increases the buffer
|
|
179
|
+
// size.
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
inline void bp_handle_enobufs(struct um *machine) {
|
|
183
|
+
if (unlikely(machine->bp_commit_level >= BP_MAX_COMMIT_LEVEL))
|
|
184
|
+
rb_raise(eUMError, "Buffer starvation");
|
|
185
|
+
|
|
186
|
+
machine->bp_commit_level *= 2;
|
|
187
|
+
while (machine->bp_buffer_size < machine->bp_commit_level / 4)
|
|
188
|
+
machine->bp_buffer_size *= 2;
|
|
189
|
+
bp_discard_buffer_freelist(machine);
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
inline struct um_segment *um_segment_alloc(struct um *machine) {
|
|
193
|
+
if (machine->segment_freelist) {
|
|
194
|
+
struct um_segment *segment = machine->segment_freelist;
|
|
195
|
+
machine->segment_freelist = segment->next;
|
|
196
|
+
machine->metrics.segments_free--;
|
|
197
|
+
segment->next = NULL;
|
|
198
|
+
return segment;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
struct um_segment *batch = malloc(sizeof(struct um_segment) * UM_SEGMENT_ALLOC_BATCH_SIZE);
|
|
202
|
+
memset(batch, 0, sizeof(struct um_segment) * UM_SEGMENT_ALLOC_BATCH_SIZE);
|
|
203
|
+
for (int i = 1; i < (UM_SEGMENT_ALLOC_BATCH_SIZE - 1); i++) {
|
|
204
|
+
batch[i].next = &batch[i + 1];
|
|
205
|
+
}
|
|
206
|
+
machine->segment_freelist = batch + 1;
|
|
207
|
+
machine->metrics.segments_free += (UM_SEGMENT_ALLOC_BATCH_SIZE - 1);
|
|
208
|
+
return batch;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
inline void um_segment_free(struct um *machine, struct um_segment *segment) {
|
|
212
|
+
segment->next = machine->segment_freelist;
|
|
213
|
+
machine->segment_freelist = segment;
|
|
214
|
+
machine->metrics.segments_free++;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
inline struct um_segment *bp_buffer_consume(struct um *machine, struct um_buffer *buffer, size_t len) {
|
|
218
|
+
struct um_segment *segment = um_segment_alloc(machine);
|
|
219
|
+
segment->ptr = buffer->buf + buffer->pos;
|
|
220
|
+
segment->len = len;
|
|
221
|
+
segment->buffer = buffer;
|
|
222
|
+
buffer->pos += len;
|
|
223
|
+
buffer->ref_count++;
|
|
224
|
+
return segment;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
// Turns a completion result into a segment. `res` is the number of bytes
// consumed (asserted to be non-negative) and the buffer id is taken from the
// upper bits of `flags`.
//
// Returns NULL when `res` is 0. Otherwise consumes `res` bytes from the
// buffer committed under that bid, subtracts them from the committed totals,
// tops up the ring, and - when IORING_CQE_F_BUF_MORE is not set in `flags` -
// uncommits the buffer.
struct um_segment *bp_get_op_result_segment(struct um *machine, struct um_op *op, __s32 res, __u32 flags) {
  assert(res >= 0);
  if (res == 0) return NULL;

  uint bid = flags >> IORING_CQE_BUFFER_SHIFT;
  int buffer_done = !(flags & IORING_CQE_F_BUF_MORE);

  struct um_buffer *buffer = get_buffer(machine, bid);
  struct um_segment *segment = bp_buffer_consume(machine, buffer, res);

  machine->metrics.buffer_space_commited -= res;
  machine->bp_total_commited -= res;
  bp_ensure_commit_level(machine);

  // the top-up above runs before the bid is released, as in the original order
  if (buffer_done)
    uncommit_buffer(machine, op, buffer, bid);

  return segment;
}
|
|
244
|
+
|
|
245
|
+
inline void um_segment_checkin(struct um *machine, struct um_segment *segment) {
|
|
246
|
+
bp_buffer_checkin(machine, segment->buffer);
|
|
247
|
+
um_segment_free(machine, segment);
|
|
248
|
+
}
|
data/ext/um/um_class.c
CHANGED
|
@@ -22,6 +22,12 @@ VALUE SYM_ops_free;
|
|
|
22
22
|
VALUE SYM_ops_transient;
|
|
23
23
|
VALUE SYM_time_total_cpu;
|
|
24
24
|
VALUE SYM_time_total_wait;
|
|
25
|
+
VALUE SYM_buffer_groups;
|
|
26
|
+
VALUE SYM_buffers_allocated;
|
|
27
|
+
VALUE SYM_buffers_free;
|
|
28
|
+
VALUE SYM_segments_free;
|
|
29
|
+
VALUE SYM_buffer_space_allocated;
|
|
30
|
+
VALUE SYM_buffer_space_commited;
|
|
25
31
|
|
|
26
32
|
VALUE SYM_wd;
|
|
27
33
|
VALUE SYM_mask;
|
|
@@ -328,17 +334,17 @@ VALUE UM_schedule(VALUE self, VALUE fiber, VALUE value) {
|
|
|
328
334
|
}
|
|
329
335
|
|
|
330
336
|
/* Runs the given block, interrupting its execution if its runtime exceeds the
|
|
331
|
-
* given timeout interval (in seconds).
|
|
337
|
+
* given timeout interval (in seconds), raising the specified exception.
|
|
332
338
|
*
|
|
333
339
|
* - https://www.man7.org/linux/man-pages/man3/io_uring_prep_timeout.3.html
|
|
334
340
|
*
|
|
335
341
|
* @param interval [Number] timeout interval in seconds
|
|
336
|
-
* @param
|
|
342
|
+
* @param exception [any] timeout exception class or instance
|
|
337
343
|
* @return [any] block's return value
|
|
338
344
|
*/
|
|
339
|
-
VALUE UM_timeout(VALUE self, VALUE interval, VALUE
|
|
345
|
+
VALUE UM_timeout(VALUE self, VALUE interval, VALUE exception) {
|
|
340
346
|
struct um *machine = um_get_machine(self);
|
|
341
|
-
return um_timeout(machine, interval,
|
|
347
|
+
return um_timeout(machine, interval, exception);
|
|
342
348
|
}
|
|
343
349
|
|
|
344
350
|
/* Puts the current fiber to sleep for the given time duration (in seconds),
|
|
@@ -374,7 +380,7 @@ VALUE UM_periodically(VALUE self, VALUE interval) {
|
|
|
374
380
|
* which the data will be read. A negative `buffer_offset` denotes a position
|
|
375
381
|
* relative to the end of the buffer, e.g. a value of `-1` means the data will
|
|
376
382
|
* be appended to the buffer.
|
|
377
|
-
*
|
|
383
|
+
*
|
|
378
384
|
* - https://www.man7.org/linux/man-pages/man2/read.2.html
|
|
379
385
|
* - https://www.man7.org/linux/man-pages/man3/io_uring_prep_read.3.html
|
|
380
386
|
*
|
|
@@ -1521,17 +1527,23 @@ void Init_UM(void) {
|
|
|
1521
1527
|
|
|
1522
1528
|
um_define_net_constants(cUM);
|
|
1523
1529
|
|
|
1524
|
-
SYM_size
|
|
1525
|
-
SYM_total_ops
|
|
1526
|
-
SYM_total_switches
|
|
1527
|
-
SYM_total_waits
|
|
1528
|
-
SYM_ops_pending
|
|
1529
|
-
SYM_ops_unsubmitted
|
|
1530
|
-
SYM_ops_runqueue
|
|
1531
|
-
SYM_ops_free
|
|
1532
|
-
SYM_ops_transient
|
|
1533
|
-
SYM_time_total_cpu
|
|
1534
|
-
SYM_time_total_wait
|
|
1530
|
+
SYM_size = ID2SYM(rb_intern("size"));
|
|
1531
|
+
SYM_total_ops = ID2SYM(rb_intern("total_ops"));
|
|
1532
|
+
SYM_total_switches = ID2SYM(rb_intern("total_switches"));
|
|
1533
|
+
SYM_total_waits = ID2SYM(rb_intern("total_waits"));
|
|
1534
|
+
SYM_ops_pending = ID2SYM(rb_intern("ops_pending"));
|
|
1535
|
+
SYM_ops_unsubmitted = ID2SYM(rb_intern("ops_unsubmitted"));
|
|
1536
|
+
SYM_ops_runqueue = ID2SYM(rb_intern("ops_runqueue"));
|
|
1537
|
+
SYM_ops_free = ID2SYM(rb_intern("ops_free"));
|
|
1538
|
+
SYM_ops_transient = ID2SYM(rb_intern("ops_transient"));
|
|
1539
|
+
SYM_time_total_cpu = ID2SYM(rb_intern("time_total_cpu"));
|
|
1540
|
+
SYM_time_total_wait = ID2SYM(rb_intern("time_total_wait"));
|
|
1541
|
+
SYM_buffer_groups = ID2SYM(rb_intern("buffer_groups"));
|
|
1542
|
+
SYM_buffers_allocated = ID2SYM(rb_intern("buffers_allocated"));
|
|
1543
|
+
SYM_buffers_free = ID2SYM(rb_intern("buffers_free"));
|
|
1544
|
+
SYM_segments_free = ID2SYM(rb_intern("segments_free"));
|
|
1545
|
+
SYM_buffer_space_allocated = ID2SYM(rb_intern("buffer_space_allocated"));
|
|
1546
|
+
SYM_buffer_space_commited = ID2SYM(rb_intern("buffer_space_commited"));
|
|
1535
1547
|
|
|
1536
1548
|
SYM_wd = ID2SYM(rb_intern("wd"));
|
|
1537
1549
|
SYM_mask = ID2SYM(rb_intern("mask"));
|
data/ext/um/um_op.c
CHANGED
|
@@ -114,42 +114,56 @@ inline void um_op_list_compact(struct um *machine, struct um_op *head) {
|
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
+
#define UM_OP_BATCH_ALLOC_SIZE (sizeof(struct um_op_result) * UM_OP_RESULT_ALLOC_BATCH_SIZE)
|
|
118
|
+
|
|
117
119
|
inline struct um_op_result *multishot_result_alloc(struct um *machine) {
|
|
118
120
|
if (machine->result_freelist) {
|
|
119
121
|
struct um_op_result *result = machine->result_freelist;
|
|
120
122
|
machine->result_freelist = result->next;
|
|
123
|
+
result->segment = NULL;
|
|
124
|
+
result->next = NULL;
|
|
121
125
|
return result;
|
|
122
126
|
}
|
|
123
127
|
|
|
124
|
-
struct um_op_result *batch = malloc(
|
|
128
|
+
struct um_op_result *batch = malloc(UM_OP_BATCH_ALLOC_SIZE);
|
|
129
|
+
memset(batch, 0, UM_OP_BATCH_ALLOC_SIZE);
|
|
125
130
|
for (int i = 1; i < (UM_OP_RESULT_ALLOC_BATCH_SIZE - 1); i++) {
|
|
126
|
-
batch[i].next =
|
|
131
|
+
batch[i].next = batch + i + 1;
|
|
127
132
|
}
|
|
128
133
|
machine->result_freelist = batch + 1;
|
|
129
134
|
return batch;
|
|
130
135
|
}
|
|
131
136
|
|
|
132
137
|
inline void multishot_result_free(struct um *machine, struct um_op_result *result) {
|
|
138
|
+
if (result->segment) {
|
|
139
|
+
um_segment_free(machine, result->segment);
|
|
140
|
+
result->segment = NULL;
|
|
141
|
+
}
|
|
142
|
+
|
|
133
143
|
result->next = machine->result_freelist;
|
|
134
144
|
machine->result_freelist = result;
|
|
135
145
|
}
|
|
136
146
|
|
|
137
147
|
inline void um_op_multishot_results_push(struct um *machine, struct um_op *op, __s32 res, __u32 flags) {
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
op->result
|
|
142
|
-
op->multishot_result_tail = &op->result;
|
|
143
|
-
}
|
|
148
|
+
struct um_op_result *result;
|
|
149
|
+
|
|
150
|
+
if (!op->multishot_result_count)
|
|
151
|
+
result = &op->result;
|
|
144
152
|
else {
|
|
145
|
-
|
|
146
|
-
result->res = res;
|
|
147
|
-
result->flags = flags;
|
|
148
|
-
result->next = NULL;
|
|
153
|
+
result = multishot_result_alloc(machine);
|
|
149
154
|
op->multishot_result_tail->next = result;
|
|
150
|
-
op->multishot_result_tail = result;
|
|
151
155
|
}
|
|
156
|
+
|
|
157
|
+
result->res = res;
|
|
158
|
+
result->flags = flags;
|
|
159
|
+
result->next = NULL;
|
|
160
|
+
if (op->flags & OP_F_BUFFER_POOL && res >= 0) {
|
|
161
|
+
result->segment = bp_get_op_result_segment(machine, op, res, flags);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
op->multishot_result_tail = result;
|
|
152
165
|
op->multishot_result_count++;
|
|
166
|
+
|
|
153
167
|
}
|
|
154
168
|
|
|
155
169
|
inline void um_op_multishot_results_clear(struct um *machine, struct um_op *op) {
|
|
@@ -161,6 +175,7 @@ inline void um_op_multishot_results_clear(struct um *machine, struct um_op *op)
|
|
|
161
175
|
multishot_result_free(machine, result);
|
|
162
176
|
result = next;
|
|
163
177
|
}
|
|
178
|
+
op->result.next = NULL;
|
|
164
179
|
op->multishot_result_tail = NULL;
|
|
165
180
|
op->multishot_result_count = 0;
|
|
166
181
|
}
|
|
@@ -177,6 +192,7 @@ inline struct um_op *um_op_alloc(struct um *machine) {
|
|
|
177
192
|
for (int i = 1; i < (UM_OP_ALLOC_BATCH_SIZE - 1); i++) {
|
|
178
193
|
batch[i].next = &batch[i + 1];
|
|
179
194
|
}
|
|
195
|
+
batch[UM_OP_ALLOC_BATCH_SIZE - 1].next = NULL;
|
|
180
196
|
machine->op_freelist = batch + 1;
|
|
181
197
|
machine->metrics.ops_free += (UM_OP_ALLOC_BATCH_SIZE - 1);
|
|
182
198
|
return batch;
|