uringmachine 0.28.3 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/TODO.md +29 -35
- data/benchmark/common.rb +6 -6
- data/benchmark/gets.rb +49 -0
- data/benchmark/{read_each.rb → output.rb} +27 -24
- data/docs/design/buffer_pool.md +35 -0
- data/docs/um_api.md +2 -0
- data/ext/um/extconf.rb +6 -5
- data/ext/um/um.c +42 -13
- data/ext/um/um.h +103 -31
- data/ext/um/um_buffer_pool.c +246 -0
- data/ext/um/um_class.c +24 -12
- data/ext/um/um_op.c +29 -13
- data/ext/um/um_ssl.c +24 -27
- data/ext/um/um_stream.c +380 -150
- data/ext/um/um_stream_class.c +119 -63
- data/ext/um/um_utils.c +6 -6
- data/grant-2025/tasks.md +12 -7
- data/lib/uringmachine/fiber_scheduler.rb +36 -10
- data/lib/uringmachine/version.rb +1 -1
- data/lib/uringmachine.rb +60 -19
- data/test/helper.rb +2 -0
- data/test/test_fiber.rb +93 -12
- data/test/test_fiber_scheduler.rb +5 -47
- data/test/test_stream.rb +447 -125
- data/test/test_um.rb +119 -49
- metadata +5 -4
- data/ext/um/um_buffer.c +0 -49
data/ext/um/um.h
CHANGED
|
@@ -77,16 +77,24 @@ enum um_op_kind {
|
|
|
77
77
|
OP_TIMEOUT_MULTISHOT,
|
|
78
78
|
};
|
|
79
79
|
|
|
80
|
+
enum um_stream_mode {
|
|
81
|
+
STREAM_BP_READ,
|
|
82
|
+
STREAM_BP_RECV,
|
|
83
|
+
STREAM_SSL,
|
|
84
|
+
STREAM_STRING,
|
|
85
|
+
STREAM_IO_BUFFER
|
|
86
|
+
};
|
|
80
87
|
|
|
81
88
|
#define OP_F_CQE_SEEN (1U << 0) // CQE has been seen
|
|
82
89
|
#define OP_F_CQE_DONE (1U << 1) // CQE has been seen and operation is done
|
|
83
90
|
#define OP_F_SCHEDULED (1U << 2) // op is on runqueue
|
|
84
91
|
#define OP_F_CANCELED (1U << 3) // op is cancelled (disregard CQE results)
|
|
85
92
|
#define OP_F_MULTISHOT (1U << 4) // op is multishot
|
|
86
|
-
#define OP_F_ASYNC (1U << 5) // op is async (
|
|
87
|
-
#define OP_F_TRANSIENT (1U << 6) // op is on transient list (for GC
|
|
93
|
+
#define OP_F_ASYNC (1U << 5) // op is async (don't schedule fiber)
|
|
94
|
+
#define OP_F_TRANSIENT (1U << 6) // op is on transient list (for GC marking)
|
|
88
95
|
#define OP_F_FREE_IOVECS (1U << 7) // op->iovecs should be freed on release
|
|
89
96
|
#define OP_F_SKIP (1U << 8) // op should be skipped when pulled from runqueue
|
|
97
|
+
#define OP_F_BUFFER_POOL (1U << 9) // multishot op using buffer pool
|
|
90
98
|
|
|
91
99
|
#define OP_F_SELECT_POLLIN (1U << 13) // select POLLIN
|
|
92
100
|
#define OP_F_SELECT_POLLOUT (1U << 14) // select POLLOUT
|
|
@@ -101,9 +109,36 @@ enum um_op_kind {
|
|
|
101
109
|
#define OP_TRANSIENT_P(op) ((op)->flags & OP_F_TRANSIENT)
|
|
102
110
|
#define OP_SKIP_P(op) ((op)->flags & OP_F_SKIP)
|
|
103
111
|
|
|
112
|
+
#define BP_BGID 0xF00B
|
|
113
|
+
#define BP_BR_ENTRIES 1024
|
|
114
|
+
|
|
115
|
+
#define BP_INITIAL_BUFFER_SIZE (1U << 14) // 16KB
|
|
116
|
+
#define BP_INITIAL_COMMIT_THRESHOLD (1U << 16) // 64KB
|
|
117
|
+
#define BP_MAX_BUFFER_SIZE (1U << 20) // 1MB
|
|
118
|
+
|
|
119
|
+
#define BP_MAX_COMMIT_THRESHOLD (BP_MAX_BUFFER_SIZE * BP_BR_ENTRIES) // 1GB
|
|
120
|
+
#define BP_AVAIL_BID_BITMAP_WORDS (BP_BR_ENTRIES / 64)
|
|
121
|
+
|
|
122
|
+
struct um_buffer {
|
|
123
|
+
size_t len;
|
|
124
|
+
size_t pos;
|
|
125
|
+
uint ref_count;
|
|
126
|
+
struct um_buffer *next;
|
|
127
|
+
char buf[];
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
struct um_segment {
|
|
131
|
+
char *ptr;
|
|
132
|
+
size_t len;
|
|
133
|
+
|
|
134
|
+
struct um_buffer *buffer;
|
|
135
|
+
struct um_segment *next;
|
|
136
|
+
};
|
|
137
|
+
|
|
104
138
|
struct um_op_result {
|
|
105
139
|
__s32 res;
|
|
106
140
|
__u32 flags;
|
|
141
|
+
struct um_segment *segment;
|
|
107
142
|
struct um_op_result *next;
|
|
108
143
|
};
|
|
109
144
|
|
|
@@ -128,15 +163,10 @@ struct um_op {
|
|
|
128
163
|
struct iovec *iovecs; // used for vectorized write/send
|
|
129
164
|
siginfo_t siginfo; // used for waitid
|
|
130
165
|
int int_value; // used for getsockopt
|
|
166
|
+
size_t bp_commit_threshold; // buffer pool commit threshold
|
|
131
167
|
};
|
|
132
168
|
};
|
|
133
169
|
|
|
134
|
-
struct um_buffer {
|
|
135
|
-
struct um_buffer *next;
|
|
136
|
-
void *ptr;
|
|
137
|
-
long len;
|
|
138
|
-
};
|
|
139
|
-
|
|
140
170
|
struct buf_ring_descriptor {
|
|
141
171
|
struct io_uring_buf_ring *br;
|
|
142
172
|
size_t br_size;
|
|
@@ -147,19 +177,25 @@ struct buf_ring_descriptor {
|
|
|
147
177
|
};
|
|
148
178
|
|
|
149
179
|
struct um_metrics {
|
|
150
|
-
ulong total_ops;
|
|
151
|
-
ulong total_switches;
|
|
152
|
-
ulong total_waits;
|
|
153
|
-
|
|
154
|
-
uint ops_pending;
|
|
155
|
-
uint ops_unsubmitted;
|
|
156
|
-
uint ops_runqueue;
|
|
157
|
-
uint ops_free;
|
|
158
|
-
uint ops_transient;
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
180
|
+
ulong total_ops; // total ops submitted
|
|
181
|
+
ulong total_switches; // total fiber switches
|
|
182
|
+
ulong total_waits; // total number of CQE waits
|
|
183
|
+
|
|
184
|
+
uint ops_pending; // number of pending ops
|
|
185
|
+
uint ops_unsubmitted; // number of unsubmitted
|
|
186
|
+
uint ops_runqueue; // number of ops in runqueue
|
|
187
|
+
uint ops_free; // number of ops in freelist
|
|
188
|
+
uint ops_transient; // number of ops in transient list
|
|
189
|
+
|
|
190
|
+
uint buffers_allocated; // number of allocated buffers
|
|
191
|
+
uint buffers_free; // number of available buffers
|
|
192
|
+
uint segments_free; // free segments
|
|
193
|
+
size_t buffer_space_allocated; // total allocated buffer space
|
|
194
|
+
size_t buffer_space_commited; // commited buffer space
|
|
195
|
+
|
|
196
|
+
double time_total_wait; // total CPU time waiting for CQEs
|
|
197
|
+
double time_last_cpu; // last seen time stamp
|
|
198
|
+
double time_first_cpu; // last seen time stamp
|
|
163
199
|
};
|
|
164
200
|
|
|
165
201
|
#define BUFFER_RING_MAX_COUNT 10
|
|
@@ -167,8 +203,6 @@ struct um_metrics {
|
|
|
167
203
|
struct um {
|
|
168
204
|
VALUE self;
|
|
169
205
|
|
|
170
|
-
struct um_buffer *buffer_freelist;
|
|
171
|
-
|
|
172
206
|
struct io_uring ring;
|
|
173
207
|
|
|
174
208
|
uint ring_initialized; // is the ring initialized successfully
|
|
@@ -196,6 +230,17 @@ struct um {
|
|
|
196
230
|
|
|
197
231
|
struct um_op *op_freelist;
|
|
198
232
|
struct um_op_result *result_freelist;
|
|
233
|
+
struct um_segment *segment_freelist;
|
|
234
|
+
|
|
235
|
+
struct io_uring_buf_ring *bp_br;
|
|
236
|
+
size_t bp_buffer_size;
|
|
237
|
+
size_t bp_commit_threshold;
|
|
238
|
+
struct um_buffer **bp_commited_buffers;
|
|
239
|
+
uint64_t bp_avail_bid_bitmap[BP_AVAIL_BID_BITMAP_WORDS];
|
|
240
|
+
|
|
241
|
+
struct um_buffer *bp_buffer_freelist;
|
|
242
|
+
uint bp_buffer_count;
|
|
243
|
+
size_t bp_total_commited;
|
|
199
244
|
};
|
|
200
245
|
|
|
201
246
|
struct um_mutex {
|
|
@@ -227,11 +272,21 @@ struct um_async_op {
|
|
|
227
272
|
};
|
|
228
273
|
|
|
229
274
|
struct um_stream {
|
|
275
|
+
VALUE self;
|
|
230
276
|
struct um *machine;
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
277
|
+
|
|
278
|
+
enum um_stream_mode mode;
|
|
279
|
+
union {
|
|
280
|
+
int fd;
|
|
281
|
+
VALUE target;
|
|
282
|
+
};
|
|
283
|
+
|
|
284
|
+
struct um_buffer *working_buffer;
|
|
285
|
+
struct um_op *op;
|
|
286
|
+
struct um_segment *head;
|
|
287
|
+
struct um_segment *tail;
|
|
288
|
+
size_t pending_len;
|
|
289
|
+
size_t pos;
|
|
235
290
|
int eof;
|
|
236
291
|
};
|
|
237
292
|
|
|
@@ -271,12 +326,15 @@ void um_op_list_compact(struct um *machine, struct um_op *head);
|
|
|
271
326
|
void um_op_multishot_results_push(struct um *machine, struct um_op *op, __s32 res, __u32 flags);
|
|
272
327
|
void um_op_multishot_results_clear(struct um *machine, struct um_op *op);
|
|
273
328
|
|
|
329
|
+
struct um_segment *um_segment_alloc(struct um *machine);
|
|
330
|
+
void um_segment_free(struct um *machine, struct um_segment *segment);
|
|
331
|
+
|
|
274
332
|
void um_runqueue_push(struct um *machine, struct um_op *op);
|
|
275
333
|
struct um_op *um_runqueue_shift(struct um *machine);
|
|
276
334
|
|
|
277
335
|
struct um_buffer *um_buffer_checkout(struct um *machine, int len);
|
|
278
|
-
void
|
|
279
|
-
void
|
|
336
|
+
void bp_buffer_checkin(struct um *machine, struct um_buffer *buffer);
|
|
337
|
+
void bp_discard_buffer_freelist(struct um *machine);
|
|
280
338
|
|
|
281
339
|
struct __kernel_timespec um_double_to_timespec(double value);
|
|
282
340
|
double um_timestamp_to_double(__s64 tv_sec, __u32 tv_nsec);
|
|
@@ -375,8 +433,10 @@ VALUE um_queue_pop(struct um *machine, struct um_queue *queue);
|
|
|
375
433
|
VALUE um_queue_unshift(struct um *machine, struct um_queue *queue, VALUE value);
|
|
376
434
|
VALUE um_queue_shift(struct um *machine, struct um_queue *queue);
|
|
377
435
|
|
|
378
|
-
|
|
379
|
-
|
|
436
|
+
void stream_teardown(struct um_stream *stream);
|
|
437
|
+
void stream_clear(struct um_stream *stream);
|
|
438
|
+
VALUE stream_get_line(struct um_stream *stream, VALUE buf, size_t maxlen);
|
|
439
|
+
VALUE stream_get_string(struct um_stream *stream, VALUE out_buffer, ssize_t len, size_t inc, int safe_inc);
|
|
380
440
|
VALUE stream_skip(struct um_stream *stream, size_t len);
|
|
381
441
|
VALUE resp_decode(struct um_stream *stream, VALUE out_buffer);
|
|
382
442
|
void resp_encode(struct um_write_buffer *buf, VALUE obj);
|
|
@@ -396,6 +456,18 @@ void um_sidecar_signal_wake(struct um *machine);
|
|
|
396
456
|
|
|
397
457
|
void um_ssl_set_bio(struct um *machine, VALUE ssl_obj);
|
|
398
458
|
int um_ssl_read(struct um *machine, VALUE ssl, VALUE buf, int maxlen);
|
|
459
|
+
int um_ssl_read_raw(struct um *machine, VALUE ssl_obj, char *ptr, int maxlen);
|
|
399
460
|
int um_ssl_write(struct um *machine, VALUE ssl, VALUE buf, int len);
|
|
400
461
|
|
|
462
|
+
void bp_setup(struct um *machine);
|
|
463
|
+
void bp_teardown(struct um *machine);
|
|
464
|
+
|
|
465
|
+
struct um_segment *bp_get_op_result_segment(struct um *machine, struct um_op *op, __s32 res, __u32 flags);
|
|
466
|
+
void um_segment_checkin(struct um *machine, struct um_segment *segment);
|
|
467
|
+
void bp_handle_enobufs(struct um *machine);
|
|
468
|
+
void bp_ensure_commit_level(struct um *machine);
|
|
469
|
+
struct um_buffer *bp_buffer_checkout(struct um *machine);
|
|
470
|
+
void bp_buffer_checkin(struct um *machine, struct um_buffer *buffer);
|
|
471
|
+
struct um_segment *bp_buffer_consume(struct um *machine, struct um_buffer *buffer, size_t len);
|
|
472
|
+
|
|
401
473
|
#endif // UM_H
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
#include <string.h>
|
|
2
|
+
#include "um.h"
|
|
3
|
+
|
|
4
|
+
#define UM_SEGMENT_ALLOC_BATCH_SIZE 256
|
|
5
|
+
|
|
6
|
+
inline struct um_buffer *buffer_alloc(struct um *machine) {
|
|
7
|
+
struct um_buffer *buffer = malloc(sizeof(struct um_buffer) + machine->bp_buffer_size);
|
|
8
|
+
if (unlikely(!buffer)) {
|
|
9
|
+
fprintf(stderr, "!ENOMEM!\n");
|
|
10
|
+
exit(1);
|
|
11
|
+
rb_syserr_fail(errno, strerror(errno));
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
machine->metrics.buffers_allocated += 1;
|
|
15
|
+
machine->metrics.buffer_space_allocated += machine->bp_buffer_size;
|
|
16
|
+
buffer->len = machine->bp_buffer_size;
|
|
17
|
+
buffer->ref_count = 0;
|
|
18
|
+
return buffer;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
inline struct um_buffer *bp_buffer_checkout(struct um *machine) {
|
|
22
|
+
struct um_buffer *buffer = machine->bp_buffer_freelist;
|
|
23
|
+
if (buffer) {
|
|
24
|
+
struct um_buffer *next = buffer->next;
|
|
25
|
+
machine->bp_buffer_freelist = next;
|
|
26
|
+
machine->metrics.buffers_free--;
|
|
27
|
+
}
|
|
28
|
+
else
|
|
29
|
+
buffer = buffer_alloc(machine);
|
|
30
|
+
|
|
31
|
+
buffer->ref_count++;
|
|
32
|
+
buffer->pos = 0;
|
|
33
|
+
buffer->next = NULL;
|
|
34
|
+
|
|
35
|
+
return buffer;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
inline void buffer_free(struct um *machine, struct um_buffer *buffer) {
|
|
39
|
+
if (unlikely(!machine->ring_initialized || buffer->len != machine->bp_buffer_size)) {
|
|
40
|
+
// The machine is being shut down or working buffer size has changed, so the
|
|
41
|
+
// buffer can be freed.
|
|
42
|
+
machine->metrics.buffers_allocated -= 1;
|
|
43
|
+
machine->metrics.buffer_space_allocated -= buffer->len;
|
|
44
|
+
free(buffer);
|
|
45
|
+
}
|
|
46
|
+
else {
|
|
47
|
+
// otherwise, keep it on the freelist
|
|
48
|
+
buffer->next = machine->bp_buffer_freelist;
|
|
49
|
+
machine->bp_buffer_freelist = buffer;
|
|
50
|
+
machine->metrics.buffers_free++;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
inline void bp_buffer_checkin(struct um *machine, struct um_buffer *buffer) {
|
|
55
|
+
assert(buffer->ref_count > 0);
|
|
56
|
+
buffer->ref_count--;
|
|
57
|
+
if (!buffer->ref_count) buffer_free(machine, buffer);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
inline void bp_discard_buffer_freelist(struct um *machine) {
|
|
61
|
+
while (machine->bp_buffer_freelist) {
|
|
62
|
+
struct um_buffer *buffer = machine->bp_buffer_freelist;
|
|
63
|
+
struct um_buffer *next = buffer->next;
|
|
64
|
+
|
|
65
|
+
machine->metrics.buffers_allocated -= 1;
|
|
66
|
+
machine->metrics.buffer_space_allocated -= buffer->len;
|
|
67
|
+
|
|
68
|
+
free(buffer);
|
|
69
|
+
machine->bp_buffer_freelist = next;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
inline void bp_setup(struct um *machine) {
|
|
74
|
+
int ret;
|
|
75
|
+
machine->bp_br = io_uring_setup_buf_ring(&machine->ring, BP_BR_ENTRIES, BP_BGID, IOU_PBUF_RING_INC, &ret);
|
|
76
|
+
if (unlikely(!machine->bp_br)) rb_syserr_fail(ret, strerror(ret));
|
|
77
|
+
|
|
78
|
+
machine->bp_buffer_size = BP_INITIAL_BUFFER_SIZE;
|
|
79
|
+
machine->bp_commit_threshold = BP_INITIAL_COMMIT_THRESHOLD;
|
|
80
|
+
machine->bp_commited_buffers = malloc(sizeof(struct um_buffer) * BP_BR_ENTRIES);
|
|
81
|
+
memset(machine->bp_commited_buffers, 0, sizeof(struct um_buffer) * BP_BR_ENTRIES);
|
|
82
|
+
memset(machine->bp_avail_bid_bitmap, 0xFF, sizeof(machine->bp_avail_bid_bitmap));
|
|
83
|
+
|
|
84
|
+
machine->bp_buffer_freelist = NULL;
|
|
85
|
+
machine->bp_buffer_count = 0;
|
|
86
|
+
machine->bp_total_commited = 0;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
inline void bp_teardown(struct um *machine) {
|
|
90
|
+
bp_discard_buffer_freelist(machine);
|
|
91
|
+
for (int i = 0; i < BP_BR_ENTRIES; i++) {
|
|
92
|
+
struct um_buffer *buffer = machine->bp_commited_buffers[i];
|
|
93
|
+
if (buffer) bp_buffer_checkin(machine, buffer);
|
|
94
|
+
}
|
|
95
|
+
free(machine->bp_commited_buffers);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Returns the index of the first set bit in the bitmap, or -1 if all bits
// are clear. Bit 0 of word 0 is index 0; each word holds 64 bits.
//
// The original placed the exhaustion check *inside* the scan loop, where its
// condition (word == word_count) could never hold, so an all-zero bitmap
// fell through to an out-of-bounds read of bitmap[word_count]. The check now
// runs after the loop. The bit scan is done with a portable shift loop
// (avoids the non-ISO ffsll declaration).
static inline int bitmap_find_first_set_bit(uint64_t *bitmap, size_t word_count) {
  size_t word = 0;
  while (word < word_count && !bitmap[word]) word++;
  if (word == word_count) return -1; // bitmap exhausted - no set bit

  uint64_t w = bitmap[word]; // nonzero here
  int bit = 0;
  while (!(w & 1)) {
    w >>= 1;
    bit++;
  }
  return (int)(word * 64) + bit;
}
|
|
106
|
+
|
|
107
|
+
// Sets (turns on) the bit at the given index in a bitmap of 64-bit words.
//
// Bug fix: the original shifted the 32-bit constant 1U, which is undefined
// behavior for bit positions >= 32 and in practice corrupts the wrong bit
// for indices 32..63 within a word. A 64-bit constant is required.
static inline void bitmap_set(uint64_t *bitmap, int idx) {
  unsigned word = (unsigned)idx / 64;
  unsigned bit = (unsigned)idx % 64;
  bitmap[word] |= ((uint64_t)1 << bit);
}
|
|
112
|
+
|
|
113
|
+
// Clears (turns off) the bit at the given index in a bitmap of 64-bit words.
//
// Bug fix: same 32-bit shift defect as bitmap_set - shifting 1U by >= 32 is
// undefined behavior, so clearing indices 32..63 within a word cleared the
// wrong bit. A 64-bit constant is required.
static inline void bitmap_unset(uint64_t *bitmap, int idx) {
  unsigned word = (unsigned)idx / 64;
  unsigned bit = (unsigned)idx % 64;
  bitmap[word] &= ~((uint64_t)1 << bit);
}
|
|
118
|
+
|
|
119
|
+
inline int get_available_bid(struct um *machine) {
|
|
120
|
+
return bitmap_find_first_set_bit(machine->bp_avail_bid_bitmap, BP_AVAIL_BID_BITMAP_WORDS);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// Finds an available bid, checks out a buffer and commits it for kernel usage.
|
|
124
|
+
// Returns true if successful.
|
|
125
|
+
static inline int commit_buffer(struct um *machine, int added) {
|
|
126
|
+
static int buf_mask;
|
|
127
|
+
if (!buf_mask) buf_mask = io_uring_buf_ring_mask(BP_BR_ENTRIES);
|
|
128
|
+
|
|
129
|
+
int bid = get_available_bid(machine);
|
|
130
|
+
if (bid < 0) return false;
|
|
131
|
+
|
|
132
|
+
bitmap_unset(machine->bp_avail_bid_bitmap, bid);
|
|
133
|
+
struct um_buffer *buffer = bp_buffer_checkout(machine);
|
|
134
|
+
assert(buffer->ref_count == 0);
|
|
135
|
+
buffer->ref_count = 1;
|
|
136
|
+
|
|
137
|
+
machine->bp_buffer_count++;
|
|
138
|
+
machine->bp_commited_buffers[bid] = buffer;
|
|
139
|
+
machine->bp_total_commited += buffer->len;
|
|
140
|
+
machine->metrics.buffer_space_commited += buffer->len;
|
|
141
|
+
|
|
142
|
+
io_uring_buf_ring_add(machine->bp_br, buffer->buf, buffer->len, bid, buf_mask, added);
|
|
143
|
+
return true;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Removes buffer from bid slot, the buffer having already been entirely
|
|
147
|
+
// consumed by the kernel.
|
|
148
|
+
inline void uncommit_buffer(struct um *machine, struct um_op *op, struct um_buffer *buffer, int bid) {
|
|
149
|
+
machine->bp_buffer_count--;
|
|
150
|
+
machine->bp_commited_buffers[bid] = NULL;
|
|
151
|
+
bitmap_set(machine->bp_avail_bid_bitmap, bid);
|
|
152
|
+
bp_buffer_checkin(machine, buffer);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
inline struct um_buffer *get_buffer(struct um *machine, int bid) {
|
|
156
|
+
struct um_buffer *buffer = machine->bp_commited_buffers[bid];
|
|
157
|
+
assert(buffer);
|
|
158
|
+
return buffer;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
inline int should_commit_more_p(struct um *machine) {
|
|
162
|
+
return (machine->bp_buffer_count < BP_BR_ENTRIES) &&
|
|
163
|
+
(machine->bp_total_commited < machine->bp_commit_threshold);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
inline void bp_ensure_commit_level(struct um *machine) {
|
|
167
|
+
int added = 0;
|
|
168
|
+
while (should_commit_more_p(machine)) {
|
|
169
|
+
if (likely(commit_buffer(machine, added))) added++;
|
|
170
|
+
}
|
|
171
|
+
if (added) io_uring_buf_ring_advance(machine->bp_br, added);
|
|
172
|
+
|
|
173
|
+
// if we get to this point, there's nothing more we can do because we used up
|
|
174
|
+
// all buffer ring entries. We need to wait for the kernel to consume buffers
|
|
175
|
+
// in order to put in more. When a ENOBUFS error is received,
|
|
176
|
+
// bp_handle_enobufs is called and among other things increases the buffer
|
|
177
|
+
// size.
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
inline void bp_handle_enobufs(struct um *machine) {
|
|
181
|
+
if (unlikely(machine->bp_commit_threshold >= BP_MAX_COMMIT_THRESHOLD))
|
|
182
|
+
rb_raise(eUMError, "Buffer starvation");
|
|
183
|
+
|
|
184
|
+
machine->bp_commit_threshold *= 2;
|
|
185
|
+
while (machine->bp_buffer_size < machine->bp_commit_threshold / 4)
|
|
186
|
+
machine->bp_buffer_size *= 2;
|
|
187
|
+
bp_discard_buffer_freelist(machine);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
inline struct um_segment *um_segment_alloc(struct um *machine) {
|
|
191
|
+
if (machine->segment_freelist) {
|
|
192
|
+
struct um_segment *segment = machine->segment_freelist;
|
|
193
|
+
machine->segment_freelist = segment->next;
|
|
194
|
+
machine->metrics.segments_free--;
|
|
195
|
+
segment->next = NULL;
|
|
196
|
+
return segment;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
struct um_segment *batch = malloc(sizeof(struct um_segment) * UM_SEGMENT_ALLOC_BATCH_SIZE);
|
|
200
|
+
memset(batch, 0, sizeof(struct um_segment) * UM_SEGMENT_ALLOC_BATCH_SIZE);
|
|
201
|
+
for (int i = 1; i < (UM_SEGMENT_ALLOC_BATCH_SIZE - 1); i++) {
|
|
202
|
+
batch[i].next = &batch[i + 1];
|
|
203
|
+
}
|
|
204
|
+
machine->segment_freelist = batch + 1;
|
|
205
|
+
machine->metrics.segments_free += (UM_SEGMENT_ALLOC_BATCH_SIZE - 1);
|
|
206
|
+
return batch;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
inline void um_segment_free(struct um *machine, struct um_segment *segment) {
|
|
210
|
+
segment->next = machine->segment_freelist;
|
|
211
|
+
machine->segment_freelist = segment;
|
|
212
|
+
machine->metrics.segments_free++;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
inline struct um_segment *bp_buffer_consume(struct um *machine, struct um_buffer *buffer, size_t len) {
|
|
216
|
+
struct um_segment *segment = um_segment_alloc(machine);
|
|
217
|
+
segment->ptr = buffer->buf + buffer->pos;
|
|
218
|
+
segment->len = len;
|
|
219
|
+
segment->buffer = buffer;
|
|
220
|
+
buffer->pos += len;
|
|
221
|
+
buffer->ref_count++;
|
|
222
|
+
return segment;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
// Converts a CQE result for a buffer pool op into a segment referencing the
// portion of the committed buffer filled by the kernel. Returns NULL for a
// zero-length result. Also adjusts committed-space accounting, replenishes
// the buffer ring, and uncommits the buffer once the kernel signals (via the
// absence of IORING_CQE_F_BUF_MORE) that no more data will be delivered
// into it.
struct um_segment *bp_get_op_result_segment(struct um *machine, struct um_op *op, __s32 res, __u32 flags) {
  assert(res >= 0);
  if (res == 0) return NULL;

  unsigned bid = flags >> IORING_CQE_BUFFER_SHIFT;
  struct um_buffer *buffer = get_buffer(machine, bid);
  struct um_segment *segment = bp_buffer_consume(machine, buffer, res);

  machine->bp_total_commited -= res;
  machine->metrics.buffer_space_commited -= res;
  bp_ensure_commit_level(machine);

  int buffer_exhausted = !(flags & IORING_CQE_F_BUF_MORE);
  if (buffer_exhausted) uncommit_buffer(machine, op, buffer, bid);

  return segment;
}
|
|
242
|
+
|
|
243
|
+
inline void um_segment_checkin(struct um *machine, struct um_segment *segment) {
|
|
244
|
+
bp_buffer_checkin(machine, segment->buffer);
|
|
245
|
+
um_segment_free(machine, segment);
|
|
246
|
+
}
|
data/ext/um/um_class.c
CHANGED
|
@@ -22,6 +22,12 @@ VALUE SYM_ops_free;
|
|
|
22
22
|
VALUE SYM_ops_transient;
|
|
23
23
|
VALUE SYM_time_total_cpu;
|
|
24
24
|
VALUE SYM_time_total_wait;
|
|
25
|
+
VALUE SYM_buffer_groups;
|
|
26
|
+
VALUE SYM_buffers_allocated;
|
|
27
|
+
VALUE SYM_buffers_free;
|
|
28
|
+
VALUE SYM_segments_free;
|
|
29
|
+
VALUE SYM_buffer_space_allocated;
|
|
30
|
+
VALUE SYM_buffer_space_commited;
|
|
25
31
|
|
|
26
32
|
VALUE SYM_wd;
|
|
27
33
|
VALUE SYM_mask;
|
|
@@ -374,7 +380,7 @@ VALUE UM_periodically(VALUE self, VALUE interval) {
|
|
|
374
380
|
* which the data will be read. A negative `buffer_offset` denotes a position
|
|
375
381
|
* relative to the end of the buffer, e.g. a value of `-1` means the data will
|
|
376
382
|
* be appended to the buffer.
|
|
377
|
-
*
|
|
383
|
+
*
|
|
378
384
|
* - https://www.man7.org/linux/man-pages/man2/read.2.html
|
|
379
385
|
* - https://www.man7.org/linux/man-pages/man3/io_uring_prep_read.3.html
|
|
380
386
|
*
|
|
@@ -1521,17 +1527,23 @@ void Init_UM(void) {
|
|
|
1521
1527
|
|
|
1522
1528
|
um_define_net_constants(cUM);
|
|
1523
1529
|
|
|
1524
|
-
SYM_size
|
|
1525
|
-
SYM_total_ops
|
|
1526
|
-
SYM_total_switches
|
|
1527
|
-
SYM_total_waits
|
|
1528
|
-
SYM_ops_pending
|
|
1529
|
-
SYM_ops_unsubmitted
|
|
1530
|
-
SYM_ops_runqueue
|
|
1531
|
-
SYM_ops_free
|
|
1532
|
-
SYM_ops_transient
|
|
1533
|
-
SYM_time_total_cpu
|
|
1534
|
-
SYM_time_total_wait
|
|
1530
|
+
SYM_size = ID2SYM(rb_intern("size"));
|
|
1531
|
+
SYM_total_ops = ID2SYM(rb_intern("total_ops"));
|
|
1532
|
+
SYM_total_switches = ID2SYM(rb_intern("total_switches"));
|
|
1533
|
+
SYM_total_waits = ID2SYM(rb_intern("total_waits"));
|
|
1534
|
+
SYM_ops_pending = ID2SYM(rb_intern("ops_pending"));
|
|
1535
|
+
SYM_ops_unsubmitted = ID2SYM(rb_intern("ops_unsubmitted"));
|
|
1536
|
+
SYM_ops_runqueue = ID2SYM(rb_intern("ops_runqueue"));
|
|
1537
|
+
SYM_ops_free = ID2SYM(rb_intern("ops_free"));
|
|
1538
|
+
SYM_ops_transient = ID2SYM(rb_intern("ops_transient"));
|
|
1539
|
+
SYM_time_total_cpu = ID2SYM(rb_intern("time_total_cpu"));
|
|
1540
|
+
SYM_time_total_wait = ID2SYM(rb_intern("time_total_wait"));
|
|
1541
|
+
SYM_buffer_groups = ID2SYM(rb_intern("buffer_groups"));
|
|
1542
|
+
SYM_buffers_allocated = ID2SYM(rb_intern("buffers_allocated"));
|
|
1543
|
+
SYM_buffers_free = ID2SYM(rb_intern("buffers_free"));
|
|
1544
|
+
SYM_segments_free = ID2SYM(rb_intern("segments_free"));
|
|
1545
|
+
SYM_buffer_space_allocated = ID2SYM(rb_intern("buffer_space_allocated"));
|
|
1546
|
+
SYM_buffer_space_commited = ID2SYM(rb_intern("buffer_space_commited"));
|
|
1535
1547
|
|
|
1536
1548
|
SYM_wd = ID2SYM(rb_intern("wd"));
|
|
1537
1549
|
SYM_mask = ID2SYM(rb_intern("mask"));
|
data/ext/um/um_op.c
CHANGED
|
@@ -114,42 +114,56 @@ inline void um_op_list_compact(struct um *machine, struct um_op *head) {
|
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
+
#define UM_OP_BATCH_ALLOC_SIZE (sizeof(struct um_op_result) * UM_OP_RESULT_ALLOC_BATCH_SIZE)
|
|
118
|
+
|
|
117
119
|
inline struct um_op_result *multishot_result_alloc(struct um *machine) {
|
|
118
120
|
if (machine->result_freelist) {
|
|
119
121
|
struct um_op_result *result = machine->result_freelist;
|
|
120
122
|
machine->result_freelist = result->next;
|
|
123
|
+
result->segment = NULL;
|
|
124
|
+
result->next = NULL;
|
|
121
125
|
return result;
|
|
122
126
|
}
|
|
123
127
|
|
|
124
|
-
struct um_op_result *batch = malloc(
|
|
128
|
+
struct um_op_result *batch = malloc(UM_OP_BATCH_ALLOC_SIZE);
|
|
129
|
+
memset(batch, 0, UM_OP_BATCH_ALLOC_SIZE);
|
|
125
130
|
for (int i = 1; i < (UM_OP_RESULT_ALLOC_BATCH_SIZE - 1); i++) {
|
|
126
|
-
batch[i].next =
|
|
131
|
+
batch[i].next = batch + i + 1;
|
|
127
132
|
}
|
|
128
133
|
machine->result_freelist = batch + 1;
|
|
129
134
|
return batch;
|
|
130
135
|
}
|
|
131
136
|
|
|
132
137
|
inline void multishot_result_free(struct um *machine, struct um_op_result *result) {
|
|
138
|
+
if (result->segment) {
|
|
139
|
+
um_segment_free(machine, result->segment);
|
|
140
|
+
result->segment = NULL;
|
|
141
|
+
}
|
|
142
|
+
|
|
133
143
|
result->next = machine->result_freelist;
|
|
134
144
|
machine->result_freelist = result;
|
|
135
145
|
}
|
|
136
146
|
|
|
137
147
|
inline void um_op_multishot_results_push(struct um *machine, struct um_op *op, __s32 res, __u32 flags) {
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
op->result
|
|
142
|
-
op->multishot_result_tail = &op->result;
|
|
143
|
-
}
|
|
148
|
+
struct um_op_result *result;
|
|
149
|
+
|
|
150
|
+
if (!op->multishot_result_count)
|
|
151
|
+
result = &op->result;
|
|
144
152
|
else {
|
|
145
|
-
|
|
146
|
-
result->res = res;
|
|
147
|
-
result->flags = flags;
|
|
148
|
-
result->next = NULL;
|
|
153
|
+
result = multishot_result_alloc(machine);
|
|
149
154
|
op->multishot_result_tail->next = result;
|
|
150
|
-
op->multishot_result_tail = result;
|
|
151
155
|
}
|
|
156
|
+
|
|
157
|
+
result->res = res;
|
|
158
|
+
result->flags = flags;
|
|
159
|
+
result->next = NULL;
|
|
160
|
+
if (op->flags & OP_F_BUFFER_POOL && res >= 0) {
|
|
161
|
+
result->segment = bp_get_op_result_segment(machine, op, res, flags);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
op->multishot_result_tail = result;
|
|
152
165
|
op->multishot_result_count++;
|
|
166
|
+
|
|
153
167
|
}
|
|
154
168
|
|
|
155
169
|
inline void um_op_multishot_results_clear(struct um *machine, struct um_op *op) {
|
|
@@ -161,6 +175,7 @@ inline void um_op_multishot_results_clear(struct um *machine, struct um_op *op)
|
|
|
161
175
|
multishot_result_free(machine, result);
|
|
162
176
|
result = next;
|
|
163
177
|
}
|
|
178
|
+
op->result.next = NULL;
|
|
164
179
|
op->multishot_result_tail = NULL;
|
|
165
180
|
op->multishot_result_count = 0;
|
|
166
181
|
}
|
|
@@ -177,6 +192,7 @@ inline struct um_op *um_op_alloc(struct um *machine) {
|
|
|
177
192
|
for (int i = 1; i < (UM_OP_ALLOC_BATCH_SIZE - 1); i++) {
|
|
178
193
|
batch[i].next = &batch[i + 1];
|
|
179
194
|
}
|
|
195
|
+
batch[UM_OP_ALLOC_BATCH_SIZE - 1].next = NULL;
|
|
180
196
|
machine->op_freelist = batch + 1;
|
|
181
197
|
machine->metrics.ops_free += (UM_OP_ALLOC_BATCH_SIZE - 1);
|
|
182
198
|
return batch;
|