uringmachine 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/README.md +85 -0
  4. data/TODO.md +5 -0
  5. data/examples/echo_server.rb +18 -40
  6. data/examples/inout.rb +19 -0
  7. data/examples/nc.rb +36 -0
  8. data/ext/um/extconf.rb +6 -15
  9. data/ext/um/um.c +245 -53
  10. data/ext/um/um.h +21 -9
  11. data/ext/um/um_class.c +74 -87
  12. data/ext/um/um_const.c +184 -0
  13. data/ext/um/um_op.c +10 -13
  14. data/ext/um/um_utils.c +48 -3
  15. data/lib/uringmachine/version.rb +1 -1
  16. data/lib/uringmachine.rb +12 -0
  17. data/test/helper.rb +8 -0
  18. data/test/test_um.rb +227 -7
  19. data/vendor/liburing/.github/workflows/build.yml +29 -1
  20. data/vendor/liburing/.gitignore +1 -0
  21. data/vendor/liburing/CHANGELOG +15 -0
  22. data/vendor/liburing/CONTRIBUTING.md +165 -0
  23. data/vendor/liburing/configure +32 -0
  24. data/vendor/liburing/examples/Makefile +8 -1
  25. data/vendor/liburing/examples/kdigest.c +405 -0
  26. data/vendor/liburing/examples/proxy.c +75 -8
  27. data/vendor/liburing/liburing.pc.in +1 -1
  28. data/vendor/liburing/src/Makefile +16 -2
  29. data/vendor/liburing/src/include/liburing/io_uring.h +31 -0
  30. data/vendor/liburing/src/include/liburing/sanitize.h +39 -0
  31. data/vendor/liburing/src/include/liburing.h +31 -4
  32. data/vendor/liburing/src/liburing-ffi.map +5 -0
  33. data/vendor/liburing/src/liburing.map +1 -0
  34. data/vendor/liburing/src/queue.c +3 -0
  35. data/vendor/liburing/src/register.c +36 -0
  36. data/vendor/liburing/src/sanitize.c +176 -0
  37. data/vendor/liburing/src/setup.c +1 -1
  38. data/vendor/liburing/test/35fa71a030ca.c +7 -0
  39. data/vendor/liburing/test/500f9fbadef8.c +2 -0
  40. data/vendor/liburing/test/7ad0e4b2f83c.c +0 -25
  41. data/vendor/liburing/test/917257daa0fe.c +7 -0
  42. data/vendor/liburing/test/Makefile +31 -4
  43. data/vendor/liburing/test/a0908ae19763.c +7 -0
  44. data/vendor/liburing/test/a4c0b3decb33.c +7 -0
  45. data/vendor/liburing/test/accept.c +14 -4
  46. data/vendor/liburing/test/b19062a56726.c +7 -0
  47. data/vendor/liburing/test/bind-listen.c +2 -2
  48. data/vendor/liburing/test/buf-ring-nommap.c +10 -3
  49. data/vendor/liburing/test/buf-ring.c +2 -0
  50. data/vendor/liburing/test/coredump.c +7 -0
  51. data/vendor/liburing/test/cq-overflow.c +13 -1
  52. data/vendor/liburing/test/d4ae271dfaae.c +11 -3
  53. data/vendor/liburing/test/defer-taskrun.c +2 -2
  54. data/vendor/liburing/test/defer-tw-timeout.c +4 -1
  55. data/vendor/liburing/test/defer.c +2 -2
  56. data/vendor/liburing/test/double-poll-crash.c +1 -1
  57. data/vendor/liburing/test/eeed8b54e0df.c +2 -0
  58. data/vendor/liburing/test/eventfd.c +0 -1
  59. data/vendor/liburing/test/exit-no-cleanup.c +11 -0
  60. data/vendor/liburing/test/fadvise.c +9 -26
  61. data/vendor/liburing/test/fdinfo.c +9 -1
  62. data/vendor/liburing/test/file-register.c +14 -2
  63. data/vendor/liburing/test/file-update.c +1 -1
  64. data/vendor/liburing/test/file-verify.c +27 -16
  65. data/vendor/liburing/test/files-exit-hang-timeout.c +1 -2
  66. data/vendor/liburing/test/fixed-buf-iter.c +3 -1
  67. data/vendor/liburing/test/fixed-hugepage.c +12 -1
  68. data/vendor/liburing/test/fsnotify.c +1 -0
  69. data/vendor/liburing/test/futex.c +16 -4
  70. data/vendor/liburing/test/helpers.c +47 -0
  71. data/vendor/liburing/test/helpers.h +6 -0
  72. data/vendor/liburing/test/init-mem.c +5 -3
  73. data/vendor/liburing/test/io-cancel.c +0 -24
  74. data/vendor/liburing/test/io_uring_passthrough.c +2 -0
  75. data/vendor/liburing/test/io_uring_register.c +25 -6
  76. data/vendor/liburing/test/iopoll-leak.c +4 -0
  77. data/vendor/liburing/test/iopoll-overflow.c +1 -1
  78. data/vendor/liburing/test/iopoll.c +3 -3
  79. data/vendor/liburing/test/kallsyms.c +203 -0
  80. data/vendor/liburing/test/link-timeout.c +159 -0
  81. data/vendor/liburing/test/linked-defer-close.c +224 -0
  82. data/vendor/liburing/test/madvise.c +12 -25
  83. data/vendor/liburing/test/min-timeout-wait.c +0 -25
  84. data/vendor/liburing/test/min-timeout.c +0 -25
  85. data/vendor/liburing/test/mkdir.c +6 -0
  86. data/vendor/liburing/test/msg-ring.c +8 -2
  87. data/vendor/liburing/test/napi-test.c +15 -2
  88. data/vendor/liburing/test/no-mmap-inval.c +2 -0
  89. data/vendor/liburing/test/nop.c +44 -0
  90. data/vendor/liburing/test/ooo-file-unreg.c +1 -1
  91. data/vendor/liburing/test/open-close.c +40 -0
  92. data/vendor/liburing/test/openat2.c +37 -14
  93. data/vendor/liburing/test/poll-many.c +13 -7
  94. data/vendor/liburing/test/poll-mshot-update.c +17 -10
  95. data/vendor/liburing/test/poll-v-poll.c +6 -3
  96. data/vendor/liburing/test/pollfree.c +148 -0
  97. data/vendor/liburing/test/read-mshot-empty.c +156 -153
  98. data/vendor/liburing/test/read-mshot.c +276 -27
  99. data/vendor/liburing/test/read-write.c +78 -13
  100. data/vendor/liburing/test/recv-msgall-stream.c +3 -0
  101. data/vendor/liburing/test/recv-msgall.c +5 -0
  102. data/vendor/liburing/test/recvsend_bundle-inc.c +680 -0
  103. data/vendor/liburing/test/recvsend_bundle.c +92 -29
  104. data/vendor/liburing/test/reg-fd-only.c +14 -4
  105. data/vendor/liburing/test/regbuf-clone.c +187 -0
  106. data/vendor/liburing/test/regbuf-merge.c +7 -0
  107. data/vendor/liburing/test/register-restrictions.c +86 -85
  108. data/vendor/liburing/test/rename.c +59 -1
  109. data/vendor/liburing/test/ringbuf-read.c +5 -0
  110. data/vendor/liburing/test/ringbuf-status.c +5 -1
  111. data/vendor/liburing/test/runtests.sh +16 -1
  112. data/vendor/liburing/test/send-zerocopy.c +59 -0
  113. data/vendor/liburing/test/short-read.c +1 -0
  114. data/vendor/liburing/test/socket.c +43 -0
  115. data/vendor/liburing/test/splice.c +3 -1
  116. data/vendor/liburing/test/sq-poll-dup.c +1 -1
  117. data/vendor/liburing/test/sq-poll-share.c +2 -0
  118. data/vendor/liburing/test/sqpoll-disable-exit.c +8 -0
  119. data/vendor/liburing/test/sqpoll-exit-hang.c +1 -25
  120. data/vendor/liburing/test/sqpoll-sleep.c +1 -25
  121. data/vendor/liburing/test/statx.c +89 -0
  122. data/vendor/liburing/test/stdout.c +2 -0
  123. data/vendor/liburing/test/submit-and-wait.c +1 -25
  124. data/vendor/liburing/test/submit-reuse.c +4 -26
  125. data/vendor/liburing/test/symlink.c +12 -1
  126. data/vendor/liburing/test/sync-cancel.c +48 -21
  127. data/vendor/liburing/test/thread-exit.c +5 -0
  128. data/vendor/liburing/test/timeout-new.c +1 -26
  129. data/vendor/liburing/test/timeout.c +12 -26
  130. data/vendor/liburing/test/unlink.c +94 -1
  131. data/vendor/liburing/test/uring_cmd_ublk.c +1252 -0
  132. data/vendor/liburing/test/waitid.c +62 -8
  133. data/vendor/liburing/test/wq-aff.c +35 -0
  134. data/vendor/liburing/test/xfail_prep_link_timeout_out_of_scope.c +46 -0
  135. data/vendor/liburing/test/xfail_register_buffers_out_of_scope.c +51 -0
  136. metadata +17 -4
  137. data/examples/event_loop.rb +0 -69
  138. data/examples/fibers.rb +0 -105
@@ -0,0 +1,405 @@
1
+ /* SPDX-License-Identifier: MIT */
2
+ /*
3
+ * Proof-of-concept for doing file digests using the kernel's AF_ALG API.
4
+ * Needs a bit of error handling.
5
+ */
6
+ #include <stdio.h>
7
+ #include <fcntl.h>
8
+ #include <string.h>
9
+ #include <stdlib.h>
10
+ #include <unistd.h>
11
+ #include <assert.h>
12
+ #include <errno.h>
13
+ #include <inttypes.h>
14
+ #include <sys/types.h>
15
+ #include <sys/stat.h>
16
+ #include <sys/ioctl.h>
17
+ #include <linux/if_alg.h>
18
+ #include "liburing.h"
19
+
20
+ #define QD 64
21
+ #define WAIT_BATCH (QD / 8)
22
+ #define BS (64*1024)
23
+
24
+ #define BGID 1
25
+ #define BID_MASK (QD - 1)
26
+
27
/*
 * Life cycle of one slot in the request array. The values are consecutive
 * on purpose: a completion moves a slot forward with a plain increment
 * (IO_READ -> IO_READ_COMPLETE, IO_WRITE -> IO_WRITE_COMPLETE).
 */
enum req_state {
	IO_INIT			= 0,	/* slot never used */
	IO_READ			= 1,	/* read submitted, not yet completed */
	IO_READ_COMPLETE	= 2,	/* data read, waiting to be sent */
	IO_WRITE		= 3,	/* send submitted, not yet completed */
	IO_WRITE_COMPLETE	= 4,	/* data sent, slot can be reused */
};
34
+
35
+ struct req { /* one in-flight unit of work: a single buffer slot being read, then sent */
36
+ off_t offset; /* input file offset the read for this slot was issued at */
37
+ enum req_state state; /* where this slot currently is in the read -> send cycle */
38
+ struct iovec iov; /* start and length of the I/O buffer slice used by this slot */
39
+ };
40
+
41
+ struct kdigest { /* all per-run state: the ring, the optional send buffer ring, and the slots */
42
+ struct io_uring ring; /* ring used for reads, sends and the final recv */
43
+ struct io_uring_buf_ring *br; /* send buffer ring; NULL selects the linked-send path */
44
+ struct req reqs[QD]; /* one slot per buffer, used as a circular array */
45
+ /* heap allocated, aligned QD*BS buffer */
46
+ uint8_t *bufs; /* slot i uses bufs[i * BS] onwards */
47
+ };
48
+
49
+ static int infd, outfd;
50
+
51
/*
 * Report the size in bytes of the file behind 'fd' through '*size'.
 *
 * Regular files use the size reported by fstat(); block devices are queried
 * with the BLKGETSIZE64 ioctl. Returns 0 on success, or -1 if fstat() or the
 * ioctl fails, or if 'fd' is neither a regular file nor a block device.
 * '*size' is only written on success.
 */
static int get_file_size(int fd, size_t *size)
{
	struct stat sb;
	unsigned long long dev_bytes;

	if (fstat(fd, &sb) < 0)
		return -1;

	if (S_ISREG(sb.st_mode)) {
		*size = sb.st_size;
		return 0;
	}

	/* pipes, sockets, char devices etc. have no usable size */
	if (!S_ISBLK(sb.st_mode))
		return -1;

	if (ioctl(fd, BLKGETSIZE64, &dev_bytes) != 0)
		return -1;

	*size = dev_bytes;
	return 0;
}
72
+
73
+ static int reap_completions(struct io_uring *ring, int *inflight,
74
+ size_t *outsize)
75
+ {
76
+ struct io_uring_cqe *cqe;
77
+ unsigned head;
78
+ int ret = 0, nr;
79
+
80
+ nr = 0;
81
+ io_uring_for_each_cqe(ring, head, cqe) {
82
+ struct req *req;
83
+
84
+ req = io_uring_cqe_get_data(cqe);
85
+ assert(req->state == IO_READ || req->state == IO_WRITE);
86
+ if (cqe->res < 0) {
87
+ fprintf(stderr, "%s: cqe error %d\n",
88
+ req->state == IO_WRITE ? "send" : "read",
89
+ cqe->res);
90
+ *outsize = 0;
91
+ ret = 1;
92
+ break;
93
+ }
94
+
95
+ (*inflight)--;
96
+ req->state++;
97
+ if (req->state == IO_WRITE_COMPLETE)
98
+ *outsize -= cqe->res;
99
+ nr++;
100
+ }
101
+
102
+ io_uring_cq_advance(ring, nr);
103
+ return ret;
104
+ }
105
+
106
+ /*
107
+ * Add buffers to the outgoing ring, and submit a single bundle send that
108
+ * will finish when all of them have completed.
109
+ */
110
+ static void submit_sends_br(struct kdigest *kdigest, int *write_idx,
111
+ int *inflight)
112
+ {
113
+ struct io_uring_buf_ring *br = kdigest->br;
114
+ struct req *req, *first_req = NULL;
115
+ struct io_uring_sqe *sqe;
116
+ int nr = 0;
117
+
118
+ /*
119
+ * Find any completed reads, and add the buffers to the outgoing
120
+ * send ring. That will serialize the data sent.
121
+ */
122
+ while (kdigest->reqs[*write_idx].state == IO_READ_COMPLETE) {
123
+ req = &kdigest->reqs[*write_idx];
124
+ io_uring_buf_ring_add(br, req->iov.iov_base, req->iov.iov_len,
125
+ *write_idx, BID_MASK, nr++);
126
+ /*
127
+ * Mark as a write/send if it's the first one, that serve
128
+ * as the "barrier" in the array. The rest can be marked
129
+ * complete upfront, if there's more in this bundle, as
130
+ * the first will serve a the stopping point.
131
+ */
132
+ if (!first_req) {
133
+ req->state = IO_WRITE;
134
+ first_req = req;
135
+ } else {
136
+ req->state = IO_WRITE_COMPLETE;
137
+ }
138
+ *write_idx = (*write_idx + 1) % QD;
139
+ }
140
+
141
+ /*
142
+ * If any completed reads were found and we added buffers, advance
143
+ * the buffer ring and prepare a single bundle send for all of them.
144
+ */
145
+ if (first_req) {
146
+ io_uring_buf_ring_advance(br, nr);
147
+
148
+ sqe = io_uring_get_sqe(&kdigest->ring);
149
+ io_uring_prep_send_bundle(sqe, outfd, 0, MSG_MORE);
150
+ sqe->flags |= IOSQE_BUFFER_SELECT;
151
+ sqe->buf_group = BGID;
152
+ io_uring_sqe_set_data(sqe, first_req);
153
+ (*inflight)++;
154
+ }
155
+ }
156
+
157
+ /*
158
+ * Serialize multiple writes with IOSQE_IO_LINK. Not the most efficient
159
+ * way, as it's both more expensive on the kernel side to handle link, and
160
+ * if there's bundle support, all of the below can be done with a single
161
+ * send rather than multiple ones.
162
+ */
163
+ static void submit_sends_linked(struct kdigest *kdigest, int *write_idx,
164
+ int *inflight)
165
+ {
166
+ struct io_uring_sqe *sqe;
167
+ struct req *req;
168
+
169
+ /* Queue up any possible writes. Link flag ensures ordering. */
170
+ sqe = NULL;
171
+ while (kdigest->reqs[*write_idx].state == IO_READ_COMPLETE) {
172
+ if (sqe)
173
+ sqe->flags |= IOSQE_IO_LINK;
174
+
175
+ req = &kdigest->reqs[*write_idx];
176
+ req->state = IO_WRITE;
177
+ sqe = io_uring_get_sqe(&kdigest->ring);
178
+ io_uring_prep_send(sqe, outfd, req->iov.iov_base,
179
+ req->iov.iov_len, MSG_MORE);
180
+ io_uring_sqe_set_data(sqe, req);
181
+ (*inflight)++;
182
+
183
+ *write_idx = (*write_idx + 1) % QD;
184
+ }
185
+ }
186
+
187
+ static void submit_sends(struct kdigest *kdigest, int *write_idx, int *inflight)
188
+ {
189
+ if (kdigest->br)
190
+ submit_sends_br(kdigest, write_idx, inflight);
191
+ else
192
+ submit_sends_linked(kdigest, write_idx, inflight);
193
+ }
194
+
195
+ static int digest_file(struct kdigest *kdigest, size_t insize)
196
+ {
197
+ struct io_uring *ring = &kdigest->ring;
198
+ off_t read_off = 0;
199
+ size_t outsize = insize;
200
+ int read_idx = 0, write_idx = 0, inflight = 0;
201
+
202
+ while (outsize) {
203
+ struct io_uring_sqe *sqe;
204
+ struct req *req;
205
+ int to_wait;
206
+
207
+ submit_sends(kdigest, &write_idx, &inflight);
208
+
209
+ /* Queue up any reads. Completions may arrive out of order. */
210
+ while (insize && (kdigest->reqs[read_idx].state == IO_INIT
211
+ || kdigest->reqs[read_idx].state == IO_WRITE_COMPLETE)) {
212
+ size_t this_size = (insize < BS ? insize : BS);
213
+
214
+ req = &kdigest->reqs[read_idx];
215
+ req->state = IO_READ;
216
+ req->offset = read_off;
217
+ req->iov.iov_base = &kdigest->bufs[read_idx * BS];
218
+ req->iov.iov_len = this_size;
219
+
220
+ sqe = io_uring_get_sqe(ring);
221
+ io_uring_prep_read(sqe, infd, req->iov.iov_base,
222
+ req->iov.iov_len, read_off);
223
+ io_uring_sqe_set_data(sqe, req);
224
+
225
+ read_off += this_size;
226
+ insize -= this_size;
227
+ inflight++;
228
+
229
+ read_idx = (read_idx + 1) % QD;
230
+ }
231
+
232
+ /* wait for about half queue completion before resubmit */
233
+ for (to_wait = (inflight >> 1) | 1; to_wait; to_wait--) {
234
+ int ret, wait_nr;
235
+
236
+ wait_nr = inflight;
237
+ if (wait_nr > WAIT_BATCH)
238
+ wait_nr = WAIT_BATCH;
239
+
240
+ ret = io_uring_submit_and_wait(ring, wait_nr);
241
+ if (ret < 0) {
242
+ fprintf(stderr, "wait cqe: %s\n",
243
+ strerror(-ret));
244
+ return 1;
245
+ }
246
+
247
+ if (reap_completions(ring, &inflight, &outsize))
248
+ return 1;
249
+ }
250
+ }
251
+ assert(!inflight);
252
+
253
+ return 0;
254
+ }
255
+
256
+ static int get_result(struct kdigest *kdigest, const char *alg, const char *file)
257
+ {
258
+ struct io_uring *ring = &kdigest->ring;
259
+ struct io_uring_sqe *sqe;
260
+ struct io_uring_cqe *cqe;
261
+ int i, ret;
262
+ /* reuse I/O buf block to stash hash result */
263
+
264
+ sqe = io_uring_get_sqe(ring);
265
+ io_uring_prep_recv(sqe, outfd, kdigest->bufs, BS, 0);
266
+
267
+ if (io_uring_submit_and_wait(ring, 1) < 0)
268
+ return 1;
269
+
270
+ ret = io_uring_peek_cqe(ring, &cqe);
271
+ if (ret < 0) {
272
+ fprintf(stderr, "peek cqe: %s\n", strerror(-ret));
273
+ return 1;
274
+ }
275
+
276
+ if (cqe->res < 0) {
277
+ fprintf(stderr, "cqe error: %s\n", strerror(-cqe->res));
278
+ goto err;
279
+ }
280
+
281
+ fprintf(stdout, "uring %s%s(%s) returned(len=%u): ",
282
+ kdigest->br ? "bundled " : "", alg, file, cqe->res);
283
+ for (i = 0; i < cqe->res; i++)
284
+ fprintf(stdout, "%02x", kdigest->bufs[i]);
285
+ putc('\n', stdout);
286
+ ret = 0;
287
+ err:
288
+ io_uring_cqe_seen(ring, cqe);
289
+ return ret;
290
+ }
291
+
292
+ int main(int argc, char *argv[])
293
+ {
294
+ const char *alg;
295
+ const char *infile;
296
+ size_t alg_len, insize;
297
+ struct sockaddr_alg sa = {
298
+ .salg_family = AF_ALG,
299
+ .salg_type = "hash",
300
+ };
301
+ struct kdigest kdigest = { };
302
+ struct io_uring_params p = { };
303
+ int sfd, ret;
304
+
305
+ if (argc < 3) {
306
+ fprintf(stderr, "%s: algorithm infile\n", argv[0]);
307
+ return 1;
308
+ }
309
+
310
+ alg = argv[1];
311
+ infile = argv[2];
312
+ alg_len = strlen(alg);
313
+ if (alg_len >= sizeof(sa.salg_name)) {
314
+ fprintf(stderr, "algorithm name too long\n");
315
+ return 1;
316
+ }
317
+ /* +1 for null terminator */
318
+ memcpy(sa.salg_name, alg, alg_len + 1);
319
+
320
+ infd = open(infile, O_RDONLY);
321
+ if (infd < 0) {
322
+ perror("open infile");
323
+ return 1;
324
+ }
325
+
326
+ sfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
327
+ if (sfd < 0) {
328
+ if (errno == EAFNOSUPPORT)
329
+ fprintf(stderr, "kernel AF_ALG support not available. "
330
+ "CONFIG_CRYPTO_USER_API_HASH required.\n");
331
+ else
332
+ perror("AF_ALG socket");
333
+ return 1;
334
+ }
335
+
336
+ if (bind(sfd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
337
+ if (errno == ENOENT)
338
+ fprintf(stderr, "AF_ALG bind(%s): hash not available. "
339
+ "See /proc/crypto hash algorithm list.\n",
340
+ alg);
341
+ else
342
+ fprintf(stderr, "AF_ALG bind(%s): %s\n",
343
+ alg, strerror(errno));
344
+ return 1;
345
+ }
346
+
347
+ outfd = accept(sfd, NULL, 0);
348
+ if (outfd < 0) {
349
+ perror("AF_ALG accept");
350
+ return 1;
351
+ }
352
+
353
+ if (posix_memalign((void **)&kdigest.bufs, 4096, QD * BS)) {
354
+ fprintf(stderr, "failed to alloc I/O bufs\n");
355
+ return 1;
356
+ }
357
+
358
+ p.flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
359
+ do {
360
+ ret = io_uring_queue_init_params(QD, &kdigest.ring, &p);
361
+ if (!ret)
362
+ break;
363
+ if (!p.flags) {
364
+ fprintf(stderr, "queue_init: %s\n", strerror(-ret));
365
+ return 1;
366
+ }
367
+ p.flags = 0;
368
+ } while (1);
369
+
370
+ /* use send bundles, if available */
371
+ if (p.features & IORING_FEAT_RECVSEND_BUNDLE) {
372
+ kdigest.br = io_uring_setup_buf_ring(&kdigest.ring, QD, BGID, 0, &ret);
373
+ if (!kdigest.br) {
374
+ fprintf(stderr, "Failed setting up bundle buffer ring: %d\n", ret);
375
+ return 1;
376
+ }
377
+ }
378
+
379
+ if (get_file_size(infd, &insize))
380
+ return 1;
381
+
382
+ ret = digest_file(&kdigest, insize);
383
+ if (ret) {
384
+ fprintf(stderr, "%s digest failed\n", alg);
385
+ return 1;
386
+ }
387
+
388
+ ret = get_result(&kdigest, alg, infile);
389
+ if (ret) {
390
+ fprintf(stderr, "failed to retrieve %s digest result\n", alg);
391
+ return 1;
392
+ }
393
+
394
+ if (kdigest.br)
395
+ io_uring_free_buf_ring(&kdigest.ring, kdigest.br, QD, BGID);
396
+ io_uring_queue_exit(&kdigest.ring);
397
+ free(kdigest.bufs);
398
+ if (close(infd) < 0)
399
+ ret |= 1;
400
+ if (close(sfd) < 0)
401
+ ret |= 1;
402
+ if (close(outfd) < 0)
403
+ ret |= 1;
404
+ return ret;
405
+ }
@@ -82,6 +82,7 @@ static char *host = "192.168.3.2";
82
82
  static int send_port = 4445;
83
83
  static int receive_port = 4444;
84
84
  static int buf_size = 32;
85
+ static int buf_ring_inc;
85
86
  static int bidi;
86
87
  static int ipv6;
87
88
  static int napi;
@@ -356,6 +357,7 @@ static void free_buffer_rings(struct io_uring *ring, struct conn *c)
356
357
  static int setup_recv_ring(struct io_uring *ring, struct conn *c)
357
358
  {
358
359
  struct conn_buf_ring *cbr = &c->in_br;
360
+ int br_flags = 0;
359
361
  int ret, i;
360
362
  size_t len;
361
363
  void *ptr;
@@ -375,7 +377,9 @@ static int setup_recv_ring(struct io_uring *ring, struct conn *c)
375
377
  return 1;
376
378
  }
377
379
  }
378
- cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, 0, &ret);
380
+ if (buf_ring_inc)
381
+ br_flags = IOU_PBUF_RING_INC;
382
+ cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, br_flags, &ret);
379
383
  if (!cbr->br) {
380
384
  fprintf(stderr, "Buffer ring register failed %d\n", ret);
381
385
  return 1;
@@ -401,9 +405,12 @@ static int setup_recv_ring(struct io_uring *ring, struct conn *c)
401
405
  static int setup_send_ring(struct io_uring *ring, struct conn *c)
402
406
  {
403
407
  struct conn_buf_ring *cbr = &c->out_br;
408
+ int br_flags = 0;
404
409
  int ret;
405
410
 
406
- cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, 0, &ret);
411
+ if (buf_ring_inc)
412
+ br_flags = IOU_PBUF_RING_INC;
413
+ cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, br_flags, &ret);
407
414
  if (!cbr->br) {
408
415
  fprintf(stderr, "Buffer ring register failed %d\n", ret);
409
416
  return 1;
@@ -1148,6 +1155,32 @@ static int recv_done_res(int res)
1148
1155
  return 0;
1149
1156
  }
1150
1157
 
1158
+ static int recv_inc(struct conn *c, struct conn_dir *cd, int *bid,
1159
+ struct io_uring_cqe *cqe)
1160
+ {
1161
+ struct conn_buf_ring *cbr = &c->out_br;
1162
+ struct conn_buf_ring *in_cbr = &c->in_br;
1163
+ void *data;
1164
+
1165
+ if (!cqe->res)
1166
+ return 0;
1167
+ if (cqe->flags & IORING_CQE_F_BUF_MORE)
1168
+ return 0;
1169
+
1170
+ data = in_cbr->buf + *bid * buf_size;
1171
+ if (is_sink) {
1172
+ io_uring_buf_ring_add(in_cbr->br, data, buf_size, *bid, br_mask, 0);
1173
+ io_uring_buf_ring_advance(in_cbr->br, 1);
1174
+ } else if (send_ring) {
1175
+ io_uring_buf_ring_add(cbr->br, data, buf_size, *bid, br_mask, 0);
1176
+ io_uring_buf_ring_advance(cbr->br, 1);
1177
+ } else {
1178
+ send_append(c, cd, data, *bid, buf_size);
1179
+ }
1180
+ *bid = (*bid + 1) & (nr_bufs - 1);
1181
+ return 1;
1182
+ }
1183
+
1151
1184
  /*
1152
1185
  * Any receive that isn't recvmsg with multishot can be handled the same way.
1153
1186
  * Iterate from '*bid' and 'in_bytes' in total, and append the data to the
@@ -1291,7 +1324,9 @@ start_close:
1291
1324
  * end and the buffer will be replenished once the send is done with
1292
1325
  * it.
1293
1326
  */
1294
- if (is_sink)
1327
+ if (buf_ring_inc)
1328
+ nr_packets = recv_inc(c, ocd, &bid, cqe);
1329
+ else if (is_sink)
1295
1330
  nr_packets = replenish_buffers(c, &bid, cqe->res);
1296
1331
  else if (rcv_msg && recv_mshot)
1297
1332
  nr_packets = recv_mshot_msg(c, ocd, &bid, cqe->res);
@@ -1318,7 +1353,7 @@ start_close:
1318
1353
  cd->pending_recv = 0;
1319
1354
  if (recv_done_res(cqe->res))
1320
1355
  goto start_close;
1321
- if (is_sink)
1356
+ if (is_sink || !ocd->pending_send)
1322
1357
  __submit_receive(ring, c, &c->cd[0], c->in_fd);
1323
1358
  }
1324
1359
 
@@ -1480,12 +1515,39 @@ static int prep_next_send(struct io_uring *ring, struct conn *c,
1480
1515
  }
1481
1516
  }
1482
1517
 
1518
+ static int handle_send_inc(struct conn *c, struct conn_dir *cd, int bid,
1519
+ struct io_uring_cqe *cqe)
1520
+ {
1521
+ struct conn_buf_ring *in_cbr = &c->in_br;
1522
+ int ret = 0;
1523
+ void *data;
1524
+
1525
+ if (!cqe->res)
1526
+ goto out;
1527
+ if (cqe->flags & IORING_CQE_F_BUF_MORE)
1528
+ return 0;
1529
+
1530
+ assert(cqe->res <= buf_size);
1531
+ cd->out_bytes += cqe->res;
1532
+
1533
+ data = in_cbr->buf + bid * buf_size;
1534
+ io_uring_buf_ring_add(in_cbr->br, data, buf_size, bid, br_mask, 0);
1535
+ io_uring_buf_ring_advance(in_cbr->br, 1);
1536
+ bid = (bid + 1) & (nr_bufs - 1);
1537
+ ret = 1;
1538
+ out:
1539
+ if (pending_shutdown(c))
1540
+ close_cd(c, cd);
1541
+
1542
+ return ret;
1543
+ }
1544
+
1483
1545
  /*
1484
1546
  * Handling a send with an outgoing send ring. Get the buffers from the
1485
1547
  * receive side, and add them to the ingoing buffer ring again.
1486
1548
  */
1487
- static int handle_send_ring(struct conn *c, struct conn_dir *cd,
1488
- int bid, int bytes)
1549
+ static int handle_send_ring(struct conn *c, struct conn_dir *cd, int bid,
1550
+ int bytes)
1489
1551
  {
1490
1552
  struct conn_buf_ring *in_cbr = &c->in_br;
1491
1553
  struct conn_buf_ring *out_cbr = &c->out_br;
@@ -1605,7 +1667,9 @@ static int __handle_send(struct io_uring *ring, struct conn *c,
1605
1667
 
1606
1668
  vlog("send: got %d, %lu\n", cqe->res, cd->out_bytes);
1607
1669
 
1608
- if (send_ring)
1670
+ if (buf_ring_inc)
1671
+ nr_packets = handle_send_inc(c, cd, bid, cqe);
1672
+ else if (send_ring)
1609
1673
  nr_packets = handle_send_ring(c, cd, bid, cqe->res);
1610
1674
  else
1611
1675
  nr_packets = handle_send_buf(c, cd, bid, cqe->res);
@@ -2305,7 +2369,7 @@ int main(int argc, char *argv[])
2305
2369
 
2306
2370
  pthread_mutex_init(&thread_lock, NULL);
2307
2371
 
2308
- optstring = "m:d:S:s:b:f:H:r:p:n:B:N:T:w:t:M:R:u:c:C:q:a:x:z:6Vh?";
2372
+ optstring = "m:d:S:s:b:f:H:r:p:n:B:N:T:w:t:M:R:u:c:C:q:a:x:z:i:6Vh?";
2309
2373
  while ((opt = getopt(argc, argv, optstring)) != -1) {
2310
2374
  switch (opt) {
2311
2375
  case 'm':
@@ -2377,6 +2441,9 @@ int main(int argc, char *argv[])
2377
2441
  case 'q':
2378
2442
  ring_size = atoi(optarg);
2379
2443
  break;
2444
+ case 'i':
2445
+ buf_ring_inc = !!atoi(optarg);
2446
+ break;
2380
2447
  case 'a':
2381
2448
  use_huge = !!atoi(optarg);
2382
2449
  break;
@@ -9,4 +9,4 @@ Description: io_uring library
9
9
  URL: https://git.kernel.dk/cgit/liburing/
10
10
 
11
11
  Libs: -L${libdir} -luring
12
- Cflags: -I${includedir} -D_GNU_SOURCE
12
+ Cflags: -I${includedir}
@@ -16,8 +16,6 @@ override CFLAGS += -Wno-unused-parameter \
16
16
  $(LIBURING_CFLAGS)
17
17
  SO_CFLAGS=-fPIC $(CFLAGS)
18
18
  L_CFLAGS=$(CFLAGS)
19
- LINK_FLAGS=-Wl,-z,defs
20
- LINK_FLAGS+=$(LDFLAGS)
21
19
  ENABLE_SHARED ?= 1
22
20
 
23
21
  soname=liburing.so.$(VERSION_MAJOR)
@@ -39,6 +37,13 @@ ifneq ($(MAKECMDGOALS),clean)
39
37
  include ../config-host.mak
40
38
  endif
41
39
 
40
+ ifeq ($(CONFIG_USE_SANITIZER),y)
41
+ LINK_FLAGS=
42
+ else
43
+ LINK_FLAGS=-Wl,-z,defs
44
+ endif
45
+ LINK_FLAGS+=$(LDFLAGS)
46
+
42
47
  all: $(all_targets)
43
48
 
44
49
  liburing_srcs := setup.c queue.c register.c syscall.c version.c
@@ -50,6 +55,13 @@ ifeq ($(CONFIG_NOLIBC),y)
50
55
  override LINK_FLAGS += -nostdlib -nodefaultlibs $(libgcc_link_flag)
51
56
  endif
52
57
 
58
+ ifeq ($(CONFIG_USE_SANITIZER),y)
59
+ override CFLAGS += -fsanitize=address,undefined -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
60
+ override CPPFLAGS += -fsanitize=address,undefined -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
61
+ override LINK_FLAGS += -fsanitize=address,undefined
62
+ liburing_srcs += sanitize.c
63
+ endif
64
+
53
65
  override CPPFLAGS += -MT "$@" -MMD -MP -MF "$@.d"
54
66
  liburing_objs := $(patsubst %.c,%.ol,$(liburing_srcs))
55
67
  liburing_sobjs := $(patsubst %.c,%.os,$(liburing_srcs))
@@ -89,6 +101,7 @@ install: $(all_targets)
89
101
  install -D -m 644 include/liburing.h $(includedir)/liburing.h
90
102
  install -D -m 644 include/liburing/compat.h $(includedir)/liburing/compat.h
91
103
  install -D -m 644 include/liburing/barrier.h $(includedir)/liburing/barrier.h
104
+ install -D -m 644 include/liburing/sanitize.h $(includedir)/liburing/sanitize.h
92
105
  install -D -m 644 include/liburing/io_uring_version.h $(includedir)/liburing/io_uring_version.h
93
106
  install -D -m 644 liburing.a $(libdevdir)/liburing.a
94
107
  install -D -m 644 liburing-ffi.a $(libdevdir)/liburing-ffi.a
@@ -106,6 +119,7 @@ uninstall:
106
119
  @rm -f $(includedir)/liburing.h
107
120
  @rm -f $(includedir)/liburing/compat.h
108
121
  @rm -f $(includedir)/liburing/barrier.h
122
+ @rm -f $(includedir)/liburing/sanitize.h
109
123
  @rm -f $(includedir)/liburing/io_uring_version.h
110
124
  @rm -f $(libdevdir)/liburing.a
111
125
  @rm -f $(libdevdir)/liburing-ffi.a
@@ -440,11 +440,21 @@ struct io_uring_cqe {
440
440
  * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
441
441
  * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct
442
442
  * them from sends.
443
+ * IORING_CQE_F_BUF_MORE If set, the buffer ID set in the completion will get
444
+ * more completions. In other words, the buffer is being
445
+ * partially consumed, and will be used by the kernel for
446
+ * more completions. This is only set for buffers used via
447
+ * the incremental buffer consumption, as provided by
448
+ * a ring buffer setup with IOU_PBUF_RING_INC. For any
449
+ * other provided buffer type, all completions with a
450
+ * buffer passed back is automatically returned to the
451
+ * application.
443
452
  */
444
453
  #define IORING_CQE_F_BUFFER (1U << 0)
445
454
  #define IORING_CQE_F_MORE (1U << 1)
446
455
  #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
447
456
  #define IORING_CQE_F_NOTIF (1U << 3)
457
+ #define IORING_CQE_F_BUF_MORE (1U << 4)
448
458
 
449
459
  #define IORING_CQE_BUFFER_SHIFT 16
450
460
 
@@ -599,6 +609,9 @@ enum io_uring_register_op {
599
609
 
600
610
  IORING_REGISTER_CLOCK = 29,
601
611
 
612
+ /* clone registered buffers from source ring to current ring */
613
+ IORING_REGISTER_CLONE_BUFFERS = 30,
614
+
602
615
  /* this goes last */
603
616
  IORING_REGISTER_LAST,
604
617
 
@@ -684,6 +697,16 @@ struct io_uring_clock_register {
684
697
  __u32 __resv[3];
685
698
  };
686
699
 
700
+ enum { /* flag values; presumably for io_uring_clone_buffers.flags - TODO confirm */
701
+ IORING_REGISTER_SRC_REGISTERED = 1, /* NOTE(review): name suggests the source is given as a registered ring index - confirm */
702
+ };
703
+
704
+ struct io_uring_clone_buffers { /* presumably the argument for IORING_REGISTER_CLONE_BUFFERS - TODO confirm */
705
+ __u32 src_fd; /* presumably identifies the source ring to clone from - confirm */
706
+ __u32 flags; /* presumably takes IORING_REGISTER_SRC_REGISTERED - confirm */
707
+ __u32 pad[6]; /* NOTE(review): looks like reserved padding - confirm whether it must be zero */
708
+ };
709
+
687
710
  struct io_uring_buf {
688
711
  __u64 addr;
689
712
  __u32 len;
@@ -716,9 +739,17 @@ struct io_uring_buf_ring {
716
739
  * mmap(2) with the offset set as:
717
740
  * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
718
741
  * to get a virtual mapping for the ring.
742
+ * IOU_PBUF_RING_INC: If set, buffers consumed from this buffer ring can be
743
+ * consumed incrementally. Normally one (or more) buffers
744
+ * are fully consumed. With incremental consumptions, it's
745
+ * feasible to register big ranges of buffers, and each
746
+ * use of it will consume only as much as it needs. This
747
+ * requires that both the kernel and application keep
748
+ * track of where the current read/recv index is at.
719
749
  */
720
750
  enum io_uring_register_pbuf_ring_flags {
721
751
  IOU_PBUF_RING_MMAP = 1,
752
+ IOU_PBUF_RING_INC = 2,
722
753
  };
723
754
 
724
755
  /* argument for IORING_(UN)REGISTER_PBUF_RING */