uringmachine 0.4 → 0.5

Files changed (68)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +2 -1
  3. data/CHANGELOG.md +14 -0
  4. data/README.md +44 -1
  5. data/TODO.md +12 -3
  6. data/examples/bm_snooze.rb +89 -0
  7. data/examples/bm_write.rb +56 -0
  8. data/examples/dns_client.rb +12 -0
  9. data/examples/http_server.rb +42 -43
  10. data/examples/server_client.rb +64 -0
  11. data/examples/snooze.rb +44 -0
  12. data/examples/write_dev_null.rb +16 -0
  13. data/ext/um/extconf.rb +24 -14
  14. data/ext/um/um.c +468 -414
  15. data/ext/um/um.h +129 -39
  16. data/ext/um/um_buffer.c +49 -0
  17. data/ext/um/um_class.c +148 -24
  18. data/ext/um/um_const.c +30 -1
  19. data/ext/um/um_ext.c +4 -0
  20. data/ext/um/um_mutex_class.c +47 -0
  21. data/ext/um/um_op.c +86 -111
  22. data/ext/um/um_queue_class.c +58 -0
  23. data/ext/um/um_sync.c +273 -0
  24. data/ext/um/um_utils.c +1 -1
  25. data/lib/uringmachine/dns_resolver.rb +84 -0
  26. data/lib/uringmachine/version.rb +1 -1
  27. data/lib/uringmachine.rb +19 -3
  28. data/supressions/ruby.supp +71 -0
  29. data/test/test_um.rb +466 -47
  30. data/vendor/liburing/.gitignore +5 -0
  31. data/vendor/liburing/CHANGELOG +1 -0
  32. data/vendor/liburing/configure +32 -0
  33. data/vendor/liburing/examples/Makefile +1 -0
  34. data/vendor/liburing/examples/reg-wait.c +159 -0
  35. data/vendor/liburing/liburing.spec +1 -1
  36. data/vendor/liburing/src/include/liburing/io_uring.h +48 -2
  37. data/vendor/liburing/src/include/liburing.h +28 -2
  38. data/vendor/liburing/src/int_flags.h +10 -3
  39. data/vendor/liburing/src/liburing-ffi.map +13 -2
  40. data/vendor/liburing/src/liburing.map +9 -0
  41. data/vendor/liburing/src/queue.c +25 -16
  42. data/vendor/liburing/src/register.c +73 -4
  43. data/vendor/liburing/src/setup.c +46 -18
  44. data/vendor/liburing/src/setup.h +6 -0
  45. data/vendor/liburing/test/Makefile +7 -0
  46. data/vendor/liburing/test/cmd-discard.c +427 -0
  47. data/vendor/liburing/test/fifo-nonblock-read.c +69 -0
  48. data/vendor/liburing/test/file-exit-unreg.c +48 -0
  49. data/vendor/liburing/test/io_uring_passthrough.c +2 -0
  50. data/vendor/liburing/test/io_uring_register.c +13 -2
  51. data/vendor/liburing/test/napi-test.c +1 -1
  52. data/vendor/liburing/test/no-mmap-inval.c +1 -1
  53. data/vendor/liburing/test/read-mshot-empty.c +2 -0
  54. data/vendor/liburing/test/read-mshot-stdin.c +121 -0
  55. data/vendor/liburing/test/read-mshot.c +6 -0
  56. data/vendor/liburing/test/recvsend_bundle.c +2 -2
  57. data/vendor/liburing/test/reg-fd-only.c +1 -1
  58. data/vendor/liburing/test/reg-wait.c +251 -0
  59. data/vendor/liburing/test/regbuf-clone.c +458 -0
  60. data/vendor/liburing/test/resize-rings.c +643 -0
  61. data/vendor/liburing/test/rsrc_tags.c +1 -1
  62. data/vendor/liburing/test/sqpoll-sleep.c +39 -8
  63. data/vendor/liburing/test/sqwait.c +136 -0
  64. data/vendor/liburing/test/sync-cancel.c +8 -1
  65. data/vendor/liburing/test/timeout.c +13 -8
  66. metadata +22 -4
  67. data/examples/http_server_multishot.rb +0 -57
  68. data/examples/http_server_simpler.rb +0 -34
data/vendor/liburing/configure

@@ -417,6 +417,21 @@ if compile_prog "" "" "futexv"; then
 fi
 print_config "futex waitv support" "$futexv"
 
+##########################################
+# Check block discard cmd support
+discard_cmd="no"
+cat > $TMPC << EOF
+#include <linux/blkdev.h>
+int main(void)
+{
+  return BLOCK_URING_CMD_DISCARD;
+}
+EOF
+if compile_prog "" "" "discard command"; then
+  discard_cmd="yes"
+fi
+print_config "io_uring discard command support" "$discard_cmd"
+
 ##########################################
 # Check idtype_t support
 has_idtype_t="no"
@@ -651,6 +666,23 @@ typedef enum
 } idtype_t;
 EOF
 fi
+
+if test "$discard_cmd" != "yes"; then
+cat >> $compat_h << EOF
+
+#include <linux/ioctl.h>
+
+#ifndef BLOCK_URING_CMD_DISCARD
+#define BLOCK_URING_CMD_DISCARD _IO(0x12, 0)
+#endif
+
+EOF
+else cat >> $compat_h << EOF
+#include <linux/blkdev.h>
+
+EOF
+fi
+
 cat >> $compat_h << EOF
 #endif
 EOF
data/vendor/liburing/examples/Makefile

@@ -28,6 +28,7 @@ example_srcs := \
 	napi-busy-poll-client.c \
 	napi-busy-poll-server.c \
 	poll-bench.c \
+	reg-wait.c \
 	send-zerocopy.c \
 	rsrc-update-bench.c \
 	proxy.c \
data/vendor/liburing/examples/reg-wait.c (new file)

@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Sample program that shows how to use registered waits.
+ *
+ * (C) 2024 Jens Axboe <axboe@kernel.dk>
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <liburing.h>
+
+static unsigned long long mtime_since(const struct timeval *s,
+                                      const struct timeval *e)
+{
+        long long sec, usec;
+
+        sec = e->tv_sec - s->tv_sec;
+        usec = (e->tv_usec - s->tv_usec);
+        if (sec > 0 && usec < 0) {
+                sec--;
+                usec += 1000000;
+        }
+
+        sec *= 1000;
+        usec /= 1000;
+        return sec + usec;
+}
+
+static unsigned long long mtime_since_now(struct timeval *tv)
+{
+        struct timeval end;
+
+        gettimeofday(&end, NULL);
+        return mtime_since(tv, &end);
+}
+
+int main(int argc, char *argv[])
+{
+        struct io_uring_reg_wait *reg;
+        struct io_uring_sqe *sqe;
+        struct io_uring_cqe *cqe[2];
+        struct io_uring ring;
+        char b1[8], b2[8];
+        unsigned long msec;
+        struct timeval tv;
+        int ret, fds[2];
+
+        if (argc > 1) {
+                fprintf(stdout, "%s: takes no arguments\n", argv[0]);
+                return 0;
+        }
+
+        if (pipe(fds) < 0) {
+                perror("pipe");
+                return 1;
+        }
+
+        ret = io_uring_queue_init(8, &ring, 0);
+        if (ret) {
+                fprintf(stderr, "Queue init: %d\n", ret);
+                return 1;
+        }
+
+        /*
+         * Setup wait region. We'll use 32 here, but 64 is probably a more
+         * logical value, as it'll pin a page regardless of size. 64 is the
+         * max value on a 4k page size architecture.
+         */
+        reg = io_uring_setup_reg_wait(&ring, 32, &ret);
+        if (!reg) {
+                if (ret == -EINVAL) {
+                        fprintf(stderr, "Kernel doesn't support registered waits\n");
+                        return 1;
+                }
+                fprintf(stderr, "Registered wait: %d\n", ret);
+                return 1;
+        }
+
+        /*
+         * Setup two distinct wait regions. Index 0 will be a 1 second wait,
+         * and region 2 is a short wait using min_wait_usec as well. Neither
+         * of these use a signal mask, but sigmask/sigmask_sz can be set as
+         * well for that.
+         */
+        reg[0].ts.tv_sec = 1;
+        reg[0].ts.tv_nsec = 0;
+        reg[0].flags = IORING_REG_WAIT_TS;
+
+        reg[1].ts.tv_sec = 0;
+        reg[1].ts.tv_nsec = 100000000LL;
+        reg[1].min_wait_usec = 10000;
+        reg[1].flags = IORING_REG_WAIT_TS;
+
+        /*
+         * No pending completions. Wait with region 0, which should time
+         * out after 1 second.
+         */
+        gettimeofday(&tv, NULL);
+        ret = io_uring_submit_and_wait_reg(&ring, cqe, 1, 0);
+        if (ret == -EINVAL) {
+                fprintf(stderr, "Kernel doesn't support registered waits\n");
+                return 1;
+        } else if (ret != -ETIME) {
+                fprintf(stderr, "Wait should've timed out... %d\n", ret);
+                return 1;
+        }
+        msec = mtime_since_now(&tv);
+        if (msec < 900 || msec > 1100) {
+                fprintf(stderr, "Wait took an unexpected amount of time: %lu\n",
+                        msec);
+                return 1;
+        }
+
+        /*
+         * Now prepare two pipe reads. We'll trigger one completion quickly,
+         * but the other one will never happen. Use min_wait_usec timeout
+         * to abort after 10 msec of time, where the overall timeout is
+         * otherwise 100 msec. Since we're waiting on two events, the min
+         * timeout ends up aborting us.
+         */
+        sqe = io_uring_get_sqe(&ring);
+        io_uring_prep_read(sqe, fds[0], b1, sizeof(b1), 0);
+        sqe = io_uring_get_sqe(&ring);
+        io_uring_prep_read(sqe, fds[0], b2, sizeof(b2), 0);
+
+        /* trigger one read */
+        ret = write(fds[1], "Hello", 5);
+        if (ret < 0) {
+                perror("write");
+                return 1;
+        }
+
+        /*
+         * This will wait for 2 entries, where 1 is already available.
+         * Since we're using min_wait_usec == 10 msec here with an overall
+         * wait of 100 msec, we expect the wait to abort after 10 msec since
+         * one or more events are available.
+         */
+        gettimeofday(&tv, NULL);
+        ret = io_uring_submit_and_wait_reg(&ring, cqe, 2, 1);
+        msec = mtime_since_now(&tv);
+        if (ret != 2) {
+                fprintf(stderr, "Should have submitted 2: %d\n", ret);
+                return 1;
+        }
+        if (msec < 8 || msec > 12)
+                fprintf(stderr, "min_wait_usec should take ~10 msec: %lu\n", msec);
+
+        /*
+         * Cleanup after ourselves
+         */
+        io_uring_queue_exit(&ring);
+        io_uring_free_reg_wait(reg, 32);
+        return 0;
+}
data/vendor/liburing/liburing.spec

@@ -1,5 +1,5 @@
 Name: liburing
-Version: 2.8
+Version: 2.9
 Release: 1%{?dist}
 Summary: Linux-native io_uring I/O access library
 License: (GPLv2 with exceptions and LGPLv2+) or MIT
data/vendor/liburing/src/include/liburing/io_uring.h

@@ -518,6 +518,7 @@ struct io_cqring_offsets {
 #define IORING_ENTER_EXT_ARG           (1U << 3)
 #define IORING_ENTER_REGISTERED_RING   (1U << 4)
 #define IORING_ENTER_ABS_TIMER         (1U << 5)
+#define IORING_ENTER_EXT_ARG_REG       (1U << 6)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -612,6 +613,10 @@ enum io_uring_register_op {
        /* clone registered buffers from source ring to current ring */
        IORING_REGISTER_CLONE_BUFFERS           = 30,
 
+       IORING_REGISTER_RESIZE_RINGS            = 33,
+
+       IORING_REGISTER_CQWAIT_REG              = 34,
+
        /* this goes last */
        IORING_REGISTER_LAST,
 
@@ -698,13 +703,17 @@ struct io_uring_clock_register {
 };
 
 enum {
-       IORING_REGISTER_SRC_REGISTERED = 1,
+       IORING_REGISTER_SRC_REGISTERED  = (1U << 0),
+       IORING_REGISTER_DST_REPLACE     = (1U << 1),
 };
 
 struct io_uring_clone_buffers {
        __u32   src_fd;
        __u32   flags;
-       __u32   pad[6];
+       __u32   src_off;
+       __u32   dst_off;
+       __u32   nr;
+       __u32   pad[3];
 };
 
 struct io_uring_buf {
@@ -795,6 +804,43 @@ enum io_uring_register_restriction_op {
        IORING_RESTRICTION_LAST
 };
 
+enum {
+       IORING_REG_WAIT_TS              = (1U << 0),
+};
+
+/*
+ * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of
+ * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is
+ * called rather than pass in a wait argument structure separately.
+ */
+struct io_uring_cqwait_reg_arg {
+       __u32           flags;
+       __u32           struct_size;
+       __u32           nr_entries;
+       __u32           pad;
+       __u64           user_addr;
+       __u64           pad2[3];
+};
+
+/*
+ * Argument for io_uring_enter(2) with
+ * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument
+ * is an index into a previously registered fixed wait region described by
+ * the below structure.
+ */
+struct io_uring_reg_wait {
+       struct __kernel_timespec        ts;
+       __u32                           min_wait_usec;
+       __u32                           flags;
+       __u64                           sigmask;
+       __u32                           sigmask_sz;
+       __u32                           pad[3];
+       __u64                           pad2[2];
+};
+
+/*
+ * Argument for io_uring_enter(2) with IORING_GETEVENTS | IORING_ENTER_EXT_ARG
+ */
 struct io_uring_getevents_arg {
        __u64   sigmask;
        __u32   sigmask_sz;
data/vendor/liburing/src/include/liburing.h

@@ -196,7 +196,16 @@ int io_uring_submit_and_wait_min_timeout(struct io_uring *ring,
                                         struct __kernel_timespec *ts,
                                         unsigned min_wait,
                                         sigset_t *sigmask);
-
+int io_uring_submit_and_wait_reg(struct io_uring *ring,
+                                struct io_uring_cqe **cqe_ptr, unsigned wait_nr,
+                                int reg_index);
+
+int io_uring_register_wait_reg(struct io_uring *ring,
+                              struct io_uring_reg_wait *reg, int nr);
+int io_uring_resize_rings(struct io_uring *ring, struct io_uring_params *p);
+int io_uring_clone_buffers_offset(struct io_uring *dst, struct io_uring *src,
+                                 unsigned int dst_off, unsigned int src_off,
+                                 unsigned int nr, unsigned int flags);
 int io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src);
 int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
                              unsigned nr_iovecs);
@@ -275,13 +284,20 @@ int io_uring_setup(unsigned int entries, struct io_uring_params *p);
 int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
                      unsigned int nr_args);
 
+/*
+ * Mapped/registered wait regions
+ */
+struct io_uring_reg_wait *io_uring_setup_reg_wait(struct io_uring *ring,
+                                                 unsigned nentries, int *err);
+void io_uring_free_reg_wait(struct io_uring_reg_wait *reg, unsigned nentries);
+
 /*
  * Mapped buffer ring alloc/register + unregister/free helpers
  */
 struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *ring,
                                                  unsigned int nentries,
                                                  int bgid, unsigned int flags,
-                                                 int *ret);
+                                                 int *err);
 int io_uring_free_buf_ring(struct io_uring *ring, struct io_uring_buf_ring *br,
                           unsigned int nentries, int bgid);
 
@@ -1292,6 +1308,16 @@ IOURINGINLINE void io_uring_prep_ftruncate(struct io_uring_sqe *sqe,
 }
 #endif
 
+IOURINGINLINE void io_uring_prep_cmd_discard(struct io_uring_sqe *sqe,
+                                            int fd,
+                                            uint64_t offset, uint64_t nbytes)
+{
+       io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, 0, 0, 0);
+       sqe->cmd_op = BLOCK_URING_CMD_DISCARD;
+       sqe->addr = offset;
+       sqe->addr3 = nbytes;
+}
+
 /*
  * Returns number of unconsumed (if SQPOLL) or unsubmitted entries exist in
  * the SQ ring
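The new io_uring_prep_cmd_discard() helper above issues BLOCK_URING_CMD_DISCARD as an IORING_OP_URING_CMD against a block device. A hedged usage sketch follows, assuming a ring that has already been initialized and with error handling omitted; the device path and range are purely illustrative, and a discard destroys the data in that range:

    /* Queue an async discard of the first 1 MiB of a block device. */
    int fd = open("/dev/nvme0n1", O_RDWR);   /* illustrative device path */
    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
    struct io_uring_cqe *cqe;

    io_uring_prep_cmd_discard(sqe, fd, 0, 1024 * 1024);
    io_uring_submit(&ring);

    io_uring_wait_cqe(&ring, &cqe);
    /* cqe->res is 0 on success, or a negative errno (for example when the
     * device or kernel lacks discard support). */
    io_uring_cqe_seen(&ring, cqe);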
data/vendor/liburing/src/int_flags.h

@@ -2,10 +2,17 @@
 #ifndef LIBURING_INT_FLAGS
 #define LIBURING_INT_FLAGS
 
+#define INT_FLAGS_MASK         (IORING_ENTER_REGISTERED_RING)
+
 enum {
-       INT_FLAG_REG_RING       = 1,
-       INT_FLAG_REG_REG_RING   = 2,
-       INT_FLAG_APP_MEM        = 4,
+       INT_FLAG_REG_RING       = IORING_ENTER_REGISTERED_RING,
+       INT_FLAG_REG_REG_RING   = 1,
+       INT_FLAG_APP_MEM        = 2,
 };
 
+static inline int ring_enter_flags(struct io_uring *ring)
+{
+       return ring->int_flags & INT_FLAGS_MASK;
+}
+
 #endif
data/vendor/liburing/src/liburing-ffi.map

@@ -211,6 +211,17 @@ LIBURING_2.8 {
        io_uring_submit_and_wait_min_timeout;
        io_uring_wait_cqes_min_timeout;
        io_uring_clone_buffers;
-       io_uring_prep_open;
-       io_uring_prep_open_direct;
+       io_uring_prep_cmd_discard;
+       io_uring_prep_open;
+       io_uring_prep_open_direct;
 } LIBURING_2.7;
+
+LIBURING_2.9 {
+       global:
+       io_uring_resize_rings;
+       io_uring_register_wait_reg;
+       io_uring_submit_and_wait_reg;
+       io_uring_free_reg_wait;
+       io_uring_setup_reg_wait;
+       io_uring_clone_buffers_offset;
+} LIBURING_2.8;
data/vendor/liburing/src/liburing.map

@@ -103,3 +103,12 @@ LIBURING_2.8 {
        io_uring_wait_cqes_min_timeout;
        io_uring_clone_buffers;
 } LIBURING_2.7;
+
+LIBURING_2.9 {
+       io_uring_resize_rings;
+       io_uring_register_wait_reg;
+       io_uring_submit_and_wait_reg;
+       io_uring_free_reg_wait;
+       io_uring_setup_reg_wait;
+       io_uring_clone_buffers_offset;
+} LIBURING_2.8;
data/vendor/liburing/src/queue.c

@@ -6,7 +6,6 @@
 #include "liburing.h"
 #include "int_flags.h"
 #include "liburing/sanitize.h"
-#include "liburing/compat.h"
 #include "liburing/io_uring.h"
 
 /*
@@ -70,7 +69,7 @@ static int _io_uring_get_cqe(struct io_uring *ring,
 
        do {
                bool need_enter = false;
-               unsigned flags = 0;
+               unsigned flags = ring_enter_flags(ring);
                unsigned nr_available;
                int ret;
 
@@ -94,7 +93,7 @@ static int _io_uring_get_cqe(struct io_uring *ring,
                        need_enter = true;
                }
                if (data->wait_nr > nr_available || need_enter) {
-                       flags = IORING_ENTER_GETEVENTS | data->get_flags;
+                       flags |= IORING_ENTER_GETEVENTS | data->get_flags;
                        need_enter = true;
                }
                if (sq_ring_needs_enter(ring, data->submit, &flags))
@@ -109,8 +108,6 @@ static int _io_uring_get_cqe(struct io_uring *ring,
                        break;
                }
 
-               if (ring->int_flags & INT_FLAG_REG_RING)
-                       flags |= IORING_ENTER_REGISTERED_RING;
                ret = __sys_io_uring_enter2(ring->enter_ring_fd, data->submit,
                                            data->wait_nr, flags, data->arg,
                                            data->sz);
@@ -149,10 +146,8 @@ int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
 
 int io_uring_get_events(struct io_uring *ring)
 {
-       int flags = IORING_ENTER_GETEVENTS;
+       int flags = IORING_ENTER_GETEVENTS | ring_enter_flags(ring);
 
-       if (ring->int_flags & INT_FLAG_REG_RING)
-               flags |= IORING_ENTER_REGISTERED_RING;
        return __sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL);
 }
 
@@ -325,6 +320,26 @@ int io_uring_wait_cqes_min_timeout(struct io_uring *ring,
                                          sigmask);
 }
 
+int io_uring_submit_and_wait_reg(struct io_uring *ring,
+                                struct io_uring_cqe **cqe_ptr,
+                                unsigned wait_nr, int reg_index)
+{
+       struct get_data data = {
+               .submit         = __io_uring_flush_sq(ring),
+               .wait_nr        = wait_nr,
+               .get_flags      = IORING_ENTER_EXT_ARG |
+                                 IORING_ENTER_EXT_ARG_REG,
+               .sz             = sizeof(struct io_uring_reg_wait),
+               .has_ts         = true,
+               .arg            = (void *) (uintptr_t) reg_index,
+       };
+
+       if (!(ring->features & IORING_FEAT_EXT_ARG))
+               return -EINVAL;
+
+       return _io_uring_get_cqe(ring, cqe_ptr, &data);
+}
+
 static int __io_uring_submit_and_wait_timeout(struct io_uring *ring,
                struct io_uring_cqe **cqe_ptr, unsigned wait_nr,
                struct __kernel_timespec *ts,
@@ -403,17 +418,14 @@ static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
                             unsigned wait_nr, bool getevents)
 {
        bool cq_needs_enter = getevents || wait_nr || cq_ring_needs_enter(ring);
-       unsigned flags;
+       unsigned flags = ring_enter_flags(ring);
        int ret;
 
        liburing_sanitize_ring(ring);
 
-       flags = 0;
        if (sq_ring_needs_enter(ring, submitted, &flags) || cq_needs_enter) {
                if (cq_needs_enter)
                        flags |= IORING_ENTER_GETEVENTS;
-               if (ring->int_flags & INT_FLAG_REG_RING)
-                       flags |= IORING_ENTER_REGISTERED_RING;
 
                ret = __sys_io_uring_enter(ring->enter_ring_fd, submitted,
                                           wait_nr, flags, NULL);
@@ -462,10 +474,7 @@ struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
 
 int __io_uring_sqring_wait(struct io_uring *ring)
 {
-       int flags = IORING_ENTER_SQ_WAIT;
-
-       if (ring->int_flags & INT_FLAG_REG_RING)
-               flags |= IORING_ENTER_REGISTERED_RING;
+       int flags = IORING_ENTER_SQ_WAIT | ring_enter_flags(ring);
 
        return __sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL);
 }
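The queue.c changes above add the caller-facing entry point, io_uring_submit_and_wait_reg(), which sets IORING_ENTER_EXT_ARG_REG and passes the registered-wait index where a wait-argument pointer would normally go. For orientation, a minimal caller-side sketch of the whole flow, assuming a ring already initialized with io_uring_queue_init() and with error handling omitted (the full program is examples/reg-wait.c earlier in this diff):

    struct io_uring_reg_wait *reg;
    struct io_uring_cqe *cqe;
    int ret;

    /* Allocate a 32-entry wait region and register it with the ring;
     * io_uring_setup_reg_wait() does both, as used in examples/reg-wait.c. */
    reg = io_uring_setup_reg_wait(&ring, 32, &ret);

    /* Entry 0: time out after one second. */
    reg[0].ts.tv_sec = 1;
    reg[0].ts.tv_nsec = 0;
    reg[0].flags = IORING_REG_WAIT_TS;

    /* Submit pending SQEs and wait for one completion, referring to
     * registered entry 0 instead of passing a timespec on every call. */
    ret = io_uring_submit_and_wait_reg(&ring, &cqe, 1, 0);

    /* Free the region when done; reg-wait.c does this after
     * io_uring_queue_exit(). */
    io_uring_free_reg_wait(reg, 32);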
data/vendor/liburing/src/register.c

@@ -4,8 +4,8 @@
 #include "lib.h"
 #include "syscall.h"
 #include "liburing.h"
+#include "setup.h"
 #include "int_flags.h"
-#include "liburing/compat.h"
 #include "liburing/io_uring.h"
 #include "liburing/sanitize.h"
 
@@ -395,16 +395,85 @@ int io_uring_register_clock(struct io_uring *ring,
        return do_register(ring, IORING_REGISTER_CLOCK, arg, 0);
 }
 
-int io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src)
+int io_uring_clone_buffers_offset(struct io_uring *dst, struct io_uring *src,
+                                 unsigned int dst_off, unsigned int src_off,
+                                 unsigned int nr, unsigned int flags)
 {
-       struct io_uring_clone_buffers buf = { .src_fd = src->ring_fd, };
+       struct io_uring_clone_buffers buf = {
+               .src_fd         = src->ring_fd,
+               .flags          = flags,
+               .src_off        = src_off,
+               .dst_off        = dst_off,
+               .nr             = nr,
+       };
 
        if (src->int_flags & INT_FLAG_REG_REG_RING) {
                buf.src_fd = src->enter_ring_fd;
-               buf.flags = IORING_REGISTER_SRC_REGISTERED;
+               buf.flags |= IORING_REGISTER_SRC_REGISTERED;
        } else {
                buf.src_fd = src->ring_fd;
        }
 
        return do_register(dst, IORING_REGISTER_CLONE_BUFFERS, &buf, 1);
 }
+
+int io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src)
+{
+       return io_uring_clone_buffers_offset(dst, src, 0, 0, 0, 0);
+}
+
+int io_uring_resize_rings(struct io_uring *ring, struct io_uring_params *p)
+{
+       unsigned sq_head, sq_tail;
+       int ret;
+
+       if (ring->flags & IORING_SETUP_NO_MMAP)
+               return -EINVAL;
+
+       memset(&p->sq_off, 0, sizeof(p->sq_off));
+       memset(&p->cq_off, 0, sizeof(p->cq_off));
+
+       ret = do_register(ring, IORING_REGISTER_RESIZE_RINGS, p, 1);
+       if (ret < 0)
+               goto out;
+
+       sq_head = ring->sq.sqe_head;
+       sq_tail = ring->sq.sqe_tail;
+       io_uring_unmap_rings(&ring->sq, &ring->cq);
+       memset(&ring->sq, 0, sizeof(ring->sq));
+       memset(&ring->cq, 0, sizeof(ring->cq));
+       ret = io_uring_mmap(ring->ring_fd, p, &ring->sq, &ring->cq);
+       if (ret)
+               goto out;
+
+       ring->sq.sqe_head = sq_head;
+       ring->sq.sqe_tail = sq_tail;
+
+       /*
+        * Directly map SQ slots to SQEs
+        */
+       if (!(p->flags & IORING_SETUP_NO_SQARRAY)) {
+               unsigned index;
+
+               for (index = 0; index < p->sq_entries; index++)
+                       ring->sq.array[index] = index;
+       }
+
+       /* clear for next use */
+out:
+       p->flags = 0;
+       return ret;
+}
+
+int io_uring_register_wait_reg(struct io_uring *ring,
+                              struct io_uring_reg_wait *reg, int nr)
+{
+       struct io_uring_cqwait_reg_arg arg = {
+               .flags          = 0,
+               .struct_size    = sizeof(*reg),
+               .nr_entries     = nr,
+               .user_addr      = (unsigned long) (uintptr_t) reg,
+       };
+
+       return do_register(ring, IORING_REGISTER_CQWAIT_REG, &arg, 1);
+}
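The other two additions in register.c, io_uring_clone_buffers_offset() and io_uring_resize_rings(), have no example program in this release, so here is a rough sketch of how they might be called. The sizes and offsets are illustrative, and exactly which io_uring_params fields the kernel honors on a resize is an assumption beyond what the code above shows:

    /* Resize an existing ring: request new SQ/CQ sizes via io_uring_params.
     * On success the helper unmaps and re-mmaps the rings, and it clears
     * p.flags before returning so the struct can be reused. */
    struct io_uring_params p = { 0 };
    p.sq_entries = 32;
    p.cq_entries = 1024;
    int ret = io_uring_resize_rings(&ring, &p);

    /* Clone 4 registered buffers from src (starting at source index 8)
     * into dst (starting at destination index 0), with no extra flags. */
    ret = io_uring_clone_buffers_offset(&dst, &src, 0, 8, 4, 0);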