uringmachine 0.8.2 → 0.11

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/TODO.md +0 -1
  4. data/examples/bm_side_running.rb +83 -0
  5. data/examples/bm_sqlite.rb +1 -1
  6. data/ext/um/um.c +66 -4
  7. data/ext/um/um.h +36 -0
  8. data/ext/um/um_class.c +6 -0
  9. data/ext/um/um_const.c +36 -0
  10. data/ext/um/um_ext.c +2 -0
  11. data/ext/um/um_stream.c +344 -0
  12. data/ext/um/um_stream_class.c +140 -0
  13. data/ext/um/um_utils.c +4 -0
  14. data/lib/uringmachine/actor.rb +1 -1
  15. data/lib/uringmachine/version.rb +1 -1
  16. data/lib/uringmachine.rb +35 -17
  17. data/test/test_fiber.rb +23 -3
  18. data/test/test_stream.rb +133 -0
  19. data/test/test_um.rb +109 -2
  20. data/uringmachine.gemspec +0 -2
  21. data/vendor/liburing/.github/workflows/{build.yml → ci.yml} +107 -42
  22. data/vendor/liburing/.gitignore +1 -0
  23. data/vendor/liburing/CHANGELOG +10 -0
  24. data/vendor/liburing/README +5 -0
  25. data/vendor/liburing/configure +1 -1
  26. data/vendor/liburing/examples/Makefile +1 -0
  27. data/vendor/liburing/examples/helpers.c +25 -0
  28. data/vendor/liburing/examples/helpers.h +13 -0
  29. data/vendor/liburing/examples/io_uring-test.c +3 -0
  30. data/vendor/liburing/examples/proxy.c +1 -1
  31. data/vendor/liburing/examples/reg-wait.c +41 -6
  32. data/vendor/liburing/examples/send-zerocopy.c +79 -32
  33. data/vendor/liburing/examples/zcrx.c +436 -0
  34. data/vendor/liburing/liburing.spec +1 -1
  35. data/vendor/liburing/src/Makefile +0 -1
  36. data/vendor/liburing/src/arch/generic/syscall.h +2 -2
  37. data/vendor/liburing/src/arch/syscall-defs.h +2 -2
  38. data/vendor/liburing/src/include/liburing/io_uring.h +101 -17
  39. data/vendor/liburing/src/include/liburing.h +179 -59
  40. data/vendor/liburing/src/int_flags.h +4 -1
  41. data/vendor/liburing/src/liburing-ffi.map +14 -2
  42. data/vendor/liburing/src/liburing.map +9 -2
  43. data/vendor/liburing/src/queue.c +35 -30
  44. data/vendor/liburing/src/register.c +46 -15
  45. data/vendor/liburing/src/sanitize.c +6 -9
  46. data/vendor/liburing/src/setup.c +37 -71
  47. data/vendor/liburing/src/syscall.c +2 -2
  48. data/vendor/liburing/test/232c93d07b74.c +1 -0
  49. data/vendor/liburing/test/Makefile +9 -0
  50. data/vendor/liburing/test/accept-test.c +1 -0
  51. data/vendor/liburing/test/cmd-discard.c +16 -8
  52. data/vendor/liburing/test/connect.c +11 -7
  53. data/vendor/liburing/test/epwait.c +420 -0
  54. data/vendor/liburing/test/eventfd-ring.c +30 -5
  55. data/vendor/liburing/test/fallocate.c +1 -1
  56. data/vendor/liburing/test/fixed-hugepage.c +10 -7
  57. data/vendor/liburing/test/fixed-seg.c +187 -0
  58. data/vendor/liburing/test/helpers.c +121 -0
  59. data/vendor/liburing/test/helpers.h +13 -0
  60. data/vendor/liburing/test/init-mem.c +2 -0
  61. data/vendor/liburing/test/io_uring_passthrough.c +78 -62
  62. data/vendor/liburing/test/iopoll-overflow.c +5 -4
  63. data/vendor/liburing/test/iopoll.c +20 -10
  64. data/vendor/liburing/test/iowait.c +141 -0
  65. data/vendor/liburing/test/nvme.h +2 -0
  66. data/vendor/liburing/test/pipe-bug.c +11 -5
  67. data/vendor/liburing/test/pipe-eof.c +11 -1
  68. data/vendor/liburing/test/read-inc-file.c +150 -0
  69. data/vendor/liburing/test/read-write.c +21 -14
  70. data/vendor/liburing/test/recv-bundle-short-ooo.c +435 -0
  71. data/vendor/liburing/test/recv-multishot.c +2 -2
  72. data/vendor/liburing/test/reg-wait.c +449 -120
  73. data/vendor/liburing/test/regbuf-clone.c +53 -0
  74. data/vendor/liburing/test/resize-rings.c +25 -2
  75. data/vendor/liburing/test/rsrc_tags.c +67 -14
  76. data/vendor/liburing/test/send-zerocopy.c +52 -130
  77. data/vendor/liburing/test/sendmsg_iov_clean.c +216 -0
  78. data/vendor/liburing/test/socket-nb.c +158 -0
  79. data/vendor/liburing/test/sqwait.c +9 -11
  80. data/vendor/liburing/test/timeout.c +198 -0
  81. data/vendor/liburing/test/vec-regbuf.c +609 -0
  82. data/vendor/liburing/test/wait-timeout.c +1 -1
  83. data/vendor/liburing/test/wq-aff.c +5 -1
  84. data/vendor/liburing/test/zcrx.c +928 -0
  85. metadata +16 -32
  86. data/vendor/liburing/.github/workflows/codespell.yml +0 -25
  87. data/vendor/liburing/.github/workflows/shellcheck.yml +0 -20
@@ -76,11 +76,11 @@ static inline int __sys_io_uring_setup(unsigned int entries,
 
 static inline int __sys_io_uring_enter2(unsigned int fd, unsigned int to_submit,
 					unsigned int min_complete,
-					unsigned int flags, sigset_t *sig,
+					unsigned int flags, void *arg,
 					size_t sz)
 {
 	return (int) __do_syscall6(__NR_io_uring_enter, fd, to_submit,
-				   min_complete, flags, sig, sz);
+				   min_complete, flags, arg, sz);
 }
 
 static inline int __sys_io_uring_enter(unsigned int fd, unsigned int to_submit,
@@ -87,6 +87,7 @@ struct io_uring_sqe {
 	union {
 		__s32	splice_fd_in;
 		__u32	file_index;
+		__u32	zcrx_ifq_idx;
 		__u32	optlen;
 		struct {
 			__u16	addr_len;
@@ -200,6 +201,9 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_NO_SQARRAY		(1U << 16)
 
+/* Use hybrid poll in iopoll process */
+#define IORING_SETUP_HYBRID_IOPOLL	(1U << 17)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
@@ -259,6 +263,10 @@ enum io_uring_op {
 	IORING_OP_FTRUNCATE,
 	IORING_OP_BIND,
 	IORING_OP_LISTEN,
+	IORING_OP_RECV_ZC,
+	IORING_OP_EPOLL_WAIT,
+	IORING_OP_READV_FIXED,
+	IORING_OP_WRITEV_FIXED,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -361,7 +369,7 @@ enum io_uring_op {
  *	result will be the number of buffers send, with
  *	the starting buffer ID in cqe->flags as per
  *	usual for provided buffer usage. The buffers
- *	will be contiguous from the starting buffer ID.
+ *	will be contiguous from the starting buffer ID.
  */
 #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
 #define IORING_RECV_MULTISHOT		(1U << 1)
@@ -421,7 +429,7 @@ enum io_uring_msg_ring_flags {
  * IO completion data structure (Completion Queue Entry)
  */
 struct io_uring_cqe {
-	__u64	user_data;	/* sqe->user_data submission passed back */
+	__u64	user_data;	/* sqe->user_data value passed back */
 	__s32	res;		/* result code for this event */
 	__u32	flags;
 
@@ -519,6 +527,7 @@ struct io_cqring_offsets {
 #define IORING_ENTER_REGISTERED_RING	(1U << 4)
 #define IORING_ENTER_ABS_TIMER		(1U << 5)
 #define IORING_ENTER_EXT_ARG_REG	(1U << 6)
+#define IORING_ENTER_NO_IOWAIT		(1U << 7)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -555,6 +564,8 @@ struct io_uring_params {
 #define IORING_FEAT_REG_REG_RING	(1U << 13)
 #define IORING_FEAT_RECVSEND_BUNDLE	(1U << 14)
 #define IORING_FEAT_MIN_TIMEOUT		(1U << 15)
+#define IORING_FEAT_RW_ATTR		(1U << 16)
+#define IORING_FEAT_NO_IOWAIT		(1U << 17)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -613,9 +624,16 @@ enum io_uring_register_op {
 	/* clone registered buffers from source ring to current ring */
 	IORING_REGISTER_CLONE_BUFFERS		= 30,
 
+	/* send MSG_RING without having a ring */
+	IORING_REGISTER_SEND_MSG_RING		= 31,
+
+	/* register a netdev hw rx queue for zerocopy */
+	IORING_REGISTER_ZCRX_IFQ		= 32,
+
+	/* resize CQ ring */
 	IORING_REGISTER_RESIZE_RINGS		= 33,
 
-	IORING_REGISTER_CQWAIT_REG		= 34,
+	IORING_REGISTER_MEM_REGION		= 34,
 
 	/* this goes last */
 	IORING_REGISTER_LAST,
@@ -637,6 +655,31 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
 
+enum {
+	/* initialise with user provided memory pointed by user_addr */
+	IORING_MEM_REGION_TYPE_USER		= 1,
+};
+
+struct io_uring_region_desc {
+	__u64 user_addr;
+	__u64 size;
+	__u32 flags;
+	__u32 id;
+	__u64 mmap_offset;
+	__u64 __resv[4];
+};
+
+enum {
+	/* expose the region as registered wait arguments */
+	IORING_MEM_REGION_REG_WAIT_ARG		= 1,
+};
+
+struct io_uring_mem_region_reg {
+	__u64 region_uptr; /* struct io_uring_region_desc * */
+	__u64 flags;
+	__u64 __resv[2];
+};
+
 /*
  * Register a fully sparse file space, rather than pass in an array of all
  * -1 file descriptors.
@@ -808,20 +851,6 @@ enum {
 	IORING_REG_WAIT_TS		= (1U << 0),
 };
 
-/*
- * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of
- * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is
- * called rather than pass in a wait argument structure separately.
- */
-struct io_uring_cqwait_reg_arg {
-	__u32		flags;
-	__u32		struct_size;
-	__u32		nr_entries;
-	__u32		pad;
-	__u64		user_addr;
-	__u64		pad2[3];
-};
-
 /*
  * Argument for io_uring_enter(2) with
  * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument
@@ -888,6 +917,61 @@ enum io_uring_socket_op {
 	SOCKET_URING_OP_SETSOCKOPT,
 };
 
+/* Zero copy receive refill queue entry */
+struct io_uring_zcrx_rqe {
+	__u64	off;
+	__u32	len;
+	__u32	__pad;
+};
+
+struct io_uring_zcrx_cqe {
+	__u64	off;
+	__u64	__pad;
+};
+
+/* The bit from which area id is encoded into offsets */
+#define IORING_ZCRX_AREA_SHIFT	48
+#define IORING_ZCRX_AREA_MASK	(~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1))
+
+struct io_uring_zcrx_offsets {
+	__u32	head;
+	__u32	tail;
+	__u32	rqes;
+	__u32	__resv2;
+	__u64	__resv[2];
+};
+
+enum io_uring_zcrx_area_flags {
+	IORING_ZCRX_AREA_DMABUF	= 1,
+};
+
+struct io_uring_zcrx_area_reg {
+	__u64	addr;
+	__u64	len;
+	__u64	rq_area_token;
+	__u32	flags;
+	__u32	dmabuf_fd;
+	__u64	__resv2[2];
+};
+
+/*
+ * Argument for IORING_REGISTER_ZCRX_IFQ
+ */
+struct io_uring_zcrx_ifq_reg {
+	__u32	if_idx;
+	__u32	if_rxq;
+	__u32	rq_entries;
+	__u32	flags;
+
+	__u64	area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
+	__u64	region_ptr; /* struct io_uring_region_desc * */
+
+	struct io_uring_zcrx_offsets offsets;
+	__u32	zcrx_id;
+	__u32	__resv2;
+	__u64	__resv[3];
+};
+
 #ifdef __cplusplus
 }
 #endif
@@ -132,6 +132,16 @@ struct io_uring {
 	unsigned pad2;
 };
 
+struct io_uring_zcrx_rq {
+	__u32 *khead;
+	__u32 *ktail;
+	__u32 rq_tail;
+	unsigned ring_entries;
+
+	struct io_uring_zcrx_rqe *rqes;
+	void *ring_ptr;
+};
+
 /*
  * Library interface
  */
@@ -206,7 +216,12 @@ int io_uring_resize_rings(struct io_uring *ring, struct io_uring_params *p);
 int io_uring_clone_buffers_offset(struct io_uring *dst, struct io_uring *src,
 				  unsigned int dst_off, unsigned int src_off,
 				  unsigned int nr, unsigned int flags);
+int __io_uring_clone_buffers_offset(struct io_uring *dst, struct io_uring *src,
+				    unsigned int dst_off, unsigned int src_off,
+				    unsigned int nr, unsigned int flags);
 int io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src);
+int __io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src,
+			     unsigned int flags);
 int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
 			      unsigned nr_iovecs);
 int io_uring_register_buffers_tags(struct io_uring *ring,
@@ -265,6 +280,8 @@ int io_uring_register_file_alloc_range(struct io_uring *ring,
 
 int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi);
 int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi);
+int io_uring_register_ifq(struct io_uring *ring,
+			  struct io_uring_zcrx_ifq_reg *reg);
 
 int io_uring_register_clock(struct io_uring *ring,
 			    struct io_uring_clock_register *arg);
@@ -279,17 +296,16 @@ int io_uring_enter(unsigned int fd, unsigned int to_submit,
 		   unsigned int min_complete, unsigned int flags, sigset_t *sig);
 int io_uring_enter2(unsigned int fd, unsigned int to_submit,
 		    unsigned int min_complete, unsigned int flags,
-		    sigset_t *sig, size_t sz);
+		    void *arg, size_t sz);
 int io_uring_setup(unsigned int entries, struct io_uring_params *p);
 int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
 		      unsigned int nr_args);
 
 /*
- * Mapped/registered wait regions
+ * Mapped/registered regions
 */
-struct io_uring_reg_wait *io_uring_setup_reg_wait(struct io_uring *ring,
-						  unsigned nentries, int *err);
-void io_uring_free_reg_wait(struct io_uring_reg_wait *reg, unsigned nentries);
+int io_uring_register_region(struct io_uring *ring,
+			     struct io_uring_mem_region_reg *reg);
 
 /*
 * Mapped buffer ring alloc/register + unregister/free helpers
@@ -309,34 +325,69 @@ int __io_uring_get_cqe(struct io_uring *ring,
 		       struct io_uring_cqe **cqe_ptr, unsigned submit,
 		       unsigned wait_nr, sigset_t *sigmask);
 
+/*
+ * Enable/disable setting of iowait by the kernel.
+ */
+int io_uring_set_iowait(struct io_uring *ring, bool enable_iowait);
+
 #define LIBURING_UDATA_TIMEOUT	((__u64) -1)
 
 /*
- * Calculates the step size for CQE iteration.
- * For standard CQE's its 1, for big CQE's its two.
+ * Returns the bit shift needed to index the CQ.
+ * This shift is 1 for rings with big CQEs, and 0 for rings with normal CQEs.
+ * CQE `index` can be computed as &cq.cqes[(index & cq.ring_mask) << cqe_shift].
  */
-#define io_uring_cqe_shift(ring)					\
-	(!!((ring)->flags & IORING_SETUP_CQE32))
+IOURINGINLINE unsigned io_uring_cqe_shift_from_flags(unsigned flags)
+{
+	return !!(flags & IORING_SETUP_CQE32);
+}
+
+IOURINGINLINE unsigned io_uring_cqe_shift(const struct io_uring *ring)
+{
+	return io_uring_cqe_shift_from_flags(ring->flags);
+}
+
+struct io_uring_cqe_iter {
+	struct io_uring_cqe *cqes;
+	unsigned mask;
+	unsigned shift;
+	unsigned head;
+	unsigned tail;
+};
+
+IOURINGINLINE struct io_uring_cqe_iter
+io_uring_cqe_iter_init(const struct io_uring *ring)
+{
+	return (struct io_uring_cqe_iter) {
+		.cqes = ring->cq.cqes,
+		.mask = ring->cq.ring_mask,
+		.shift = io_uring_cqe_shift(ring),
+		.head = *ring->cq.khead,
+		/* Acquire ordering ensures tail is loaded before any CQEs */
+		.tail = io_uring_smp_load_acquire(ring->cq.ktail),
+	};
+}
 
-#define io_uring_cqe_index(ring,ptr,mask)				\
-	(((ptr) & (mask)) << io_uring_cqe_shift(ring))
+IOURINGINLINE bool io_uring_cqe_iter_next(struct io_uring_cqe_iter *iter,
+					  struct io_uring_cqe **cqe)
+{
+	if (iter->head == iter->tail)
+		return false;
+
+	*cqe = &iter->cqes[(iter->head++ & iter->mask) << iter->shift];
+	return true;
+}
 
 /*
- * NOTE: we should just get rid of the 'head' being passed in here, it doesn't
+ * NOTE: we should just get rid of the '__head__' being passed in here, it doesn't
  * serve a purpose anymore. The below is a bit of a work-around to ensure that
- * the compiler doesn't complain about 'head' being unused (or only written,
+ * the compiler doesn't complain about '__head__' being unused (or only written,
  * never read), as we use a local iterator for both the head and tail tracking.
  */
-#define io_uring_for_each_cqe(ring, head, cqe)				\
-	/*								\
-	 * io_uring_smp_load_acquire() enforces the order of tail	\
-	 * and CQE reads.						\
-	 */								\
-	for (__u32 __HEAD__ = (head) = *(ring)->cq.khead,		\
-	     __TAIL__ = io_uring_smp_load_acquire((ring)->cq.ktail);	\
-	     (cqe = ((head) != __TAIL__ ?				\
-		&(ring)->cq.cqes[io_uring_cqe_index(ring, __HEAD__, (ring)->cq.ring_mask)] : NULL)); \
-	     (head) = ++__HEAD__)
+#define io_uring_for_each_cqe(ring, __head__, cqe)			\
	for (struct io_uring_cqe_iter __ITER__ = io_uring_cqe_iter_init(ring); \
	     (__head__) = __ITER__.head, io_uring_cqe_iter_next(&__ITER__, &(cqe)); \
	     (void)(__head__))
 
 /*
  * Must be called after io_uring_for_each_cqe()
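The caller-visible contract of io_uring_for_each_cqe() is unchanged: you still pass a head variable and advance the CQ afterwards, only the internals now go through the iterator above. A minimal usage sketch (not part of the diff; handle_cqe() stands in for application logic):

	#include <liburing.h>

	static void drain_completions(struct io_uring *ring)
	{
		struct io_uring_cqe *cqe;
		unsigned head;
		unsigned count = 0;

		/* Walks CQEs between the cached head and the acquire-loaded tail. */
		io_uring_for_each_cqe(ring, head, cqe) {
			/* handle_cqe(ring, cqe); -- placeholder for application logic */
			count++;
		}
		/* Must be called after io_uring_for_each_cqe(). */
		io_uring_cq_advance(ring, count);
	}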
@@ -410,6 +461,12 @@ IOURINGINLINE void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
 	sqe->flags = (__u8) flags;
 }
 
+IOURINGINLINE void io_uring_sqe_set_buf_group(struct io_uring_sqe *sqe,
+					      int bgid)
+{
+	sqe->buf_group = (__u16) bgid;
+}
+
 IOURINGINLINE void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
 						    unsigned int file_index)
 {
@@ -509,6 +566,16 @@ IOURINGINLINE void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
 	sqe->buf_index = (__u16) buf_index;
 }
 
+IOURINGINLINE void io_uring_prep_readv_fixed(struct io_uring_sqe *sqe, int fd,
+					     const struct iovec *iovecs,
+					     unsigned nr_vecs, __u64 offset,
+					     int flags, int buf_index)
+{
+	io_uring_prep_readv2(sqe, fd, iovecs, nr_vecs, offset, flags);
+	sqe->opcode = IORING_OP_READV_FIXED;
+	sqe->buf_index = (__u16)buf_index;
+}
+
 IOURINGINLINE void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
 					const struct iovec *iovecs,
 					unsigned nr_vecs, __u64 offset)
@@ -533,6 +600,16 @@ IOURINGINLINE void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
 	sqe->buf_index = (__u16) buf_index;
 }
 
+IOURINGINLINE void io_uring_prep_writev_fixed(struct io_uring_sqe *sqe, int fd,
+					      const struct iovec *iovecs,
+					      unsigned nr_vecs, __u64 offset,
+					      int flags, int buf_index)
+{
+	io_uring_prep_writev2(sqe, fd, iovecs, nr_vecs, offset, flags);
+	sqe->opcode = IORING_OP_WRITEV_FIXED;
+	sqe->buf_index = (__u16)buf_index;
+}
+
 IOURINGINLINE void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
 					 struct msghdr *msg, unsigned flags)
 {
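Both new helpers reuse the existing readv2/writev2 preparation and then patch in the *_FIXED opcode and a registered-buffer index. A hedged usage sketch (not part of the diff), assuming the iovecs point into the buffer previously registered with io_uring_register_buffers() at index 0:

	#include <liburing.h>
	#include <errno.h>
	#include <sys/uio.h>

	/* Sketch: queue a vectored read into iovecs lying inside registered buffer 0. */
	static int queue_readv_fixed(struct io_uring *ring, int fd,
				     const struct iovec *vecs, unsigned nr_vecs)
	{
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

		if (!sqe)
			return -EAGAIN;
		/* offset 0, no RWF_* flags, registered buffer index 0 */
		io_uring_prep_readv_fixed(sqe, fd, vecs, nr_vecs, 0, 0, 0);
		return io_uring_submit(ring);
	}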
@@ -724,6 +801,15 @@ IOURINGINLINE void io_uring_prep_listen(struct io_uring_sqe *sqe, int fd,
 	io_uring_prep_rw(IORING_OP_LISTEN, sqe, fd, 0, backlog, 0);
 }
 
+struct epoll_event;
+IOURINGINLINE void io_uring_prep_epoll_wait(struct io_uring_sqe *sqe, int fd,
+					    struct epoll_event *events,
+					    int maxevents, unsigned flags)
+{
+	io_uring_prep_rw(IORING_OP_EPOLL_WAIT, sqe, fd, events, maxevents, 0);
+	sqe->rw_flags = flags;
+}
+
 IOURINGINLINE void io_uring_prep_files_update(struct io_uring_sqe *sqe,
 					      int *fds, unsigned nr_fds,
 					      int offset)
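The bare `struct epoll_event;` forward declaration keeps the header independent of <sys/epoll.h>; callers that use the helper include that header themselves. A rough usage sketch (not part of the diff) that asks the ring to harvest ready events from an existing epoll instance instead of calling epoll_wait(2) directly:

	#include <liburing.h>
	#include <errno.h>
	#include <sys/epoll.h>

	/* Sketch: fetch ready events for an existing epoll fd via the ring. */
	static int queue_epoll_wait(struct io_uring *ring, int epfd,
				    struct epoll_event *events, int maxevents)
	{
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

		if (!sqe)
			return -EAGAIN;
		io_uring_prep_epoll_wait(sqe, epfd, events, maxevents, 0);
		/* On completion, cqe->res is expected to mirror epoll_wait(2)'s return value. */
		return io_uring_submit(ring);
	}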
@@ -908,6 +994,17 @@ IOURINGINLINE void io_uring_prep_sendmsg_zc(struct io_uring_sqe *sqe, int fd,
 	sqe->opcode = IORING_OP_SENDMSG_ZC;
 }
 
+IOURINGINLINE void io_uring_prep_sendmsg_zc_fixed(struct io_uring_sqe *sqe,
+						  int fd,
+						  const struct msghdr *msg,
+						  unsigned flags,
+						  unsigned buf_index)
+{
+	io_uring_prep_sendmsg_zc(sqe, fd, msg, flags);
+	sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+	sqe->buf_index = buf_index;
+}
+
 IOURINGINLINE void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
 				      void *buf, size_t len, int flags)
 {
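As with the other *_fixed variants, the helper only layers IORING_RECVSEND_FIXED_BUF and a buffer index on top of the plain zero-copy preparation, so msg->msg_iov is expected to point into the buffer registered at that index. A hedged sketch (not part of the diff); note that zero-copy sends also post a notification CQE (IORING_CQE_F_NOTIF) that the caller still has to reap:

	#include <liburing.h>
	#include <errno.h>
	#include <sys/socket.h>

	/* Sketch: zero-copy sendmsg out of the registered buffer at buf_index. */
	static int queue_sendmsg_zc_fixed(struct io_uring *ring, int sockfd,
					  const struct msghdr *msg, unsigned buf_index)
	{
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

		if (!sqe)
			return -EAGAIN;
		io_uring_prep_sendmsg_zc_fixed(sqe, sockfd, msg, MSG_NOSIGNAL, buf_index);
		return io_uring_submit(ring);
	}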
@@ -1318,26 +1415,28 @@ IOURINGINLINE void io_uring_prep_cmd_discard(struct io_uring_sqe *sqe,
 	sqe->addr3 = nbytes;
 }
 
+/* Read the kernel's SQ head index with appropriate memory ordering */
+IOURINGINLINE unsigned io_uring_load_sq_head(const struct io_uring *ring)
+{
+	/*
+	 * Without acquire ordering, we could overwrite a SQE before the kernel
+	 * finished reading it. We don't need the acquire ordering for
+	 * non-SQPOLL since then we drive updates.
+	 */
+	if (ring->flags & IORING_SETUP_SQPOLL)
+		return io_uring_smp_load_acquire(ring->sq.khead);
+
+	return *ring->sq.khead;
+}
+
 /*
  * Returns number of unconsumed (if SQPOLL) or unsubmitted entries exist in
  * the SQ ring
  */
 IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
 {
-	unsigned khead;
-
-	/*
-	 * Without a barrier, we could miss an update and think the SQ wasn't
-	 * ready. We don't need the load acquire for non-SQPOLL since then we
-	 * drive updates.
-	 */
-	if (ring->flags & IORING_SETUP_SQPOLL)
-		khead = io_uring_smp_load_acquire(ring->sq.khead);
-	else
-		khead = *ring->sq.khead;
-
 	/* always use real head, to avoid losing sync for short submit */
-	return ring->sq.sqe_tail - khead;
+	return ring->sq.sqe_tail - io_uring_load_sq_head(ring);
 }
 
 /*
@@ -1348,6 +1447,21 @@ IOURINGINLINE unsigned io_uring_sq_space_left(const struct io_uring *ring)
 	return ring->sq.ring_entries - io_uring_sq_ready(ring);
 }
 
+/*
+ * Returns the bit shift needed to index the SQ.
+ * This shift is 1 for rings with big SQEs, and 0 for rings with normal SQEs.
+ * SQE `index` can be computed as &sq.sqes[(index & sq.ring_mask) << sqe_shift].
+ */
+IOURINGINLINE unsigned io_uring_sqe_shift_from_flags(unsigned flags)
+{
+	return !!(flags & IORING_SETUP_SQE128);
+}
+
+IOURINGINLINE unsigned io_uring_sqe_shift(const struct io_uring *ring)
+{
+	return io_uring_sqe_shift_from_flags(ring->flags);
+}
+
 /*
  * Only applicable when using SQPOLL - allows the caller to wait for space
  * to free up in the SQ ring, which happens when the kernel side thread has
@@ -1402,7 +1516,7 @@ IOURINGINLINE int io_uring_cq_eventfd_toggle(struct io_uring *ring,
 {
 	uint32_t flags;
 
-	if (!!enabled == io_uring_cq_eventfd_enabled(ring))
+	if (enabled == io_uring_cq_eventfd_enabled(ring))
 		return 0;
 
 	if (!ring->cq.kflags)
@@ -1445,10 +1559,7 @@ IOURINGINLINE int __io_uring_peek_cqe(struct io_uring *ring,
 	int err = 0;
 	unsigned available;
 	unsigned mask = ring->cq.ring_mask;
-	int shift = 0;
-
-	if (ring->flags & IORING_SETUP_CQE32)
-		shift = 1;
+	unsigned shift = io_uring_cqe_shift(ring);
 
 	do {
 		unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
@@ -1515,26 +1626,16 @@ IOURINGINLINE int io_uring_wait_cqe(struct io_uring *ring,
 IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
 {
 	struct io_uring_sq *sq = &ring->sq;
-	unsigned int head, next = sq->sqe_tail + 1;
-	int shift = 0;
-
-	if (ring->flags & IORING_SETUP_SQE128)
-		shift = 1;
-	if (!(ring->flags & IORING_SETUP_SQPOLL))
-		head = *sq->khead;
-	else
-		head = io_uring_smp_load_acquire(sq->khead);
-
-	if (next - head <= sq->ring_entries) {
-		struct io_uring_sqe *sqe;
+	unsigned head = io_uring_load_sq_head(ring), tail = sq->sqe_tail;
+	struct io_uring_sqe *sqe;
 
-		sqe = &sq->sqes[(sq->sqe_tail & sq->ring_mask) << shift];
-		sq->sqe_tail = next;
-		io_uring_initialize_sqe(sqe);
-		return sqe;
-	}
+	if (tail - head >= sq->ring_entries)
+		return NULL;
 
-	return NULL;
+	sqe = &sq->sqes[(tail & sq->ring_mask) << io_uring_sqe_shift(ring)];
+	sq->sqe_tail = tail + 1;
+	io_uring_initialize_sqe(sqe);
+	return sqe;
 }
 
 /*
@@ -1614,6 +1715,25 @@ IOURINGINLINE int io_uring_buf_ring_available(struct io_uring *ring,
 	return (uint16_t) (br->tail - head);
 }
 
+/*
+ * As of liburing-2.2, io_uring_get_sqe() has been converted into a
+ * "static inline" function. However, this change breaks seamless
+ * updates of liburing.so, as applications would need to be recompiled.
+ * To ensure backward compatibility, liburing keeps the original
+ * io_uring_get_sqe() symbol available in the shared library.
+ *
+ * To accomplish this, io_uring_get_sqe() is defined as a non-static
+ * inline function when LIBURING_INTERNAL is set, which only applies
+ * during liburing.so builds.
+ *
+ * This strategy ensures new users adopt the "static inline" version
+ * while preserving compatibility for old applications linked against
+ * the shared library.
+ *
+ * Relevant commits:
+ * 8be8af4afcb4 ("queue: provide io_uring_get_sqe() symbol again")
+ * 52dcdbba35c8 ("src/queue: protect io_uring_get_sqe() with LIBURING_INTERNAL")
+ */
 #ifndef LIBURING_INTERNAL
 IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
 {
@@ -2,12 +2,15 @@
 #ifndef LIBURING_INT_FLAGS
 #define LIBURING_INT_FLAGS
 
-#define INT_FLAGS_MASK		(IORING_ENTER_REGISTERED_RING)
+#define INT_FLAGS_MASK		(IORING_ENTER_REGISTERED_RING | \
+				 IORING_ENTER_NO_IOWAIT)
 
 enum {
 	INT_FLAG_REG_RING	= IORING_ENTER_REGISTERED_RING,
+	INT_FLAG_NO_IOWAIT	= IORING_ENTER_NO_IOWAIT,
 	INT_FLAG_REG_REG_RING	= 1,
 	INT_FLAG_APP_MEM	= 2,
+	INT_FLAG_CQ_ENTER	= 4,
 };
 
 static inline int ring_enter_flags(struct io_uring *ring)
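INT_FLAG_NO_IOWAIT mirrors the new enter flag so the library can carry it on every io_uring_enter(2) call once the application opts out of iowait accounting. From application code the opt-out might look like the sketch below (not part of the diff), gated on the kernel advertising IORING_FEAT_NO_IOWAIT:

	#include <liburing.h>
	#include <stdbool.h>

	/* Sketch: stop the kernel from accounting CQ waits as iowait, if supported. */
	static void maybe_disable_iowait(struct io_uring *ring)
	{
		if (ring->features & IORING_FEAT_NO_IOWAIT)
			io_uring_set_iowait(ring, false);
	}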
@@ -221,7 +221,19 @@ LIBURING_2.9 {
 		io_uring_resize_rings;
 		io_uring_register_wait_reg;
 		io_uring_submit_and_wait_reg;
-		io_uring_free_reg_wait;
-		io_uring_setup_reg_wait;
 		io_uring_clone_buffers_offset;
+		io_uring_register_region;
+		io_uring_sqe_set_buf_group;
 } LIBURING_2.8;
+
+LIBURING_2.10 {
+	global:
+		io_uring_register_ifq;
+		io_uring_prep_epoll_wait;
+		io_uring_prep_writev_fixed;
+		io_uring_prep_readv_fixed;
+		io_uring_prep_sendmsg_zc_fixed;
+		io_uring_set_iowait;
+		__io_uring_clone_buffers;
+		__io_uring_clone_buffers_offset;
+} LIBURING_2.9;
@@ -108,7 +108,14 @@ LIBURING_2.9 {
 		io_uring_resize_rings;
 		io_uring_register_wait_reg;
 		io_uring_submit_and_wait_reg;
-		io_uring_free_reg_wait;
-		io_uring_setup_reg_wait;
 		io_uring_clone_buffers_offset;
+		io_uring_register_region;
 } LIBURING_2.8;
+
+LIBURING_2.10 {
+	global:
+		io_uring_register_ifq;
+		io_uring_set_iowait;
+		__io_uring_clone_buffers;
+		__io_uring_clone_buffers_offset;
+} LIBURING_2.9;