uringmachine 0.8.2 → 0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/TODO.md +0 -1
- data/examples/bm_side_running.rb +83 -0
- data/examples/bm_sqlite.rb +1 -1
- data/ext/um/um.c +66 -4
- data/ext/um/um.h +36 -0
- data/ext/um/um_class.c +6 -0
- data/ext/um/um_const.c +36 -0
- data/ext/um/um_ext.c +2 -0
- data/ext/um/um_stream.c +344 -0
- data/ext/um/um_stream_class.c +140 -0
- data/ext/um/um_utils.c +4 -0
- data/lib/uringmachine/actor.rb +1 -1
- data/lib/uringmachine/version.rb +1 -1
- data/lib/uringmachine.rb +35 -17
- data/test/test_fiber.rb +23 -3
- data/test/test_stream.rb +133 -0
- data/test/test_um.rb +109 -2
- data/uringmachine.gemspec +0 -2
- data/vendor/liburing/.github/workflows/{build.yml → ci.yml} +107 -42
- data/vendor/liburing/.gitignore +1 -0
- data/vendor/liburing/CHANGELOG +10 -0
- data/vendor/liburing/README +5 -0
- data/vendor/liburing/configure +1 -1
- data/vendor/liburing/examples/Makefile +1 -0
- data/vendor/liburing/examples/helpers.c +25 -0
- data/vendor/liburing/examples/helpers.h +13 -0
- data/vendor/liburing/examples/io_uring-test.c +3 -0
- data/vendor/liburing/examples/proxy.c +1 -1
- data/vendor/liburing/examples/reg-wait.c +41 -6
- data/vendor/liburing/examples/send-zerocopy.c +79 -32
- data/vendor/liburing/examples/zcrx.c +436 -0
- data/vendor/liburing/liburing.spec +1 -1
- data/vendor/liburing/src/Makefile +0 -1
- data/vendor/liburing/src/arch/generic/syscall.h +2 -2
- data/vendor/liburing/src/arch/syscall-defs.h +2 -2
- data/vendor/liburing/src/include/liburing/io_uring.h +101 -17
- data/vendor/liburing/src/include/liburing.h +179 -59
- data/vendor/liburing/src/int_flags.h +4 -1
- data/vendor/liburing/src/liburing-ffi.map +14 -2
- data/vendor/liburing/src/liburing.map +9 -2
- data/vendor/liburing/src/queue.c +35 -30
- data/vendor/liburing/src/register.c +46 -15
- data/vendor/liburing/src/sanitize.c +6 -9
- data/vendor/liburing/src/setup.c +37 -71
- data/vendor/liburing/src/syscall.c +2 -2
- data/vendor/liburing/test/232c93d07b74.c +1 -0
- data/vendor/liburing/test/Makefile +9 -0
- data/vendor/liburing/test/accept-test.c +1 -0
- data/vendor/liburing/test/cmd-discard.c +16 -8
- data/vendor/liburing/test/connect.c +11 -7
- data/vendor/liburing/test/epwait.c +420 -0
- data/vendor/liburing/test/eventfd-ring.c +30 -5
- data/vendor/liburing/test/fallocate.c +1 -1
- data/vendor/liburing/test/fixed-hugepage.c +10 -7
- data/vendor/liburing/test/fixed-seg.c +187 -0
- data/vendor/liburing/test/helpers.c +121 -0
- data/vendor/liburing/test/helpers.h +13 -0
- data/vendor/liburing/test/init-mem.c +2 -0
- data/vendor/liburing/test/io_uring_passthrough.c +78 -62
- data/vendor/liburing/test/iopoll-overflow.c +5 -4
- data/vendor/liburing/test/iopoll.c +20 -10
- data/vendor/liburing/test/iowait.c +141 -0
- data/vendor/liburing/test/nvme.h +2 -0
- data/vendor/liburing/test/pipe-bug.c +11 -5
- data/vendor/liburing/test/pipe-eof.c +11 -1
- data/vendor/liburing/test/read-inc-file.c +150 -0
- data/vendor/liburing/test/read-write.c +21 -14
- data/vendor/liburing/test/recv-bundle-short-ooo.c +435 -0
- data/vendor/liburing/test/recv-multishot.c +2 -2
- data/vendor/liburing/test/reg-wait.c +449 -120
- data/vendor/liburing/test/regbuf-clone.c +53 -0
- data/vendor/liburing/test/resize-rings.c +25 -2
- data/vendor/liburing/test/rsrc_tags.c +67 -14
- data/vendor/liburing/test/send-zerocopy.c +52 -130
- data/vendor/liburing/test/sendmsg_iov_clean.c +216 -0
- data/vendor/liburing/test/socket-nb.c +158 -0
- data/vendor/liburing/test/sqwait.c +9 -11
- data/vendor/liburing/test/timeout.c +198 -0
- data/vendor/liburing/test/vec-regbuf.c +609 -0
- data/vendor/liburing/test/wait-timeout.c +1 -1
- data/vendor/liburing/test/wq-aff.c +5 -1
- data/vendor/liburing/test/zcrx.c +928 -0
- metadata +16 -32
- data/vendor/liburing/.github/workflows/codespell.yml +0 -25
- data/vendor/liburing/.github/workflows/shellcheck.yml +0 -20
@@ -76,11 +76,11 @@ static inline int __sys_io_uring_setup(unsigned int entries,
 
 static inline int __sys_io_uring_enter2(unsigned int fd, unsigned int to_submit,
                                         unsigned int min_complete,
-                                        unsigned int flags,
+                                        unsigned int flags, void *arg,
                                         size_t sz)
 {
         return (int) __do_syscall6(__NR_io_uring_enter, fd, to_submit,
-                                   min_complete, flags,
+                                   min_complete, flags, arg, sz);
 }
 
 static inline int __sys_io_uring_enter(unsigned int fd, unsigned int to_submit,
@@ -87,6 +87,7 @@ struct io_uring_sqe {
         union {
                 __s32   splice_fd_in;
                 __u32   file_index;
+                __u32   zcrx_ifq_idx;
                 __u32   optlen;
                 struct {
                         __u16   addr_len;
@@ -200,6 +201,9 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_NO_SQARRAY         (1U << 16)
 
+/* Use hybrid poll in iopoll process */
+#define IORING_SETUP_HYBRID_IOPOLL      (1U << 17)
+
 enum io_uring_op {
         IORING_OP_NOP,
         IORING_OP_READV,
@@ -259,6 +263,10 @@ enum io_uring_op {
         IORING_OP_FTRUNCATE,
         IORING_OP_BIND,
         IORING_OP_LISTEN,
+        IORING_OP_RECV_ZC,
+        IORING_OP_EPOLL_WAIT,
+        IORING_OP_READV_FIXED,
+        IORING_OP_WRITEV_FIXED,
 
         /* this goes last, obviously */
         IORING_OP_LAST,
@@ -361,7 +369,7 @@ enum io_uring_op {
  * result will be the number of buffers send, with
  * the starting buffer ID in cqe->flags as per
  * usual for provided buffer usage. The buffers
- * will be
+ * will be contiguous from the starting buffer ID.
  */
 #define IORING_RECVSEND_POLL_FIRST      (1U << 0)
 #define IORING_RECV_MULTISHOT           (1U << 1)
@@ -421,7 +429,7 @@ enum io_uring_msg_ring_flags {
  * IO completion data structure (Completion Queue Entry)
  */
 struct io_uring_cqe {
-        __u64   user_data;      /* sqe->user_data
+        __u64   user_data;      /* sqe->user_data value passed back */
         __s32   res;            /* result code for this event */
         __u32   flags;
 
@@ -519,6 +527,7 @@ struct io_cqring_offsets {
 #define IORING_ENTER_REGISTERED_RING    (1U << 4)
 #define IORING_ENTER_ABS_TIMER          (1U << 5)
 #define IORING_ENTER_EXT_ARG_REG        (1U << 6)
+#define IORING_ENTER_NO_IOWAIT          (1U << 7)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -555,6 +564,8 @@ struct io_uring_params {
 #define IORING_FEAT_REG_REG_RING        (1U << 13)
 #define IORING_FEAT_RECVSEND_BUNDLE     (1U << 14)
 #define IORING_FEAT_MIN_TIMEOUT         (1U << 15)
+#define IORING_FEAT_RW_ATTR             (1U << 16)
+#define IORING_FEAT_NO_IOWAIT           (1U << 17)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -613,9 +624,16 @@ enum io_uring_register_op {
         /* clone registered buffers from source ring to current ring */
         IORING_REGISTER_CLONE_BUFFERS           = 30,
 
+        /* send MSG_RING without having a ring */
+        IORING_REGISTER_SEND_MSG_RING           = 31,
+
+        /* register a netdev hw rx queue for zerocopy */
+        IORING_REGISTER_ZCRX_IFQ                = 32,
+
+        /* resize CQ ring */
         IORING_REGISTER_RESIZE_RINGS            = 33,
 
-
+        IORING_REGISTER_MEM_REGION              = 34,
 
         /* this goes last */
         IORING_REGISTER_LAST,
@@ -637,6 +655,31 @@ struct io_uring_files_update {
         __aligned_u64 /* __s32 * */ fds;
 };
 
+enum {
+        /* initialise with user provided memory pointed by user_addr */
+        IORING_MEM_REGION_TYPE_USER             = 1,
+};
+
+struct io_uring_region_desc {
+        __u64 user_addr;
+        __u64 size;
+        __u32 flags;
+        __u32 id;
+        __u64 mmap_offset;
+        __u64 __resv[4];
+};
+
+enum {
+        /* expose the region as registered wait arguments */
+        IORING_MEM_REGION_REG_WAIT_ARG          = 1,
+};
+
+struct io_uring_mem_region_reg {
+        __u64 region_uptr; /* struct io_uring_region_desc * */
+        __u64 flags;
+        __u64 __resv[2];
+};
+
 /*
  * Register a fully sparse file space, rather than pass in an array of all
  * -1 file descriptors.
@@ -808,20 +851,6 @@ enum {
         IORING_REG_WAIT_TS              = (1U << 0),
 };
 
-/*
- * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of
- * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is
- * called rather than pass in a wait argument structure separately.
- */
-struct io_uring_cqwait_reg_arg {
-        __u32           flags;
-        __u32           struct_size;
-        __u32           nr_entries;
-        __u32           pad;
-        __u64           user_addr;
-        __u64           pad2[3];
-};
-
 /*
  * Argument for io_uring_enter(2) with
  * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument
@@ -888,6 +917,61 @@ enum io_uring_socket_op {
         SOCKET_URING_OP_SETSOCKOPT,
 };
 
+/* Zero copy receive refill queue entry */
+struct io_uring_zcrx_rqe {
+        __u64   off;
+        __u32   len;
+        __u32   __pad;
+};
+
+struct io_uring_zcrx_cqe {
+        __u64   off;
+        __u64   __pad;
+};
+
+/* The bit from which area id is encoded into offsets */
+#define IORING_ZCRX_AREA_SHIFT  48
+#define IORING_ZCRX_AREA_MASK   (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1))
+
+struct io_uring_zcrx_offsets {
+        __u32   head;
+        __u32   tail;
+        __u32   rqes;
+        __u32   __resv2;
+        __u64   __resv[2];
+};
+
+enum io_uring_zcrx_area_flags {
+        IORING_ZCRX_AREA_DMABUF = 1,
+};
+
+struct io_uring_zcrx_area_reg {
+        __u64   addr;
+        __u64   len;
+        __u64   rq_area_token;
+        __u32   flags;
+        __u32   dmabuf_fd;
+        __u64   __resv2[2];
+};
+
+/*
+ * Argument for IORING_REGISTER_ZCRX_IFQ
+ */
+struct io_uring_zcrx_ifq_reg {
+        __u32   if_idx;
+        __u32   if_rxq;
+        __u32   rq_entries;
+        __u32   flags;
+
+        __u64   area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
+        __u64   region_ptr; /* struct io_uring_region_desc * */
+
+        struct io_uring_zcrx_offsets offsets;
+        __u32   zcrx_id;
+        __u32   __resv2;
+        __u64   __resv[3];
+};
+
 #ifdef __cplusplus
 }
 #endif
@@ -132,6 +132,16 @@ struct io_uring {
         unsigned pad2;
 };
 
+struct io_uring_zcrx_rq {
+        __u32 *khead;
+        __u32 *ktail;
+        __u32 rq_tail;
+        unsigned ring_entries;
+
+        struct io_uring_zcrx_rqe *rqes;
+        void *ring_ptr;
+};
+
 /*
  * Library interface
  */
@@ -206,7 +216,12 @@ int io_uring_resize_rings(struct io_uring *ring, struct io_uring_params *p);
 int io_uring_clone_buffers_offset(struct io_uring *dst, struct io_uring *src,
                                   unsigned int dst_off, unsigned int src_off,
                                   unsigned int nr, unsigned int flags);
+int __io_uring_clone_buffers_offset(struct io_uring *dst, struct io_uring *src,
+                                    unsigned int dst_off, unsigned int src_off,
+                                    unsigned int nr, unsigned int flags);
 int io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src);
+int __io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src,
+                             unsigned int flags);
 int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
                               unsigned nr_iovecs);
 int io_uring_register_buffers_tags(struct io_uring *ring,
@@ -265,6 +280,8 @@ int io_uring_register_file_alloc_range(struct io_uring *ring,
 
 int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi);
 int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi);
+int io_uring_register_ifq(struct io_uring *ring,
+                          struct io_uring_zcrx_ifq_reg *reg);
 
 int io_uring_register_clock(struct io_uring *ring,
                             struct io_uring_clock_register *arg);
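For orientation, a minimal sketch of how the new io_uring_register_ifq() entry point and the zero-copy receive structures above might be wired together. The interface name, RX queue index, entry count, and the assumption that the caller owns page-aligned `buf_area`/`ring_area` mappings are all illustrative, not taken from this diff.

```c
#include <liburing.h>
#include <net/if.h>
#include <stdint.h>

/* Illustrative sketch only: register one hardware RX queue of "eth0"
 * for zero-copy receive. Error handling is omitted. */
static int setup_zcrx(struct io_uring *ring,
                      void *buf_area, size_t buf_len,
                      void *ring_area, size_t ring_len)
{
    struct io_uring_region_desc rd = {
        .user_addr = (__u64)(uintptr_t)ring_area,   /* refill-ring memory */
        .size      = ring_len,
        .flags     = IORING_MEM_REGION_TYPE_USER,
    };
    struct io_uring_zcrx_area_reg area = {
        .addr = (__u64)(uintptr_t)buf_area,         /* receive buffer area */
        .len  = buf_len,
    };
    struct io_uring_zcrx_ifq_reg reg = {
        .if_idx     = if_nametoindex("eth0"),       /* assumed interface */
        .if_rxq     = 0,                            /* assumed RX queue index */
        .rq_entries = 4096,
        .area_ptr   = (__u64)(uintptr_t)&area,
        .region_ptr = (__u64)(uintptr_t)&rd,
    };

    /* On success the kernel fills reg.offsets and reg.zcrx_id. */
    return io_uring_register_ifq(ring, &reg);
}
```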
@@ -279,17 +296,16 @@ int io_uring_enter(unsigned int fd, unsigned int to_submit,
                    unsigned int min_complete, unsigned int flags, sigset_t *sig);
 int io_uring_enter2(unsigned int fd, unsigned int to_submit,
                     unsigned int min_complete, unsigned int flags,
-
+                    void *arg, size_t sz);
 int io_uring_setup(unsigned int entries, struct io_uring_params *p);
 int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
                       unsigned int nr_args);
 
 /*
- * Mapped/registered
+ * Mapped/registered regions
  */
-
-
-void io_uring_free_reg_wait(struct io_uring_reg_wait *reg, unsigned nentries);
+int io_uring_register_region(struct io_uring *ring,
+                             struct io_uring_mem_region_reg *reg);
 
 /*
  * Mapped buffer ring alloc/register + unregister/free helpers
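A minimal sketch of registering a caller-provided memory region with the new io_uring_register_region() declaration above, using the region types from the updated io_uring.h. The mapping size and the choice of an anonymous mmap are assumptions for the example.

```c
#include <liburing.h>
#include <stdint.h>
#include <sys/mman.h>

/* Illustrative sketch: register an anonymous mapping as a ring memory
 * region that can later back registered wait arguments. */
static int register_wait_region(struct io_uring *ring, size_t size)
{
    void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (mem == MAP_FAILED)
        return -1;

    struct io_uring_region_desc rd = {
        .user_addr = (__u64)(uintptr_t)mem,
        .size      = size,
        .flags     = IORING_MEM_REGION_TYPE_USER,
    };
    struct io_uring_mem_region_reg mr = {
        .region_uptr = (__u64)(uintptr_t)&rd,
        .flags       = IORING_MEM_REGION_REG_WAIT_ARG,
    };

    return io_uring_register_region(ring, &mr);
}
```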
@@ -309,34 +325,69 @@ int __io_uring_get_cqe(struct io_uring *ring,
                        struct io_uring_cqe **cqe_ptr, unsigned submit,
                        unsigned wait_nr, sigset_t *sigmask);
 
+/*
+ * Enable/disable setting of iowait by the kernel.
+ */
+int io_uring_set_iowait(struct io_uring *ring, bool enable_iowait);
+
 #define LIBURING_UDATA_TIMEOUT  ((__u64) -1)
 
 /*
- *
- *
+ * Returns the bit shift needed to index the CQ.
+ * This shift is 1 for rings with big CQEs, and 0 for rings with normal CQEs.
+ * CQE `index` can be computed as &cq.cqes[(index & cq.ring_mask) << cqe_shift].
  */
-
-
+IOURINGINLINE unsigned io_uring_cqe_shift_from_flags(unsigned flags)
+{
+        return !!(flags & IORING_SETUP_CQE32);
+}
+
+IOURINGINLINE unsigned io_uring_cqe_shift(const struct io_uring *ring)
+{
+        return io_uring_cqe_shift_from_flags(ring->flags);
+}
+
+struct io_uring_cqe_iter {
+        struct io_uring_cqe *cqes;
+        unsigned mask;
+        unsigned shift;
+        unsigned head;
+        unsigned tail;
+};
+
+IOURINGINLINE struct io_uring_cqe_iter
+io_uring_cqe_iter_init(const struct io_uring *ring)
+{
+        return (struct io_uring_cqe_iter) {
+                .cqes = ring->cq.cqes,
+                .mask = ring->cq.ring_mask,
+                .shift = io_uring_cqe_shift(ring),
+                .head = *ring->cq.khead,
+                /* Acquire ordering ensures tail is loaded before any CQEs */
+                .tail = io_uring_smp_load_acquire(ring->cq.ktail),
+        };
+}
 
-
-
+IOURINGINLINE bool io_uring_cqe_iter_next(struct io_uring_cqe_iter *iter,
+                                          struct io_uring_cqe **cqe)
+{
+        if (iter->head == iter->tail)
+                return false;
+
+        *cqe = &iter->cqes[(iter->head++ & iter->mask) << iter->shift];
+        return true;
+}
 
 /*
- * NOTE: we should just get rid of the '
+ * NOTE: we should just get rid of the '__head__' being passed in here, it doesn't
  * serve a purpose anymore. The below is a bit of a work-around to ensure that
- * the compiler doesn't complain about '
+ * the compiler doesn't complain about '__head__' being unused (or only written,
  * never read), as we use a local iterator for both the head and tail tracking.
  */
-#define io_uring_for_each_cqe(ring,
-
-
-
-         */ \
-        for (__u32 __HEAD__ = (head) = *(ring)->cq.khead, \
-             __TAIL__ = io_uring_smp_load_acquire((ring)->cq.ktail); \
-             (cqe = ((head) != __TAIL__ ? \
-                &(ring)->cq.cqes[io_uring_cqe_index(ring, __HEAD__, (ring)->cq.ring_mask)] : NULL)); \
-             (head) = ++__HEAD__)
+#define io_uring_for_each_cqe(ring, __head__, cqe) \
+        for (struct io_uring_cqe_iter __ITER__ = io_uring_cqe_iter_init(ring); \
+             (__head__) = __ITER__.head, io_uring_cqe_iter_next(&__ITER__, &(cqe)); \
+             (void)(__head__))
 
 /*
  * Must be called after io_uring_for_each_cqe()
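For reference, a minimal sketch of consuming completions through the reworked, iterator-backed io_uring_for_each_cqe() loop; the user_data handling and the printf are illustrative only.

```c
#include <liburing.h>
#include <stdio.h>

/* Illustrative sketch: drain whatever completions are currently available.
 * The macro's head argument is kept only for source compatibility. */
static void drain_completions(struct io_uring *ring)
{
    struct io_uring_cqe *cqe;
    unsigned head, seen = 0;

    io_uring_for_each_cqe(ring, head, cqe) {
        printf("op %llu completed with %d\n",
               (unsigned long long)cqe->user_data, cqe->res);
        seen++;
    }
    /* Must be called after io_uring_for_each_cqe() */
    io_uring_cq_advance(ring, seen);
}
```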
@@ -410,6 +461,12 @@ IOURINGINLINE void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
         sqe->flags = (__u8) flags;
 }
 
+IOURINGINLINE void io_uring_sqe_set_buf_group(struct io_uring_sqe *sqe,
+                                              int bgid)
+{
+        sqe->buf_group = (__u16) bgid;
+}
+
 IOURINGINLINE void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
                                                     unsigned int file_index)
 {
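A small sketch of where the new io_uring_sqe_set_buf_group() helper fits: a buffer-select recv that names its provided-buffer group. The group id (7) and the user_data value are assumptions for illustration.

```c
#include <liburing.h>

/* Illustrative sketch: queue a recv that picks its buffer from a
 * previously provided buffer group (id 7 is assumed). */
static void queue_recv_from_group(struct io_uring *ring, int sockfd)
{
    struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

    if (!sqe)
        return;
    io_uring_prep_recv(sqe, sockfd, NULL, 0, 0);
    io_uring_sqe_set_flags(sqe, IOSQE_BUFFER_SELECT);
    io_uring_sqe_set_buf_group(sqe, 7);
    io_uring_sqe_set_data64(sqe, 42);
}
```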
@@ -509,6 +566,16 @@ IOURINGINLINE void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
         sqe->buf_index = (__u16) buf_index;
 }
 
+IOURINGINLINE void io_uring_prep_readv_fixed(struct io_uring_sqe *sqe, int fd,
+                                             const struct iovec *iovecs,
+                                             unsigned nr_vecs, __u64 offset,
+                                             int flags, int buf_index)
+{
+        io_uring_prep_readv2(sqe, fd, iovecs, nr_vecs, offset, flags);
+        sqe->opcode = IORING_OP_READV_FIXED;
+        sqe->buf_index = (__u16)buf_index;
+}
+
 IOURINGINLINE void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
                                         const struct iovec *iovecs,
                                         unsigned nr_vecs, __u64 offset)
@@ -533,6 +600,16 @@ IOURINGINLINE void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
         sqe->buf_index = (__u16) buf_index;
 }
 
+IOURINGINLINE void io_uring_prep_writev_fixed(struct io_uring_sqe *sqe, int fd,
+                                              const struct iovec *iovecs,
+                                              unsigned nr_vecs, __u64 offset,
+                                              int flags, int buf_index)
+{
+        io_uring_prep_writev2(sqe, fd, iovecs, nr_vecs, offset, flags);
+        sqe->opcode = IORING_OP_WRITEV_FIXED;
+        sqe->buf_index = (__u16)buf_index;
+}
+
 IOURINGINLINE void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
                                          struct msghdr *msg, unsigned flags)
 {
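A brief sketch of using the new vectored fixed-buffer read preparation together with io_uring_register_buffers(). Registering a single buffer at index 0 and the one-shot submit are assumptions made for the example.

```c
#include <liburing.h>
#include <errno.h>

/* Illustrative sketch: read into a registered buffer with the new
 * IORING_OP_READV_FIXED-based helper. */
static int queue_readv_fixed(struct io_uring *ring, int fd, void *buf, size_t len)
{
    struct iovec iov = { .iov_base = buf, .iov_len = len };
    struct io_uring_sqe *sqe;
    int ret;

    /* The iovec must point into the buffer registered at index 0. */
    ret = io_uring_register_buffers(ring, &iov, 1);
    if (ret)
        return ret;

    sqe = io_uring_get_sqe(ring);
    if (!sqe)
        return -EAGAIN;
    io_uring_prep_readv_fixed(sqe, fd, &iov, 1, 0, 0, 0);
    return io_uring_submit(ring);
}
```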
@@ -724,6 +801,15 @@ IOURINGINLINE void io_uring_prep_listen(struct io_uring_sqe *sqe, int fd,
         io_uring_prep_rw(IORING_OP_LISTEN, sqe, fd, 0, backlog, 0);
 }
 
+struct epoll_event;
+IOURINGINLINE void io_uring_prep_epoll_wait(struct io_uring_sqe *sqe, int fd,
+                                            struct epoll_event *events,
+                                            int maxevents, unsigned flags)
+{
+        io_uring_prep_rw(IORING_OP_EPOLL_WAIT, sqe, fd, events, maxevents, 0);
+        sqe->rw_flags = flags;
+}
+
 IOURINGINLINE void io_uring_prep_files_update(struct io_uring_sqe *sqe,
                                               int *fds, unsigned nr_fds,
                                               int offset)
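A minimal sketch of queuing the new epoll-wait preparation so an existing epoll fd is polled through the ring rather than with epoll_wait(2). The caller-owned `events` array is an assumption of the example.

```c
#include <liburing.h>
#include <sys/epoll.h>

/* Illustrative sketch: wait for events on an existing epoll fd via io_uring. */
static void queue_epoll_wait(struct io_uring *ring, int epfd,
                             struct epoll_event *events, int maxevents)
{
    struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

    if (!sqe)
        return;
    io_uring_prep_epoll_wait(sqe, epfd, events, maxevents, 0);
    /* On completion, cqe->res is expected to report the number of events
     * written into `events`, mirroring epoll_wait(2). */
}
```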
@@ -908,6 +994,17 @@ IOURINGINLINE void io_uring_prep_sendmsg_zc(struct io_uring_sqe *sqe, int fd,
         sqe->opcode = IORING_OP_SENDMSG_ZC;
 }
 
+IOURINGINLINE void io_uring_prep_sendmsg_zc_fixed(struct io_uring_sqe *sqe,
+                                                  int fd,
+                                                  const struct msghdr *msg,
+                                                  unsigned flags,
+                                                  unsigned buf_index)
+{
+        io_uring_prep_sendmsg_zc(sqe, fd, msg, flags);
+        sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+        sqe->buf_index = buf_index;
+}
+
 IOURINGINLINE void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
                                       void *buf, size_t len, int flags)
 {
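A short sketch of the new fixed-buffer zero-copy sendmsg helper. The use of registered buffer index 0 and the caller-managed msghdr lifetime are assumptions for illustration.

```c
#include <liburing.h>
#include <sys/socket.h>

/* Illustrative sketch: zero-copy sendmsg sourcing data from the buffer
 * registered at index 0. */
static void queue_sendmsg_zc_fixed(struct io_uring *ring, int sockfd,
                                   struct msghdr *msg)
{
    struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

    if (!sqe)
        return;
    /* msg and its iovecs must point into the registered buffer and stay
     * valid until the final notification CQE arrives. */
    io_uring_prep_sendmsg_zc_fixed(sqe, sockfd, msg, 0, 0);
}
```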
@@ -1318,26 +1415,28 @@ IOURINGINLINE void io_uring_prep_cmd_discard(struct io_uring_sqe *sqe,
         sqe->addr3 = nbytes;
 }
 
+/* Read the kernel's SQ head index with appropriate memory ordering */
+IOURINGINLINE unsigned io_uring_load_sq_head(const struct io_uring *ring)
+{
+        /*
+         * Without acquire ordering, we could overwrite a SQE before the kernel
+         * finished reading it. We don't need the acquire ordering for
+         * non-SQPOLL since then we drive updates.
+         */
+        if (ring->flags & IORING_SETUP_SQPOLL)
+                return io_uring_smp_load_acquire(ring->sq.khead);
+
+        return *ring->sq.khead;
+}
+
 /*
  * Returns number of unconsumed (if SQPOLL) or unsubmitted entries exist in
  * the SQ ring
  */
 IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
 {
-        unsigned khead;
-
-        /*
-         * Without a barrier, we could miss an update and think the SQ wasn't
-         * ready. We don't need the load acquire for non-SQPOLL since then we
-         * drive updates.
-         */
-        if (ring->flags & IORING_SETUP_SQPOLL)
-                khead = io_uring_smp_load_acquire(ring->sq.khead);
-        else
-                khead = *ring->sq.khead;
-
         /* always use real head, to avoid losing sync for short submit */
-        return ring->sq.sqe_tail -
+        return ring->sq.sqe_tail - io_uring_load_sq_head(ring);
 }
 
 /*
@@ -1348,6 +1447,21 @@ IOURINGINLINE unsigned io_uring_sq_space_left(const struct io_uring *ring)
         return ring->sq.ring_entries - io_uring_sq_ready(ring);
 }
 
+/*
+ * Returns the bit shift needed to index the SQ.
+ * This shift is 1 for rings with big SQEs, and 0 for rings with normal SQEs.
+ * SQE `index` can be computed as &sq.sqes[(index & sq.ring_mask) << sqe_shift].
+ */
+IOURINGINLINE unsigned io_uring_sqe_shift_from_flags(unsigned flags)
+{
+        return !!(flags & IORING_SETUP_SQE128);
+}
+
+IOURINGINLINE unsigned io_uring_sqe_shift(const struct io_uring *ring)
+{
+        return io_uring_sqe_shift_from_flags(ring->flags);
+}
+
 /*
  * Only applicable when using SQPOLL - allows the caller to wait for space
  * to free up in the SQ ring, which happens when the kernel side thread has
@@ -1402,7 +1516,7 @@ IOURINGINLINE int io_uring_cq_eventfd_toggle(struct io_uring *ring,
 {
         uint32_t flags;
 
-        if (
+        if (enabled == io_uring_cq_eventfd_enabled(ring))
                 return 0;
 
         if (!ring->cq.kflags)
@@ -1445,10 +1559,7 @@ IOURINGINLINE int __io_uring_peek_cqe(struct io_uring *ring,
         int err = 0;
         unsigned available;
         unsigned mask = ring->cq.ring_mask;
-
-
-        if (ring->flags & IORING_SETUP_CQE32)
-                shift = 1;
+        unsigned shift = io_uring_cqe_shift(ring);
 
         do {
                 unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
@@ -1515,26 +1626,16 @@ IOURINGINLINE int io_uring_wait_cqe(struct io_uring *ring,
 IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
 {
         struct io_uring_sq *sq = &ring->sq;
-        unsigned
-
-
-        if (ring->flags & IORING_SETUP_SQE128)
-                shift = 1;
-        if (!(ring->flags & IORING_SETUP_SQPOLL))
-                head = *sq->khead;
-        else
-                head = io_uring_smp_load_acquire(sq->khead);
-
-        if (next - head <= sq->ring_entries) {
-                struct io_uring_sqe *sqe;
+        unsigned head = io_uring_load_sq_head(ring), tail = sq->sqe_tail;
+        struct io_uring_sqe *sqe;
 
-
-
-                io_uring_initialize_sqe(sqe);
-                return sqe;
-        }
+        if (tail - head >= sq->ring_entries)
+                return NULL;
 
-
+        sqe = &sq->sqes[(tail & sq->ring_mask) << io_uring_sqe_shift(ring)];
+        sq->sqe_tail = tail + 1;
+        io_uring_initialize_sqe(sqe);
+        return sqe;
 }
 
 /*
@@ -1614,6 +1715,25 @@ IOURINGINLINE int io_uring_buf_ring_available(struct io_uring *ring,
         return (uint16_t) (br->tail - head);
 }
 
+/*
+ * As of liburing-2.2, io_uring_get_sqe() has been converted into a
+ * "static inline" function. However, this change breaks seamless
+ * updates of liburing.so, as applications would need to be recompiled.
+ * To ensure backward compatibility, liburing keeps the original
+ * io_uring_get_sqe() symbol available in the shared library.
+ *
+ * To accomplish this, io_uring_get_sqe() is defined as a non-static
+ * inline function when LIBURING_INTERNAL is set, which only applies
+ * during liburing.so builds.
+ *
+ * This strategy ensures new users adopt the "static inline" version
+ * while preserving compatibility for old applications linked against
+ * the shared library.
+ *
+ * Relevant commits:
+ * 8be8af4afcb4 ("queue: provide io_uring_get_sqe() symbol again")
+ * 52dcdbba35c8 ("src/queue: protect io_uring_get_sqe() with LIBURING_INTERNAL")
+ */
 #ifndef LIBURING_INTERNAL
 IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
 {
@@ -2,12 +2,15 @@
 #ifndef LIBURING_INT_FLAGS
 #define LIBURING_INT_FLAGS
 
-#define INT_FLAGS_MASK          (IORING_ENTER_REGISTERED_RING
+#define INT_FLAGS_MASK          (IORING_ENTER_REGISTERED_RING | \
+                                 IORING_ENTER_NO_IOWAIT)
 
 enum {
         INT_FLAG_REG_RING       = IORING_ENTER_REGISTERED_RING,
+        INT_FLAG_NO_IOWAIT      = IORING_ENTER_NO_IOWAIT,
         INT_FLAG_REG_REG_RING   = 1,
         INT_FLAG_APP_MEM        = 2,
+        INT_FLAG_CQ_ENTER       = 4,
 };
 
 static inline int ring_enter_flags(struct io_uring *ring)
@@ -221,7 +221,19 @@ LIBURING_2.9 {
         io_uring_resize_rings;
         io_uring_register_wait_reg;
         io_uring_submit_and_wait_reg;
-        io_uring_free_reg_wait;
-        io_uring_setup_reg_wait;
         io_uring_clone_buffers_offset;
+        io_uring_register_region;
+        io_uring_sqe_set_buf_group;
 } LIBURING_2.8;
+
+LIBURING_2.10 {
+        global:
+                io_uring_register_ifq;
+                io_uring_prep_epoll_wait;
+                io_uring_prep_writev_fixed;
+                io_uring_prep_readv_fixed;
+                io_uring_prep_sendmsg_zc_fixed;
+                io_uring_set_iowait;
+                __io_uring_clone_buffers;
+                __io_uring_clone_buffers_offset;
+} LIBURING_2.9;
@@ -108,7 +108,14 @@ LIBURING_2.9 {
         io_uring_resize_rings;
         io_uring_register_wait_reg;
         io_uring_submit_and_wait_reg;
-        io_uring_free_reg_wait;
-        io_uring_setup_reg_wait;
         io_uring_clone_buffers_offset;
+        io_uring_register_region;
 } LIBURING_2.8;
+
+LIBURING_2.10 {
+        global:
+                io_uring_register_ifq;
+                io_uring_set_iowait;
+                __io_uring_clone_buffers;
+                __io_uring_clone_buffers_offset;
+} LIBURING_2.9;