polyphony 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/TODO.md +4 -0
- data/examples/core/debug.rb +12 -0
- data/examples/core/rpc_benchmark.rb +136 -0
- data/lib/polyphony/extensions/fiber.rb +1 -0
- data/lib/polyphony/extensions/socket.rb +42 -42
- data/lib/polyphony/version.rb +1 -1
- data/polyphony.gemspec +3 -1
- data/test/test_socket.rb +1 -1
- metadata +32 -149
- data/vendor/liburing/man/IO_URING_CHECK_VERSION.3 +0 -1
- data/vendor/liburing/man/IO_URING_VERSION_MAJOR.3 +0 -1
- data/vendor/liburing/man/IO_URING_VERSION_MINOR.3 +0 -1
- data/vendor/liburing/man/io_uring.7 +0 -781
- data/vendor/liburing/man/io_uring_buf_ring_add.3 +0 -53
- data/vendor/liburing/man/io_uring_buf_ring_advance.3 +0 -31
- data/vendor/liburing/man/io_uring_buf_ring_cq_advance.3 +0 -41
- data/vendor/liburing/man/io_uring_buf_ring_init.3 +0 -30
- data/vendor/liburing/man/io_uring_buf_ring_mask.3 +0 -27
- data/vendor/liburing/man/io_uring_check_version.3 +0 -72
- data/vendor/liburing/man/io_uring_close_ring_fd.3 +0 -43
- data/vendor/liburing/man/io_uring_cq_advance.3 +0 -49
- data/vendor/liburing/man/io_uring_cq_has_overflow.3 +0 -25
- data/vendor/liburing/man/io_uring_cq_ready.3 +0 -26
- data/vendor/liburing/man/io_uring_cqe_get_data.3 +0 -53
- data/vendor/liburing/man/io_uring_cqe_get_data64.3 +0 -1
- data/vendor/liburing/man/io_uring_cqe_seen.3 +0 -42
- data/vendor/liburing/man/io_uring_enter.2 +0 -1700
- data/vendor/liburing/man/io_uring_enter2.2 +0 -1
- data/vendor/liburing/man/io_uring_free_probe.3 +0 -27
- data/vendor/liburing/man/io_uring_get_events.3 +0 -33
- data/vendor/liburing/man/io_uring_get_probe.3 +0 -30
- data/vendor/liburing/man/io_uring_get_sqe.3 +0 -57
- data/vendor/liburing/man/io_uring_major_version.3 +0 -1
- data/vendor/liburing/man/io_uring_minor_version.3 +0 -1
- data/vendor/liburing/man/io_uring_opcode_supported.3 +0 -30
- data/vendor/liburing/man/io_uring_peek_cqe.3 +0 -38
- data/vendor/liburing/man/io_uring_prep_accept.3 +0 -197
- data/vendor/liburing/man/io_uring_prep_accept_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_cancel.3 +0 -118
- data/vendor/liburing/man/io_uring_prep_cancel64.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_close.3 +0 -59
- data/vendor/liburing/man/io_uring_prep_close_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_connect.3 +0 -66
- data/vendor/liburing/man/io_uring_prep_fadvise.3 +0 -59
- data/vendor/liburing/man/io_uring_prep_fallocate.3 +0 -59
- data/vendor/liburing/man/io_uring_prep_fgetxattr.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_files_update.3 +0 -92
- data/vendor/liburing/man/io_uring_prep_fsetxattr.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_fsync.3 +0 -70
- data/vendor/liburing/man/io_uring_prep_getxattr.3 +0 -61
- data/vendor/liburing/man/io_uring_prep_link.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_link_timeout.3 +0 -94
- data/vendor/liburing/man/io_uring_prep_linkat.3 +0 -91
- data/vendor/liburing/man/io_uring_prep_madvise.3 +0 -56
- data/vendor/liburing/man/io_uring_prep_mkdir.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_mkdirat.3 +0 -83
- data/vendor/liburing/man/io_uring_prep_msg_ring.3 +0 -92
- data/vendor/liburing/man/io_uring_prep_msg_ring_cqe_flags.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_multishot_accept.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_multishot_accept_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_nop.3 +0 -28
- data/vendor/liburing/man/io_uring_prep_openat.3 +0 -117
- data/vendor/liburing/man/io_uring_prep_openat2.3 +0 -117
- data/vendor/liburing/man/io_uring_prep_openat2_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_openat_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_poll_add.3 +0 -72
- data/vendor/liburing/man/io_uring_prep_poll_multishot.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_poll_remove.3 +0 -55
- data/vendor/liburing/man/io_uring_prep_poll_update.3 +0 -89
- data/vendor/liburing/man/io_uring_prep_provide_buffers.3 +0 -140
- data/vendor/liburing/man/io_uring_prep_read.3 +0 -69
- data/vendor/liburing/man/io_uring_prep_read_fixed.3 +0 -72
- data/vendor/liburing/man/io_uring_prep_readv.3 +0 -85
- data/vendor/liburing/man/io_uring_prep_readv2.3 +0 -111
- data/vendor/liburing/man/io_uring_prep_recv.3 +0 -105
- data/vendor/liburing/man/io_uring_prep_recv_multishot.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_recvmsg.3 +0 -124
- data/vendor/liburing/man/io_uring_prep_recvmsg_multishot.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_remove_buffers.3 +0 -52
- data/vendor/liburing/man/io_uring_prep_rename.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_renameat.3 +0 -96
- data/vendor/liburing/man/io_uring_prep_send.3 +0 -66
- data/vendor/liburing/man/io_uring_prep_send_set_addr.3 +0 -38
- data/vendor/liburing/man/io_uring_prep_send_zc.3 +0 -96
- data/vendor/liburing/man/io_uring_prep_send_zc_fixed.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_sendmsg.3 +0 -89
- data/vendor/liburing/man/io_uring_prep_sendmsg_zc.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_setxattr.3 +0 -64
- data/vendor/liburing/man/io_uring_prep_shutdown.3 +0 -53
- data/vendor/liburing/man/io_uring_prep_socket.3 +0 -118
- data/vendor/liburing/man/io_uring_prep_socket_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_socket_direct_alloc.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_splice.3 +0 -120
- data/vendor/liburing/man/io_uring_prep_statx.3 +0 -74
- data/vendor/liburing/man/io_uring_prep_symlink.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_symlinkat.3 +0 -85
- data/vendor/liburing/man/io_uring_prep_sync_file_range.3 +0 -59
- data/vendor/liburing/man/io_uring_prep_tee.3 +0 -74
- data/vendor/liburing/man/io_uring_prep_timeout.3 +0 -95
- data/vendor/liburing/man/io_uring_prep_timeout_remove.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_timeout_update.3 +0 -98
- data/vendor/liburing/man/io_uring_prep_unlink.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_unlinkat.3 +0 -82
- data/vendor/liburing/man/io_uring_prep_write.3 +0 -67
- data/vendor/liburing/man/io_uring_prep_write_fixed.3 +0 -72
- data/vendor/liburing/man/io_uring_prep_writev.3 +0 -85
- data/vendor/liburing/man/io_uring_prep_writev2.3 +0 -111
- data/vendor/liburing/man/io_uring_queue_exit.3 +0 -26
- data/vendor/liburing/man/io_uring_queue_init.3 +0 -89
- data/vendor/liburing/man/io_uring_queue_init_params.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_cmsg_firsthdr.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_cmsg_nexthdr.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_name.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_out.3 +0 -82
- data/vendor/liburing/man/io_uring_recvmsg_payload.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_payload_length.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_validate.3 +0 -1
- data/vendor/liburing/man/io_uring_register.2 +0 -834
- data/vendor/liburing/man/io_uring_register_buf_ring.3 +0 -140
- data/vendor/liburing/man/io_uring_register_buffers.3 +0 -104
- data/vendor/liburing/man/io_uring_register_buffers_sparse.3 +0 -1
- data/vendor/liburing/man/io_uring_register_buffers_tags.3 +0 -1
- data/vendor/liburing/man/io_uring_register_buffers_update_tag.3 +0 -1
- data/vendor/liburing/man/io_uring_register_eventfd.3 +0 -51
- data/vendor/liburing/man/io_uring_register_eventfd_async.3 +0 -1
- data/vendor/liburing/man/io_uring_register_file_alloc_range.3 +0 -52
- data/vendor/liburing/man/io_uring_register_files.3 +0 -112
- data/vendor/liburing/man/io_uring_register_files_sparse.3 +0 -1
- data/vendor/liburing/man/io_uring_register_files_tags.3 +0 -1
- data/vendor/liburing/man/io_uring_register_files_update.3 +0 -1
- data/vendor/liburing/man/io_uring_register_files_update_tag.3 +0 -1
- data/vendor/liburing/man/io_uring_register_iowq_aff.3 +0 -61
- data/vendor/liburing/man/io_uring_register_iowq_max_workers.3 +0 -71
- data/vendor/liburing/man/io_uring_register_ring_fd.3 +0 -49
- data/vendor/liburing/man/io_uring_register_sync_cancel.3 +0 -71
- data/vendor/liburing/man/io_uring_setup.2 +0 -669
- data/vendor/liburing/man/io_uring_sq_ready.3 +0 -31
- data/vendor/liburing/man/io_uring_sq_space_left.3 +0 -25
- data/vendor/liburing/man/io_uring_sqe_set_data.3 +0 -48
- data/vendor/liburing/man/io_uring_sqe_set_data64.3 +0 -1
- data/vendor/liburing/man/io_uring_sqe_set_flags.3 +0 -87
- data/vendor/liburing/man/io_uring_sqring_wait.3 +0 -34
- data/vendor/liburing/man/io_uring_submit.3 +0 -46
- data/vendor/liburing/man/io_uring_submit_and_get_events.3 +0 -31
- data/vendor/liburing/man/io_uring_submit_and_wait.3 +0 -38
- data/vendor/liburing/man/io_uring_submit_and_wait_timeout.3 +0 -56
- data/vendor/liburing/man/io_uring_unregister_buf_ring.3 +0 -30
- data/vendor/liburing/man/io_uring_unregister_buffers.3 +0 -27
- data/vendor/liburing/man/io_uring_unregister_eventfd.3 +0 -1
- data/vendor/liburing/man/io_uring_unregister_files.3 +0 -27
- data/vendor/liburing/man/io_uring_unregister_iowq_aff.3 +0 -1
- data/vendor/liburing/man/io_uring_unregister_ring_fd.3 +0 -32
- data/vendor/liburing/man/io_uring_wait_cqe.3 +0 -40
- data/vendor/liburing/man/io_uring_wait_cqe_nr.3 +0 -43
- data/vendor/liburing/man/io_uring_wait_cqe_timeout.3 +0 -53
- data/vendor/liburing/man/io_uring_wait_cqes.3 +0 -56
@@ -1,669 +0,0 @@
|
|
1
|
-
.\" Copyright (C) 2019 Jens Axboe <axboe@kernel.dk>
|
2
|
-
.\" Copyright (C) 2019 Jon Corbet <corbet@lwn.net>
|
3
|
-
.\" Copyright (C) 2019 Red Hat, Inc.
|
4
|
-
.\"
|
5
|
-
.\" SPDX-License-Identifier: LGPL-2.0-or-later
|
6
|
-
.\"
|
7
|
-
.TH io_uring_setup 2 2019-01-29 "Linux" "Linux Programmer's Manual"
|
8
|
-
.SH NAME
|
9
|
-
io_uring_setup \- setup a context for performing asynchronous I/O
|
10
|
-
.SH SYNOPSIS
|
11
|
-
.nf
|
12
|
-
.BR "#include <liburing.h>"
|
13
|
-
.PP
|
14
|
-
.BI "int io_uring_setup(u32 " entries ", struct io_uring_params *" p );
|
15
|
-
.fi
|
16
|
-
.PP
|
17
|
-
.SH DESCRIPTION
|
18
|
-
.PP
|
19
|
-
The
|
20
|
-
.BR io_uring_setup (2)
|
21
|
-
system call sets up a submission queue (SQ) and completion queue (CQ) with at
|
22
|
-
least
|
23
|
-
.I entries
|
24
|
-
entries, and returns a file descriptor which can be used to perform
|
25
|
-
subsequent operations on the io_uring instance. The submission and
|
26
|
-
completion queues are shared between userspace and the kernel, which
|
27
|
-
eliminates the need to copy data when initiating and completing I/O.
|
28
|
-
|
29
|
-
.I params
|
30
|
-
is used by the application to pass options to the kernel, and by the
|
31
|
-
kernel to convey information about the ring buffers.
|
32
|
-
.PP
|
33
|
-
.in +4n
|
34
|
-
.EX
|
35
|
-
struct io_uring_params {
|
36
|
-
__u32 sq_entries;
|
37
|
-
__u32 cq_entries;
|
38
|
-
__u32 flags;
|
39
|
-
__u32 sq_thread_cpu;
|
40
|
-
__u32 sq_thread_idle;
|
41
|
-
__u32 features;
|
42
|
-
__u32 wq_fd;
|
43
|
-
__u32 resv[3];
|
44
|
-
struct io_sqring_offsets sq_off;
|
45
|
-
struct io_cqring_offsets cq_off;
|
46
|
-
};
|
47
|
-
.EE
|
48
|
-
.in
|
49
|
-
.PP
|
50
|
-
The
|
51
|
-
.IR flags ,
|
52
|
-
.IR sq_thread_cpu ,
|
53
|
-
and
|
54
|
-
.I sq_thread_idle
|
55
|
-
fields are used to configure the io_uring instance.
|
56
|
-
.I flags
|
57
|
-
is a bit mask of 0 or more of the following values ORed
|
58
|
-
together:
|
59
|
-
.TP
|
60
|
-
.B IORING_SETUP_IOPOLL
|
61
|
-
Perform busy-waiting for an I/O completion, as opposed to getting
|
62
|
-
notifications via an asynchronous IRQ (Interrupt Request). The file
|
63
|
-
system (if any) and block device must support polling in order for
|
64
|
-
this to work. Busy-waiting provides lower latency, but may consume
|
65
|
-
more CPU resources than interrupt driven I/O. Currently, this feature
|
66
|
-
is usable only on a file descriptor opened using the
|
67
|
-
.B O_DIRECT
|
68
|
-
flag. When a read or write is submitted to a polled context, the
|
69
|
-
application must poll for completions on the CQ ring by calling
|
70
|
-
.BR io_uring_enter (2).
|
71
|
-
It is illegal to mix and match polled and non-polled I/O on an io_uring
|
72
|
-
instance.
|
73
|
-
|
74
|
-
This is only applicable for storage devices for now, and the storage device
|
75
|
-
must be configured for polling. How to do that depends on the device type
|
76
|
-
in question. For NVMe devices, the nvme driver must be loaded with the
|
77
|
-
.I poll_queues
|
78
|
-
parameter set to the desired number of polling queues. The polling queues
|
79
|
-
will be shared appropriately between the CPUs in the system, if the number
|
80
|
-
is less than the number of online CPU threads.
|
81
|
-
|
82
|
-
.TP
|
83
|
-
.B IORING_SETUP_SQPOLL
|
84
|
-
When this flag is specified, a kernel thread is created to perform
|
85
|
-
submission queue polling. An io_uring instance configured in this way
|
86
|
-
enables an application to issue I/O without ever context switching
|
87
|
-
into the kernel. By using the submission queue to fill in new
|
88
|
-
submission queue entries and watching for completions on the
|
89
|
-
completion queue, the application can submit and reap I/Os without
|
90
|
-
doing a single system call.
|
91
|
-
|
92
|
-
If the kernel thread is idle for more than
|
93
|
-
.I sq_thread_idle
|
94
|
-
milliseconds, it will set the
|
95
|
-
.B IORING_SQ_NEED_WAKEUP
|
96
|
-
bit in the
|
97
|
-
.I flags
|
98
|
-
field of the
|
99
|
-
.IR "struct io_sq_ring" .
|
100
|
-
When this happens, the application must call
|
101
|
-
.BR io_uring_enter (2)
|
102
|
-
to wake the kernel thread. If I/O is kept busy, the kernel thread
|
103
|
-
will never sleep. An application making use of this feature will need
|
104
|
-
to guard the
|
105
|
-
.BR io_uring_enter (2)
|
106
|
-
call with the following code sequence:
|
107
|
-
|
108
|
-
.in +4n
|
109
|
-
.EX
|
110
|
-
/*
|
111
|
-
* Ensure that the wakeup flag is read after the tail pointer
|
112
|
-
* has been written. It's important to use memory load acquire
|
113
|
-
* semantics for the flags read, as otherwise the application
|
114
|
-
* and the kernel might not agree on the consistency of the
|
115
|
-
* wakeup flag.
|
116
|
-
*/
|
117
|
-
unsigned flags = atomic_load_relaxed(sq_ring->flags);
|
118
|
-
if (flags & IORING_SQ_NEED_WAKEUP)
|
119
|
-
io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP);
|
120
|
-
.EE
|
121
|
-
.in
|
122
|
-
|
123
|
-
where
|
124
|
-
.I sq_ring
|
125
|
-
is a submission queue ring setup using the
|
126
|
-
.I struct io_sqring_offsets
|
127
|
-
described below.
|
128
|
-
.TP
|
129
|
-
.BR
|
130
|
-
Note that, when using a ring setup with
|
131
|
-
.B IORING_SETUP_SQPOLL,
|
132
|
-
you never directly call the
|
133
|
-
.BR io_uring_enter (2)
|
134
|
-
system call. That is usually taken care of by liburing's
|
135
|
-
.BR io_uring_submit (3)
|
136
|
-
function. It automatically determines if you are using
|
137
|
-
polling mode or not and deals with when your program needs to call
|
138
|
-
.BR io_uring_enter (2)
|
139
|
-
without you having to bother about it.
|
140
|
-
.TP
|
141
|
-
.BR
|
142
|
-
Before version 5.11 of the Linux kernel, to successfully use this feature, the
|
143
|
-
application must register a set of files to be used for IO through
|
144
|
-
.BR io_uring_register (2)
|
145
|
-
using the
|
146
|
-
.B IORING_REGISTER_FILES
|
147
|
-
opcode. Failure to do so will result in submitted IO being errored with
|
148
|
-
.B EBADF.
|
149
|
-
The presence of this feature can be detected by the
|
150
|
-
.B IORING_FEAT_SQPOLL_NONFIXED
|
151
|
-
feature flag.
|
152
|
-
In version 5.11 and later, it is no longer necessary to register files to use
|
153
|
-
this feature. 5.11 also allows using this as non-root, if the user has the
|
154
|
-
.B CAP_SYS_NICE
|
155
|
-
capability. In 5.13 this requirement was also relaxed, and no special privileges
|
156
|
-
are needed for SQPOLL in newer kernels. Certain stable kernels older than 5.13
|
157
|
-
may also support unprivileged SQPOLL.
|
158
|
-
.TP
|
159
|
-
.B IORING_SETUP_SQ_AFF
|
160
|
-
If this flag is specified, then the poll thread will be bound to the
|
161
|
-
cpu set in the
|
162
|
-
.I sq_thread_cpu
|
163
|
-
field of the
|
164
|
-
.IR "struct io_uring_params" .
|
165
|
-
This flag is only meaningful when
|
166
|
-
.B IORING_SETUP_SQPOLL
|
167
|
-
is specified. When cgroup setting
|
168
|
-
.I cpuset.cpus
|
169
|
-
changes (typically in container environment), the bound cpu set may be
|
170
|
-
changed as well.
|
171
|
-
.TP
|
172
|
-
.B IORING_SETUP_CQSIZE
|
173
|
-
Create the completion queue with
|
174
|
-
.IR "struct io_uring_params.cq_entries"
|
175
|
-
entries. The value must be greater than
|
176
|
-
.IR entries ,
|
177
|
-
and may be rounded up to the next power-of-two.
|
178
|
-
.TP
|
179
|
-
.B IORING_SETUP_CLAMP
|
180
|
-
If this flag is specified, and if
|
181
|
-
.IR entries
|
182
|
-
exceeds
|
183
|
-
.BR IORING_MAX_ENTRIES ,
|
184
|
-
then
|
185
|
-
.IR entries
|
186
|
-
will be clamped at
|
187
|
-
.BR IORING_MAX_ENTRIES .
|
188
|
-
If the flag
|
189
|
-
.BR IORING_SETUP_SQPOLL
|
190
|
-
is set, and if the value of
|
191
|
-
.IR "struct io_uring_params.cq_entries"
|
192
|
-
exceeds
|
193
|
-
.BR IORING_MAX_CQ_ENTRIES ,
|
194
|
-
then it will be clamped at
|
195
|
-
.BR IORING_MAX_CQ_ENTRIES .
|
196
|
-
.TP
|
197
|
-
.B IORING_SETUP_ATTACH_WQ
|
198
|
-
This flag should be set in conjunction with
|
199
|
-
.IR "struct io_uring_params.wq_fd"
|
200
|
-
being set to an existing io_uring ring file descriptor. When set, the
|
201
|
-
io_uring instance being created will share the asynchronous worker
|
202
|
-
thread backend of the specified io_uring ring, rather than create a new
|
203
|
-
separate thread pool.
|
204
|
-
.TP
|
205
|
-
.B IORING_SETUP_R_DISABLED
|
206
|
-
If this flag is specified, the io_uring ring starts in a disabled state.
|
207
|
-
In this state, restrictions can be registered, but submissions are not allowed.
|
208
|
-
See
|
209
|
-
.BR io_uring_register (2)
|
210
|
-
for details on how to enable the ring. Available since 5.10.
|
211
|
-
.TP
|
212
|
-
.B IORING_SETUP_SUBMIT_ALL
|
213
|
-
Normally io_uring stops submitting a batch of requests, if one of these requests
|
214
|
-
results in an error. This can cause submission of less than what is expected,
|
215
|
-
if a request ends in error while being submitted. If the ring is created with
|
216
|
-
this flag,
|
217
|
-
.BR io_uring_enter (2)
|
218
|
-
will continue submitting requests even if it encounters an error submitting
|
219
|
-
a request. CQEs are still posted for errored requests regardless of whether or
|
220
|
-
not this flag is set at ring creation time, the only difference is if the
|
221
|
-
submit sequence is halted or continued when an error is observed. Available
|
222
|
-
since 5.18.
|
223
|
-
.TP
|
224
|
-
.B IORING_SETUP_COOP_TASKRUN
|
225
|
-
By default, io_uring will interrupt a task running in userspace when a
|
226
|
-
completion event comes in. This is to ensure that completions run in a timely
|
227
|
-
manner. For a lot of use cases, this is overkill and can cause reduced
|
228
|
-
performance from both the inter-processor interrupt used to do this, the
|
229
|
-
kernel/user transition, the needless interruption of the task's userspace
|
230
|
-
activities, and reduced batching if completions come in at a rapid rate. Most
|
231
|
-
applications don't need the forceful interruption, as the events are processed
|
232
|
-
at any kernel/user transition. The exceptions are setups where the application
|
233
|
-
uses multiple threads operating on the same ring, where the application
|
234
|
-
waiting on completions isn't the one that submitted them. For most other
|
235
|
-
use cases, setting this flag will improve performance. Available since 5.19.
|
236
|
-
.TP
|
237
|
-
.B IORING_SETUP_TASKRUN_FLAG
|
238
|
-
Used in conjunction with
|
239
|
-
.B IORING_SETUP_COOP_TASKRUN,
|
240
|
-
this provides a flag,
|
241
|
-
.B IORING_SQ_TASKRUN,
|
242
|
-
which is set in the SQ ring
|
243
|
-
.I flags
|
244
|
-
whenever completions are pending that should be processed. liburing will check
|
245
|
-
for this flag even when doing
|
246
|
-
.BR io_uring_peek_cqe (3)
|
247
|
-
and enter the kernel to process them, and applications can do the same. This
|
248
|
-
makes
|
249
|
-
.B IORING_SETUP_TASKRUN_FLAG
|
250
|
-
safe to use even when applications rely on a peek style operation on the CQ
|
251
|
-
ring to see if anything might be pending to reap. Available since 5.19.
|
252
|
-
.TP
|
253
|
-
.B IORING_SETUP_SQE128
|
254
|
-
If set, io_uring will use 128-byte SQEs rather than the normal 64-byte sized
|
255
|
-
variant. This is a requirement for using certain request types, as of 5.19
|
256
|
-
only the
|
257
|
-
.B IORING_OP_URING_CMD
|
258
|
-
passthrough command for NVMe passthrough needs this. Available since 5.19.
|
259
|
-
.TP
|
260
|
-
.B IORING_SETUP_CQE32
|
261
|
-
If set, io_uring will use 32-byte CQEs rather than the normal 16-byte sized
|
262
|
-
variant. This is a requirement for using certain request types, as of 5.19
|
263
|
-
only the
|
264
|
-
.B IORING_OP_URING_CMD
|
265
|
-
passthrough command for NVMe passthrough needs this. Available since 5.19.
|
266
|
-
.TP
|
267
|
-
.B IORING_SETUP_SINGLE_ISSUER
|
268
|
-
A hint to the kernel that only a single task (or thread) will submit requests, which is
|
269
|
-
used for internal optimisations. The submission task is either the task that created the
|
270
|
-
ring, or if
|
271
|
-
.B IORING_SETUP_R_DISABLED
|
272
|
-
is specified then it is the task that enables the ring through
|
273
|
-
.BR io_uring_register (2) .
|
274
|
-
The kernel enforces this rule, failing requests with
|
275
|
-
.B -EEXIST
|
276
|
-
if the restriction is violated.
|
277
|
-
Note that when
|
278
|
-
.B IORING_SETUP_SQPOLL
|
279
|
-
is set it is considered that the polling task is doing all submissions
|
280
|
-
on behalf of the userspace and so it always complies with the rule disregarding
|
281
|
-
how many userspace tasks do
|
282
|
-
.BR io_uring_enter (2).
|
283
|
-
Available since 6.0.
|
284
|
-
.TP
|
285
|
-
.B IORING_SETUP_DEFER_TASKRUN
|
286
|
-
By default, io_uring will process all outstanding work at the end of any system
|
287
|
-
call or thread interrupt. This can delay the application from making other progress.
|
288
|
-
Setting this flag will hint to io_uring that it should defer work until an
|
289
|
-
.BR io_uring_enter (2)
|
290
|
-
call with the
|
291
|
-
.B IORING_ENTER_GETEVENTS
|
292
|
-
flag set. This allows the application to request work to run just before it wants to
|
293
|
-
process completions.
|
294
|
-
This flag requires the
|
295
|
-
.BR IORING_SETUP_SINGLE_ISSUER
|
296
|
-
flag to be set, and also enforces that the call to
|
297
|
-
.BR io_uring_enter (2)
|
298
|
-
is called from the same thread that submitted requests.
|
299
|
-
Note that if this flag is set then it is the application's responsibility to periodically
|
300
|
-
trigger work (for example via any of the CQE waiting functions) or else completions may
|
301
|
-
not be delivered.
|
302
|
-
Available since 6.1.
|
303
|
-
.PP
|
304
|
-
If no flags are specified, the io_uring instance is setup for
|
305
|
-
interrupt driven I/O. I/O may be submitted using
|
306
|
-
.BR io_uring_enter (2)
|
307
|
-
and can be reaped by polling the completion queue.
|
308
|
-
|
309
|
-
The
|
310
|
-
.I resv
|
311
|
-
array must be initialized to zero.
|
312
|
-
|
313
|
-
.I features
|
314
|
-
is filled in by the kernel, which specifies various features supported
|
315
|
-
by current kernel version.
|
316
|
-
.TP
|
317
|
-
.B IORING_FEAT_SINGLE_MMAP
|
318
|
-
If this flag is set, the two SQ and CQ rings can be mapped with a single
|
319
|
-
.I mmap(2)
|
320
|
-
call. The SQEs must still be allocated separately. This brings the necessary
|
321
|
-
.I mmap(2)
|
322
|
-
calls down from three to two. Available since kernel 5.4.
|
323
|
-
.TP
|
324
|
-
.B IORING_FEAT_NODROP
|
325
|
-
If this flag is set, io_uring supports almost never dropping completion events.
|
326
|
-
If a completion event occurs and the CQ ring is full, the kernel stores
|
327
|
-
the event internally until such a time that the CQ ring has room for more
|
328
|
-
entries. If this overflow condition is entered, attempting to submit more
|
329
|
-
IO will fail with the
|
330
|
-
.B -EBUSY
|
331
|
-
error value, if it can't flush the overflown events to the CQ ring. If this
|
332
|
-
happens, the application must reap events from the CQ ring and attempt the
|
333
|
-
submit again. If the kernel has no free memory to store the event internally
|
334
|
-
it will be visible by an increase in the overflow value on the cqring.
|
335
|
-
Available since kernel 5.5. Additionally
|
336
|
-
.BR io_uring_enter (2)
|
337
|
-
will return
|
338
|
-
.B -EBADR
|
339
|
-
the next time it would otherwise sleep waiting for completions (since kernel 5.19).
|
340
|
-
|
341
|
-
.TP
|
342
|
-
.B IORING_FEAT_SUBMIT_STABLE
|
343
|
-
If this flag is set, applications can be certain that any data for
|
344
|
-
async offload has been consumed when the kernel has consumed the SQE. Available
|
345
|
-
since kernel 5.5.
|
346
|
-
.TP
|
347
|
-
.B IORING_FEAT_RW_CUR_POS
|
348
|
-
If this flag is set, applications can specify
|
349
|
-
.I offset
|
350
|
-
==
|
351
|
-
.B -1
|
352
|
-
with
|
353
|
-
.B IORING_OP_{READV,WRITEV}
|
354
|
-
,
|
355
|
-
.B IORING_OP_{READ,WRITE}_FIXED
|
356
|
-
, and
|
357
|
-
.B IORING_OP_{READ,WRITE}
|
358
|
-
to mean current file position, which behaves like
|
359
|
-
.I preadv2(2)
|
360
|
-
and
|
361
|
-
.I pwritev2(2)
|
362
|
-
with
|
363
|
-
.I offset
|
364
|
-
==
|
365
|
-
.B -1.
|
366
|
-
It'll use (and update) the current file position. This obviously comes
|
367
|
-
with the caveat that if the application has multiple reads or writes in flight,
|
368
|
-
then the end result will not be as expected. This is similar to threads sharing
|
369
|
-
a file descriptor and doing IO using the current file position. Available since
|
370
|
-
kernel 5.6.
|
371
|
-
.TP
|
372
|
-
.B IORING_FEAT_CUR_PERSONALITY
|
373
|
-
If this flag is set, then io_uring guarantees that both sync and async
|
374
|
-
execution of a request assumes the credentials of the task that called
|
375
|
-
.I
|
376
|
-
io_uring_enter(2)
|
377
|
-
to queue the requests. If this flag isn't set, then requests are issued with
|
378
|
-
the credentials of the task that originally registered the io_uring. If only
|
379
|
-
one task is using a ring, then this flag doesn't matter as the credentials
|
380
|
-
will always be the same. Note that this is the default behavior, tasks can
|
381
|
-
still register different personalities through
|
382
|
-
.I
|
383
|
-
io_uring_register(2)
|
384
|
-
with
|
385
|
-
.B IORING_REGISTER_PERSONALITY
|
386
|
-
and specify the personality to use in the sqe. Available since kernel 5.6.
|
387
|
-
.TP
|
388
|
-
.B IORING_FEAT_FAST_POLL
|
389
|
-
If this flag is set, then io_uring supports using an internal poll mechanism
|
390
|
-
to drive data/space readiness. This means that requests that cannot read or
|
391
|
-
write data to a file no longer need to be punted to an async thread for
|
392
|
-
handling, instead they will begin operation when the file is ready. This is
|
393
|
-
similar to doing poll + read/write in userspace, but eliminates the need to do
|
394
|
-
so. If this flag is set, requests waiting on space/data consume a lot less
|
395
|
-
resources doing so as they are not blocking a thread. Available since kernel
|
396
|
-
5.7.
|
397
|
-
.TP
|
398
|
-
.B IORING_FEAT_POLL_32BITS
|
399
|
-
If this flag is set, the
|
400
|
-
.B IORING_OP_POLL_ADD
|
401
|
-
command accepts the full 32-bit range of epoll based flags. Most notably
|
402
|
-
.B EPOLLEXCLUSIVE
|
403
|
-
which allows exclusive (waking single waiters) behavior. Available since kernel
|
404
|
-
5.9.
|
405
|
-
.TP
|
406
|
-
.B IORING_FEAT_SQPOLL_NONFIXED
|
407
|
-
If this flag is set, the
|
408
|
-
.B IORING_SETUP_SQPOLL
|
409
|
-
feature no longer requires the use of fixed files. Any normal file descriptor
|
410
|
-
can be used for IO commands without needing registration. Available since
|
411
|
-
kernel 5.11.
|
412
|
-
.TP
|
413
|
-
.B IORING_FEAT_ENTER_EXT_ARG
|
414
|
-
If this flag is set, then the
|
415
|
-
.BR io_uring_enter (2)
|
416
|
-
system call supports passing in an extended argument instead of just the
|
417
|
-
.IR "sigset_t"
|
418
|
-
of earlier kernels. This
|
419
|
-
extended argument is of type
|
420
|
-
.IR "struct io_uring_getevents_arg"
|
421
|
-
and allows the caller to pass in both a
|
422
|
-
.IR "sigset_t"
|
423
|
-
and a timeout argument for waiting on events. The struct layout is as follows:
|
424
|
-
.TP
|
425
|
-
.in +8n
|
426
|
-
.EX
|
427
|
-
struct io_uring_getevents_arg {
|
428
|
-
__u64 sigmask;
|
429
|
-
__u32 sigmask_sz;
|
430
|
-
__u32 pad;
|
431
|
-
__u64 ts;
|
432
|
-
};
|
433
|
-
.EE
|
434
|
-
|
435
|
-
and a pointer to this struct must be passed in if
|
436
|
-
.B IORING_ENTER_EXT_ARG
|
437
|
-
is set in the flags for the enter system call. Available since kernel 5.11.
|
438
|
-
.TP
|
439
|
-
.B IORING_FEAT_NATIVE_WORKERS
|
440
|
-
If this flag is set, io_uring is using native workers for its async helpers.
|
441
|
-
Previous kernels used kernel threads that assumed the identity of the
|
442
|
-
original io_uring owning task, but later kernels will actively create what
|
443
|
-
looks more like regular process threads instead. Available since kernel
|
444
|
-
5.12.
|
445
|
-
.TP
|
446
|
-
.B IORING_FEAT_RSRC_TAGS
|
447
|
-
If this flag is set, then io_uring supports a variety of features related
|
448
|
-
to fixed files and buffers. In particular, it indicates that registered
|
449
|
-
buffers can be updated in-place, whereas before the full set would have to
|
450
|
-
be unregistered first. Available since kernel 5.13.
|
451
|
-
.TP
|
452
|
-
.B IORING_FEAT_CQE_SKIP
|
453
|
-
If this flag is set, then io_uring supports setting
|
454
|
-
.B IOSQE_CQE_SKIP_SUCCESS
|
455
|
-
in the submitted SQE, indicating that no CQE should be generated for this
|
456
|
-
SQE if it executes normally. If an error happens processing the SQE, a
|
457
|
-
CQE with the appropriate error value will still be generated. Available since
|
458
|
-
kernel 5.17.
|
459
|
-
.TP
|
460
|
-
.B IORING_FEAT_LINKED_FILE
|
461
|
-
If this flag is set, then io_uring supports sane assignment of files for SQEs
|
462
|
-
that have dependencies. For example, if a chain of SQEs is submitted with
|
463
|
-
.B IOSQE_IO_LINK,
|
464
|
-
then kernels without this flag will prepare the file for each link upfront.
|
465
|
-
If a previous link opens a file with a known index, eg if direct descriptors
|
466
|
-
are used with open or accept, then file assignment needs to happen post
|
467
|
-
execution of that SQE. If this flag is set, then the kernel will defer
|
468
|
-
file assignment until execution of a given request is started. Available since
|
469
|
-
kernel 5.17.
|
470
|
-
.TP
|
471
|
-
.B IORING_FEAT_REG_REG_RING
|
472
|
-
If this flag is set, then io_uring supports calling
|
473
|
-
.BR io_uring_register (2)
|
474
|
-
using a registered ring fd, via
|
475
|
-
.BR IORING_REGISTER_USE_REGISTERED_RING .
|
476
|
-
Available since kernel 6.3.
|
477
|
-
|
478
|
-
.PP
|
479
|
-
The rest of the fields in the
|
480
|
-
.I struct io_uring_params
|
481
|
-
are filled in by the kernel, and provide the information necessary to
|
482
|
-
memory map the submission queue, completion queue, and the array of
|
483
|
-
submission queue entries.
|
484
|
-
.I sq_entries
|
485
|
-
specifies the number of submission queue entries allocated.
|
486
|
-
.I sq_off
|
487
|
-
describes the offsets of various ring buffer fields:
|
488
|
-
.PP
|
489
|
-
.in +4n
|
490
|
-
.EX
|
491
|
-
struct io_sqring_offsets {
|
492
|
-
__u32 head;
|
493
|
-
__u32 tail;
|
494
|
-
__u32 ring_mask;
|
495
|
-
__u32 ring_entries;
|
496
|
-
__u32 flags;
|
497
|
-
__u32 dropped;
|
498
|
-
__u32 array;
|
499
|
-
__u32 resv[3];
|
500
|
-
};
|
501
|
-
.EE
|
502
|
-
.in
|
503
|
-
.PP
|
504
|
-
Taken together,
|
505
|
-
.I sq_entries
|
506
|
-
and
|
507
|
-
.I sq_off
|
508
|
-
provide all of the information necessary for accessing the submission
|
509
|
-
queue ring buffer and the submission queue entry array. The
|
510
|
-
submission queue can be mapped with a call like:
|
511
|
-
.PP
|
512
|
-
.in +4n
|
513
|
-
.EX
|
514
|
-
ptr = mmap(0, sq_off.array + sq_entries * sizeof(__u32),
|
515
|
-
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE,
|
516
|
-
ring_fd, IORING_OFF_SQ_RING);
|
517
|
-
.EE
|
518
|
-
.in
|
519
|
-
.PP
|
520
|
-
where
|
521
|
-
.I sq_off
|
522
|
-
is the
|
523
|
-
.I io_sqring_offsets
|
524
|
-
structure, and
|
525
|
-
.I ring_fd
|
526
|
-
is the file descriptor returned from
|
527
|
-
.BR io_uring_setup (2).
|
528
|
-
The addition of
|
529
|
-
.I sq_off.array
|
530
|
-
to the length of the region accounts for the fact that the ring
|
531
|
-
is located at the end of the data structure. As an example, the ring
|
532
|
-
buffer head pointer can be accessed by adding
|
533
|
-
.I sq_off.head
|
534
|
-
to the address returned from
|
535
|
-
.BR mmap (2):
|
536
|
-
.PP
|
537
|
-
.in +4n
|
538
|
-
.EX
|
539
|
-
head = ptr + sq_off.head;
|
540
|
-
.EE
|
541
|
-
.in
|
542
|
-
|
543
|
-
The
|
544
|
-
.I flags
|
545
|
-
field is used by the kernel to communicate state information to the
|
546
|
-
application. Currently, it is used to inform the application when a
|
547
|
-
call to
|
548
|
-
.BR io_uring_enter (2)
|
549
|
-
is necessary. See the documentation for the
|
550
|
-
.B IORING_SETUP_SQPOLL
|
551
|
-
flag above.
|
552
|
-
The
|
553
|
-
.I dropped
|
554
|
-
member is incremented for each invalid submission queue entry
|
555
|
-
encountered in the ring buffer.
|
556
|
-
|
557
|
-
The head and tail track the ring buffer state. The tail is
|
558
|
-
incremented by the application when submitting new I/O, and the head
|
559
|
-
is incremented by the kernel when the I/O has been successfully
|
560
|
-
submitted. Determining the index of the head or tail into the ring is
|
561
|
-
accomplished by applying a mask:
|
562
|
-
.PP
|
563
|
-
.in +4n
|
564
|
-
.EX
|
565
|
-
index = tail & ring_mask;
|
566
|
-
.EE
|
567
|
-
.in
|
568
|
-
.PP
|
569
|
-
The array of submission queue entries is mapped with:
|
570
|
-
.PP
|
571
|
-
.in +4n
|
572
|
-
.EX
|
573
|
-
sqentries = mmap(0, sq_entries * sizeof(struct io_uring_sqe),
|
574
|
-
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE,
|
575
|
-
ring_fd, IORING_OFF_SQES);
|
576
|
-
.EE
|
577
|
-
.in
|
578
|
-
.PP
|
579
|
-
The completion queue is described by
|
580
|
-
.I cq_entries
|
581
|
-
and
|
582
|
-
.I cq_off
|
583
|
-
shown here:
|
584
|
-
.PP
|
585
|
-
.in +4n
|
586
|
-
.EX
|
587
|
-
struct io_cqring_offsets {
|
588
|
-
__u32 head;
|
589
|
-
__u32 tail;
|
590
|
-
__u32 ring_mask;
|
591
|
-
__u32 ring_entries;
|
592
|
-
__u32 overflow;
|
593
|
-
__u32 cqes;
|
594
|
-
__u32 flags;
|
595
|
-
__u32 resv[3];
|
596
|
-
};
|
597
|
-
.EE
|
598
|
-
.in
|
599
|
-
.PP
|
600
|
-
The completion queue is simpler, since the entries are not separated
|
601
|
-
from the queue itself, and can be mapped with:
|
602
|
-
.PP
|
603
|
-
.in +4n
|
604
|
-
.EX
|
605
|
-
ptr = mmap(0, cq_off.cqes + cq_entries * sizeof(struct io_uring_cqe),
|
606
|
-
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd,
|
607
|
-
IORING_OFF_CQ_RING);
|
608
|
-
.EE
|
609
|
-
.in
|
610
|
-
.PP
|
611
|
-
Closing the file descriptor returned by
|
612
|
-
.BR io_uring_setup (2)
|
613
|
-
will free all resources associated with the io_uring context.
|
614
|
-
.PP
|
615
|
-
.SH RETURN VALUE
|
616
|
-
.BR io_uring_setup (2)
|
617
|
-
returns a new file descriptor on success. The application may then
|
618
|
-
provide the file descriptor in a subsequent
|
619
|
-
.BR mmap (2)
|
620
|
-
call to map the submission and completion queues, or to the
|
621
|
-
.BR io_uring_register (2)
|
622
|
-
or
|
623
|
-
.BR io_uring_enter (2)
|
624
|
-
system calls.
|
625
|
-
|
626
|
-
On error, a negative error code is returned. The caller should not rely on the
|
627
|
-
.I errno
|
628
|
-
variable.
|
629
|
-
.PP
|
630
|
-
.SH ERRORS
|
631
|
-
.TP
|
632
|
-
.B EFAULT
|
633
|
-
params is outside your accessible address space.
|
634
|
-
.TP
|
635
|
-
.B EINVAL
|
636
|
-
The resv array contains non-zero data, p.flags contains an unsupported
|
637
|
-
flag,
|
638
|
-
.I entries
|
639
|
-
is out of bounds,
|
640
|
-
.B IORING_SETUP_SQ_AFF
|
641
|
-
was specified, but
|
642
|
-
.B IORING_SETUP_SQPOLL
|
643
|
-
was not, or
|
644
|
-
.B IORING_SETUP_CQSIZE
|
645
|
-
was specified, but
|
646
|
-
.I io_uring_params.cq_entries
|
647
|
-
was invalid.
|
648
|
-
.TP
|
649
|
-
.B EMFILE
|
650
|
-
The per-process limit on the number of open file descriptors has been
|
651
|
-
reached (see the description of
|
652
|
-
.B RLIMIT_NOFILE
|
653
|
-
in
|
654
|
-
.BR getrlimit (2)).
|
655
|
-
.TP
|
656
|
-
.B ENFILE
|
657
|
-
The system-wide limit on the total number of open files has been
|
658
|
-
reached.
|
659
|
-
.TP
|
660
|
-
.B ENOMEM
|
661
|
-
Insufficient kernel resources are available.
|
662
|
-
.TP
|
663
|
-
.B EPERM
|
664
|
-
.B IORING_SETUP_SQPOLL
|
665
|
-
was specified, but the effective user ID of the caller did not have sufficient
|
666
|
-
privileges.
|
667
|
-
.SH SEE ALSO
|
668
|
-
.BR io_uring_register (2),
|
669
|
-
.BR io_uring_enter (2)
|
@@ -1,31 +0,0 @@
|
|
1
|
-
.\" Copyright (C) 2022 Stefan Roesch <shr@fb.com>
|
2
|
-
.\"
|
3
|
-
.\" SPDX-License-Identifier: LGPL-2.0-or-later
|
4
|
-
.\"
|
5
|
-
.TH io_uring_sq_ready 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
|
6
|
-
.SH NAME
|
7
|
-
io_uring_sq_ready \- number of unconsumed or unsubmitted entries in the SQ ring
|
8
|
-
.SH SYNOPSIS
|
9
|
-
.nf
|
10
|
-
.B #include <liburing.h>
|
11
|
-
.PP
|
12
|
-
.BI "unsigned io_uring_sq_ready(const struct io_uring *" ring ");"
|
13
|
-
.fi
|
14
|
-
.SH DESCRIPTION
|
15
|
-
.PP
|
16
|
-
The
|
17
|
-
.BR io_uring_sq_ready (3)
|
18
|
-
function returns the number of unconsumed (if SQPOLL) or unsubmitted entries
|
19
|
-
that exist in the SQ ring belonging to the
|
20
|
-
.I ring
|
21
|
-
param.
|
22
|
-
|
23
|
-
Usage of this function only applies if the ring has been set up with
|
24
|
-
.BR IORING_SETUP_SQPOLL ,
|
25
|
-
where request submissions, and hence consumption from the SQ ring, happen
|
26
|
-
through a polling thread.
|
27
|
-
|
28
|
-
.SH RETURN VALUE
|
29
|
-
Returns the number of unconsumed or unsubmitted entries in the SQ ring.
|
30
|
-
.SH SEE ALSO
|
31
|
-
.BR io_uring_cq_ready (3)
|