polyphony 1.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +1 -0
- data/CHANGELOG.md +16 -3
- data/README.md +1 -0
- data/TODO.md +5 -13
- data/docs/cheat-sheet.md +248 -0
- data/docs/design-principles.md +59 -3
- data/docs/faq.md +15 -32
- data/docs/fiber-scheduling.md +14 -12
- data/docs/overview.md +140 -35
- data/docs/readme.md +4 -3
- data/docs/tutorial.md +19 -149
- data/examples/core/debug.rb +12 -0
- data/examples/core/rpc_benchmark.rb +136 -0
- data/ext/polyphony/polyphony.c +2 -1
- data/lib/polyphony/extensions/fiber.rb +1 -0
- data/lib/polyphony/extensions/io.rb +171 -161
- data/lib/polyphony/extensions/pipe.rb +3 -5
- data/lib/polyphony/extensions/socket.rb +45 -54
- data/lib/polyphony/version.rb +1 -1
- data/polyphony.gemspec +3 -1
- data/test/test_socket.rb +1 -1
- metadata +33 -149
- data/vendor/liburing/man/IO_URING_CHECK_VERSION.3 +0 -1
- data/vendor/liburing/man/IO_URING_VERSION_MAJOR.3 +0 -1
- data/vendor/liburing/man/IO_URING_VERSION_MINOR.3 +0 -1
- data/vendor/liburing/man/io_uring.7 +0 -781
- data/vendor/liburing/man/io_uring_buf_ring_add.3 +0 -53
- data/vendor/liburing/man/io_uring_buf_ring_advance.3 +0 -31
- data/vendor/liburing/man/io_uring_buf_ring_cq_advance.3 +0 -41
- data/vendor/liburing/man/io_uring_buf_ring_init.3 +0 -30
- data/vendor/liburing/man/io_uring_buf_ring_mask.3 +0 -27
- data/vendor/liburing/man/io_uring_check_version.3 +0 -72
- data/vendor/liburing/man/io_uring_close_ring_fd.3 +0 -43
- data/vendor/liburing/man/io_uring_cq_advance.3 +0 -49
- data/vendor/liburing/man/io_uring_cq_has_overflow.3 +0 -25
- data/vendor/liburing/man/io_uring_cq_ready.3 +0 -26
- data/vendor/liburing/man/io_uring_cqe_get_data.3 +0 -53
- data/vendor/liburing/man/io_uring_cqe_get_data64.3 +0 -1
- data/vendor/liburing/man/io_uring_cqe_seen.3 +0 -42
- data/vendor/liburing/man/io_uring_enter.2 +0 -1700
- data/vendor/liburing/man/io_uring_enter2.2 +0 -1
- data/vendor/liburing/man/io_uring_free_probe.3 +0 -27
- data/vendor/liburing/man/io_uring_get_events.3 +0 -33
- data/vendor/liburing/man/io_uring_get_probe.3 +0 -30
- data/vendor/liburing/man/io_uring_get_sqe.3 +0 -57
- data/vendor/liburing/man/io_uring_major_version.3 +0 -1
- data/vendor/liburing/man/io_uring_minor_version.3 +0 -1
- data/vendor/liburing/man/io_uring_opcode_supported.3 +0 -30
- data/vendor/liburing/man/io_uring_peek_cqe.3 +0 -38
- data/vendor/liburing/man/io_uring_prep_accept.3 +0 -197
- data/vendor/liburing/man/io_uring_prep_accept_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_cancel.3 +0 -118
- data/vendor/liburing/man/io_uring_prep_cancel64.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_close.3 +0 -59
- data/vendor/liburing/man/io_uring_prep_close_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_connect.3 +0 -66
- data/vendor/liburing/man/io_uring_prep_fadvise.3 +0 -59
- data/vendor/liburing/man/io_uring_prep_fallocate.3 +0 -59
- data/vendor/liburing/man/io_uring_prep_fgetxattr.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_files_update.3 +0 -92
- data/vendor/liburing/man/io_uring_prep_fsetxattr.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_fsync.3 +0 -70
- data/vendor/liburing/man/io_uring_prep_getxattr.3 +0 -61
- data/vendor/liburing/man/io_uring_prep_link.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_link_timeout.3 +0 -94
- data/vendor/liburing/man/io_uring_prep_linkat.3 +0 -91
- data/vendor/liburing/man/io_uring_prep_madvise.3 +0 -56
- data/vendor/liburing/man/io_uring_prep_mkdir.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_mkdirat.3 +0 -83
- data/vendor/liburing/man/io_uring_prep_msg_ring.3 +0 -92
- data/vendor/liburing/man/io_uring_prep_msg_ring_cqe_flags.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_multishot_accept.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_multishot_accept_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_nop.3 +0 -28
- data/vendor/liburing/man/io_uring_prep_openat.3 +0 -117
- data/vendor/liburing/man/io_uring_prep_openat2.3 +0 -117
- data/vendor/liburing/man/io_uring_prep_openat2_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_openat_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_poll_add.3 +0 -72
- data/vendor/liburing/man/io_uring_prep_poll_multishot.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_poll_remove.3 +0 -55
- data/vendor/liburing/man/io_uring_prep_poll_update.3 +0 -89
- data/vendor/liburing/man/io_uring_prep_provide_buffers.3 +0 -140
- data/vendor/liburing/man/io_uring_prep_read.3 +0 -69
- data/vendor/liburing/man/io_uring_prep_read_fixed.3 +0 -72
- data/vendor/liburing/man/io_uring_prep_readv.3 +0 -85
- data/vendor/liburing/man/io_uring_prep_readv2.3 +0 -111
- data/vendor/liburing/man/io_uring_prep_recv.3 +0 -105
- data/vendor/liburing/man/io_uring_prep_recv_multishot.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_recvmsg.3 +0 -124
- data/vendor/liburing/man/io_uring_prep_recvmsg_multishot.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_remove_buffers.3 +0 -52
- data/vendor/liburing/man/io_uring_prep_rename.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_renameat.3 +0 -96
- data/vendor/liburing/man/io_uring_prep_send.3 +0 -66
- data/vendor/liburing/man/io_uring_prep_send_set_addr.3 +0 -38
- data/vendor/liburing/man/io_uring_prep_send_zc.3 +0 -96
- data/vendor/liburing/man/io_uring_prep_send_zc_fixed.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_sendmsg.3 +0 -89
- data/vendor/liburing/man/io_uring_prep_sendmsg_zc.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_setxattr.3 +0 -64
- data/vendor/liburing/man/io_uring_prep_shutdown.3 +0 -53
- data/vendor/liburing/man/io_uring_prep_socket.3 +0 -118
- data/vendor/liburing/man/io_uring_prep_socket_direct.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_socket_direct_alloc.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_splice.3 +0 -120
- data/vendor/liburing/man/io_uring_prep_statx.3 +0 -74
- data/vendor/liburing/man/io_uring_prep_symlink.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_symlinkat.3 +0 -85
- data/vendor/liburing/man/io_uring_prep_sync_file_range.3 +0 -59
- data/vendor/liburing/man/io_uring_prep_tee.3 +0 -74
- data/vendor/liburing/man/io_uring_prep_timeout.3 +0 -95
- data/vendor/liburing/man/io_uring_prep_timeout_remove.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_timeout_update.3 +0 -98
- data/vendor/liburing/man/io_uring_prep_unlink.3 +0 -1
- data/vendor/liburing/man/io_uring_prep_unlinkat.3 +0 -82
- data/vendor/liburing/man/io_uring_prep_write.3 +0 -67
- data/vendor/liburing/man/io_uring_prep_write_fixed.3 +0 -72
- data/vendor/liburing/man/io_uring_prep_writev.3 +0 -85
- data/vendor/liburing/man/io_uring_prep_writev2.3 +0 -111
- data/vendor/liburing/man/io_uring_queue_exit.3 +0 -26
- data/vendor/liburing/man/io_uring_queue_init.3 +0 -89
- data/vendor/liburing/man/io_uring_queue_init_params.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_cmsg_firsthdr.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_cmsg_nexthdr.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_name.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_out.3 +0 -82
- data/vendor/liburing/man/io_uring_recvmsg_payload.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_payload_length.3 +0 -1
- data/vendor/liburing/man/io_uring_recvmsg_validate.3 +0 -1
- data/vendor/liburing/man/io_uring_register.2 +0 -834
- data/vendor/liburing/man/io_uring_register_buf_ring.3 +0 -140
- data/vendor/liburing/man/io_uring_register_buffers.3 +0 -104
- data/vendor/liburing/man/io_uring_register_buffers_sparse.3 +0 -1
- data/vendor/liburing/man/io_uring_register_buffers_tags.3 +0 -1
- data/vendor/liburing/man/io_uring_register_buffers_update_tag.3 +0 -1
- data/vendor/liburing/man/io_uring_register_eventfd.3 +0 -51
- data/vendor/liburing/man/io_uring_register_eventfd_async.3 +0 -1
- data/vendor/liburing/man/io_uring_register_file_alloc_range.3 +0 -52
- data/vendor/liburing/man/io_uring_register_files.3 +0 -112
- data/vendor/liburing/man/io_uring_register_files_sparse.3 +0 -1
- data/vendor/liburing/man/io_uring_register_files_tags.3 +0 -1
- data/vendor/liburing/man/io_uring_register_files_update.3 +0 -1
- data/vendor/liburing/man/io_uring_register_files_update_tag.3 +0 -1
- data/vendor/liburing/man/io_uring_register_iowq_aff.3 +0 -61
- data/vendor/liburing/man/io_uring_register_iowq_max_workers.3 +0 -71
- data/vendor/liburing/man/io_uring_register_ring_fd.3 +0 -49
- data/vendor/liburing/man/io_uring_register_sync_cancel.3 +0 -71
- data/vendor/liburing/man/io_uring_setup.2 +0 -669
- data/vendor/liburing/man/io_uring_sq_ready.3 +0 -31
- data/vendor/liburing/man/io_uring_sq_space_left.3 +0 -25
- data/vendor/liburing/man/io_uring_sqe_set_data.3 +0 -48
- data/vendor/liburing/man/io_uring_sqe_set_data64.3 +0 -1
- data/vendor/liburing/man/io_uring_sqe_set_flags.3 +0 -87
- data/vendor/liburing/man/io_uring_sqring_wait.3 +0 -34
- data/vendor/liburing/man/io_uring_submit.3 +0 -46
- data/vendor/liburing/man/io_uring_submit_and_get_events.3 +0 -31
- data/vendor/liburing/man/io_uring_submit_and_wait.3 +0 -38
- data/vendor/liburing/man/io_uring_submit_and_wait_timeout.3 +0 -56
- data/vendor/liburing/man/io_uring_unregister_buf_ring.3 +0 -30
- data/vendor/liburing/man/io_uring_unregister_buffers.3 +0 -27
- data/vendor/liburing/man/io_uring_unregister_eventfd.3 +0 -1
- data/vendor/liburing/man/io_uring_unregister_files.3 +0 -27
- data/vendor/liburing/man/io_uring_unregister_iowq_aff.3 +0 -1
- data/vendor/liburing/man/io_uring_unregister_ring_fd.3 +0 -32
- data/vendor/liburing/man/io_uring_wait_cqe.3 +0 -40
- data/vendor/liburing/man/io_uring_wait_cqe_nr.3 +0 -43
- data/vendor/liburing/man/io_uring_wait_cqe_timeout.3 +0 -53
- data/vendor/liburing/man/io_uring_wait_cqes.3 +0 -56
@@ -1,669 +0,0 @@
|
|
1
|
-
.\" Copyright (C) 2019 Jens Axboe <axboe@kernel.dk>
|
2
|
-
.\" Copyright (C) 2019 Jon Corbet <corbet@lwn.net>
|
3
|
-
.\" Copyright (C) 2019 Red Hat, Inc.
|
4
|
-
.\"
|
5
|
-
.\" SPDX-License-Identifier: LGPL-2.0-or-later
|
6
|
-
.\"
|
7
|
-
.TH io_uring_setup 2 2019-01-29 "Linux" "Linux Programmer's Manual"
|
8
|
-
.SH NAME
|
9
|
-
io_uring_setup \- setup a context for performing asynchronous I/O
|
10
|
-
.SH SYNOPSIS
|
11
|
-
.nf
|
12
|
-
.BR "#include <liburing.h>"
|
13
|
-
.PP
|
14
|
-
.BI "int io_uring_setup(u32 " entries ", struct io_uring_params *" p );
|
15
|
-
.fi
|
16
|
-
.PP
|
17
|
-
.SH DESCRIPTION
|
18
|
-
.PP
|
19
|
-
The
|
20
|
-
.BR io_uring_setup (2)
|
21
|
-
system call sets up a submission queue (SQ) and completion queue (CQ) with at
|
22
|
-
least
|
23
|
-
.I entries
|
24
|
-
entries, and returns a file descriptor which can be used to perform
|
25
|
-
subsequent operations on the io_uring instance. The submission and
|
26
|
-
completion queues are shared between userspace and the kernel, which
|
27
|
-
eliminates the need to copy data when initiating and completing I/O.
|
28
|
-
|
29
|
-
.I params
|
30
|
-
is used by the application to pass options to the kernel, and by the
|
31
|
-
kernel to convey information about the ring buffers.
|
32
|
-
.PP
|
33
|
-
.in +4n
|
34
|
-
.EX
|
35
|
-
struct io_uring_params {
|
36
|
-
__u32 sq_entries;
|
37
|
-
__u32 cq_entries;
|
38
|
-
__u32 flags;
|
39
|
-
__u32 sq_thread_cpu;
|
40
|
-
__u32 sq_thread_idle;
|
41
|
-
__u32 features;
|
42
|
-
__u32 wq_fd;
|
43
|
-
__u32 resv[3];
|
44
|
-
struct io_sqring_offsets sq_off;
|
45
|
-
struct io_cqring_offsets cq_off;
|
46
|
-
};
|
47
|
-
.EE
|
48
|
-
.in
|
49
|
-
.PP
|
50
|
-
The
|
51
|
-
.IR flags ,
|
52
|
-
.IR sq_thread_cpu ,
|
53
|
-
and
|
54
|
-
.I sq_thread_idle
|
55
|
-
fields are used to configure the io_uring instance.
|
56
|
-
.I flags
|
57
|
-
is a bit mask of 0 or more of the following values ORed
|
58
|
-
together:
|
59
|
-
.TP
|
60
|
-
.B IORING_SETUP_IOPOLL
|
61
|
-
Perform busy-waiting for an I/O completion, as opposed to getting
|
62
|
-
notifications via an asynchronous IRQ (Interrupt Request). The file
|
63
|
-
system (if any) and block device must support polling in order for
|
64
|
-
this to work. Busy-waiting provides lower latency, but may consume
|
65
|
-
more CPU resources than interrupt driven I/O. Currently, this feature
|
66
|
-
is usable only on a file descriptor opened using the
|
67
|
-
.B O_DIRECT
|
68
|
-
flag. When a read or write is submitted to a polled context, the
|
69
|
-
application must poll for completions on the CQ ring by calling
|
70
|
-
.BR io_uring_enter (2).
|
71
|
-
It is illegal to mix and match polled and non-polled I/O on an io_uring
|
72
|
-
instance.
|
73
|
-
|
74
|
-
This is only applicable for storage devices for now, and the storage device
|
75
|
-
must be configured for polling. How to do that depends on the device type
|
76
|
-
in question. For NVMe devices, the nvme driver must be loaded with the
|
77
|
-
.I poll_queues
|
78
|
-
parameter set to the desired number of polling queues. The polling queues
|
79
|
-
will be shared appropriately between the CPUs in the system, if the number
|
80
|
-
is less than the number of online CPU threads.
|
81
|
-
|
82
|
-
.TP
|
83
|
-
.B IORING_SETUP_SQPOLL
|
84
|
-
When this flag is specified, a kernel thread is created to perform
|
85
|
-
submission queue polling. An io_uring instance configured in this way
|
86
|
-
enables an application to issue I/O without ever context switching
|
87
|
-
into the kernel. By using the submission queue to fill in new
|
88
|
-
submission queue entries and watching for completions on the
|
89
|
-
completion queue, the application can submit and reap I/Os without
|
90
|
-
doing a single system call.
|
91
|
-
|
92
|
-
If the kernel thread is idle for more than
|
93
|
-
.I sq_thread_idle
|
94
|
-
milliseconds, it will set the
|
95
|
-
.B IORING_SQ_NEED_WAKEUP
|
96
|
-
bit in the
|
97
|
-
.I flags
|
98
|
-
field of the
|
99
|
-
.IR "struct io_sq_ring" .
|
100
|
-
When this happens, the application must call
|
101
|
-
.BR io_uring_enter (2)
|
102
|
-
to wake the kernel thread. If I/O is kept busy, the kernel thread
|
103
|
-
will never sleep. An application making use of this feature will need
|
104
|
-
to guard the
|
105
|
-
.BR io_uring_enter (2)
|
106
|
-
call with the following code sequence:
|
107
|
-
|
108
|
-
.in +4n
|
109
|
-
.EX
|
110
|
-
/*
|
111
|
-
* Ensure that the wakeup flag is read after the tail pointer
|
112
|
-
* has been written. It's important to use memory load acquire
|
113
|
-
* semantics for the flags read, as otherwise the application
|
114
|
-
* and the kernel might not agree on the consistency of the
|
115
|
-
* wakeup flag.
|
116
|
-
*/
|
117
|
-
unsigned flags = atomic_load_relaxed(sq_ring->flags);
|
118
|
-
if (flags & IORING_SQ_NEED_WAKEUP)
|
119
|
-
io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP);
|
120
|
-
.EE
|
121
|
-
.in
|
122
|
-
|
123
|
-
where
|
124
|
-
.I sq_ring
|
125
|
-
is a submission queue ring setup using the
|
126
|
-
.I struct io_sqring_offsets
|
127
|
-
described below.
|
128
|
-
.TP
|
129
|
-
.BR
|
130
|
-
Note that, when using a ring setup with
|
131
|
-
.BR IORING_SETUP_SQPOLL ,
|
132
|
-
you never directly call the
|
133
|
-
.BR io_uring_enter (2)
|
134
|
-
system call. That is usually taken care of by liburing's
|
135
|
-
.BR io_uring_submit (3)
|
136
|
-
function. It automatically determines if you are using
|
137
|
-
polling mode or not and deals with when your program needs to call
|
138
|
-
.BR io_uring_enter (2)
|
139
|
-
without you having to bother about it.
|
140
|
-
.TP
|
141
|
-
.BR
|
142
|
-
Before version 5.11 of the Linux kernel, to successfully use this feature, the
|
143
|
-
application must register a set of files to be used for IO through
|
144
|
-
.BR io_uring_register (2)
|
145
|
-
using the
|
146
|
-
.B IORING_REGISTER_FILES
|
147
|
-
opcode. Failure to do so will result in submitted IO being errored with
|
148
|
-
.BR EBADF .
|
149
|
-
The presence of this feature can be detected by the
|
150
|
-
.B IORING_FEAT_SQPOLL_NONFIXED
|
151
|
-
feature flag.
|
152
|
-
In version 5.11 and later, it is no longer necessary to register files to use
|
153
|
-
this feature. 5.11 also allows using this as non-root, if the user has the
|
154
|
-
.B CAP_SYS_NICE
|
155
|
-
capability. In 5.13 this requirement was also relaxed, and no special privileges
|
156
|
-
are needed for SQPOLL in newer kernels. Certain stable kernels older than 5.13
|
157
|
-
may also support unprivileged SQPOLL.
|
158
|
-
.TP
|
159
|
-
.B IORING_SETUP_SQ_AFF
|
160
|
-
If this flag is specified, then the poll thread will be bound to the
|
161
|
-
cpu set in the
|
162
|
-
.I sq_thread_cpu
|
163
|
-
field of the
|
164
|
-
.IR "struct io_uring_params" .
|
165
|
-
This flag is only meaningful when
|
166
|
-
.B IORING_SETUP_SQPOLL
|
167
|
-
is specified. When cgroup setting
|
168
|
-
.I cpuset.cpus
|
169
|
-
changes (typically in container environment), the bounded cpu set may be
|
170
|
-
changed as well.
|
171
|
-
.TP
|
172
|
-
.B IORING_SETUP_CQSIZE
|
173
|
-
Create the completion queue with
|
174
|
-
.IR "struct io_uring_params.cq_entries"
|
175
|
-
entries. The value must be greater than
|
176
|
-
.IR entries ,
|
177
|
-
and may be rounded up to the next power-of-two.
|
178
|
-
.TP
|
179
|
-
.B IORING_SETUP_CLAMP
|
180
|
-
If this flag is specified, and if
|
181
|
-
.IR entries
|
182
|
-
exceeds
|
183
|
-
.BR IORING_MAX_ENTRIES ,
|
184
|
-
then
|
185
|
-
.IR entries
|
186
|
-
will be clamped at
|
187
|
-
.BR IORING_MAX_ENTRIES .
|
188
|
-
If the flag
|
189
|
-
.BR IORING_SETUP_SQPOLL
|
190
|
-
is set, and if the value of
|
191
|
-
.IR "struct io_uring_params.cq_entries"
|
192
|
-
exceeds
|
193
|
-
.BR IORING_MAX_CQ_ENTRIES ,
|
194
|
-
then it will be clamped at
|
195
|
-
.BR IORING_MAX_CQ_ENTRIES .
|
196
|
-
.TP
|
197
|
-
.B IORING_SETUP_ATTACH_WQ
|
198
|
-
This flag should be set in conjunction with
|
199
|
-
.IR "struct io_uring_params.wq_fd"
|
200
|
-
being set to an existing io_uring ring file descriptor. When set, the
|
201
|
-
io_uring instance being created will share the asynchronous worker
|
202
|
-
thread backend of the specified io_uring ring, rather than create a new
|
203
|
-
separate thread pool.
|
204
|
-
.TP
|
205
|
-
.B IORING_SETUP_R_DISABLED
|
206
|
-
If this flag is specified, the io_uring ring starts in a disabled state.
|
207
|
-
In this state, restrictions can be registered, but submissions are not allowed.
|
208
|
-
See
|
209
|
-
.BR io_uring_register (2)
|
210
|
-
for details on how to enable the ring. Available since 5.10.
|
211
|
-
.TP
|
212
|
-
.B IORING_SETUP_SUBMIT_ALL
|
213
|
-
Normally io_uring stops submitting a batch of requests if one of these requests
|
214
|
-
results in an error. This can cause submission of less than what is expected,
|
215
|
-
if a request ends in error while being submitted. If the ring is created with
|
216
|
-
this flag,
|
217
|
-
.BR io_uring_enter (2)
|
218
|
-
will continue submitting requests even if it encounters an error submitting
|
219
|
-
a request. CQEs are still posted for errored requests regardless of whether or
|
220
|
-
not this flag is set at ring creation time, the only difference is if the
|
221
|
-
submit sequence is halted or continued when an error is observed. Available
|
222
|
-
since 5.18.
|
223
|
-
.TP
|
224
|
-
.B IORING_SETUP_COOP_TASKRUN
|
225
|
-
By default, io_uring will interrupt a task running in userspace when a
|
226
|
-
completion event comes in. This is to ensure that completions run in a timely
|
227
|
-
manner. For a lot of use cases, this is overkill and can cause reduced
|
228
|
-
performance from both the inter-processor interrupt used to do this, the
|
229
|
-
kernel/user transition, the needless interruption of the task's userspace
|
230
|
-
activities, and reduced batching if completions come in at a rapid rate. Most
|
231
|
-
applications don't need the forceful interruption, as the events are processed
|
232
|
-
at any kernel/user transition. The exception are setups where the application
|
233
|
-
uses multiple threads operating on the same ring, where the application
|
234
|
-
waiting on completions isn't the one that submitted them. For most other
|
235
|
-
use cases, setting this flag will improve performance. Available since 5.19.
|
236
|
-
.TP
|
237
|
-
.B IORING_SETUP_TASKRUN_FLAG
|
238
|
-
Used in conjunction with
|
239
|
-
.BR IORING_SETUP_COOP_TASKRUN ,
|
240
|
-
this provides a flag,
|
241
|
-
.BR IORING_SQ_TASKRUN ,
|
242
|
-
which is set in the SQ ring
|
243
|
-
.I flags
|
244
|
-
whenever completions are pending that should be processed. liburing will check
|
245
|
-
for this flag even when doing
|
246
|
-
.BR io_uring_peek_cqe (3)
|
247
|
-
and enter the kernel to process them, and applications can do the same. This
|
248
|
-
makes
|
249
|
-
.B IORING_SETUP_TASKRUN_FLAG
|
250
|
-
safe to use even when applications rely on a peek style operation on the CQ
|
251
|
-
ring to see if anything might be pending to reap. Available since 5.19.
|
252
|
-
.TP
|
253
|
-
.B IORING_SETUP_SQE128
|
254
|
-
If set, io_uring will use 128-byte SQEs rather than the normal 64-byte sized
|
255
|
-
variant. This is a requirement for using certain request types, as of 5.19
|
256
|
-
only the
|
257
|
-
.B IORING_OP_URING_CMD
|
258
|
-
passthrough command for NVMe passthrough needs this. Available since 5.19.
|
259
|
-
.TP
|
260
|
-
.B IORING_SETUP_CQE32
|
261
|
-
If set, io_uring will use 32-byte CQEs rather than the normal 16-byte sized
|
262
|
-
variant. This is a requirement for using certain request types, as of 5.19
|
263
|
-
only the
|
264
|
-
.B IORING_OP_URING_CMD
|
265
|
-
passthrough command for NVMe passthrough needs this. Available since 5.19.
|
266
|
-
.TP
|
267
|
-
.B IORING_SETUP_SINGLE_ISSUER
|
268
|
-
A hint to the kernel that only a single task (or thread) will submit requests, which is
|
269
|
-
used for internal optimisations. The submission task is either the task that created the
|
270
|
-
ring, or if
|
271
|
-
.B IORING_SETUP_R_DISABLED
|
272
|
-
is specified then it is the task that enables the ring through
|
273
|
-
.BR io_uring_register (2).
|
274
|
-
The kernel enforces this rule, failing requests with
|
275
|
-
.B -EEXIST
|
276
|
-
if the restriction is violated.
|
277
|
-
Note that when
|
278
|
-
.B IORING_SETUP_SQPOLL
|
279
|
-
is set it is considered that the polling task is doing all submissions
|
280
|
-
on behalf of the userspace and so it always complies with the rule disregarding
|
281
|
-
how many userspace tasks do
|
282
|
-
.BR io_uring_enter (2).
|
283
|
-
Available since 6.0.
|
284
|
-
.TP
|
285
|
-
.B IORING_SETUP_DEFER_TASKRUN
|
286
|
-
By default, io_uring will process all outstanding work at the end of any system
|
287
|
-
call or thread interrupt. This can delay the application from making other progress.
|
288
|
-
Setting this flag will hint to io_uring that it should defer work until an
|
289
|
-
.BR io_uring_enter (2)
|
290
|
-
call with the
|
291
|
-
.B IORING_ENTER_GETEVENTS
|
292
|
-
flag set. This allows the application to request work to run just before it wants to
|
293
|
-
process completions.
|
294
|
-
This flag requires the
|
295
|
-
.BR IORING_SETUP_SINGLE_ISSUER
|
296
|
-
flag to be set, and also enforces that the call to
|
297
|
-
.BR io_uring_enter (2)
|
298
|
-
is called from the same thread that submitted requests.
|
299
|
-
Note that if this flag is set then it is the application's responsibility to periodically
|
300
|
-
trigger work (for example via any of the CQE waiting functions) or else completions may
|
301
|
-
not be delivered.
|
302
|
-
Available since 6.1.
|
303
|
-
.PP
|
304
|
-
If no flags are specified, the io_uring instance is setup for
|
305
|
-
interrupt driven I/O. I/O may be submitted using
|
306
|
-
.BR io_uring_enter (2)
|
307
|
-
and can be reaped by polling the completion queue.
|
308
|
-
|
309
|
-
The
|
310
|
-
.I resv
|
311
|
-
array must be initialized to zero.
|
312
|
-
|
313
|
-
.I features
|
314
|
-
is filled in by the kernel, which specifies various features supported
|
315
|
-
by current kernel version.
|
316
|
-
.TP
|
317
|
-
.B IORING_FEAT_SINGLE_MMAP
|
318
|
-
If this flag is set, the two SQ and CQ rings can be mapped with a single
|
319
|
-
.I mmap(2)
|
320
|
-
call. The SQEs must still be allocated separately. This brings the necessary
|
321
|
-
.I mmap(2)
|
322
|
-
calls down from three to two. Available since kernel 5.4.
|
323
|
-
.TP
|
324
|
-
.B IORING_FEAT_NODROP
|
325
|
-
If this flag is set, io_uring supports almost never dropping completion events.
|
326
|
-
If a completion event occurs and the CQ ring is full, the kernel stores
|
327
|
-
the event internally until such a time that the CQ ring has room for more
|
328
|
-
entries. If this overflow condition is entered, attempting to submit more
|
329
|
-
IO will fail with the
|
330
|
-
.B -EBUSY
|
331
|
-
error value, if it can't flush the overflown events to the CQ ring. If this
|
332
|
-
happens, the application must reap events from the CQ ring and attempt the
|
333
|
-
submit again. If the kernel has no free memory to store the event internally
|
334
|
-
it will be visible by an increase in the overflow value on the cqring.
|
335
|
-
Available since kernel 5.5. Additionally
|
336
|
-
.BR io_uring_enter (2)
|
337
|
-
will return
|
338
|
-
.B -EBADR
|
339
|
-
the next time it would otherwise sleep waiting for completions (since kernel 5.19).
|
340
|
-
|
341
|
-
.TP
|
342
|
-
.B IORING_FEAT_SUBMIT_STABLE
|
343
|
-
If this flag is set, applications can be certain that any data for
|
344
|
-
async offload has been consumed when the kernel has consumed the SQE. Available
|
345
|
-
since kernel 5.5.
|
346
|
-
.TP
|
347
|
-
.B IORING_FEAT_RW_CUR_POS
|
348
|
-
If this flag is set, applications can specify
|
349
|
-
.I offset
|
350
|
-
==
|
351
|
-
.B -1
|
352
|
-
with
|
353
|
-
.B IORING_OP_{READV,WRITEV}
|
354
|
-
,
|
355
|
-
.B IORING_OP_{READ,WRITE}_FIXED
|
356
|
-
, and
|
357
|
-
.B IORING_OP_{READ,WRITE}
|
358
|
-
to mean current file position, which behaves like
|
359
|
-
.I preadv2(2)
|
360
|
-
and
|
361
|
-
.I pwritev2(2)
|
362
|
-
with
|
363
|
-
.I offset
|
364
|
-
==
|
365
|
-
.BR -1 .
|
366
|
-
It'll use (and update) the current file position. This obviously comes
|
367
|
-
with the caveat that if the application has multiple reads or writes in flight,
|
368
|
-
then the end result will not be as expected. This is similar to threads sharing
|
369
|
-
a file descriptor and doing IO using the current file position. Available since
|
370
|
-
kernel 5.6.
|
371
|
-
.TP
|
372
|
-
.B IORING_FEAT_CUR_PERSONALITY
|
373
|
-
If this flag is set, then io_uring guarantees that both sync and async
|
374
|
-
execution of a request assumes the credentials of the task that called
|
375
|
-
.I
|
376
|
-
io_uring_enter(2)
|
377
|
-
to queue the requests. If this flag isn't set, then requests are issued with
|
378
|
-
the credentials of the task that originally registered the io_uring. If only
|
379
|
-
one task is using a ring, then this flag doesn't matter as the credentials
|
380
|
-
will always be the same. Note that this is the default behavior, tasks can
|
381
|
-
still register different personalities through
|
382
|
-
.I
|
383
|
-
io_uring_register(2)
|
384
|
-
with
|
385
|
-
.B IORING_REGISTER_PERSONALITY
|
386
|
-
and specify the personality to use in the sqe. Available since kernel 5.6.
|
387
|
-
.TP
|
388
|
-
.B IORING_FEAT_FAST_POLL
|
389
|
-
If this flag is set, then io_uring supports using an internal poll mechanism
|
390
|
-
to drive data/space readiness. This means that requests that cannot read or
|
391
|
-
write data to a file no longer need to be punted to an async thread for
|
392
|
-
handling, instead they will begin operation when the file is ready. This is
|
393
|
-
similar to doing poll + read/write in userspace, but eliminates the need to do
|
394
|
-
so. If this flag is set, requests waiting on space/data consume a lot less
|
395
|
-
resources doing so as they are not blocking a thread. Available since kernel
|
396
|
-
5.7.
|
397
|
-
.TP
|
398
|
-
.B IORING_FEAT_POLL_32BITS
|
399
|
-
If this flag is set, the
|
400
|
-
.B IORING_OP_POLL_ADD
|
401
|
-
command accepts the full 32-bit range of epoll based flags. Most notably
|
402
|
-
.B EPOLLEXCLUSIVE
|
403
|
-
which allows exclusive (waking single waiters) behavior. Available since kernel
|
404
|
-
5.9.
|
405
|
-
.TP
|
406
|
-
.B IORING_FEAT_SQPOLL_NONFIXED
|
407
|
-
If this flag is set, the
|
408
|
-
.B IORING_SETUP_SQPOLL
|
409
|
-
feature no longer requires the use of fixed files. Any normal file descriptor
|
410
|
-
can be used for IO commands without needing registration. Available since
|
411
|
-
kernel 5.11.
|
412
|
-
.TP
|
413
|
-
.B IORING_FEAT_ENTER_EXT_ARG
|
414
|
-
If this flag is set, then the
|
415
|
-
.BR io_uring_enter (2)
|
416
|
-
system call supports passing in an extended argument instead of just the
|
417
|
-
.IR "sigset_t"
|
418
|
-
of earlier kernels. This
|
419
|
-
extended argument is of type
|
420
|
-
.IR "struct io_uring_getevents_arg"
|
421
|
-
and allows the caller to pass in both a
|
422
|
-
.IR "sigset_t"
|
423
|
-
and a timeout argument for waiting on events. The struct layout is as follows:
|
424
|
-
.TP
|
425
|
-
.in +8n
|
426
|
-
.EX
|
427
|
-
struct io_uring_getevents_arg {
|
428
|
-
__u64 sigmask;
|
429
|
-
__u32 sigmask_sz;
|
430
|
-
__u32 pad;
|
431
|
-
__u64 ts;
|
432
|
-
};
|
433
|
-
.EE
|
434
|
-
|
435
|
-
and a pointer to this struct must be passed in if
|
436
|
-
.B IORING_ENTER_EXT_ARG
|
437
|
-
is set in the flags for the enter system call. Available since kernel 5.11.
|
438
|
-
.TP
|
439
|
-
.B IORING_FEAT_NATIVE_WORKERS
|
440
|
-
If this flag is set, io_uring is using native workers for its async helpers.
|
441
|
-
Previous kernels used kernel threads that assumed the identity of the
|
442
|
-
original io_uring owning task, but later kernels will actively create what
|
443
|
-
looks more like regular process threads instead. Available since kernel
|
444
|
-
5.12.
|
445
|
-
.TP
|
446
|
-
.B IORING_FEAT_RSRC_TAGS
|
447
|
-
If this flag is set, then io_uring supports a variety of features related
|
448
|
-
to fixed files and buffers. In particular, it indicates that registered
|
449
|
-
buffers can be updated in-place, whereas before the full set would have to
|
450
|
-
be unregistered first. Available since kernel 5.13.
|
451
|
-
.TP
|
452
|
-
.B IORING_FEAT_CQE_SKIP
|
453
|
-
If this flag is set, then io_uring supports setting
|
454
|
-
.B IOSQE_CQE_SKIP_SUCCESS
|
455
|
-
in the submitted SQE, indicating that no CQE should be generated for this
|
456
|
-
SQE if it executes normally. If an error happens processing the SQE, a
|
457
|
-
CQE with the appropriate error value will still be generated. Available since
|
458
|
-
kernel 5.17.
|
459
|
-
.TP
|
460
|
-
.B IORING_FEAT_LINKED_FILE
|
461
|
-
If this flag is set, then io_uring supports sane assignment of files for SQEs
|
462
|
-
that have dependencies. For example, if a chain of SQEs is submitted with
|
463
|
-
.BR IOSQE_IO_LINK ,
|
464
|
-
then kernels without this flag will prepare the file for each link upfront.
|
465
|
-
If a previous link opens a file with a known index, eg if direct descriptors
|
466
|
-
are used with open or accept, then file assignment needs to happen post
|
467
|
-
execution of that SQE. If this flag is set, then the kernel will defer
|
468
|
-
file assignment until execution of a given request is started. Available since
|
469
|
-
kernel 5.17.
|
470
|
-
.TP
|
471
|
-
.B IORING_FEAT_REG_REG_RING
|
472
|
-
If this flag is set, then io_uring supports calling
|
473
|
-
.BR io_uring_register (2)
|
474
|
-
using a registered ring fd, via
|
475
|
-
.BR IORING_REGISTER_USE_REGISTERED_RING .
|
476
|
-
Available since kernel 6.3.
|
477
|
-
|
478
|
-
.PP
|
479
|
-
The rest of the fields in the
|
480
|
-
.I struct io_uring_params
|
481
|
-
are filled in by the kernel, and provide the information necessary to
|
482
|
-
memory map the submission queue, completion queue, and the array of
|
483
|
-
submission queue entries.
|
484
|
-
.I sq_entries
|
485
|
-
specifies the number of submission queue entries allocated.
|
486
|
-
.I sq_off
|
487
|
-
describes the offsets of various ring buffer fields:
|
488
|
-
.PP
|
489
|
-
.in +4n
|
490
|
-
.EX
|
491
|
-
struct io_sqring_offsets {
|
492
|
-
__u32 head;
|
493
|
-
__u32 tail;
|
494
|
-
__u32 ring_mask;
|
495
|
-
__u32 ring_entries;
|
496
|
-
__u32 flags;
|
497
|
-
__u32 dropped;
|
498
|
-
__u32 array;
|
499
|
-
__u32 resv[3];
|
500
|
-
};
|
501
|
-
.EE
|
502
|
-
.in
|
503
|
-
.PP
|
504
|
-
Taken together,
|
505
|
-
.I sq_entries
|
506
|
-
and
|
507
|
-
.I sq_off
|
508
|
-
provide all of the information necessary for accessing the submission
|
509
|
-
queue ring buffer and the submission queue entry array. The
|
510
|
-
submission queue can be mapped with a call like:
|
511
|
-
.PP
|
512
|
-
.in +4n
|
513
|
-
.EX
|
514
|
-
ptr = mmap(0, sq_off.array + sq_entries * sizeof(__u32),
|
515
|
-
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE,
|
516
|
-
ring_fd, IORING_OFF_SQ_RING);
|
517
|
-
.EE
|
518
|
-
.in
|
519
|
-
.PP
|
520
|
-
where
|
521
|
-
.I sq_off
|
522
|
-
is the
|
523
|
-
.I io_sqring_offsets
|
524
|
-
structure, and
|
525
|
-
.I ring_fd
|
526
|
-
is the file descriptor returned from
|
527
|
-
.BR io_uring_setup (2).
|
528
|
-
The addition of
|
529
|
-
.I sq_off.array
|
530
|
-
to the length of the region accounts for the fact that the ring
|
531
|
-
is located at the end of the data structure. As an example, the ring
|
532
|
-
buffer head pointer can be accessed by adding
|
533
|
-
.I sq_off.head
|
534
|
-
to the address returned from
|
535
|
-
.BR mmap (2):
|
536
|
-
.PP
|
537
|
-
.in +4n
|
538
|
-
.EX
|
539
|
-
head = ptr + sq_off.head;
|
540
|
-
.EE
|
541
|
-
.in
|
542
|
-
|
543
|
-
The
|
544
|
-
.I flags
|
545
|
-
field is used by the kernel to communicate state information to the
|
546
|
-
application. Currently, it is used to inform the application when a
|
547
|
-
call to
|
548
|
-
.BR io_uring_enter (2)
|
549
|
-
is necessary. See the documentation for the
|
550
|
-
.B IORING_SETUP_SQPOLL
|
551
|
-
flag above.
|
552
|
-
The
|
553
|
-
.I dropped
|
554
|
-
member is incremented for each invalid submission queue entry
|
555
|
-
encountered in the ring buffer.
|
556
|
-
|
557
|
-
The head and tail track the ring buffer state. The tail is
|
558
|
-
incremented by the application when submitting new I/O, and the head
|
559
|
-
is incremented by the kernel when the I/O has been successfully
|
560
|
-
submitted. Determining the index of the head or tail into the ring is
|
561
|
-
accomplished by applying a mask:
|
562
|
-
.PP
|
563
|
-
.in +4n
|
564
|
-
.EX
|
565
|
-
index = tail & ring_mask;
|
566
|
-
.EE
|
567
|
-
.in
|
568
|
-
.PP
|
569
|
-
The array of submission queue entries is mapped with:
|
570
|
-
.PP
|
571
|
-
.in +4n
|
572
|
-
.EX
|
573
|
-
sqentries = mmap(0, sq_entries * sizeof(struct io_uring_sqe),
|
574
|
-
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE,
|
575
|
-
ring_fd, IORING_OFF_SQES);
|
576
|
-
.EE
|
577
|
-
.in
|
578
|
-
.PP
|
579
|
-
The completion queue is described by
|
580
|
-
.I cq_entries
|
581
|
-
and
|
582
|
-
.I cq_off
|
583
|
-
shown here:
|
584
|
-
.PP
|
585
|
-
.in +4n
|
586
|
-
.EX
|
587
|
-
struct io_cqring_offsets {
|
588
|
-
__u32 head;
|
589
|
-
__u32 tail;
|
590
|
-
__u32 ring_mask;
|
591
|
-
__u32 ring_entries;
|
592
|
-
__u32 overflow;
|
593
|
-
__u32 cqes;
|
594
|
-
__u32 flags;
|
595
|
-
__u32 resv[3];
|
596
|
-
};
|
597
|
-
.EE
|
598
|
-
.in
|
599
|
-
.PP
|
600
|
-
The completion queue is simpler, since the entries are not separated
|
601
|
-
from the queue itself, and can be mapped with:
|
602
|
-
.PP
|
603
|
-
.in +4n
|
604
|
-
.EX
|
605
|
-
ptr = mmap(0, cq_off.cqes + cq_entries * sizeof(struct io_uring_cqe),
|
606
|
-
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd,
|
607
|
-
IORING_OFF_CQ_RING);
|
608
|
-
.EE
|
609
|
-
.in
|
610
|
-
.PP
|
611
|
-
Closing the file descriptor returned by
|
612
|
-
.BR io_uring_setup (2)
|
613
|
-
will free all resources associated with the io_uring context.
|
614
|
-
.PP
|
615
|
-
.SH RETURN VALUE
|
616
|
-
.BR io_uring_setup (2)
|
617
|
-
returns a new file descriptor on success. The application may then
|
618
|
-
provide the file descriptor in a subsequent
|
619
|
-
.BR mmap (2)
|
620
|
-
call to map the submission and completion queues, or to the
|
621
|
-
.BR io_uring_register (2)
|
622
|
-
or
|
623
|
-
.BR io_uring_enter (2)
|
624
|
-
system calls.
|
625
|
-
|
626
|
-
On error, a negative error code is returned. The caller should not rely on the
|
627
|
-
.I errno
|
628
|
-
variable.
|
629
|
-
.PP
|
630
|
-
.SH ERRORS
|
631
|
-
.TP
|
632
|
-
.B EFAULT
|
633
|
-
params is outside your accessible address space.
|
634
|
-
.TP
|
635
|
-
.B EINVAL
|
636
|
-
The resv array contains non-zero data, p.flags contains an unsupported
|
637
|
-
flag,
|
638
|
-
.I entries
|
639
|
-
is out of bounds,
|
640
|
-
.B IORING_SETUP_SQ_AFF
|
641
|
-
was specified, but
|
642
|
-
.B IORING_SETUP_SQPOLL
|
643
|
-
was not, or
|
644
|
-
.B IORING_SETUP_CQSIZE
|
645
|
-
was specified, but
|
646
|
-
.I io_uring_params.cq_entries
|
647
|
-
was invalid.
|
648
|
-
.TP
|
649
|
-
.B EMFILE
|
650
|
-
The per-process limit on the number of open file descriptors has been
|
651
|
-
reached (see the description of
|
652
|
-
.B RLIMIT_NOFILE
|
653
|
-
in
|
654
|
-
.BR getrlimit (2)).
|
655
|
-
.TP
|
656
|
-
.B ENFILE
|
657
|
-
The system-wide limit on the total number of open files has been
|
658
|
-
reached.
|
659
|
-
.TP
|
660
|
-
.B ENOMEM
|
661
|
-
Insufficient kernel resources are available.
|
662
|
-
.TP
|
663
|
-
.B EPERM
|
664
|
-
.B IORING_SETUP_SQPOLL
|
665
|
-
was specified, but the effective user ID of the caller did not have sufficient
|
666
|
-
privileges.
|
667
|
-
.SH SEE ALSO
|
668
|
-
.BR io_uring_register (2),
|
669
|
-
.BR io_uring_enter (2)
|
@@ -1,31 +0,0 @@
|
|
1
|
-
.\" Copyright (C) 2022 Stefan Roesch <shr@fb.com>
|
2
|
-
.\"
|
3
|
-
.\" SPDX-License-Identifier: LGPL-2.0-or-later
|
4
|
-
.\"
|
5
|
-
.TH io_uring_sq_ready 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
|
6
|
-
.SH NAME
|
7
|
-
io_uring_sq_ready \- number of unconsumed or unsubmitted entries in the SQ ring
|
8
|
-
.SH SYNOPSIS
|
9
|
-
.nf
|
10
|
-
.B #include <liburing.h>
|
11
|
-
.PP
|
12
|
-
.BI "unsigned io_uring_sq_ready(const struct io_uring *" ring ");"
|
13
|
-
.fi
|
14
|
-
.SH DESCRIPTION
|
15
|
-
.PP
|
16
|
-
The
|
17
|
-
.BR io_uring_sq_ready (3)
|
18
|
-
function returns the number of unconsumed (if SQPOLL) or unsubmitted entries
|
19
|
-
that exist in the SQ ring belonging to the
|
20
|
-
.I ring
|
21
|
-
param.
|
22
|
-
|
23
|
-
Usage of this function only applies if the ring has been set up with
|
24
|
-
.B IORING_SETUP_SQPOLL,
|
25
|
-
where request submissions, and hence consumption from the SQ ring, happen
|
26
|
-
through a polling thread.
|
27
|
-
|
28
|
-
.SH RETURN VALUE
|
29
|
-
Returns the number of unconsumed or unsubmitted entries in the SQ ring.
|
30
|
-
.SH SEE ALSO
|
31
|
-
.BR io_uring_cq_ready (3)
|