@nxtedition/rocksdb 5.2.36 → 5.2.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/binding.cc +66 -91
  2. package/deps/liburing/liburing/COPYING +502 -0
  3. package/deps/liburing/liburing/COPYING.GPL +339 -0
  4. package/deps/liburing/liburing/LICENSE +7 -0
  5. package/deps/liburing/liburing/Makefile +84 -0
  6. package/deps/liburing/liburing/Makefile.quiet +11 -0
  7. package/deps/liburing/liburing/README +46 -0
  8. package/deps/liburing/liburing/configure +420 -0
  9. package/deps/liburing/liburing/debian/README.Debian +7 -0
  10. package/deps/liburing/liburing/debian/changelog +27 -0
  11. package/deps/liburing/liburing/debian/compat +1 -0
  12. package/deps/liburing/liburing/debian/control +48 -0
  13. package/deps/liburing/liburing/debian/copyright +49 -0
  14. package/deps/liburing/liburing/debian/liburing-dev.install +4 -0
  15. package/deps/liburing/liburing/debian/liburing-dev.manpages +6 -0
  16. package/deps/liburing/liburing/debian/liburing1-udeb.install +1 -0
  17. package/deps/liburing/liburing/debian/liburing1.install +1 -0
  18. package/deps/liburing/liburing/debian/liburing1.symbols +32 -0
  19. package/deps/liburing/liburing/debian/patches/series +1 -0
  20. package/deps/liburing/liburing/debian/rules +81 -0
  21. package/deps/liburing/liburing/debian/source/format +1 -0
  22. package/deps/liburing/liburing/debian/source/local-options +2 -0
  23. package/deps/liburing/liburing/debian/source/options +1 -0
  24. package/deps/liburing/liburing/debian/watch +3 -0
  25. package/deps/liburing/liburing/examples/Makefile +29 -0
  26. package/deps/liburing/liburing/examples/io_uring-cp.c +279 -0
  27. package/deps/liburing/liburing/examples/io_uring-test.c +112 -0
  28. package/deps/liburing/liburing/examples/link-cp.c +193 -0
  29. package/deps/liburing/liburing/examples/ucontext-cp.c +273 -0
  30. package/deps/liburing/liburing/liburing.pc.in +12 -0
  31. package/deps/liburing/liburing/liburing.spec +66 -0
  32. package/deps/liburing/liburing/make-debs.sh +53 -0
  33. package/deps/liburing/liburing/man/io_uring.7 +736 -0
  34. package/deps/liburing/liburing/man/io_uring_enter.2 +1403 -0
  35. package/deps/liburing/liburing/man/io_uring_get_sqe.3 +37 -0
  36. package/deps/liburing/liburing/man/io_uring_queue_exit.3 +27 -0
  37. package/deps/liburing/liburing/man/io_uring_queue_init.3 +44 -0
  38. package/deps/liburing/liburing/man/io_uring_register.2 +605 -0
  39. package/deps/liburing/liburing/man/io_uring_setup.2 +515 -0
  40. package/deps/liburing/liburing/src/Makefile +76 -0
  41. package/deps/liburing/liburing/src/include/liburing/barrier.h +73 -0
  42. package/deps/liburing/liburing/src/include/liburing/io_uring.h +422 -0
  43. package/deps/liburing/liburing/src/include/liburing.h +775 -0
  44. package/deps/liburing/liburing/src/liburing.map +46 -0
  45. package/deps/liburing/liburing/src/queue.c +403 -0
  46. package/deps/liburing/liburing/src/register.c +299 -0
  47. package/deps/liburing/liburing/src/setup.c +356 -0
  48. package/deps/liburing/liburing/src/syscall.c +73 -0
  49. package/deps/liburing/liburing/src/syscall.h +20 -0
  50. package/deps/liburing/liburing/test/232c93d07b74-test.c +305 -0
  51. package/deps/liburing/liburing/test/35fa71a030ca-test.c +329 -0
  52. package/deps/liburing/liburing/test/500f9fbadef8-test.c +89 -0
  53. package/deps/liburing/liburing/test/7ad0e4b2f83c-test.c +93 -0
  54. package/deps/liburing/liburing/test/8a9973408177-test.c +106 -0
  55. package/deps/liburing/liburing/test/917257daa0fe-test.c +53 -0
  56. package/deps/liburing/liburing/test/Makefile +312 -0
  57. package/deps/liburing/liburing/test/a0908ae19763-test.c +58 -0
  58. package/deps/liburing/liburing/test/a4c0b3decb33-test.c +180 -0
  59. package/deps/liburing/liburing/test/accept-link.c +251 -0
  60. package/deps/liburing/liburing/test/accept-reuse.c +164 -0
  61. package/deps/liburing/liburing/test/accept-test.c +79 -0
  62. package/deps/liburing/liburing/test/accept.c +476 -0
  63. package/deps/liburing/liburing/test/across-fork.c +283 -0
  64. package/deps/liburing/liburing/test/b19062a56726-test.c +53 -0
  65. package/deps/liburing/liburing/test/b5837bd5311d-test.c +77 -0
  66. package/deps/liburing/liburing/test/ce593a6c480a-test.c +135 -0
  67. package/deps/liburing/liburing/test/close-opath.c +122 -0
  68. package/deps/liburing/liburing/test/config +10 -0
  69. package/deps/liburing/liburing/test/connect.c +398 -0
  70. package/deps/liburing/liburing/test/cq-full.c +96 -0
  71. package/deps/liburing/liburing/test/cq-overflow.c +294 -0
  72. package/deps/liburing/liburing/test/cq-peek-batch.c +102 -0
  73. package/deps/liburing/liburing/test/cq-ready.c +94 -0
  74. package/deps/liburing/liburing/test/cq-size.c +58 -0
  75. package/deps/liburing/liburing/test/d4ae271dfaae-test.c +96 -0
  76. package/deps/liburing/liburing/test/d77a67ed5f27-test.c +65 -0
  77. package/deps/liburing/liburing/test/defer.c +307 -0
  78. package/deps/liburing/liburing/test/double-poll-crash.c +186 -0
  79. package/deps/liburing/liburing/test/eeed8b54e0df-test.c +114 -0
  80. package/deps/liburing/liburing/test/empty-eownerdead.c +42 -0
  81. package/deps/liburing/liburing/test/eventfd-disable.c +151 -0
  82. package/deps/liburing/liburing/test/eventfd-ring.c +97 -0
  83. package/deps/liburing/liburing/test/eventfd.c +112 -0
  84. package/deps/liburing/liburing/test/fadvise.c +202 -0
  85. package/deps/liburing/liburing/test/fallocate.c +249 -0
  86. package/deps/liburing/liburing/test/fc2a85cb02ef-test.c +138 -0
  87. package/deps/liburing/liburing/test/file-register.c +843 -0
  88. package/deps/liburing/liburing/test/file-update.c +173 -0
  89. package/deps/liburing/liburing/test/files-exit-hang-poll.c +128 -0
  90. package/deps/liburing/liburing/test/files-exit-hang-timeout.c +134 -0
  91. package/deps/liburing/liburing/test/fixed-link.c +90 -0
  92. package/deps/liburing/liburing/test/fsync.c +224 -0
  93. package/deps/liburing/liburing/test/hardlink.c +136 -0
  94. package/deps/liburing/liburing/test/helpers.c +135 -0
  95. package/deps/liburing/liburing/test/helpers.h +67 -0
  96. package/deps/liburing/liburing/test/io-cancel.c +537 -0
  97. package/deps/liburing/liburing/test/io_uring_enter.c +296 -0
  98. package/deps/liburing/liburing/test/io_uring_register.c +664 -0
  99. package/deps/liburing/liburing/test/io_uring_setup.c +192 -0
  100. package/deps/liburing/liburing/test/iopoll.c +366 -0
  101. package/deps/liburing/liburing/test/lfs-openat-write.c +117 -0
  102. package/deps/liburing/liburing/test/lfs-openat.c +273 -0
  103. package/deps/liburing/liburing/test/link-timeout.c +1107 -0
  104. package/deps/liburing/liburing/test/link.c +496 -0
  105. package/deps/liburing/liburing/test/link_drain.c +229 -0
  106. package/deps/liburing/liburing/test/madvise.c +195 -0
  107. package/deps/liburing/liburing/test/mkdir.c +108 -0
  108. package/deps/liburing/liburing/test/multicqes_drain.c +383 -0
  109. package/deps/liburing/liburing/test/nop-all-sizes.c +107 -0
  110. package/deps/liburing/liburing/test/nop.c +115 -0
  111. package/deps/liburing/liburing/test/open-close.c +146 -0
  112. package/deps/liburing/liburing/test/openat2.c +240 -0
  113. package/deps/liburing/liburing/test/personality.c +204 -0
  114. package/deps/liburing/liburing/test/pipe-eof.c +81 -0
  115. package/deps/liburing/liburing/test/pipe-reuse.c +105 -0
  116. package/deps/liburing/liburing/test/poll-cancel-ton.c +139 -0
  117. package/deps/liburing/liburing/test/poll-cancel.c +135 -0
  118. package/deps/liburing/liburing/test/poll-link.c +227 -0
  119. package/deps/liburing/liburing/test/poll-many.c +208 -0
  120. package/deps/liburing/liburing/test/poll-mshot-update.c +273 -0
  121. package/deps/liburing/liburing/test/poll-ring.c +48 -0
  122. package/deps/liburing/liburing/test/poll-v-poll.c +353 -0
  123. package/deps/liburing/liburing/test/poll.c +109 -0
  124. package/deps/liburing/liburing/test/probe.c +137 -0
  125. package/deps/liburing/liburing/test/read-write.c +876 -0
  126. package/deps/liburing/liburing/test/register-restrictions.c +633 -0
  127. package/deps/liburing/liburing/test/rename.c +134 -0
  128. package/deps/liburing/liburing/test/ring-leak.c +173 -0
  129. package/deps/liburing/liburing/test/ring-leak2.c +249 -0
  130. package/deps/liburing/liburing/test/rsrc_tags.c +449 -0
  131. package/deps/liburing/liburing/test/runtests-loop.sh +16 -0
  132. package/deps/liburing/liburing/test/runtests.sh +170 -0
  133. package/deps/liburing/liburing/test/rw_merge_test.c +97 -0
  134. package/deps/liburing/liburing/test/self.c +91 -0
  135. package/deps/liburing/liburing/test/send_recv.c +291 -0
  136. package/deps/liburing/liburing/test/send_recvmsg.c +345 -0
  137. package/deps/liburing/liburing/test/sendmsg_fs_cve.c +198 -0
  138. package/deps/liburing/liburing/test/shared-wq.c +84 -0
  139. package/deps/liburing/liburing/test/short-read.c +75 -0
  140. package/deps/liburing/liburing/test/shutdown.c +163 -0
  141. package/deps/liburing/liburing/test/sigfd-deadlock.c +74 -0
  142. package/deps/liburing/liburing/test/socket-rw-eagain.c +156 -0
  143. package/deps/liburing/liburing/test/socket-rw.c +147 -0
  144. package/deps/liburing/liburing/test/splice.c +511 -0
  145. package/deps/liburing/liburing/test/sq-full-cpp.cc +45 -0
  146. package/deps/liburing/liburing/test/sq-full.c +45 -0
  147. package/deps/liburing/liburing/test/sq-poll-dup.c +200 -0
  148. package/deps/liburing/liburing/test/sq-poll-kthread.c +168 -0
  149. package/deps/liburing/liburing/test/sq-poll-share.c +137 -0
  150. package/deps/liburing/liburing/test/sq-space_left.c +159 -0
  151. package/deps/liburing/liburing/test/sqpoll-cancel-hang.c +159 -0
  152. package/deps/liburing/liburing/test/sqpoll-disable-exit.c +195 -0
  153. package/deps/liburing/liburing/test/sqpoll-exit-hang.c +77 -0
  154. package/deps/liburing/liburing/test/sqpoll-sleep.c +68 -0
  155. package/deps/liburing/liburing/test/statx.c +172 -0
  156. package/deps/liburing/liburing/test/stdout.c +232 -0
  157. package/deps/liburing/liburing/test/submit-link-fail.c +154 -0
  158. package/deps/liburing/liburing/test/submit-reuse.c +239 -0
  159. package/deps/liburing/liburing/test/symlink.c +116 -0
  160. package/deps/liburing/liburing/test/teardowns.c +58 -0
  161. package/deps/liburing/liburing/test/thread-exit.c +131 -0
  162. package/deps/liburing/liburing/test/timeout-new.c +246 -0
  163. package/deps/liburing/liburing/test/timeout-overflow.c +204 -0
  164. package/deps/liburing/liburing/test/timeout.c +1354 -0
  165. package/deps/liburing/liburing/test/unlink.c +111 -0
  166. package/deps/liburing/liburing/test/wakeup-hang.c +162 -0
  167. package/deps/liburing/liburing.gyp +20 -0
  168. package/deps/rocksdb/rocksdb/db/corruption_test.cc +62 -0
  169. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +7 -62
  170. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +25 -11
  171. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +74 -155
  172. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
  173. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +2 -2
  174. package/deps/rocksdb/rocksdb/env/fs_posix.cc +13 -0
  175. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +4 -2
  176. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +22 -4
  177. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +5 -0
  178. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +15 -0
  179. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  180. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  181. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -0
  182. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +3 -7
  183. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +2 -1
  184. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +44 -29
  185. package/deps/rocksdb/rocksdb.gyp +4 -3
  186. package/package.json +1 -1
  187. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  188. package/prebuilds/darwin-x64/node.napi.node +0 -0
  189. package/prebuilds/linux-x64/node.napi.node +0 -0
  190. package/prebuilds/prebuilds/linux-x64/node.napi.node +0 -0
@@ -0,0 +1,1403 @@
1
+ .\" Copyright (C) 2019 Jens Axboe <axboe@kernel.dk>
2
+ .\" Copyright (C) 2019 Red Hat, Inc.
3
+ .\"
4
+ .\" SPDX-License-Identifier: LGPL-2.0-or-later
5
+ .\"
6
+ .TH IO_URING_ENTER 2 2019-01-22 "Linux" "Linux Programmer's Manual"
7
+ .SH NAME
8
+ io_uring_enter \- initiate and/or complete asynchronous I/O
9
+ .SH SYNOPSIS
10
+ .nf
11
+ .BR "#include <linux/io_uring.h>"
12
+ .PP
13
+ .BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit ,
14
+ .BI " unsigned int " min_complete ", unsigned int " flags ,
15
+ .BI " sigset_t *" sig );
16
+ .fi
17
+ .PP
18
+ .SH DESCRIPTION
19
+ .PP
20
+ .BR io_uring_enter ()
21
+ is used to initiate and complete I/O using the shared submission and
22
+ completion queues set up by a call to
23
+ .BR io_uring_setup (2).
24
+ A single call can both submit new I/O and wait for completions of I/O
25
+ initiated by this call or previous calls to
26
+ .BR io_uring_enter ().
27
+
28
+ .I fd
29
+ is the file descriptor returned by
30
+ .BR io_uring_setup (2).
31
+ .I to_submit
32
+ specifies the number of I/Os to submit from the submission queue.
33
+ .I flags
34
+ is a bitmask of the following values:
35
+ .TP
36
+ .B IORING_ENTER_GETEVENTS
37
+ If this flag is set, then the system call will wait for the specified
38
+ number of events in
39
+ .I min_complete
40
+ before returning. This flag can be set along with
41
+ .I to_submit
42
+ to both submit and complete events in a single system call.
43
+ .TP
44
+ .B IORING_ENTER_SQ_WAKEUP
45
+ If the ring has been created with
46
+ .B IORING_SETUP_SQPOLL,
47
+ then this flag asks the kernel to wake up the SQ kernel thread to submit IO.
48
+ .TP
49
+ .B IORING_ENTER_SQ_WAIT
50
+ If the ring has been created with
51
+ .B IORING_SETUP_SQPOLL,
52
+ then the application has no real insight into when the SQ kernel thread has
53
+ consumed entries from the SQ ring. This can lead to a situation where the
54
+ application can no longer get a free SQE entry to submit, without knowing
55
+ when one becomes available as the SQ kernel thread consumes them. If
56
+ the system call is used with this flag set, then it will wait until at least
57
+ one entry is free in the SQ ring.
58
+ .TP
59
+ .B IORING_ENTER_EXT_ARG
60
+ Since kernel 5.11, the system call's arguments have been modified to look like
61
+ the following:
62
+
63
+ .nf
64
+ .BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit ,
65
+ .BI " unsigned int " min_complete ", unsigned int " flags ,
66
+ .BI " const void *" arg ", size_t " argsz );
67
+ .fi
68
+
69
+ which behaves just like the original definition by default. However, if
70
+ .B IORING_ENTER_EXT_ARG
71
+ is set, then instead of a
72
+ .I sigset_t
73
+ being passed in, a pointer to a
74
+ .I struct io_uring_getevents_arg
75
+ is used instead and
76
+ .I argsz
77
+ must be set to the size of this structure. The definition is as follows:
78
+
79
+ .nf
80
+ .BI "struct io_uring_getevents_arg {
81
+ .BI " __u64 sigmask;
82
+ .BI " __u32 sigmask_sz;
83
+ .BI " __u32 pad;
84
+ .BI " __u64 ts;
85
+ .BI "};
86
+ .fi
87
+
88
+ which allows passing in both a signal mask as well as a pointer to a
89
+ .I struct __kernel_timespec
90
+ timeout value. If
91
+ .I ts
92
+ is set to a valid pointer, then this time value indicates the timeout for
93
+ waiting on events. If an application is waiting on events and wishes to
94
+ stop waiting after a specified amount of time, then this can be accomplished
95
+ directly in version 5.11 and newer by using this feature.
96
+
97
+ .PP
98
+ .PP
99
+ If the io_uring instance was configured for polling, by specifying
100
+ .B IORING_SETUP_IOPOLL
101
+ in the call to
102
+ .BR io_uring_setup (2),
103
+ then min_complete has a slightly different meaning. Passing a value
104
+ of 0 instructs the kernel to return any events which are already complete,
105
+ without blocking. If
106
+ .I min_complete
107
+ is a non-zero value, the kernel will still return immediately if any
108
+ completion events are available. If no event completions are
109
+ available, then the call will poll either until one or more
110
+ completions become available, or until the process has exceeded its
111
+ scheduler time slice.
112
+
113
+ Note that, for interrupt driven I/O (where
114
+ .B IORING_SETUP_IOPOLL
115
+ was not specified in the call to
116
+ .BR io_uring_setup (2)),
117
+ an application may check the completion queue for event completions
118
+ without entering the kernel at all.
119
+ .PP
120
+ When the system call returns that a certain amount of SQEs have been
121
+ consumed and submitted, it's safe to reuse SQE entries in the ring. This is
122
+ true even if the actual IO submission had to be punted to async context,
123
+ which means that the SQE may in fact not have been submitted yet. If the
124
+ kernel requires later use of a particular SQE entry, it will have made a
125
+ private copy of it.
126
+
127
+ .I sig
128
+ is a pointer to a signal mask (see
129
+ .BR sigprocmask (2));
130
+ if
131
+ .I sig
132
+ is not NULL,
133
+ .BR io_uring_enter ()
134
+ first replaces the current signal mask by the one pointed to by
135
+ .IR sig ,
136
+ then waits for events to become available in the completion queue, and
137
+ then restores the original signal mask. The following
138
+ .BR io_uring_enter ()
139
+ call:
140
+ .PP
141
+ .in +4n
142
+ .EX
143
+ ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, &sig);
144
+ .EE
145
+ .in
146
+ .PP
147
+ is equivalent to
148
+ .I atomically
149
+ executing the following calls:
150
+ .PP
151
+ .in +4n
152
+ .EX
153
+ pthread_sigmask(SIG_SETMASK, &sig, &orig);
154
+ ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, NULL);
155
+ pthread_sigmask(SIG_SETMASK, &orig, NULL);
156
+ .EE
157
+ .in
158
+ .PP
159
+ See the description of
160
+ .BR pselect (2)
161
+ for an explanation of why the
162
+ .I sig
163
+ parameter is necessary.
164
+
165
+ Submission queue entries are represented using the following data
166
+ structure:
167
+ .PP
168
+ .in +4n
169
+ .EX
170
+ /*
171
+ * IO submission data structure (Submission Queue Entry)
172
+ */
173
+ struct io_uring_sqe {
174
+ __u8 opcode; /* type of operation for this sqe */
175
+ __u8 flags; /* IOSQE_ flags */
176
+ __u16 ioprio; /* ioprio for the request */
177
+ __s32 fd; /* file descriptor to do IO on */
178
+ union {
179
+ __u64 off; /* offset into file */
180
+ __u64 addr2;
181
+ };
182
+ union {
183
+ __u64 addr; /* pointer to buffer or iovecs */
184
+ __u64 splice_off_in;
185
+ };
186
+ __u32 len; /* buffer size or number of iovecs */
187
+ union {
188
+ __kernel_rwf_t rw_flags;
189
+ __u32 fsync_flags;
190
+ __u16 poll_events; /* compatibility */
191
+ __u32 poll32_events; /* word-reversed for BE */
192
+ __u32 sync_range_flags;
193
+ __u32 msg_flags;
194
+ __u32 timeout_flags;
195
+ __u32 accept_flags;
196
+ __u32 cancel_flags;
197
+ __u32 open_flags;
198
+ __u32 statx_flags;
199
+ __u32 fadvise_advice;
200
+ __u32 splice_flags;
201
+ __u32 rename_flags;
202
+ __u32 unlink_flags;
203
+ __u32 hardlink_flags;
204
+ };
205
+ __u64 user_data; /* data to be passed back at completion time */
206
+ union {
207
+ struct {
208
+ /* index into fixed buffers, if used */
209
+ union {
210
+ /* index into fixed buffers, if used */
211
+ __u16 buf_index;
212
+ /* for grouped buffer selection */
213
+ __u16 buf_group;
214
+ };
215
+ /* personality to use, if used */
216
+ __u16 personality;
217
+ union {
218
+ __s32 splice_fd_in;
219
+ __u32 file_index;
220
+ };
221
+ };
222
+ __u64 __pad2[3];
223
+ };
224
+ };
225
+ .EE
226
+ .in
227
+ .PP
228
+ The
229
+ .I opcode
230
+ describes the operation to be performed. It can be one of:
231
+ .TP
232
+ .B IORING_OP_NOP
233
+ Do not perform any I/O. This is useful for testing the performance of
234
+ the io_uring implementation itself.
235
+ .TP
236
+ .B IORING_OP_READV
237
+ .TP
238
+ .B IORING_OP_WRITEV
239
+ Vectored read and write operations, similar to
240
+ .BR preadv2 (2)
241
+ and
242
+ .BR pwritev2 (2).
243
+ If the file is not seekable,
244
+ .I off
245
+ must be set to zero.
246
+
247
+ .TP
248
+ .B IORING_OP_READ_FIXED
249
+ .TP
250
+ .B IORING_OP_WRITE_FIXED
251
+ Read from or write to pre-mapped buffers. See
252
+ .BR io_uring_register (2)
253
+ for details on how to set up a context for fixed reads and writes.
254
+
255
+ .TP
256
+ .B IORING_OP_FSYNC
257
+ File sync. See also
258
+ .BR fsync (2).
259
+ Note that, while I/O is initiated in the order in which it appears in
260
+ the submission queue, completions are unordered. For example, an
261
+ application which places a write I/O followed by an fsync in the
262
+ submission queue cannot expect the fsync to apply to the write. The
263
+ two operations execute in parallel, so the fsync may complete before
264
+ the write is issued to the storage. The same is also true for
265
+ previously issued writes that have not completed prior to the fsync.
266
+
267
+ .TP
268
+ .B IORING_OP_POLL_ADD
269
+ Poll the
270
+ .I fd
271
+ specified in the submission queue entry for the events
272
+ specified in the
273
+ .I poll_events
274
+ field. Unlike poll or epoll without
275
+ .BR EPOLLONESHOT ,
276
+ by default this interface always works in one shot mode. That is, once the poll
277
+ operation is completed, it will have to be resubmitted.
278
+
279
+ If
280
+ .B IORING_POLL_ADD_MULTI
281
+ is set in the SQE
282
+ .I len
283
+ field, then the poll will work in multi shot mode instead. That means it'll
284
+ repeatedly trigger when the requested event becomes true, and hence multiple
285
+ CQEs can be generated from this single SQE. The CQE
286
+ .I flags
287
+ field will have
288
+ .B IORING_CQE_F_MORE
289
+ set on completion if the application should expect further CQE entries from
290
+ the original request. If this flag isn't set on completion, then the poll
291
+ request has been terminated and no further events will be generated. This mode
292
+ is available since 5.13.
293
+
294
+ If
295
+ .B IORING_POLL_UPDATE_EVENTS
296
+ is set in the SQE
297
+ .I len
298
+ field, then the request will update an existing poll request with the mask of
299
+ events passed in with this request. The lookup is based on the
300
+ .I user_data
301
+ field of the original SQE submitted, and this value is passed in the
302
+ .I addr
303
+ field of the SQE. This mode is available since 5.13.
304
+
305
+ If
306
+ .B IORING_POLL_UPDATE_USER_DATA
307
+ is set in the SQE
308
+ .I len
309
+ field, then the request will update the
310
+ .I user_data
311
+ of an existing poll request based on the value passed in the
312
+ .I off
313
+ field. This mode is available since 5.13.
314
+
315
+ This command works like
316
+ an async
317
+ .BR poll(2)
318
+ and the completion event result is the returned mask of events. For the
319
+ variants that update
320
+ .I user_data
321
+ or
322
+ .IR events ,
323
+ the completion result will be similar to
324
+ .B IORING_OP_POLL_REMOVE.
325
+
326
+ .TP
327
+ .B IORING_OP_POLL_REMOVE
328
+ Remove an existing poll request. If found, the
329
+ .I res
330
+ field of the
331
+ .I "struct io_uring_cqe"
332
+ will contain 0. If not found,
333
+ .I res
334
+ will contain
335
+ .B -ENOENT,
336
+ or
337
+ .B -EALREADY
338
+ if the poll request was in the process of completing already.
339
+
340
+ .TP
341
+ .B IORING_OP_EPOLL_CTL
342
+ Add, remove or modify entries in the interest list of
343
+ .BR epoll (7).
344
+ See
345
+ .BR epoll_ctl (2)
346
+ for details of the system call.
347
+ .I fd
348
+ holds the file descriptor that represents the epoll instance,
349
+ .I addr
350
+ holds the file descriptor to add, remove or modify,
351
+ .I len
352
+ holds the operation (EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD) to perform, and
353
+ .I off
354
+ holds a pointer to the
355
+ .I epoll_event
356
+ structure. Available since 5.6.
357
+
358
+ .TP
359
+ .B IORING_OP_SYNC_FILE_RANGE
360
+ Issue the equivalent of a \fBsync_file_range\fR (2) on the file descriptor. The
361
+ .I fd
362
+ field is the file descriptor to sync, the
363
+ .I off
364
+ field holds the offset in bytes, the
365
+ .I len
366
+ field holds the length in bytes, and the
367
+ .I sync_range_flags
368
+ field holds the flags for the command. See also
369
+ .BR sync_file_range (2)
370
+ for the general description of the related system call. Available since 5.2.
371
+
372
+ .TP
373
+ .B IORING_OP_SENDMSG
374
+ Issue the equivalent of a
375
+ .BR sendmsg(2)
376
+ system call.
377
+ .I fd
378
+ must be set to the socket file descriptor,
379
+ .I addr
380
+ must contain a pointer to the msghdr structure, and
381
+ .I msg_flags
382
+ holds the flags associated with the system call. See also
383
+ .BR sendmsg (2)
384
+ for the general description of the related system call. Available since 5.3.
385
+
386
+ .TP
387
+ .B IORING_OP_RECVMSG
388
+ Works just like IORING_OP_SENDMSG, except for
389
+ .BR recvmsg(2)
390
+ instead. See the description of IORING_OP_SENDMSG. Available since 5.3.
391
+
392
+ .TP
393
+ .B IORING_OP_SEND
394
+ Issue the equivalent of a
395
+ .BR send(2)
396
+ system call.
397
+ .I fd
398
+ must be set to the socket file descriptor,
399
+ .I addr
400
+ must contain a pointer to the buffer,
401
+ .I len
402
+ denotes the length of the buffer to send, and
403
+ .I msg_flags
404
+ holds the flags associated with the system call. See also
405
+ .BR send(2)
406
+ for the general description of the related system call. Available since 5.6.
407
+
408
+ .TP
409
+ .B IORING_OP_RECV
410
+ Works just like IORING_OP_SEND, except for
411
+ .BR recv(2)
412
+ instead. See the description of IORING_OP_SEND. Available since 5.6.
413
+
414
+ .TP
415
+ .B IORING_OP_TIMEOUT
416
+ This command will register a timeout operation. The
417
+ .I addr
418
+ field must contain a pointer to a struct timespec64 structure,
419
+ .I len
420
+ must contain 1 to signify one timespec64 structure,
421
+ .I timeout_flags
422
+ may contain IORING_TIMEOUT_ABS
423
+ for an absolute timeout value, or 0 for a relative timeout.
424
+ .I off
425
+ may contain a completion event count. A timeout
426
+ will trigger a wakeup event on the completion ring for anyone waiting for
427
+ events. A timeout condition is met when either the specified timeout expires,
428
+ or the specified number of events have completed. Either condition will
429
+ trigger the event. If set to 0, completed events are not counted, which
430
+ effectively acts like a timer. io_uring timeouts use the
431
+ .B CLOCK_MONOTONIC
432
+ clock source. The request will complete with
433
+ .I -ETIME
434
+ if the timeout got completed through expiration of the timer, or
435
+ .I 0
436
+ if the timeout got completed through requests completing on their own. If
437
+ the timeout was cancelled before it expired, the request will complete with
438
+ .I -ECANCELED.
439
+ Available since 5.4.
440
+
441
+ Since 5.15, this command also supports the following modifiers in
442
+ .I timeout_flags:
443
+
444
+ .PP
445
+ .in +12
446
+ .B IORING_TIMEOUT_BOOTTIME
447
+ If set, then the clocksource used is
448
+ .I CLOCK_BOOTTIME
449
+ instead of
450
+ .I CLOCK_MONOTONIC.
451
+ This clocksource differs in that it includes time elapsed if the system was
452
+ suspended while having a timeout request in-flight.
453
+
454
+ .B IORING_TIMEOUT_REALTIME
455
+ If set, then the clocksource used is
456
+ .I CLOCK_REALTIME
457
+ instead of
458
+ .I CLOCK_MONOTONIC.
459
+ .EE
460
+ .in
461
+ .PP
462
+
463
+ .TP
464
+ .B IORING_OP_TIMEOUT_REMOVE
465
+ If
466
+ \fItimeout_flags\fR are zero, then it attempts to remove an existing timeout
467
+ operation.
468
+ .I addr
469
+ must contain the
470
+ .I user_data
471
+ field of the previously issued timeout operation. If the specified timeout
472
+ request is found and cancelled successfully, this request will terminate
473
+ with a result value of
474
+ .IR 0 .
475
+ If the timeout request was found but expiration was already in progress,
476
+ this request will terminate with a result value of
477
+ .IR -EBUSY .
478
+ If the timeout request wasn't found, the request will terminate with a result
479
+ value of
480
+ .IR -ENOENT .
481
+ Available since 5.5.
482
+
483
+ If
484
+ .I timeout_flags
485
+ contain
486
+ .I IORING_TIMEOUT_UPDATE,
487
+ instead of removing an existing operation, it updates it.
488
+ .I addr
489
+ and return values are the same as before.
490
+ .I addr2
491
+ field must contain a pointer to a struct timespec64 structure.
492
+ .I timeout_flags
493
+ may also contain IORING_TIMEOUT_ABS, in which case the value given is an
494
+ absolute one, not a relative one.
495
+ Available since 5.11.
496
+
497
+ .TP
498
+ .B IORING_OP_ACCEPT
499
+ Issue the equivalent of an
500
+ .BR accept4(2)
501
+ system call.
502
+ .I fd
503
+ must be set to the socket file descriptor,
504
+ .I addr
505
+ must contain the pointer to the sockaddr structure, and
506
+ .I addr2
507
+ must contain a pointer to the socklen_t addrlen field. Flags can be passed using
508
+ the
509
+ .I accept_flags
510
+ field. See also
511
+ .BR accept4(2)
512
+ for the general description of the related system call. Available since 5.5.
513
+
514
+ If the
515
+ .I file_index
516
+ field is set to a positive number, the file won't be installed into the
517
+ normal file table as usual but will be placed into the fixed file table at index
518
+ .I file_index - 1.
519
+ In this case, instead of returning a file descriptor, the result will contain
520
+ either 0 on success or an error. If there is already a file registered at this
521
+ index, the request will fail with
522
+ .B -EBADF.
523
+ Only io_uring has access to such files and no other syscall can use them. See
524
+ .B IOSQE_FIXED_FILE
525
+ and
526
+ .B IORING_REGISTER_FILES.
527
+
528
+ Available since 5.15.
529
+
530
+ .TP
531
+ .B IORING_OP_ASYNC_CANCEL
532
+ Attempt to cancel an already issued request.
533
+ .I addr
534
+ must contain the
535
+ .I user_data
536
+ field of the request that should be cancelled. The cancellation request will
537
+ complete with one of the following result codes. If found, the
538
+ .I res
539
+ field of the cqe will contain 0. If not found,
540
+ .I res
541
+ will contain -ENOENT. If found and cancellation was attempted, the
542
+ .I res
543
+ field will contain -EALREADY. In this case, the request may or may not
544
+ terminate. In general, requests that are interruptible (like socket IO) will
545
+ get cancelled, while disk IO requests cannot be cancelled if already started.
546
+ Available since 5.5.
547
+
548
+ .TP
549
+ .B IORING_OP_LINK_TIMEOUT
550
+ This request must be linked with another request through
551
+ .I IOSQE_IO_LINK
552
+ which is described below. Unlike
553
+ .I IORING_OP_TIMEOUT,
554
+ .I IORING_OP_LINK_TIMEOUT
555
+ acts on the linked request, not the completion queue. The format of the command
556
+ is otherwise like
557
+ .I IORING_OP_TIMEOUT,
558
+ except there's no completion event count as it's tied to a specific request.
559
+ If used, the timeout specified in the command will cancel the linked command,
560
+ unless the linked command completes before the timeout. The timeout will
561
+ complete with
562
+ .I -ETIME
563
+ if the timer expired and cancellation of the linked request was attempted, or
564
+ .I -ECANCELED
565
+ if the timer got cancelled because of completion of the linked request. Like
566
+ .B IORING_OP_TIMEOUT
567
+ the clock source used is
568
+ .BR CLOCK_MONOTONIC .
569
+ Available since 5.5.
570
+
571
+
572
+ .TP
573
+ .B IORING_OP_CONNECT
574
+ Issue the equivalent of a
575
+ .BR connect(2)
576
+ system call.
577
+ .I fd
578
+ must be set to the socket file descriptor,
579
+ .I addr
580
+ must contain the const pointer to the sockaddr structure, and
581
+ .I off
582
+ must contain the socklen_t addrlen field. See also
583
+ .BR connect(2)
584
+ for the general description of the related system call. Available since 5.5.
585
+
586
+ .TP
587
+ .B IORING_OP_FALLOCATE
588
+ Issue the equivalent of a
589
+ .BR fallocate(2)
590
+ system call.
591
+ .I fd
592
+ must be set to the file descriptor,
593
+ .I len
594
+ must contain the mode associated with the operation,
595
+ .I off
596
+ must contain the offset on which to operate, and
597
+ .I addr
598
+ must contain the length. See also
599
+ .BR fallocate(2)
600
+ for the general description of the related system call. Available since 5.6.
601
+
602
+ .TP
603
+ .B IORING_OP_FADVISE
604
+ Issue the equivalent of a
605
+ .BR posix_fadvise(2)
606
+ system call.
607
+ .I fd
608
+ must be set to the file descriptor,
609
+ .I off
610
+ must contain the offset on which to operate,
611
+ .I len
612
+ must contain the length, and
613
+ .I fadvise_advice
614
+ must contain the advice associated with the operation. See also
615
+ .BR posix_fadvise(2)
616
+ for the general description of the related system call. Available since 5.6.
617
+
618
+ .TP
619
+ .B IORING_OP_MADVISE
620
+ Issue the equivalent of a
621
+ .BR madvise(2)
622
+ system call.
623
+ .I addr
624
+ must contain the address to operate on,
625
+ .I len
626
+ must contain the length on which to operate,
627
+ and
628
+ .I fadvise_advice
629
+ must contain the advice associated with the operation. See also
630
+ .BR madvise(2)
631
+ for the general description of the related system call. Available since 5.6.
632
+
633
+ .TP
634
+ .B IORING_OP_OPENAT
635
+ Issue the equivalent of a
636
+ .BR openat(2)
637
+ system call.
638
+ .I fd
639
+ is the
640
+ .I dirfd
641
+ argument,
642
+ .I addr
643
+ must contain a pointer to the
644
+ .I *pathname
645
+ argument,
646
+ .I open_flags
647
+ should contain any flags passed in, and
648
+ .I len
649
+ is the access mode of the file. See also
650
+ .BR openat(2)
651
+ for the general description of the related system call. Available since 5.6.
652
+
653
+ If the
654
+ .I file_index
655
+ field is set to a positive number, the file won't be installed into the
656
+ normal file table as usual but will be placed into the fixed file table at index
657
+ .I file_index - 1.
658
+ In this case, instead of returning a file descriptor, the result will contain
659
+ either 0 on success or an error. If there is already a file registered at this
660
+ index, the request will fail with
661
+ .B -EBADF.
662
+ Only io_uring has access to such files and no other syscall can use them. See
663
+ .B IOSQE_FIXED_FILE
664
+ and
665
+ .B IORING_REGISTER_FILES.
666
+
667
+ Available since 5.15.
668
+
669
+ .TP
670
+ .B IORING_OP_OPENAT2
671
+ Issue the equivalent of a
672
+ .BR openat2(2)
673
+ system call.
674
+ .I fd
675
+ is the
676
+ .I dirfd
677
+ argument,
678
+ .I addr
679
+ must contain a pointer to the
680
+ .I *pathname
681
+ argument,
682
+ .I len
683
+ should contain the size of the open_how structure, and
684
+ .I off
685
+ should be set to the address of the open_how structure. See also
686
+ .BR openat2(2)
687
+ for the general description of the related system call. Available since 5.6.
688
+
689
+ If the
690
+ .I file_index
691
+ field is set to a positive number, the file won't be installed into the
692
+ normal file table as usual but will be placed into the fixed file table at index
693
+ .I file_index - 1.
694
+ In this case, instead of returning a file descriptor, the result will contain
695
+ either 0 on success or an error. If there is already a file registered at this
696
+ index, the request will fail with
697
+ .B -EBADF.
698
+ Only io_uring has access to such files and no other syscall can use them. See
699
+ .B IOSQE_FIXED_FILE
700
+ and
701
+ .B IORING_REGISTER_FILES.
702
+
703
+ Available since 5.15.
704
+
705
+ .TP
706
+ .B IORING_OP_CLOSE
707
+ Issue the equivalent of a
708
+ .BR close(2)
709
+ system call.
710
+ .I fd
711
+ is the file descriptor to be closed. See also
712
+ .BR close(2)
713
+ for the general description of the related system call. Available since 5.6.
714
+
715
+ .TP
716
+ .B IORING_OP_STATX
717
+ Issue the equivalent of a
718
+ .BR statx(2)
719
+ system call.
720
+ .I fd
721
+ is the
722
+ .I dirfd
723
+ argument,
724
+ .I addr
725
+ must contain a pointer to the
726
+ .I *pathname
727
+ string,
728
+ .I statx_flags
729
+ is the
730
+ .I flags
731
+ argument,
732
+ .I len
733
+ should be the
734
+ .I mask
735
+ argument, and
736
+ .I off
737
+ must contain a pointer to the
738
+ .I statxbuf
739
+ to be filled in. See also
740
+ .BR statx(2)
741
+ for the general description of the related system call. Available since 5.6.
742
+
743
+ .TP
744
+ .B IORING_OP_READ
745
+ .TP
746
+ .B IORING_OP_WRITE
747
+ Issue the equivalent of a
748
+ .BR pread(2)
749
+ or
750
+ .BR pwrite(2)
751
+ system call.
752
+ .I fd
753
+ is the file descriptor to be operated on,
754
+ .I addr
755
+ contains the buffer in question,
756
+ .I len
757
+ contains the length of the IO operation, and
758
+ .I off
759
+ contains the read or write offset. If
760
+ .I fd
761
+ does not refer to a seekable file,
762
+ .I off
763
+ must be set to zero. If
764
+ .I off
765
+ is set to -1, the offset will use (and advance) the file position, like the
766
+ .BR read(2)
767
+ and
768
+ .BR write(2)
769
+ system calls. These are non-vectored versions of the
770
+ .B IORING_OP_READV
771
+ and
772
+ .B IORING_OP_WRITEV
773
+ opcodes. See also
774
+ .BR read(2)
775
+ and
776
+ .BR write(2)
777
+ for the general description of the related system call. Available since 5.6.
778
+
779
+ .TP
780
+ .B IORING_OP_SPLICE
781
+ Issue the equivalent of a
782
+ .BR splice(2)
783
+ system call.
784
+ .I splice_fd_in
785
+ is the file descriptor to read from,
786
+ .I splice_off_in
787
+ is an offset to read from,
788
+ .I fd
789
+ is the file descriptor to write to,
790
+ .I off
791
+ is an offset from which to start writing to. A sentinel value of -1 is used
792
+ to pass the equivalent of a NULL for the offsets to
793
+ .BR splice(2).
794
+ .I len
795
+ contains the number of bytes to copy.
796
+ .I splice_flags
797
+ contains a bit mask for the flag field associated with the system call.
798
+ Please note that one of the file descriptors must refer to a pipe.
799
+ See also
800
+ .BR splice(2)
801
+ for the general description of the related system call. Available since 5.7.
802
+
803
+ .TP
804
+ .B IORING_OP_TEE
805
+ Issue the equivalent of a
806
+ .BR tee(2)
807
+ system call.
808
+ .I splice_fd_in
809
+ is the file descriptor to read from,
810
+ .I fd
811
+ is the file descriptor to write to,
812
+ .I len
813
+ contains the number of bytes to copy, and
814
+ .I splice_flags
815
+ contains a bit mask for the flag field associated with the system call.
816
+ Please note that both of the file descriptors must refer to a pipe.
817
+ See also
818
+ .BR tee(2)
819
+ for the general description of the related system call. Available since 5.8.
820
+
821
+ .TP
822
+ .B IORING_OP_FILES_UPDATE
823
+ This command is an alternative to using
824
+ .B IORING_REGISTER_FILES_UPDATE
825
+ which then works in an async fashion, like the rest of the io_uring commands.
826
+ The arguments passed in are the same.
827
+ .I addr
828
+ must contain a pointer to the array of file descriptors,
829
+ .I len
830
+ must contain the length of the array, and
831
+ .I off
832
+ must contain the offset at which to operate. Note that the array of file
833
+ descriptors pointed to in
834
+ .I addr
835
+ must remain valid until this operation has completed. Available since 5.6.
836
+
837
+ .TP
838
+ .B IORING_OP_PROVIDE_BUFFERS
839
+ This command allows an application to register a group of buffers to be used
840
+ by commands that read/receive data. Using buffers in this manner can eliminate
841
+ the need to separate the poll + read, which provides a convenient point in
842
+ time to allocate a buffer for a given request. It's often infeasible to have
843
+ as many buffers available as pending reads or receives. With this feature, the
844
+ application can have its pool of buffers ready in the kernel, and when the
845
+ file or socket is ready to read/receive data, a buffer can be selected for the
846
+ operation.
847
+ .I fd
848
+ must contain the number of buffers to provide,
849
+ .I addr
850
+ must contain the starting address to add buffers from,
851
+ .I len
852
+ must contain the length of each buffer to add from the range,
853
+ .I buf_group
854
+ must contain the group ID of this range of buffers, and
855
+ .I off
856
+ must contain the starting buffer ID of this range of buffers. With that set,
857
+ the kernel adds buffers starting with the memory address in
858
+ .I addr,
859
+ each with a length of
860
+ .I len.
861
+ Hence the application should provide
862
+ .I len * fd
863
+ worth of memory in
864
+ .I addr.
865
+ Buffers are grouped by the group ID, and each buffer within this group will be
866
+ identical in size according to the above arguments. This allows the application
867
+ to provide different groups of buffers, and this is often used to have
868
+ differently sized buffers available depending on what the expectations are of
869
+ the individual request. When submitting a request that should use a provided
870
+ buffer, the
871
+ .B IOSQE_BUFFER_SELECT
872
+ flag must be set, and
873
+ .I buf_group
874
+ must be set to the desired buffer group ID where the buffer should be selected
875
+ from. Available since 5.7.
876
+
877
+ .TP
878
+ .B IORING_OP_REMOVE_BUFFERS
879
+ Remove buffers previously registered with
880
+ .B IORING_OP_PROVIDE_BUFFERS.
881
+ .I fd
882
+ must contain the number of buffers to remove, and
883
+ .I buf_group
884
+ must contain the buffer group ID from which to remove the buffers. Available
885
+ since 5.7.
886
+
887
+ .TP
888
+ .B IORING_OP_SHUTDOWN
889
+ Issue the equivalent of a
890
+ .BR shutdown(2)
891
+ system call.
892
+ .I fd
893
+ is the file descriptor to the socket being shut down; no other fields should
894
+ be set. Available since 5.11.
895
+
896
+ .TP
897
+ .B IORING_OP_RENAMEAT
898
+ Issue the equivalent of a
899
+ .BR renameat2(2)
900
+ system call.
901
+ .I fd
902
+ should be set to the
903
+ .I olddirfd,
904
+ .I addr
905
+ should be set to the
906
+ .I oldpath,
907
+ .I len
908
+ should be set to the
909
+ .I newdirfd,
913
+ .I addr2
914
+ should be set to the
915
+ .I newpath,
916
+ and finally
917
+ .I rename_flags
918
+ should be set to the
919
+ .I flags
920
+ passed in to
921
+ .BR renameat2(2).
922
+ Available since 5.11.
923
+
924
+ .TP
925
+ .B IORING_OP_UNLINKAT
926
+ Issue the equivalent of a
927
+ .BR unlinkat(2)
928
+ system call.
929
+ .I fd
930
+ should be set to the
931
+ .I dirfd,
932
+ .I addr
933
+ should be set to the
934
+ .I pathname,
935
+ and
936
+ .I unlink_flags
937
+ should be set to the
938
+ .I flags
939
+ being passed in to
940
+ .BR unlinkat(2).
941
+ Available since 5.11.
942
+
943
+ .TP
944
+ .B IORING_OP_MKDIRAT
945
+ Issue the equivalent of a
946
+ .BR mkdirat(2)
947
+ system call.
948
+ .I fd
949
+ should be set to the
950
+ .I dirfd,
951
+ .I addr
952
+ should be set to the
953
+ .I pathname,
954
+ and
955
+ .I len
956
+ should be set to the
957
+ .I mode
958
+ being passed in to
959
+ .BR mkdirat(2).
960
+ Available since 5.15.
961
+
962
+ .TP
963
+ .B IORING_OP_SYMLINKAT
964
+ Issue the equivalent of a
965
+ .BR symlinkat(2)
966
+ system call.
967
+ .I fd
968
+ should be set to the
969
+ .I newdirfd,
970
+ .I addr
971
+ should be set to the
972
+ .I target
973
+ and
974
+ .I addr2
975
+ should be set to the
976
+ .I linkpath
977
+ being passed in to
978
+ .BR symlinkat(2).
979
+ Available since 5.15.
980
+
981
+ .TP
982
+ .B IORING_OP_LINKAT
983
+ Issue the equivalent of a
984
+ .BR linkat(2)
985
+ system call.
986
+ .I fd
987
+ should be set to the
988
+ .I olddirfd,
989
+ .I addr
990
+ should be set to the
991
+ .I oldpath,
992
+ .I len
993
+ should be set to the
994
+ .I newdirfd,
995
+ .I addr2
996
+ should be set to the
997
+ .I newpath,
998
+ and
999
+ .I hardlink_flags
1000
+ should be set to the
1001
+ .I flags
1002
+ being passed in to
1003
+ .BR linkat(2).
1004
+ Available since 5.15.
1005
+
1006
+ .PP
1007
+ The
1008
+ .I flags
1009
+ field is a bit mask. The supported flags are:
1010
+ .TP
1011
+ .B IOSQE_FIXED_FILE
1012
+ When this flag is specified,
1013
+ .I fd
1014
+ is an index into the files array registered with the io_uring instance (see the
1015
+ .B IORING_REGISTER_FILES
1016
+ section of the
1017
+ .BR io_uring_register (2)
1018
+ man page). Note that this isn't always available for all commands. If used on
1019
+ a command that doesn't support fixed files, the SQE will error with
1020
+ .B -EBADF.
1021
+ Available since 5.1.
1022
+ .TP
1023
+ .B IOSQE_IO_DRAIN
1024
+ When this flag is specified, the SQE will not be started before previously
1025
+ submitted SQEs have completed, and new SQEs will not be started before this
1026
+ one completes. Available since 5.2.
1027
+ .TP
1028
+ .B IOSQE_IO_LINK
1029
+ When this flag is specified, it forms a link with the next SQE in the
1030
+ submission ring. That next SQE will not be started before this one completes.
1031
+ This, in effect, forms a chain of SQEs, which can be arbitrarily long. The tail
1032
+ of the chain is denoted by the first SQE that does not have this flag set.
1033
+ This flag has no effect on previous SQE submissions, nor does it impact SQEs
1034
+ that are outside of the chain tail. This means that multiple chains can be
1035
+ executing in parallel, or chains and individual SQEs. Only members inside the
1036
+ chain are serialized. A chain of SQEs will be broken, if any request in that
1037
+ chain ends in error. io_uring considers any unexpected result an error. This
1038
+ means that, e.g., a short read will also terminate the remainder of the chain.
1039
+ If a chain of SQE links is broken, the remaining unstarted part of the chain
1040
+ will be terminated and completed with
1041
+ .B -ECANCELED
1042
+ as the error code. Available since 5.3.
1043
+ .TP
1044
+ .B IOSQE_IO_HARDLINK
1045
+ Like IOSQE_IO_LINK, but it doesn't sever regardless of the completion result.
1046
+ Note that the link will still sever if we fail submitting the parent request,
1047
+ hard links are only resilient in the presence of completion results for
1048
+ requests that did submit correctly. IOSQE_IO_HARDLINK implies IOSQE_IO_LINK.
1049
+ Available since 5.5.
1050
+ .TP
1051
+ .B IOSQE_ASYNC
1052
+ Normal operation for io_uring is to try and issue an sqe as non-blocking first,
1053
+ and if that fails, execute it in an async manner. To support more efficient
1054
+ overlapped operation of requests that the application knows/assumes will
1055
+ always (or most of the time) block, the application can ask for an sqe to be
1056
+ issued async from the start. Available since 5.6.
1057
+ .TP
1058
+ .B IOSQE_BUFFER_SELECT
1059
+ Used in conjunction with the
1060
+ .B IORING_OP_PROVIDE_BUFFERS
1061
+ command, which registers a pool of buffers to be used by commands that read
1062
+ or receive data. When buffers are registered for this use case, and this
1063
+ flag is set in the command, io_uring will grab a buffer from this pool when
1064
+ the request is ready to receive or read data. If successful, the resulting CQE
1065
+ will have
1066
+ .B IORING_CQE_F_BUFFER
1067
+ set in the flags part of the struct, and the upper
1068
+ .B IORING_CQE_BUFFER_SHIFT
1069
+ bits will contain the ID of the selected buffers. This allows the application
1070
+ to know exactly which buffer was selected for the operation. If no buffers
1071
+ are available and this flag is set, then the request will fail with
1072
+ .B -ENOBUFS
1073
+ as the error code. Once a buffer has been used, it is no longer available in
1074
+ the kernel pool. The application must re-register the given buffer again when
1075
+ it is ready to recycle it (e.g. has completed using it). Available since 5.7.
1076
+
1077
+ .PP
1078
+ .I ioprio
1079
+ specifies the I/O priority. See
1080
+ .BR ioprio_get (2)
1081
+ for a description of Linux I/O priorities.
1082
+
1083
+ .I fd
1084
+ specifies the file descriptor against which the operation will be
1085
+ performed, with the exception noted above.
1086
+
1087
+ If the operation is one of
1088
+ .B IORING_OP_READ_FIXED
1089
+ or
1090
+ .BR IORING_OP_WRITE_FIXED ,
1091
+ .I addr
1092
+ and
1093
+ .I len
1094
+ must fall within the buffer located at
1095
+ .I buf_index
1096
+ in the fixed buffer array. If the operation is either
1097
+ .B IORING_OP_READV
1098
+ or
1099
+ .BR IORING_OP_WRITEV ,
1100
+ then
1101
+ .I addr
1102
+ points to an iovec array of
1103
+ .I len
1104
+ entries.
1105
+
1106
+ .IR rw_flags ,
1107
+ specified for read and write operations, contains a bitwise OR of
1108
+ per-I/O flags, as described in the
1109
+ .BR preadv2 (2)
1110
+ man page.
1111
+
1112
+ The
1113
+ .I fsync_flags
1114
+ bit mask may contain either 0, for a normal file integrity sync, or
1115
+ .B IORING_FSYNC_DATASYNC
1116
+ to provide data sync only semantics. See the descriptions of
1117
+ .B O_SYNC
1118
+ and
1119
+ .B O_DSYNC
1120
+ in the
1121
+ .BR open (2)
1122
+ manual page for more information.
1123
+
1124
+ The bits that may be set in
1125
+ .I poll_events
1126
+ are defined in \fI<poll.h>\fP, and documented in
1127
+ .BR poll (2).
1128
+
1129
+ .I user_data
1130
+ is an application-supplied value that will be copied into
1131
+ the completion queue entry (see below).
1132
+ .I buf_index
1133
+ is an index into an array of fixed buffers, and is only valid if fixed
1134
+ buffers were registered.
1135
+ .I personality
1136
+ is the credentials id to use for this operation. See
1137
+ .BR io_uring_register(2)
1138
+ for how to register personalities with io_uring. If set to 0, the current
1139
+ personality of the submitting task is used.
1140
+ .PP
1141
+ Once the submission queue entry is initialized, I/O is submitted by
1142
+ placing the index of the submission queue entry into the tail of the
1143
+ submission queue. After one or more indexes are added to the queue,
1144
+ and the queue tail is advanced, the
1145
+ .BR io_uring_enter (2)
1146
+ system call can be invoked to initiate the I/O.
1147
+
1148
+ Completions use the following data structure:
1149
+ .PP
1150
+ .in +4n
1151
+ .EX
1152
+ /*
1153
+ * IO completion data structure (Completion Queue Entry)
1154
+ */
1155
+ struct io_uring_cqe {
1156
+ __u64 user_data; /* sqe->data submission passed back */
1157
+ __s32 res; /* result code for this event */
1158
+ __u32 flags;
1159
+ };
1160
+ .EE
1161
+ .in
1162
+ .PP
1163
+ .I user_data
1164
+ is copied from the field of the same name in the submission queue
1165
+ entry. The primary use case is to store data that the application
1166
+ will need to access upon completion of this particular I/O. The
1167
+ .I flags
1168
+ is used for certain commands, like
1169
+ .B IORING_OP_POLL_ADD
1170
+ or in conjunction with
1171
+ .BR IOSQE_BUFFER_SELECT ,
1172
+ see those entries.
1173
+ .I res
1174
+ is the operation-specific result, but io_uring-specific errors
1175
+ (e.g. flags or opcode invalid) are returned through this field.
1176
+ They are described in section
1177
+ .B CQE ERRORS.
1178
+ .PP
1179
+ For read and write opcodes, the
1180
+ return values match
1181
+ .I errno
1182
+ values documented in the
1183
+ .BR preadv2 (2)
1184
+ and
1185
+ .BR pwritev2 (2)
1186
+ man pages, with
1187
+ .I
1188
+ res
1189
+ holding the equivalent of
1190
+ .I -errno
1191
+ for error cases, or the transferred number of bytes in case the operation
1192
+ is successful. Hence both error and success return can be found in that
1193
+ field in the CQE. For other request types, the return values are documented
1194
+ in the matching man page for that type, or in the opcodes section above for
1195
+ io_uring-specific opcodes.
1196
+ .PP
1197
+ .SH RETURN VALUE
1198
+ .BR io_uring_enter ()
1199
+ returns the number of I/Os successfully consumed. This can be zero
1200
+ if
1201
+ .I to_submit
1202
+ was zero or if the submission queue was empty. Note that if the ring was
1203
+ created with
1204
+ .B IORING_SETUP_SQPOLL
1205
+ specified, then the return value will generally be the same as
1206
+ .I to_submit
1207
+ as submission happens outside the context of the system call.
1208
+
1209
+ The errors related to a submission queue entry will be returned through a
1210
+ completion queue entry (see section
1211
+ .B CQE ERRORS),
1212
+ rather than through the system call itself.
1213
+
1214
+ Errors that occur not on behalf of a submission queue entry are returned via the
1215
+ system call directly. On such an error, -1 is returned and
1216
+ .I errno
1217
+ is set appropriately.
1218
+ .PP
1219
+ .SH ERRORS
1220
+ These are the errors returned by the
1221
+ .BR io_uring_enter ()
1222
+ system call.
1223
+ .TP
1224
+ .B EAGAIN
1225
+ The kernel was unable to allocate memory for the request, or otherwise ran out
1226
+ of resources to handle it. The application should wait for some completions and
1227
+ try again.
1228
+ .TP
1229
+ .B EBADF
1230
+ .I fd
1231
+ is not a valid file descriptor.
1232
+ .TP
1233
+ .B EBADFD
1234
+ .I fd
1235
+ is a valid file descriptor, but the io_uring ring is not in the right state
1236
+ (enabled). See
1237
+ .BR io_uring_register (2)
1238
+ for details on how to enable the ring.
1239
+ .TP
1240
+ .B EBUSY
1241
+ The application is attempting to overcommit the number of requests it can have
1242
+ pending. The application should wait for some completions and try again. May
1243
+ occur if the application tries to queue more requests than we have room for in
1244
+ the CQ ring, or if the application attempts to wait for more events without
1245
+ having reaped the ones already present in the CQ ring.
1246
+ .TP
1247
+ .B EINVAL
1248
+ Some bits in the
1249
+ .I flags
1250
+ argument are invalid.
1251
+ .TP
1252
+ .B EFAULT
1253
+ An invalid user space address was specified for the
1254
+ .I sig
1255
+ argument.
1256
+ .TP
1257
+ .B ENXIO
1258
+ The io_uring instance is in the process of being torn down.
1259
+ .TP
1260
+ .B EOPNOTSUPP
1261
+ .I fd
1262
+ does not refer to an io_uring instance.
1263
+ .TP
1264
+ .B EINTR
1265
+ The operation was interrupted by a delivery of a signal before it could
1266
+ complete; see
1267
+ .BR signal(7).
1268
+ Can happen while waiting for events with
1269
+ .B IORING_ENTER_GETEVENTS.
1270
+
1271
+ .SH CQE ERRORS
1272
+ These io_uring-specific errors are returned as a negative value in the
1273
+ .I res
1274
+ field of the completion queue entry.
1275
+ .TP
1276
+ .B EACCES
1277
+ The
1278
+ .I flags
1279
+ field or
1280
+ .I opcode
1281
+ in a submission queue entry is not allowed due to registered restrictions.
1282
+ See
1283
+ .BR io_uring_register (2)
1284
+ for details on how restrictions work.
1285
+ .TP
1286
+ .B EBADF
1287
+ The
1288
+ .I fd
1289
+ field in the submission queue entry is invalid, or the
1290
+ .B IOSQE_FIXED_FILE
1291
+ flag was set in the submission queue entry, but no files were registered
1292
+ with the io_uring instance.
1293
+ .TP
1294
+ .B EFAULT
1295
+ The buffer is outside of the process's accessible address space.
1296
+ .TP
1297
+ .B EFAULT
1298
+ .B IORING_OP_READ_FIXED
1299
+ or
1300
+ .B IORING_OP_WRITE_FIXED
1301
+ was specified in the
1302
+ .I opcode
1303
+ field of the submission queue entry, but either buffers were not
1304
+ registered for this io_uring instance, or the address range described
1305
+ by
1306
+ .I addr
1307
+ and
1308
+ .I len
1309
+ does not fit within the buffer registered at
1310
+ .IR buf_index .
1311
+ .TP
1312
+ .B EINVAL
1313
+ The
1314
+ .I flags
1315
+ field or
1316
+ .I opcode
1317
+ in a submission queue entry is invalid.
1318
+ .TP
1319
+ .B EINVAL
1320
+ The
1321
+ .I buf_index
1322
+ member of the submission queue entry is invalid.
1323
+ .TP
1324
+ .B EINVAL
1325
+ The
1326
+ .I personality
1327
+ field in a submission queue entry is invalid.
1328
+ .TP
1329
+ .B EINVAL
1330
+ .B IORING_OP_NOP
1331
+ was specified in the submission queue entry, but the io_uring context
1332
+ was set up for polling
1333
+ .RB ( IORING_SETUP_IOPOLL
1334
+ was specified in the call to io_uring_setup).
1335
+ .TP
1336
+ .B EINVAL
1337
+ .B IORING_OP_READV
1338
+ or
1339
+ .B IORING_OP_WRITEV
1340
+ was specified in the submission queue entry, but the io_uring instance
1341
+ has fixed buffers registered.
1342
+ .TP
1343
+ .B EINVAL
1344
+ .B IORING_OP_READ_FIXED
1345
+ or
1346
+ .B IORING_OP_WRITE_FIXED
1347
+ was specified in the submission queue entry, and the
1348
+ .I buf_index
1349
+ is invalid.
1350
+ .TP
1351
+ .B EINVAL
1352
+ .BR IORING_OP_READV ,
1353
+ .BR IORING_OP_WRITEV ,
1354
+ .BR IORING_OP_READ_FIXED ,
1355
+ .B IORING_OP_WRITE_FIXED
1356
+ or
1357
+ .B IORING_OP_FSYNC
1358
+ was specified in the submission queue entry, but the io_uring instance
1359
+ was configured for IOPOLLing, or any of
1360
+ .IR addr ,
1361
+ .IR ioprio ,
1362
+ .IR off ,
1363
+ .IR len ,
1364
+ or
1365
+ .I buf_index
1366
+ was set in the submission queue entry.
1367
+ .TP
1368
+ .B EINVAL
1369
+ .B IORING_OP_POLL_ADD
1370
+ or
1371
+ .B IORING_OP_POLL_REMOVE
1372
+ was specified in the
1373
+ .I opcode
1374
+ field of the submission queue entry, but the io_uring instance was
1375
+ configured for busy-wait polling
1376
+ .RB ( IORING_SETUP_IOPOLL ),
1377
+ or any of
1378
+ .IR ioprio ,
1379
+ .IR off ,
1380
+ .IR len ,
1381
+ or
1382
+ .I buf_index
1383
+ was non-zero in the submission queue entry.
1384
+ .TP
1385
+ .B EINVAL
1386
+ .B IORING_OP_POLL_ADD
1387
+ was specified in the
1388
+ .I opcode
1389
+ field of the submission queue entry, and the
1390
+ .I addr
1391
+ field was non-zero.
1392
+ .TP
1393
+ .B EOPNOTSUPP
1394
+ .I opcode
1395
+ is valid, but not supported by this kernel.
1396
+ .TP
1397
+ .B EOPNOTSUPP
1398
+ .B IOSQE_BUFFER_SELECT
1399
+ was set in the
1400
+ .I flags
1401
+ field of the submission queue entry, but the
1402
+ .I opcode
1403
+ doesn't support buffer selection.