polyphony 0.45.0 → 0.46.0
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +2 -0
- data/.gitmodules +0 -0
- data/.rubocop.yml +1 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile.lock +11 -3
- data/README.md +3 -3
- data/Rakefile +1 -1
- data/TODO.md +10 -18
- data/examples/adapters/redis_client.rb +3 -1
- data/examples/adapters/redis_pubsub_perf.rb +11 -8
- data/examples/adapters/sequel_mysql.rb +1 -1
- data/examples/adapters/sequel_pg.rb +24 -0
- data/examples/core/{02-awaiting-fibers.rb → await.rb} +0 -0
- data/examples/core/{xx-channels.rb → channels.rb} +0 -0
- data/examples/core/deferring-an-operation.rb +16 -0
- data/examples/core/{xx-erlang-style-genserver.rb → erlang-style-genserver.rb} +16 -9
- data/examples/core/{xx-forking.rb → forking.rb} +1 -1
- data/examples/core/handling-signals.rb +11 -0
- data/examples/core/{03-interrupting.rb → interrupt.rb} +0 -0
- data/examples/core/{xx-pingpong.rb → pingpong.rb} +7 -5
- data/examples/core/{xx-recurrent-timer.rb → recurrent-timer.rb} +1 -1
- data/examples/core/{xx-resource_delegate.rb → resource_delegate.rb} +3 -4
- data/examples/core/{01-spinning-up-fibers.rb → spin.rb} +1 -1
- data/examples/core/{xx-spin_error_backtrace.rb → spin_error_backtrace.rb} +1 -1
- data/examples/core/{xx-supervise-process.rb → supervise-process.rb} +8 -5
- data/examples/core/supervisor.rb +20 -0
- data/examples/core/{xx-thread-sleep.rb → thread-sleep.rb} +0 -0
- data/examples/core/{xx-thread_pool.rb → thread_pool.rb} +0 -0
- data/examples/core/{xx-throttling.rb → throttling.rb} +0 -0
- data/examples/core/{xx-timeout.rb → timeout.rb} +0 -0
- data/examples/core/{xx-using-a-mutex.rb → using-a-mutex.rb} +0 -0
- data/examples/core/{xx-worker-thread.rb → worker-thread.rb} +2 -2
- data/examples/io/{xx-backticks.rb → backticks.rb} +0 -0
- data/examples/io/{xx-echo_client.rb → echo_client.rb} +1 -1
- data/examples/io/{xx-echo_client_from_stdin.rb → echo_client_from_stdin.rb} +2 -2
- data/examples/io/{xx-echo_pipe.rb → echo_pipe.rb} +1 -1
- data/examples/io/{xx-echo_server.rb → echo_server.rb} +0 -0
- data/examples/io/{xx-echo_server_with_timeout.rb → echo_server_with_timeout.rb} +1 -1
- data/examples/io/{xx-echo_stdin.rb → echo_stdin.rb} +0 -0
- data/examples/io/{xx-happy-eyeballs.rb → happy-eyeballs.rb} +0 -0
- data/examples/io/{xx-httparty.rb → httparty.rb} +4 -13
- data/examples/io/{xx-irb.rb → irb.rb} +0 -0
- data/examples/io/{xx-net-http.rb → net-http.rb} +0 -0
- data/examples/io/{xx-open.rb → open.rb} +0 -0
- data/examples/io/{xx-pry.rb → pry.rb} +0 -0
- data/examples/io/{xx-rack_server.rb → rack_server.rb} +0 -0
- data/examples/io/raw.rb +14 -0
- data/examples/io/reline.rb +18 -0
- data/examples/io/{xx-system.rb → system.rb} +1 -1
- data/examples/io/{xx-tcpserver.rb → tcpserver.rb} +0 -0
- data/examples/io/{xx-tcpsocket.rb → tcpsocket.rb} +0 -0
- data/examples/io/tunnel.rb +6 -1
- data/examples/io/{xx-zip.rb → zip.rb} +0 -0
- data/examples/performance/fiber_transfer.rb +2 -1
- data/examples/performance/fs_read.rb +5 -6
- data/examples/performance/multi_snooze.rb +0 -1
- data/examples/{io/xx-switch.rb → performance/switch.rb} +2 -1
- data/examples/performance/thread-vs-fiber/{xx-httparty_multi.rb → httparty_multi.rb} +3 -4
- data/examples/performance/thread-vs-fiber/{xx-httparty_threaded.rb → httparty_threaded.rb} +0 -0
- data/examples/performance/thread-vs-fiber/polyphony_mt_server.rb +1 -1
- data/examples/performance/thread-vs-fiber/polyphony_server.rb +1 -2
- data/examples/performance/thread-vs-fiber/threaded_server.rb +1 -5
- data/examples/performance/thread_pool_perf.rb +6 -7
- data/ext/liburing/liburing.h +585 -0
- data/ext/liburing/liburing/README.md +4 -0
- data/ext/liburing/liburing/barrier.h +73 -0
- data/ext/liburing/liburing/compat.h +15 -0
- data/ext/liburing/liburing/io_uring.h +343 -0
- data/ext/liburing/queue.c +333 -0
- data/ext/liburing/register.c +187 -0
- data/ext/liburing/setup.c +210 -0
- data/ext/liburing/syscall.c +54 -0
- data/ext/liburing/syscall.h +18 -0
- data/ext/polyphony/backend.h +1 -16
- data/ext/polyphony/backend_common.h +109 -0
- data/ext/polyphony/backend_io_uring.c +884 -0
- data/ext/polyphony/backend_io_uring_context.c +73 -0
- data/ext/polyphony/backend_io_uring_context.h +52 -0
- data/ext/polyphony/{libev_backend.c → backend_libev.c} +255 -345
- data/ext/polyphony/event.c +1 -1
- data/ext/polyphony/extconf.rb +31 -13
- data/ext/polyphony/fiber.c +111 -27
- data/ext/polyphony/libev.c +4 -0
- data/ext/polyphony/libev.h +8 -2
- data/ext/polyphony/liburing.c +8 -0
- data/ext/polyphony/playground.c +51 -0
- data/ext/polyphony/polyphony.c +6 -8
- data/ext/polyphony/polyphony.h +29 -25
- data/ext/polyphony/polyphony_ext.c +13 -6
- data/ext/polyphony/queue.c +3 -4
- data/ext/polyphony/ring_buffer.c +0 -1
- data/ext/polyphony/runqueue.c +102 -0
- data/ext/polyphony/runqueue_ring_buffer.c +85 -0
- data/ext/polyphony/runqueue_ring_buffer.h +31 -0
- data/ext/polyphony/thread.c +45 -92
- data/lib/polyphony.rb +2 -2
- data/lib/polyphony/adapters/fs.rb +1 -1
- data/lib/polyphony/adapters/process.rb +0 -3
- data/lib/polyphony/adapters/redis.rb +1 -1
- data/lib/polyphony/adapters/trace.rb +2 -2
- data/lib/polyphony/core/global_api.rb +9 -12
- data/lib/polyphony/core/sync.rb +6 -2
- data/lib/polyphony/extensions/core.rb +6 -24
- data/lib/polyphony/extensions/debug.rb +13 -0
- data/lib/polyphony/extensions/fiber.rb +21 -44
- data/lib/polyphony/extensions/io.rb +55 -10
- data/lib/polyphony/extensions/socket.rb +70 -12
- data/lib/polyphony/version.rb +1 -1
- data/polyphony.gemspec +3 -2
- data/test/helper.rb +36 -4
- data/test/io_uring_test.rb +55 -0
- data/test/stress.rb +5 -2
- data/test/test_backend.rb +4 -6
- data/test/test_ext.rb +1 -2
- data/test/test_fiber.rb +31 -24
- data/test/test_global_api.rb +58 -31
- data/test/test_io.rb +58 -0
- data/test/test_signal.rb +11 -8
- data/test/test_socket.rb +17 -0
- data/test/test_sync.rb +21 -0
- data/test/test_throttler.rb +3 -6
- data/test/test_trace.rb +7 -5
- metadata +86 -76
- data/examples/adapters/concurrent-ruby.rb +0 -9
- data/examples/core/04-handling-signals.rb +0 -19
- data/examples/core/xx-at_exit.rb +0 -29
- data/examples/core/xx-backend.rb +0 -102
- data/examples/core/xx-caller.rb +0 -12
- data/examples/core/xx-daemon.rb +0 -14
- data/examples/core/xx-deadlock.rb +0 -8
- data/examples/core/xx-deferring-an-operation.rb +0 -14
- data/examples/core/xx-exception-backtrace.rb +0 -40
- data/examples/core/xx-fork-cleanup.rb +0 -22
- data/examples/core/xx-fork-spin.rb +0 -42
- data/examples/core/xx-fork-terminate.rb +0 -27
- data/examples/core/xx-move_on.rb +0 -23
- data/examples/core/xx-queue-async.rb +0 -120
- data/examples/core/xx-readpartial.rb +0 -18
- data/examples/core/xx-signals.rb +0 -16
- data/examples/core/xx-sleep-forever.rb +0 -9
- data/examples/core/xx-sleeping.rb +0 -25
- data/examples/core/xx-snooze-starve.rb +0 -16
- data/examples/core/xx-spin-fork.rb +0 -49
- data/examples/core/xx-state-machine.rb +0 -51
- data/examples/core/xx-stop.rb +0 -20
- data/examples/core/xx-supervisors.rb +0 -21
- data/examples/core/xx-thread-selector-sleep.rb +0 -51
- data/examples/core/xx-thread-selector-snooze.rb +0 -46
- data/examples/core/xx-thread-snooze.rb +0 -34
- data/examples/core/xx-timer-gc.rb +0 -17
- data/examples/core/xx-trace.rb +0 -79
- data/examples/performance/xx-array.rb +0 -11
- data/examples/performance/xx-fiber-switch.rb +0 -9
- data/examples/performance/xx-snooze.rb +0 -15
- data/examples/xx-spin.rb +0 -32
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_BARRIER_H
+#define LIBURING_BARRIER_H
+
+/*
+From the kernel documentation file refcount-vs-atomic.rst:
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). This is implemented using
+:c:func:`smp_store_release`.
+
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+*/
+
+#ifdef __cplusplus
+#include <atomic>
+
+template <typename T>
+static inline void IO_URING_WRITE_ONCE(T &var, T val)
+{
+  std::atomic_store_explicit(reinterpret_cast<std::atomic<T> *>(&var),
+                             val, std::memory_order_relaxed);
+}
+template <typename T>
+static inline T IO_URING_READ_ONCE(const T &var)
+{
+  return std::atomic_load_explicit(
+    reinterpret_cast<const std::atomic<T> *>(&var),
+    std::memory_order_relaxed);
+}
+
+template <typename T>
+static inline void io_uring_smp_store_release(T *p, T v)
+{
+  std::atomic_store_explicit(reinterpret_cast<std::atomic<T> *>(p), v,
+                             std::memory_order_release);
+}
+
+template <typename T>
+static inline T io_uring_smp_load_acquire(const T *p)
+{
+  return std::atomic_load_explicit(
+    reinterpret_cast<const std::atomic<T> *>(p),
+    std::memory_order_acquire);
+}
+#else
+#include <stdatomic.h>
+
+#define IO_URING_WRITE_ONCE(var, val) \
+  atomic_store_explicit((_Atomic typeof(var) *)&(var), \
+                        (val), memory_order_relaxed)
+#define IO_URING_READ_ONCE(var) \
+  atomic_load_explicit((_Atomic typeof(var) *)&(var), \
+                       memory_order_relaxed)
+
+#define io_uring_smp_store_release(p, v) \
+  atomic_store_explicit((_Atomic typeof(*(p)) *)(p), (v), \
+                        memory_order_release)
+#define io_uring_smp_load_acquire(p) \
+  atomic_load_explicit((_Atomic typeof(*(p)) *)(p), \
+                       memory_order_acquire)
+#endif
+
+#endif /* defined(LIBURING_BARRIER_H) */
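The hunk above adds data/ext/liburing/liburing/barrier.h (+73 lines in the file list): portable acquire/release helpers for the head/tail indices the application shares with the kernel. Below is a minimal sketch of how such helpers pair up, assuming barrier.h is on the include path; the wrapper names and pointer parameters are illustrative only, not part of liburing or Polyphony.

#include <stdbool.h>
#include "liburing/barrier.h"

/* Publish a new SQ tail: the kernel must observe the filled SQEs
   before it observes the tail moving, hence the store-release. */
static void publish_sq_tail(unsigned *ktail, unsigned new_tail)
{
  io_uring_smp_store_release(ktail, new_tail);
}

/* Check for a fresh completion: the load-acquire on the CQ tail ensures
   the CQE contents written by the kernel are visible before we read them. */
static bool cqe_available(unsigned *ktail, unsigned khead)
{
  return io_uring_smp_load_acquire(ktail) != khead;
}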
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Header file for the io_uring interface.
+ *
+ * Copyright (C) 2019 Jens Axboe
+ * Copyright (C) 2019 Christoph Hellwig
+ */
+#ifndef LINUX_IO_URING_H
+#define LINUX_IO_URING_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * IO submission data structure (Submission Queue Entry)
+ */
+struct io_uring_sqe {
+  __u8 opcode;    /* type of operation for this sqe */
+  __u8 flags;     /* IOSQE_ flags */
+  __u16 ioprio;   /* ioprio for the request */
+  __s32 fd;       /* file descriptor to do IO on */
+  union {
+    __u64 off;    /* offset into file */
+    __u64 addr2;
+  };
+  union {
+    __u64 addr;   /* pointer to buffer or iovecs */
+    __u64 splice_off_in;
+  };
+  __u32 len;      /* buffer size or number of iovecs */
+  union {
+    __kernel_rwf_t rw_flags;
+    __u32 fsync_flags;
+    __u16 poll_events;    /* compatibility */
+    __u32 poll32_events;  /* word-reversed for BE */
+    __u32 sync_range_flags;
+    __u32 msg_flags;
+    __u32 timeout_flags;
+    __u32 accept_flags;
+    __u32 cancel_flags;
+    __u32 open_flags;
+    __u32 statx_flags;
+    __u32 fadvise_advice;
+    __u32 splice_flags;
+  };
+  __u64 user_data;  /* data to be passed back at completion time */
+  union {
+    struct {
+      /* pack this to avoid bogus arm OABI complaints */
+      union {
+        /* index into fixed buffers, if used */
+        __u16 buf_index;
+        /* for grouped buffer selection */
+        __u16 buf_group;
+      } __attribute__((packed));
+      /* personality to use, if used */
+      __u16 personality;
+      __s32 splice_fd_in;
+    };
+    __u64 __pad2[3];
+  };
+};
+
+enum {
+  IOSQE_FIXED_FILE_BIT,
+  IOSQE_IO_DRAIN_BIT,
+  IOSQE_IO_LINK_BIT,
+  IOSQE_IO_HARDLINK_BIT,
+  IOSQE_ASYNC_BIT,
+  IOSQE_BUFFER_SELECT_BIT,
+};
+
+/*
+ * sqe->flags
+ */
+/* use fixed fileset */
+#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT)
+/* issue after inflight IO */
+#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT)
+/* links next sqe */
+#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT)
+/* like LINK, but stronger */
+#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
+/* always go async */
+#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
+/* select buffer from sqe->buf_group */
+#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
+
+/*
+ * io_uring_setup() flags
+ */
+#define IORING_SETUP_IOPOLL     (1U << 0) /* io_context is polled */
+#define IORING_SETUP_SQPOLL     (1U << 1) /* SQ poll thread */
+#define IORING_SETUP_SQ_AFF     (1U << 2) /* sq_thread_cpu is valid */
+#define IORING_SETUP_CQSIZE     (1U << 3) /* app defines CQ size */
+#define IORING_SETUP_CLAMP      (1U << 4) /* clamp SQ/CQ ring sizes */
+#define IORING_SETUP_ATTACH_WQ  (1U << 5) /* attach to existing wq */
+#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
+
+enum {
+  IORING_OP_NOP,
+  IORING_OP_READV,
+  IORING_OP_WRITEV,
+  IORING_OP_FSYNC,
+  IORING_OP_READ_FIXED,
+  IORING_OP_WRITE_FIXED,
+  IORING_OP_POLL_ADD,
+  IORING_OP_POLL_REMOVE,
+  IORING_OP_SYNC_FILE_RANGE,
+  IORING_OP_SENDMSG,
+  IORING_OP_RECVMSG,
+  IORING_OP_TIMEOUT,
+  IORING_OP_TIMEOUT_REMOVE,
+  IORING_OP_ACCEPT,
+  IORING_OP_ASYNC_CANCEL,
+  IORING_OP_LINK_TIMEOUT,
+  IORING_OP_CONNECT,
+  IORING_OP_FALLOCATE,
+  IORING_OP_OPENAT,
+  IORING_OP_CLOSE,
+  IORING_OP_FILES_UPDATE,
+  IORING_OP_STATX,
+  IORING_OP_READ,
+  IORING_OP_WRITE,
+  IORING_OP_FADVISE,
+  IORING_OP_MADVISE,
+  IORING_OP_SEND,
+  IORING_OP_RECV,
+  IORING_OP_OPENAT2,
+  IORING_OP_EPOLL_CTL,
+  IORING_OP_SPLICE,
+  IORING_OP_PROVIDE_BUFFERS,
+  IORING_OP_REMOVE_BUFFERS,
+  IORING_OP_TEE,
+  IORING_OP_SHUTDOWN,
+
+  /* this goes last, obviously */
+  IORING_OP_LAST,
+};
+
+/*
+ * sqe->fsync_flags
+ */
+#define IORING_FSYNC_DATASYNC (1U << 0)
+
+/*
+ * sqe->timeout_flags
+ */
+#define IORING_TIMEOUT_ABS (1U << 0)
+
+/*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+
+/*
+ * IO completion data structure (Completion Queue Entry)
+ */
+struct io_uring_cqe {
+  __u64 user_data;  /* sqe->data submission passed back */
+  __s32 res;        /* result code for this event */
+  __u32 flags;
+};
+
+/*
+ * cqe->flags
+ *
+ * IORING_CQE_F_BUFFER  If set, the upper 16 bits are the buffer ID
+ */
+#define IORING_CQE_F_BUFFER (1U << 0)
+
+enum {
+  IORING_CQE_BUFFER_SHIFT = 16,
+};
+
+/*
+ * Magic offsets for the application to mmap the data it needs
+ */
+#define IORING_OFF_SQ_RING 0ULL
+#define IORING_OFF_CQ_RING 0x8000000ULL
+#define IORING_OFF_SQES    0x10000000ULL
+
+/*
+ * Filled with the offset for mmap(2)
+ */
+struct io_sqring_offsets {
+  __u32 head;
+  __u32 tail;
+  __u32 ring_mask;
+  __u32 ring_entries;
+  __u32 flags;
+  __u32 dropped;
+  __u32 array;
+  __u32 resv1;
+  __u64 resv2;
+};
+
+/*
+ * sq_ring->flags
+ */
+#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
+#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
+
+struct io_cqring_offsets {
+  __u32 head;
+  __u32 tail;
+  __u32 ring_mask;
+  __u32 ring_entries;
+  __u32 overflow;
+  __u32 cqes;
+  __u32 flags;
+  __u32 resv1;
+  __u64 resv2;
+};
+
+/*
+ * cq_ring->flags
+ */
+
+/* disable eventfd notifications */
+#define IORING_CQ_EVENTFD_DISABLED (1U << 0)
+
+/*
+ * io_uring_enter(2) flags
+ */
+#define IORING_ENTER_GETEVENTS (1U << 0)
+#define IORING_ENTER_SQ_WAKEUP (1U << 1)
+#define IORING_ENTER_SQ_WAIT   (1U << 2)
+
+/*
+ * Passed in for io_uring_setup(2). Copied back with updated info on success
+ */
+struct io_uring_params {
+  __u32 sq_entries;
+  __u32 cq_entries;
+  __u32 flags;
+  __u32 sq_thread_cpu;
+  __u32 sq_thread_idle;
+  __u32 features;
+  __u32 wq_fd;
+  __u32 resv[3];
+  struct io_sqring_offsets sq_off;
+  struct io_cqring_offsets cq_off;
+};
+
+/*
+ * io_uring_params->features flags
+ */
+#define IORING_FEAT_SINGLE_MMAP     (1U << 0)
+#define IORING_FEAT_NODROP          (1U << 1)
+#define IORING_FEAT_SUBMIT_STABLE   (1U << 2)
+#define IORING_FEAT_RW_CUR_POS      (1U << 3)
+#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
+#define IORING_FEAT_FAST_POLL       (1U << 5)
+#define IORING_FEAT_POLL_32BITS     (1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
+
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+enum {
+  IORING_REGISTER_BUFFERS       = 0,
+  IORING_UNREGISTER_BUFFERS     = 1,
+  IORING_REGISTER_FILES         = 2,
+  IORING_UNREGISTER_FILES       = 3,
+  IORING_REGISTER_EVENTFD       = 4,
+  IORING_UNREGISTER_EVENTFD     = 5,
+  IORING_REGISTER_FILES_UPDATE  = 6,
+  IORING_REGISTER_EVENTFD_ASYNC = 7,
+  IORING_REGISTER_PROBE         = 8,
+  IORING_REGISTER_PERSONALITY   = 9,
+  IORING_UNREGISTER_PERSONALITY = 10,
+  IORING_REGISTER_RESTRICTIONS  = 11,
+  IORING_REGISTER_ENABLE_RINGS  = 12,
+
+  /* this goes last */
+  IORING_REGISTER_LAST
+};
+
+struct io_uring_files_update {
+  __u32 offset;
+  __u32 resv;
+  __aligned_u64 /* __s32 * */ fds;
+};
+
+#define IO_URING_OP_SUPPORTED (1U << 0)
+
+struct io_uring_probe_op {
+  __u8 op;
+  __u8 resv;
+  __u16 flags;  /* IO_URING_OP_* flags */
+  __u32 resv2;
+};
+
+struct io_uring_probe {
+  __u8 last_op; /* last opcode supported */
+  __u8 ops_len; /* length of ops[] array below */
+  __u16 resv;
+  __u32 resv2[3];
+  struct io_uring_probe_op ops[0];
+};
+
+struct io_uring_restriction {
+  __u16 opcode;
+  union {
+    __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */
+    __u8 sqe_op;      /* IORING_RESTRICTION_SQE_OP */
+    __u8 sqe_flags;   /* IORING_RESTRICTION_SQE_FLAGS_* */
+  };
+  __u8 resv;
+  __u32 resv2[3];
+};
+
+/*
+ * io_uring_restriction->opcode values
+ */
+enum {
+  /* Allow an io_uring_register(2) opcode */
+  IORING_RESTRICTION_REGISTER_OP = 0,
+
+  /* Allow an sqe opcode */
+  IORING_RESTRICTION_SQE_OP = 1,
+
+  /* Allow sqe flags */
+  IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2,
+
+  /* Require sqe flags (these flags must be set on each submission) */
+  IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3,
+
+  IORING_RESTRICTION_LAST
+};
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
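The hunk above adds data/ext/liburing/liburing/io_uring.h (+343 lines), the raw kernel ABI: the SQE/CQE layouts plus the setup, enter, and register flags. As a quick orientation, here is a hedged sketch of filling one SQE for a plain IORING_OP_READ using only the declarations above; prep_read_sqe is a made-up helper name, and real code would normally reach for liburing's io_uring_prep_* wrappers instead.

#include <string.h>
/* assumes the io_uring.h header added above is on the include path */
#include "liburing/io_uring.h"

static void prep_read_sqe(struct io_uring_sqe *sqe, int fd, void *buf,
                          unsigned nbytes, __u64 offset, __u64 user_data)
{
  memset(sqe, 0, sizeof(*sqe));         /* leave the unused unions zeroed */
  sqe->opcode    = IORING_OP_READ;      /* read into sqe->addr */
  sqe->fd        = fd;                  /* file descriptor to do IO on */
  sqe->addr      = (unsigned long)buf;  /* pointer to buffer */
  sqe->len       = nbytes;              /* buffer size */
  sqe->off       = offset;              /* offset into file */
  sqe->user_data = user_data;           /* echoed back in cqe->user_data */
}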
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: MIT */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+#include "liburing.h"
+#include "liburing/barrier.h"
+
+#include "syscall.h"
+
+/*
+ * Returns true if we're not using SQ thread (thus nobody submits but us)
+ * or if IORING_SQ_NEED_WAKEUP is set, so submit thread must be explicitly
+ * awakened. For the latter case, we set the thread wakeup flag.
+ */
+static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned *flags)
+{
+  if (!(ring->flags & IORING_SETUP_SQPOLL))
+    return true;
+  if (IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_NEED_WAKEUP) {
+    *flags |= IORING_ENTER_SQ_WAKEUP;
+    return true;
+  }
+
+  return false;
+}
+
+static inline bool cq_ring_needs_flush(struct io_uring *ring)
+{
+  return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW;
+}
+
+static int __io_uring_peek_cqe(struct io_uring *ring,
+                               struct io_uring_cqe **cqe_ptr)
+{
+  struct io_uring_cqe *cqe;
+  unsigned head;
+  int err = 0;
+
+  do {
+    io_uring_for_each_cqe(ring, head, cqe)
+      break;
+    if (cqe) {
+      if (cqe->user_data == LIBURING_UDATA_TIMEOUT) {
+        if (cqe->res < 0)
+          err = cqe->res;
+        io_uring_cq_advance(ring, 1);
+        if (!err)
+          continue;
+        cqe = NULL;
+      }
+    }
+    break;
+  } while (1);
+
+  *cqe_ptr = cqe;
+  return err;
+}
+
+int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+                       unsigned submit, unsigned wait_nr, sigset_t *sigmask)
+{
+  struct io_uring_cqe *cqe = NULL;
+  const int to_wait = wait_nr;
+  int ret = 0, err;
+
+  do {
+    bool cq_overflow_flush = false;
+    unsigned flags = 0;
+
+    err = __io_uring_peek_cqe(ring, &cqe);
+    if (err)
+      break;
+    if (!cqe && !to_wait && !submit) {
+      if (!cq_ring_needs_flush(ring)) {
+        err = -EAGAIN;
+        break;
+      }
+      cq_overflow_flush = true;
+    }
+    if (wait_nr && cqe)
+      wait_nr--;
+    if (wait_nr || cq_overflow_flush)
+      flags = IORING_ENTER_GETEVENTS;
+    if (submit)
+      sq_ring_needs_enter(ring, &flags);
+    if (wait_nr || submit || cq_overflow_flush)
+      ret = __sys_io_uring_enter(ring->ring_fd, submit,
+                                 wait_nr, flags, sigmask);
+    if (ret < 0) {
+      err = -errno;
+    } else if (ret == (int)submit) {
+      submit = 0;
+      /*
+       * When SETUP_IOPOLL is set, __sys_io_uring enter()
+       * must be called to reap new completions but the call
+       * won't be made if both wait_nr and submit are zero
+       * so preserve wait_nr.
+       */
+      if (!(ring->flags & IORING_SETUP_IOPOLL))
+        wait_nr = 0;
+    } else {
+      submit -= ret;
+    }
+    if (cqe)
+      break;
+  } while (!err);
+
+  *cqe_ptr = cqe;
+  return err;
+}
+
+/*
+ * Fill in an array of IO completions up to count, if any are available.
+ * Returns the amount of IO completions filled.
+ */
+unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
+                                 struct io_uring_cqe **cqes, unsigned count)
+{
+  unsigned ready;
+  bool overflow_checked = false;
+
+again:
+  ready = io_uring_cq_ready(ring);
+  if (ready) {
+    unsigned head = *ring->cq.khead;
+    unsigned mask = *ring->cq.kring_mask;
+    unsigned last;
+    int i = 0;
+
+    count = count > ready ? ready : count;
+    last = head + count;
+    for (;head != last; head++, i++)
+      cqes[i] = &ring->cq.cqes[head & mask];
+
+    return count;
+  }
+
+  if (overflow_checked)
+    goto done;
+
+  if (cq_ring_needs_flush(ring)) {
+    __sys_io_uring_enter(ring->ring_fd, 0, 0,
+                         IORING_ENTER_GETEVENTS, NULL);
+    overflow_checked = true;
+    goto again;
+  }
+
+done:
+  return 0;
+}
+
+/*
+ * Sync internal state with kernel ring state on the SQ side. Returns the
+ * number of pending items in the SQ ring, for the shared ring.
+ */
+int __io_uring_flush_sq(struct io_uring *ring)
+{
+  struct io_uring_sq *sq = &ring->sq;
+  const unsigned mask = *sq->kring_mask;
+  unsigned ktail, to_submit;
+
+  if (sq->sqe_head == sq->sqe_tail) {
+    ktail = *sq->ktail;
+    goto out;
+  }
+
+  /*
+   * Fill in sqes that we have queued up, adding them to the kernel ring
+   */
+  ktail = *sq->ktail;
+  to_submit = sq->sqe_tail - sq->sqe_head;
+  while (to_submit--) {
+    sq->array[ktail & mask] = sq->sqe_head & mask;
+    ktail++;
+    sq->sqe_head++;
+  }
+
+  /*
+   * Ensure that the kernel sees the SQE updates before it sees the tail
+   * update.
+   */
+  io_uring_smp_store_release(sq->ktail, ktail);
+out:
+  return ktail - *sq->khead;
+}
+
+/*
+ * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
+ * that an sqe is used internally to handle the timeout. Applications using
+ * this function must never set sqe->user_data to LIBURING_UDATA_TIMEOUT!
+ *
+ * If 'ts' is specified, the application need not call io_uring_submit() before
+ * calling this function, as we will do that on its behalf. From this it also
+ * follows that this function isn't safe to use for applications that split SQ
+ * and CQ handling between two threads and expect that to work without
+ * synchronization, as this function manipulates both the SQ and CQ side.
+ */
+int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+                       unsigned wait_nr, struct __kernel_timespec *ts,
+                       sigset_t *sigmask)
+{
+  unsigned to_submit = 0;
+
+  if (ts) {
+    struct io_uring_sqe *sqe;
+    int ret;
+
+    /*
+     * If the SQ ring is full, we may need to submit IO first
+     */
+    sqe = io_uring_get_sqe(ring);
+    if (!sqe) {
+      ret = io_uring_submit(ring);
+      if (ret < 0)
+        return ret;
+      sqe = io_uring_get_sqe(ring);
+      if (!sqe)
+        return -EAGAIN;
+    }
+    io_uring_prep_timeout(sqe, ts, wait_nr, 0);
+    sqe->user_data = LIBURING_UDATA_TIMEOUT;
+    to_submit = __io_uring_flush_sq(ring);
+  }
+
+  return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
+}
+
+/*
+ * See io_uring_wait_cqes() - this function is the same, it just always uses
+ * '1' as the wait_nr.
+ */
+int io_uring_wait_cqe_timeout(struct io_uring *ring,
+                              struct io_uring_cqe **cqe_ptr,
+                              struct __kernel_timespec *ts)
+{
+  return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL);
+}
+
+/*
+ * Submit sqes acquired from io_uring_get_sqe() to the kernel.
+ *
+ * Returns number of sqes submitted
+ */
+static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
+                             unsigned wait_nr)
+{
+  unsigned flags;
+  int ret;
+
+  flags = 0;
+  if (sq_ring_needs_enter(ring, &flags) || wait_nr) {
+    if (wait_nr || (ring->flags & IORING_SETUP_IOPOLL))
+      flags |= IORING_ENTER_GETEVENTS;
+
+    ret = __sys_io_uring_enter(ring->ring_fd, submitted, wait_nr,
+                               flags, NULL);
+    if (ret < 0)
+      return -errno;
+  } else
+    ret = submitted;
+
+  return ret;
+}
+
+static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
+{
+  return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr);
+}
+
+/*
+ * Submit sqes acquired from io_uring_get_sqe() to the kernel.
+ *
+ * Returns number of sqes submitted
+ */
+int io_uring_submit(struct io_uring *ring)
+{
+  return __io_uring_submit_and_wait(ring, 0);
+}
+
+/*
+ * Like io_uring_submit(), but allows waiting for events as well.
+ *
+ * Returns number of sqes submitted
+ */
+int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
+{
+  return __io_uring_submit_and_wait(ring, wait_nr);
+}
+
+static inline struct io_uring_sqe *
+__io_uring_get_sqe(struct io_uring_sq *sq, unsigned int __head)
+{
+  unsigned int __next = (sq)->sqe_tail + 1;
+  struct io_uring_sqe *__sqe = NULL;
+
+  if (__next - __head <= *(sq)->kring_entries) {
+    __sqe = &(sq)->sqes[(sq)->sqe_tail & *(sq)->kring_mask];
+    (sq)->sqe_tail = __next;
+  }
+  return __sqe;
+}
+
+/*
+ * Return an sqe to fill. Application must later call io_uring_submit()
+ * when it's ready to tell the kernel about it. The caller may call this
+ * function multiple times before calling io_uring_submit().
+ *
+ * Returns a vacant sqe, or NULL if we're full.
+ */
+struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+{
+  struct io_uring_sq *sq = &ring->sq;
+
+  return __io_uring_get_sqe(sq, io_uring_smp_load_acquire(sq->khead));
+}
+
+int __io_uring_sqring_wait(struct io_uring *ring)
+{
+  int ret;
+
+  ret = __sys_io_uring_enter(ring->ring_fd, 0, 0, IORING_ENTER_SQ_WAIT,
+                             NULL);
+  if (ret < 0)
+    ret = -errno;
+  return ret;
+}