uringmachine 0.3 → 0.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +85 -0
- data/TODO.md +5 -0
- data/examples/echo_server.rb +18 -40
- data/examples/inout.rb +19 -0
- data/examples/nc.rb +36 -0
- data/ext/um/extconf.rb +6 -15
- data/ext/um/um.c +245 -53
- data/ext/um/um.h +21 -9
- data/ext/um/um_class.c +74 -87
- data/ext/um/um_const.c +184 -0
- data/ext/um/um_op.c +10 -13
- data/ext/um/um_utils.c +48 -3
- data/lib/uringmachine/version.rb +1 -1
- data/lib/uringmachine.rb +12 -0
- data/test/helper.rb +8 -0
- data/test/test_um.rb +227 -7
- data/vendor/liburing/.github/workflows/build.yml +29 -1
- data/vendor/liburing/.gitignore +1 -0
- data/vendor/liburing/CHANGELOG +15 -0
- data/vendor/liburing/CONTRIBUTING.md +165 -0
- data/vendor/liburing/configure +32 -0
- data/vendor/liburing/examples/Makefile +8 -1
- data/vendor/liburing/examples/kdigest.c +405 -0
- data/vendor/liburing/examples/proxy.c +75 -8
- data/vendor/liburing/liburing.pc.in +1 -1
- data/vendor/liburing/src/Makefile +16 -2
- data/vendor/liburing/src/include/liburing/io_uring.h +31 -0
- data/vendor/liburing/src/include/liburing/sanitize.h +39 -0
- data/vendor/liburing/src/include/liburing.h +31 -4
- data/vendor/liburing/src/liburing-ffi.map +5 -0
- data/vendor/liburing/src/liburing.map +1 -0
- data/vendor/liburing/src/queue.c +3 -0
- data/vendor/liburing/src/register.c +36 -0
- data/vendor/liburing/src/sanitize.c +176 -0
- data/vendor/liburing/src/setup.c +1 -1
- data/vendor/liburing/test/35fa71a030ca.c +7 -0
- data/vendor/liburing/test/500f9fbadef8.c +2 -0
- data/vendor/liburing/test/7ad0e4b2f83c.c +0 -25
- data/vendor/liburing/test/917257daa0fe.c +7 -0
- data/vendor/liburing/test/Makefile +31 -4
- data/vendor/liburing/test/a0908ae19763.c +7 -0
- data/vendor/liburing/test/a4c0b3decb33.c +7 -0
- data/vendor/liburing/test/accept.c +14 -4
- data/vendor/liburing/test/b19062a56726.c +7 -0
- data/vendor/liburing/test/bind-listen.c +2 -2
- data/vendor/liburing/test/buf-ring-nommap.c +10 -3
- data/vendor/liburing/test/buf-ring.c +2 -0
- data/vendor/liburing/test/coredump.c +7 -0
- data/vendor/liburing/test/cq-overflow.c +13 -1
- data/vendor/liburing/test/d4ae271dfaae.c +11 -3
- data/vendor/liburing/test/defer-taskrun.c +2 -2
- data/vendor/liburing/test/defer-tw-timeout.c +4 -1
- data/vendor/liburing/test/defer.c +2 -2
- data/vendor/liburing/test/double-poll-crash.c +1 -1
- data/vendor/liburing/test/eeed8b54e0df.c +2 -0
- data/vendor/liburing/test/eventfd.c +0 -1
- data/vendor/liburing/test/exit-no-cleanup.c +11 -0
- data/vendor/liburing/test/fadvise.c +9 -26
- data/vendor/liburing/test/fdinfo.c +9 -1
- data/vendor/liburing/test/file-register.c +14 -2
- data/vendor/liburing/test/file-update.c +1 -1
- data/vendor/liburing/test/file-verify.c +27 -16
- data/vendor/liburing/test/files-exit-hang-timeout.c +1 -2
- data/vendor/liburing/test/fixed-buf-iter.c +3 -1
- data/vendor/liburing/test/fixed-hugepage.c +12 -1
- data/vendor/liburing/test/fsnotify.c +1 -0
- data/vendor/liburing/test/futex.c +16 -4
- data/vendor/liburing/test/helpers.c +47 -0
- data/vendor/liburing/test/helpers.h +6 -0
- data/vendor/liburing/test/init-mem.c +5 -3
- data/vendor/liburing/test/io-cancel.c +0 -24
- data/vendor/liburing/test/io_uring_passthrough.c +2 -0
- data/vendor/liburing/test/io_uring_register.c +25 -6
- data/vendor/liburing/test/iopoll-leak.c +4 -0
- data/vendor/liburing/test/iopoll-overflow.c +1 -1
- data/vendor/liburing/test/iopoll.c +3 -3
- data/vendor/liburing/test/kallsyms.c +203 -0
- data/vendor/liburing/test/link-timeout.c +159 -0
- data/vendor/liburing/test/linked-defer-close.c +224 -0
- data/vendor/liburing/test/madvise.c +12 -25
- data/vendor/liburing/test/min-timeout-wait.c +0 -25
- data/vendor/liburing/test/min-timeout.c +0 -25
- data/vendor/liburing/test/mkdir.c +6 -0
- data/vendor/liburing/test/msg-ring.c +8 -2
- data/vendor/liburing/test/napi-test.c +15 -2
- data/vendor/liburing/test/no-mmap-inval.c +2 -0
- data/vendor/liburing/test/nop.c +44 -0
- data/vendor/liburing/test/ooo-file-unreg.c +1 -1
- data/vendor/liburing/test/open-close.c +40 -0
- data/vendor/liburing/test/openat2.c +37 -14
- data/vendor/liburing/test/poll-many.c +13 -7
- data/vendor/liburing/test/poll-mshot-update.c +17 -10
- data/vendor/liburing/test/poll-v-poll.c +6 -3
- data/vendor/liburing/test/pollfree.c +148 -0
- data/vendor/liburing/test/read-mshot-empty.c +156 -153
- data/vendor/liburing/test/read-mshot.c +276 -27
- data/vendor/liburing/test/read-write.c +78 -13
- data/vendor/liburing/test/recv-msgall-stream.c +3 -0
- data/vendor/liburing/test/recv-msgall.c +5 -0
- data/vendor/liburing/test/recvsend_bundle-inc.c +680 -0
- data/vendor/liburing/test/recvsend_bundle.c +92 -29
- data/vendor/liburing/test/reg-fd-only.c +14 -4
- data/vendor/liburing/test/regbuf-clone.c +187 -0
- data/vendor/liburing/test/regbuf-merge.c +7 -0
- data/vendor/liburing/test/register-restrictions.c +86 -85
- data/vendor/liburing/test/rename.c +59 -1
- data/vendor/liburing/test/ringbuf-read.c +5 -0
- data/vendor/liburing/test/ringbuf-status.c +5 -1
- data/vendor/liburing/test/runtests.sh +16 -1
- data/vendor/liburing/test/send-zerocopy.c +59 -0
- data/vendor/liburing/test/short-read.c +1 -0
- data/vendor/liburing/test/socket.c +43 -0
- data/vendor/liburing/test/splice.c +3 -1
- data/vendor/liburing/test/sq-poll-dup.c +1 -1
- data/vendor/liburing/test/sq-poll-share.c +2 -0
- data/vendor/liburing/test/sqpoll-disable-exit.c +8 -0
- data/vendor/liburing/test/sqpoll-exit-hang.c +1 -25
- data/vendor/liburing/test/sqpoll-sleep.c +1 -25
- data/vendor/liburing/test/statx.c +89 -0
- data/vendor/liburing/test/stdout.c +2 -0
- data/vendor/liburing/test/submit-and-wait.c +1 -25
- data/vendor/liburing/test/submit-reuse.c +4 -26
- data/vendor/liburing/test/symlink.c +12 -1
- data/vendor/liburing/test/sync-cancel.c +48 -21
- data/vendor/liburing/test/thread-exit.c +5 -0
- data/vendor/liburing/test/timeout-new.c +1 -26
- data/vendor/liburing/test/timeout.c +12 -26
- data/vendor/liburing/test/unlink.c +94 -1
- data/vendor/liburing/test/uring_cmd_ublk.c +1252 -0
- data/vendor/liburing/test/waitid.c +62 -8
- data/vendor/liburing/test/wq-aff.c +35 -0
- data/vendor/liburing/test/xfail_prep_link_timeout_out_of_scope.c +46 -0
- data/vendor/liburing/test/xfail_register_buffers_out_of_scope.c +51 -0
- metadata +17 -4
- data/examples/event_loop.rb +0 -69
- data/examples/fibers.rb +0 -105
--- /dev/null
+++ b/data/vendor/liburing/test/uring_cmd_ublk.c
@@ -0,0 +1,1252 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: uring_cmd based ublk
+ *
+ * Covers cancellable uring_cmd feature.
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <pthread.h>
+#include <limits.h>
+#include <poll.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/inotify.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+#include "helpers.h"
+#ifdef CONFIG_HAVE_UBLK_HEADER
+#include <linux/ublk_cmd.h>
+
+/****************** part 1: libublk ********************/
+
+#define CTRL_DEV		"/dev/ublk-control"
+#define UBLKC_DEV		"/dev/ublkc"
+#define UBLKB_DEV		"/dev/ublkb"
+#define UBLK_CTRL_RING_DEPTH	32
+
+/* queue idle timeout */
+#define UBLKSRV_IO_IDLE_SECS	20
+
+#define UBLK_IO_MAX_BYTES	65536
+#define UBLK_MAX_QUEUES		4
+#define UBLK_QUEUE_DEPTH	128
+
+#define UBLK_DBG_DEV		(1U << 0)
+#define UBLK_DBG_QUEUE		(1U << 1)
+#define UBLK_DBG_IO_CMD		(1U << 2)
+#define UBLK_DBG_IO		(1U << 3)
+#define UBLK_DBG_CTRL_CMD	(1U << 4)
+#define UBLK_LOG		(1U << 5)
+
+struct ublk_dev;
+struct ublk_queue;
+
+struct ublk_ctrl_cmd_data {
+	__u32 cmd_op;
+#define CTRL_CMD_HAS_DATA	1
+#define CTRL_CMD_HAS_BUF	2
+	__u32 flags;
+
+	__u64 data[2];
+	__u64 addr;
+	__u32 len;
+};
+
+struct ublk_io {
+	char *buf_addr;
+
+#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
+#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
+#define UBLKSRV_IO_FREE			(1UL << 2)
+	unsigned int flags;
+
+	unsigned int result;
+};
+
+struct ublk_tgt_ops {
+	const char *name;
+	int (*init_tgt)(struct ublk_dev *);
+	void (*deinit_tgt)(struct ublk_dev *);
+
+	int (*queue_io)(struct ublk_queue *, int tag);
+	void (*tgt_io_done)(struct ublk_queue *,
+			int tag, const struct io_uring_cqe *);
+};
+
+struct ublk_tgt {
+	unsigned long dev_size;
+	const struct ublk_tgt_ops *ops;
+	struct ublk_params params;
+};
+
+struct ublk_queue {
+	int q_id;
+	int q_depth;
+	unsigned int cmd_inflight;
+	unsigned int io_inflight;
+	struct ublk_dev *dev;
+	const struct ublk_tgt_ops *tgt_ops;
+	char *io_cmd_buf;
+	struct io_uring ring;
+	struct ublk_io ios[UBLK_QUEUE_DEPTH];
+#define UBLKSRV_QUEUE_STOPPING	(1U << 0)
+#define UBLKSRV_QUEUE_IDLE	(1U << 1)
+	unsigned state;
+	pid_t tid;
+	pthread_t thread;
+};
+
+struct ublk_dev {
+	struct ublk_tgt tgt;
+	struct ublksrv_ctrl_dev_info dev_info;
+	struct ublk_queue q[UBLK_MAX_QUEUES];
+
+	int fds[2];	/* fds[0] points to /dev/ublkcN */
+	int nr_fds;
+	int ctrl_fd;
+	struct io_uring ring;
+};
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({			\
+	unsigned long __mptr = (unsigned long)(ptr);		\
+	((type *)(__mptr - offsetof(type, member))); })
+#endif
+
+#define round_up(val, rnd) \
+	(((val) + ((rnd) - 1)) & ~((rnd) - 1))
+
+static unsigned int ublk_dbg_mask = 0;
+
+static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
+
+static inline int is_target_io(__u64 user_data)
+{
+	return (user_data & (1ULL << 63)) != 0;
+}
+
+static inline __u64 build_user_data(unsigned tag, unsigned op,
+		unsigned tgt_data, unsigned is_target_io)
+{
+	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
+
+	return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
+}
+
+static inline unsigned int user_data_to_tag(__u64 user_data)
+{
+	return user_data & 0xffff;
+}
+
+static inline unsigned int user_data_to_op(__u64 user_data)
+{
+	return (user_data >> 16) & 0xff;
+}
+
+static void ublk_err(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+}
+
+static void ublk_dbg(int level, const char *fmt, ...)
+{
+	if (level & ublk_dbg_mask) {
+		va_list ap;
+		va_start(ap, fmt);
+		vfprintf(stdout, fmt, ap);
+	}
+}
+
+static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
+{
+	return (void *)&sqe->cmd;
+}
+
+static inline void ublk_mark_io_done(struct ublk_io *io, int res)
+{
+	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
+	io->result = res;
+}
+
+static inline const struct ublksrv_io_desc *ublk_get_iod(
+		const struct ublk_queue *q, int tag)
+{
+	return (struct ublksrv_io_desc *)
+		&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
+}
+
+static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
+		__u32 cmd_op)
+{
+	__u32 *addr = (__u32 *)&sqe->off;
+
+	addr[0] = cmd_op;
+	addr[1] = 0;
+}
+
+static inline int ublk_setup_ring(struct io_uring *r, int depth,
+		int cq_depth, unsigned flags)
+{
+	struct io_uring_params p;
+
+	memset(&p, 0, sizeof(p));
+	p.flags = flags | IORING_SETUP_CQSIZE;
+	p.cq_entries = cq_depth;
+
+	return io_uring_queue_init_params(depth, r, &p);
+}
+
+static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
+		struct io_uring_sqe *sqe,
+		struct ublk_ctrl_cmd_data *data)
+{
+	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
+
+	sqe->fd = dev->ctrl_fd;
+	sqe->opcode = IORING_OP_URING_CMD;
+	sqe->ioprio = 0;
+
+	if (data->flags & CTRL_CMD_HAS_BUF) {
+		cmd->addr = data->addr;
+		cmd->len = data->len;
+	}
+
+	if (data->flags & CTRL_CMD_HAS_DATA)
+		cmd->data[0] = data->data[0];
+
+	cmd->dev_id = info->dev_id;
+	cmd->queue_id = -1;
+
+	ublk_set_sqe_cmd_op(sqe, data->cmd_op);
+
+	io_uring_sqe_set_data(sqe, cmd);
+}
+
+static int __ublk_ctrl_cmd(struct ublk_dev *dev,
+		struct ublk_ctrl_cmd_data *data)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret = -EINVAL;
+
+	sqe = io_uring_get_sqe(&dev->ring);
+	if (!sqe) {
+		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
+		return ret;
+	}
+
+	ublk_ctrl_init_cmd(dev, sqe, data);
+
+	ret = io_uring_submit(&dev->ring);
+	if (ret < 0) {
+		ublk_err("uring submit ret %d\n", ret);
+		return ret;
+	}
+
+	ret = io_uring_wait_cqe(&dev->ring, &cqe);
+	if (ret < 0) {
+		ublk_err("wait cqe: %s\n", strerror(-ret));
+		return ret;
+	}
+	io_uring_cqe_seen(&dev->ring, cqe);
+
+	return cqe->res;
+}
+
+static int ublk_ctrl_start_dev(struct ublk_dev *dev,
+		int daemon_pid)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_START_DEV,
+		.flags	= CTRL_CMD_HAS_DATA,
+	};
+
+	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_add_dev(struct ublk_dev *dev)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_ADD_DEV,
+		.flags	= CTRL_CMD_HAS_BUF,
+		.addr = (__u64) (uintptr_t) &dev->dev_info,
+		.len = sizeof(struct ublksrv_ctrl_dev_info),
+	};
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_del_dev(struct ublk_dev *dev)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op = UBLK_U_CMD_DEL_DEV,
+		.flags = 0,
+	};
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_get_info(struct ublk_dev *dev)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_GET_DEV_INFO,
+		.flags	= CTRL_CMD_HAS_BUF,
+		.addr = (__u64) (uintptr_t) &dev->dev_info,
+		.len = sizeof(struct ublksrv_ctrl_dev_info),
+	};
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_set_params(struct ublk_dev *dev,
+		struct ublk_params *params)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_SET_PARAMS,
+		.flags	= CTRL_CMD_HAS_BUF,
+		.addr = (__u64) (uintptr_t) params,
+		.len = sizeof(*params),
+	};
+	params->len = sizeof(*params);
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_get_features(struct ublk_dev *dev,
+		__u64 *features)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_GET_FEATURES,
+		.flags	= CTRL_CMD_HAS_BUF,
+		.addr = (__u64) (uintptr_t) features,
+		.len = sizeof(*features),
+	};
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static void ublk_ctrl_deinit(struct ublk_dev *dev)
+{
+	close(dev->ctrl_fd);
+	free(dev);
+}
+
+static struct ublk_dev *ublk_ctrl_init(void)
+{
+	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
+	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+	int ret;
+
+	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
+	if (dev->ctrl_fd < 0) {
+		free(dev);
+		return NULL;
+	}
+
+	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
+
+	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
+			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
+	if (ret < 0) {
+		ublk_err("queue_init: %s\n", strerror(-ret));
+		free(dev);
+		return NULL;
+	}
+	dev->nr_fds = 1;
+
+	return dev;
+}
+
+static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
+{
+	int size = q->q_depth * sizeof(struct ublksrv_io_desc);
+	unsigned int page_sz = getpagesize();
+
+	return round_up(size, page_sz);
+}
+
+static void ublk_queue_deinit(struct ublk_queue *q)
+{
+	int i;
+	int nr_ios = q->q_depth;
+
+	io_uring_unregister_ring_fd(&q->ring);
+
+	if (q->ring.ring_fd > 0) {
+		io_uring_unregister_files(&q->ring);
+		close(q->ring.ring_fd);
+		q->ring.ring_fd = -1;
+	}
+
+	if (q->io_cmd_buf)
+		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
+
+	for (i = 0; i < nr_ios; i++)
+		free(q->ios[i].buf_addr);
+}
+
+static int ublk_queue_init(struct ublk_queue *q)
+{
+	struct ublk_dev *dev = q->dev;
+	int depth = dev->dev_info.queue_depth;
+	int i, ret = -1;
+	int cmd_buf_size, io_buf_size;
+	unsigned long off;
+	int ring_depth = depth, cq_depth = depth;
+
+	q->tgt_ops = dev->tgt.ops;
+	q->state = 0;
+	q->q_depth = depth;
+	q->cmd_inflight = 0;
+	q->tid = gettid();
+
+	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
+	off = UBLKSRV_CMD_BUF_OFFSET +
+		q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
+	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
+			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
+	if (q->io_cmd_buf == MAP_FAILED) {
+		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
+				q->dev->dev_info.dev_id, q->q_id);
+		goto fail;
+	}
+
+	io_buf_size = dev->dev_info.max_io_buf_bytes;
+	for (i = 0; i < q->q_depth; i++) {
+		q->ios[i].buf_addr = NULL;
+
+		if (posix_memalign((void **)&q->ios[i].buf_addr,
+					getpagesize(), io_buf_size)) {
+			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
+					dev->dev_info.dev_id, q->q_id, i);
+			goto fail;
+		}
+		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
+	}
+
+	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
+			IORING_SETUP_COOP_TASKRUN);
+	if (ret < 0) {
+		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
+				q->dev->dev_info.dev_id, q->q_id, ret);
+		goto fail;
+	}
+
+	io_uring_register_ring_fd(&q->ring);
+
+	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
+	if (ret) {
+		ublk_err("ublk dev %d queue %d register files failed %d\n",
+				q->dev->dev_info.dev_id, q->q_id, ret);
+		goto fail;
+	}
+
+	return 0;
+fail:
+	ublk_queue_deinit(q);
+	ublk_err("ublk dev %d queue %d failed\n",
+			dev->dev_info.dev_id, q->q_id);
+	return -ENOMEM;
+}
+
+static int ublk_dev_prep(struct ublk_dev *dev)
+{
+	int dev_id = dev->dev_info.dev_id;
+	char buf[64];
+	int ret = 0;
+
+	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
+	dev->fds[0] = open(buf, O_RDWR);
+	if (dev->fds[0] < 0) {
+		ret = -EBADF;
+		ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
+		goto fail;
+	}
+
+	if (dev->tgt.ops->init_tgt)
+		ret = dev->tgt.ops->init_tgt(dev);
+
+	return ret;
+fail:
+	close(dev->fds[0]);
+	return ret;
+}
+
+static void ublk_dev_unprep(struct ublk_dev *dev)
+{
+	if (dev->tgt.ops->deinit_tgt)
+		dev->tgt.ops->deinit_tgt(dev);
+	close(dev->fds[0]);
+}
+
+static int ublk_queue_io_cmd(struct ublk_queue *q,
+		struct ublk_io *io, unsigned tag)
+{
+	struct ublksrv_io_cmd *cmd;
+	struct io_uring_sqe *sqe;
+	unsigned int cmd_op = 0;
+	__u64 user_data;
+
+	/* only freed io can be issued */
+	if (!(io->flags & UBLKSRV_IO_FREE))
+		return 0;
+
+	/* we issue because we need either fetching or committing */
+	if (!(io->flags &
+		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
+		return 0;
+
+	if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
+	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
+		cmd_op = UBLK_U_IO_FETCH_REQ;
+
+	sqe = io_uring_get_sqe(&q->ring);
+	if (!sqe) {
+		ublk_err("%s: run out of sqe %d, tag %d\n",
+				__func__, q->q_id, tag);
+		return -1;
+	}
+
+	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
+
+	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
+		cmd->result = io->result;
+
+	/* These fields should be written once, never change */
+	ublk_set_sqe_cmd_op(sqe, cmd_op);
+	sqe->fd = 0;	/* dev->fds[0] */
+	sqe->opcode = IORING_OP_URING_CMD;
+	sqe->flags = IOSQE_FIXED_FILE;
+	sqe->rw_flags = 0;
+	cmd->tag = tag;
+	cmd->addr = (__u64) (uintptr_t) io->buf_addr;
+	cmd->q_id = q->q_id;
+
+	user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
+	io_uring_sqe_set_data64(sqe, user_data);
+
+	io->flags = 0;
+
+	q->cmd_inflight += 1;
+
+	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
+			__func__, q->q_id, tag, cmd_op,
+			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
+	return 1;
+}
+
+static int ublk_complete_io(struct ublk_queue *q,
+		unsigned tag, int res)
+{
+	struct ublk_io *io = &q->ios[tag];
+
+	ublk_mark_io_done(io, res);
+
+	return ublk_queue_io_cmd(q, io, tag);
+}
+
+static void ublk_submit_fetch_commands(struct ublk_queue *q)
+{
+	int i = 0;
+
+	for (i = 0; i < q->q_depth; i++)
+		ublk_queue_io_cmd(q, &q->ios[i], i);
+}
+
+static int ublk_queue_is_idle(struct ublk_queue *q)
+{
+	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
+}
+
+static int ublk_queue_is_done(struct ublk_queue *q)
+{
+	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
+}
+
+static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
+		struct io_uring_cqe *cqe)
+{
+	unsigned tag = user_data_to_tag(cqe->user_data);
+
+	if (cqe->res < 0 && cqe->res != -EAGAIN)
+		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
+			__func__, cqe->res, q->q_id,
+			user_data_to_tag(cqe->user_data),
+			user_data_to_op(cqe->user_data));
+
+	if (q->tgt_ops->tgt_io_done)
+		q->tgt_ops->tgt_io_done(q, tag, cqe);
+}
+
+static void ublk_handle_cqe(struct io_uring *r,
+		struct io_uring_cqe *cqe, void *data)
+{
+	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
+	unsigned tag = user_data_to_tag(cqe->user_data);
+	unsigned cmd_op = user_data_to_op(cqe->user_data);
+	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
+		!(q->state & UBLKSRV_QUEUE_STOPPING);
+	struct ublk_io *io;
+
+	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
+			__func__, cqe->res, q->q_id, tag, cmd_op,
+			is_target_io(cqe->user_data),
+			(q->state & UBLKSRV_QUEUE_STOPPING));
+
+	/* Don't retrieve io in case of target io */
+	if (is_target_io(cqe->user_data)) {
+		ublksrv_handle_tgt_cqe(q, cqe);
+		return;
+	}
+
+	io = &q->ios[tag];
+	q->cmd_inflight--;
+
+	if (!fetch) {
+		q->state |= UBLKSRV_QUEUE_STOPPING;
+		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
+	}
+
+	if (cqe->res == UBLK_IO_RES_OK) {
+		assert(tag < q->q_depth);
+		q->tgt_ops->queue_io(q, tag);
+	} else {
+		/*
+		 * COMMIT_REQ will be completed immediately since no fetching
+		 * piggyback is required.
+		 *
+		 * Marking IO_FREE only, then this io won't be issued since
+		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
+		 *
+		 * */
+		io->flags = UBLKSRV_IO_FREE;
+	}
+}
+
+static int ublk_reap_events_uring(struct io_uring *r)
+{
+	struct io_uring_cqe *cqe;
+	unsigned head;
+	int count = 0;
+
+	io_uring_for_each_cqe(r, head, cqe) {
+		ublk_handle_cqe(r, cqe, NULL);
+		count += 1;
+	}
+	io_uring_cq_advance(r, count);
+
+	return count;
+}
+
+static int ublk_process_io(struct ublk_queue *q)
+{
+	int ret, reapped;
+
+	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
+			q->dev->dev_info.dev_id,
+			q->q_id, io_uring_sq_ready(&q->ring),
+			q->cmd_inflight,
+			(q->state & UBLKSRV_QUEUE_STOPPING));
+
+	if (ublk_queue_is_done(q))
+		return -ENODEV;
+
+	ret = io_uring_submit_and_wait(&q->ring, 1);
+	reapped = ublk_reap_events_uring(&q->ring);
+
+	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
+			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
+			(q->state & UBLKSRV_QUEUE_IDLE));
+
+	return reapped;
+}
+
+static void *ublk_io_handler_fn(void *data)
+{
+	struct ublk_queue *q = data;
+	int dev_id = q->dev->dev_info.dev_id;
+	int ret;
+
+	ret = ublk_queue_init(q);
+	if (ret) {
+		ublk_err("ublk dev %d queue %d init queue failed\n",
+				dev_id, q->q_id);
+		return NULL;
+	}
+	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
+			q->tid, dev_id, q->q_id);
+
+	/* submit all io commands to ublk driver */
+	ublk_submit_fetch_commands(q);
+	do {
+		if (ublk_process_io(q) < 0)
+			break;
+	} while (1);
+
+	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
+	ublk_queue_deinit(q);
+	return NULL;
+}
+
+static void ublk_set_parameters(struct ublk_dev *dev)
+{
+	int ret;
+
+	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
+	if (ret)
+		ublk_err("dev %d set basic parameter failed %d\n",
+				dev->dev_info.dev_id, ret);
+}
+
+static int ublk_start_daemon(struct ublk_dev *dev)
+{
+	int ret, i;
+	void *thread_ret;
+	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
+
+	if (daemon(1, 1) < 0)
+		return -errno;
+
+	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
+
+	ret = ublk_dev_prep(dev);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < dinfo->nr_hw_queues; i++) {
+		dev->q[i].dev = dev;
+		dev->q[i].q_id = i;
+		pthread_create(&dev->q[i].thread, NULL,
+				ublk_io_handler_fn,
+				&dev->q[i]);
+	}
+
+	/* everything is fine now, start us */
+	ublk_set_parameters(dev);
+	ret = ublk_ctrl_start_dev(dev, getpid());
+	if (ret < 0) {
+		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
+		goto fail;
+	}
+
+	/* wait until we are terminated */
+	for (i = 0; i < dinfo->nr_hw_queues; i++)
+		pthread_join(dev->q[i].thread, &thread_ret);
+fail:
+	ublk_dev_unprep(dev);
+	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
+
+	return ret;
+}
+
+static int wait_ublk_dev(char *dev_name, int evt_mask, unsigned timeout)
+{
+#define EV_SIZE (sizeof(struct inotify_event))
+#define EV_BUF_LEN (128 * (EV_SIZE + 16))
+	struct pollfd pfd;
+	int fd, wd;
+	int ret = -EINVAL;
+
+	fd = inotify_init();
+	if (fd < 0) {
+		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
+		return fd;
+	}
+
+	wd = inotify_add_watch(fd, "/dev", evt_mask);
+	if (wd == -1) {
+		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
+		goto fail;
+	}
+
+	pfd.fd = fd;
+	pfd.events = POLL_IN;
+	while (1) {
+		int i = 0;
+		char buffer[EV_BUF_LEN];
+		ret = poll(&pfd, 1, 1000 * timeout);
+
+		if (ret == -1) {
+			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
+			goto rm_watch;
+		} else if (ret == 0) {
+			ublk_err("%s: poll inotify timeout\n", __func__);
+			ret = -ENOENT;
+			goto rm_watch;
+		}
+
+		ret = read(fd, buffer, EV_BUF_LEN);
+		if (ret < 0) {
+			ublk_err("%s: read inotify fd failed\n", __func__);
+			goto rm_watch;
+		}
+
+		while (i < ret) {
+			struct inotify_event *event = (struct inotify_event *)&buffer[i];
+
+			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
+					__func__, event->mask, event->name);
+			if (event->mask & evt_mask) {
+				if (!strcmp(event->name, dev_name)) {
+					ret = 0;
+					goto rm_watch;
+				}
+			}
+			i += EV_SIZE + event->len;
+		}
+	}
+rm_watch:
+	inotify_rm_watch(fd, wd);
+fail:
+	close(fd);
+	return ret;
+}
+
+static int ublk_stop_io_daemon(const struct ublk_dev *dev)
+{
+	int daemon_pid = dev->dev_info.ublksrv_pid;
+	int dev_id = dev->dev_info.dev_id;
+	char ublkc[64];
+	int ret;
+
+	/*
+	 * Wait until ublk char device is closed, when our daemon is shutdown
+	 */
+	snprintf(ublkc, sizeof(ublkc), "%s%d", "ublkc", dev_id);
+	ret = wait_ublk_dev(ublkc, IN_CLOSE_WRITE, 10);
+	waitpid(dev->dev_info.ublksrv_pid, NULL, 0);
+	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
+			__func__, daemon_pid, dev_id, ret);
+
+	return ret;
+}
+
+static int cmd_dev_add(char *tgt_type, int *exp_id, unsigned nr_queues,
+		unsigned depth)
+{
+	const struct ublk_tgt_ops *ops;
+	struct ublksrv_ctrl_dev_info *info;
+	struct ublk_dev *dev;
+	int dev_id = *exp_id;
+	char ublkb[64];
+	int ret;
+
+	ops = ublk_find_tgt(tgt_type);
+	if (!ops) {
+		ublk_err("%s: no such tgt type, type %s\n",
+				__func__, tgt_type);
+		return -ENODEV;
+	}
+
+	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
+		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
+				__func__, nr_queues, depth);
+		return -EINVAL;
+	}
+
+	dev = ublk_ctrl_init();
+	if (!dev) {
+		ublk_err("%s: can't alloc dev id %d, type %s\n",
+				__func__, dev_id, tgt_type);
+		return -ENOMEM;
+	}
+
+	info = &dev->dev_info;
+	info->dev_id = dev_id;
+	info->nr_hw_queues = nr_queues;
+	info->queue_depth = depth;
+	dev->tgt.ops = ops;
+
+	ret = ublk_ctrl_add_dev(dev);
+	if (ret < 0) {
+		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
+				__func__, dev_id, tgt_type, ret);
+		goto fail;
+	}
+
+	switch (fork()) {
+	case -1:
+		goto fail;
+	case 0:
+		ublk_start_daemon(dev);
+		return 0;
+	}
+
+	/*
+	 * Wait until ublk disk is added, when our daemon is started
+	 * successfully
+	 */
+	snprintf(ublkb, sizeof(ublkb), "%s%u", "ublkb", dev->dev_info.dev_id);
+	ret = wait_ublk_dev(ublkb, IN_CREATE, 3);
+	if (ret < 0) {
+		ublk_err("%s: can't start daemon id %d, type %s\n",
+				__func__, dev_id, tgt_type);
+		ublk_ctrl_del_dev(dev);
+	} else {
+		*exp_id = dev->dev_info.dev_id;
+	}
+fail:
+	ublk_ctrl_deinit(dev);
+	return ret;
+}
+
+static int cmd_dev_del_by_kill(int number)
+{
+	struct ublk_dev *dev;
+	int ret;
+
+	dev = ublk_ctrl_init();
+	dev->dev_info.dev_id = number;
+
+	ret = ublk_ctrl_get_info(dev);
+	if (ret < 0)
+		goto fail;
+
+	/* simulate one ublk daemon panic */
+	kill(dev->dev_info.ublksrv_pid, 9);
+
+	ret = ublk_stop_io_daemon(dev);
+	if (ret < 0)
+		ublk_err("%s: can't stop daemon id %d\n", __func__, number);
+	ublk_ctrl_del_dev(dev);
+fail:
+	if (ret >= 0)
+		ret = ublk_ctrl_get_info(dev);
+	ublk_ctrl_deinit(dev);
+
+	return (ret != 0) ? 0 : -EIO;
+}
+
+/****************** part 2: target implementation ********************/
+
+static int ublk_null_tgt_init(struct ublk_dev *dev)
+{
+	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+	unsigned long dev_size = 250UL << 30;
+
+	dev->tgt.dev_size = dev_size;
+	dev->tgt.params = (struct ublk_params) {
+		.types = UBLK_PARAM_TYPE_BASIC,
+		.basic = {
+			.logical_bs_shift	= 9,
+			.physical_bs_shift	= 12,
+			.io_opt_shift		= 12,
+			.io_min_shift		= 9,
+			.max_sectors		= info->max_io_buf_bytes >> 9,
+			.dev_sectors		= dev_size >> 9,
+		},
+	};
+
+	return 0;
+}
+
+static int ublk_null_queue_io(struct ublk_queue *q, int tag)
+{
+	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+
+	ublk_complete_io(q, tag, iod->nr_sectors << 9);
+
+	return 0;
+}
+
+static const struct ublk_tgt_ops tgt_ops_list[] = {
+	{
+		.name = "null",
+		.init_tgt = ublk_null_tgt_init,
+		.queue_io = ublk_null_queue_io,
+	},
+};
+
+static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
+{
+	const struct ublk_tgt_ops *ops;
+	int i;
+
+	if (name == NULL)
+		return NULL;
+
+	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(*ops); i++)
+		if (strcmp(tgt_ops_list[i].name, name) == 0)
+			return &tgt_ops_list[i];
+	return NULL;
+}
+
+
+/****************** part 3: IO test over ublk disk ********************/
+
+#include "helpers.h"
+#include "liburing.h"
+#define BS		4096
+#define BUFFERS		128
+
+struct io_ctx {
+	int dev_id;
+	int write;
+	int seq;
+
+	/* output */
+	int res;
+	pthread_t handle;
+};
+
+static int __test_io(struct io_uring *ring, int fd, int write,
+		int seq, struct iovec *vecs, int exp_len, off_t start)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int i, ret;
+	off_t offset;
+
+	offset = start;
+	for (i = 0; i < BUFFERS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "sqe get failed\n");
+			goto err;
+		}
+		if (!seq)
+			offset = start + BS * (rand() % BUFFERS);
+		if (write) {
+			io_uring_prep_write_fixed(sqe, fd, vecs[i].iov_base,
+						  vecs[i].iov_len,
+						  offset, i);
+		} else {
+			io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
+						 vecs[i].iov_len,
+						 offset, i);
+		}
+		sqe->user_data = i;
+		if (seq)
+			offset += BS;
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+		goto err;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			goto err;
+		}
+		if (exp_len == -1) {
+			int iov_len = vecs[cqe->user_data].iov_len;
+
+			if (cqe->res != iov_len) {
+				fprintf(stderr, "cqe res %d, wanted %d\n",
+					cqe->res, iov_len);
+				goto err;
+			}
+		} else if (cqe->res != exp_len) {
+			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
+			goto err;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+err:
+	return 1;
+}
+
+/* Run IO over ublk block device */
+static int test_io(struct io_ctx *ctx)
+{
+	struct io_uring ring;
+	int ret, ring_flags = 0;
+	char buf[256];
+	int fd = -1;
+	off_t offset = 0;
+	unsigned long long bytes;
+	int open_flags = O_DIRECT;
+	struct iovec *vecs = t_create_buffers(BUFFERS, BS);
+
+	ret = t_create_ring(BUFFERS, &ring, ring_flags);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	snprintf(buf, sizeof(buf), "%s%d", UBLKB_DEV, ctx->dev_id);
+
+	if (ctx->write)
+		open_flags |= O_WRONLY;
+	else
+		open_flags |= O_RDONLY;
+	fd = open(buf, open_flags);
+	if (fd < 0) {
+		if (errno == EINVAL)
+			return 0;
+		return 1;
+	}
+
+	if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
+		return 1;
+
+	ret = t_register_buffers(&ring, vecs, BUFFERS);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "buffer reg failed: %d\n", ret);
+		return 1;
+	}
+
+	for (offset = 0; offset < bytes; offset += BS * BUFFERS) {
+		ret = __test_io(&ring, fd, ctx->write, ctx->seq, vecs, BS,
+				offset);
+		if (ret != T_SETUP_OK) {
+			fprintf(stderr, "/dev/ublkb%d read failed: offset %lu ret %d\n",
+					ctx->dev_id, (unsigned long) offset, ret);
+			break;
+		}
+	}
+
+	close(fd);
+	io_uring_unregister_buffers(&ring);
+	io_uring_queue_exit(&ring);
+
+	return ret;
+}
+
+static void *test_io_fn(void *data)
+{
+	struct io_ctx *ctx = data;
+
+	ctx->res = test_io(ctx);
+
+	return data;
+}
+
+static void ignore_stderr(void)
+{
+	int devnull = open("/dev/null", O_WRONLY);
+
+	if (devnull >= 0) {
+		dup2(devnull, fileno(stderr));
+		close(devnull);
+	}
+}
+
+static int test_io_worker(int dev_id)
+{
+	const int nr_jobs = 4;
+	struct io_ctx ctx[nr_jobs];
+	int i, ret = 0;
+
+	for (i = 0; i < nr_jobs; i++) {
+		ctx[i].dev_id = dev_id;
+		ctx[i].write = (i & 0x1) ? 0 : 1;
+		ctx[i].seq = 1;
+
+		pthread_create(&ctx[i].handle, NULL, test_io_fn, &ctx[i]);
+	}
+
+	for (i = 0; i < nr_jobs; i++) {
+		pthread_join(ctx[i].handle, NULL);
+
+		if (!ret && ctx[i].res)
+			ret = ctx[i].res;
+	}
+
+	return ret;
+}
+
+/*
+ * Run IO over created ublk device, meantime delete this ublk device
+ *
+ * Cover cancellable uring_cmd
+ * */
+static int test_del_ublk_with_io(void)
+{
+	const unsigned wait_ms = 200;
+	char *tgt_type = "null";
+	int dev_id = -1;
+	int ret, pid;
+
+	ret = cmd_dev_add(tgt_type, &dev_id, 2, BUFFERS);
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "buffer reg failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	switch ((pid = fork())) {
+	case -1:
+		fprintf(stderr, "fork failed\n");
+		return T_EXIT_FAIL;
+	case 0:
+		/* io error is expected since the parent is killing ublk */
+		ignore_stderr();
+		test_io_worker(dev_id);
+		return 0;
+	default:
+		/*
+		 * Wait a little while until ublk IO pipeline is warm up,
+		 * then try to shutdown ublk device by `kill -9 $ublk_daemon_pid`.
+		 *
+		 * cancellable uring_cmd code path can be covered in this way.
+		 */
+		usleep(wait_ms * 1000);
+		ret = cmd_dev_del_by_kill(dev_id);
+		waitpid(pid, NULL, 0);
+		return ret;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	const int nr_loop = 4;
+	struct ublk_dev *dev;
+	__u64 features;
+	int ret, i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	dev = ublk_ctrl_init();
+	/* ublk isn't supported or the module isn't loaded */
+	if (!dev)
+		return T_EXIT_SKIP;
+
+	/* kernel doesn't support get_features */
+	ret = ublk_ctrl_get_features(dev, &features);
+	if (ret < 0)
+		return T_EXIT_SKIP;
+
+	if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
+		return T_EXIT_SKIP;
+
+	for (i = 0; i < nr_loop; i++) {
+		if (test_del_ublk_with_io())
+			return T_EXIT_FAIL;
+	}
+	ublk_ctrl_deinit(dev);
+	return T_EXIT_PASS;
+}
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif