uringmachine 0.3 → 0.4

Files changed (138)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/README.md +85 -0
  4. data/TODO.md +5 -0
  5. data/examples/echo_server.rb +18 -40
  6. data/examples/inout.rb +19 -0
  7. data/examples/nc.rb +36 -0
  8. data/ext/um/extconf.rb +6 -15
  9. data/ext/um/um.c +245 -53
  10. data/ext/um/um.h +21 -9
  11. data/ext/um/um_class.c +74 -87
  12. data/ext/um/um_const.c +184 -0
  13. data/ext/um/um_op.c +10 -13
  14. data/ext/um/um_utils.c +48 -3
  15. data/lib/uringmachine/version.rb +1 -1
  16. data/lib/uringmachine.rb +12 -0
  17. data/test/helper.rb +8 -0
  18. data/test/test_um.rb +227 -7
  19. data/vendor/liburing/.github/workflows/build.yml +29 -1
  20. data/vendor/liburing/.gitignore +1 -0
  21. data/vendor/liburing/CHANGELOG +15 -0
  22. data/vendor/liburing/CONTRIBUTING.md +165 -0
  23. data/vendor/liburing/configure +32 -0
  24. data/vendor/liburing/examples/Makefile +8 -1
  25. data/vendor/liburing/examples/kdigest.c +405 -0
  26. data/vendor/liburing/examples/proxy.c +75 -8
  27. data/vendor/liburing/liburing.pc.in +1 -1
  28. data/vendor/liburing/src/Makefile +16 -2
  29. data/vendor/liburing/src/include/liburing/io_uring.h +31 -0
  30. data/vendor/liburing/src/include/liburing/sanitize.h +39 -0
  31. data/vendor/liburing/src/include/liburing.h +31 -4
  32. data/vendor/liburing/src/liburing-ffi.map +5 -0
  33. data/vendor/liburing/src/liburing.map +1 -0
  34. data/vendor/liburing/src/queue.c +3 -0
  35. data/vendor/liburing/src/register.c +36 -0
  36. data/vendor/liburing/src/sanitize.c +176 -0
  37. data/vendor/liburing/src/setup.c +1 -1
  38. data/vendor/liburing/test/35fa71a030ca.c +7 -0
  39. data/vendor/liburing/test/500f9fbadef8.c +2 -0
  40. data/vendor/liburing/test/7ad0e4b2f83c.c +0 -25
  41. data/vendor/liburing/test/917257daa0fe.c +7 -0
  42. data/vendor/liburing/test/Makefile +31 -4
  43. data/vendor/liburing/test/a0908ae19763.c +7 -0
  44. data/vendor/liburing/test/a4c0b3decb33.c +7 -0
  45. data/vendor/liburing/test/accept.c +14 -4
  46. data/vendor/liburing/test/b19062a56726.c +7 -0
  47. data/vendor/liburing/test/bind-listen.c +2 -2
  48. data/vendor/liburing/test/buf-ring-nommap.c +10 -3
  49. data/vendor/liburing/test/buf-ring.c +2 -0
  50. data/vendor/liburing/test/coredump.c +7 -0
  51. data/vendor/liburing/test/cq-overflow.c +13 -1
  52. data/vendor/liburing/test/d4ae271dfaae.c +11 -3
  53. data/vendor/liburing/test/defer-taskrun.c +2 -2
  54. data/vendor/liburing/test/defer-tw-timeout.c +4 -1
  55. data/vendor/liburing/test/defer.c +2 -2
  56. data/vendor/liburing/test/double-poll-crash.c +1 -1
  57. data/vendor/liburing/test/eeed8b54e0df.c +2 -0
  58. data/vendor/liburing/test/eventfd.c +0 -1
  59. data/vendor/liburing/test/exit-no-cleanup.c +11 -0
  60. data/vendor/liburing/test/fadvise.c +9 -26
  61. data/vendor/liburing/test/fdinfo.c +9 -1
  62. data/vendor/liburing/test/file-register.c +14 -2
  63. data/vendor/liburing/test/file-update.c +1 -1
  64. data/vendor/liburing/test/file-verify.c +27 -16
  65. data/vendor/liburing/test/files-exit-hang-timeout.c +1 -2
  66. data/vendor/liburing/test/fixed-buf-iter.c +3 -1
  67. data/vendor/liburing/test/fixed-hugepage.c +12 -1
  68. data/vendor/liburing/test/fsnotify.c +1 -0
  69. data/vendor/liburing/test/futex.c +16 -4
  70. data/vendor/liburing/test/helpers.c +47 -0
  71. data/vendor/liburing/test/helpers.h +6 -0
  72. data/vendor/liburing/test/init-mem.c +5 -3
  73. data/vendor/liburing/test/io-cancel.c +0 -24
  74. data/vendor/liburing/test/io_uring_passthrough.c +2 -0
  75. data/vendor/liburing/test/io_uring_register.c +25 -6
  76. data/vendor/liburing/test/iopoll-leak.c +4 -0
  77. data/vendor/liburing/test/iopoll-overflow.c +1 -1
  78. data/vendor/liburing/test/iopoll.c +3 -3
  79. data/vendor/liburing/test/kallsyms.c +203 -0
  80. data/vendor/liburing/test/link-timeout.c +159 -0
  81. data/vendor/liburing/test/linked-defer-close.c +224 -0
  82. data/vendor/liburing/test/madvise.c +12 -25
  83. data/vendor/liburing/test/min-timeout-wait.c +0 -25
  84. data/vendor/liburing/test/min-timeout.c +0 -25
  85. data/vendor/liburing/test/mkdir.c +6 -0
  86. data/vendor/liburing/test/msg-ring.c +8 -2
  87. data/vendor/liburing/test/napi-test.c +15 -2
  88. data/vendor/liburing/test/no-mmap-inval.c +2 -0
  89. data/vendor/liburing/test/nop.c +44 -0
  90. data/vendor/liburing/test/ooo-file-unreg.c +1 -1
  91. data/vendor/liburing/test/open-close.c +40 -0
  92. data/vendor/liburing/test/openat2.c +37 -14
  93. data/vendor/liburing/test/poll-many.c +13 -7
  94. data/vendor/liburing/test/poll-mshot-update.c +17 -10
  95. data/vendor/liburing/test/poll-v-poll.c +6 -3
  96. data/vendor/liburing/test/pollfree.c +148 -0
  97. data/vendor/liburing/test/read-mshot-empty.c +156 -153
  98. data/vendor/liburing/test/read-mshot.c +276 -27
  99. data/vendor/liburing/test/read-write.c +78 -13
  100. data/vendor/liburing/test/recv-msgall-stream.c +3 -0
  101. data/vendor/liburing/test/recv-msgall.c +5 -0
  102. data/vendor/liburing/test/recvsend_bundle-inc.c +680 -0
  103. data/vendor/liburing/test/recvsend_bundle.c +92 -29
  104. data/vendor/liburing/test/reg-fd-only.c +14 -4
  105. data/vendor/liburing/test/regbuf-clone.c +187 -0
  106. data/vendor/liburing/test/regbuf-merge.c +7 -0
  107. data/vendor/liburing/test/register-restrictions.c +86 -85
  108. data/vendor/liburing/test/rename.c +59 -1
  109. data/vendor/liburing/test/ringbuf-read.c +5 -0
  110. data/vendor/liburing/test/ringbuf-status.c +5 -1
  111. data/vendor/liburing/test/runtests.sh +16 -1
  112. data/vendor/liburing/test/send-zerocopy.c +59 -0
  113. data/vendor/liburing/test/short-read.c +1 -0
  114. data/vendor/liburing/test/socket.c +43 -0
  115. data/vendor/liburing/test/splice.c +3 -1
  116. data/vendor/liburing/test/sq-poll-dup.c +1 -1
  117. data/vendor/liburing/test/sq-poll-share.c +2 -0
  118. data/vendor/liburing/test/sqpoll-disable-exit.c +8 -0
  119. data/vendor/liburing/test/sqpoll-exit-hang.c +1 -25
  120. data/vendor/liburing/test/sqpoll-sleep.c +1 -25
  121. data/vendor/liburing/test/statx.c +89 -0
  122. data/vendor/liburing/test/stdout.c +2 -0
  123. data/vendor/liburing/test/submit-and-wait.c +1 -25
  124. data/vendor/liburing/test/submit-reuse.c +4 -26
  125. data/vendor/liburing/test/symlink.c +12 -1
  126. data/vendor/liburing/test/sync-cancel.c +48 -21
  127. data/vendor/liburing/test/thread-exit.c +5 -0
  128. data/vendor/liburing/test/timeout-new.c +1 -26
  129. data/vendor/liburing/test/timeout.c +12 -26
  130. data/vendor/liburing/test/unlink.c +94 -1
  131. data/vendor/liburing/test/uring_cmd_ublk.c +1252 -0
  132. data/vendor/liburing/test/waitid.c +62 -8
  133. data/vendor/liburing/test/wq-aff.c +35 -0
  134. data/vendor/liburing/test/xfail_prep_link_timeout_out_of_scope.c +46 -0
  135. data/vendor/liburing/test/xfail_register_buffers_out_of_scope.c +51 -0
  136. metadata +17 -4
  137. data/examples/event_loop.rb +0 -69
  138. data/examples/fibers.rb +0 -105
data/vendor/liburing/test/uring_cmd_ublk.c (new file)
@@ -0,0 +1,1252 @@
+ /* SPDX-License-Identifier: MIT */
+ /*
+ * Description: uring_cmd based ublk
+ *
+ * Covers cancellable uring_cmd feature.
+ */
+ #include <unistd.h>
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <stdio.h>
+ #include <stdarg.h>
+ #include <string.h>
+ #include <pthread.h>
+ #include <limits.h>
+ #include <poll.h>
+ #include <sys/syscall.h>
+ #include <sys/mman.h>
+ #include <sys/ioctl.h>
+ #include <sys/inotify.h>
+ #include <sys/wait.h>
+
+ #include "liburing.h"
+ #include "helpers.h"
+ #ifdef CONFIG_HAVE_UBLK_HEADER
+ #include <linux/ublk_cmd.h>
+
+ /****************** part 1: libublk ********************/
+
+ #define CTRL_DEV "/dev/ublk-control"
+ #define UBLKC_DEV "/dev/ublkc"
+ #define UBLKB_DEV "/dev/ublkb"
+ #define UBLK_CTRL_RING_DEPTH 32
+
+ /* queue idle timeout */
+ #define UBLKSRV_IO_IDLE_SECS 20
+
+ #define UBLK_IO_MAX_BYTES 65536
+ #define UBLK_MAX_QUEUES 4
+ #define UBLK_QUEUE_DEPTH 128
+
+ #define UBLK_DBG_DEV (1U << 0)
+ #define UBLK_DBG_QUEUE (1U << 1)
+ #define UBLK_DBG_IO_CMD (1U << 2)
+ #define UBLK_DBG_IO (1U << 3)
+ #define UBLK_DBG_CTRL_CMD (1U << 4)
+ #define UBLK_LOG (1U << 5)
+
+ struct ublk_dev;
+ struct ublk_queue;
+
+ struct ublk_ctrl_cmd_data {
+ __u32 cmd_op;
+ #define CTRL_CMD_HAS_DATA 1
+ #define CTRL_CMD_HAS_BUF 2
+ __u32 flags;
+
+ __u64 data[2];
+ __u64 addr;
+ __u32 len;
+ };
+
+ struct ublk_io {
+ char *buf_addr;
+
+ #define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
+ #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
+ #define UBLKSRV_IO_FREE (1UL << 2)
+ unsigned int flags;
+
+ unsigned int result;
+ };
+
+ struct ublk_tgt_ops {
+ const char *name;
+ int (*init_tgt)(struct ublk_dev *);
+ void (*deinit_tgt)(struct ublk_dev *);
+
+ int (*queue_io)(struct ublk_queue *, int tag);
+ void (*tgt_io_done)(struct ublk_queue *,
+ int tag, const struct io_uring_cqe *);
+ };
+
+ struct ublk_tgt {
+ unsigned long dev_size;
+ const struct ublk_tgt_ops *ops;
+ struct ublk_params params;
+ };
+
+ struct ublk_queue {
+ int q_id;
+ int q_depth;
+ unsigned int cmd_inflight;
+ unsigned int io_inflight;
+ struct ublk_dev *dev;
+ const struct ublk_tgt_ops *tgt_ops;
+ char *io_cmd_buf;
+ struct io_uring ring;
+ struct ublk_io ios[UBLK_QUEUE_DEPTH];
+ #define UBLKSRV_QUEUE_STOPPING (1U << 0)
+ #define UBLKSRV_QUEUE_IDLE (1U << 1)
+ unsigned state;
+ pid_t tid;
+ pthread_t thread;
+ };
+
+ struct ublk_dev {
+ struct ublk_tgt tgt;
+ struct ublksrv_ctrl_dev_info dev_info;
+ struct ublk_queue q[UBLK_MAX_QUEUES];
+
+ int fds[2]; /* fds[0] points to /dev/ublkcN */
+ int nr_fds;
+ int ctrl_fd;
+ struct io_uring ring;
+ };
+
+ #ifndef offsetof
+ #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+ #endif
+
+ #ifndef container_of
+ #define container_of(ptr, type, member) ({ \
+ unsigned long __mptr = (unsigned long)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); })
+ #endif
+
+ #define round_up(val, rnd) \
+ (((val) + ((rnd) - 1)) & ~((rnd) - 1))
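+ /* Note: round_up() requires rnd to be a power of two; e.g.
+  * round_up(5000, 4096) == 8192. ublk_queue_cmd_buf_sz() below relies on it
+  * to pad the descriptor buffer to whole pages before mmap. */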
+
+ static unsigned int ublk_dbg_mask = 0;
+
+ static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
+
+ static inline int is_target_io(__u64 user_data)
+ {
+ return (user_data & (1ULL << 63)) != 0;
+ }
+
+ static inline __u64 build_user_data(unsigned tag, unsigned op,
+ unsigned tgt_data, unsigned is_target_io)
+ {
+ assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
+
+ return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
+ }
+
+ static inline unsigned int user_data_to_tag(__u64 user_data)
+ {
+ return user_data & 0xffff;
+ }
+
+ static inline unsigned int user_data_to_op(__u64 user_data)
+ {
+ return (user_data >> 16) & 0xff;
+ }
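+ /* Layout used by build_user_data() and the helpers above: bits 0-15 hold
+  * the tag, bits 16-23 the command op, bits 24-39 the target data, and
+  * bit 63 marks a target (block) io rather than a ublk uring_cmd. */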
+
+ static void ublk_err(const char *fmt, ...)
+ {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ }
+
+ static void ublk_dbg(int level, const char *fmt, ...)
+ {
+ if (level & ublk_dbg_mask) {
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ }
+ }
+
+ static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
+ {
+ return (void *)&sqe->cmd;
+ }
+
+ static inline void ublk_mark_io_done(struct ublk_io *io, int res)
+ {
+ io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
+ io->result = res;
+ }
+
+ static inline const struct ublksrv_io_desc *ublk_get_iod(
+ const struct ublk_queue *q, int tag)
+ {
+ return (struct ublksrv_io_desc *)
+ &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
+ }
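+ /* io_cmd_buf is the per-queue array of ublksrv_io_desc descriptors mmap'd
+  * from the ublkc char device (see ublk_queue_init()); ublk_get_iod() just
+  * indexes it by tag. */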
+
+ static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
+ __u32 cmd_op)
+ {
+ __u32 *addr = (__u32 *)&sqe->off;
+
+ addr[0] = cmd_op;
+ addr[1] = 0;
+ }
+
+ static inline int ublk_setup_ring(struct io_uring *r, int depth,
+ int cq_depth, unsigned flags)
+ {
+ struct io_uring_params p;
+
+ memset(&p, 0, sizeof(p));
+ p.flags = flags | IORING_SETUP_CQSIZE;
+ p.cq_entries = cq_depth;
+
+ return io_uring_queue_init_params(depth, r, &p);
+ }
+
+ static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
+ struct io_uring_sqe *sqe,
+ struct ublk_ctrl_cmd_data *data)
+ {
+ struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
+
+ sqe->fd = dev->ctrl_fd;
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->ioprio = 0;
+
+ if (data->flags & CTRL_CMD_HAS_BUF) {
+ cmd->addr = data->addr;
+ cmd->len = data->len;
+ }
+
+ if (data->flags & CTRL_CMD_HAS_DATA)
+ cmd->data[0] = data->data[0];
+
+ cmd->dev_id = info->dev_id;
+ cmd->queue_id = -1;
+
+ ublk_set_sqe_cmd_op(sqe, data->cmd_op);
+
+ io_uring_sqe_set_data(sqe, cmd);
+ }
+
+ static int __ublk_ctrl_cmd(struct ublk_dev *dev,
+ struct ublk_ctrl_cmd_data *data)
+ {
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int ret = -EINVAL;
+
+ sqe = io_uring_get_sqe(&dev->ring);
+ if (!sqe) {
+ ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
+ return ret;
+ }
+
+ ublk_ctrl_init_cmd(dev, sqe, data);
+
+ ret = io_uring_submit(&dev->ring);
+ if (ret < 0) {
+ ublk_err("uring submit ret %d\n", ret);
+ return ret;
+ }
+
+ ret = io_uring_wait_cqe(&dev->ring, &cqe);
+ if (ret < 0) {
+ ublk_err("wait cqe: %s\n", strerror(-ret));
+ return ret;
+ }
+ io_uring_cqe_seen(&dev->ring, cqe);
+
+ return cqe->res;
+ }
+
+ static int ublk_ctrl_start_dev(struct ublk_dev *dev,
+ int daemon_pid)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_START_DEV,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_add_dev(struct ublk_dev *dev)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_ADD_DEV,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) &dev->dev_info,
+ .len = sizeof(struct ublksrv_ctrl_dev_info),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_del_dev(struct ublk_dev *dev)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_DEL_DEV,
+ .flags = 0,
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_get_info(struct ublk_dev *dev)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_DEV_INFO,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) &dev->dev_info,
+ .len = sizeof(struct ublksrv_ctrl_dev_info),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_set_params(struct ublk_dev *dev,
+ struct ublk_params *params)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_SET_PARAMS,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) params,
+ .len = sizeof(*params),
+ };
+ params->len = sizeof(*params);
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_get_features(struct ublk_dev *dev,
+ __u64 *features)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_FEATURES,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) features,
+ .len = sizeof(*features),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static void ublk_ctrl_deinit(struct ublk_dev *dev)
+ {
+ close(dev->ctrl_fd);
+ free(dev);
+ }
+
+ static struct ublk_dev *ublk_ctrl_init(void)
+ {
+ struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
+ struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ int ret;
+
+ dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
+ if (dev->ctrl_fd < 0) {
+ free(dev);
+ return NULL;
+ }
+
+ info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
+
+ ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
+ UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
+ if (ret < 0) {
+ ublk_err("queue_init: %s\n", strerror(-ret));
+ free(dev);
+ return NULL;
+ }
+ dev->nr_fds = 1;
+
+ return dev;
+ }
+
+ static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
+ {
+ int size = q->q_depth * sizeof(struct ublksrv_io_desc);
+ unsigned int page_sz = getpagesize();
+
+ return round_up(size, page_sz);
+ }
+
+ static void ublk_queue_deinit(struct ublk_queue *q)
+ {
+ int i;
+ int nr_ios = q->q_depth;
+
+ io_uring_unregister_ring_fd(&q->ring);
+
+ if (q->ring.ring_fd > 0) {
+ io_uring_unregister_files(&q->ring);
+ close(q->ring.ring_fd);
+ q->ring.ring_fd = -1;
+ }
+
+ if (q->io_cmd_buf)
+ munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
+
+ for (i = 0; i < nr_ios; i++)
+ free(q->ios[i].buf_addr);
+ }
+
+ static int ublk_queue_init(struct ublk_queue *q)
+ {
+ struct ublk_dev *dev = q->dev;
+ int depth = dev->dev_info.queue_depth;
+ int i, ret = -1;
+ int cmd_buf_size, io_buf_size;
+ unsigned long off;
+ int ring_depth = depth, cq_depth = depth;
+
+ q->tgt_ops = dev->tgt.ops;
+ q->state = 0;
+ q->q_depth = depth;
+ q->cmd_inflight = 0;
+ q->tid = gettid();
+
+ cmd_buf_size = ublk_queue_cmd_buf_sz(q);
+ off = UBLKSRV_CMD_BUF_OFFSET +
+ q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
+ q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
+ MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
+ if (q->io_cmd_buf == MAP_FAILED) {
+ ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
+ q->dev->dev_info.dev_id, q->q_id);
+ goto fail;
+ }
+
+ io_buf_size = dev->dev_info.max_io_buf_bytes;
+ for (i = 0; i < q->q_depth; i++) {
+ q->ios[i].buf_addr = NULL;
+
+ if (posix_memalign((void **)&q->ios[i].buf_addr,
+ getpagesize(), io_buf_size)) {
+ ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
+ dev->dev_info.dev_id, q->q_id, i);
+ goto fail;
+ }
+ q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
+ }
+
+ ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
+ IORING_SETUP_COOP_TASKRUN);
+ if (ret < 0) {
+ ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
+ q->dev->dev_info.dev_id, q->q_id, ret);
+ goto fail;
+ }
+
+ io_uring_register_ring_fd(&q->ring);
+
+ ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d register files failed %d\n",
+ q->dev->dev_info.dev_id, q->q_id, ret);
+ goto fail;
+ }
+
+ return 0;
+ fail:
+ ublk_queue_deinit(q);
+ ublk_err("ublk dev %d queue %d failed\n",
+ dev->dev_info.dev_id, q->q_id);
+ return -ENOMEM;
+ }
+
+ static int ublk_dev_prep(struct ublk_dev *dev)
+ {
+ int dev_id = dev->dev_info.dev_id;
+ char buf[64];
+ int ret = 0;
+
+ snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
+ dev->fds[0] = open(buf, O_RDWR);
+ if (dev->fds[0] < 0) {
+ ret = -EBADF;
+ ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
+ goto fail;
+ }
+
+ if (dev->tgt.ops->init_tgt)
+ ret = dev->tgt.ops->init_tgt(dev);
+
+ return ret;
+ fail:
+ close(dev->fds[0]);
+ return ret;
+ }
+
+ static void ublk_dev_unprep(struct ublk_dev *dev)
+ {
+ if (dev->tgt.ops->deinit_tgt)
+ dev->tgt.ops->deinit_tgt(dev);
+ close(dev->fds[0]);
+ }
+
+ static int ublk_queue_io_cmd(struct ublk_queue *q,
+ struct ublk_io *io, unsigned tag)
+ {
+ struct ublksrv_io_cmd *cmd;
+ struct io_uring_sqe *sqe;
+ unsigned int cmd_op = 0;
+ __u64 user_data;
+
+ /* only freed io can be issued */
+ if (!(io->flags & UBLKSRV_IO_FREE))
+ return 0;
+
+ /* we issue because we need either fetching or committing */
+ if (!(io->flags &
+ (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
+ return 0;
+
+ if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+ cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
+ else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
+ cmd_op = UBLK_U_IO_FETCH_REQ;
+
+ sqe = io_uring_get_sqe(&q->ring);
+ if (!sqe) {
+ ublk_err("%s: run out of sqe %d, tag %d\n",
+ __func__, q->q_id, tag);
+ return -1;
+ }
+
+ cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
+
+ if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
+ cmd->result = io->result;
+
+ /* These fields should be written once, never change */
+ ublk_set_sqe_cmd_op(sqe, cmd_op);
+ sqe->fd = 0; /* dev->fds[0] */
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->flags = IOSQE_FIXED_FILE;
+ sqe->rw_flags = 0;
+ cmd->tag = tag;
+ cmd->addr = (__u64) (uintptr_t) io->buf_addr;
+ cmd->q_id = q->q_id;
+
+ user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
+ io_uring_sqe_set_data64(sqe, user_data);
+
+ io->flags = 0;
+
+ q->cmd_inflight += 1;
+
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
+ __func__, q->q_id, tag, cmd_op,
+ io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
+ return 1;
+ }
+
+ static int ublk_complete_io(struct ublk_queue *q,
+ unsigned tag, int res)
+ {
+ struct ublk_io *io = &q->ios[tag];
+
+ ublk_mark_io_done(io, res);
+
+ return ublk_queue_io_cmd(q, io, tag);
+ }
+
+ static void ublk_submit_fetch_commands(struct ublk_queue *q)
+ {
+ int i = 0;
+
+ for (i = 0; i < q->q_depth; i++)
+ ublk_queue_io_cmd(q, &q->ios[i], i);
+ }
+
+ static int ublk_queue_is_idle(struct ublk_queue *q)
+ {
+ return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
+ }
+
+ static int ublk_queue_is_done(struct ublk_queue *q)
+ {
+ return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
+ }
+
+ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
+ struct io_uring_cqe *cqe)
+ {
+ unsigned tag = user_data_to_tag(cqe->user_data);
+
+ if (cqe->res < 0 && cqe->res != -EAGAIN)
+ ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
+ __func__, cqe->res, q->q_id,
+ user_data_to_tag(cqe->user_data),
+ user_data_to_op(cqe->user_data));
+
+ if (q->tgt_ops->tgt_io_done)
+ q->tgt_ops->tgt_io_done(q, tag, cqe);
+ }
+
+ static void ublk_handle_cqe(struct io_uring *r,
+ struct io_uring_cqe *cqe, void *data)
+ {
+ struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
+ unsigned tag = user_data_to_tag(cqe->user_data);
+ unsigned cmd_op = user_data_to_op(cqe->user_data);
+ int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
+ !(q->state & UBLKSRV_QUEUE_STOPPING);
+ struct ublk_io *io;
+
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
+ __func__, cqe->res, q->q_id, tag, cmd_op,
+ is_target_io(cqe->user_data),
+ (q->state & UBLKSRV_QUEUE_STOPPING));
+
+ /* Don't retrieve io in case of target io */
+ if (is_target_io(cqe->user_data)) {
+ ublksrv_handle_tgt_cqe(q, cqe);
+ return;
+ }
+
+ io = &q->ios[tag];
+ q->cmd_inflight--;
+
+ if (!fetch) {
+ q->state |= UBLKSRV_QUEUE_STOPPING;
+ io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
+ }
+
+ if (cqe->res == UBLK_IO_RES_OK) {
+ assert(tag < q->q_depth);
+ q->tgt_ops->queue_io(q, tag);
+ } else {
+ /*
+ * COMMIT_REQ will be completed immediately since no fetching
+ * piggyback is required.
+ *
+ * Marking IO_FREE only, then this io won't be issued since
+ * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
+ *
+ * */
+ io->flags = UBLKSRV_IO_FREE;
+ }
+ }
+
+ static int ublk_reap_events_uring(struct io_uring *r)
+ {
+ struct io_uring_cqe *cqe;
+ unsigned head;
+ int count = 0;
+
+ io_uring_for_each_cqe(r, head, cqe) {
+ ublk_handle_cqe(r, cqe, NULL);
+ count += 1;
+ }
+ io_uring_cq_advance(r, count);
+
+ return count;
+ }
+
+ static int ublk_process_io(struct ublk_queue *q)
+ {
+ int ret, reapped;
+
+ ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
+ q->dev->dev_info.dev_id,
+ q->q_id, io_uring_sq_ready(&q->ring),
+ q->cmd_inflight,
+ (q->state & UBLKSRV_QUEUE_STOPPING));
+
+ if (ublk_queue_is_done(q))
+ return -ENODEV;
+
+ ret = io_uring_submit_and_wait(&q->ring, 1);
+ reapped = ublk_reap_events_uring(&q->ring);
+
+ ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
+ ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
+ (q->state & UBLKSRV_QUEUE_IDLE));
+
+ return reapped;
+ }
+
+ static void *ublk_io_handler_fn(void *data)
+ {
+ struct ublk_queue *q = data;
+ int dev_id = q->dev->dev_info.dev_id;
+ int ret;
+
+ ret = ublk_queue_init(q);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d init queue failed\n",
+ dev_id, q->q_id);
+ return NULL;
+ }
+ ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
+ q->tid, dev_id, q->q_id);
+
+ /* submit all io commands to ublk driver */
+ ublk_submit_fetch_commands(q);
+ do {
+ if (ublk_process_io(q) < 0)
+ break;
+ } while (1);
+
+ ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
+ ublk_queue_deinit(q);
+ return NULL;
+ }
+
+ static void ublk_set_parameters(struct ublk_dev *dev)
+ {
+ int ret;
+
+ ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
+ if (ret)
+ ublk_err("dev %d set basic parameter failed %d\n",
+ dev->dev_info.dev_id, ret);
+ }
+
+ static int ublk_start_daemon(struct ublk_dev *dev)
+ {
+ int ret, i;
+ void *thread_ret;
+ const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
+
+ if (daemon(1, 1) < 0)
+ return -errno;
+
+ ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
+
+ ret = ublk_dev_prep(dev);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < dinfo->nr_hw_queues; i++) {
+ dev->q[i].dev = dev;
+ dev->q[i].q_id = i;
+ pthread_create(&dev->q[i].thread, NULL,
+ ublk_io_handler_fn,
+ &dev->q[i]);
+ }
+
+ /* everything is fine now, start us */
+ ublk_set_parameters(dev);
+ ret = ublk_ctrl_start_dev(dev, getpid());
+ if (ret < 0) {
+ ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
+ goto fail;
+ }
+
+ /* wait until we are terminated */
+ for (i = 0; i < dinfo->nr_hw_queues; i++)
+ pthread_join(dev->q[i].thread, &thread_ret);
+ fail:
+ ublk_dev_unprep(dev);
+ ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
+
+ return ret;
+ }
+
+ static int wait_ublk_dev(char *dev_name, int evt_mask, unsigned timeout)
+ {
+ #define EV_SIZE (sizeof(struct inotify_event))
+ #define EV_BUF_LEN (128 * (EV_SIZE + 16))
+ struct pollfd pfd;
+ int fd, wd;
+ int ret = -EINVAL;
+
+ fd = inotify_init();
+ if (fd < 0) {
+ ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
+ return fd;
+ }
+
+ wd = inotify_add_watch(fd, "/dev", evt_mask);
+ if (wd == -1) {
+ ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
+ goto fail;
+ }
+
+ pfd.fd = fd;
+ pfd.events = POLL_IN;
+ while (1) {
+ int i = 0;
+ char buffer[EV_BUF_LEN];
+ ret = poll(&pfd, 1, 1000 * timeout);
+
+ if (ret == -1) {
+ ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
+ goto rm_watch;
+ } else if (ret == 0) {
+ ublk_err("%s: poll inotify timeout\n", __func__);
+ ret = -ENOENT;
+ goto rm_watch;
+ }
+
+ ret = read(fd, buffer, EV_BUF_LEN);
+ if (ret < 0) {
+ ublk_err("%s: read inotify fd failed\n", __func__);
+ goto rm_watch;
+ }
+
+ while (i < ret) {
+ struct inotify_event *event = (struct inotify_event *)&buffer[i];
+
+ ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
+ __func__, event->mask, event->name);
+ if (event->mask & evt_mask) {
+ if (!strcmp(event->name, dev_name)) {
+ ret = 0;
+ goto rm_watch;
+ }
+ }
+ i += EV_SIZE + event->len;
+ }
+ }
+ rm_watch:
+ inotify_rm_watch(fd, wd);
+ fail:
+ close(fd);
+ return ret;
+ }
+
+ static int ublk_stop_io_daemon(const struct ublk_dev *dev)
+ {
+ int daemon_pid = dev->dev_info.ublksrv_pid;
+ int dev_id = dev->dev_info.dev_id;
+ char ublkc[64];
+ int ret;
+
+ /*
+ * Wait until ublk char device is closed, when our daemon is shutdown
+ */
+ snprintf(ublkc, sizeof(ublkc), "%s%d", "ublkc", dev_id);
+ ret = wait_ublk_dev(ublkc, IN_CLOSE_WRITE, 10);
+ waitpid(dev->dev_info.ublksrv_pid, NULL, 0);
+ ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
+ __func__, daemon_pid, dev_id, ret);
+
+ return ret;
+ }
+
+ static int cmd_dev_add(char *tgt_type, int *exp_id, unsigned nr_queues,
+ unsigned depth)
+ {
+ const struct ublk_tgt_ops *ops;
+ struct ublksrv_ctrl_dev_info *info;
+ struct ublk_dev *dev;
+ int dev_id = *exp_id;
+ char ublkb[64];
+ int ret;
+
+ ops = ublk_find_tgt(tgt_type);
+ if (!ops) {
+ ublk_err("%s: no such tgt type, type %s\n",
+ __func__, tgt_type);
+ return -ENODEV;
+ }
+
+ if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
+ ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
+ __func__, nr_queues, depth);
+ return -EINVAL;
+ }
+
+ dev = ublk_ctrl_init();
+ if (!dev) {
+ ublk_err("%s: can't alloc dev id %d, type %s\n",
+ __func__, dev_id, tgt_type);
+ return -ENOMEM;
+ }
+
+ info = &dev->dev_info;
+ info->dev_id = dev_id;
+ info->nr_hw_queues = nr_queues;
+ info->queue_depth = depth;
+ dev->tgt.ops = ops;
+
+ ret = ublk_ctrl_add_dev(dev);
+ if (ret < 0) {
+ ublk_err("%s: can't add dev id %d, type %s ret %d\n",
+ __func__, dev_id, tgt_type, ret);
+ goto fail;
+ }
+
+ switch (fork()) {
+ case -1:
+ goto fail;
+ case 0:
+ ublk_start_daemon(dev);
+ return 0;
+ }
+
+ /*
+ * Wait until ublk disk is added, when our daemon is started
+ * successfully
+ */
+ snprintf(ublkb, sizeof(ublkb), "%s%u", "ublkb", dev->dev_info.dev_id);
+ ret = wait_ublk_dev(ublkb, IN_CREATE, 3);
+ if (ret < 0) {
+ ublk_err("%s: can't start daemon id %d, type %s\n",
+ __func__, dev_id, tgt_type);
+ ublk_ctrl_del_dev(dev);
+ } else {
+ *exp_id = dev->dev_info.dev_id;
+ }
+ fail:
+ ublk_ctrl_deinit(dev);
+ return ret;
+ }
+
+ static int cmd_dev_del_by_kill(int number)
+ {
+ struct ublk_dev *dev;
+ int ret;
+
+ dev = ublk_ctrl_init();
+ dev->dev_info.dev_id = number;
+
+ ret = ublk_ctrl_get_info(dev);
+ if (ret < 0)
+ goto fail;
+
+ /* simulate one ublk daemon panic */
+ kill(dev->dev_info.ublksrv_pid, 9);
+
+ ret = ublk_stop_io_daemon(dev);
+ if (ret < 0)
+ ublk_err("%s: can't stop daemon id %d\n", __func__, number);
+ ublk_ctrl_del_dev(dev);
+ fail:
+ if (ret >= 0)
+ ret = ublk_ctrl_get_info(dev);
+ ublk_ctrl_deinit(dev);
+
+ return (ret != 0) ? 0 : -EIO;
+ }
+
+ /****************** part 2: target implementation ********************/
+
+ static int ublk_null_tgt_init(struct ublk_dev *dev)
+ {
+ const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ unsigned long dev_size = 250UL << 30;
+
+ dev->tgt.dev_size = dev_size;
+ dev->tgt.params = (struct ublk_params) {
+ .types = UBLK_PARAM_TYPE_BASIC,
+ .basic = {
+ .logical_bs_shift = 9,
+ .physical_bs_shift = 12,
+ .io_opt_shift = 12,
+ .io_min_shift = 9,
+ .max_sectors = info->max_io_buf_bytes >> 9,
+ .dev_sectors = dev_size >> 9,
+ },
+ };
+
+ return 0;
+ }
+
+ static int ublk_null_queue_io(struct ublk_queue *q, int tag)
+ {
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+
+ ublk_complete_io(q, tag, iod->nr_sectors << 9);
+
+ return 0;
+ }
+
+ static const struct ublk_tgt_ops tgt_ops_list[] = {
+ {
+ .name = "null",
+ .init_tgt = ublk_null_tgt_init,
+ .queue_io = ublk_null_queue_io,
+ },
+ };
+
+ static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
+ {
+ const struct ublk_tgt_ops *ops;
+ int i;
+
+ if (name == NULL)
+ return NULL;
+
+ for (i = 0; i < sizeof(tgt_ops_list) / sizeof(*ops); i++)
+ if (strcmp(tgt_ops_list[i].name, name) == 0)
+ return &tgt_ops_list[i];
+ return NULL;
+ }
+
+
+ /****************** part 3: IO test over ublk disk ********************/
+
+ #include "helpers.h"
+ #include "liburing.h"
+ #define BS 4096
+ #define BUFFERS 128
+
+ struct io_ctx {
+ int dev_id;
+ int write;
+ int seq;
+
+ /* output */
+ int res;
+ pthread_t handle;
+ };
+
+ static int __test_io(struct io_uring *ring, int fd, int write,
+ int seq, struct iovec *vecs, int exp_len, off_t start)
+ {
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int i, ret;
+ off_t offset;
+
+ offset = start;
+ for (i = 0; i < BUFFERS; i++) {
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ if (!seq)
+ offset = start + BS * (rand() % BUFFERS);
+ if (write) {
+ io_uring_prep_write_fixed(sqe, fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ } else {
+ io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ }
+ sqe->user_data = i;
+ if (seq)
+ offset += BS;
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != BUFFERS) {
+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+ goto err;
+ }
+
+ for (i = 0; i < BUFFERS; i++) {
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ goto err;
+ }
+ if (exp_len == -1) {
+ int iov_len = vecs[cqe->user_data].iov_len;
+
+ if (cqe->res != iov_len) {
+ fprintf(stderr, "cqe res %d, wanted %d\n",
+ cqe->res, iov_len);
+ goto err;
+ }
+ } else if (cqe->res != exp_len) {
+ fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
+ goto err;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ }
+
+ return 0;
+ err:
+ return 1;
+ }
+
+ /* Run IO over ublk block device */
+ static int test_io(struct io_ctx *ctx)
+ {
+ struct io_uring ring;
+ int ret, ring_flags = 0;
+ char buf[256];
+ int fd = -1;
+ off_t offset = 0;
+ unsigned long long bytes;
+ int open_flags = O_DIRECT;
+ struct iovec *vecs = t_create_buffers(BUFFERS, BS);
+
+ ret = t_create_ring(BUFFERS, &ring, ring_flags);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "ring create failed: %d\n", ret);
+ return 1;
+ }
+
+ snprintf(buf, sizeof(buf), "%s%d", UBLKB_DEV, ctx->dev_id);
+
+ if (ctx->write)
+ open_flags |= O_WRONLY;
+ else
+ open_flags |= O_RDONLY;
+ fd = open(buf, open_flags);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ return 0;
+ return 1;
+ }
+
+ if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
+ return 1;
+
+ ret = t_register_buffers(&ring, vecs, BUFFERS);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ return 1;
+ }
+
+ for (offset = 0; offset < bytes; offset += BS * BUFFERS) {
+ ret = __test_io(&ring, fd, ctx->write, ctx->seq, vecs, BS,
+ offset);
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "/dev/ublkb%d read failed: offset %lu ret %d\n",
+ ctx->dev_id, (unsigned long) offset, ret);
+ break;
+ }
+ }
+
+ close(fd);
+ io_uring_unregister_buffers(&ring);
+ io_uring_queue_exit(&ring);
+
+ return ret;
+ }
+
+ static void *test_io_fn(void *data)
+ {
+ struct io_ctx *ctx = data;
+
+ ctx->res = test_io(ctx);
+
+ return data;
+ }
+
+ static void ignore_stderr(void)
+ {
+ int devnull = open("/dev/null", O_WRONLY);
+
+ if (devnull >= 0) {
+ dup2(devnull, fileno(stderr));
+ close(devnull);
+ }
+ }
+
+ static int test_io_worker(int dev_id)
+ {
+ const int nr_jobs = 4;
+ struct io_ctx ctx[nr_jobs];
+ int i, ret = 0;
+
+ for (i = 0; i < nr_jobs; i++) {
+ ctx[i].dev_id = dev_id;
+ ctx[i].write = (i & 0x1) ? 0 : 1;
+ ctx[i].seq = 1;
+
+ pthread_create(&ctx[i].handle, NULL, test_io_fn, &ctx[i]);
+ }
+
+ for (i = 0; i < nr_jobs; i++) {
+ pthread_join(ctx[i].handle, NULL);
+
+ if (!ret && ctx[i].res)
+ ret = ctx[i].res;
+ }
+
+ return ret;
+ }
+
+ /*
+ * Run IO over created ublk device, meantime delete this ublk device
+ *
+ * Cover cancellable uring_cmd
+ * */
+ static int test_del_ublk_with_io(void)
+ {
+ const unsigned wait_ms = 200;
+ char *tgt_type = "null";
+ int dev_id = -1;
+ int ret, pid;
+
+ ret = cmd_dev_add(tgt_type, &dev_id, 2, BUFFERS);
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ return T_EXIT_FAIL;
+ }
+
+ switch ((pid = fork())) {
+ case -1:
+ fprintf(stderr, "fork failed\n");
+ return T_EXIT_FAIL;
+ case 0:
+ /* io error is expected since the parent is killing ublk */
+ ignore_stderr();
+ test_io_worker(dev_id);
+ return 0;
+ default:
+ /*
+ * Wait a little while until ublk IO pipeline is warm up,
+ * then try to shutdown ublk device by `kill -9 $ublk_daemon_pid`.
+ *
+ * cancellable uring_cmd code path can be covered in this way.
+ */
+ usleep(wait_ms * 1000);
+ ret = cmd_dev_del_by_kill(dev_id);
+ waitpid(pid, NULL, 0);
+ return ret;
+ }
+ }
+
+ int main(int argc, char *argv[])
+ {
+ const int nr_loop = 4;
+ struct ublk_dev *dev;
+ __u64 features;
+ int ret, i;
+
+ if (argc > 1)
+ return T_EXIT_SKIP;
+
+ dev = ublk_ctrl_init();
+ /* ublk isn't supported or the module isn't loaded */
+ if (!dev)
+ return T_EXIT_SKIP;
+
+ /* kernel doesn't support get_features */
+ ret = ublk_ctrl_get_features(dev, &features);
+ if (ret < 0)
+ return T_EXIT_SKIP;
+
+ if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
+ return T_EXIT_SKIP;
+
+ for (i = 0; i < nr_loop; i++) {
+ if (test_del_ublk_with_io())
+ return T_EXIT_FAIL;
+ }
+ ublk_ctrl_deinit(dev);
+ return T_EXIT_PASS;
+ }
+ #else
+ int main(int argc, char *argv[])
+ {
+ return T_EXIT_SKIP;
+ }
+ #endif