uringmachine 0.3 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +2 -1
  3. data/CHANGELOG.md +23 -0
  4. data/README.md +128 -0
  5. data/TODO.md +14 -0
  6. data/examples/bm_snooze.rb +89 -0
  7. data/examples/bm_write.rb +56 -0
  8. data/examples/dns_client.rb +12 -0
  9. data/examples/echo_server.rb +18 -40
  10. data/examples/http_server.rb +42 -43
  11. data/examples/inout.rb +19 -0
  12. data/examples/nc.rb +36 -0
  13. data/examples/server_client.rb +64 -0
  14. data/examples/snooze.rb +44 -0
  15. data/examples/write_dev_null.rb +16 -0
  16. data/ext/um/extconf.rb +24 -23
  17. data/ext/um/um.c +524 -278
  18. data/ext/um/um.h +146 -44
  19. data/ext/um/um_buffer.c +49 -0
  20. data/ext/um/um_class.c +217 -106
  21. data/ext/um/um_const.c +213 -0
  22. data/ext/um/um_ext.c +4 -0
  23. data/ext/um/um_mutex_class.c +47 -0
  24. data/ext/um/um_op.c +86 -114
  25. data/ext/um/um_queue_class.c +58 -0
  26. data/ext/um/um_sync.c +273 -0
  27. data/ext/um/um_utils.c +49 -4
  28. data/lib/uringmachine/dns_resolver.rb +84 -0
  29. data/lib/uringmachine/version.rb +1 -1
  30. data/lib/uringmachine.rb +28 -0
  31. data/supressions/ruby.supp +71 -0
  32. data/test/helper.rb +8 -0
  33. data/test/test_um.rb +685 -46
  34. data/vendor/liburing/.github/workflows/build.yml +29 -1
  35. data/vendor/liburing/.gitignore +6 -0
  36. data/vendor/liburing/CHANGELOG +16 -0
  37. data/vendor/liburing/CONTRIBUTING.md +165 -0
  38. data/vendor/liburing/configure +64 -0
  39. data/vendor/liburing/examples/Makefile +9 -1
  40. data/vendor/liburing/examples/kdigest.c +405 -0
  41. data/vendor/liburing/examples/proxy.c +75 -8
  42. data/vendor/liburing/examples/reg-wait.c +159 -0
  43. data/vendor/liburing/liburing.pc.in +1 -1
  44. data/vendor/liburing/liburing.spec +1 -1
  45. data/vendor/liburing/src/Makefile +16 -2
  46. data/vendor/liburing/src/include/liburing/io_uring.h +77 -0
  47. data/vendor/liburing/src/include/liburing/sanitize.h +39 -0
  48. data/vendor/liburing/src/include/liburing.h +59 -6
  49. data/vendor/liburing/src/int_flags.h +10 -3
  50. data/vendor/liburing/src/liburing-ffi.map +16 -0
  51. data/vendor/liburing/src/liburing.map +10 -0
  52. data/vendor/liburing/src/queue.c +28 -16
  53. data/vendor/liburing/src/register.c +106 -1
  54. data/vendor/liburing/src/sanitize.c +176 -0
  55. data/vendor/liburing/src/setup.c +47 -19
  56. data/vendor/liburing/src/setup.h +6 -0
  57. data/vendor/liburing/test/35fa71a030ca.c +7 -0
  58. data/vendor/liburing/test/500f9fbadef8.c +2 -0
  59. data/vendor/liburing/test/7ad0e4b2f83c.c +0 -25
  60. data/vendor/liburing/test/917257daa0fe.c +7 -0
  61. data/vendor/liburing/test/Makefile +38 -4
  62. data/vendor/liburing/test/a0908ae19763.c +7 -0
  63. data/vendor/liburing/test/a4c0b3decb33.c +7 -0
  64. data/vendor/liburing/test/accept.c +14 -4
  65. data/vendor/liburing/test/b19062a56726.c +7 -0
  66. data/vendor/liburing/test/bind-listen.c +2 -2
  67. data/vendor/liburing/test/buf-ring-nommap.c +10 -3
  68. data/vendor/liburing/test/buf-ring.c +2 -0
  69. data/vendor/liburing/test/cmd-discard.c +427 -0
  70. data/vendor/liburing/test/coredump.c +7 -0
  71. data/vendor/liburing/test/cq-overflow.c +13 -1
  72. data/vendor/liburing/test/d4ae271dfaae.c +11 -3
  73. data/vendor/liburing/test/defer-taskrun.c +2 -2
  74. data/vendor/liburing/test/defer-tw-timeout.c +4 -1
  75. data/vendor/liburing/test/defer.c +2 -2
  76. data/vendor/liburing/test/double-poll-crash.c +1 -1
  77. data/vendor/liburing/test/eeed8b54e0df.c +2 -0
  78. data/vendor/liburing/test/eventfd.c +0 -1
  79. data/vendor/liburing/test/exit-no-cleanup.c +11 -0
  80. data/vendor/liburing/test/fadvise.c +9 -26
  81. data/vendor/liburing/test/fdinfo.c +9 -1
  82. data/vendor/liburing/test/fifo-nonblock-read.c +69 -0
  83. data/vendor/liburing/test/file-exit-unreg.c +48 -0
  84. data/vendor/liburing/test/file-register.c +14 -2
  85. data/vendor/liburing/test/file-update.c +1 -1
  86. data/vendor/liburing/test/file-verify.c +27 -16
  87. data/vendor/liburing/test/files-exit-hang-timeout.c +1 -2
  88. data/vendor/liburing/test/fixed-buf-iter.c +3 -1
  89. data/vendor/liburing/test/fixed-hugepage.c +12 -1
  90. data/vendor/liburing/test/fsnotify.c +1 -0
  91. data/vendor/liburing/test/futex.c +16 -4
  92. data/vendor/liburing/test/helpers.c +47 -0
  93. data/vendor/liburing/test/helpers.h +6 -0
  94. data/vendor/liburing/test/init-mem.c +5 -3
  95. data/vendor/liburing/test/io-cancel.c +0 -24
  96. data/vendor/liburing/test/io_uring_passthrough.c +4 -0
  97. data/vendor/liburing/test/io_uring_register.c +38 -8
  98. data/vendor/liburing/test/iopoll-leak.c +4 -0
  99. data/vendor/liburing/test/iopoll-overflow.c +1 -1
  100. data/vendor/liburing/test/iopoll.c +3 -3
  101. data/vendor/liburing/test/kallsyms.c +203 -0
  102. data/vendor/liburing/test/link-timeout.c +159 -0
  103. data/vendor/liburing/test/linked-defer-close.c +224 -0
  104. data/vendor/liburing/test/madvise.c +12 -25
  105. data/vendor/liburing/test/min-timeout-wait.c +0 -25
  106. data/vendor/liburing/test/min-timeout.c +0 -25
  107. data/vendor/liburing/test/mkdir.c +6 -0
  108. data/vendor/liburing/test/msg-ring.c +8 -2
  109. data/vendor/liburing/test/napi-test.c +16 -3
  110. data/vendor/liburing/test/no-mmap-inval.c +3 -1
  111. data/vendor/liburing/test/nop.c +44 -0
  112. data/vendor/liburing/test/ooo-file-unreg.c +1 -1
  113. data/vendor/liburing/test/open-close.c +40 -0
  114. data/vendor/liburing/test/openat2.c +37 -14
  115. data/vendor/liburing/test/poll-many.c +13 -7
  116. data/vendor/liburing/test/poll-mshot-update.c +17 -10
  117. data/vendor/liburing/test/poll-v-poll.c +6 -3
  118. data/vendor/liburing/test/pollfree.c +148 -0
  119. data/vendor/liburing/test/read-mshot-empty.c +158 -153
  120. data/vendor/liburing/test/read-mshot-stdin.c +121 -0
  121. data/vendor/liburing/test/read-mshot.c +282 -27
  122. data/vendor/liburing/test/read-write.c +78 -13
  123. data/vendor/liburing/test/recv-msgall-stream.c +3 -0
  124. data/vendor/liburing/test/recv-msgall.c +5 -0
  125. data/vendor/liburing/test/recvsend_bundle-inc.c +680 -0
  126. data/vendor/liburing/test/recvsend_bundle.c +94 -31
  127. data/vendor/liburing/test/reg-fd-only.c +15 -5
  128. data/vendor/liburing/test/reg-wait.c +251 -0
  129. data/vendor/liburing/test/regbuf-clone.c +645 -0
  130. data/vendor/liburing/test/regbuf-merge.c +7 -0
  131. data/vendor/liburing/test/register-restrictions.c +86 -85
  132. data/vendor/liburing/test/rename.c +59 -1
  133. data/vendor/liburing/test/resize-rings.c +643 -0
  134. data/vendor/liburing/test/ringbuf-read.c +5 -0
  135. data/vendor/liburing/test/ringbuf-status.c +5 -1
  136. data/vendor/liburing/test/rsrc_tags.c +1 -1
  137. data/vendor/liburing/test/runtests.sh +16 -1
  138. data/vendor/liburing/test/send-zerocopy.c +59 -0
  139. data/vendor/liburing/test/short-read.c +1 -0
  140. data/vendor/liburing/test/socket.c +43 -0
  141. data/vendor/liburing/test/splice.c +3 -1
  142. data/vendor/liburing/test/sq-poll-dup.c +1 -1
  143. data/vendor/liburing/test/sq-poll-share.c +2 -0
  144. data/vendor/liburing/test/sqpoll-disable-exit.c +8 -0
  145. data/vendor/liburing/test/sqpoll-exit-hang.c +1 -25
  146. data/vendor/liburing/test/sqpoll-sleep.c +40 -33
  147. data/vendor/liburing/test/sqwait.c +136 -0
  148. data/vendor/liburing/test/statx.c +89 -0
  149. data/vendor/liburing/test/stdout.c +2 -0
  150. data/vendor/liburing/test/submit-and-wait.c +1 -25
  151. data/vendor/liburing/test/submit-reuse.c +4 -26
  152. data/vendor/liburing/test/symlink.c +12 -1
  153. data/vendor/liburing/test/sync-cancel.c +56 -22
  154. data/vendor/liburing/test/thread-exit.c +5 -0
  155. data/vendor/liburing/test/timeout-new.c +1 -26
  156. data/vendor/liburing/test/timeout.c +25 -34
  157. data/vendor/liburing/test/unlink.c +94 -1
  158. data/vendor/liburing/test/uring_cmd_ublk.c +1252 -0
  159. data/vendor/liburing/test/waitid.c +62 -8
  160. data/vendor/liburing/test/wq-aff.c +35 -0
  161. data/vendor/liburing/test/xfail_prep_link_timeout_out_of_scope.c +46 -0
  162. data/vendor/liburing/test/xfail_register_buffers_out_of_scope.c +51 -0
  163. metadata +37 -6
  164. data/examples/event_loop.rb +0 -69
  165. data/examples/fibers.rb +0 -105
  166. data/examples/http_server_multishot.rb +0 -57
  167. data/examples/http_server_simpler.rb +0 -34
data/vendor/liburing/test/uring_cmd_ublk.c
@@ -0,0 +1,1252 @@
+ /* SPDX-License-Identifier: MIT */
+ /*
+ * Description: uring_cmd based ublk
+ *
+ * Covers cancellable uring_cmd feature.
+ */
+ #include <unistd.h>
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <stdio.h>
+ #include <stdarg.h>
+ #include <string.h>
+ #include <pthread.h>
+ #include <limits.h>
+ #include <poll.h>
+ #include <sys/syscall.h>
+ #include <sys/mman.h>
+ #include <sys/ioctl.h>
+ #include <sys/inotify.h>
+ #include <sys/wait.h>
+
+ #include "liburing.h"
+ #include "helpers.h"
+ #ifdef CONFIG_HAVE_UBLK_HEADER
+ #include <linux/ublk_cmd.h>
+
+ /****************** part 1: libublk ********************/
+
+ #define CTRL_DEV "/dev/ublk-control"
+ #define UBLKC_DEV "/dev/ublkc"
+ #define UBLKB_DEV "/dev/ublkb"
+ #define UBLK_CTRL_RING_DEPTH 32
+
+ /* queue idle timeout */
+ #define UBLKSRV_IO_IDLE_SECS 20
+
+ #define UBLK_IO_MAX_BYTES 65536
+ #define UBLK_MAX_QUEUES 4
+ #define UBLK_QUEUE_DEPTH 128
+
+ #define UBLK_DBG_DEV (1U << 0)
+ #define UBLK_DBG_QUEUE (1U << 1)
+ #define UBLK_DBG_IO_CMD (1U << 2)
+ #define UBLK_DBG_IO (1U << 3)
+ #define UBLK_DBG_CTRL_CMD (1U << 4)
+ #define UBLK_LOG (1U << 5)
+
+ struct ublk_dev;
+ struct ublk_queue;
+
+ struct ublk_ctrl_cmd_data {
+ __u32 cmd_op;
+ #define CTRL_CMD_HAS_DATA 1
+ #define CTRL_CMD_HAS_BUF 2
+ __u32 flags;
+
+ __u64 data[2];
+ __u64 addr;
+ __u32 len;
+ };
+
+ struct ublk_io {
+ char *buf_addr;
+
+ #define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
+ #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
+ #define UBLKSRV_IO_FREE (1UL << 2)
+ unsigned int flags;
+
+ unsigned int result;
+ };
+
+ struct ublk_tgt_ops {
+ const char *name;
+ int (*init_tgt)(struct ublk_dev *);
+ void (*deinit_tgt)(struct ublk_dev *);
+
+ int (*queue_io)(struct ublk_queue *, int tag);
+ void (*tgt_io_done)(struct ublk_queue *,
+ int tag, const struct io_uring_cqe *);
+ };
+
+ struct ublk_tgt {
+ unsigned long dev_size;
+ const struct ublk_tgt_ops *ops;
+ struct ublk_params params;
+ };
+
+ struct ublk_queue {
+ int q_id;
+ int q_depth;
+ unsigned int cmd_inflight;
+ unsigned int io_inflight;
+ struct ublk_dev *dev;
+ const struct ublk_tgt_ops *tgt_ops;
+ char *io_cmd_buf;
+ struct io_uring ring;
+ struct ublk_io ios[UBLK_QUEUE_DEPTH];
+ #define UBLKSRV_QUEUE_STOPPING (1U << 0)
+ #define UBLKSRV_QUEUE_IDLE (1U << 1)
+ unsigned state;
+ pid_t tid;
+ pthread_t thread;
+ };
+
+ struct ublk_dev {
+ struct ublk_tgt tgt;
+ struct ublksrv_ctrl_dev_info dev_info;
+ struct ublk_queue q[UBLK_MAX_QUEUES];
+
+ int fds[2]; /* fds[0] points to /dev/ublkcN */
+ int nr_fds;
+ int ctrl_fd;
+ struct io_uring ring;
+ };
+
+ #ifndef offsetof
+ #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+ #endif
+
+ #ifndef container_of
+ #define container_of(ptr, type, member) ({ \
+ unsigned long __mptr = (unsigned long)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); })
+ #endif
+
+ #define round_up(val, rnd) \
+ (((val) + ((rnd) - 1)) & ~((rnd) - 1))
+
+ static unsigned int ublk_dbg_mask = 0;
+
+ static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
+
+ static inline int is_target_io(__u64 user_data)
+ {
+ return (user_data & (1ULL << 63)) != 0;
+ }
+
+ static inline __u64 build_user_data(unsigned tag, unsigned op,
+ unsigned tgt_data, unsigned is_target_io)
+ {
+ assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
+
+ return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
+ }
+
+ static inline unsigned int user_data_to_tag(__u64 user_data)
+ {
+ return user_data & 0xffff;
+ }
+
+ static inline unsigned int user_data_to_op(__u64 user_data)
+ {
+ return (user_data >> 16) & 0xff;
+ }
+
+ static void ublk_err(const char *fmt, ...)
+ {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ }
+
+ static void ublk_dbg(int level, const char *fmt, ...)
+ {
+ if (level & ublk_dbg_mask) {
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ }
+ }
+
+ static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
+ {
+ return (void *)&sqe->cmd;
+ }
+
+ static inline void ublk_mark_io_done(struct ublk_io *io, int res)
+ {
+ io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
+ io->result = res;
+ }
+
+ static inline const struct ublksrv_io_desc *ublk_get_iod(
+ const struct ublk_queue *q, int tag)
+ {
+ return (struct ublksrv_io_desc *)
+ &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
+ }
+
+ static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
+ __u32 cmd_op)
+ {
+ __u32 *addr = (__u32 *)&sqe->off;
+
+ addr[0] = cmd_op;
+ addr[1] = 0;
+ }
+
+ static inline int ublk_setup_ring(struct io_uring *r, int depth,
+ int cq_depth, unsigned flags)
+ {
+ struct io_uring_params p;
+
+ memset(&p, 0, sizeof(p));
+ p.flags = flags | IORING_SETUP_CQSIZE;
+ p.cq_entries = cq_depth;
+
+ return io_uring_queue_init_params(depth, r, &p);
+ }
+
+ static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
+ struct io_uring_sqe *sqe,
+ struct ublk_ctrl_cmd_data *data)
+ {
+ struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
+
+ sqe->fd = dev->ctrl_fd;
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->ioprio = 0;
+
+ if (data->flags & CTRL_CMD_HAS_BUF) {
+ cmd->addr = data->addr;
+ cmd->len = data->len;
+ }
+
+ if (data->flags & CTRL_CMD_HAS_DATA)
+ cmd->data[0] = data->data[0];
+
+ cmd->dev_id = info->dev_id;
+ cmd->queue_id = -1;
+
+ ublk_set_sqe_cmd_op(sqe, data->cmd_op);
+
+ io_uring_sqe_set_data(sqe, cmd);
+ }
+
+ static int __ublk_ctrl_cmd(struct ublk_dev *dev,
+ struct ublk_ctrl_cmd_data *data)
+ {
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int ret = -EINVAL;
+
+ sqe = io_uring_get_sqe(&dev->ring);
+ if (!sqe) {
+ ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
+ return ret;
+ }
+
+ ublk_ctrl_init_cmd(dev, sqe, data);
+
+ ret = io_uring_submit(&dev->ring);
+ if (ret < 0) {
+ ublk_err("uring submit ret %d\n", ret);
+ return ret;
+ }
+
+ ret = io_uring_wait_cqe(&dev->ring, &cqe);
+ if (ret < 0) {
+ ublk_err("wait cqe: %s\n", strerror(-ret));
+ return ret;
+ }
+ io_uring_cqe_seen(&dev->ring, cqe);
+
+ return cqe->res;
+ }
+
+ static int ublk_ctrl_start_dev(struct ublk_dev *dev,
+ int daemon_pid)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_START_DEV,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_add_dev(struct ublk_dev *dev)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_ADD_DEV,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) &dev->dev_info,
+ .len = sizeof(struct ublksrv_ctrl_dev_info),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_del_dev(struct ublk_dev *dev)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_DEL_DEV,
+ .flags = 0,
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_get_info(struct ublk_dev *dev)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_DEV_INFO,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) &dev->dev_info,
+ .len = sizeof(struct ublksrv_ctrl_dev_info),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_set_params(struct ublk_dev *dev,
+ struct ublk_params *params)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_SET_PARAMS,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) params,
+ .len = sizeof(*params),
+ };
+ params->len = sizeof(*params);
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static int ublk_ctrl_get_features(struct ublk_dev *dev,
+ __u64 *features)
+ {
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_FEATURES,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) features,
+ .len = sizeof(*features),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+ }
+
+ static void ublk_ctrl_deinit(struct ublk_dev *dev)
+ {
+ close(dev->ctrl_fd);
+ free(dev);
+ }
+
+ static struct ublk_dev *ublk_ctrl_init(void)
+ {
+ struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
+ struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ int ret;
+
+ dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
+ if (dev->ctrl_fd < 0) {
+ free(dev);
+ return NULL;
+ }
+
+ info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
+
+ ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
+ UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
+ if (ret < 0) {
+ ublk_err("queue_init: %s\n", strerror(-ret));
+ free(dev);
+ return NULL;
+ }
+ dev->nr_fds = 1;
+
+ return dev;
+ }
+
+ static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
+ {
+ int size = q->q_depth * sizeof(struct ublksrv_io_desc);
+ unsigned int page_sz = getpagesize();
+
+ return round_up(size, page_sz);
+ }
+
+ static void ublk_queue_deinit(struct ublk_queue *q)
+ {
+ int i;
+ int nr_ios = q->q_depth;
+
+ io_uring_unregister_ring_fd(&q->ring);
+
+ if (q->ring.ring_fd > 0) {
+ io_uring_unregister_files(&q->ring);
+ close(q->ring.ring_fd);
+ q->ring.ring_fd = -1;
+ }
+
+ if (q->io_cmd_buf)
+ munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
+
+ for (i = 0; i < nr_ios; i++)
+ free(q->ios[i].buf_addr);
+ }
+
+ static int ublk_queue_init(struct ublk_queue *q)
+ {
+ struct ublk_dev *dev = q->dev;
+ int depth = dev->dev_info.queue_depth;
+ int i, ret = -1;
+ int cmd_buf_size, io_buf_size;
+ unsigned long off;
+ int ring_depth = depth, cq_depth = depth;
+
+ q->tgt_ops = dev->tgt.ops;
+ q->state = 0;
+ q->q_depth = depth;
+ q->cmd_inflight = 0;
+ q->tid = gettid();
+
+ cmd_buf_size = ublk_queue_cmd_buf_sz(q);
+ off = UBLKSRV_CMD_BUF_OFFSET +
+ q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
+ q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
+ MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
+ if (q->io_cmd_buf == MAP_FAILED) {
+ ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
+ q->dev->dev_info.dev_id, q->q_id);
+ goto fail;
+ }
+
+ io_buf_size = dev->dev_info.max_io_buf_bytes;
+ for (i = 0; i < q->q_depth; i++) {
+ q->ios[i].buf_addr = NULL;
+
+ if (posix_memalign((void **)&q->ios[i].buf_addr,
+ getpagesize(), io_buf_size)) {
+ ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
+ dev->dev_info.dev_id, q->q_id, i);
+ goto fail;
+ }
+ q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
+ }
+
+ ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
+ IORING_SETUP_COOP_TASKRUN);
+ if (ret < 0) {
+ ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
+ q->dev->dev_info.dev_id, q->q_id, ret);
+ goto fail;
+ }
+
+ io_uring_register_ring_fd(&q->ring);
+
+ ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d register files failed %d\n",
+ q->dev->dev_info.dev_id, q->q_id, ret);
+ goto fail;
+ }
+
+ return 0;
+ fail:
+ ublk_queue_deinit(q);
+ ublk_err("ublk dev %d queue %d failed\n",
+ dev->dev_info.dev_id, q->q_id);
+ return -ENOMEM;
+ }
+
+ static int ublk_dev_prep(struct ublk_dev *dev)
+ {
+ int dev_id = dev->dev_info.dev_id;
+ char buf[64];
+ int ret = 0;
+
+ snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
+ dev->fds[0] = open(buf, O_RDWR);
+ if (dev->fds[0] < 0) {
+ ret = -EBADF;
+ ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
+ goto fail;
+ }
+
+ if (dev->tgt.ops->init_tgt)
+ ret = dev->tgt.ops->init_tgt(dev);
+
+ return ret;
+ fail:
+ close(dev->fds[0]);
+ return ret;
+ }
+
+ static void ublk_dev_unprep(struct ublk_dev *dev)
+ {
+ if (dev->tgt.ops->deinit_tgt)
+ dev->tgt.ops->deinit_tgt(dev);
+ close(dev->fds[0]);
+ }
+
+ static int ublk_queue_io_cmd(struct ublk_queue *q,
+ struct ublk_io *io, unsigned tag)
+ {
+ struct ublksrv_io_cmd *cmd;
+ struct io_uring_sqe *sqe;
+ unsigned int cmd_op = 0;
+ __u64 user_data;
+
+ /* only freed io can be issued */
+ if (!(io->flags & UBLKSRV_IO_FREE))
+ return 0;
+
+ /* we issue because we need either fetching or committing */
+ if (!(io->flags &
+ (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
+ return 0;
+
+ if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+ cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
+ else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
+ cmd_op = UBLK_U_IO_FETCH_REQ;
+
+ sqe = io_uring_get_sqe(&q->ring);
+ if (!sqe) {
+ ublk_err("%s: run out of sqe %d, tag %d\n",
+ __func__, q->q_id, tag);
+ return -1;
+ }
+
+ cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
+
+ if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
+ cmd->result = io->result;
+
+ /* These fields should be written once, never change */
+ ublk_set_sqe_cmd_op(sqe, cmd_op);
+ sqe->fd = 0; /* dev->fds[0] */
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->flags = IOSQE_FIXED_FILE;
+ sqe->rw_flags = 0;
+ cmd->tag = tag;
+ cmd->addr = (__u64) (uintptr_t) io->buf_addr;
+ cmd->q_id = q->q_id;
+
+ user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
+ io_uring_sqe_set_data64(sqe, user_data);
+
+ io->flags = 0;
+
+ q->cmd_inflight += 1;
+
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
+ __func__, q->q_id, tag, cmd_op,
+ io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
+ return 1;
+ }
+
+ static int ublk_complete_io(struct ublk_queue *q,
+ unsigned tag, int res)
+ {
+ struct ublk_io *io = &q->ios[tag];
+
+ ublk_mark_io_done(io, res);
+
+ return ublk_queue_io_cmd(q, io, tag);
+ }
+
+ static void ublk_submit_fetch_commands(struct ublk_queue *q)
+ {
+ int i = 0;
+
+ for (i = 0; i < q->q_depth; i++)
+ ublk_queue_io_cmd(q, &q->ios[i], i);
+ }
+
+ static int ublk_queue_is_idle(struct ublk_queue *q)
+ {
+ return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
+ }
+
+ static int ublk_queue_is_done(struct ublk_queue *q)
+ {
+ return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
+ }
+
+ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
+ struct io_uring_cqe *cqe)
+ {
+ unsigned tag = user_data_to_tag(cqe->user_data);
+
+ if (cqe->res < 0 && cqe->res != -EAGAIN)
+ ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
+ __func__, cqe->res, q->q_id,
+ user_data_to_tag(cqe->user_data),
+ user_data_to_op(cqe->user_data));
+
+ if (q->tgt_ops->tgt_io_done)
+ q->tgt_ops->tgt_io_done(q, tag, cqe);
+ }
+
+ static void ublk_handle_cqe(struct io_uring *r,
+ struct io_uring_cqe *cqe, void *data)
+ {
+ struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
+ unsigned tag = user_data_to_tag(cqe->user_data);
+ unsigned cmd_op = user_data_to_op(cqe->user_data);
+ int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
+ !(q->state & UBLKSRV_QUEUE_STOPPING);
+ struct ublk_io *io;
+
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
+ __func__, cqe->res, q->q_id, tag, cmd_op,
+ is_target_io(cqe->user_data),
+ (q->state & UBLKSRV_QUEUE_STOPPING));
+
+ /* Don't retrieve io in case of target io */
+ if (is_target_io(cqe->user_data)) {
+ ublksrv_handle_tgt_cqe(q, cqe);
+ return;
+ }
+
+ io = &q->ios[tag];
+ q->cmd_inflight--;
+
+ if (!fetch) {
+ q->state |= UBLKSRV_QUEUE_STOPPING;
+ io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
+ }
+
+ if (cqe->res == UBLK_IO_RES_OK) {
+ assert(tag < q->q_depth);
+ q->tgt_ops->queue_io(q, tag);
+ } else {
+ /*
+ * COMMIT_REQ will be completed immediately since no fetching
+ * piggyback is required.
+ *
+ * Marking IO_FREE only, then this io won't be issued since
+ * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
+ *
+ * */
+ io->flags = UBLKSRV_IO_FREE;
+ }
+ }
+
+ static int ublk_reap_events_uring(struct io_uring *r)
+ {
+ struct io_uring_cqe *cqe;
+ unsigned head;
+ int count = 0;
+
+ io_uring_for_each_cqe(r, head, cqe) {
+ ublk_handle_cqe(r, cqe, NULL);
+ count += 1;
+ }
+ io_uring_cq_advance(r, count);
+
+ return count;
+ }
+
+ static int ublk_process_io(struct ublk_queue *q)
+ {
+ int ret, reapped;
+
+ ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
+ q->dev->dev_info.dev_id,
+ q->q_id, io_uring_sq_ready(&q->ring),
+ q->cmd_inflight,
+ (q->state & UBLKSRV_QUEUE_STOPPING));
+
+ if (ublk_queue_is_done(q))
+ return -ENODEV;
+
+ ret = io_uring_submit_and_wait(&q->ring, 1);
+ reapped = ublk_reap_events_uring(&q->ring);
+
+ ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
+ ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
+ (q->state & UBLKSRV_QUEUE_IDLE));
+
+ return reapped;
+ }
+
+ static void *ublk_io_handler_fn(void *data)
+ {
+ struct ublk_queue *q = data;
+ int dev_id = q->dev->dev_info.dev_id;
+ int ret;
+
+ ret = ublk_queue_init(q);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d init queue failed\n",
+ dev_id, q->q_id);
+ return NULL;
+ }
+ ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
+ q->tid, dev_id, q->q_id);
+
+ /* submit all io commands to ublk driver */
+ ublk_submit_fetch_commands(q);
+ do {
+ if (ublk_process_io(q) < 0)
+ break;
+ } while (1);
+
+ ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
+ ublk_queue_deinit(q);
+ return NULL;
+ }
+
+ static void ublk_set_parameters(struct ublk_dev *dev)
+ {
+ int ret;
+
+ ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
+ if (ret)
+ ublk_err("dev %d set basic parameter failed %d\n",
+ dev->dev_info.dev_id, ret);
+ }
+
+ static int ublk_start_daemon(struct ublk_dev *dev)
+ {
+ int ret, i;
+ void *thread_ret;
+ const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
+
+ if (daemon(1, 1) < 0)
+ return -errno;
+
+ ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
+
+ ret = ublk_dev_prep(dev);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < dinfo->nr_hw_queues; i++) {
+ dev->q[i].dev = dev;
+ dev->q[i].q_id = i;
+ pthread_create(&dev->q[i].thread, NULL,
+ ublk_io_handler_fn,
+ &dev->q[i]);
+ }
+
+ /* everything is fine now, start us */
+ ublk_set_parameters(dev);
+ ret = ublk_ctrl_start_dev(dev, getpid());
+ if (ret < 0) {
+ ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
+ goto fail;
+ }
+
+ /* wait until we are terminated */
+ for (i = 0; i < dinfo->nr_hw_queues; i++)
+ pthread_join(dev->q[i].thread, &thread_ret);
+ fail:
+ ublk_dev_unprep(dev);
+ ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
+
+ return ret;
+ }
+
+ static int wait_ublk_dev(char *dev_name, int evt_mask, unsigned timeout)
+ {
+ #define EV_SIZE (sizeof(struct inotify_event))
+ #define EV_BUF_LEN (128 * (EV_SIZE + 16))
+ struct pollfd pfd;
+ int fd, wd;
+ int ret = -EINVAL;
+
+ fd = inotify_init();
+ if (fd < 0) {
+ ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
+ return fd;
+ }
+
+ wd = inotify_add_watch(fd, "/dev", evt_mask);
+ if (wd == -1) {
+ ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
+ goto fail;
+ }
+
+ pfd.fd = fd;
+ pfd.events = POLL_IN;
+ while (1) {
+ int i = 0;
+ char buffer[EV_BUF_LEN];
+ ret = poll(&pfd, 1, 1000 * timeout);
+
+ if (ret == -1) {
+ ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
+ goto rm_watch;
+ } else if (ret == 0) {
+ ublk_err("%s: poll inotify timeout\n", __func__);
+ ret = -ENOENT;
+ goto rm_watch;
+ }
+
+ ret = read(fd, buffer, EV_BUF_LEN);
+ if (ret < 0) {
+ ublk_err("%s: read inotify fd failed\n", __func__);
+ goto rm_watch;
+ }
+
+ while (i < ret) {
+ struct inotify_event *event = (struct inotify_event *)&buffer[i];
+
+ ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
+ __func__, event->mask, event->name);
+ if (event->mask & evt_mask) {
+ if (!strcmp(event->name, dev_name)) {
+ ret = 0;
+ goto rm_watch;
+ }
+ }
+ i += EV_SIZE + event->len;
+ }
+ }
+ rm_watch:
+ inotify_rm_watch(fd, wd);
+ fail:
+ close(fd);
+ return ret;
+ }
+
+ static int ublk_stop_io_daemon(const struct ublk_dev *dev)
+ {
+ int daemon_pid = dev->dev_info.ublksrv_pid;
+ int dev_id = dev->dev_info.dev_id;
+ char ublkc[64];
+ int ret;
+
+ /*
+ * Wait until ublk char device is closed, when our daemon is shutdown
+ */
+ snprintf(ublkc, sizeof(ublkc), "%s%d", "ublkc", dev_id);
+ ret = wait_ublk_dev(ublkc, IN_CLOSE_WRITE, 10);
+ waitpid(dev->dev_info.ublksrv_pid, NULL, 0);
+ ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
+ __func__, daemon_pid, dev_id, ret);
+
+ return ret;
+ }
+
+ static int cmd_dev_add(char *tgt_type, int *exp_id, unsigned nr_queues,
+ unsigned depth)
+ {
+ const struct ublk_tgt_ops *ops;
+ struct ublksrv_ctrl_dev_info *info;
+ struct ublk_dev *dev;
+ int dev_id = *exp_id;
+ char ublkb[64];
+ int ret;
+
+ ops = ublk_find_tgt(tgt_type);
+ if (!ops) {
+ ublk_err("%s: no such tgt type, type %s\n",
+ __func__, tgt_type);
+ return -ENODEV;
+ }
+
+ if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
+ ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
+ __func__, nr_queues, depth);
+ return -EINVAL;
+ }
+
+ dev = ublk_ctrl_init();
+ if (!dev) {
+ ublk_err("%s: can't alloc dev id %d, type %s\n",
+ __func__, dev_id, tgt_type);
+ return -ENOMEM;
+ }
+
+ info = &dev->dev_info;
+ info->dev_id = dev_id;
+ info->nr_hw_queues = nr_queues;
+ info->queue_depth = depth;
+ dev->tgt.ops = ops;
+
+ ret = ublk_ctrl_add_dev(dev);
+ if (ret < 0) {
+ ublk_err("%s: can't add dev id %d, type %s ret %d\n",
+ __func__, dev_id, tgt_type, ret);
+ goto fail;
+ }
+
+ switch (fork()) {
+ case -1:
+ goto fail;
+ case 0:
+ ublk_start_daemon(dev);
+ return 0;
+ }
+
+ /*
+ * Wait until ublk disk is added, when our daemon is started
+ * successfully
+ */
+ snprintf(ublkb, sizeof(ublkb), "%s%u", "ublkb", dev->dev_info.dev_id);
+ ret = wait_ublk_dev(ublkb, IN_CREATE, 3);
+ if (ret < 0) {
+ ublk_err("%s: can't start daemon id %d, type %s\n",
+ __func__, dev_id, tgt_type);
+ ublk_ctrl_del_dev(dev);
+ } else {
+ *exp_id = dev->dev_info.dev_id;
+ }
+ fail:
+ ublk_ctrl_deinit(dev);
+ return ret;
+ }
+
+ static int cmd_dev_del_by_kill(int number)
+ {
+ struct ublk_dev *dev;
+ int ret;
+
+ dev = ublk_ctrl_init();
+ dev->dev_info.dev_id = number;
+
+ ret = ublk_ctrl_get_info(dev);
+ if (ret < 0)
+ goto fail;
+
+ /* simulate one ublk daemon panic */
+ kill(dev->dev_info.ublksrv_pid, 9);
+
+ ret = ublk_stop_io_daemon(dev);
+ if (ret < 0)
+ ublk_err("%s: can't stop daemon id %d\n", __func__, number);
+ ublk_ctrl_del_dev(dev);
+ fail:
+ if (ret >= 0)
+ ret = ublk_ctrl_get_info(dev);
+ ublk_ctrl_deinit(dev);
+
+ return (ret != 0) ? 0 : -EIO;
+ }
+
+ /****************** part 2: target implementation ********************/
+
+ static int ublk_null_tgt_init(struct ublk_dev *dev)
+ {
+ const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ unsigned long dev_size = 250UL << 30;
+
+ dev->tgt.dev_size = dev_size;
+ dev->tgt.params = (struct ublk_params) {
+ .types = UBLK_PARAM_TYPE_BASIC,
+ .basic = {
+ .logical_bs_shift = 9,
+ .physical_bs_shift = 12,
+ .io_opt_shift = 12,
+ .io_min_shift = 9,
+ .max_sectors = info->max_io_buf_bytes >> 9,
+ .dev_sectors = dev_size >> 9,
+ },
+ };
+
+ return 0;
+ }
+
+ static int ublk_null_queue_io(struct ublk_queue *q, int tag)
+ {
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+
+ ublk_complete_io(q, tag, iod->nr_sectors << 9);
+
+ return 0;
+ }
+
+ static const struct ublk_tgt_ops tgt_ops_list[] = {
+ {
+ .name = "null",
+ .init_tgt = ublk_null_tgt_init,
+ .queue_io = ublk_null_queue_io,
+ },
+ };
+
+ static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
+ {
+ const struct ublk_tgt_ops *ops;
+ int i;
+
+ if (name == NULL)
+ return NULL;
+
+ for (i = 0; sizeof(tgt_ops_list) / sizeof(*ops); i++)
+ if (strcmp(tgt_ops_list[i].name, name) == 0)
+ return &tgt_ops_list[i];
+ return NULL;
+ }
+
+
+ /****************** part 3: IO test over ublk disk ********************/
+
+ #include "helpers.h"
+ #include "liburing.h"
+ #define BS 4096
+ #define BUFFERS 128
+
+ struct io_ctx {
+ int dev_id;
+ int write;
+ int seq;
+
+ /* output */
+ int res;
+ pthread_t handle;
+ };
+
+ static int __test_io(struct io_uring *ring, int fd, int write,
+ int seq, struct iovec *vecs, int exp_len, off_t start)
+ {
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int i, ret;
+ off_t offset;
+
+ offset = start;
+ for (i = 0; i < BUFFERS; i++) {
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ if (!seq)
+ offset = start + BS * (rand() % BUFFERS);
+ if (write) {
+ io_uring_prep_write_fixed(sqe, fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ } else {
+ io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ }
+ sqe->user_data = i;
+ if (seq)
+ offset += BS;
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != BUFFERS) {
+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+ goto err;
+ }
+
+ for (i = 0; i < BUFFERS; i++) {
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ goto err;
+ }
+ if (exp_len == -1) {
+ int iov_len = vecs[cqe->user_data].iov_len;
+
+ if (cqe->res != iov_len) {
+ fprintf(stderr, "cqe res %d, wanted %d\n",
+ cqe->res, iov_len);
+ goto err;
+ }
+ } else if (cqe->res != exp_len) {
+ fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
+ goto err;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ }
+
+ return 0;
+ err:
+ return 1;
+ }
+
+ /* Run IO over ublk block device */
+ static int test_io(struct io_ctx *ctx)
+ {
+ struct io_uring ring;
+ int ret, ring_flags = 0;
+ char buf[256];
+ int fd = -1;
+ off_t offset = 0;
+ unsigned long long bytes;
+ int open_flags = O_DIRECT;
+ struct iovec *vecs = t_create_buffers(BUFFERS, BS);
+
+ ret = t_create_ring(BUFFERS, &ring, ring_flags);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "ring create failed: %d\n", ret);
+ return 1;
+ }
+
+ snprintf(buf, sizeof(buf), "%s%d", UBLKB_DEV, ctx->dev_id);
+
+ if (ctx->write)
+ open_flags |= O_WRONLY;
+ else
+ open_flags |= O_RDONLY;
+ fd = open(buf, open_flags);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ return 0;
+ return 1;
+ }
+
+ if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
+ return 1;
+
+ ret = t_register_buffers(&ring, vecs, BUFFERS);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ return 1;
+ }
+
+ for (offset = 0; offset < bytes; offset += BS * BUFFERS) {
+ ret = __test_io(&ring, fd, ctx->write, ctx->seq, vecs, BS,
+ offset);
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "/dev/ublkb%d read failed: offset %lu ret %d\n",
+ ctx->dev_id, (unsigned long) offset, ret);
+ break;
+ }
+ }
+
+ close(fd);
+ io_uring_unregister_buffers(&ring);
+ io_uring_queue_exit(&ring);
+
+ return ret;
+ }
+
+ static void *test_io_fn(void *data)
+ {
+ struct io_ctx *ctx = data;
+
+ ctx->res = test_io(ctx);
+
+ return data;
+ }
+
+ static void ignore_stderr(void)
+ {
+ int devnull = open("/dev/null", O_WRONLY);
+
+ if (devnull >= 0) {
+ dup2(devnull, fileno(stderr));
+ close(devnull);
+ }
+ }
+
+ static int test_io_worker(int dev_id)
+ {
+ const int nr_jobs = 4;
+ struct io_ctx ctx[nr_jobs];
+ int i, ret = 0;
+
+ for (i = 0; i < nr_jobs; i++) {
+ ctx[i].dev_id = dev_id;
+ ctx[i].write = (i & 0x1) ? 0 : 1;
+ ctx[i].seq = 1;
+
+ pthread_create(&ctx[i].handle, NULL, test_io_fn, &ctx[i]);
+ }
+
+ for (i = 0; i < nr_jobs; i++) {
+ pthread_join(ctx[i].handle, NULL);
+
+ if (!ret && ctx[i].res)
+ ret = ctx[i].res;
+ }
+
+ return ret;
+ }
+
+ /*
+ * Run IO over created ublk device, meantime delete this ublk device
+ *
+ * Cover cancellable uring_cmd
+ * */
+ static int test_del_ublk_with_io(void)
+ {
+ const unsigned wait_ms = 200;
+ char *tgt_type = "null";
+ int dev_id = -1;
+ int ret, pid;
+
+ ret = cmd_dev_add(tgt_type, &dev_id, 2, BUFFERS);
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ return T_EXIT_FAIL;
+ }
+
+ switch ((pid = fork())) {
+ case -1:
+ fprintf(stderr, "fork failed\n");
+ return T_EXIT_FAIL;
+ case 0:
+ /* io error is expected since the parent is killing ublk */
+ ignore_stderr();
+ test_io_worker(dev_id);
+ return 0;
+ default:
+ /*
+ * Wait a little while until ublk IO pipeline is warm up,
+ * then try to shutdown ublk device by `kill -9 $ublk_daemon_pid`.
+ *
+ * cancellable uring_cmd code path can be covered in this way.
+ */
+ usleep(wait_ms * 1000);
+ ret = cmd_dev_del_by_kill(dev_id);
+ waitpid(pid, NULL, 0);
+ return ret;
+ }
+ }
+
+ int main(int argc, char *argv[])
+ {
+ const int nr_loop = 4;
+ struct ublk_dev *dev;
+ __u64 features;
+ int ret, i;
+
+ if (argc > 1)
+ return T_EXIT_SKIP;
+
+ dev = ublk_ctrl_init();
+ /* ublk isn't supported or the module isn't loaded */
+ if (!dev)
+ return T_EXIT_SKIP;
+
+ /* kernel doesn't support get_features */
+ ret = ublk_ctrl_get_features(dev, &features);
+ if (ret < 0)
+ return T_EXIT_SKIP;
+
+ if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
+ return T_EXIT_SKIP;
+
+ for (i = 0; i < nr_loop; i++) {
+ if (test_del_ublk_with_io())
+ return T_EXIT_FAIL;
+ }
+ ublk_ctrl_deinit(dev);
+ return T_EXIT_PASS;
+ }
+ #else
+ int main(int argc, char *argv[])
+ {
+ return T_EXIT_SKIP;
+ }
+ #endif