polyphony 0.98 → 0.99

Sign up to get free protection for your applications and to get access to all the features.
Files changed (182) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -0
  3. data/examples/io/https_server_sni_2.rb +14 -8
  4. data/ext/polyphony/extconf.rb +6 -5
  5. data/ext/polyphony/libev.h +0 -2
  6. data/lib/polyphony/core/sync.rb +53 -0
  7. data/lib/polyphony/extensions/io.rb +37 -14
  8. data/lib/polyphony/extensions/openssl.rb +1 -1
  9. data/lib/polyphony/version.rb +1 -1
  10. data/test/test_io.rb +6 -7
  11. data/test/test_socket.rb +61 -0
  12. data/test/test_sync.rb +42 -1
  13. data/vendor/liburing/.github/workflows/build.yml +7 -16
  14. data/vendor/liburing/.gitignore +5 -0
  15. data/vendor/liburing/CHANGELOG +23 -1
  16. data/vendor/liburing/Makefile +4 -3
  17. data/vendor/liburing/Makefile.common +1 -0
  18. data/vendor/liburing/README +48 -0
  19. data/vendor/liburing/configure +76 -6
  20. data/vendor/liburing/debian/changelog +11 -0
  21. data/vendor/liburing/debian/control +7 -16
  22. data/vendor/liburing/debian/liburing-dev.manpages +3 -6
  23. data/vendor/liburing/debian/liburing2.install +1 -0
  24. data/vendor/liburing/debian/liburing2.symbols +56 -0
  25. data/vendor/liburing/debian/rules +15 -68
  26. data/vendor/liburing/examples/Makefile +4 -0
  27. data/vendor/liburing/examples/io_uring-close-test.c +123 -0
  28. data/vendor/liburing/examples/io_uring-udp.c +1 -1
  29. data/vendor/liburing/examples/send-zerocopy.c +315 -56
  30. data/vendor/liburing/examples/ucontext-cp.c +2 -17
  31. data/vendor/liburing/liburing-ffi.pc.in +12 -0
  32. data/vendor/liburing/liburing.pc.in +1 -1
  33. data/vendor/liburing/liburing.spec +1 -1
  34. data/vendor/liburing/make-debs.sh +3 -3
  35. data/vendor/liburing/man/IO_URING_CHECK_VERSION.3 +1 -0
  36. data/vendor/liburing/man/IO_URING_VERSION_MAJOR.3 +1 -0
  37. data/vendor/liburing/man/IO_URING_VERSION_MINOR.3 +1 -0
  38. data/vendor/liburing/man/io_uring_buf_ring_add.3 +6 -6
  39. data/vendor/liburing/man/io_uring_check_version.3 +72 -0
  40. data/vendor/liburing/man/io_uring_close_ring_fd.3 +43 -0
  41. data/vendor/liburing/man/io_uring_major_version.3 +1 -0
  42. data/vendor/liburing/man/io_uring_minor_version.3 +1 -0
  43. data/vendor/liburing/man/io_uring_prep_accept.3 +1 -1
  44. data/vendor/liburing/man/io_uring_prep_fgetxattr.3 +1 -0
  45. data/vendor/liburing/man/io_uring_prep_fsetxattr.3 +1 -0
  46. data/vendor/liburing/man/io_uring_prep_getxattr.3 +61 -0
  47. data/vendor/liburing/man/io_uring_prep_link_timeout.3 +94 -0
  48. data/vendor/liburing/man/io_uring_prep_msg_ring.3 +22 -2
  49. data/vendor/liburing/man/io_uring_prep_msg_ring_cqe_flags.3 +1 -0
  50. data/vendor/liburing/man/io_uring_prep_poll_add.3 +1 -1
  51. data/vendor/liburing/man/io_uring_prep_provide_buffers.3 +18 -9
  52. data/vendor/liburing/man/io_uring_prep_readv.3 +3 -3
  53. data/vendor/liburing/man/io_uring_prep_readv2.3 +3 -3
  54. data/vendor/liburing/man/io_uring_prep_recv.3 +5 -5
  55. data/vendor/liburing/man/io_uring_prep_recvmsg.3 +4 -4
  56. data/vendor/liburing/man/io_uring_prep_send.3 +9 -0
  57. data/vendor/liburing/man/io_uring_prep_send_set_addr.3 +38 -0
  58. data/vendor/liburing/man/io_uring_prep_send_zc.3 +39 -7
  59. data/vendor/liburing/man/io_uring_prep_send_zc_fixed.3 +1 -0
  60. data/vendor/liburing/man/io_uring_prep_sendmsg.3 +20 -0
  61. data/vendor/liburing/man/io_uring_prep_sendmsg_zc.3 +1 -0
  62. data/vendor/liburing/man/io_uring_prep_setxattr.3 +64 -0
  63. data/vendor/liburing/man/io_uring_prep_splice.3 +40 -0
  64. data/vendor/liburing/man/io_uring_prep_writev.3 +2 -2
  65. data/vendor/liburing/man/io_uring_prep_writev2.3 +2 -2
  66. data/vendor/liburing/man/io_uring_recvmsg_out.3 +13 -9
  67. data/vendor/liburing/man/io_uring_register.2 +15 -9
  68. data/vendor/liburing/man/io_uring_register_buf_ring.3 +4 -4
  69. data/vendor/liburing/man/io_uring_register_buffers.3 +49 -6
  70. data/vendor/liburing/man/io_uring_register_buffers_sparse.3 +1 -0
  71. data/vendor/liburing/man/io_uring_register_buffers_tags.3 +1 -0
  72. data/vendor/liburing/man/io_uring_register_buffers_update_tag.3 +1 -0
  73. data/vendor/liburing/man/io_uring_register_files.3 +60 -5
  74. data/vendor/liburing/man/io_uring_register_files_tags.3 +1 -0
  75. data/vendor/liburing/man/io_uring_register_files_update.3 +1 -0
  76. data/vendor/liburing/man/io_uring_register_files_update_tag.3 +1 -0
  77. data/vendor/liburing/man/io_uring_setup.2 +31 -2
  78. data/vendor/liburing/man/io_uring_wait_cqe_timeout.3 +1 -1
  79. data/vendor/liburing/src/Makefile +25 -3
  80. data/vendor/liburing/src/ffi.c +15 -0
  81. data/vendor/liburing/src/include/liburing/io_uring.h +30 -7
  82. data/vendor/liburing/src/include/liburing.h +190 -148
  83. data/vendor/liburing/src/int_flags.h +1 -0
  84. data/vendor/liburing/src/lib.h +5 -16
  85. data/vendor/liburing/src/liburing-ffi.map +172 -0
  86. data/vendor/liburing/src/liburing.map +11 -0
  87. data/vendor/liburing/src/nolibc.c +9 -2
  88. data/vendor/liburing/src/queue.c +2 -2
  89. data/vendor/liburing/src/register.c +66 -96
  90. data/vendor/liburing/src/setup.c +5 -4
  91. data/vendor/liburing/src/version.c +21 -0
  92. data/vendor/liburing/test/232c93d07b74.c +3 -3
  93. data/vendor/liburing/test/35fa71a030ca.c +3 -3
  94. data/vendor/liburing/test/500f9fbadef8.c +2 -0
  95. data/vendor/liburing/test/917257daa0fe.c +1 -1
  96. data/vendor/liburing/test/Makefile +27 -7
  97. data/vendor/liburing/test/a0908ae19763.c +2 -2
  98. data/vendor/liburing/test/a4c0b3decb33.c +2 -2
  99. data/vendor/liburing/test/accept-link.c +4 -4
  100. data/vendor/liburing/test/accept-reuse.c +5 -7
  101. data/vendor/liburing/test/accept.c +34 -31
  102. data/vendor/liburing/test/b19062a56726.c +1 -1
  103. data/vendor/liburing/test/buf-ring.c +58 -4
  104. data/vendor/liburing/test/ce593a6c480a.c +2 -2
  105. data/vendor/liburing/test/close-opath.c +2 -1
  106. data/vendor/liburing/test/connect.c +8 -0
  107. data/vendor/liburing/test/cq-overflow.c +14 -8
  108. data/vendor/liburing/test/d4ae271dfaae.c +1 -1
  109. data/vendor/liburing/test/defer-taskrun.c +64 -9
  110. data/vendor/liburing/test/defer.c +1 -1
  111. data/vendor/liburing/test/double-poll-crash.c +3 -3
  112. data/vendor/liburing/test/eeed8b54e0df.c +8 -3
  113. data/vendor/liburing/test/eploop.c +74 -0
  114. data/vendor/liburing/test/eventfd-ring.c +1 -1
  115. data/vendor/liburing/test/eventfd.c +1 -1
  116. data/vendor/liburing/test/evloop.c +73 -0
  117. data/vendor/liburing/test/exit-no-cleanup.c +1 -1
  118. data/vendor/liburing/test/fadvise.c +1 -1
  119. data/vendor/liburing/test/fc2a85cb02ef.c +3 -3
  120. data/vendor/liburing/test/fd-pass.c +35 -16
  121. data/vendor/liburing/test/file-register.c +61 -0
  122. data/vendor/liburing/test/file-verify.c +2 -2
  123. data/vendor/liburing/test/files-exit-hang-timeout.c +2 -2
  124. data/vendor/liburing/test/fixed-link.c +1 -1
  125. data/vendor/liburing/test/fsnotify.c +118 -0
  126. data/vendor/liburing/test/hardlink.c +1 -1
  127. data/vendor/liburing/test/helpers.c +54 -2
  128. data/vendor/liburing/test/helpers.h +4 -0
  129. data/vendor/liburing/test/io-cancel.c +3 -1
  130. data/vendor/liburing/test/io_uring_passthrough.c +39 -8
  131. data/vendor/liburing/test/io_uring_setup.c +3 -80
  132. data/vendor/liburing/test/iopoll-overflow.c +118 -0
  133. data/vendor/liburing/test/iopoll.c +90 -4
  134. data/vendor/liburing/test/lfs-openat-write.c +7 -9
  135. data/vendor/liburing/test/lfs-openat.c +6 -8
  136. data/vendor/liburing/test/link_drain.c +31 -5
  137. data/vendor/liburing/test/madvise.c +1 -1
  138. data/vendor/liburing/test/msg-ring-flags.c +192 -0
  139. data/vendor/liburing/test/msg-ring-overflow.c +159 -0
  140. data/vendor/liburing/test/msg-ring.c +173 -13
  141. data/vendor/liburing/test/multicqes_drain.c +22 -19
  142. data/vendor/liburing/test/nvme.h +4 -3
  143. data/vendor/liburing/test/pipe-bug.c +95 -0
  144. data/vendor/liburing/test/poll-link.c +3 -3
  145. data/vendor/liburing/test/poll-many.c +41 -19
  146. data/vendor/liburing/test/poll-mshot-overflow.c +105 -2
  147. data/vendor/liburing/test/poll-race-mshot.c +292 -0
  148. data/vendor/liburing/test/poll-race.c +105 -0
  149. data/vendor/liburing/test/poll.c +244 -26
  150. data/vendor/liburing/test/pollfree.c +5 -5
  151. data/vendor/liburing/test/read-before-exit.c +20 -3
  152. data/vendor/liburing/test/read-write.c +2 -0
  153. data/vendor/liburing/test/recv-multishot.c +96 -3
  154. data/vendor/liburing/test/reg-reg-ring.c +90 -0
  155. data/vendor/liburing/test/rename.c +1 -1
  156. data/vendor/liburing/test/ring-leak.c +0 -1
  157. data/vendor/liburing/test/ring-leak2.c +1 -1
  158. data/vendor/liburing/test/ringbuf-read.c +10 -6
  159. data/vendor/liburing/test/send-zerocopy.c +273 -103
  160. data/vendor/liburing/test/send_recv.c +7 -4
  161. data/vendor/liburing/test/sendmsg_fs_cve.c +2 -2
  162. data/vendor/liburing/test/single-issuer.c +7 -9
  163. data/vendor/liburing/test/skip-cqe.c +3 -4
  164. data/vendor/liburing/test/socket.c +0 -1
  165. data/vendor/liburing/test/sq-poll-dup.c +10 -3
  166. data/vendor/liburing/test/sq-poll-kthread.c +1 -1
  167. data/vendor/liburing/test/sq-poll-share.c +3 -2
  168. data/vendor/liburing/test/sqpoll-cancel-hang.c +17 -6
  169. data/vendor/liburing/test/sqpoll-disable-exit.c +4 -4
  170. data/vendor/liburing/test/symlink.c +2 -1
  171. data/vendor/liburing/test/test.h +2 -1
  172. data/vendor/liburing/test/timeout-new.c +11 -7
  173. data/vendor/liburing/test/timeout.c +1 -2
  174. data/vendor/liburing/test/unlink.c +1 -1
  175. data/vendor/liburing/test/version.c +25 -0
  176. data/vendor/liburing/test/wakeup-hang.c +1 -1
  177. data/vendor/liburing/test/xattr.c +8 -4
  178. metadata +42 -6
  179. data/vendor/liburing/debian/compat +0 -1
  180. data/vendor/liburing/debian/liburing1-udeb.install +0 -1
  181. data/vendor/liburing/debian/liburing1.install +0 -1
  182. data/vendor/liburing/debian/liburing1.symbols +0 -32
@@ -52,10 +52,13 @@ static int test(const char *filename, int dio, int async)
52
52
  return 1;
53
53
  }
54
54
 
55
- if (dio)
55
+ if (dio) {
56
56
  fd = open(filename, O_DIRECT | O_RDONLY);
57
- else
57
+ if (fd < 0 && errno == EINVAL)
58
+ return T_EXIT_SKIP;
59
+ } else {
58
60
  fd = open(filename, O_RDONLY);
61
+ }
59
62
  if (fd < 0) {
60
63
  perror("open");
61
64
  return 1;
@@ -65,9 +68,10 @@ static int test(const char *filename, int dio, int async)
65
68
 
66
69
  if (posix_memalign((void **) &buf, 4096, FSIZE))
67
70
  return 1;
68
- if (posix_memalign((void **) &br, 4096, 4096))
71
+ if (posix_memalign((void **) &br, 4096, NR_BUFS * sizeof(struct io_uring_buf)))
69
72
  return 1;
70
73
 
74
+ io_uring_buf_ring_init(br);
71
75
  reg.ring_addr = (unsigned long) br;
72
76
  reg.ring_entries = NR_BUFS;
73
77
  reg.bgid = 1;
@@ -163,7 +167,7 @@ int main(int argc, char *argv[])
163
167
  close(fd);
164
168
 
165
169
  ret = test(fname, 1, 0);
166
- if (ret) {
170
+ if (ret == T_EXIT_FAIL) {
167
171
  fprintf(stderr, "dio test failed\n");
168
172
  goto err;
169
173
  }
@@ -177,13 +181,13 @@ int main(int argc, char *argv[])
177
181
  }
178
182
 
179
183
  ret = test(fname, 1, 1);
180
- if (ret) {
184
+ if (ret == T_EXIT_FAIL) {
181
185
  fprintf(stderr, "dio async test failed\n");
182
186
  goto err;
183
187
  }
184
188
 
185
189
  ret = test(fname, 0, 1);
186
- if (ret) {
190
+ if (ret == T_EXIT_FAIL) {
187
191
  fprintf(stderr, "buffered async test failed\n");
188
192
  goto err;
189
193
  }
@@ -4,7 +4,6 @@
4
4
  #include <stdint.h>
5
5
  #include <assert.h>
6
6
  #include <errno.h>
7
- #include <error.h>
8
7
  #include <limits.h>
9
8
  #include <fcntl.h>
10
9
  #include <unistd.h>
@@ -12,7 +11,6 @@
12
11
  #include <string.h>
13
12
 
14
13
  #include <arpa/inet.h>
15
- #include <linux/errqueue.h>
16
14
  #include <linux/if_packet.h>
17
15
  #include <linux/ipv6.h>
18
16
  #include <linux/socket.h>
@@ -34,16 +32,18 @@
34
32
  #include <sys/time.h>
35
33
  #include <sys/types.h>
36
34
  #include <sys/wait.h>
35
+ #include <sys/mman.h>
36
+ #include <linux/mman.h>
37
37
 
38
38
  #include "liburing.h"
39
39
  #include "helpers.h"
40
40
 
41
41
  #define MAX_MSG 128
42
42
 
43
- #define PORT 10200
44
43
  #define HOST "127.0.0.1"
45
44
  #define HOSTV6 "::1"
46
45
 
46
+ #define MAX_IOV 32
47
47
  #define CORK_REQS 5
48
48
  #define RX_TAG 10000
49
49
  #define BUFFER_OFFSET 41
@@ -57,10 +57,17 @@ enum {
57
57
  BUF_T_SMALL,
58
58
  BUF_T_NONALIGNED,
59
59
  BUF_T_LARGE,
60
+ BUF_T_HUGETLB,
61
+
62
+ __BUF_NR,
60
63
  };
61
64
 
65
+ /* 32MB, should be enough to trigger a short send */
66
+ #define LARGE_BUF_SIZE (1U << 25)
67
+
68
+ static size_t page_sz;
62
69
  static char *tx_buffer, *rx_buffer;
63
- static struct iovec buffers_iov[4];
70
+ static struct iovec buffers_iov[__BUF_NR];
64
71
  static bool has_sendmsg;
65
72
 
66
73
  static bool check_cq_empty(struct io_uring *ring)
@@ -115,43 +122,72 @@ static int test_basic_send(struct io_uring *ring, int sock_tx, int sock_rx)
115
122
  return T_EXIT_PASS;
116
123
  }
117
124
 
118
- static int test_send_faults(struct io_uring *ring, int sock_tx, int sock_rx)
125
+ static int test_send_faults(int sock_tx, int sock_rx)
119
126
  {
120
127
  struct io_uring_sqe *sqe;
121
128
  struct io_uring_cqe *cqe;
122
129
  int msg_flags = 0;
123
130
  unsigned zc_flags = 0;
124
131
  int payload_size = 100;
125
- int ret, i, nr_cqes = 2;
132
+ int ret, i, nr_cqes, nr_reqs = 3;
133
+ struct io_uring ring;
126
134
 
127
- sqe = io_uring_get_sqe(ring);
135
+ ret = io_uring_queue_init(32, &ring, IORING_SETUP_SUBMIT_ALL);
136
+ if (ret) {
137
+ fprintf(stderr, "queue init failed: %d\n", ret);
138
+ return -1;
139
+ }
140
+
141
+ /* invalid buffer */
142
+ sqe = io_uring_get_sqe(&ring);
128
143
  io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size,
129
144
  msg_flags, zc_flags);
130
145
  sqe->user_data = 1;
131
146
 
132
- sqe = io_uring_get_sqe(ring);
147
+ /* invalid address */
148
+ sqe = io_uring_get_sqe(&ring);
133
149
  io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
134
150
  msg_flags, zc_flags);
135
- sqe->user_data = 2;
136
151
  io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL,
137
152
  sizeof(struct sockaddr_in6));
153
+ sqe->user_data = 2;
138
154
 
139
- ret = io_uring_submit(ring);
140
- assert(ret == 2);
155
+ /* invalid send/recv flags */
156
+ sqe = io_uring_get_sqe(&ring);
157
+ io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
158
+ msg_flags, ~0U);
159
+ sqe->user_data = 3;
141
160
 
161
+ ret = io_uring_submit(&ring);
162
+ assert(ret == nr_reqs);
163
+
164
+ nr_cqes = nr_reqs;
142
165
  for (i = 0; i < nr_cqes; i++) {
143
- ret = io_uring_wait_cqe(ring, &cqe);
166
+ ret = io_uring_wait_cqe(&ring, &cqe);
144
167
  assert(!ret);
145
- assert(cqe->user_data <= 2);
168
+ assert(cqe->user_data <= nr_reqs);
146
169
 
147
170
  if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
148
- assert(cqe->res == -EFAULT);
171
+ int expected = (cqe->user_data == 3) ? -EINVAL : -EFAULT;
172
+
173
+ if (cqe->res != expected) {
174
+ fprintf(stderr, "invalid cqe res %i vs expected %i, "
175
+ "user_data %i\n",
176
+ cqe->res, expected, (int)cqe->user_data);
177
+ return -1;
178
+ }
149
179
  if (cqe->flags & IORING_CQE_F_MORE)
150
180
  nr_cqes++;
181
+ } else {
182
+ if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) {
183
+ fprintf(stderr, "invalid notif cqe %i %i\n",
184
+ cqe->res, cqe->flags);
185
+ return -1;
186
+ }
151
187
  }
152
- io_uring_cqe_seen(ring, cqe);
188
+ io_uring_cqe_seen(&ring, cqe);
153
189
  }
154
- assert(check_cq_empty(ring));
190
+ assert(check_cq_empty(&ring));
155
191
  return T_EXIT_PASS;
156
192
  }
157
193
 
@@ -160,10 +196,9 @@ static int create_socketpair_ip(struct sockaddr_storage *addr,
160
196
  bool ipv6, bool client_connect,
161
197
  bool msg_zc, bool tcp)
162
198
  {
163
- int family, addr_size;
164
- int ret, val;
165
- int listen_sock = -1;
166
- int sock;
199
+ socklen_t addr_size;
200
+ int family, sock, listen_sock = -1;
201
+ int ret;
167
202
 
168
203
  memset(addr, 0, sizeof(*addr));
169
204
  if (ipv6) {
@@ -171,14 +206,14 @@ static int create_socketpair_ip(struct sockaddr_storage *addr,
171
206
 
172
207
  family = AF_INET6;
173
208
  saddr->sin6_family = family;
174
- saddr->sin6_port = htons(PORT);
209
+ saddr->sin6_port = htons(0);
175
210
  addr_size = sizeof(*saddr);
176
211
  } else {
177
212
  struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
178
213
 
179
214
  family = AF_INET;
180
215
  saddr->sin_family = family;
181
- saddr->sin_port = htons(PORT);
216
+ saddr->sin_port = htons(0);
182
217
  saddr->sin_addr.s_addr = htonl(INADDR_ANY);
183
218
  addr_size = sizeof(*saddr);
184
219
  }
@@ -193,16 +228,19 @@ static int create_socketpair_ip(struct sockaddr_storage *addr,
193
228
  perror("socket");
194
229
  return 1;
195
230
  }
196
- val = 1;
197
- setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
198
- val = 1;
199
- setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val));
200
231
 
201
232
  ret = bind(sock, (struct sockaddr *)addr, addr_size);
202
233
  if (ret < 0) {
203
234
  perror("bind");
204
235
  return 1;
205
236
  }
237
+
238
+ ret = getsockname(sock, (struct sockaddr *)addr, &addr_size);
239
+ if (ret < 0) {
240
+ fprintf(stderr, "getsockname failed %i\n", errno);
241
+ return 1;
242
+ }
243
+
206
244
  if (tcp) {
207
245
  ret = listen(sock, 128);
208
246
  assert(ret != -1);
@@ -237,11 +275,17 @@ static int create_socketpair_ip(struct sockaddr_storage *addr,
237
275
  }
238
276
  }
239
277
  if (msg_zc) {
240
- val = 1;
278
+ #ifdef SO_ZEROCOPY
279
+ int val = 1;
280
+
241
281
  if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
242
282
  perror("setsockopt zc");
243
283
  return 1;
244
284
  }
285
+ #else
286
+ fprintf(stderr, "no SO_ZEROCOPY\n");
287
+ return 1;
288
+ #endif
245
289
  }
246
290
  if (tcp) {
247
291
  *sock_server = accept(listen_sock, NULL, NULL);
@@ -254,25 +298,40 @@ static int create_socketpair_ip(struct sockaddr_storage *addr,
254
298
  return 0;
255
299
  }
256
300
 
301
+ struct send_conf {
302
+ bool fixed_buf;
303
+ bool mix_register;
304
+ bool cork;
305
+ bool force_async;
306
+ bool use_sendmsg;
307
+ bool tcp;
308
+ bool zc;
309
+ bool iovec;
310
+ bool long_iovec;
311
+ bool poll_first;
312
+ int buf_index;
313
+ struct sockaddr_storage *addr;
314
+ };
315
+
257
316
  static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_server,
258
- bool fixed_buf, struct sockaddr_storage *addr,
259
- bool cork, bool mix_register,
260
- int buf_idx, bool force_async, bool use_sendmsg)
317
+ struct send_conf *conf)
261
318
  {
262
- struct iovec iov[CORK_REQS];
319
+ struct iovec iov[MAX_IOV];
263
320
  struct msghdr msghdr[CORK_REQS];
264
321
  const unsigned zc_flags = 0;
265
322
  struct io_uring_sqe *sqe;
266
323
  struct io_uring_cqe *cqe;
267
- int nr_reqs = cork ? CORK_REQS : 1;
324
+ int nr_reqs = conf->cork ? CORK_REQS : 1;
268
325
  int i, ret, nr_cqes, addr_len = 0;
269
- size_t send_size = buffers_iov[buf_idx].iov_len;
326
+ size_t send_size = buffers_iov[conf->buf_index].iov_len;
270
327
  size_t chunk_size = send_size / nr_reqs;
271
328
  size_t chunk_size_last = send_size - chunk_size * (nr_reqs - 1);
272
- char *buf = buffers_iov[buf_idx].iov_base;
329
+ char *buf = buffers_iov[conf->buf_index].iov_base;
273
330
 
274
- if (addr) {
275
- sa_family_t fam = ((struct sockaddr_in *)addr)->sin_family;
331
+ assert(MAX_IOV >= CORK_REQS);
332
+
333
+ if (conf->addr) {
334
+ sa_family_t fam = ((struct sockaddr_in *)conf->addr)->sin_family;
276
335
 
277
336
  addr_len = (fam == AF_INET) ? sizeof(struct sockaddr_in) :
278
337
  sizeof(struct sockaddr_in6);
@@ -281,46 +340,87 @@ static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_se
281
340
  memset(rx_buffer, 0, send_size);
282
341
 
283
342
  for (i = 0; i < nr_reqs; i++) {
284
- bool real_fixed_buf = fixed_buf;
343
+ bool real_fixed_buf = conf->fixed_buf;
285
344
  size_t cur_size = chunk_size;
286
345
  int msg_flags = MSG_WAITALL;
287
346
 
288
- if (mix_register)
347
+ if (conf->mix_register)
289
348
  real_fixed_buf = rand() & 1;
290
349
 
291
- if (cork && i != nr_reqs - 1)
350
+ if (i != nr_reqs - 1)
292
351
  msg_flags |= MSG_MORE;
293
- if (i == nr_reqs - 1)
352
+ else
294
353
  cur_size = chunk_size_last;
295
354
 
296
355
  sqe = io_uring_get_sqe(ring);
297
356
 
298
- if (!use_sendmsg) {
299
- io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size,
300
- cur_size, msg_flags, zc_flags);
357
+ if (!conf->use_sendmsg) {
358
+ if (conf->zc) {
359
+ io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size,
360
+ cur_size, msg_flags, zc_flags);
361
+ } else {
362
+ io_uring_prep_send(sqe, sock_client, buf + i * chunk_size,
363
+ cur_size, msg_flags);
364
+ }
365
+
301
366
  if (real_fixed_buf) {
302
367
  sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
303
- sqe->buf_index = buf_idx;
368
+ sqe->buf_index = conf->buf_index;
304
369
  }
305
- if (addr)
306
- io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)addr,
370
+ if (conf->addr)
371
+ io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)conf->addr,
307
372
  addr_len);
308
373
  } else {
309
- io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags);
374
+ struct iovec *io;
375
+ int iov_len;
376
+
377
+ if (conf->zc)
378
+ io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags);
379
+ else
380
+ io_uring_prep_sendmsg(sqe, sock_client, &msghdr[i], msg_flags);
381
+
382
+ if (!conf->iovec) {
383
+ io = &iov[i];
384
+ iov_len = 1;
385
+ iov[i].iov_len = cur_size;
386
+ iov[i].iov_base = buf + i * chunk_size;
387
+ } else {
388
+ char *it = buf;
389
+ int j;
390
+
391
+ assert(nr_reqs == 1);
392
+ iov_len = conf->long_iovec ? MAX_IOV : 4;
393
+ io = iov;
394
+
395
+ for (j = 0; j < iov_len; j++)
396
+ io[j].iov_len = 1;
397
+ /* first want to be easily advanced */
398
+ io[0].iov_base = it;
399
+ it += io[0].iov_len;
400
+ /* this should cause retry */
401
+ io[1].iov_len = chunk_size - iov_len + 1;
402
+ io[1].iov_base = it;
403
+ it += io[1].iov_len;
404
+ /* fill the rest */
405
+ for (j = 2; j < iov_len; j++) {
406
+ io[j].iov_base = it;
407
+ it += io[j].iov_len;
408
+ }
409
+ }
310
410
 
311
411
  memset(&msghdr[i], 0, sizeof(msghdr[i]));
312
- iov[i].iov_len = cur_size;
313
- iov[i].iov_base = buf + i * chunk_size;
314
- msghdr[i].msg_iov = &iov[i];
315
- msghdr[i].msg_iovlen = 1;
316
- if (addr) {
317
- msghdr[i].msg_name = addr;
412
+ msghdr[i].msg_iov = io;
413
+ msghdr[i].msg_iovlen = iov_len;
414
+ if (conf->addr) {
415
+ msghdr[i].msg_name = conf->addr;
318
416
  msghdr[i].msg_namelen = addr_len;
319
417
  }
320
418
  }
321
419
  sqe->user_data = i;
322
- if (force_async)
420
+ if (conf->force_async)
323
421
  sqe->flags |= IOSQE_ASYNC;
422
+ if (conf->poll_first)
423
+ sqe->ioprio |= IORING_RECVSEND_POLL_FIRST;
324
424
  if (i != nr_reqs - 1)
325
425
  sqe->flags |= IOSQE_IO_LINK;
326
426
  }
@@ -335,7 +435,7 @@ static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_se
335
435
  return 1;
336
436
  }
337
437
 
338
- nr_cqes = 2 * nr_reqs + 1;
438
+ nr_cqes = nr_reqs + 1;
339
439
  for (i = 0; i < nr_cqes; i++) {
340
440
  int expected = chunk_size;
341
441
 
@@ -346,19 +446,26 @@ static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_se
346
446
  }
347
447
  if (cqe->user_data == RX_TAG) {
348
448
  if (cqe->res != send_size) {
349
- fprintf(stderr, "rx failed %i\n", cqe->res);
449
+ fprintf(stderr, "rx failed res: %i, expected %i\n",
450
+ cqe->res, (int)send_size);
350
451
  return 1;
351
452
  }
352
453
  io_uring_cqe_seen(ring, cqe);
353
454
  continue;
354
455
  }
355
-
456
+ if ((cqe->flags & IORING_CQE_F_MORE) && (cqe->flags & IORING_CQE_F_NOTIF)) {
457
+ fprintf(stderr, "unexpected cflags %i res %i\n",
458
+ cqe->flags, cqe->res);
459
+ return 1;
460
+ }
356
461
  if (cqe->user_data >= nr_reqs) {
357
462
  fprintf(stderr, "invalid user_data %lu\n",
358
463
  (unsigned long)cqe->user_data);
359
464
  return 1;
360
465
  }
361
466
  if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
467
+ if (cqe->flags & IORING_CQE_F_MORE)
468
+ nr_cqes++;
362
469
  if (cqe->user_data == nr_reqs - 1)
363
470
  expected = chunk_size_last;
364
471
  if (cqe->res != expected) {
@@ -367,12 +474,6 @@ static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_se
367
474
  return 1;
368
475
  }
369
476
  }
370
- if ((cqe->flags & IORING_CQE_F_MORE) ==
371
- (cqe->flags & IORING_CQE_F_NOTIF)) {
372
- fprintf(stderr, "unexpected cflags %i res %i\n",
373
- cqe->flags, cqe->res);
374
- return 1;
375
- }
376
477
  io_uring_cqe_seen(ring, cqe);
377
478
  }
378
479
 
@@ -388,57 +489,96 @@ static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_se
388
489
 
389
490
  static int test_inet_send(struct io_uring *ring)
390
491
  {
492
+ struct send_conf conf;
391
493
  struct sockaddr_storage addr;
392
494
  int sock_client = -1, sock_server = -1;
393
495
  int ret, j, i;
496
+ int buf_index;
394
497
 
395
- for (j = 0; j < 16; j++) {
498
+ for (j = 0; j < 32; j++) {
396
499
  bool ipv6 = j & 1;
397
500
  bool client_connect = j & 2;
398
501
  bool msg_zc_set = j & 4;
399
502
  bool tcp = j & 8;
503
+ bool swap_sockets = j & 16;
400
504
 
401
505
  if (tcp && !client_connect)
402
506
  continue;
403
-
507
+ if (swap_sockets && !tcp)
508
+ continue;
509
+ #ifndef SO_ZEROCOPY
510
+ if (msg_zc_set)
511
+ continue;
512
+ #endif
404
513
  ret = create_socketpair_ip(&addr, &sock_client, &sock_server, ipv6,
405
514
  client_connect, msg_zc_set, tcp);
406
515
  if (ret) {
407
516
  fprintf(stderr, "sock prep failed %d\n", ret);
408
517
  return 1;
409
518
  }
519
+ if (swap_sockets) {
520
+ int tmp_sock = sock_client;
410
521
 
411
- for (i = 0; i < 256; i++) {
412
- int buf_flavour = i & 3;
413
- bool fixed_buf = i & 4;
414
- struct sockaddr_storage *addr_arg = (i & 8) ? &addr : NULL;
415
- bool cork = i & 16;
416
- bool mix_register = i & 32;
417
- bool force_async = i & 64;
418
- bool use_sendmsg = i & 128;
522
+ sock_client = sock_server;
523
+ sock_server = tmp_sock;
524
+ }
419
525
 
420
- if (buf_flavour == BUF_T_LARGE && !tcp)
526
+ for (i = 0; i < 1024; i++) {
527
+ bool regbuf;
528
+
529
+ conf.use_sendmsg = i & 1;
530
+ conf.poll_first = i & 2;
531
+ conf.fixed_buf = i & 4;
532
+ conf.addr = (i & 8) ? &addr : NULL;
533
+ conf.cork = i & 16;
534
+ conf.mix_register = i & 32;
535
+ conf.force_async = i & 64;
536
+ conf.zc = i & 128;
537
+ conf.iovec = i & 256;
538
+ conf.long_iovec = i & 512;
539
+ conf.tcp = tcp;
540
+ regbuf = conf.mix_register || conf.fixed_buf;
541
+
542
+ if (conf.iovec && (!conf.use_sendmsg || regbuf || conf.cork))
421
543
  continue;
422
- if (!buffers_iov[buf_flavour].iov_base)
544
+ if (!conf.zc) {
545
+ if (regbuf)
546
+ continue;
547
+ /*
548
+ * Non zerocopy send w/ addr was added together with sendmsg_zc,
549
+ * skip if the kernel doesn't support it.
550
+ */
551
+ if (conf.addr && !has_sendmsg)
552
+ continue;
553
+ }
554
+ if (tcp && (conf.cork || conf.addr))
423
555
  continue;
424
- if (tcp && (cork || addr_arg))
556
+ if (conf.mix_register && (!conf.cork || conf.fixed_buf))
425
557
  continue;
426
- if (mix_register && (!cork || fixed_buf))
558
+ if (!client_connect && conf.addr == NULL)
427
559
  continue;
428
- if (!client_connect && addr_arg == NULL)
560
+ if (conf.use_sendmsg && (regbuf || !has_sendmsg))
429
561
  continue;
430
- if (use_sendmsg && (mix_register || fixed_buf || !has_sendmsg))
562
+ if (msg_zc_set && !conf.zc)
431
563
  continue;
432
564
 
433
- ret = do_test_inet_send(ring, sock_client, sock_server, fixed_buf,
434
- addr_arg, cork, mix_register,
435
- buf_flavour, force_async, use_sendmsg);
436
- if (ret) {
437
- fprintf(stderr, "send failed fixed buf %i, conn %i, addr %i, "
438
- "cork %i\n",
439
- fixed_buf, client_connect, !!addr_arg,
440
- cork);
441
- return 1;
565
+ for (buf_index = 0; buf_index < ARRAY_SIZE(buffers_iov); buf_index++) {
566
+ size_t len = buffers_iov[buf_index].iov_len;
567
+
568
+ if (!buffers_iov[buf_index].iov_base)
569
+ continue;
570
+ if (!tcp && len > 4 * page_sz)
571
+ continue;
572
+
573
+ conf.buf_index = buf_index;
574
+ ret = do_test_inet_send(ring, sock_client, sock_server, &conf);
575
+ if (ret) {
576
+ fprintf(stderr, "send failed fixed buf %i, "
577
+ "conn %i, addr %i, cork %i\n",
578
+ conf.fixed_buf, client_connect,
579
+ !!conf.addr, conf.cork);
580
+ return 1;
581
+ }
442
582
  }
443
583
  }
444
584
 
@@ -588,6 +728,8 @@ int main(int argc, char *argv[])
588
728
  if (argc > 1)
589
729
  return T_EXIT_SKIP;
590
730
 
731
+ page_sz = sysconf(_SC_PAGESIZE);
732
+
591
733
  /* create TCP IPv6 pair */
592
734
  ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true);
593
735
  if (ret) {
@@ -595,30 +737,54 @@ int main(int argc, char *argv[])
595
737
  return T_EXIT_FAIL;
596
738
  }
597
739
 
598
- len = 1U << 25; /* 32MB, should be enough to trigger a short send */
599
- tx_buffer = aligned_alloc(4096, len);
600
- rx_buffer = aligned_alloc(4096, len);
740
+ len = LARGE_BUF_SIZE;
741
+ tx_buffer = aligned_alloc(page_sz, len);
742
+ rx_buffer = aligned_alloc(page_sz, len);
601
743
  if (tx_buffer && rx_buffer) {
602
744
  buffers_iov[BUF_T_LARGE].iov_base = tx_buffer;
603
745
  buffers_iov[BUF_T_LARGE].iov_len = len;
604
746
  } else {
747
+ if (tx_buffer)
748
+ free(tx_buffer);
749
+ if (rx_buffer)
750
+ free(rx_buffer);
751
+
605
752
  printf("skip large buffer tests, can't alloc\n");
606
753
 
607
- len = 8192;
608
- tx_buffer = aligned_alloc(4096, len);
609
- rx_buffer = aligned_alloc(4096, len);
754
+ len = 2 * page_sz;
755
+ tx_buffer = aligned_alloc(page_sz, len);
756
+ rx_buffer = aligned_alloc(page_sz, len);
610
757
  }
611
758
  if (!tx_buffer || !rx_buffer) {
612
759
  fprintf(stderr, "can't allocate buffers\n");
613
760
  return T_EXIT_FAIL;
614
761
  }
615
762
 
616
- buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + 4096;
617
- buffers_iov[BUF_T_NORMAL].iov_len = 4096;
763
+ srand((unsigned)time(NULL));
764
+ for (i = 0; i < len; i++)
765
+ tx_buffer[i] = i;
766
+ memset(rx_buffer, 0, len);
767
+
768
+ buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + page_sz;
769
+ buffers_iov[BUF_T_NORMAL].iov_len = page_sz;
618
770
  buffers_iov[BUF_T_SMALL].iov_base = tx_buffer;
619
771
  buffers_iov[BUF_T_SMALL].iov_len = 137;
620
772
  buffers_iov[BUF_T_NONALIGNED].iov_base = tx_buffer + BUFFER_OFFSET;
621
- buffers_iov[BUF_T_NONALIGNED].iov_len = 8192 - BUFFER_OFFSET - 13;
773
+ buffers_iov[BUF_T_NONALIGNED].iov_len = 2 * page_sz - BUFFER_OFFSET - 13;
774
+
775
+ if (len == LARGE_BUF_SIZE) {
776
+ void *huge_page;
777
+ int off = page_sz + 27;
778
+
779
+ len = 1U << 22;
780
+ huge_page = mmap(NULL, len, PROT_READ|PROT_WRITE,
781
+ MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
782
+ -1, 0);
783
+ if (huge_page != MAP_FAILED) {
784
+ buffers_iov[BUF_T_HUGETLB].iov_base = huge_page + off;
785
+ buffers_iov[BUF_T_HUGETLB].iov_len = len - off;
786
+ }
787
+ }
622
788
 
623
789
  ret = io_uring_queue_init(32, &ring, 0);
624
790
  if (ret) {
@@ -626,11 +792,6 @@ int main(int argc, char *argv[])
626
792
  return T_EXIT_FAIL;
627
793
  }
628
794
 
629
- srand((unsigned)time(NULL));
630
- for (i = 0; i < len; i++)
631
- tx_buffer[i] = i;
632
- memset(rx_buffer, 0, len);
633
-
634
795
  ret = test_basic_send(&ring, sp[0], sp[1]);
635
796
  if (ret == T_EXIT_SKIP)
636
797
  return ret;
@@ -641,7 +802,7 @@ int main(int argc, char *argv[])
641
802
 
642
803
  has_sendmsg = io_check_zc_sendmsg(&ring);
643
804
 
644
- ret = test_send_faults(&ring, sp[0], sp[1]);
805
+ ret = test_send_faults(sp[0], sp[1]);
645
806
  if (ret) {
646
807
  fprintf(stderr, "test_send_faults() failed\n");
647
808
  return T_EXIT_FAIL;
@@ -671,6 +832,15 @@ int main(int argc, char *argv[])
671
832
  return T_EXIT_FAIL;
672
833
  }
673
834
 
835
+ if (buffers_iov[BUF_T_HUGETLB].iov_base) {
836
+ buffers_iov[BUF_T_HUGETLB].iov_base += 13;
837
+ buffers_iov[BUF_T_HUGETLB].iov_len -= 26;
838
+ }
839
+ if (buffers_iov[BUF_T_LARGE].iov_base) {
840
+ buffers_iov[BUF_T_LARGE].iov_base += 13;
841
+ buffers_iov[BUF_T_LARGE].iov_len -= 26;
842
+ }
843
+
674
844
  ret = test_inet_send(&ring);
675
845
  if (ret) {
676
846
  fprintf(stderr, "test_inet_send() failed\n");