polyphony 0.45.1 → 0.46.1

Files changed (74)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +2 -0
  3. data/.gitmodules +0 -0
  4. data/CHANGELOG.md +35 -0
  5. data/Gemfile.lock +3 -3
  6. data/README.md +3 -3
  7. data/Rakefile +1 -1
  8. data/TODO.md +20 -14
  9. data/bin/test +4 -0
  10. data/examples/io/raw.rb +14 -0
  11. data/examples/io/reline.rb +18 -0
  12. data/examples/performance/fiber_transfer.rb +13 -4
  13. data/examples/performance/multi_snooze.rb +0 -1
  14. data/examples/performance/thread-vs-fiber/polyphony_server.rb +8 -20
  15. data/ext/liburing/liburing.h +585 -0
  16. data/ext/liburing/liburing/README.md +4 -0
  17. data/ext/liburing/liburing/barrier.h +73 -0
  18. data/ext/liburing/liburing/compat.h +15 -0
  19. data/ext/liburing/liburing/io_uring.h +343 -0
  20. data/ext/liburing/queue.c +333 -0
  21. data/ext/liburing/register.c +187 -0
  22. data/ext/liburing/setup.c +210 -0
  23. data/ext/liburing/syscall.c +54 -0
  24. data/ext/liburing/syscall.h +18 -0
  25. data/ext/polyphony/backend.h +1 -15
  26. data/ext/polyphony/backend_common.h +120 -0
  27. data/ext/polyphony/backend_io_uring.c +919 -0
  28. data/ext/polyphony/backend_io_uring_context.c +73 -0
  29. data/ext/polyphony/backend_io_uring_context.h +52 -0
  30. data/ext/polyphony/{libev_backend.c → backend_libev.c} +241 -297
  31. data/ext/polyphony/event.c +1 -1
  32. data/ext/polyphony/extconf.rb +31 -13
  33. data/ext/polyphony/fiber.c +107 -28
  34. data/ext/polyphony/libev.c +4 -0
  35. data/ext/polyphony/libev.h +8 -2
  36. data/ext/polyphony/liburing.c +8 -0
  37. data/ext/polyphony/playground.c +51 -0
  38. data/ext/polyphony/polyphony.c +6 -6
  39. data/ext/polyphony/polyphony.h +34 -14
  40. data/ext/polyphony/polyphony_ext.c +12 -4
  41. data/ext/polyphony/queue.c +1 -1
  42. data/ext/polyphony/runqueue.c +102 -0
  43. data/ext/polyphony/runqueue_ring_buffer.c +85 -0
  44. data/ext/polyphony/runqueue_ring_buffer.h +31 -0
  45. data/ext/polyphony/thread.c +42 -90
  46. data/lib/polyphony.rb +2 -2
  47. data/lib/polyphony/adapters/process.rb +0 -3
  48. data/lib/polyphony/adapters/trace.rb +2 -2
  49. data/lib/polyphony/core/exceptions.rb +0 -4
  50. data/lib/polyphony/core/global_api.rb +13 -11
  51. data/lib/polyphony/core/sync.rb +7 -5
  52. data/lib/polyphony/extensions/core.rb +14 -33
  53. data/lib/polyphony/extensions/debug.rb +13 -0
  54. data/lib/polyphony/extensions/fiber.rb +21 -44
  55. data/lib/polyphony/extensions/io.rb +15 -4
  56. data/lib/polyphony/extensions/openssl.rb +6 -0
  57. data/lib/polyphony/extensions/socket.rb +63 -10
  58. data/lib/polyphony/version.rb +1 -1
  59. data/polyphony.gemspec +1 -1
  60. data/test/helper.rb +36 -4
  61. data/test/io_uring_test.rb +55 -0
  62. data/test/stress.rb +4 -1
  63. data/test/test_backend.rb +15 -6
  64. data/test/test_ext.rb +1 -2
  65. data/test/test_fiber.rb +31 -24
  66. data/test/test_global_api.rb +71 -31
  67. data/test/test_io.rb +42 -0
  68. data/test/test_queue.rb +1 -1
  69. data/test/test_signal.rb +11 -8
  70. data/test/test_socket.rb +2 -2
  71. data/test/test_sync.rb +21 -0
  72. data/test/test_throttler.rb +3 -7
  73. data/test/test_trace.rb +7 -5
  74. metadata +31 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5f54b88851ca73f121975839812e0a1ad5686d017199b83a6365deda2b656d3a
-  data.tar.gz: 92671a26215f19e36c0b0f49edb990a6818e2c64d7a8ed8bc9c4f2621a5de170
+  metadata.gz: 831c85a16a22fe3877044430a4d1cc4a270d18dc73e9ceca8d4826623ab0ddc8
+  data.tar.gz: f0432473abb769be2805ad354dddf44465789ebe2d5220f194660e4c6521ec87
 SHA512:
-  metadata.gz: fd3caf340b523eccb1f86b10e06996ec26704ed1906bd82188363fb5142901b585992df8d88d0ca20adc188e51a010144fba8f10843ba79503d2984f63a08a05
-  data.tar.gz: 2b44d80a6ac0bf3f0f42ece1b7d2008c3d0b933c1df11be7f698bbc9dae80341bd1d302a1e0ccaa56b223468b607b7f024bbaa949a88ff1c343028d6cd256ba5
+  metadata.gz: 9bfecbed04a8052c3a885dfde73975694313801d2a8341260041be84e4910ac459af71766f7df6fa96e5afad66a737b97e36531811bd47cab407ad43b239e073
+  data.tar.gz: 2caa40f7193cf1954b69adc5810844ad4e578a4472382602a6ee4204d548e8327344f913bc0aef076030e7deacb911cab1733b281f7a2e8849ffcbd009a00ebc
data/.github/workflows/test.yml CHANGED
@@ -23,6 +23,8 @@ jobs:
       run: |
         gem install bundler
         bundle install
+    - name: Show Linux kernel version
+      run: uname -r
     - name: Compile C-extension
       run: bundle exec rake compile
     - name: Run tests
data/.gitmodules CHANGED
File without changes
data/CHANGELOG.md CHANGED
@@ -1,3 +1,38 @@
+## 0.46.1
+
+* Add `TCPServer#accept_loop`, `OpenSSL::SSL::SSLSocket#accept_loop` methods
+* Fix compilation error on MacOS (#43)
+* Fix backtrace for `Timeout.timeout`
+* Add `Backend#timer_loop`
+
+## 0.46.0
+
+* Implement [io_uring backend](https://github.com/digital-fabric/polyphony/pull/44)
+
+## 0.45.5
+
+* Fix compilation error (#43)
+* Add support for resetting move_on_after, cancel_after timeouts
+* Optimize anti-event-starvation polling
+* Implement optimized runqueue for better performance
+* Schedule parent with priority on uncaught exception
+* Fix race condition in `Mutex#synchronize` (#41)
+
+## 0.45.4
+
+* Improve signal trapping mechanism
+
+## 0.45.3
+
+* Don't swallow error in `Process#kill_and_await`
+* Add `Fiber#mailbox` attribute reader
+* Fix bug in `Fiber.await`
+* Implement `IO#getc`, `IO#getbyte`
+
+## 0.45.2
+
+* Rewrite `Fiber#<<`, `Fiber#await`, `Fiber#receive` in C
+
 ## 0.45.1
 
 * Fix Net::HTTP compatibility
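
A quick sketch of the headline 0.46.1 additions, for orientation. `TCPServer#accept_loop` comes straight from this release's changelog and examples; reaching `Backend#timer_loop` through `Thread.current.backend` is an assumption based on the test suite, and the port is arbitrary.

```ruby
require 'polyphony'

# Backend#timer_loop (new in 0.46.1): run the block at a fixed interval.
# Thread.current.backend as the accessor is an assumption, not documented API.
ticker = spin do
  Thread.current.backend.timer_loop(1) { puts "tick #{Time.now}" }
end

# TCPServer#accept_loop (new in 0.46.1): accept connections in a loop,
# spinning one fiber per connection.
server = TCPServer.open('127.0.0.1', 4411)
server.accept_loop do |socket|
  spin do
    while (line = socket.gets)
      socket << line
    end
    socket.close
  end
end
```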
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    polyphony (0.45.1)
+    polyphony (0.46.1)
 
 GEM
   remote: https://rubygems.org/
@@ -87,7 +87,7 @@ GEM
     rack (2.2.3)
     rainbow (3.0.0)
     rake (12.3.3)
-    rake-compiler (1.0.5)
+    rake-compiler (1.1.1)
       rake
     rb-fsevent (0.10.3)
     rb-inotify (0.10.1)
@@ -141,7 +141,7 @@ DEPENDENCIES
   polyphony!
   pry (= 0.13.1)
   rack (>= 2.0.8, < 2.3.0)
-  rake-compiler (= 1.0.5)
+  rake-compiler (= 1.1.1)
   redis (= 4.1.0)
   rubocop (= 0.85.1)
   sequel (= 5.34.0)
data/README.md CHANGED
@@ -35,9 +35,9 @@
 Polyphony is a library for building concurrent applications in Ruby. Polyphony
 harnesses the power of [Ruby fibers](https://ruby-doc.org/core-2.5.1/Fiber.html)
 to provide a cooperative, sequential coroutine-based concurrency model. Under
-the hood, Polyphony uses [libev](https://github.com/enki/libev) as a
-high-performance event reactor that provides timers, I/O watchers and other
-asynchronous event primitives.
+the hood, Polyphony uses
+[io_uring](https://unixism.net/loti/what_is_io_uring.html) or
+[libev](https://github.com/enki/libev) to maximize I/O performance.
 
 ## Features
 
data/Rakefile CHANGED
@@ -23,4 +23,4 @@ task :docs do
   exec 'RUBYOPT=-W0 jekyll serve -s docs -H ec2-18-156-117-172.eu-central-1.compute.amazonaws.com'
 end
 
-CLEAN.include "**/*.o", "**/*.so", "**/*.bundle", "**/*.jar", "pkg", "tmp"
+CLEAN.include "**/*.o", "**/*.so", "**/*.so.*", "**/*.a", "**/*.bundle", "**/*.jar", "pkg", "tmp"
data/TODO.md CHANGED
@@ -1,15 +1,24 @@
-0.45.1
+## Roadmap for Polyphony 1.0
 
-- Adapter for Pry and IRB (Which fixes #5 and #6)
-- Redesign signal handling - the current mechanism is problematic in that it
-  does not address signals that do not kill, for instance HUP or USR1.
+- Check why worker-thread example doesn't work.
+- Add test that mimics the original design for Monocrono:
+  - 256 fibers each waiting for a message
+  - When a message is received, do some blocking work using a `ThreadPool`
+  - Send messages, collect responses, check for correctness
 - Improve `#supervise`. It does not work as advertised, and seems to exhibit
   inconsistent behaviour (see supervisor example).
-- Fix backtrace for `Timeout.timeout` API (see timeout example).
-- Check why worker-thread example doesn't work.
 
-0.46.0
+- io_uring
+  - Use playground.c to find out why, when submitting and waiting for
+    completion in a single syscall, signals seem to be blocked until the
+    syscall returns. Is this a bug in io_uring/liburing?
 
+-----------------------------------------------------
+
+- Add `Backend#splice(in, out, nbytes)` API (see the sketch after this diff)
+- Adapter for io/console (what does `IO#raw` do?)
+- Adapter for Pry and IRB (which fixes #5 and #6)
+- Allow backend selection at runtime
 - Debugging
   - Eat your own dogfood: need a good tool to check what's going on when some
     test fails
@@ -122,8 +131,6 @@
 - discuss using `snooze` for ensuring responsiveness when executing CPU-bound work
 
 
-## 0.47
-
 ### Some more API work, more docs
 
 - sinatra app with database access (postgresql)
@@ -135,14 +142,10 @@
 - proceed from there
 
 
-## 0.48
-
 ### Sinatra / Sidekiq
 
 - Pull out redis/postgres code, put into new `polyphony-xxx` gems
 
-## 0.49
-
 ### Testing && Docs
 
 - More tests
@@ -153,7 +156,10 @@
 - `IO.foreach`
 - `Process.waitpid`
 
-## 0.50 DNS
+### Quic / HTTP/3
+
+- Python impl: https://github.com/aiortc/aioquic/
+- Go impl: https://github.com/lucas-clemente/quic-go
 
 ### DNS client
 
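
The `Backend#splice(in, out, nbytes)` roadmap item maps naturally onto the `io_uring_prep_splice` helper in the vendored liburing header further down. It is not implemented in 0.46.1; the following is a purely hypothetical sketch of how such an API might read.

```ruby
require 'polyphony'

r, w = IO.pipe

spin do
  w << 'hello world'
  w.close
end

# Hypothetical API from the roadmap item above -- not part of 0.46.1:
# move up to 11 bytes from the pipe to STDOUT without an intermediate
# userspace buffer.
Thread.current.backend.splice(r, STDOUT, 11)
```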
data/bin/test CHANGED
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -e
+clear && POLYPHONY_USE_LIBEV=1 rake recompile && ruby test/run.rb
+clear && rake recompile && ruby test/run.rb
data/examples/io/raw.rb CHANGED
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+require 'bundler/setup'
+require 'polyphony'
+require 'io/console'
+
+c = STDIN.raw(min: 1, time: 0, &:getbyte)
+p result: c
+exit
+
+puts '?' * 40
+c = STDIN.getbyte
+puts '*' * 40
+p c
data/examples/io/reline.rb CHANGED
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+require 'bundler/setup'
+require 'polyphony'
+require 'polyphony/adapters/readline'
+require 'pry'
+
+$counter = 0
+timer = spin do
+  throttled_loop(5) do
+    $counter += 1
+  end
+end
+
+at_exit { timer.stop }
+
+puts 'try typing $counter to see the counter incremented in the background'
+binding.pry
data/examples/performance/fiber_transfer.rb CHANGED
@@ -12,6 +12,7 @@ end
 def run(num_fibers)
   count = 0
 
+  GC.start
   GC.disable
 
   first = nil
@@ -36,13 +37,21 @@ def run(num_fibers)
   last.next = first
 
   t0 = Time.now
+  puts "start transfer..."
   first.transfer
   elapsed = Time.now - t0
 
-  puts "fibers: #{num_fibers} count: #{count} rate: #{count / elapsed}"
-  GC.start
+  rss = `ps -o rss= -p #{Process.pid}`.to_i
+
+  puts "fibers: #{num_fibers} rss: #{rss} count: #{count} rate: #{count / elapsed}"
+rescue Exception => e
+  puts "Stopped at #{count} fibers"
+  p e
 end
 
+puts "pid: #{Process.pid}"
 run(100)
-run(1000)
-run(10000)
+# run(1000)
+# run(10000)
+# run(100000)
+# run(400000)
data/examples/performance/multi_snooze.rb CHANGED
@@ -18,7 +18,6 @@ def bm(fibers, iterations)
   Fiber.current.await_all_children
   dt = Time.now - t0
   puts "#{[fibers, iterations].inspect} setup: #{t0 - t_pre}s count: #{count} #{count / dt.to_f}/s"
-  Thread.current.run_queue_trace
 end
 
 GC.disable
data/examples/performance/thread-vs-fiber/polyphony_server.rb CHANGED
@@ -4,42 +4,30 @@ require 'bundler/setup'
 require 'polyphony'
 require 'http/parser'
 
-$connection_count = 0
-
 def handle_client(socket)
-  $connection_count += 1
+  pending_requests = []
   parser = Http::Parser.new
-  reqs = []
-  parser.on_message_complete = proc do |env|
-    reqs << Object.new # parser
-  end
-  socket.read_loop do |data|
+  parser.on_message_complete = proc { pending_requests << parser }
+
+  socket.recv_loop do |data|
     parser << data
-    while (req = reqs.shift)
-      handle_request(socket, req)
-      req = nil
-      snooze
-    end
+    write_response(socket) while pending_requests.shift
   end
 rescue IOError, SystemCallError => e
   # do nothing
 ensure
-  $connection_count -= 1
   socket&.close
 end
 
-def handle_request(client, parser)
+def write_response(socket)
   status_code = "200 OK"
   data = "Hello world!\n"
   headers = "Content-Type: text/plain\r\nContent-Length: #{data.bytesize}\r\n"
-  client.write "HTTP/1.1 #{status_code}\r\n#{headers}\r\n#{data}"
+  socket.write "HTTP/1.1 #{status_code}\r\n#{headers}\r\n#{data}"
 end
 
 server = TCPServer.open('0.0.0.0', 1234)
 puts "pid #{Process.pid}"
 puts "listening on port 1234"
 
-loop do
-  client = server.accept
-  spin { handle_client(client) }
-end
+server.accept_loop { |c| handle_client(c) }
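
To exercise the rewritten example server, any HTTP/1.1 client will do; a stdlib sketch, assuming the server above is running locally on port 1234:

```ruby
require 'net/http'

# handle_client parses this request via Http::Parser and
# write_response answers it with "Hello world!\n".
puts Net::HTTP.get(URI('http://127.0.0.1:1234/'))
```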
data/ext/liburing/liburing.h CHANGED
@@ -0,0 +1,585 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIB_URING_H
+#define LIB_URING_H
+
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <inttypes.h>
+#include <time.h>
+#include <linux/swab.h>
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+#include "liburing/barrier.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Library interface to io_uring
+ */
+struct io_uring_sq {
+    unsigned *khead;
+    unsigned *ktail;
+    unsigned *kring_mask;
+    unsigned *kring_entries;
+    unsigned *kflags;
+    unsigned *kdropped;
+    unsigned *array;
+    struct io_uring_sqe *sqes;
+
+    unsigned sqe_head;
+    unsigned sqe_tail;
+
+    size_t ring_sz;
+    void *ring_ptr;
+
+    unsigned pad[4];
+};
+
+struct io_uring_cq {
+    unsigned *khead;
+    unsigned *ktail;
+    unsigned *kring_mask;
+    unsigned *kring_entries;
+    unsigned *kflags;
+    unsigned *koverflow;
+    struct io_uring_cqe *cqes;
+
+    size_t ring_sz;
+    void *ring_ptr;
+
+    unsigned pad[4];
+};
+
+struct io_uring {
+    struct io_uring_sq sq;
+    struct io_uring_cq cq;
+    unsigned flags;
+    int ring_fd;
+
+    unsigned pad[4];
+};
+
+/*
+ * Library interface
+ */
+
+/*
+ * return an allocated io_uring_probe structure, or NULL if probe fails (for
+ * example, if it is not available). The caller is responsible for freeing it
+ */
+extern struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
+/* same as io_uring_get_probe_ring, but takes care of ring init and teardown */
+extern struct io_uring_probe *io_uring_get_probe(void);
+
+/*
+ * frees a probe allocated through io_uring_get_probe() or
+ * io_uring_get_probe_ring()
+ */
+extern void io_uring_free_probe(struct io_uring_probe *probe);
+
+static inline int io_uring_opcode_supported(struct io_uring_probe *p, int op)
+{
+    if (op > p->last_op)
+        return 0;
+    return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
+}
+
+extern int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
+                                      struct io_uring_params *p);
+extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+                               unsigned flags);
+extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
+                               struct io_uring *ring);
+extern int io_uring_ring_dontfork(struct io_uring *ring);
+extern void io_uring_queue_exit(struct io_uring *ring);
+unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
+                                 struct io_uring_cqe **cqes, unsigned count);
+extern int io_uring_wait_cqes(struct io_uring *ring,
+                              struct io_uring_cqe **cqe_ptr, unsigned wait_nr,
+                              struct __kernel_timespec *ts, sigset_t *sigmask);
+extern int io_uring_wait_cqe_timeout(struct io_uring *ring,
+                                     struct io_uring_cqe **cqe_ptr,
+                                     struct __kernel_timespec *ts);
+extern int io_uring_submit(struct io_uring *ring);
+extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
+extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
+
+extern int io_uring_register_buffers(struct io_uring *ring,
+                                     const struct iovec *iovecs,
+                                     unsigned nr_iovecs);
+extern int io_uring_unregister_buffers(struct io_uring *ring);
+extern int io_uring_register_files(struct io_uring *ring, const int *files,
+                                   unsigned nr_files);
+extern int io_uring_unregister_files(struct io_uring *ring);
+extern int io_uring_register_files_update(struct io_uring *ring, unsigned off,
+                                          int *files, unsigned nr_files);
+extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
+extern int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
+extern int io_uring_unregister_eventfd(struct io_uring *ring);
+extern int io_uring_register_probe(struct io_uring *ring,
+                                   struct io_uring_probe *p, unsigned nr);
+extern int io_uring_register_personality(struct io_uring *ring);
+extern int io_uring_unregister_personality(struct io_uring *ring, int id);
+extern int io_uring_register_restrictions(struct io_uring *ring,
+                                          struct io_uring_restriction *res,
+                                          unsigned int nr_res);
+extern int io_uring_enable_rings(struct io_uring *ring);
+extern int __io_uring_sqring_wait(struct io_uring *ring);
+
+/*
+ * Helper for the peek/wait single cqe functions. Exported because of that,
+ * but probably shouldn't be used directly in an application.
+ */
+extern int __io_uring_get_cqe(struct io_uring *ring,
+                              struct io_uring_cqe **cqe_ptr, unsigned submit,
+                              unsigned wait_nr, sigset_t *sigmask);
+
+#define LIBURING_UDATA_TIMEOUT ((__u64) -1)
+
+#define io_uring_for_each_cqe(ring, head, cqe) \
+    /* \
+     * io_uring_smp_load_acquire() enforces the order of tail \
+     * and CQE reads. \
+     */ \
+    for (head = *(ring)->cq.khead; \
+         (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \
+            &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \
+         head++)
+
+/*
+ * Must be called after io_uring_for_each_cqe()
+ */
+static inline void io_uring_cq_advance(struct io_uring *ring,
+                                       unsigned nr)
+{
+    if (nr) {
+        struct io_uring_cq *cq = &ring->cq;
+
+        /*
+         * Ensure that the kernel only sees the new value of the head
+         * index after the CQEs have been read.
+         */
+        io_uring_smp_store_release(cq->khead, *cq->khead + nr);
+    }
+}
+
+/*
+ * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
+ * been processed by the application.
+ */
+static inline void io_uring_cqe_seen(struct io_uring *ring,
+                                     struct io_uring_cqe *cqe)
+{
+    if (cqe)
+        io_uring_cq_advance(ring, 1);
+}
+
+/*
+ * Command prep helpers
+ */
+static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
+{
+    sqe->user_data = (unsigned long) data;
+}
+
+static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
+{
+    return (void *) (uintptr_t) cqe->user_data;
+}
+
+static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
+                                          unsigned flags)
+{
+    sqe->flags = flags;
+}
+
+static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
+                                    const void *addr, unsigned len,
+                                    __u64 offset)
+{
+    sqe->opcode = op;
+    sqe->flags = 0;
+    sqe->ioprio = 0;
+    sqe->fd = fd;
+    sqe->off = offset;
+    sqe->addr = (unsigned long) addr;
+    sqe->len = len;
+    sqe->rw_flags = 0;
+    sqe->user_data = 0;
+    sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+}
+
+static inline void io_uring_prep_splice(struct io_uring_sqe *sqe,
+                                        int fd_in, int64_t off_in,
+                                        int fd_out, int64_t off_out,
+                                        unsigned int nbytes,
+                                        unsigned int splice_flags)
+{
+    io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes, off_out);
+    sqe->splice_off_in = off_in;
+    sqe->splice_fd_in = fd_in;
+    sqe->splice_flags = splice_flags;
+}
+
+static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
+                                       const struct iovec *iovecs,
+                                       unsigned nr_vecs, off_t offset)
+{
+    io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
+}
+
+static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
+                                            void *buf, unsigned nbytes,
+                                            off_t offset, int buf_index)
+{
+    io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
+    sqe->buf_index = buf_index;
+}
+
+static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
+                                        const struct iovec *iovecs,
+                                        unsigned nr_vecs, off_t offset)
+{
+    io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
+}
+
+static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
+                                             const void *buf, unsigned nbytes,
+                                             off_t offset, int buf_index)
+{
+    io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
+    sqe->buf_index = buf_index;
+}
+
+static inline void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
+                                         struct msghdr *msg, unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
+    sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
+                                         const struct msghdr *msg, unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0);
+    sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
+                                          unsigned poll_mask)
+{
+    io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
+#if __BYTE_ORDER == __BIG_ENDIAN
+    poll_mask = __swahw32(poll_mask);
+#endif
+    sqe->poll32_events = poll_mask;
+}
+
+static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
+                                             void *user_data)
+{
+    io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, user_data, 0, 0);
+}
+
+static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
+                                       unsigned fsync_flags)
+{
+    io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
+    sqe->fsync_flags = fsync_flags;
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+    io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
+}
+
+static inline void io_uring_prep_timeout(struct io_uring_sqe *sqe,
+                                         struct __kernel_timespec *ts,
+                                         unsigned count, unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count);
+    sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
+                                                __u64 user_data, unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1,
+                     (void *)(unsigned long)user_data, 0, 0);
+    sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
+                                        struct sockaddr *addr,
+                                        socklen_t *addrlen, int flags)
+{
+    io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0,
+                     (__u64) (unsigned long) addrlen);
+    sqe->accept_flags = flags;
+}
+
+static inline void io_uring_prep_cancel(struct io_uring_sqe *sqe, void *user_data,
+                                        int flags)
+{
+    io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, user_data, 0, 0);
+    sqe->cancel_flags = flags;
+}
+
+static inline void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
+                                              struct __kernel_timespec *ts,
+                                              unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0);
+    sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
+                                         const struct sockaddr *addr,
+                                         socklen_t addrlen)
+{
+    io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen);
+}
+
+static inline void io_uring_prep_files_update(struct io_uring_sqe *sqe,
+                                              int *fds, unsigned nr_fds,
+                                              int offset)
+{
+    io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds, offset);
+}
+
+static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
+                                           int mode, off_t offset, off_t len)
+{
+
+    io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
+                     (const uintptr_t *) (unsigned long) len, mode, offset);
+}
+
+static inline void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
+                                        const char *path, int flags, mode_t mode)
+{
+    io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0);
+    sqe->open_flags = flags;
+}
+
+static inline void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
+{
+    io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
+}
+
+static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
+                                      void *buf, unsigned nbytes, off_t offset)
+{
+    io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
+}
+
+static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
+                                       const void *buf, unsigned nbytes, off_t offset)
+{
+    io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
+}
+
+struct statx;
+static inline void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
+                                       const char *path, int flags, unsigned mask,
+                                       struct statx *statxbuf)
+{
+    io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
+                     (__u64) (unsigned long) statxbuf);
+    sqe->statx_flags = flags;
+}
+
+static inline void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
+                                         off_t offset, off_t len, int advice)
+{
+    io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, len, offset);
+    sqe->fadvise_advice = advice;
+}
+
+static inline void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
+                                         off_t length, int advice)
+{
+    io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, length, 0);
+    sqe->fadvise_advice = advice;
+}
+
+static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
+                                      const void *buf, size_t len, int flags)
+{
+    io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, len, 0);
+    sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
+                                      void *buf, size_t len, int flags)
+{
+    io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, len, 0);
+    sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
+                                         const char *path, struct open_how *how)
+{
+    io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how),
+                     (uint64_t) (uintptr_t) how);
+}
+
+struct epoll_event;
+static inline void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
+                                           int fd, int op,
+                                           struct epoll_event *ev)
+{
+    io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev, op, fd);
+}
+
+static inline void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
+                                                 void *addr, int len, int nr,
+                                                 int bgid, int bid)
+{
+    io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, len, bid);
+    sqe->buf_group = bgid;
+}
+
+static inline void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
+                                                int nr, int bgid)
+{
+    io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
+    sqe->buf_group = bgid;
+}
+
+static inline void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
+                                          int how)
+{
+    io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, how, 0);
+}
+
+/*
+ * Returns number of unconsumed (if SQPOLL) or unsubmitted entries exist in
+ * the SQ ring
+ */
+static inline unsigned io_uring_sq_ready(struct io_uring *ring)
+{
+    /*
+     * Without a barrier, we could miss an update and think the SQ wasn't ready.
+     * We don't need the load acquire for non-SQPOLL since then we drive updates.
+     */
+    if (ring->flags & IORING_SETUP_SQPOLL)
+        return ring->sq.sqe_tail - io_uring_smp_load_acquire(ring->sq.khead);
+
+    /* always use real head, to avoid losing sync for short submit */
+    return ring->sq.sqe_tail - *ring->sq.khead;
+}
+
+/*
+ * Returns how much space is left in the SQ ring.
+ */
+static inline unsigned io_uring_sq_space_left(struct io_uring *ring)
+{
+    return *ring->sq.kring_entries - io_uring_sq_ready(ring);
+}
+
+/*
+ * Only applicable when using SQPOLL - allows the caller to wait for space
+ * to free up in the SQ ring, which happens when the kernel side thread has
+ * consumed one or more entries. If the SQ ring is currently non-full, no
+ * action is taken. Note: may return -EINVAL if the kernel doesn't support
+ * this feature.
+ */
+static inline int io_uring_sqring_wait(struct io_uring *ring)
+{
+    if (!(ring->flags & IORING_SETUP_SQPOLL))
+        return 0;
+    if (io_uring_sq_space_left(ring))
+        return 0;
+
+    return __io_uring_sqring_wait(ring);
+}
+
+/*
+ * Returns how many unconsumed entries are ready in the CQ ring
+ */
+static inline unsigned io_uring_cq_ready(struct io_uring *ring)
+{
+    return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead;
+}
+
+/*
+ * Returns true if the eventfd notification is currently enabled
+ */
+static inline bool io_uring_cq_eventfd_enabled(struct io_uring *ring)
+{
+    if (!ring->cq.kflags)
+        return true;
+
+    return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED);
+}
+
+/*
+ * Toggle eventfd notification on or off, if an eventfd is registered with
+ * the ring.
+ */
+static inline int io_uring_cq_eventfd_toggle(struct io_uring *ring,
+                                             bool enabled)
+{
+    uint32_t flags;
+
+    if (!!enabled == io_uring_cq_eventfd_enabled(ring))
+        return 0;
+
+    if (!ring->cq.kflags)
+        return -EOPNOTSUPP;
+
+    flags = *ring->cq.kflags;
+
+    if (enabled)
+        flags &= ~IORING_CQ_EVENTFD_DISABLED;
+    else
+        flags |= IORING_CQ_EVENTFD_DISABLED;
+
+    IO_URING_WRITE_ONCE(*ring->cq.kflags, flags);
+
+    return 0;
+}
+
+/*
+ * Return an IO completion, waiting for 'wait_nr' completions if one isn't
+ * readily available. Returns 0 with cqe_ptr filled in on success, -errno on
+ * failure.
+ */
+static inline int io_uring_wait_cqe_nr(struct io_uring *ring,
+                                       struct io_uring_cqe **cqe_ptr,
+                                       unsigned wait_nr)
+{
+    return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL);
+}
+
+/*
+ * Return an IO completion, if one is readily available. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
+ */
+static inline int io_uring_peek_cqe(struct io_uring *ring,
+                                    struct io_uring_cqe **cqe_ptr)
+{
+    return io_uring_wait_cqe_nr(ring, cqe_ptr, 0);
+}
+
+/*
+ * Return an IO completion, waiting for it if necessary. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
+ */
+static inline int io_uring_wait_cqe(struct io_uring *ring,
+                                    struct io_uring_cqe **cqe_ptr)
+{
+    return io_uring_wait_cqe_nr(ring, cqe_ptr, 1);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
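
A self-contained smoke test for the vendored liburing API, using only functions declared in the header above: submit a single NOP SQE and reap its completion. Error handling is deliberately minimal; this is a sketch for orientation, not part of the gem.

```c
/* Minimal liburing check: submit one NOP, wait for its completion. */
#include <stdio.h>
#include <string.h>
#include "liburing.h"

int main(void) {
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;

    int ret = io_uring_queue_init(8, &ring, 0);   /* 8-entry SQ/CQ rings */
    if (ret < 0) {
        fprintf(stderr, "queue_init: %s\n", strerror(-ret));
        return 1;
    }

    sqe = io_uring_get_sqe(&ring);     /* cannot fail here: ring is empty */
    io_uring_prep_nop(sqe);            /* no-op request, completes at once */
    io_uring_sqe_set_data(sqe, (void *) 42);

    io_uring_submit(&ring);            /* one io_uring_enter() syscall */

    ret = io_uring_wait_cqe(&ring, &cqe);
    if (ret == 0) {
        printf("NOP done: res=%d user_data=%ld\n",
               cqe->res, (long) io_uring_cqe_get_data(cqe));
        io_uring_cqe_seen(&ring, cqe); /* advance the CQ head */
    }

    io_uring_queue_exit(&ring);
    return 0;
}
```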