polyphony 0.45.0 → 0.46.0

Files changed (156)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +2 -0
  3. data/.gitmodules +0 -0
  4. data/.rubocop.yml +1 -0
  5. data/CHANGELOG.md +38 -0
  6. data/Gemfile.lock +11 -3
  7. data/README.md +3 -3
  8. data/Rakefile +1 -1
  9. data/TODO.md +10 -18
  10. data/examples/adapters/redis_client.rb +3 -1
  11. data/examples/adapters/redis_pubsub_perf.rb +11 -8
  12. data/examples/adapters/sequel_mysql.rb +1 -1
  13. data/examples/adapters/sequel_pg.rb +24 -0
  14. data/examples/core/{02-awaiting-fibers.rb → await.rb} +0 -0
  15. data/examples/core/{xx-channels.rb → channels.rb} +0 -0
  16. data/examples/core/deferring-an-operation.rb +16 -0
  17. data/examples/core/{xx-erlang-style-genserver.rb → erlang-style-genserver.rb} +16 -9
  18. data/examples/core/{xx-forking.rb → forking.rb} +1 -1
  19. data/examples/core/handling-signals.rb +11 -0
  20. data/examples/core/{03-interrupting.rb → interrupt.rb} +0 -0
  21. data/examples/core/{xx-pingpong.rb → pingpong.rb} +7 -5
  22. data/examples/core/{xx-recurrent-timer.rb → recurrent-timer.rb} +1 -1
  23. data/examples/core/{xx-resource_delegate.rb → resource_delegate.rb} +3 -4
  24. data/examples/core/{01-spinning-up-fibers.rb → spin.rb} +1 -1
  25. data/examples/core/{xx-spin_error_backtrace.rb → spin_error_backtrace.rb} +1 -1
  26. data/examples/core/{xx-supervise-process.rb → supervise-process.rb} +8 -5
  27. data/examples/core/supervisor.rb +20 -0
  28. data/examples/core/{xx-thread-sleep.rb → thread-sleep.rb} +0 -0
  29. data/examples/core/{xx-thread_pool.rb → thread_pool.rb} +0 -0
  30. data/examples/core/{xx-throttling.rb → throttling.rb} +0 -0
  31. data/examples/core/{xx-timeout.rb → timeout.rb} +0 -0
  32. data/examples/core/{xx-using-a-mutex.rb → using-a-mutex.rb} +0 -0
  33. data/examples/core/{xx-worker-thread.rb → worker-thread.rb} +2 -2
  34. data/examples/io/{xx-backticks.rb → backticks.rb} +0 -0
  35. data/examples/io/{xx-echo_client.rb → echo_client.rb} +1 -1
  36. data/examples/io/{xx-echo_client_from_stdin.rb → echo_client_from_stdin.rb} +2 -2
  37. data/examples/io/{xx-echo_pipe.rb → echo_pipe.rb} +1 -1
  38. data/examples/io/{xx-echo_server.rb → echo_server.rb} +0 -0
  39. data/examples/io/{xx-echo_server_with_timeout.rb → echo_server_with_timeout.rb} +1 -1
  40. data/examples/io/{xx-echo_stdin.rb → echo_stdin.rb} +0 -0
  41. data/examples/io/{xx-happy-eyeballs.rb → happy-eyeballs.rb} +0 -0
  42. data/examples/io/{xx-httparty.rb → httparty.rb} +4 -13
  43. data/examples/io/{xx-irb.rb → irb.rb} +0 -0
  44. data/examples/io/{xx-net-http.rb → net-http.rb} +0 -0
  45. data/examples/io/{xx-open.rb → open.rb} +0 -0
  46. data/examples/io/{xx-pry.rb → pry.rb} +0 -0
  47. data/examples/io/{xx-rack_server.rb → rack_server.rb} +0 -0
  48. data/examples/io/raw.rb +14 -0
  49. data/examples/io/reline.rb +18 -0
  50. data/examples/io/{xx-system.rb → system.rb} +1 -1
  51. data/examples/io/{xx-tcpserver.rb → tcpserver.rb} +0 -0
  52. data/examples/io/{xx-tcpsocket.rb → tcpsocket.rb} +0 -0
  53. data/examples/io/tunnel.rb +6 -1
  54. data/examples/io/{xx-zip.rb → zip.rb} +0 -0
  55. data/examples/performance/fiber_transfer.rb +2 -1
  56. data/examples/performance/fs_read.rb +5 -6
  57. data/examples/performance/multi_snooze.rb +0 -1
  58. data/examples/{io/xx-switch.rb → performance/switch.rb} +2 -1
  59. data/examples/performance/thread-vs-fiber/{xx-httparty_multi.rb → httparty_multi.rb} +3 -4
  60. data/examples/performance/thread-vs-fiber/{xx-httparty_threaded.rb → httparty_threaded.rb} +0 -0
  61. data/examples/performance/thread-vs-fiber/polyphony_mt_server.rb +1 -1
  62. data/examples/performance/thread-vs-fiber/polyphony_server.rb +1 -2
  63. data/examples/performance/thread-vs-fiber/threaded_server.rb +1 -5
  64. data/examples/performance/thread_pool_perf.rb +6 -7
  65. data/ext/liburing/liburing.h +585 -0
  66. data/ext/liburing/liburing/README.md +4 -0
  67. data/ext/liburing/liburing/barrier.h +73 -0
  68. data/ext/liburing/liburing/compat.h +15 -0
  69. data/ext/liburing/liburing/io_uring.h +343 -0
  70. data/ext/liburing/queue.c +333 -0
  71. data/ext/liburing/register.c +187 -0
  72. data/ext/liburing/setup.c +210 -0
  73. data/ext/liburing/syscall.c +54 -0
  74. data/ext/liburing/syscall.h +18 -0
  75. data/ext/polyphony/backend.h +1 -16
  76. data/ext/polyphony/backend_common.h +109 -0
  77. data/ext/polyphony/backend_io_uring.c +884 -0
  78. data/ext/polyphony/backend_io_uring_context.c +73 -0
  79. data/ext/polyphony/backend_io_uring_context.h +52 -0
  80. data/ext/polyphony/{libev_backend.c → backend_libev.c} +255 -345
  81. data/ext/polyphony/event.c +1 -1
  82. data/ext/polyphony/extconf.rb +31 -13
  83. data/ext/polyphony/fiber.c +111 -27
  84. data/ext/polyphony/libev.c +4 -0
  85. data/ext/polyphony/libev.h +8 -2
  86. data/ext/polyphony/liburing.c +8 -0
  87. data/ext/polyphony/playground.c +51 -0
  88. data/ext/polyphony/polyphony.c +6 -8
  89. data/ext/polyphony/polyphony.h +29 -25
  90. data/ext/polyphony/polyphony_ext.c +13 -6
  91. data/ext/polyphony/queue.c +3 -4
  92. data/ext/polyphony/ring_buffer.c +0 -1
  93. data/ext/polyphony/runqueue.c +102 -0
  94. data/ext/polyphony/runqueue_ring_buffer.c +85 -0
  95. data/ext/polyphony/runqueue_ring_buffer.h +31 -0
  96. data/ext/polyphony/thread.c +45 -92
  97. data/lib/polyphony.rb +2 -2
  98. data/lib/polyphony/adapters/fs.rb +1 -1
  99. data/lib/polyphony/adapters/process.rb +0 -3
  100. data/lib/polyphony/adapters/redis.rb +1 -1
  101. data/lib/polyphony/adapters/trace.rb +2 -2
  102. data/lib/polyphony/core/global_api.rb +9 -12
  103. data/lib/polyphony/core/sync.rb +6 -2
  104. data/lib/polyphony/extensions/core.rb +6 -24
  105. data/lib/polyphony/extensions/debug.rb +13 -0
  106. data/lib/polyphony/extensions/fiber.rb +21 -44
  107. data/lib/polyphony/extensions/io.rb +55 -10
  108. data/lib/polyphony/extensions/socket.rb +70 -12
  109. data/lib/polyphony/version.rb +1 -1
  110. data/polyphony.gemspec +3 -2
  111. data/test/helper.rb +36 -4
  112. data/test/io_uring_test.rb +55 -0
  113. data/test/stress.rb +5 -2
  114. data/test/test_backend.rb +4 -6
  115. data/test/test_ext.rb +1 -2
  116. data/test/test_fiber.rb +31 -24
  117. data/test/test_global_api.rb +58 -31
  118. data/test/test_io.rb +58 -0
  119. data/test/test_signal.rb +11 -8
  120. data/test/test_socket.rb +17 -0
  121. data/test/test_sync.rb +21 -0
  122. data/test/test_throttler.rb +3 -6
  123. data/test/test_trace.rb +7 -5
  124. metadata +86 -76
  125. data/examples/adapters/concurrent-ruby.rb +0 -9
  126. data/examples/core/04-handling-signals.rb +0 -19
  127. data/examples/core/xx-at_exit.rb +0 -29
  128. data/examples/core/xx-backend.rb +0 -102
  129. data/examples/core/xx-caller.rb +0 -12
  130. data/examples/core/xx-daemon.rb +0 -14
  131. data/examples/core/xx-deadlock.rb +0 -8
  132. data/examples/core/xx-deferring-an-operation.rb +0 -14
  133. data/examples/core/xx-exception-backtrace.rb +0 -40
  134. data/examples/core/xx-fork-cleanup.rb +0 -22
  135. data/examples/core/xx-fork-spin.rb +0 -42
  136. data/examples/core/xx-fork-terminate.rb +0 -27
  137. data/examples/core/xx-move_on.rb +0 -23
  138. data/examples/core/xx-queue-async.rb +0 -120
  139. data/examples/core/xx-readpartial.rb +0 -18
  140. data/examples/core/xx-signals.rb +0 -16
  141. data/examples/core/xx-sleep-forever.rb +0 -9
  142. data/examples/core/xx-sleeping.rb +0 -25
  143. data/examples/core/xx-snooze-starve.rb +0 -16
  144. data/examples/core/xx-spin-fork.rb +0 -49
  145. data/examples/core/xx-state-machine.rb +0 -51
  146. data/examples/core/xx-stop.rb +0 -20
  147. data/examples/core/xx-supervisors.rb +0 -21
  148. data/examples/core/xx-thread-selector-sleep.rb +0 -51
  149. data/examples/core/xx-thread-selector-snooze.rb +0 -46
  150. data/examples/core/xx-thread-snooze.rb +0 -34
  151. data/examples/core/xx-timer-gc.rb +0 -17
  152. data/examples/core/xx-trace.rb +0 -79
  153. data/examples/performance/xx-array.rb +0 -11
  154. data/examples/performance/xx-fiber-switch.rb +0 -9
  155. data/examples/performance/xx-snooze.rb +0 -15
  156. data/examples/xx-spin.rb +0 -32
@@ -3,7 +3,7 @@
 require 'bundler/setup'
 require 'polyphony'
 
-move_on_after(3) do
+move_on_after(3.1) do
   puts 'Start...'
   every(1) do
     puts Time.now
@@ -10,7 +10,7 @@ class Number
 
   def greet(other)
     puts "You are number #{other}, I am number #{@id}"
-    sleep(0.05 + rand * 0.2)
+    sleep rand(0.2..0.3)
   end
 end
 
@@ -25,7 +25,6 @@ def meet(number)
   end
 end
 
-3.times { |x| spin { meet(x) } }
+(4..10).each { |x| spin { meet(x) } }
 
-t0 = Time.now
-every(10) { puts "uptime: #{Time.now - t0}" }
+sleep 1
@@ -14,5 +14,5 @@ end
 spin { nap(:a, 1) }
 spin { nap(:b, 2) }
 
-# Calling suspend will block until all child fibers have terminated
+# Calling suspend will block until no work is left to do
 suspend
@@ -9,7 +9,7 @@ end
 
 def deferred_error(t)
   puts "deferred_error"
-  spin { de2(t) }
+  spin { de2(t) }.await
 end
 
 def de2(t)
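
The change above makes deferred_error wait for the spun fiber, so an exception raised inside de2 surfaces at the call site. A minimal sketch of Fiber#await's exception propagation (the 'boom' example is illustrative, not from this diff):

    require 'bundler/setup'
    require 'polyphony'

    begin
      # Awaiting a spun fiber re-raises any exception raised inside it.
      spin { raise 'boom' }.await
    rescue RuntimeError => e
      puts "caught at call site: #{e.message}"
    end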
@@ -8,7 +8,7 @@ Exception.__disable_sanitized_backtrace__ = true
 supervisor = spin do
   puts "parent pid #{Process.pid}"
 
-  Polyphony::ProcessSupervisor.supervise do
+  Polyphony.watch_process do
     puts "child pid #{Process.pid}"
     puts "go to sleep"
     sleep 5
@@ -22,9 +22,12 @@ supervisor = spin do
 end
 
 begin
+  spin do
+    sleep 2.5
+    Process.kill('TERM', Process.pid)
+  end
+  supervisor.await
+rescue SystemExit
+  supervisor.terminate
   supervisor.await
-rescue Interrupt
-  exit!
-  # supervisor.terminate
-  # supervisor.await
 end
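
A minimal sketch of the signal-to-exception behavior the rewritten example relies on (assuming, as the diff suggests, that Polyphony raises SystemExit in the main fiber on SIGTERM):

    require 'bundler/setup'
    require 'polyphony'

    # Deliver TERM to ourselves after half a second.
    spin { sleep 0.5; Process.kill('TERM', Process.pid) }

    begin
      sleep # sleep forever until a signal arrives
    rescue SystemExit
      puts 'got TERM, shutting down cleanly'
    end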
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+require 'bundler/setup'
+require 'polyphony'
+
+def my_sleep(t)
+  puts "#{t} start"
+  sleep(t)
+  puts "#{t} done"
+end
+
+spin { my_sleep(1) }
+spin { my_sleep(2) }
+spin { my_sleep(3) }
+spin { puts "fiber count: #{Fiber.current.children.count}" }
+snooze
+
+puts "#{Time.now} supervising..."
+supervise
+puts "#{Time.now} done supervising"
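
This new example is built around the block-less supervise: spin up children, then call supervise with no arguments to wait on the children of the current fiber. A minimal sketch of the same pattern (task bodies are illustrative):

    require 'bundler/setup'
    require 'polyphony'

    spin { sleep 0.2; puts 'short task done' }
    spin { sleep 0.4; puts 'long task done' }

    # Block-less supervise: waits on the current fiber's children,
    # returning once all of them have terminated.
    supervise
    puts 'all tasks done'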
@@ -5,9 +5,9 @@ require 'polyphony'
 
 def do_work(client)
   result = yield
-  client.schedule(result)
+  client << result
 rescue Exception => e
-  client.schedule(e)
+  client << e
 end
 
 $worker = Thread.new do
@@ -15,7 +15,7 @@ end
 
 reader = spin do
   puts 'received from echo server:'
-  while (data = socket.readpartial(8192))
+  socket.read_loop do |data|
     STDOUT << data
   end
 end
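
This hunk and several that follow migrate from readpartial loops to IO#read_loop, which yields each chunk read until the other side closes the connection. A minimal sketch, assuming an echo server listening on 127.0.0.1:1234 (address illustrative):

    require 'bundler/setup'
    require 'polyphony'

    socket = TCPSocket.new('127.0.0.1', 1234)
    socket.write "hello\n"

    # read_loop yields every chunk until EOF, replacing the old
    # `while (data = socket.readpartial(8192))` idiom.
    socket.read_loop { |data| STDOUT << data }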
@@ -11,8 +11,8 @@ writer = spin do
   end
 end
 
-spin do
-  while (data = socket.readpartial(8192))
+reader = spin do
+  socket.read_loop do |data|
     STDOUT << 'received: ' + data
   end
   writer.interrupt
@@ -11,6 +11,6 @@ spin do
   o.close
 end
 
-while (data = i.readpartial(8192))
+i.read_loop do |data|
   STDOUT << "You said: #{data}"
 end
@@ -17,7 +17,7 @@ begin
   scope.when_cancelled do
     client.write "Disconnecting due to inactivity\n"
   end
-  while (data = client.readpartial(8192))
+  client.read_loop do |data|
     scope.reset_timeout
     client.write "You said: #{data}"
   end
@@ -16,23 +16,14 @@ end
 zones = %w{
   Europe/London Europe/Paris Europe/Bucharest America/New_York Asia/Bangkok
 }
-# zones.each do |tzone|
-#   spin do
-#     time = get_time(tzone)
-#     puts "Time in #{tzone}: #{time}"
-#   end
-# end
-
-# suspend
 
 def get_times(zones)
-  Polyphony::Supervisor.new do |s|
-    zones.each do |tzone|
-      s.spin { [tzone, get_time(tzone)] }
-    end
+  fibers = zones.map do |tzone|
+    spin { [tzone, get_time(tzone)] }
   end
+  Fiber.await(*fibers)
 end
 
-get_times(zones).await.each do |tzone, time|
+get_times(zones).each do |tzone, time|
   puts "Time in #{tzone}: #{time}"
 end
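
The rewritten get_times uses Fiber.await, which waits for all the given fibers to terminate and returns their results as an array, in order. A minimal sketch of the same pattern:

    require 'bundler/setup'
    require 'polyphony'

    fibers = (1..3).map { |i| spin { sleep 0.1; i * 10 } }

    # Blocks until every fiber is done; returns [10, 20, 30].
    p Fiber.await(*fibers)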
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+require 'bundler/setup'
+require 'polyphony'
+require 'io/console'
+
+c = STDIN.raw(min: 1, tim: 0, &:getbyte)
+p result: c
+exit
+
+puts '?' * 40
+c = STDIN.getbyte
+puts '*' * 40
+p c
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+require 'bundler/setup'
+require 'polyphony'
+require 'polyphony/adapters/readline'
+require 'pry'
+
+$counter = 0
+timer = spin do
+  throttled_loop(5) do
+    $counter += 1
+  end
+end
+
+at_exit { timer.stop }
+
+puts 'try typing $counter to see the counter incremented in the background'
+binding.pry
@@ -7,5 +7,5 @@ timer = spin do
   throttled_loop(5) { STDOUT << '.' }
 end
 
-puts system('ruby -e "sleep 1; puts :done; STDOUT.close"')
+puts system('ruby -e "puts :sleeping; STDOUT.flush; sleep 1; puts :done"')
 timer.stop
@@ -3,6 +3,11 @@
 require 'bundler/setup'
 require 'polyphony'
 
+if ARGV.size < 2
+  puts "Usage: ruby examples/tunnel.rb <port1> <port2>"
+  exit
+end
+
 Ports = ARGV[0..1]
 EndPoints = []
 
@@ -24,7 +29,7 @@ def endpoint_loop(idx, peer_idx)
     conn.binmode
     EndPoints[idx] = conn
     log "Client connected on port #{port} (#{conn.remote_address.inspect})"
-    while data = conn.readpartial(8192)
+    conn.read_loop do |data|
       peer = EndPoints[peer_idx]
       if peer
         peer << data
@@ -6,7 +6,8 @@ class Fiber
   attr_accessor :next
 end
 
-# This program shows how the performance
+# This program shows how the performance of Fiber.transfer degrades as the fiber
+# count increases
 
 def run(num_fibers)
   count = 0
@@ -2,7 +2,7 @@
 
 require 'bundler/setup'
 require 'polyphony'
-require 'polyphony/fs'
+require 'polyphony/adapters/fs'
 
 def raw_read_file(x)
   t0 = Time.now
@@ -14,7 +14,7 @@ def threaded_read_file(x, y)
   t0 = Time.now
   threads = []
   y.times do
-    threads << Thread.new { x.times { IO.orig_read(PATH) } }
+    threads << Thread.new { x.times { IO.orig_read(__FILE__) } }
   end
   threads.each(&:join)
   puts "threaded_read_file: #{Time.now - t0}"
@@ -22,11 +22,10 @@ end
 
 def thread_pool_read_file(x, y)
   t0 = Time.now
-  supervise do |s|
-    y.times do
-      s.spin { x.times { IO.read(PATH) } }
-    end
+  y.times do
+    spin { x.times { IO.read(__FILE__) } }
   end
+  Fiber.current.await_all_children
   puts "thread_pool_read_file: #{Time.now - t0}"
 end
 
@@ -18,7 +18,6 @@ def bm(fibers, iterations)
   Fiber.current.await_all_children
   dt = Time.now - t0
   puts "#{[fibers, iterations].inspect} setup: #{t0 - t_pre}s count: #{count} #{count / dt.to_f}/s"
-  Thread.current.run_queue_trace
 end
 
 GC.disable
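
The benchmark rewrites above wait for spun fibers with Fiber.current.await_all_children, which blocks until every child of the current fiber has terminated. A minimal sketch:

    require 'bundler/setup'
    require 'polyphony'

    3.times { |i| spin { sleep rand * 0.1; puts "child #{i} done" } }

    # Wait for all fibers spun from the current fiber before moving on.
    Fiber.current.await_all_children
    puts 'all children done'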
@@ -12,4 +12,5 @@ end
 t0 = Time.now
 X.times { f.transfer }
 dt = Time.now - t0
-puts "#{X / dt.to_f}/s"
+puts "#{X / dt.to_f}/s"
+puts fs.size
@@ -21,11 +21,10 @@ end
 t0 = Time.now
 results = []
 move_on_after(3) do
-  supervise do |s|
-    10.times do
-      s.spin { get_time(results) }
-    end
+  10.times do
+    spin { get_time(results) }
   end
+  supervise
   puts 'done'
 end
 
@@ -24,7 +24,7 @@ def handle_client(socket)
   parser.on_message_complete = proc do |env|
     reqs << Object.new # parser
   end
-  while (data = socket.readpartial(8192)) do
+  socket.read_loop do |data|
     parser << data
     while (req = reqs.shift)
       handle_request(socket, req)
@@ -13,12 +13,11 @@ def handle_client(socket)
   parser.on_message_complete = proc do |env|
     reqs << Object.new # parser
   end
-  while (data = socket.readpartial(8192)) do
+  socket.recv_loop do |data|
     parser << data
     while (req = reqs.shift)
       handle_request(socket, req)
       req = nil
-      snooze
     end
   end
 rescue IOError, SystemCallError => e
@@ -11,11 +11,7 @@ def handle_client(client)
     headers = "Content-Length: #{data.bytesize}\r\n"
     client.write "HTTP/1.1 #{status_code}\r\n#{headers}\r\n#{data}"
   end
-  loop do
-    while data = client.readpartial(8192) rescue nil
-      parser << data
-    end
-  end
+  client.read_loop { |data| parser << data }
   client.close
 end
 end
@@ -10,7 +10,7 @@ def lengthy_op
   # Digest::SHA256.digest(IO.read('doc/Promise.html'))
 end
 
-X = 100
+X = 10000
 
 def compare_performance
   t0 = Time.now
@@ -35,20 +35,19 @@ def compare_performance
 
   acc = 0
   count = 0
-  10.times do |_i|
+  1.times do |_i|
     t0 = Time.now
-    supervise do |s|
-      X.times do
-        s.spin { Polyphony::ThreadPool.process { lengthy_op } }
-      end
+    X.times do
+      spin { Polyphony::ThreadPool.process { lengthy_op } }
     end
+    Fiber.current.await_all_children
     thread_pool_perf = X / (Time.now - t0)
     acc += thread_pool_perf
     count += 1
   end
   avg_perf = acc / count
   puts format(
-    'avg thread pool performance: %g (X %0.2f)',
+    'spin X thread pool performance: %g (X %0.2f)',
    avg_perf,
    avg_perf / native_perf
  )
@@ -0,0 +1,585 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIB_URING_H
+#define LIB_URING_H
+
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <inttypes.h>
+#include <time.h>
+#include <linux/swab.h>
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+#include "liburing/barrier.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Library interface to io_uring
+ */
+struct io_uring_sq {
+    unsigned *khead;
+    unsigned *ktail;
+    unsigned *kring_mask;
+    unsigned *kring_entries;
+    unsigned *kflags;
+    unsigned *kdropped;
+    unsigned *array;
+    struct io_uring_sqe *sqes;
+
+    unsigned sqe_head;
+    unsigned sqe_tail;
+
+    size_t ring_sz;
+    void *ring_ptr;
+
+    unsigned pad[4];
+};
+
+struct io_uring_cq {
+    unsigned *khead;
+    unsigned *ktail;
+    unsigned *kring_mask;
+    unsigned *kring_entries;
+    unsigned *kflags;
+    unsigned *koverflow;
+    struct io_uring_cqe *cqes;
+
+    size_t ring_sz;
+    void *ring_ptr;
+
+    unsigned pad[4];
+};
+
+struct io_uring {
+    struct io_uring_sq sq;
+    struct io_uring_cq cq;
+    unsigned flags;
+    int ring_fd;
+
+    unsigned pad[4];
+};
+
+/*
+ * Library interface
+ */
+
+/*
+ * return an allocated io_uring_probe structure, or NULL if probe fails (for
+ * example, if it is not available). The caller is responsible for freeing it
+ */
+extern struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
+/* same as io_uring_get_probe_ring, but takes care of ring init and teardown */
+extern struct io_uring_probe *io_uring_get_probe(void);
+
+/*
+ * frees a probe allocated through io_uring_get_probe() or
+ * io_uring_get_probe_ring()
+ */
+extern void io_uring_free_probe(struct io_uring_probe *probe);
+
+static inline int io_uring_opcode_supported(struct io_uring_probe *p, int op)
+{
+    if (op > p->last_op)
+        return 0;
+    return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
+}
+
+extern int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
+    struct io_uring_params *p);
+extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+    unsigned flags);
+extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
+    struct io_uring *ring);
+extern int io_uring_ring_dontfork(struct io_uring *ring);
+extern void io_uring_queue_exit(struct io_uring *ring);
+unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
+    struct io_uring_cqe **cqes, unsigned count);
+extern int io_uring_wait_cqes(struct io_uring *ring,
+    struct io_uring_cqe **cqe_ptr, unsigned wait_nr,
+    struct __kernel_timespec *ts, sigset_t *sigmask);
+extern int io_uring_wait_cqe_timeout(struct io_uring *ring,
+    struct io_uring_cqe **cqe_ptr, struct __kernel_timespec *ts);
+extern int io_uring_submit(struct io_uring *ring);
+extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
+extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
+
+extern int io_uring_register_buffers(struct io_uring *ring,
+    const struct iovec *iovecs,
+    unsigned nr_iovecs);
+extern int io_uring_unregister_buffers(struct io_uring *ring);
+extern int io_uring_register_files(struct io_uring *ring, const int *files,
+    unsigned nr_files);
+extern int io_uring_unregister_files(struct io_uring *ring);
+extern int io_uring_register_files_update(struct io_uring *ring, unsigned off,
+    int *files, unsigned nr_files);
+extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
+extern int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
+extern int io_uring_unregister_eventfd(struct io_uring *ring);
+extern int io_uring_register_probe(struct io_uring *ring,
+    struct io_uring_probe *p, unsigned nr);
+extern int io_uring_register_personality(struct io_uring *ring);
+extern int io_uring_unregister_personality(struct io_uring *ring, int id);
+extern int io_uring_register_restrictions(struct io_uring *ring,
+    struct io_uring_restriction *res,
+    unsigned int nr_res);
+extern int io_uring_enable_rings(struct io_uring *ring);
+extern int __io_uring_sqring_wait(struct io_uring *ring);
+
+/*
+ * Helper for the peek/wait single cqe functions. Exported because of that,
+ * but probably shouldn't be used directly in an application.
+ */
+extern int __io_uring_get_cqe(struct io_uring *ring,
+    struct io_uring_cqe **cqe_ptr, unsigned submit,
+    unsigned wait_nr, sigset_t *sigmask);
+
+#define LIBURING_UDATA_TIMEOUT ((__u64) -1)
+
+#define io_uring_for_each_cqe(ring, head, cqe) \
+    /* \
+     * io_uring_smp_load_acquire() enforces the order of tail \
+     * and CQE reads. \
+     */ \
+    for (head = *(ring)->cq.khead; \
+        (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \
+            &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \
+        head++) \
+
+/*
+ * Must be called after io_uring_for_each_cqe()
+ */
+static inline void io_uring_cq_advance(struct io_uring *ring,
+                                       unsigned nr)
+{
+    if (nr) {
+        struct io_uring_cq *cq = &ring->cq;
+
+        /*
+         * Ensure that the kernel only sees the new value of the head
+         * index after the CQEs have been read.
+         */
+        io_uring_smp_store_release(cq->khead, *cq->khead + nr);
+    }
+}
+
+/*
+ * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
+ * been processed by the application.
+ */
+static inline void io_uring_cqe_seen(struct io_uring *ring,
+                                     struct io_uring_cqe *cqe)
+{
+    if (cqe)
+        io_uring_cq_advance(ring, 1);
+}
+
+/*
+ * Command prep helpers
+ */
+static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
+{
+    sqe->user_data = (unsigned long) data;
+}
+
+static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
+{
+    return (void *) (uintptr_t) cqe->user_data;
+}
+
+static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
+                                          unsigned flags)
+{
+    sqe->flags = flags;
+}
+
+static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
+                                    const void *addr, unsigned len,
+                                    __u64 offset)
+{
+    sqe->opcode = op;
+    sqe->flags = 0;
+    sqe->ioprio = 0;
+    sqe->fd = fd;
+    sqe->off = offset;
+    sqe->addr = (unsigned long) addr;
+    sqe->len = len;
+    sqe->rw_flags = 0;
+    sqe->user_data = 0;
+    sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+}
+
+static inline void io_uring_prep_splice(struct io_uring_sqe *sqe,
+                                        int fd_in, int64_t off_in,
+                                        int fd_out, int64_t off_out,
+                                        unsigned int nbytes,
+                                        unsigned int splice_flags)
+{
+    io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes, off_out);
+    sqe->splice_off_in = off_in;
+    sqe->splice_fd_in = fd_in;
+    sqe->splice_flags = splice_flags;
+}
+
+static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
+                                       const struct iovec *iovecs,
+                                       unsigned nr_vecs, off_t offset)
+{
+    io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
+}
+
+static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
+                                            void *buf, unsigned nbytes,
+                                            off_t offset, int buf_index)
+{
+    io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
+    sqe->buf_index = buf_index;
+}
+
+static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
+                                        const struct iovec *iovecs,
+                                        unsigned nr_vecs, off_t offset)
+{
+    io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
+}
+
+static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
+                                             const void *buf, unsigned nbytes,
+                                             off_t offset, int buf_index)
+{
+    io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
+    sqe->buf_index = buf_index;
+}
+
+static inline void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
+                                         struct msghdr *msg, unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
+    sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
+                                         const struct msghdr *msg, unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0);
+    sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
+                                          unsigned poll_mask)
+{
+    io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
+#if __BYTE_ORDER == __BIG_ENDIAN
+    poll_mask = __swahw32(poll_mask);
+#endif
+    sqe->poll32_events = poll_mask;
+}
+
+static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
+                                             void *user_data)
+{
+    io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, user_data, 0, 0);
+}
+
+static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
+                                       unsigned fsync_flags)
+{
+    io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
+    sqe->fsync_flags = fsync_flags;
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+    io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
+}
+
+static inline void io_uring_prep_timeout(struct io_uring_sqe *sqe,
+                                         struct __kernel_timespec *ts,
+                                         unsigned count, unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count);
+    sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
+                                                __u64 user_data, unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1,
+        (void *)(unsigned long)user_data, 0, 0);
+    sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
+                                        struct sockaddr *addr,
+                                        socklen_t *addrlen, int flags)
+{
+    io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0,
+        (__u64) (unsigned long) addrlen);
+    sqe->accept_flags = flags;
+}
+
+static inline void io_uring_prep_cancel(struct io_uring_sqe *sqe, void *user_data,
+                                        int flags)
+{
+    io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, user_data, 0, 0);
+    sqe->cancel_flags = flags;
+}
+
+static inline void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
+                                              struct __kernel_timespec *ts,
+                                              unsigned flags)
+{
+    io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0);
+    sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
+                                         const struct sockaddr *addr,
+                                         socklen_t addrlen)
+{
+    io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen);
+}
+
+static inline void io_uring_prep_files_update(struct io_uring_sqe *sqe,
+                                              int *fds, unsigned nr_fds,
+                                              int offset)
+{
+    io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds, offset);
+}
+
+static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
+                                           int mode, off_t offset, off_t len)
+{
+
+    io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
+        (const uintptr_t *) (unsigned long) len, mode, offset);
+}
+
+static inline void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
+                                        const char *path, int flags, mode_t mode)
+{
+    io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0);
+    sqe->open_flags = flags;
+}
+
+static inline void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
+{
+    io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
+}
+
+static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
+                                      void *buf, unsigned nbytes, off_t offset)
+{
+    io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
+}
+
+static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
+                                       const void *buf, unsigned nbytes, off_t offset)
+{
+    io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
+}
+
+struct statx;
+static inline void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
+                                       const char *path, int flags, unsigned mask,
+                                       struct statx *statxbuf)
+{
+    io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
+        (__u64) (unsigned long) statxbuf);
+    sqe->statx_flags = flags;
+}
+
+static inline void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
+                                         off_t offset, off_t len, int advice)
+{
+    io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, len, offset);
+    sqe->fadvise_advice = advice;
+}
+
+static inline void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
+                                         off_t length, int advice)
+{
+    io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, length, 0);
+    sqe->fadvise_advice = advice;
+}
+
+static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
+                                      const void *buf, size_t len, int flags)
+{
+    io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, len, 0);
+    sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
+                                      void *buf, size_t len, int flags)
+{
+    io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, len, 0);
+    sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
+                                         const char *path, struct open_how *how)
+{
+    io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how),
+        (uint64_t) (uintptr_t) how);
+}
+
+struct epoll_event;
+static inline void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
+                                           int fd, int op,
+                                           struct epoll_event *ev)
+{
+    io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev, op, fd);
+}
+
+static inline void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
+                                                 void *addr, int len, int nr,
+                                                 int bgid, int bid)
+{
+    io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, len, bid);
+    sqe->buf_group = bgid;
+}
+
+static inline void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
+                                                int nr, int bgid)
+{
+    io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
+    sqe->buf_group = bgid;
+}
+
+static inline void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
+                                          int how)
+{
+    io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, how, 0);
+}
+
+/*
+ * Returns number of unconsumed (if SQPOLL) or unsubmitted entries exist in
+ * the SQ ring
+ */
+static inline unsigned io_uring_sq_ready(struct io_uring *ring)
+{
+    /*
+     * Without a barrier, we could miss an update and think the SQ wasn't ready.
+     * We don't need the load acquire for non-SQPOLL since then we drive updates.
+     */
+    if (ring->flags & IORING_SETUP_SQPOLL)
+        return ring->sq.sqe_tail - io_uring_smp_load_acquire(ring->sq.khead);
+
+    /* always use real head, to avoid losing sync for short submit */
+    return ring->sq.sqe_tail - *ring->sq.khead;
+}
+
+/*
+ * Returns how much space is left in the SQ ring.
+ */
+static inline unsigned io_uring_sq_space_left(struct io_uring *ring)
+{
+    return *ring->sq.kring_entries - io_uring_sq_ready(ring);
+}
+
+/*
+ * Only applicable when using SQPOLL - allows the caller to wait for space
+ * to free up in the SQ ring, which happens when the kernel side thread has
+ * consumed one or more entries. If the SQ ring is currently non-full, no
+ * action is taken. Note: may return -EINVAL if the kernel doesn't support
+ * this feature.
+ */
+static inline int io_uring_sqring_wait(struct io_uring *ring)
+{
+    if (!(ring->flags & IORING_SETUP_SQPOLL))
+        return 0;
+    if (io_uring_sq_space_left(ring))
+        return 0;
+
+    return __io_uring_sqring_wait(ring);
+}
+
+/*
+ * Returns how many unconsumed entries are ready in the CQ ring
+ */
+static inline unsigned io_uring_cq_ready(struct io_uring *ring)
+{
+    return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead;
+}
+
+/*
+ * Returns true if the eventfd notification is currently enabled
+ */
+static inline bool io_uring_cq_eventfd_enabled(struct io_uring *ring)
+{
+    if (!ring->cq.kflags)
+        return true;
+
+    return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED);
+}
+
+/*
+ * Toggle eventfd notification on or off, if an eventfd is registered with
+ * the ring.
+ */
+static inline int io_uring_cq_eventfd_toggle(struct io_uring *ring,
+                                             bool enabled)
+{
+    uint32_t flags;
+
+    if (!!enabled == io_uring_cq_eventfd_enabled(ring))
+        return 0;
+
+    if (!ring->cq.kflags)
+        return -EOPNOTSUPP;
+
+    flags = *ring->cq.kflags;
+
+    if (enabled)
+        flags &= ~IORING_CQ_EVENTFD_DISABLED;
+    else
+        flags |= IORING_CQ_EVENTFD_DISABLED;
+
+    IO_URING_WRITE_ONCE(*ring->cq.kflags, flags);
+
+    return 0;
+}
+
+/*
+ * Return an IO completion, waiting for 'wait_nr' completions if one isn't
+ * readily available. Returns 0 with cqe_ptr filled in on success, -errno on
+ * failure.
+ */
+static inline int io_uring_wait_cqe_nr(struct io_uring *ring,
+                                       struct io_uring_cqe **cqe_ptr,
+                                       unsigned wait_nr)
+{
+    return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL);
+}
+
+/*
+ * Return an IO completion, if one is readily available. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
+ */
+static inline int io_uring_peek_cqe(struct io_uring *ring,
+                                    struct io_uring_cqe **cqe_ptr)
+{
+    return io_uring_wait_cqe_nr(ring, cqe_ptr, 0);
+}
+
+/*
+ * Return an IO completion, waiting for it if necessary. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
+ */
+static inline int io_uring_wait_cqe(struct io_uring *ring,
+                                    struct io_uring_cqe **cqe_ptr)
+{
+    return io_uring_wait_cqe_nr(ring, cqe_ptr, 1);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif