uringmachine 0.23.1 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +1 -1
- data/CHANGELOG.md +8 -0
- data/Gemfile +1 -1
- data/TODO.md +52 -12
- data/benchmark/bm_io_pipe.rb +43 -1
- data/benchmark/bm_io_socketpair.rb +32 -2
- data/benchmark/bm_mutex_io.rb +47 -5
- data/benchmark/chart_bm_io_pipe_x.png +0 -0
- data/benchmark/common.rb +161 -17
- data/benchmark/http_parse.rb +9 -9
- data/benchmark/http_server_accept_queue.rb +104 -0
- data/benchmark/http_server_multi_accept.rb +93 -0
- data/benchmark/http_server_multi_ractor.rb +99 -0
- data/benchmark/http_server_single_thread.rb +80 -0
- data/benchmark/ips_io_pipe.rb +146 -0
- data/docs/design/buffer_pool.md +183 -0
- data/docs/um_api.md +91 -0
- data/examples/fiber_scheduler_file_io.rb +34 -0
- data/examples/fiber_scheduler_file_io_async.rb +33 -0
- data/ext/um/um.c +65 -48
- data/ext/um/um.h +11 -1
- data/ext/um/um_class.c +54 -11
- data/ext/um/um_sidecar.c +106 -0
- data/ext/um/um_stream.c +31 -0
- data/ext/um/um_stream_class.c +14 -0
- data/grant-2025/interim-report.md +130 -0
- data/grant-2025/journal.md +166 -2
- data/grant-2025/tasks.md +27 -17
- data/lib/uringmachine/fiber_scheduler.rb +35 -27
- data/lib/uringmachine/version.rb +1 -1
- data/lib/uringmachine.rb +4 -6
- data/test/helper.rb +8 -3
- data/test/test_fiber.rb +16 -0
- data/test/test_fiber_scheduler.rb +184 -72
- data/test/test_stream.rb +16 -0
- data/test/test_um.rb +94 -24
- metadata +14 -2

data/benchmark/http_server_multi_accept.rb
ADDED

@@ -0,0 +1,93 @@
# frozen_string_literal: true

require 'bundler/inline'

gemfile do
  source 'https://rubygems.org'
  gem 'uringmachine', path: '..'
end

require 'uringmachine'

RE_REQUEST_LINE = /^([a-z]+)\s+([^\s]+)\s+(http\/[0-9\.]{1,3})/i
RE_HEADER_LINE = /^([a-z0-9\-]+)\:\s+(.+)/i

def stream_get_request_line(stream, buf)
  line = stream.get_line(buf, 0)
  m = line&.match(RE_REQUEST_LINE)
  return nil if !m

  {
    'method' => m[1].downcase,
    'path' => m[2],
    'protocol' => m[3].downcase
  }
end

class InvalidHeadersError < StandardError; end

def get_headers(stream, buf)
  headers = stream_get_request_line(stream, buf)
  return nil if !headers

  while true
    line = stream.get_line(buf, 0)
    break if line.empty?

    m = line.match(RE_HEADER_LINE)
    raise InvalidHeadersError, "Invalid header" if !m

    headers[m[1]] = m[2]
  end

  headers
end

BODY = "Hello, world!" * 1000

def send_response(machine, fd)
  headers = "HTTP/1.1 200\r\nContent-Length: #{BODY.bytesize}\r\n\r\n"
  machine.sendv(fd, headers, BODY)
end

def handle_connection(machine, fd)
  stream = UM::Stream.new(machine, fd)
  buf = String.new(capacity: 65536)

  while true
    headers = get_headers(stream, buf)
    break if !headers

    send_response(machine, fd)
  end
rescue InvalidHeadersError, SystemCallError => e
  # ignore
ensure
  machine.close_async(fd)
end

N = ENV['N']&.to_i || 1
PORT = ENV['PORT']&.to_i || 1234

workers = N.times.map do |idx|
  Thread.new do
    machine = UM.new

    listen_fd = machine.socket(UM::AF_INET, UM::SOCK_STREAM, 0, 0)
    machine.setsockopt(listen_fd, UM::SOL_SOCKET, UM::SO_REUSEADDR, true)
    machine.setsockopt(listen_fd, UM::SOL_SOCKET, UM::SO_REUSEPORT, true)
    machine.bind(listen_fd, '127.0.0.1', PORT)
    machine.listen(listen_fd, 128)

    machine.accept_each(listen_fd) { |fd|
      machine.spin { handle_connection(machine, fd) }
    }
  rescue Exception => e
    p e
    p e.backtrace
    exit!
  end
end

puts "Listening on localhost:#{PORT}, #{N} worker thread(s)"
workers.each(&:join)
data/benchmark/http_server_multi_ractor.rb
ADDED

@@ -0,0 +1,99 @@
# frozen_string_literal: true

require 'bundler/inline'

gemfile do
  source 'https://rubygems.org'
  gem 'uringmachine', path: '..'
end

require 'uringmachine'

RE_REQUEST_LINE = /^([a-z]+)\s+([^\s]+)\s+(http\/[0-9\.]{1,3})/i
RE_HEADER_LINE = /^([a-z0-9\-]+)\:\s+(.+)/i

def stream_get_request_line(stream, buf)
  line = stream.get_line(buf, 0)
  m = line&.match(RE_REQUEST_LINE)
  return nil if !m

  {
    'method' => m[1].downcase,
    'path' => m[2],
    'protocol' => m[3].downcase
  }
end

class InvalidHeadersError < StandardError; end

def get_headers(stream, buf)
  headers = stream_get_request_line(stream, buf)
  return nil if !headers

  while true
    line = stream.get_line(buf, 0)
    break if line.empty?

    m = line.match(RE_HEADER_LINE)
    raise InvalidHeadersError, "Invalid header" if !m

    headers[m[1]] = m[2]
  end

  headers
end

BODY = "Hello, world!" * 1000
Ractor.make_shareable(BODY)

def send_response(machine, fd)
  headers = "HTTP/1.1 200\r\nContent-Length: #{BODY.bytesize}\r\n\r\n"
  machine.sendv(fd, headers, BODY)
end

def handle_connection(machine, fd)
  machine.setsockopt(fd, UM::IPPROTO_TCP, UM::TCP_NODELAY, true)
  stream = UM::Stream.new(machine, fd)
  buf = String.new(capacity: 65536)

  while true
    headers = get_headers(stream, buf)
    break if !headers

    send_response(machine, fd)
  end
# rescue InvalidHeadersError, SystemCallError => e
  # ignore
rescue Exception => e
  p e: e
  p bt: e.backtrace
  exit!
ensure
  machine.close_async(fd)
end

N = ENV['N']&.to_i || 1
PORT = ENV['PORT']&.to_i || 1234

workers = N.times.map do |idx|
  Ractor.new do
    machine = UM.new

    listen_fd = machine.socket(UM::AF_INET, UM::SOCK_STREAM, 0, 0)
    machine.setsockopt(listen_fd, UM::SOL_SOCKET, UM::SO_REUSEADDR, true)
    machine.setsockopt(listen_fd, UM::SOL_SOCKET, UM::SO_REUSEPORT, true)
    machine.bind(listen_fd, '127.0.0.1', PORT)
    machine.listen(listen_fd, 128)

    machine.accept_each(listen_fd) { |fd|
      machine.spin { handle_connection(machine, fd) }
    }
  rescue Exception => e
    p e
    p e.backtrace
    exit!
  end
end

puts "Listening on localhost:#{PORT}, #{N} worker ractor(s)"
workers.each(&:join)
data/benchmark/http_server_single_thread.rb
ADDED

@@ -0,0 +1,80 @@
# frozen_string_literal: true

require 'bundler/inline'

gemfile do
  source 'https://rubygems.org'
  gem 'uringmachine', path: '..'
end

require 'uringmachine'

RE_REQUEST_LINE = /^([a-z]+)\s+([^\s]+)\s+(http\/[0-9\.]{1,3})/i
RE_HEADER_LINE = /^([a-z0-9\-]+)\:\s+(.+)/i

def stream_get_request_line(stream, buf)
  line = stream.get_line(buf, 0)
  m = line&.match(RE_REQUEST_LINE)
  return nil if !m

  {
    'method' => m[1].downcase,
    'path' => m[2],
    'protocol' => m[3].downcase
  }
end

class InvalidHeadersError < StandardError; end

def get_headers(stream, buf)
  headers = stream_get_request_line(stream, buf)
  return nil if !headers

  while true
    line = stream.get_line(buf, 0)
    break if line.empty?

    m = line.match(RE_HEADER_LINE)
    raise InvalidHeadersError, "Invalid header" if !m

    headers[m[1]] = m[2]
  end

  headers
end

BODY = "Hello, world!" * 1000

def send_response(machine, fd)
  headers = "HTTP/1.1 200\r\nContent-Length: #{BODY.bytesize}\r\n\r\n"
  machine.sendv(fd, headers, BODY)
end

def handle_connection(machine, fd)
  machine.setsockopt(fd, UM::IPPROTO_TCP, UM::TCP_NODELAY, true)
  stream = UM::Stream.new(machine, fd)
  buf = String.new(capacity: 65536)

  while true
    headers = get_headers(stream, buf)
    break if !headers

    send_response(machine, fd)
  end
rescue InvalidHeadersError, SystemCallError => e
  # ignore
ensure
  machine.close_async(fd)
end

PORT = ENV['PORT']&.to_i || 1234

machine = UM.new
fd = machine.socket(UM::AF_INET, UM::SOCK_STREAM, 0, 0)
machine.setsockopt(fd, UM::SOL_SOCKET, UM::SO_REUSEADDR, true)
machine.setsockopt(fd, UM::SOL_SOCKET, UM::SO_REUSEPORT, true)
machine.bind(fd, '127.0.0.1', PORT)
machine.listen(fd, 128)

puts "Listening on localhost:#{PORT}"
machine.accept_each(fd) { |conn| machine.spin { handle_connection(machine, conn) } }
data/benchmark/ips_io_pipe.rb
ADDED

@@ -0,0 +1,146 @@
# frozen_string_literal: true

require 'bundler/inline'

gemfile do
  source 'https://rubygems.org'
  gem 'uringmachine', path: '..'
  gem 'benchmark'
  gem 'benchmark-ips'
end

require 'benchmark/ips'
require 'uringmachine'

GROUPS = 16

SIZE = 1 << 16
DATA = '*' * SIZE

def threads_setup
  @threads_start_queue = Queue.new
  @threads_stop_queue = Queue.new

  GROUPS.times do
    r, w = IO.pipe
    r.sync = true
    w.sync = true
    Thread.new do
      loop do
        iterations = @threads_start_queue.shift
        iterations.times { w.write(DATA) }
        @threads_stop_queue << true
      end
    end
    Thread.new do
      loop do
        iterations = @threads_start_queue.shift
        iterations.times { r.read(SIZE) }
        @threads_stop_queue << true
      end
    end
  end
end

def threads_run(times)
  (GROUPS * 2).times { @threads_start_queue << times }
  (GROUPS * 2).times { @threads_stop_queue.shift }
end

def um_setup
  @machine = UM.new

  @um_start_queue = UM::Queue.new
  @um_stop_queue = UM::Queue.new

  GROUPS.times do
    r, w = UM.pipe
    @machine.spin do
      loop do
        iterations = @machine.shift(@um_start_queue)
        iterations.times { @machine.writev(w, DATA) }
        @machine.push(@um_stop_queue, true)
      end
    end
    @machine.spin do
      loop do
        iterations = @machine.shift(@um_start_queue)
        iterations.times {
          left = SIZE
          left -= @machine.read(r, +'', left) while left > 0
        }
        @machine.push(@um_stop_queue, true)
      end
    end
  end
end

def um_run(times)
  (GROUPS * 2).times { @machine.push(@um_start_queue, times) }
  (GROUPS * 2).times { @machine.shift(@um_stop_queue) }
end

def um2_setup
  @um2_start_queue = UM::Queue.new
  @um2_stop_queue = UM::Queue.new

  thread_count = 2
  tgroups = GROUPS / thread_count

  @um2_machines = []
  @um2_done = UM::Queue.new
  @um2_threads = thread_count.times.map do
    Thread.new do
      machine = UM.new
      @um2_machines << machine
      tgroups.times do
        r, w = UM.pipe
        machine.spin do
          loop do
            iterations = machine.shift(@um2_start_queue)
            iterations.times { machine.writev(w, DATA) }
            machine.push(@um2_stop_queue, true)
          end
        end
        machine.spin do
          loop do
            iterations = machine.shift(@um2_start_queue)
            iterations.times {
              left = SIZE
              left -= machine.read(r, +'', left) while left > 0
            }
            machine.push(@um2_stop_queue, true)
          end
        end
      end
      machine.shift(@um2_done)
    end
  end
end

def um2_teardown
  2.times { @machine.push(@um2_done, true) }
end

def um2_run(times)
  @um2_machine ||= UM.new
  (GROUPS * 2).times { @um2_machine.push(@um2_start_queue, times) }
  (GROUPS * 2).times { @um2_machine.shift(@um2_stop_queue); @um2_machine.snooze }
end

threads_setup
um_setup
um2_setup

at_exit { um2_teardown }

# um2_run(1)
# p after_run: 1

Benchmark.ips do |x|
  x.report('Threads') { |t| threads_run(t) }
  x.report('UM') { |t| um_run(t) }
  x.report('UMx2') { |t| um2_run(t) }

  x.compare!(order: :baseline)
end
data/docs/design/buffer_pool.md
ADDED

@@ -0,0 +1,183 @@
# UringMachine Buffer Pool

One of the interesting recent features in io_uring is support for buffer rings.
A buffer ring is a structure that is shared between the application and the
kernel. The application can add buffers to the buffer ring to be used by the
kernel to perform multishot read/recv.

https://www.man7.org/linux/man-pages/man3/io_uring_setup_buf_ring.3.html
https://www.man7.org/linux/man-pages/man3/io_uring_prep_recv.3.html

On recent kernels (>= 6.12), io_uring also supports incremental buffer usage,
which means it can partially consume buffers, so that buffer space is not
wasted in case of a short read/recv.

This document describes the API and implementation details of a buffer pool
that automatically manages multiple buffer rings and allows partial buffer
consumption and reuse. We also provide a way to integrate this feature with
UringMachine *streams*, in effect switching streams from relying on their own
buffers to using managed buffers from the buffer pool.

## Design

### The API

- The buffer pool is created and managed automatically. No API is involved.

- To use the buffer pool, two dedicated APIs are added:

  - `UM#stream_read(fd) { |stream| ... }`
  - `UM#stream_recv(fd) { |stream| ... }`

- The two APIs behave in the same way: they start a multishot read/recv
  operation, repeating it as necessary, until the given block returns, the fd
  is closed, or an exception is encountered.

- The stream instance provided to the given block is created automatically, and
  is used to interact with the data read/received as it becomes available. The
  stream instance methods may block until enough data is available in the
  relevant buffers.

Example:

```ruby
machine.stream_recv(fd) do |stream|
  loop do
    line = stream.get_line(max: 60)
    if (size = parse_size(line))
      data = stream.read(size)
      process_data(data)
    else
      raise "Protocol error!"
    end
  end
end
```

- Since there is some overhead in setting up streams and multishot operations,
  this API is intended for use with long-running connections, e.g. HTTP, Redis,
  PostgreSQL, etc.

- Right now, streams provide support for reading lines (for line-oriented
  protocols), reading data of arbitrary size, and decoding RESP (Redis
  protocol) messages. In the future, we may add built-in support for decoding
  other protocols, such as HTTP/1.1, HTTP/2, WebSocket etc.

### The buffer pool

- Each UringMachine instance has one associated buffer pool.

- A buffer pool manages up to 64 buffer groups.

- Each buffer group has its own associated buffer ring, and 64 buffers. The
  buffer size is fixed at 64KB, for a total size of 64 × 64KB = 4MB per group.

- The maximum number of buffers in a buffer pool is 64 × 64 = 4096, for a total
  size of 64 × 4MB = 256MB.

- The buffer pool is responsible for selecting a buffer group for each
  multishot read/recv operation, according to the number of buffers available
  to the kernel.

- The buffer pool is responsible for setting up additional buffer groups (up to
  64) as needed.

- The buffer pool is responsible for maintaining the state of each buffer:
  whether it is currently committed to the kernel, or available to the
  application.

### Streams

- A stream is automatically created upon a call to `#stream_read` or
  `#stream_recv`.

- The stream provides methods for the application to consume incoming data.

- The stream holds zero or more *segments* of data that are added to the stream
  as more data becomes available through CQEs.

- The different methods scan through the segments, potentially blocking until
  more segments arrive, and copy data into strings or other data structures
  according to the APIs used.

- Each stream holds a cursor that tells it from which segment, and at which
  offset, data is currently to be consumed.

- As segments are consumed by the application, the underlying buffers are
  committed back to the kernel to be reused in subsequent CQEs, provided none
  of their segments are still in use by a stream.

### Principle of Operation

- When a multishot operation is started, a buffer group is selected, and its id
  is provided in the corresponding SQE. When the buffer group is exhausted (no
  more buffers are available to the kernel), the multishot operation is
  automatically restarted with a newly selected buffer group. Buffer groups are
  created automatically as needed if the currently existing buffer groups are
  exhausted.

- When one or more CQEs are encountered for the multishot operation, the
  corresponding fiber is resumed, and the multishot results are processed into
  segments that are added to the stream, to be eventually processed according
  to the APIs used. Importantly, it is the calls to stream methods that drive
  the eventual consumption of buffer segments. That is, many pieces of data may
  be pending actual processing.

- The actual submission of multishot operations is driven by usage of the
  different stream APIs and the need for more data.

### Data structures

```c
#define UM_BUFFER_POOL_MAX_GROUPS 64
#define UM_BUFFER_GROUP_SIZE 64
#define UM_BUFFER_SIZE (1 << 16)

/*
A buffer segment represents a contiguous sequence of bytes coming from a
managed buffer. Buffer segments are arranged in a linked list, each one
pointing to the next. In order to minimize allocations, those structs are
reused, and when no longer needed they are added to a freelist on the buffer
pool.
*/
struct um_buffer_segment {
  struct um_buffer_segment *next;
  uint8_t bgid; // buffer group id
  uint8_t bid;  // buffer id
  void *ptr;
  size_t len;
};

/*
A stream is made of zero or more buffer segments.
*/
struct um_stream {
  struct um *machine;
  struct um_buffer_segment *head;
  struct um_buffer_segment *tail;
};

/*
A managed buffer. It has a fixed size of 64KB.
*/
struct um_buffer {
  size_t ofs;         // current offset for partial consumption (incremented by CQE result)
  uint16_t committed; // is the buffer available to the kernel
  uint16_t ref_count; // how many segments currently use the buffer
  uint8_t data[UM_BUFFER_SIZE]; // buffer space
};

struct um_buffer_group {
  struct io_uring_buf_ring *ring;
  unsigned int committed_count;
};

/*
A buffer pool used for managing buffers.
*/
struct um_buffer_pool {
  struct um_buffer_group groups[UM_BUFFER_POOL_MAX_GROUPS];
  uint16_t buffer_group_count;
  struct um_buffer_segment *free_list;
};
```
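
As an illustration of the bookkeeping described above, the following is a
sketch of how a fully consumed buffer might be handed back to its buffer ring
via liburing's buffer-ring helpers. This is not part of the implementation;
the helper name `um_buffer_pool_recommit` and its exact responsibilities are
assumptions made for the sake of the example, based on the structs defined in
the previous section.

```c
// Hypothetical helper: hand a buffer back to its group's ring once no stream
// segment references it any longer. Uses liburing's io_uring_buf_ring_add /
// io_uring_buf_ring_advance to make the buffer visible to the kernel again.
static void um_buffer_pool_recommit(
  struct um_buffer_pool *pool, uint8_t bgid, uint8_t bid, struct um_buffer *buffer
) {
  // Skip buffers that are still referenced by segments or already committed.
  if (buffer->ref_count > 0 || buffer->committed) return;

  struct um_buffer_group *group = &pool->groups[bgid];
  io_uring_buf_ring_add(
    group->ring, buffer->data, UM_BUFFER_SIZE, bid,
    io_uring_buf_ring_mask(UM_BUFFER_GROUP_SIZE), 0
  );
  io_uring_buf_ring_advance(group->ring, 1);

  // Reset the consumption offset and mark the buffer as owned by the kernel.
  buffer->ofs = 0;
  buffer->committed = 1;
  group->committed_count++;
}
```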
data/docs/um_api.md
ADDED

@@ -0,0 +1,91 @@
# UringMachine API Reference

## UringMachine Class Methods

- `debug(msg)` - prints a string message to STDERR.
- `kernel_version` - returns the Linux kernel version as an integer, e.g. 607
  => version 6.7.
- `new(size)` - creates a new UringMachine instance.
- `pidfd_open(pid)` - creates and returns a file descriptor that refers to the
  given process.
- `pidfd_send_signal(pidfd, sig, flags)` - sends a signal to a process
  identified by a pidfd.
- `pipe` - creates a pipe and returns the read and write fds.
- `socketpair(domain, type, protocol)` - creates a socket pair and returns the
  two fds.
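
As an illustration of the class-level API, the following sketch is based on
the descriptions above and on the example scripts bundled with the gem; exact
signatures should be verified against the installed version.

```ruby
require 'uringmachine'

# Linux kernel version as an integer, e.g. 612 for kernel 6.12.
puts UM.kernel_version

# UM.pipe and UM.pidfd_open return raw file descriptors (integers), to be used
# with the UringMachine instance methods listed below.
r_fd, w_fd = UM.pipe
pid_fd = UM.pidfd_open(Process.pid)

machine = UM.new
[r_fd, w_fd, pid_fd].each { |fd| machine.close(fd) }
```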

## UringMachine Instance Methods

- `accept_each(sockfd) { |fd| ... }` - accepts incoming connections to the
  given server socket in an infinite loop, yielding each one to the given
  block.
- `accept_into_queue(sockfd, queue)` - accepts incoming connections to the
  given server socket in an infinite loop, pushing each one to the given queue.
- `accept(sockfd)` - accepts an incoming connection, returning its fd.
- `bind(sockfd, host, port)` - binds the given socket to the given address.
- `close_async(fd)` - closes the given fd asynchronously, i.e. without waiting
  for the operation to complete.
- `close(fd)` - closes the given fd.
- `connect(sockfd, host, port)` - connects the given socket to the given
  address.
- `getsockopt(sockfd, level, opt)` - returns a socket option value.
- `listen(sockfd)` - starts listening on the given socket.
- `metrics` - returns metrics for the machine.
- `open(pathname, flags)` - opens the given path and returns an fd.
- `pending_fibers` - returns the set of pending fibers, that is, fibers waiting
  for an operation to complete.
- `periodically(interval) { ... }` - runs the given block at regular intervals
  in an infinite loop.
- `poll(fd, mask)` - waits for the given fd to become ready according to the
  given event mask.
- `pop(queue)` - removes and returns a value off the end of the given queue.
- `prep_timeout(interval)` - returns a timeout AsyncOp with the given interval.
- `push(queue, value)` - adds the given value to the end of the given queue.
- `read_each(fd, bgid) { |data| ... }` - reads repeatedly from the given fd
  using the given buffer group id, yielding each chunk of data to the given
  block.
- `read(fd, buffer[, maxlen[, buffer_offset[, file_offset]]])` - reads from the
  given fd into the given buffer (String or IO::Buffer).
- `recv_each(fd, bgid, flags)` - receives from the given fd using the given
  buffer group id, with the given flags.
- `recv(fd, buffer, maxlen, flags)` - receives from the given fd into the given
  buffer.
- `schedule(fiber, value)` - adds the given fiber to the runqueue with the
  given resume value.
- `select(rfds, wfds, efds)` - selects ready fds from the given readable,
  writable and exceptional fds.
- `send_bundle(fd, bgid, *strings)` - sends a bundle of buffers to the given fd
  using the given buffer group id.
- `send(fd, buffer, len, flags)` - sends to the given fd from the given buffer.
- `sendv(fd, *buffers)` - sends multiple buffers to the given fd.
- `setsockopt(fd, level, opt, value)` - sets a socket option.
- `setup_buffer_ring(size, count)` - sets up a buffer ring and returns the
  buffer group id.
- `shift(queue)` - removes and returns a value from the head of the given
  queue.
- `shutdown_async(fd, how)` - shuts down the given socket fd without blocking.
- `shutdown(fd, how)` - shuts down the given socket fd.
- `size` - returns the number of entries in the submission queue.
- `sleep(duration)` - sleeps for the given duration.
- `snooze` - adds the current fiber to the end of the runqueue and yields
  control to the next fiber in the runqueue.
- `socket(domain, type, protocol, flags)` - creates a socket and returns its
  fd.
- `statx(dirfd, path, flags, mask)` - returns information for the given path.
- `submit` - submits any unsubmitted operations to the submission queue.
- `switch` - switches to the next fiber in the runqueue.
- `synchronize(mutex)` - synchronizes access to the given mutex.
- `timeout(interval, exception_class) { ... }` - runs the given block, raising
  an exception if the timeout interval has elapsed.
- `unshift(queue, value)` - adds the given value to the beginning of the given
  queue.
- `waitid_status(idtype, id, options)` - waits for the given pid/pidfd and
  returns a `Process::Status`.
- `waitid(idtype, id, options)` - waits for the given pid/pidfd and returns
  `[pid, status]`.
- `wakeup` - wakes up a machine currently waiting for completions.
- `write_async(fd, buffer[, len[, offset]])` - writes to the given fd without
  waiting for completion.
- `write(fd, buffer[, len[, offset]])` - writes to the given fd.
- `writev(fd, *buffers[, file_offset])` - writes multiple buffers to the given
  fd.
- `yield()` - yields control to the next fiber in the runqueue.
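
As a closing illustration, here is a sketch, not taken from the gem's
documentation, that combines several of the methods above on a single machine.
`#spin`, which starts a fiber on the machine, is used as in the bundled example
scripts; exact signatures and blocking behavior should be verified against the
installed version.

```ruby
require 'uringmachine'

machine = UM.new
r_fd, w_fd = UM.pipe

# Spin up a fiber that writes a message and then closes the write end.
machine.spin do
  machine.write(w_fd, "hello from uringmachine\n")
  machine.close(w_fd)
end

# Reading blocks only the current fiber, letting the writer fiber run.
buf = +''
machine.read(r_fd, buf, 4096)
puts buf

machine.close(r_fd)
```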