raptor 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mise.toml +2 -0
- data/Brewfile +2 -0
- data/CHANGELOG.md +21 -0
- data/README.md +28 -25
- data/ext/raptor_http2/raptor_http2.c +1 -0
- data/lib/rackup/handler/raptor.rb +20 -21
- data/lib/raptor/cli.rb +46 -14
- data/lib/raptor/cluster.rb +142 -64
- data/lib/raptor/http2.rb +324 -42
- data/lib/raptor/log.rb +55 -0
- data/lib/raptor/reactor.rb +89 -53
- data/lib/raptor/request.rb +106 -61
- data/lib/raptor/server.rb +125 -51
- data/lib/raptor/stats.rb +30 -26
- data/lib/raptor/version.rb +1 -1
- data/sig/generated/raptor/cli.rbs +15 -1
- data/sig/generated/raptor/cluster.rbs +70 -38
- data/sig/generated/raptor/http2.rbs +126 -6
- data/sig/generated/raptor/log.rbs +41 -0
- data/sig/generated/raptor/reactor.rbs +44 -25
- data/sig/generated/raptor/request.rbs +36 -22
- data/sig/generated/raptor/server.rbs +63 -26
- data/sig/generated/raptor/stats.rbs +24 -20
- metadata +5 -1
data/lib/raptor/cluster.rb
CHANGED
|
@@ -7,6 +7,7 @@ require "atomic-ruby/atomic_thread_pool"
|
|
|
7
7
|
require "rack/builder"
|
|
8
8
|
require "ractor-pool"
|
|
9
9
|
|
|
10
|
+
require_relative "log"
|
|
10
11
|
require_relative "binder"
|
|
11
12
|
require_relative "server"
|
|
12
13
|
require_relative "reactor"
|
|
@@ -18,14 +19,15 @@ module Raptor
|
|
|
18
19
|
# Multi-process web server cluster with advanced concurrency architecture.
|
|
19
20
|
#
|
|
20
21
|
# Cluster manages multiple worker processes, each running a complete server
|
|
21
|
-
# stack including a
|
|
22
|
-
#
|
|
23
|
-
# forking, signal management, graceful shutdown, and
|
|
24
|
-
# restart when a worker process unexpectedly exits.
|
|
22
|
+
# stack including a ractor pool for HTTP parsing, a thread pool for
|
|
23
|
+
# application processing, plus dedicated reactor and server threads. It
|
|
24
|
+
# handles process forking, signal management, graceful shutdown, and
|
|
25
|
+
# automatic worker restart when a worker process unexpectedly exits.
|
|
25
26
|
#
|
|
26
27
|
# The architecture provides horizontal scaling through processes while
|
|
27
|
-
# maintaining efficient I/O and CPU utilization within each process
|
|
28
|
-
# the combination of
|
|
28
|
+
# maintaining efficient I/O and CPU utilization within each process
|
|
29
|
+
# through the combination of ractor-based parsing and thread pools on
|
|
30
|
+
# top of NIO reactors.
|
|
29
31
|
#
|
|
30
32
|
# Flow per worker process:
|
|
31
33
|
# 1. Server continuously accepts connections but skips acceptance when backlog is high
|
|
@@ -36,7 +38,7 @@ module Raptor
|
|
|
36
38
|
#
|
|
37
39
|
# @example Basic usage
|
|
38
40
|
# options = {
|
|
39
|
-
#
|
|
41
|
+
# workers: 4, ractors: 2, threads: 8,
|
|
40
42
|
# binds: ["tcp://0.0.0.0:3000"],
|
|
41
43
|
# rackup: "config.ru",
|
|
42
44
|
# client: { first_data_timeout: 30, chunk_data_timeout: 10 }
|
|
@@ -54,50 +56,61 @@ module Raptor
|
|
|
54
56
|
new(options).run
|
|
55
57
|
end
|
|
56
58
|
|
|
57
|
-
# @rbs @thread_count: Integer
|
|
58
|
-
# @rbs @ractor_count: Integer
|
|
59
59
|
# @rbs @worker_count: Integer
|
|
60
|
+
# @rbs @ractor_count: Integer
|
|
61
|
+
# @rbs @thread_count: Integer
|
|
60
62
|
# @rbs @client_options: Hash[Symbol, Integer]
|
|
61
|
-
# @rbs @
|
|
63
|
+
# @rbs @worker_timeout: Integer
|
|
64
|
+
# @rbs @worker_boot_timeout: Integer
|
|
65
|
+
# @rbs @worker_shutdown_timeout: Integer
|
|
62
66
|
# @rbs @stats_file: String?
|
|
63
|
-
# @rbs @
|
|
67
|
+
# @rbs @pid_file: String?
|
|
68
|
+
# @rbs @on_error: ^(Hash[String, untyped]?, Exception) -> void | nil
|
|
64
69
|
# @rbs @binder: Binder
|
|
65
70
|
# @rbs @server_port: Integer
|
|
66
71
|
# @rbs @app: untyped
|
|
67
72
|
# @rbs @shutdown: bool
|
|
68
73
|
# @rbs @workers: Hash[Integer, Integer]
|
|
74
|
+
# @rbs @timed_out: Set[Integer]
|
|
69
75
|
# @rbs @stats: Stats
|
|
76
|
+
# @rbs @phase: Integer
|
|
70
77
|
# @rbs @phased_restart_requested: bool
|
|
71
78
|
# @rbs @phased_restarting: bool
|
|
72
79
|
|
|
73
80
|
# Creates a new Cluster with the specified configuration.
|
|
74
81
|
#
|
|
75
|
-
# Initializes the cluster with
|
|
82
|
+
# Initializes the cluster with worker, ractor, and thread counts,
|
|
76
83
|
# sets up network binding, loads the Rack application, and prepares
|
|
77
84
|
# for multi-process operation.
|
|
78
85
|
#
|
|
79
86
|
# @param options [Hash] cluster configuration options
|
|
80
|
-
# @option options [Integer] :threads number of threads per worker process
|
|
81
|
-
# @option options [Integer] :ractors number of ractors per worker process
|
|
82
|
-
# @option options [Integer] :workers number of worker processes
|
|
83
87
|
# @option options [Array<String>] :binds array of bind URIs
|
|
88
|
+
# @option options [Integer] :workers number of worker processes
|
|
89
|
+
# @option options [Integer] :ractors number of ractors per worker process
|
|
90
|
+
# @option options [Integer] :threads number of threads per worker process
|
|
84
91
|
# @option options [#call] :app pre-built Rack application
|
|
85
92
|
# @option options [String] :rackup path to Rack configuration file
|
|
86
93
|
# @option options [Hash] :client client configuration
|
|
87
|
-
# @option options [
|
|
94
|
+
# @option options [Integer] :worker_timeout seconds to wait for a booted worker to check in before killing it
|
|
95
|
+
# @option options [Integer] :worker_boot_timeout seconds to wait for a worker to finish booting before killing it
|
|
96
|
+
# @option options [Integer] :worker_shutdown_timeout seconds to wait for graceful worker exit before force-killing
|
|
88
97
|
# @option options [String, nil] :stats_file path to write per-worker stats JSON, or nil to disable
|
|
89
|
-
# @option options [String, nil] :
|
|
98
|
+
# @option options [String, nil] :pid_file path to write the master PID to, or nil to disable
|
|
99
|
+
# @option options [#call] :on_error callback invoked with (env, exception) when the Rack app raises
|
|
90
100
|
# @return [void]
|
|
91
101
|
#
|
|
92
102
|
# @rbs (Hash[Symbol, untyped] options) -> void
|
|
93
103
|
def initialize(options)
|
|
94
|
-
@thread_count = options[:threads]
|
|
95
|
-
@ractor_count = options[:ractors]
|
|
96
104
|
@worker_count = options[:workers]
|
|
105
|
+
@ractor_count = options[:ractors]
|
|
106
|
+
@thread_count = options[:threads]
|
|
97
107
|
@client_options = options[:client]
|
|
98
|
-
@
|
|
108
|
+
@worker_timeout = options[:worker_timeout]
|
|
109
|
+
@worker_boot_timeout = options[:worker_boot_timeout]
|
|
110
|
+
@worker_shutdown_timeout = options[:worker_shutdown_timeout]
|
|
99
111
|
@stats_file = options[:stats_file]
|
|
100
|
-
@
|
|
112
|
+
@pid_file = options[:pid_file]
|
|
113
|
+
@on_error = options[:on_error]
|
|
101
114
|
|
|
102
115
|
@binder = Binder.new(options[:binds])
|
|
103
116
|
@server_port = @binder.server_port
|
|
@@ -106,7 +119,9 @@ module Raptor
|
|
|
106
119
|
|
|
107
120
|
@shutdown = false
|
|
108
121
|
@workers = {}
|
|
122
|
+
@timed_out = Set.new
|
|
109
123
|
@stats = Stats.new(@worker_count)
|
|
124
|
+
@phase = 0
|
|
110
125
|
@phased_restart_requested = false
|
|
111
126
|
@phased_restarting = false
|
|
112
127
|
end
|
|
@@ -114,15 +129,15 @@ module Raptor
|
|
|
114
129
|
# Starts the multi-process cluster and manages worker processes.
|
|
115
130
|
#
|
|
116
131
|
# Forks the configured number of worker processes and monitors them,
|
|
117
|
-
#
|
|
118
|
-
# shutdown via INT or TERM signals, stats logging via USR1,
|
|
119
|
-
# restart via USR2.
|
|
132
|
+
# restarting any that exit unexpectedly or stop checking in. Handles
|
|
133
|
+
# graceful shutdown via INT or TERM signals, stats logging via USR1,
|
|
134
|
+
# and phased restart via USR2.
|
|
120
135
|
#
|
|
121
136
|
# Each worker process includes:
|
|
122
137
|
# - 1 server thread (continuously accepts connections with backpressure control)
|
|
123
138
|
# - 1 reactor thread (I/O multiplexing, timeout handling, backlog monitoring)
|
|
124
|
-
# - N
|
|
125
|
-
# - 1
|
|
139
|
+
# - N pipeline ractors (parallel HTTP parsing)
|
|
140
|
+
# - 1 pipeline collector thread (coordinates parsing results)
|
|
126
141
|
# - M worker threads (Rack application processing and response writing)
|
|
127
142
|
# - 1 stats thread (writes per-worker metrics to shared memory every second)
|
|
128
143
|
#
|
|
@@ -135,7 +150,7 @@ module Raptor
|
|
|
135
150
|
trap("USR1") { log_stats }
|
|
136
151
|
trap("USR2") { @phased_restart_requested = true }
|
|
137
152
|
|
|
138
|
-
File.open(@
|
|
153
|
+
File.open(@pid_file, File::CREAT | File::EXCL | File::WRONLY) { |file| file.write(Process.pid.to_s) } if @pid_file
|
|
139
154
|
|
|
140
155
|
@worker_count.times { |index| spawn_worker(index) }
|
|
141
156
|
|
|
@@ -151,15 +166,15 @@ module Raptor
|
|
|
151
166
|
break if reap_workers == :no_children
|
|
152
167
|
|
|
153
168
|
perform_phased_restart if @phased_restart_requested && !@phased_restarting
|
|
169
|
+
timeout_hung_workers
|
|
154
170
|
|
|
155
171
|
sleep 0.1
|
|
156
172
|
end
|
|
157
173
|
|
|
158
|
-
|
|
159
|
-
@workers.values.each { |pid| Process.wait(pid) rescue nil }
|
|
174
|
+
stop_workers
|
|
160
175
|
stats_file_thread&.join
|
|
161
176
|
File.delete(@stats_file) rescue nil if @stats_file
|
|
162
|
-
File.delete(@
|
|
177
|
+
File.delete(@pid_file) rescue nil if @pid_file
|
|
163
178
|
@stats.unmap
|
|
164
179
|
end
|
|
165
180
|
|
|
@@ -176,13 +191,14 @@ module Raptor
|
|
|
176
191
|
private
|
|
177
192
|
|
|
178
193
|
# Forks a new worker process and registers it at the given index.
|
|
194
|
+
# The worker inherits the cluster's current phase.
|
|
179
195
|
#
|
|
180
196
|
# @param index [Integer] slot index for this worker in the stats region
|
|
181
197
|
# @return [void]
|
|
182
198
|
#
|
|
183
199
|
# @rbs (Integer index) -> void
|
|
184
200
|
def spawn_worker(index)
|
|
185
|
-
pid = fork { run_worker(index) }
|
|
201
|
+
pid = fork { run_worker(index, @phase) }
|
|
186
202
|
@workers[index] = pid
|
|
187
203
|
end
|
|
188
204
|
|
|
@@ -199,9 +215,10 @@ module Raptor
|
|
|
199
215
|
|
|
200
216
|
index = @workers.key(pid)
|
|
201
217
|
@workers.delete(index)
|
|
218
|
+
@timed_out.delete(pid)
|
|
202
219
|
|
|
203
220
|
unless @shutdown
|
|
204
|
-
warn "
|
|
221
|
+
Log.warn "Restarting worker #{index} (#{pid}), #{exit_description(status)}"
|
|
205
222
|
spawn_worker(index)
|
|
206
223
|
end
|
|
207
224
|
end
|
|
@@ -209,6 +226,57 @@ module Raptor
|
|
|
209
226
|
:no_children
|
|
210
227
|
end
|
|
211
228
|
|
|
229
|
+
# Stops every worker, escalating from TERM to KILL if any fail to
|
|
230
|
+
# exit within `worker_shutdown_timeout`.
|
|
231
|
+
#
|
|
232
|
+
# @return [void]
|
|
233
|
+
#
|
|
234
|
+
# @rbs () -> void
|
|
235
|
+
def stop_workers
|
|
236
|
+
@workers.values.each { |pid| Process.kill("TERM", pid) rescue nil }
|
|
237
|
+
|
|
238
|
+
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + @worker_shutdown_timeout
|
|
239
|
+
until @workers.empty? || Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
|
|
240
|
+
reap_workers
|
|
241
|
+
sleep 0.05
|
|
242
|
+
end
|
|
243
|
+
return if @workers.empty?
|
|
244
|
+
|
|
245
|
+
Log.warn "Force-killing #{@workers.size} worker(s) after #{@worker_shutdown_timeout}s"
|
|
246
|
+
@workers.values.each { |pid| Process.kill("KILL", pid) rescue nil }
|
|
247
|
+
@workers.values.each { |pid| Process.wait(pid) rescue nil }
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# Kills workers that have stopped checking in. A booted worker that
|
|
251
|
+
# fails to update its stats slot within `worker_timeout` seconds is
|
|
252
|
+
# assumed to be hung (deadlocked app, runaway loop, blocked syscall);
|
|
253
|
+
# a worker still in startup is held to `worker_boot_timeout`. Killed
|
|
254
|
+
# workers are then restarted by `reap_workers`.
|
|
255
|
+
#
|
|
256
|
+
# @return [void]
|
|
257
|
+
#
|
|
258
|
+
# @rbs () -> void
|
|
259
|
+
def timeout_hung_workers
|
|
260
|
+
now = Process.clock_gettime(Process::CLOCK_REALTIME)
|
|
261
|
+
stats = @stats.all
|
|
262
|
+
|
|
263
|
+
@workers.each do |index, pid|
|
|
264
|
+
next if @timed_out.include?(pid)
|
|
265
|
+
|
|
266
|
+
stat = stats[index]
|
|
267
|
+
next unless stat[:pid] == pid
|
|
268
|
+
|
|
269
|
+
timeout = stat[:booted] ? @worker_timeout : @worker_boot_timeout
|
|
270
|
+
elapsed = now - stat[:last_checkin]
|
|
271
|
+
next if elapsed <= timeout
|
|
272
|
+
|
|
273
|
+
action = stat[:booted] ? "check in" : "boot"
|
|
274
|
+
Log.warn "Killing worker #{index} (#{pid}), failed to #{action} within #{timeout}s"
|
|
275
|
+
Process.kill("KILL", pid) rescue nil
|
|
276
|
+
@timed_out << pid
|
|
277
|
+
end
|
|
278
|
+
end
|
|
279
|
+
|
|
212
280
|
# Replaces each worker process one at a time, waiting for the new
|
|
213
281
|
# worker to boot before moving on to the next. Triggered by SIGUSR2.
|
|
214
282
|
#
|
|
@@ -218,7 +286,8 @@ module Raptor
|
|
|
218
286
|
def perform_phased_restart
|
|
219
287
|
@phased_restart_requested = false
|
|
220
288
|
@phased_restarting = true
|
|
221
|
-
|
|
289
|
+
@phase += 1
|
|
290
|
+
Log.info "Phased restart starting"
|
|
222
291
|
|
|
223
292
|
begin
|
|
224
293
|
@workers.keys.sort.each do |index|
|
|
@@ -240,7 +309,7 @@ module Raptor
|
|
|
240
309
|
end
|
|
241
310
|
end
|
|
242
311
|
|
|
243
|
-
|
|
312
|
+
Log.info "Phased restart complete"
|
|
244
313
|
ensure
|
|
245
314
|
@phased_restarting = false
|
|
246
315
|
end
|
|
@@ -253,10 +322,11 @@ module Raptor
|
|
|
253
322
|
# critical component fails.
|
|
254
323
|
#
|
|
255
324
|
# @param index [Integer] slot index for this worker in the stats region
|
|
325
|
+
# @param phase [Integer] the cluster phase this worker was forked at
|
|
256
326
|
# @return [void]
|
|
257
327
|
#
|
|
258
|
-
# @rbs (Integer index) -> void
|
|
259
|
-
def run_worker(index)
|
|
328
|
+
# @rbs (Integer index, Integer phase) -> void
|
|
329
|
+
def run_worker(index, phase)
|
|
260
330
|
shutdown_requested = false
|
|
261
331
|
trap("INT") { shutdown_requested = true }
|
|
262
332
|
trap("TERM") { shutdown_requested = true }
|
|
@@ -267,8 +337,11 @@ module Raptor
|
|
|
267
337
|
@stats.write(
|
|
268
338
|
index,
|
|
269
339
|
pid: Process.pid,
|
|
340
|
+
phase: phase,
|
|
270
341
|
requests: 0,
|
|
271
342
|
backlog: 0,
|
|
343
|
+
busy_threads: 0,
|
|
344
|
+
thread_capacity: @thread_count,
|
|
272
345
|
started_at:,
|
|
273
346
|
last_checkin: started_at,
|
|
274
347
|
booted: false
|
|
@@ -288,20 +361,24 @@ module Raptor
|
|
|
288
361
|
size: @ractor_count,
|
|
289
362
|
worker: request.http_parser_worker
|
|
290
363
|
) do |parsed_result|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
364
|
+
begin
|
|
365
|
+
if parsed_result[:protocol] == :http2
|
|
366
|
+
http2.handle_parsed_request(parsed_result, reactor, thread_pool)
|
|
367
|
+
else
|
|
368
|
+
request.handle_parsed_request(parsed_result, reactor, thread_pool)
|
|
369
|
+
end
|
|
370
|
+
rescue => error
|
|
371
|
+
Log.rescued_error(error)
|
|
295
372
|
end
|
|
296
373
|
end
|
|
297
374
|
|
|
298
|
-
reactor = Reactor.new(
|
|
375
|
+
reactor = Reactor.new(ractor_pool, thread_pool, client_options: @client_options)
|
|
299
376
|
reactor_thread = reactor.run
|
|
300
377
|
|
|
301
|
-
server = Server.new(@binder, reactor, thread_pool, request)
|
|
378
|
+
server = Server.new(@binder, reactor, thread_pool, request, client_options: @client_options)
|
|
302
379
|
server_thread = server.run
|
|
303
380
|
|
|
304
|
-
|
|
381
|
+
Log.info "Worker #{index} booted"
|
|
305
382
|
|
|
306
383
|
stats_thread = Thread.new do
|
|
307
384
|
Thread.current.name = "Raptor Stats"
|
|
@@ -310,8 +387,11 @@ module Raptor
|
|
|
310
387
|
@stats.write(
|
|
311
388
|
index,
|
|
312
389
|
pid: Process.pid,
|
|
390
|
+
phase: phase,
|
|
313
391
|
requests: request_count,
|
|
314
392
|
backlog: reactor.backlog,
|
|
393
|
+
busy_threads: thread_pool.active_count,
|
|
394
|
+
thread_capacity: @thread_count,
|
|
315
395
|
started_at:,
|
|
316
396
|
last_checkin: Process.clock_gettime(Process::CLOCK_REALTIME),
|
|
317
397
|
booted: true
|
|
@@ -333,6 +413,7 @@ module Raptor
|
|
|
333
413
|
reactor.shutdown
|
|
334
414
|
reactor_thread.join
|
|
335
415
|
ractor_pool.shutdown
|
|
416
|
+
request.shutdown
|
|
336
417
|
thread_pool.shutdown
|
|
337
418
|
stats_thread.join
|
|
338
419
|
end
|
|
@@ -364,28 +445,25 @@ module Raptor
|
|
|
364
445
|
@shutdown = true
|
|
365
446
|
end
|
|
366
447
|
|
|
367
|
-
#
|
|
368
|
-
#
|
|
369
|
-
# Outputs a hierarchical view of the cluster configuration showing
|
|
370
|
-
# the master process, worker processes, and per-process thread/ractor
|
|
371
|
-
# allocation along with listening addresses.
|
|
448
|
+
# Prints the cluster's startup banner showing process structure
|
|
449
|
+
# and bind addresses.
|
|
372
450
|
#
|
|
373
451
|
# @return [void]
|
|
374
452
|
#
|
|
375
453
|
# @rbs () -> void
|
|
376
454
|
def log_initialization
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
455
|
+
Log.info "Cluster initializing:"
|
|
456
|
+
Log.info "├─ Version: #{VERSION}"
|
|
457
|
+
Log.info "├─ Ruby Version: #{RUBY_DESCRIPTION}"
|
|
458
|
+
Log.info "├─ Master PID: #{Process.pid}"
|
|
459
|
+
Log.info "│ └─ #{@worker_count} worker process#{"es" if @worker_count > 1}"
|
|
460
|
+
Log.info "│ ├─ 1 server thread"
|
|
461
|
+
Log.info "│ ├─ 1 reactor thread"
|
|
462
|
+
Log.info "│ ├─ #{@ractor_count} pipeline ractor#{"s" if @ractor_count > 1}"
|
|
463
|
+
Log.info "│ ├─ 1 pipeline collector thread"
|
|
464
|
+
Log.info "│ ├─ #{@thread_count} worker thread#{"s" if @thread_count > 1}"
|
|
465
|
+
Log.info "│ └─ 1 stats thread"
|
|
466
|
+
Log.info "└─ Listening on #{@binder.addresses.join(", ")}"
|
|
389
467
|
end
|
|
390
468
|
|
|
391
469
|
# Logs current stats for all workers to stdout.
|
|
@@ -396,11 +474,11 @@ module Raptor
|
|
|
396
474
|
#
|
|
397
475
|
# @rbs () -> void
|
|
398
476
|
def log_stats
|
|
399
|
-
@stats.all.
|
|
477
|
+
@stats.all.each do |stat|
|
|
400
478
|
status = stat[:booted] ? "booted" : "starting"
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
479
|
+
Log.info "Worker #{stat[:index]} (phase #{stat[:phase]}): pid=#{stat[:pid]}, requests=#{stat[:requests]}, " \
|
|
480
|
+
"busy=#{stat[:busy_threads]}/#{stat[:thread_capacity]}, backlog=#{stat[:backlog]}, " \
|
|
481
|
+
"#{status}, last_checkin=#{Time.at(stat[:last_checkin]).strftime("%H:%M:%S")}"
|
|
404
482
|
end
|
|
405
483
|
end
|
|
406
484
|
|