raptor 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ require "atomic-ruby/atomic_thread_pool"
7
7
  require "rack/builder"
8
8
  require "ractor-pool"
9
9
 
10
+ require_relative "log"
10
11
  require_relative "binder"
11
12
  require_relative "server"
12
13
  require_relative "reactor"
@@ -18,14 +19,15 @@ module Raptor
18
19
  # Multi-process web server cluster with advanced concurrency architecture.
19
20
  #
20
21
  # Cluster manages multiple worker processes, each running a complete server
21
- # stack including a reactor thread, server thread, ractor pool for HTTP
22
- # parsing, and thread pool for application processing. It handles process
23
- # forking, signal management, graceful shutdown, and automatic worker
24
- # restart when a worker process unexpectedly exits.
22
+ # stack including a ractor pool for HTTP parsing, a thread pool for
23
+ # application processing, plus dedicated reactor and server threads. It
24
+ # handles process forking, signal management, graceful shutdown, and
25
+ # automatic worker restart when a worker process unexpectedly exits.
25
26
  #
26
27
  # The architecture provides horizontal scaling through processes while
27
- # maintaining efficient I/O and CPU utilization within each process through
28
- # the combination of NIO reactors, ractor-based parsing, and thread pools.
28
+ # maintaining efficient I/O and CPU utilization within each process
29
+ # through the combination of ractor-based parsing and thread pools on
30
+ # top of NIO reactors.
29
31
  #
30
32
  # Flow per worker process:
31
33
  # 1. Server continuously accepts connections but skips acceptance when backlog is high
@@ -36,7 +38,7 @@ module Raptor
36
38
  #
37
39
  # @example Basic usage
38
40
  # options = {
39
- # threads: 8, ractors: 2, workers: 4,
41
+ # workers: 4, ractors: 2, threads: 8,
40
42
  # binds: ["tcp://0.0.0.0:3000"],
41
43
  # rackup: "config.ru",
42
44
  # client: { first_data_timeout: 30, chunk_data_timeout: 10 }
@@ -54,50 +56,61 @@ module Raptor
54
56
  new(options).run
55
57
  end
56
58
 
57
- # @rbs @thread_count: Integer
58
- # @rbs @ractor_count: Integer
59
59
  # @rbs @worker_count: Integer
60
+ # @rbs @ractor_count: Integer
61
+ # @rbs @thread_count: Integer
60
62
  # @rbs @client_options: Hash[Symbol, Integer]
61
- # @rbs @on_error: ^(Hash[String, untyped]?, Exception) -> void | nil
63
+ # @rbs @worker_timeout: Integer
64
+ # @rbs @worker_boot_timeout: Integer
65
+ # @rbs @worker_shutdown_timeout: Integer
62
66
  # @rbs @stats_file: String?
63
- # @rbs @pidfile: String?
67
+ # @rbs @pid_file: String?
68
+ # @rbs @on_error: ^(Hash[String, untyped]?, Exception) -> void | nil
64
69
  # @rbs @binder: Binder
65
70
  # @rbs @server_port: Integer
66
71
  # @rbs @app: untyped
67
72
  # @rbs @shutdown: bool
68
73
  # @rbs @workers: Hash[Integer, Integer]
74
+ # @rbs @timed_out: Set[Integer]
69
75
  # @rbs @stats: Stats
76
+ # @rbs @phase: Integer
70
77
  # @rbs @phased_restart_requested: bool
71
78
  # @rbs @phased_restarting: bool
72
79
 
73
80
  # Creates a new Cluster with the specified configuration.
74
81
  #
75
- # Initializes the cluster with thread, ractor, and worker counts,
82
+ # Initializes the cluster with worker, ractor, and thread counts,
76
83
  # sets up network binding, loads the Rack application, and prepares
77
84
  # for multi-process operation.
78
85
  #
79
86
  # @param options [Hash] cluster configuration options
80
- # @option options [Integer] :threads number of threads per worker process
81
- # @option options [Integer] :ractors number of ractors per worker process
82
- # @option options [Integer] :workers number of worker processes
83
87
  # @option options [Array<String>] :binds array of bind URIs
88
+ # @option options [Integer] :workers number of worker processes
89
+ # @option options [Integer] :ractors number of ractors per worker process
90
+ # @option options [Integer] :threads number of threads per worker process
84
91
  # @option options [#call] :app pre-built Rack application
85
92
  # @option options [String] :rackup path to Rack configuration file
86
93
  # @option options [Hash] :client client configuration
87
- # @option options [#call] :on_error callback invoked with (env, exception) when the Rack app raises
94
+ # @option options [Integer] :worker_timeout seconds to wait for a booted worker to check in before killing it
95
+ # @option options [Integer] :worker_boot_timeout seconds to wait for a worker to finish booting before killing it
96
+ # @option options [Integer] :worker_shutdown_timeout seconds to wait for graceful worker exit before force-killing
88
97
  # @option options [String, nil] :stats_file path to write per-worker stats JSON, or nil to disable
89
- # @option options [String, nil] :pidfile path to write the master PID to, or nil to disable
98
+ # @option options [String, nil] :pid_file path to write the master PID to, or nil to disable
99
+ # @option options [#call] :on_error callback invoked with (env, exception) when the Rack app raises
90
100
  # @return [void]
91
101
  #
92
102
  # @rbs (Hash[Symbol, untyped] options) -> void
93
103
  def initialize(options)
94
- @thread_count = options[:threads]
95
- @ractor_count = options[:ractors]
96
104
  @worker_count = options[:workers]
105
+ @ractor_count = options[:ractors]
106
+ @thread_count = options[:threads]
97
107
  @client_options = options[:client]
98
- @on_error = options[:on_error]
108
+ @worker_timeout = options[:worker_timeout]
109
+ @worker_boot_timeout = options[:worker_boot_timeout]
110
+ @worker_shutdown_timeout = options[:worker_shutdown_timeout]
99
111
  @stats_file = options[:stats_file]
100
- @pidfile = options[:pidfile]
112
+ @pid_file = options[:pid_file]
113
+ @on_error = options[:on_error]
101
114
 
102
115
  @binder = Binder.new(options[:binds])
103
116
  @server_port = @binder.server_port
@@ -106,7 +119,9 @@ module Raptor
106
119
 
107
120
  @shutdown = false
108
121
  @workers = {}
122
+ @timed_out = Set.new
109
123
  @stats = Stats.new(@worker_count)
124
+ @phase = 0
110
125
  @phased_restart_requested = false
111
126
  @phased_restarting = false
112
127
  end
@@ -114,15 +129,15 @@ module Raptor
114
129
  # Starts the multi-process cluster and manages worker processes.
115
130
  #
116
131
  # Forks the configured number of worker processes and monitors them,
117
- # automatically restarting any that exit unexpectedly. Handles graceful
118
- # shutdown via INT or TERM signals, stats logging via USR1, and phased
119
- # restart via USR2.
132
+ # restarting any that exit unexpectedly or stop checking in. Handles
133
+ # graceful shutdown via INT or TERM signals, stats logging via USR1,
134
+ # and phased restart via USR2.
120
135
  #
121
136
  # Each worker process includes:
122
137
  # - 1 server thread (continuously accepts connections with backpressure control)
123
138
  # - 1 reactor thread (I/O multiplexing, timeout handling, backlog monitoring)
124
- # - N ractor workers (parallel HTTP parsing)
125
- # - 1 ractor collector thread (coordinates parsing results)
139
+ # - N pipeline ractors (parallel HTTP parsing)
140
+ # - 1 pipeline collector thread (coordinates parsing results)
126
141
  # - M worker threads (Rack application processing and response writing)
127
142
  # - 1 stats thread (writes per-worker metrics to shared memory every second)
128
143
  #
@@ -135,7 +150,7 @@ module Raptor
135
150
  trap("USR1") { log_stats }
136
151
  trap("USR2") { @phased_restart_requested = true }
137
152
 
138
- File.open(@pidfile, File::CREAT | File::EXCL | File::WRONLY) { |file| file.write(Process.pid.to_s) } if @pidfile
153
+ File.open(@pid_file, File::CREAT | File::EXCL | File::WRONLY) { |file| file.write(Process.pid.to_s) } if @pid_file
139
154
 
140
155
  @worker_count.times { |index| spawn_worker(index) }
141
156
 
@@ -151,15 +166,15 @@ module Raptor
151
166
  break if reap_workers == :no_children
152
167
 
153
168
  perform_phased_restart if @phased_restart_requested && !@phased_restarting
169
+ timeout_hung_workers
154
170
 
155
171
  sleep 0.1
156
172
  end
157
173
 
158
- @workers.values.each { |pid| Process.kill("TERM", pid) rescue nil }
159
- @workers.values.each { |pid| Process.wait(pid) rescue nil }
174
+ stop_workers
160
175
  stats_file_thread&.join
161
176
  File.delete(@stats_file) rescue nil if @stats_file
162
- File.delete(@pidfile) rescue nil if @pidfile
177
+ File.delete(@pid_file) rescue nil if @pid_file
163
178
  @stats.unmap
164
179
  end
165
180
 
@@ -176,13 +191,14 @@ module Raptor
176
191
  private
177
192
 
178
193
  # Forks a new worker process and registers it at the given index.
194
+ # The worker inherits the cluster's current phase.
179
195
  #
180
196
  # @param index [Integer] slot index for this worker in the stats region
181
197
  # @return [void]
182
198
  #
183
199
  # @rbs (Integer index) -> void
184
200
  def spawn_worker(index)
185
- pid = fork { run_worker(index) }
201
+ pid = fork { run_worker(index, @phase) }
186
202
  @workers[index] = pid
187
203
  end
188
204
 
@@ -199,9 +215,10 @@ module Raptor
199
215
 
200
216
  index = @workers.key(pid)
201
217
  @workers.delete(index)
218
+ @timed_out.delete(pid)
202
219
 
203
220
  unless @shutdown
204
- warn "[#{Process.pid}] Restarting worker #{index} (#{pid}), #{exit_description(status)}"
221
+ Log.warn "Restarting worker #{index} (#{pid}), #{exit_description(status)}"
205
222
  spawn_worker(index)
206
223
  end
207
224
  end
@@ -209,6 +226,57 @@ module Raptor
209
226
  :no_children
210
227
  end
211
228
 
229
+ # Stops every worker, escalating from TERM to KILL if any fail to
230
+ # exit within `worker_shutdown_timeout`.
231
+ #
232
+ # @return [void]
233
+ #
234
+ # @rbs () -> void
235
+ def stop_workers
236
+ @workers.values.each { |pid| Process.kill("TERM", pid) rescue nil }
237
+
238
+ deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + @worker_shutdown_timeout
239
+ until @workers.empty? || Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
240
+ reap_workers
241
+ sleep 0.05
242
+ end
243
+ return if @workers.empty?
244
+
245
+ Log.warn "Force-killing #{@workers.size} worker(s) after #{@worker_shutdown_timeout}s"
246
+ @workers.values.each { |pid| Process.kill("KILL", pid) rescue nil }
247
+ @workers.values.each { |pid| Process.wait(pid) rescue nil }
248
+ end
249
+
250
+ # Kills workers that have stopped checking in. A booted worker that
251
+ # fails to update its stats slot within `worker_timeout` seconds is
252
+ # assumed to be hung (deadlocked app, runaway loop, blocked syscall);
253
+ # a worker still in startup is held to `worker_boot_timeout`. Killed
254
+ # workers are then restarted by `reap_workers`.
255
+ #
256
+ # @return [void]
257
+ #
258
+ # @rbs () -> void
259
+ def timeout_hung_workers
260
+ now = Process.clock_gettime(Process::CLOCK_REALTIME)
261
+ stats = @stats.all
262
+
263
+ @workers.each do |index, pid|
264
+ next if @timed_out.include?(pid)
265
+
266
+ stat = stats[index]
267
+ next unless stat[:pid] == pid
268
+
269
+ timeout = stat[:booted] ? @worker_timeout : @worker_boot_timeout
270
+ elapsed = now - stat[:last_checkin]
271
+ next if elapsed <= timeout
272
+
273
+ action = stat[:booted] ? "check in" : "boot"
274
+ Log.warn "Killing worker #{index} (#{pid}), failed to #{action} within #{timeout}s"
275
+ Process.kill("KILL", pid) rescue nil
276
+ @timed_out << pid
277
+ end
278
+ end
279
+
212
280
  # Replaces each worker process one at a time, waiting for the new
213
281
  # worker to boot before moving on to the next. Triggered by SIGUSR2.
214
282
  #
@@ -218,7 +286,8 @@ module Raptor
218
286
  def perform_phased_restart
219
287
  @phased_restart_requested = false
220
288
  @phased_restarting = true
221
- puts "[#{Process.pid}] Phased restart starting"
289
+ @phase += 1
290
+ Log.info "Phased restart starting"
222
291
 
223
292
  begin
224
293
  @workers.keys.sort.each do |index|
@@ -240,7 +309,7 @@ module Raptor
240
309
  end
241
310
  end
242
311
 
243
- puts "[#{Process.pid}] Phased restart complete"
312
+ Log.info "Phased restart complete"
244
313
  ensure
245
314
  @phased_restarting = false
246
315
  end
@@ -253,10 +322,11 @@ module Raptor
253
322
  # critical component fails.
254
323
  #
255
324
  # @param index [Integer] slot index for this worker in the stats region
325
+ # @param phase [Integer] the cluster phase this worker was forked at
256
326
  # @return [void]
257
327
  #
258
- # @rbs (Integer index) -> void
259
- def run_worker(index)
328
+ # @rbs (Integer index, Integer phase) -> void
329
+ def run_worker(index, phase)
260
330
  shutdown_requested = false
261
331
  trap("INT") { shutdown_requested = true }
262
332
  trap("TERM") { shutdown_requested = true }
@@ -267,8 +337,11 @@ module Raptor
267
337
  @stats.write(
268
338
  index,
269
339
  pid: Process.pid,
340
+ phase: phase,
270
341
  requests: 0,
271
342
  backlog: 0,
343
+ busy_threads: 0,
344
+ thread_capacity: @thread_count,
272
345
  started_at:,
273
346
  last_checkin: started_at,
274
347
  booted: false
@@ -288,20 +361,24 @@ module Raptor
288
361
  size: @ractor_count,
289
362
  worker: request.http_parser_worker
290
363
  ) do |parsed_result|
291
- if parsed_result[:protocol] == :http2
292
- http2.handle_parsed_request(parsed_result, reactor, thread_pool)
293
- else
294
- request.handle_parsed_request(parsed_result, reactor, thread_pool)
364
+ begin
365
+ if parsed_result[:protocol] == :http2
366
+ http2.handle_parsed_request(parsed_result, reactor, thread_pool)
367
+ else
368
+ request.handle_parsed_request(parsed_result, reactor, thread_pool)
369
+ end
370
+ rescue => error
371
+ Log.rescued_error(error)
295
372
  end
296
373
  end
297
374
 
298
- reactor = Reactor.new(thread_pool, ractor_pool, client_options: @client_options)
375
+ reactor = Reactor.new(ractor_pool, thread_pool, client_options: @client_options)
299
376
  reactor_thread = reactor.run
300
377
 
301
- server = Server.new(@binder, reactor, thread_pool, request)
378
+ server = Server.new(@binder, reactor, thread_pool, request, client_options: @client_options)
302
379
  server_thread = server.run
303
380
 
304
- puts "[#{Process.pid}] Worker #{index} booted"
381
+ Log.info "Worker #{index} booted"
305
382
 
306
383
  stats_thread = Thread.new do
307
384
  Thread.current.name = "Raptor Stats"
@@ -310,8 +387,11 @@ module Raptor
310
387
  @stats.write(
311
388
  index,
312
389
  pid: Process.pid,
390
+ phase: phase,
313
391
  requests: request_count,
314
392
  backlog: reactor.backlog,
393
+ busy_threads: thread_pool.active_count,
394
+ thread_capacity: @thread_count,
315
395
  started_at:,
316
396
  last_checkin: Process.clock_gettime(Process::CLOCK_REALTIME),
317
397
  booted: true
@@ -333,6 +413,7 @@ module Raptor
333
413
  reactor.shutdown
334
414
  reactor_thread.join
335
415
  ractor_pool.shutdown
416
+ request.shutdown
336
417
  thread_pool.shutdown
337
418
  stats_thread.join
338
419
  end
@@ -364,28 +445,25 @@ module Raptor
364
445
  @shutdown = true
365
446
  end
366
447
 
367
- # Logs cluster initialization details including architecture and bind addresses.
368
- #
369
- # Outputs a hierarchical view of the cluster configuration showing
370
- # the master process, worker processes, and per-process thread/ractor
371
- # allocation along with listening addresses.
448
+ # Prints the cluster's startup banner showing process structure
449
+ # and bind addresses.
372
450
  #
373
451
  # @return [void]
374
452
  #
375
453
  # @rbs () -> void
376
454
  def log_initialization
377
- puts "Raptor Cluster initializing:"
378
- puts "├─ Version: #{VERSION}"
379
- puts "├─ Ruby Version: #{RUBY_DESCRIPTION}"
380
- puts "├─ Master PID: #{Process.pid}"
381
- puts "│ └─ #{@worker_count} worker process#{"es" if @worker_count > 1}"
382
- puts "│ ├─ 1 server thread"
383
- puts "│ ├─ 1 reactor thread"
384
- puts "│ ├─ #{@ractor_count} pipeline ractor#{"s" if @ractor_count > 1}"
385
- puts "│ ├─ 1 pipeline collector thread"
386
- puts "│ ├─ #{@thread_count} worker thread#{"s" if @thread_count > 1}"
387
- puts "│ └─ 1 stats thread"
388
- puts "└─ Listening on #{@binder.addresses.join(", ")}"
455
+ Log.info "Cluster initializing:"
456
+ Log.info "├─ Version: #{VERSION}"
457
+ Log.info "├─ Ruby Version: #{RUBY_DESCRIPTION}"
458
+ Log.info "├─ Master PID: #{Process.pid}"
459
+ Log.info "│ └─ #{@worker_count} worker process#{"es" if @worker_count > 1}"
460
+ Log.info "│ ├─ 1 server thread"
461
+ Log.info "│ ├─ 1 reactor thread"
462
+ Log.info "│ ├─ #{@ractor_count} pipeline ractor#{"s" if @ractor_count > 1}"
463
+ Log.info "│ ├─ 1 pipeline collector thread"
464
+ Log.info "│ ├─ #{@thread_count} worker thread#{"s" if @thread_count > 1}"
465
+ Log.info "│ └─ 1 stats thread"
466
+ Log.info "└─ Listening on #{@binder.addresses.join(", ")}"
389
467
  end
390
468
 
391
469
  # Logs current stats for all workers to stdout.
@@ -396,11 +474,11 @@ module Raptor
396
474
  #
397
475
  # @rbs () -> void
398
476
  def log_stats
399
- @stats.all.each_with_index do |stat, index|
477
+ @stats.all.each do |stat|
400
478
  status = stat[:booted] ? "booted" : "starting"
401
- puts "Worker #{index}: pid=#{stat[:pid]}, requests=#{stat[:requests]}, " \
402
- "backlog=#{stat[:backlog]}, #{status}, " \
403
- "last_checkin=#{Time.at(stat[:last_checkin]).strftime("%H:%M:%S")}"
479
+ Log.info "Worker #{stat[:index]} (phase #{stat[:phase]}): pid=#{stat[:pid]}, requests=#{stat[:requests]}, " \
480
+ "busy=#{stat[:busy_threads]}/#{stat[:thread_capacity]}, backlog=#{stat[:backlog]}, " \
481
+ "#{status}, last_checkin=#{Time.at(stat[:last_checkin]).strftime("%H:%M:%S")}"
404
482
  end
405
483
  end
406
484