pitchfork 0.16.0 → 0.18.0

@@ -14,7 +14,7 @@ module Pitchfork
  include Pitchfork

  # :stopdoc:
- attr_accessor :set, :config_file, :after_load
+ attr_accessor :set, :config_file

  # used to stash stuff for deferred processing of cli options in
  # config.ru. Do not rely on
@@ -41,21 +41,17 @@ module Pitchfork
  :worker_processes => 1,
  :before_fork => nil,
  :after_worker_fork => lambda { |server, worker|
- server.logger.info("worker=#{worker.nr} gen=#{worker.generation} pid=#{$$} spawned")
+ server.logger.info("#{worker.to_log} spawned")
  },
  :after_mold_fork => lambda { |server, worker|
- server.logger.info("mold gen=#{worker.generation} pid=#{$$} spawned")
+ server.logger.info("#{worker.to_log} spawned")
  },
  :before_worker_exit => nil,
  :after_worker_exit => lambda { |server, worker, status|
  m = if worker.nil?
- "repead unknown process (#{status.inspect})"
- elsif worker.mold?
- "mold pid=#{worker.pid rescue 'unknown'} gen=#{worker.generation rescue 'unknown'} reaped (#{status.inspect})"
- elsif worker.service?
- "service pid=#{worker.pid rescue 'unknown'} gen=#{worker.generation rescue 'unknown'} reaped (#{status.inspect})"
+ "reaped unknown process (#{status.inspect})"
  else
- "worker=#{worker.nr rescue 'unknown'} pid=#{worker.pid rescue 'unknown'} gen=#{worker.generation rescue 'unknown'} reaped (#{status.inspect})"
+ "#{worker.to_log} reaped (#{status.inspect})"
  end
  if status.success?
  server.logger.info(m)
@@ -64,10 +60,10 @@ module Pitchfork
  end
  },
  :after_worker_ready => lambda { |server, worker|
- server.logger.info("worker=#{worker.nr} gen=#{worker.generation} ready")
+ server.logger.info("#{worker.to_log} ready")
  },
  :after_monitor_ready => lambda { |server|
- server.logger.info("Monitor pid=#{Process.pid} ready")
+ server.logger.info("monitor pid=#{Process.pid} ready")
  },
  :after_worker_timeout => nil,
  :after_worker_hard_timeout => nil,
@@ -79,13 +75,14 @@ module Pitchfork
  :client_body_buffer_size => Pitchfork::Const::MAX_BODY,
  :before_service_worker_ready => nil,
  :before_service_worker_exit => nil,
+ :setpgid => true,
  }
  #:startdoc:

  def initialize(defaults = {}) #:nodoc:
  self.set = Hash.new(:unset)
  @use_defaults = defaults.delete(:use_defaults)
- self.config_file = defaults.delete(:config_file)
+ self.config_file = defaults.delete(:config_file) { "config/pitchfork.rb" if File.exist?("config/pitchfork.rb") }

  set.merge!(DEFAULTS) if @use_defaults
  defaults.each { |key, value| self.__send__(key, value) }
@@ -207,6 +204,10 @@ module Pitchfork
  end
  end

+ def setpgid(bool)
+ set_bool(:setpgid, bool)
+ end
+
  def spawn_timeout(seconds)
  set_int(:spawn_timeout, seconds, 1)
  end
@@ -215,6 +216,14 @@ module Pitchfork
  set_int(:worker_processes, nr, 1)
  end

+ def refork_max_unavailable(max)
+ set_int(:refork_max_unavailable, max, 1)
+ end
+
+ def max_consecutive_spawn_errors(max)
+ set_int(:max_consecutive_spawn_errors, max, 1)
+ end
+
  def early_hints(bool)
  set_bool(:early_hints, bool)
  end
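
Taken together, the configurator changes above add three new options (setpgid, refork_max_unavailable, max_consecutive_spawn_errors) and make config/pitchfork.rb the default config file when it exists. A minimal sketch of how they might be set, assuming the semantics implied by the setters and defaults above (setpgid is forwarded to Pitchfork.clean_fork, refork_max_unavailable falls back to 10% of worker_processes, and the two integer setters require values >= 1):

    # config/pitchfork.rb -- now picked up automatically when present
    worker_processes 16

    # New options in 0.18.0 (names taken from the setters above; exact semantics assumed):
    setpgid false                    # skip placing children in their own process group
    refork_max_unavailable 2         # restart at most 2 workers at a time when reforking
    max_consecutive_spawn_errors 10  # monitor aborts after 10 failed spawns in a row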
@@ -11,7 +11,7 @@ require 'pitchfork/info'
  module Pitchfork
  # This is the process manager of Pitchfork. This manages worker
  # processes which in turn handle the I/O and application process.
- # Listener sockets are started in the master process and shared with
+ # Listener sockets are started in the monitor process and shared with
  # forked worker children.
  class HttpServer
  class TimeoutHandler
@@ -50,7 +50,7 @@ module Pitchfork

  def call(original_thread) # :nodoc:
  begin
- @server.logger.error("worker=#{@worker.nr} pid=#{@worker.pid} timed out, exiting")
+ @server.logger.error("#{@worker.to_log} timed out, exiting")
  if @callback
  @callback.call(@server, @worker, Info.new(original_thread, @rack_env))
  end
@@ -80,9 +80,10 @@ module Pitchfork
  attr_accessor :app, :timeout, :timeout_signal, :soft_timeout, :cleanup_timeout, :spawn_timeout, :worker_processes,
  :before_fork, :after_worker_fork, :after_mold_fork, :before_service_worker_ready, :before_service_worker_exit,
  :listener_opts, :children,
- :orig_app, :config, :ready_pipe, :early_hints
+ :orig_app, :config, :ready_pipe, :early_hints, :setpgid
  attr_writer :after_worker_exit, :before_worker_exit, :after_worker_ready, :after_request_complete,
- :refork_condition, :after_worker_timeout, :after_worker_hard_timeout, :after_monitor_ready
+ :refork_condition, :after_worker_timeout, :after_worker_hard_timeout, :after_monitor_ready, :refork_max_unavailable,
+ :max_consecutive_spawn_errors

  attr_reader :logger
  include Pitchfork::SocketHelper
@@ -103,6 +104,9 @@ module Pitchfork
  @exit_status = 0
  @app = app
  @respawn = false
+ @refork_max_unavailable = nil
+ @consecutive_spawn_errors = 0
+ @max_consecutive_spawn_errors = nil
  @last_check = Pitchfork.time_now
  @promotion_lock = Flock.new("pitchfork-promotion")
  Info.keep_io(@promotion_lock)
@@ -116,11 +120,11 @@ module Pitchfork

  proc_name role: 'monitor', status: ARGV.join(' ')

- # We use @control_socket differently in the master and worker processes:
+ # We use @control_socket differently in the monitor and worker processes:
  #
- # * The master process never closes or reinitializes this once
- # initialized. Signal handlers in the master process will write to
- # it to wake up the master from IO.select in exactly the same manner
+ # * The monitor process never closes or reinitializes this once
+ # initialized. Signal handlers in the monitor process will write to
+ # it to wake up the monitor from IO.select in exactly the same manner
  # djb describes in https://cr.yp.to/docs/selfpipe.html
  #
  # * The workers immediately close the pipe they inherit. See the
@@ -142,7 +146,7 @@ module Pitchfork
  # attempt to connect to the listener(s)
  config.commit!(self, :skip => [:listeners, :pid])
  @orig_app = app
- # list of signals we care about and trap in master.
+ # list of signals we care about and trap in monitor.
  @queue_sigs = [
  :QUIT, :INT, :TERM, :USR2, :TTIN, :TTOU ]

@@ -157,16 +161,16 @@ module Pitchfork
  # This socketpair is used to wake us up from select(2) in #join when signals
  # are trapped. See trap_deferred.
  # It's also used by newly spawned children to send their soft_signal pipe
- # to the master when they are spawned.
+ # to the monitor when they are spawned.
  @control_socket.replace(Pitchfork.socketpair)
  Info.keep_ios(@control_socket)
- @master_pid = $$
+ @monitor_pid = $$

  # setup signal handlers before writing pid file in case people get
  # trigger happy and send signals as soon as the pid file exists.
  # Note that signals don't actually get handled until the #join method
- @queue_sigs.each { |sig| trap(sig) { @sig_queue << sig; awaken_master } }
- trap(:CHLD) { awaken_master }
+ @queue_sigs.each { |sig| trap(sig) { @sig_queue << sig; awaken_monitor } }
+ trap(:CHLD) { awaken_monitor }

  if REFORKING_AVAILABLE
  spawn_initial_mold
@@ -224,7 +228,7 @@ module Pitchfork
  # to delay between retries.
  # A negative value for +:tries+ indicates the listen will be
  # retried indefinitely, this is useful when workers belonging to
- # different masters are spawned during a transparent upgrade.
+ # different monitors are spawned during a transparent upgrade.
  def listen(address, opt = listener_opts[address] || {})
  address = config.expand_addr(address)
  return if String === address && listener_names.include?(address)
@@ -291,7 +295,7 @@ module Pitchfork

  proc_name role: 'monitor', status: ARGV.join(' ')

- logger.info "master process ready" # test_exec.rb relies on this message
+ logger.info "monitor process ready" # test_exec.rb relies on this message
  if @ready_pipe
  begin
  @ready_pipe.syswrite($$.to_s)
@@ -306,11 +310,11 @@ module Pitchfork
  break
  end
  rescue => e
- Pitchfork.log_error(@logger, "master loop error", e)
+ Pitchfork.log_error(@logger, "monitor loop error", e)
  end
  end
  stop # gracefully shutdown all workers on our way out
- logger.info "master complete status=#{@exit_status}"
+ logger.info "monitor complete status=#{@exit_status}"
  @exit_status
  end

@@ -326,10 +330,17 @@ module Pitchfork

  case message = @sig_queue.shift
  when nil
- # avoid murdering workers after our master process (or the
+ # avoid murdering workers after our monitor process (or the
  # machine) comes out of suspend/hibernation
  if (@last_check + @timeout) >= (@last_check = Pitchfork.time_now)
  sleep_time = murder_lazy_workers
+ if @max_consecutive_spawn_errors && @consecutive_spawn_errors > @max_consecutive_spawn_errors && !SharedMemory.shutting_down?
+ logger.fatal("#{@consecutive_spawn_errors} consecutive failures to spawn children, aborting - broken after_worker_fork callback?")
+ @exit_status = 1
+ SharedMemory.shutting_down!
+ stop(false)
+ return StopIteration
+ end
  else
  sleep_time = @timeout/2.0 + 1
  @logger.debug("waiting #{sleep_time}s after suspend/hibernation")
@@ -339,7 +350,7 @@ module Pitchfork
  restart_outdated_workers if REFORKING_AVAILABLE
  end

- master_sleep(sleep_time) if sleep
+ monitor_sleep(sleep_time) if sleep
  when :QUIT, :TERM # graceful shutdown
  SharedMemory.shutting_down!
  logger.info "#{message} received, starting graceful shutdown"
@@ -363,28 +374,31 @@ module Pitchfork
  when Message::WorkerSpawned
  worker = @children.update(message)
  # TODO: should we send a message to the worker to acknowledge?
- logger.info "worker=#{worker.nr} pid=#{worker.pid} gen=#{worker.generation} registered"
+ logger.info "#{worker.to_log} registered"
  when Message::MoldSpawned
  new_mold = @children.update(message)
- logger.info("mold pid=#{new_mold.pid} gen=#{new_mold.generation} spawned")
+ logger.info("#{new_mold.to_log} spawned")
  when Message::ServiceSpawned
  new_service = @children.update(message)
- logger.info("service pid=#{new_service.pid} gen=#{new_service.generation} spawned")
+ logger.info("#{new_service.to_log} spawned")
  when Message::MoldReady
+ @consecutive_spawn_errors = 0
  old_molds = @children.molds
  new_mold = @children.update(message)
- logger.info("mold pid=#{new_mold.pid} gen=#{new_mold.generation} ready")
+ logger.info("#{new_mold.to_log} ready")
  old_molds.each do |old_mold|
- logger.info("Terminating old mold pid=#{old_mold.pid} gen=#{old_mold.generation}")
+ logger.info("Terminating old #{old_mold.to_log}")
  old_mold.soft_kill(:TERM)
  end
+ when Message::WorkerReady, Message::ServiceReady
+ @consecutive_spawn_errors = 0
  else
  logger.error("Unexpected message in sig_queue #{message.inspect}")
  logger.error(@sig_queue.inspect)
  end
  end

- # Terminates all workers, but does not exit master process
+ # Terminates all workers, but does not exit monitor process
  def stop(graceful = true)
  proc_name role: 'monitor', status: 'shutting down'
  @respawn = false
@@ -413,7 +427,7 @@ module Pitchfork
  end

  def worker_exit(worker)
- logger.info "worker=#{worker.nr} pid=#{worker.pid} gen=#{worker.generation} exiting"
+ logger.info "#{worker.to_log} exiting"
  proc_name status: "exiting"

  if @before_worker_exit
@@ -427,7 +441,7 @@ module Pitchfork
  end

  def service_exit(service)
- logger.info "service pid=#{service.pid} gen=#{service.generation} exiting"
+ logger.info "#{service.to_log} exiting"
  proc_name status: "exiting"

  if @before_service_worker_exit
@@ -467,8 +481,12 @@ module Pitchfork

  private

+ def refork_max_unavailable
+ @refork_max_unavailable ||= (worker_processes * 0.1).ceil
+ end
+
  # wait for a signal handler to wake us up and then consume the pipe
- def master_sleep(sec)
+ def monitor_sleep(sec)
  @control_socket[0].wait(sec) or return
  case message = @control_socket[0].recvmsg_nonblock(exception: false)
  when :wait_readable, NOOP
@@ -478,9 +496,9 @@ module Pitchfork
  end
  end

- def awaken_master
- return if $$ != @master_pid
- @control_socket[1].sendmsg_nonblock(NOOP, exception: false) # wakeup master process from select
+ def awaken_monitor
+ return if $$ != @monitor_pid
+ @control_socket[1].sendmsg_nonblock(NOOP, exception: false) # wakeup monitor process from select
  end

  # reaps all unreaped workers
@@ -490,6 +508,9 @@ module Pitchfork
  wpid or return
  worker = @children.reap(wpid) and worker.close rescue nil
  if worker
+ unless worker.ready?
+ @consecutive_spawn_errors += 1
+ end
  @after_worker_exit.call(self, worker, status)
  else
  logger.info("reaped unknown subprocess #{status.inspect}")
@@ -548,11 +569,8 @@ module Pitchfork
  end
  end

- if child.mold?
- logger.error "mold pid=#{child.pid} gen=#{child.generation} timed out, killing"
- else
- logger.error "worker=#{child.nr} pid=#{child.pid} gen=#{child.generation} timed out, killing"
- end
+ logger.error "#{child.to_log} timed out, killing"
+ @consecutive_spawn_errors += 1 unless child.ready?
  @children.hard_kill(@timeout_signal.call(child.pid), child) # take no prisoners for hard timeout violations
  end

@@ -572,7 +590,7 @@ module Pitchfork

  def after_fork_internal
  @promotion_lock.at_fork
- @control_socket[0].close_write # this is master-only, now
+ @control_socket[0].close_write # this is monitor-only, now
  @ready_pipe.close if @ready_pipe
  Pitchfork::Configurator::RACKUP.clear
  @ready_pipe = @init_listeners = nil
@@ -583,12 +601,13 @@ module Pitchfork
  end

  def spawn_worker(worker, detach:)
- logger.info("worker=#{worker.nr} gen=#{worker.generation} spawning...")
+ logger.info("#{worker.to_log} spawning...")

  # We set the deadline before spawning the child so that if for some
  # reason it gets stuck before reaching the worker loop,
  # the monitor process will kill it.
  worker.update_deadline(@spawn_timeout)
+
  @before_fork&.call(self)
  fork_sibling("spawn_worker") do
  worker.pid = Process.pid
@@ -616,6 +635,7 @@ module Pitchfork
  end
  end

+ service.notify_ready(@control_socket[1])
  proc_name status: "ready"

  while readers[0]
@@ -646,7 +666,7 @@ module Pitchfork
  end
  end
  def spawn_service(service, detach:)
- logger.info("service gen=#{service.generation} spawning...")
+ logger.info("#{service.to_log} spawning...")
  # We set the deadline before spawning the child so that if for some
  # reason it gets stuck before reaching the worker loop,
  # the monitor process will kill it.
@@ -667,7 +687,7 @@ module Pitchfork
  def spawn_initial_mold
  mold = Worker.new(nil)
  mold.create_socketpair!
- mold.pid = Pitchfork.clean_fork do
+ mold.pid = Pitchfork.clean_fork(setpgid: setpgid) do
  mold.pid = Process.pid
  @promotion_lock.try_lock
  mold.after_fork_in_child
@@ -712,7 +732,7 @@ module Pitchfork
  spawn_worker(worker, detach: false)
  end
  # We could directly register workers when we spawn from the
- # master, like pitchfork does. However it is preferable to
+ # monitor, like pitchfork does. However it is preferable to
  # always go through the asynchronous registering process for
  # consistency.
  @children.register(worker)
@@ -724,7 +744,7 @@ module Pitchfork

  def wait_for_pending_workers
  while @children.pending_workers?
- master_sleep(0.5)
+ monitor_sleep(0.5)
  if monitor_loop(false) == StopIteration
  return StopIteration
  end
@@ -753,15 +773,14 @@ module Pitchfork
  # We don't shutdown any outdated worker if any worker is already being
  # spawned or a worker is exiting. Only 10% of workers can be reforked at
  # once to minimize the impact on capacity.
- max_pending_workers = (worker_processes * 0.1).ceil
- workers_to_restart = max_pending_workers - @children.restarting_workers_count
+ workers_to_restart = refork_max_unavailable - @children.restarting_workers_count

  if service = @children.service
  if service.outdated?
  if service.soft_kill(:TERM)
- logger.info("Sent SIGTERM to service pid=#{service.pid} gen=#{service.generation}")
+ logger.info("Sent SIGTERM to #{service.to_log}")
  else
- logger.info("Failed to send SIGTERM to service pid=#{service.pid} gen=#{service.generation}")
+ logger.info("Failed to send SIGTERM to #{service.to_log}")
  end
  end
  end
@@ -770,10 +789,10 @@ module Pitchfork
  outdated_workers = @children.workers.select { |w| !w.exiting? && w.generation < @children.mold.generation }
  outdated_workers.each do |worker|
  if worker.soft_kill(:TERM)
- logger.info("Sent SIGTERM to worker=#{worker.nr} pid=#{worker.pid} gen=#{worker.generation}")
+ logger.info("Sent SIGTERM to #{worker.to_log}")
  workers_to_restart -= 1
  else
- logger.info("Failed to send SIGTERM to worker=#{worker.nr} pid=#{worker.pid} gen=#{worker.generation}")
+ logger.info("Failed to send SIGTERM to #{worker.to_log}")
  end
  break if workers_to_restart <= 0
  end
@@ -874,7 +893,7 @@ module Pitchfork
  env
  ensure
  if env
- env["rack.response_finished"].each do |callback|
+ env["rack.response_finished"].reverse_each do |callback|
  if callback.arity == 0
  callback.call
  else
@@ -885,7 +904,6 @@ module Pitchfork
  end
  end
  timeout_handler.finished
- env
  end

  def nuke_listeners!(readers)
@@ -895,14 +913,18 @@ module Pitchfork
  tmp.each { |io| io.close rescue nil } # break out of IO.select
  end

+ def reset_signal_handlers
+ [:QUIT, :TERM, :INT].each { |sig| trap(sig) { exit!(0) } }
+ end
+
  # gets rid of stuff the worker has no business keeping track of
  # to free some resources and drops all sig handlers.
- # traps for USR2, and HUP may be set in the after_fork Proc
+ # traps for USR2, and HUP may be set in the after_worker_fork/after_mold_fork Procs
  # by the user.
  def init_worker_process(worker)
  proc_name role: "(gen:#{worker.generation}) worker[#{worker.nr}]", status: "init"
  worker.reset
- worker.register_to_master(@control_socket[1])
+ worker.register_to_monitor(@control_socket[1])
  # we'll re-trap :QUIT and :TERM later for graceful shutdown iff we accept clients
  exit_sigs = [ :QUIT, :TERM, :INT ]
  exit_sigs.each { |sig| trap(sig) { exit!(0) } }
@@ -926,14 +948,13 @@ module Pitchfork
  end

  def init_service_process(service)
- proc_name role: "(gen:#{service.generation}) mold", status: "init"
- LISTENERS.each(&:close) # Don't appear as listening to incoming requests
- service.register_to_master(@control_socket[1])
+ proc_name role: "(gen:#{service.generation}) service", status: "init"
+ LISTENERS.each(&:close).clear # Don't appear as listening to incoming requests
+ service.register_to_monitor(@control_socket[1])
  readers = [service]
  trap(:QUIT) { nuke_listeners!(readers) }
  trap(:TERM) { nuke_listeners!(readers) }
  trap(:INT) { nuke_listeners!(readers); exit!(0) }
- proc_name role: "(gen:#{service.generation}) service", status: "ready"
  readers
  end

@@ -944,7 +965,6 @@ module Pitchfork
  trap(:QUIT) { nuke_listeners!(readers) }
  trap(:TERM) { nuke_listeners!(readers) }
  trap(:INT) { nuke_listeners!(readers); exit!(0) }
- proc_name role: "(gen:#{mold.generation}) mold", status: "ready"
  readers
  end

@@ -969,6 +989,7 @@ module Pitchfork
  ready = readers.dup
  @after_worker_ready.call(self, worker)

+ worker.notify_ready(@control_socket[1])
  proc_name status: "ready"

  while readers[0]
@@ -986,7 +1007,7 @@ module Pitchfork
  if Info.fork_safe?
  spawn_mold(worker)
  else
- logger.error("worker=#{worker.nr} gen=#{worker.generation} is no longer fork safe, can't refork")
+ logger.error("#{worker.to_log} is no longer fork safe, can't refork")
  end
  when Message
  worker.update(client)
@@ -1006,7 +1027,7 @@ module Pitchfork
  if @refork_condition.met?(worker, logger)
  proc_name status: "requests: #{worker.requests_count}, spawning mold"
  if spawn_mold(worker)
- logger.info("worker=#{worker.nr} gen=#{worker.generation} Refork condition met, promoting ourselves")
+ logger.info("#{worker.to_log} refork condition met, promoting ourselves")
  end
  @refork_condition.backoff!
  end
@@ -1052,6 +1073,8 @@ module Pitchfork
  ready = readers.dup

  mold.finish_promotion(@control_socket[1])
+ mold.ready = true
+ proc_name status: "ready"

  while readers[0]
  begin
@@ -1069,11 +1092,11 @@ module Pitchfork
  spawn_worker(Worker.new(message.nr, generation: mold.generation), detach: true)
  rescue ForkFailure
  if retries > 0
- @logger.fatal("mold pid=#{mold.pid} gen=#{mold.generation} Failed to spawn a worker. Retrying.")
+ @logger.fatal("#{mold.to_log} failed to spawn a worker, retrying")
  retries -= 1
  retry
  else
- @logger.fatal("mold pid=#{mold.pid} gen=#{mold.generation} Failed to spawn a worker twice in a row. Corrupted mold process?")
+ @logger.fatal("#{mold.to_log} failed to spawn a worker twice in a row - corrupted mold process?")
  Process.exit(1)
  end
  rescue => error
@@ -1085,11 +1108,11 @@ module Pitchfork
  spawn_service(Service.new(generation: mold.generation), detach: true)
  rescue ForkFailure
  if retries > 0
- @logger.fatal("mold pid=#{mold.pid} gen=#{mold.generation} Failed to spawn a service. Retrying.")
+ @logger.fatal("#{mold.to_log} failed to spawn a service, retrying")
  retries -= 1
  retry
  else
- @logger.fatal("mold pid=#{mold.pid} gen=#{mold.generation} Failed to spawn a service twice in a row. Corrupted mold process?")
+ @logger.fatal("#{mold.to_log} failed to spawn a service twice in a row - corrupted mold process?")
  Process.exit(1)
  end
  rescue => error
@@ -1166,9 +1189,9 @@ module Pitchfork
  if REFORKING_AVAILABLE
  r, w = Pitchfork::Info.keep_ios(IO.pipe)
  # We double fork so that the new worker is re-attached back
- # to the master.
+ # to the monitor.
  # This requires either PR_SET_CHILD_SUBREAPER which is exclusive to Linux 3.4
- # or the master to be PID 1.
+ # or the monitor to be PID 1.
  if middle_pid = FORK_LOCK.synchronize { Process.fork } # parent
  w.close
  # We need to wait(2) so that the middle process doesn't end up a zombie.
@@ -1186,9 +1209,11 @@ module Pitchfork
  raise ForkFailure, "fork_sibling didn't succeed in #{FORK_TIMEOUT} seconds"
  end
  else # first child
+ reset_signal_handlers
+
  r.close
  Process.setproctitle("<pitchfork fork_sibling(#{role})>")
- pid = Pitchfork.clean_fork do
+ pid = Pitchfork.clean_fork(setpgid: setpgid) do
  # detach into a grand child
  w.close
  yield
@@ -1203,7 +1228,7 @@ module Pitchfork
  exit!
  end
  else
- Pitchfork.clean_fork(&block)
+ Pitchfork.clean_fork(setpgid: setpgid, &block)
  end
  end

@@ -98,13 +98,14 @@ module Pitchfork
  def live_workers_count
  now = Pitchfork.time_now(true)
  (0...workers_count).count do |nr|
- SharedMemory.worker_deadline(nr).value > now
+ state = SharedMemory.worker_state(nr)
+ state.ready? && state.deadline > now
  end
  end

  # Returns true if the server is shutting down.
  # This can be useful to implement health check endpoints, so they
- # can fail immediately after TERM/QUIT/INT was received by the master
+ # can fail immediately after TERM/QUIT/INT was received by the monitor
  # process.
  # Otherwise they may succeed while Pitchfork is draining requests causing
  # more requests to be sent.
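
With the change above, a worker only counts toward live_workers_count once it has signalled readiness (see worker.notify_ready and the new WorkerReady message), not merely because its deadline lies in the future. A rough health-check sketch built on that; Pitchfork::Info.live_workers_count is defined above, while the shutting_down? predicate name is an assumption based on the comment that accompanies it:

    # Rack health-check endpoint sketch using Pitchfork::Info.
    # Pitchfork::Info.shutting_down? is assumed from the comment above;
    # live_workers_count is the method shown being updated in this diff.
    require 'pitchfork/info'

    HEALTH_CHECK = lambda do |_env|
      headers = { "content-type" => "text/plain" }
      if Pitchfork::Info.shutting_down?
        # Fail fast once TERM/QUIT/INT reached the monitor, so load balancers
        # stop sending new traffic while in-flight requests drain.
        [503, headers, ["shutting down"]]
      elsif Pitchfork::Info.live_workers_count.zero?
        # No worker has reported ready (or every deadline has lapsed).
        [503, headers, ["no ready workers"]]
      else
        [200, headers, ["ok"]]
      end
    end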
@@ -124,6 +124,7 @@ module Pitchfork
  class Message
  SpawnWorker = new(:nr)
  WorkerSpawned = new(:nr, :pid, :generation, :pipe)
+ WorkerReady = new(:nr, :pid, :generation)
  PromoteWorker = new(:generation)

  MoldSpawned = new(:nr, :pid, :generation, :pipe)
@@ -131,6 +132,7 @@ module Pitchfork

  SpawnService = new(:_) # Struct.new requires at least 1 member on Ruby < 3.3
  ServiceSpawned = new(:pid, :generation, :pipe)
+ ServiceReady = new(:pid, :generation)

  SoftKill = new(:signum)
  end
@@ -19,7 +19,7 @@ module Pitchfork
  if worker.requests_count >= limit
  return false if backoff?

- logger.info("worker=#{worker.nr} pid=#{worker.pid} processed #{worker.requests_count} requests, triggering a refork")
+ logger.info("#{worker.to_log} processed #{worker.requests_count} requests, triggering a refork")
  return true
  end
  end