einhorn 0.7.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Changes.md +10 -0
- data/README.md +36 -30
- data/bin/einhorn +17 -2
- data/einhorn.gemspec +23 -21
- data/example/pool_worker.rb +1 -1
- data/example/thin_example +8 -8
- data/example/time_server +5 -5
- data/lib/einhorn/client.rb +8 -9
- data/lib/einhorn/command/interface.rb +100 -95
- data/lib/einhorn/command.rb +167 -88
- data/lib/einhorn/compat.rb +7 -7
- data/lib/einhorn/event/abstract_text_descriptor.rb +31 -35
- data/lib/einhorn/event/ack_timer.rb +2 -2
- data/lib/einhorn/event/command_server.rb +7 -9
- data/lib/einhorn/event/connection.rb +1 -3
- data/lib/einhorn/event/loop_breaker.rb +2 -1
- data/lib/einhorn/event/persistent.rb +2 -2
- data/lib/einhorn/event/timer.rb +4 -4
- data/lib/einhorn/event.rb +29 -20
- data/lib/einhorn/prctl.rb +26 -0
- data/lib/einhorn/prctl_linux.rb +48 -0
- data/lib/einhorn/safe_yaml.rb +17 -0
- data/lib/einhorn/version.rb +1 -1
- data/lib/einhorn/worker.rb +67 -49
- data/lib/einhorn/worker_pool.rb +9 -9
- data/lib/einhorn.rb +155 -126
- metadata +42 -137
- data/.gitignore +0 -17
- data/.travis.yml +0 -10
- data/CONTRIBUTORS +0 -6
- data/Gemfile +0 -11
- data/History.txt +0 -4
- data/README.md.in +0 -76
- data/Rakefile +0 -27
- data/test/_lib.rb +0 -12
- data/test/integration/_lib/fixtures/env_printer/env_printer.rb +0 -26
- data/test/integration/_lib/fixtures/exit_during_upgrade/exiting_server.rb +0 -22
- data/test/integration/_lib/fixtures/exit_during_upgrade/upgrade_reexec.rb +0 -6
- data/test/integration/_lib/fixtures/upgrade_project/upgrading_server.rb +0 -22
- data/test/integration/_lib/helpers/einhorn_helpers.rb +0 -143
- data/test/integration/_lib/helpers.rb +0 -4
- data/test/integration/_lib.rb +0 -6
- data/test/integration/startup.rb +0 -31
- data/test/integration/upgrading.rb +0 -157
- data/test/unit/einhorn/client.rb +0 -88
- data/test/unit/einhorn/command/interface.rb +0 -49
- data/test/unit/einhorn/command.rb +0 -21
- data/test/unit/einhorn/event.rb +0 -89
- data/test/unit/einhorn/worker_pool.rb +0 -39
- data/test/unit/einhorn.rb +0 -58
- /data/{LICENSE → LICENSE.txt} +0 -0
    
        data/lib/einhorn/command.rb
    CHANGED
    
    | @@ -1,28 +1,25 @@ | |
| 1 | 
            -
            require  | 
| 2 | 
            -
            require  | 
| 3 | 
            -
            require  | 
| 1 | 
            +
            require "pp"
         | 
| 2 | 
            +
            require "set"
         | 
| 3 | 
            +
            require "tmpdir"
         | 
| 4 4 |  | 
| 5 | 
            -
            require  | 
| 5 | 
            +
            require "einhorn/command/interface"
         | 
| 6 | 
            +
            require "einhorn/prctl"
         | 
| 6 7 |  | 
| 7 8 | 
             
            module Einhorn
         | 
| 8 9 | 
             
              module Command
         | 
| 9 10 | 
             
                def self.reap
         | 
| 10 | 
            -
                   | 
| 11 | 
            -
                     | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
             | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
                    end
         | 
| 19 | 
            -
                  rescue Errno::ECHILD
         | 
| 20 | 
            -
                  end
         | 
| 11 | 
            +
                  loop do
         | 
| 12 | 
            +
                    Einhorn.log_debug("Going to reap a child process")
         | 
| 13 | 
            +
                    pid = Process.wait(-1, Process::WNOHANG)
         | 
| 14 | 
            +
                    return unless pid
         | 
| 15 | 
            +
                    cleanup(pid)
         | 
| 16 | 
            +
                    Einhorn::Event.break_loop
         | 
| 17 | 
            +
                  end
         | 
| 18 | 
            +
                rescue Errno::ECHILD
         | 
| 21 19 | 
             
                end
         | 
| 22 20 |  | 
| 23 | 
            -
                 | 
| 24 | 
            -
             | 
| 25 | 
            -
                  unless spec = Einhorn::State.children[pid]
         | 
| 21 | 
            +
                def self.cleanup(pid)
         | 
| 22 | 
            +
                  unless (spec = Einhorn::State.children[pid])
         | 
| 26 23 | 
             
                    Einhorn.log_error("Could not find any config for exited child #{pid.inspect}! This probably indicates a bug in Einhorn.")
         | 
| 27 24 | 
             
                    return
         | 
| 28 25 | 
             
                  end
         | 
| @@ -32,7 +29,7 @@ module Einhorn | |
| 32 29 | 
             
                  # Unacked worker
         | 
| 33 30 | 
             
                  if spec[:type] == :worker && !spec[:acked]
         | 
| 34 31 | 
             
                    Einhorn::State.consecutive_deaths_before_ack += 1
         | 
| 35 | 
            -
                    extra =  | 
| 32 | 
            +
                    extra = " before it was ACKed"
         | 
| 36 33 | 
             
                  else
         | 
| 37 34 | 
             
                    extra = nil
         | 
| 38 35 | 
             
                  end
         | 
| @@ -47,6 +44,16 @@ module Einhorn | |
| 47 44 | 
             
                  end
         | 
| 48 45 | 
             
                end
         | 
| 49 46 |  | 
| 47 | 
            +
                def self.register_ping(pid, request_id)
         | 
| 48 | 
            +
                  unless (spec = Einhorn::State.children[pid])
         | 
| 49 | 
            +
                    Einhorn.log_error("Could not find state for PID #{pid.inspect}; ignoring ACK.")
         | 
| 50 | 
            +
                    return
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                  spec[:pinged_at] = Time.now
         | 
| 54 | 
            +
                  spec[:pinged_request_id] = request_id
         | 
| 55 | 
            +
                end
         | 
| 56 | 
            +
             | 
| 50 57 | 
             
                def self.register_manual_ack(pid)
         | 
| 51 58 | 
             
                  ack_mode = Einhorn::State.ack_mode
         | 
| 52 59 | 
             
                  unless ack_mode[:type] == :manual
         | 
| @@ -75,7 +82,7 @@ module Einhorn | |
| 75 82 | 
             
                end
         | 
| 76 83 |  | 
| 77 84 | 
             
                def self.register_ack(pid)
         | 
| 78 | 
            -
                  unless spec = Einhorn::State.children[pid]
         | 
| 85 | 
            +
                  unless (spec = Einhorn::State.children[pid])
         | 
| 79 86 | 
             
                    Einhorn.log_error("Could not find state for PID #{pid.inspect}; ignoring ACK.")
         | 
| 80 87 | 
             
                    return
         | 
| 81 88 | 
             
                  end
         | 
| @@ -85,10 +92,8 @@ module Einhorn | |
| 85 92 | 
             
                    return
         | 
| 86 93 | 
             
                  end
         | 
| 87 94 |  | 
| 88 | 
            -
                  if Einhorn::State.consecutive_deaths_before_ack > 0
         | 
| 89 | 
            -
                     | 
| 90 | 
            -
                  else
         | 
| 91 | 
            -
                    extra = nil
         | 
| 95 | 
            +
                  extra = if Einhorn::State.consecutive_deaths_before_ack > 0
         | 
| 96 | 
            +
                    ", breaking the streak of #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked workers dying"
         | 
| 92 97 | 
             
                  end
         | 
| 93 98 | 
             
                  Einhorn::State.consecutive_deaths_before_ack = 0
         | 
| 94 99 |  | 
| @@ -98,14 +103,14 @@ module Einhorn | |
| 98 103 | 
             
                  Einhorn::Event.break_loop
         | 
| 99 104 | 
             
                end
         | 
| 100 105 |  | 
| 101 | 
            -
                def self.signal_all(signal, children=nil, record=true)
         | 
| 106 | 
            +
                def self.signal_all(signal, children = nil, record = true)
         | 
| 102 107 | 
             
                  children ||= Einhorn::WorkerPool.workers
         | 
| 103 | 
            -
             | 
| 104 108 | 
             
                  signaled = {}
         | 
| 109 | 
            +
             | 
| 105 110 | 
             
                  Einhorn.log_info("Sending #{signal} to #{children.inspect}", :upgrade)
         | 
| 106 111 |  | 
| 107 112 | 
             
                  children.each do |child|
         | 
| 108 | 
            -
                    unless spec = Einhorn::State.children[child]
         | 
| 113 | 
            +
                    unless (spec = Einhorn::State.children[child])
         | 
| 109 114 | 
             
                      Einhorn.log_error("Trying to send #{signal} to dead child #{child.inspect}. The fact we tried this probably indicates a bug in Einhorn.", :upgrade)
         | 
| 110 115 | 
             
                      next
         | 
| 111 116 | 
             
                    end
         | 
| @@ -115,11 +120,13 @@ module Einhorn | |
| 115 120 | 
             
                        Einhorn.log_error("Re-sending #{signal} to already-signaled child #{child.inspect}. It may be slow to spin down, or it may be swallowing #{signal}s.", :upgrade)
         | 
| 116 121 | 
             
                      end
         | 
| 117 122 | 
             
                      spec[:signaled].add(signal)
         | 
| 123 | 
            +
                      spec[:last_signaled_at] = Time.now
         | 
| 118 124 | 
             
                    end
         | 
| 119 125 |  | 
| 120 126 | 
             
                    begin
         | 
| 121 127 | 
             
                      Process.kill(signal, child)
         | 
| 122 128 | 
             
                    rescue Errno::ESRCH
         | 
| 129 | 
            +
                      Einhorn.log_debug("Attempted to #{signal} child #{child.inspect} but the process does not exist", :upgrade)
         | 
| 123 130 | 
             
                    else
         | 
| 124 131 | 
             
                      signaled[child] = spec
         | 
| 125 132 | 
             
                    end
         | 
| @@ -129,7 +136,7 @@ module Einhorn | |
| 129 136 | 
             
                    Einhorn::Event::Timer.open(Einhorn::State.signal_timeout) do
         | 
| 130 137 | 
             
                      children.each do |child|
         | 
| 131 138 | 
             
                        spec = Einhorn::State.children[child]
         | 
| 132 | 
            -
                        next unless spec # Process is already dead and removed by  | 
| 139 | 
            +
                        next unless spec # Process is already dead and removed by cleanup
         | 
| 133 140 | 
             
                        signaled_spec = signaled[child]
         | 
| 134 141 | 
             
                        next unless signaled_spec # We got ESRCH when trying to signal
         | 
| 135 142 | 
             
                        if spec[:spinup_time] != signaled_spec[:spinup_time]
         | 
| @@ -139,15 +146,15 @@ module Einhorn | |
| 139 146 |  | 
| 140 147 | 
             
                        Einhorn.log_info("Child #{child.inspect} is still active after #{Einhorn::State.signal_timeout}s. Sending SIGKILL.")
         | 
| 141 148 | 
             
                        begin
         | 
| 142 | 
            -
                          Process.kill( | 
| 149 | 
            +
                          Process.kill("KILL", child)
         | 
| 143 150 | 
             
                        rescue Errno::ESRCH
         | 
| 144 151 | 
             
                        end
         | 
| 145 | 
            -
                        spec[:signaled].add( | 
| 152 | 
            +
                        spec[:signaled].add("KILL")
         | 
| 146 153 | 
             
                      end
         | 
| 147 154 | 
             
                    end
         | 
| 148 | 
            -
                  end
         | 
| 149 155 |  | 
| 150 | 
            -
             | 
| 156 | 
            +
                    Einhorn.log_info("Successfully sent #{signal}s to #{signaled.length} processes: #{signaled.keys}")
         | 
| 157 | 
            +
                  end
         | 
| 151 158 | 
             
                end
         | 
| 152 159 |  | 
| 153 160 | 
             
                def self.increment
         | 
| @@ -155,14 +162,14 @@ module Einhorn | |
| 155 162 | 
             
                  old = Einhorn::State.config[:number]
         | 
| 156 163 | 
             
                  new = (Einhorn::State.config[:number] += 1)
         | 
| 157 164 | 
             
                  output = "Incrementing number of workers from #{old} -> #{new}"
         | 
| 158 | 
            -
                   | 
| 165 | 
            +
                  warn(output)
         | 
| 159 166 | 
             
                  output
         | 
| 160 167 | 
             
                end
         | 
| 161 168 |  | 
| 162 169 | 
             
                def self.decrement
         | 
| 163 170 | 
             
                  if Einhorn::State.config[:number] <= 1
         | 
| 164 171 | 
             
                    output = "Can't decrease number of workers (already at #{Einhorn::State.config[:number]}).  Run kill #{$$} if you really want to kill einhorn."
         | 
| 165 | 
            -
                     | 
| 172 | 
            +
                    warn(output)
         | 
| 166 173 | 
             
                    return output
         | 
| 167 174 | 
             
                  end
         | 
| 168 175 |  | 
| @@ -170,7 +177,7 @@ module Einhorn | |
| 170 177 | 
             
                  old = Einhorn::State.config[:number]
         | 
| 171 178 | 
             
                  new = (Einhorn::State.config[:number] -= 1)
         | 
| 172 179 | 
             
                  output = "Decrementing number of workers from #{old} -> #{new}"
         | 
| 173 | 
            -
                   | 
| 180 | 
            +
                  warn(output)
         | 
| 174 181 | 
             
                  output
         | 
| 175 182 | 
             
                end
         | 
| 176 183 |  | 
| @@ -183,12 +190,12 @@ module Einhorn | |
| 183 190 | 
             
                  old = Einhorn::State.config[:number]
         | 
| 184 191 | 
             
                  Einhorn::State.config[:number] = new
         | 
| 185 192 | 
             
                  output = "Altering worker count, #{old} -> #{new}. Will "
         | 
| 186 | 
            -
                  if old < new
         | 
| 187 | 
            -
                     | 
| 193 | 
            +
                  output << if old < new
         | 
| 194 | 
            +
                    "spin up additional workers."
         | 
| 188 195 | 
             
                  else
         | 
| 189 | 
            -
                     | 
| 196 | 
            +
                    "gracefully terminate workers."
         | 
| 190 197 | 
             
                  end
         | 
| 191 | 
            -
                   | 
| 198 | 
            +
                  warn(output)
         | 
| 192 199 | 
             
                  output
         | 
| 193 200 | 
             
                end
         | 
| 194 201 |  | 
| @@ -199,8 +206,8 @@ module Einhorn | |
| 199 206 | 
             
                  end
         | 
| 200 207 |  | 
| 201 208 | 
             
                  {
         | 
| 202 | 
            -
                    : | 
| 203 | 
            -
                    : | 
| 209 | 
            +
                    state: global_state,
         | 
| 210 | 
            +
                    persistent_descriptors: descriptor_state
         | 
| 204 211 | 
             
                  }
         | 
| 205 212 | 
             
                end
         | 
| 206 213 |  | 
| @@ -245,8 +252,8 @@ module Einhorn | |
| 245 252 |  | 
| 246 253 | 
             
                  begin
         | 
| 247 254 | 
             
                    Einhorn.initialize_reload_environment
         | 
| 248 | 
            -
                    respawn_commandline = Einhorn.upgrade_commandline([ | 
| 249 | 
            -
                    respawn_commandline << { | 
| 255 | 
            +
                    respawn_commandline = Einhorn.upgrade_commandline(["--with-state-fd", read.fileno.to_s])
         | 
| 256 | 
            +
                    respawn_commandline << {close_others: false}
         | 
| 250 257 | 
             
                    Einhorn.log_info("About to re-exec einhorn master as #{respawn_commandline.inspect}", :reload)
         | 
| 251 258 | 
             
                    Einhorn::Compat.exec(*respawn_commandline)
         | 
| 252 259 | 
             
                  rescue SystemCallError => e
         | 
| @@ -263,30 +270,34 @@ module Einhorn | |
| 263 270 | 
             
                  end
         | 
| 264 271 | 
             
                end
         | 
| 265 272 |  | 
| 266 | 
            -
                def self.spinup(cmd=nil)
         | 
| 273 | 
            +
                def self.spinup(cmd = nil)
         | 
| 267 274 | 
             
                  cmd ||= Einhorn::State.cmd
         | 
| 268 275 | 
             
                  index = next_index
         | 
| 269 | 
            -
                   | 
| 270 | 
            -
             | 
| 276 | 
            +
                  expected_ppid = Process.pid
         | 
| 277 | 
            +
                  pid = if Einhorn::State.preloaded
         | 
| 278 | 
            +
                    fork do
         | 
| 271 279 | 
             
                      Einhorn::TransientState.whatami = :worker
         | 
| 272 280 | 
             
                      prepare_child_process
         | 
| 273 281 |  | 
| 274 | 
            -
                      Einhorn.log_info( | 
| 282 | 
            +
                      Einhorn.log_info("About to tear down Einhorn state and run einhorn_main")
         | 
| 275 283 | 
             
                      Einhorn::Command::Interface.uninit
         | 
| 276 284 | 
             
                      Einhorn::Event.close_all_for_worker
         | 
| 277 285 | 
             
                      Einhorn.set_argv(cmd, true)
         | 
| 278 286 |  | 
| 279 287 | 
             
                      reseed_random
         | 
| 280 288 |  | 
| 289 | 
            +
                      setup_parent_watch(expected_ppid)
         | 
| 290 | 
            +
             | 
| 281 291 | 
             
                      prepare_child_environment(index)
         | 
| 282 292 | 
             
                      einhorn_main
         | 
| 283 293 | 
             
                    end
         | 
| 284 294 | 
             
                  else
         | 
| 285 | 
            -
                     | 
| 295 | 
            +
                    fork do
         | 
| 286 296 | 
             
                      Einhorn::TransientState.whatami = :worker
         | 
| 287 297 | 
             
                      prepare_child_process
         | 
| 288 298 |  | 
| 289 299 | 
             
                      Einhorn.log_info("About to exec #{cmd.inspect}")
         | 
| 300 | 
            +
                      Einhorn::Command::Interface.uninit
         | 
| 290 301 | 
             
                      # Here's the only case where cloexec would help. Since we
         | 
| 291 302 | 
             
                      # have to track and manually close FDs for other cases, we
         | 
| 292 303 | 
             
                      # may as well just reuse close_all rather than also set
         | 
| @@ -295,20 +306,23 @@ module Einhorn | |
| 295 306 | 
             
                      # Note that Ruby 1.9's close_others option is useful here.
         | 
| 296 307 | 
             
                      Einhorn::Event.close_all_for_worker
         | 
| 297 308 |  | 
| 309 | 
            +
                      setup_parent_watch(expected_ppid)
         | 
| 310 | 
            +
             | 
| 298 311 | 
             
                      prepare_child_environment(index)
         | 
| 299 | 
            -
                      Einhorn::Compat.exec(cmd[0], cmd[1..-1], : | 
| 312 | 
            +
                      Einhorn::Compat.exec(cmd[0], cmd[1..-1], close_others: false)
         | 
| 300 313 | 
             
                    end
         | 
| 301 314 | 
             
                  end
         | 
| 302 315 |  | 
| 303 316 | 
             
                  Einhorn.log_info("===> Launched #{pid} (index: #{index})", :upgrade)
         | 
| 304 317 | 
             
                  Einhorn::State.last_spinup = Time.now
         | 
| 305 318 | 
             
                  Einhorn::State.children[pid] = {
         | 
| 306 | 
            -
                    : | 
| 307 | 
            -
                    : | 
| 308 | 
            -
                    : | 
| 309 | 
            -
                    : | 
| 310 | 
            -
                    : | 
| 311 | 
            -
                    : | 
| 319 | 
            +
                    type: :worker,
         | 
| 320 | 
            +
                    version: Einhorn::State.version,
         | 
| 321 | 
            +
                    acked: false,
         | 
| 322 | 
            +
                    signaled: Set.new,
         | 
| 323 | 
            +
                    last_signaled_at: nil,
         | 
| 324 | 
            +
                    index: index,
         | 
| 325 | 
            +
                    spinup_time: Einhorn::State.last_spinup
         | 
| 312 326 | 
             
                  }
         | 
| 313 327 |  | 
| 314 328 | 
             
                  # Set up whatever's needed for ACKing
         | 
| @@ -317,6 +331,7 @@ module Einhorn | |
| 317 331 | 
             
                  when :timer
         | 
| 318 332 | 
             
                    Einhorn::Event::ACKTimer.open(ack_mode[:timeout], pid)
         | 
| 319 333 | 
             
                  when :manual
         | 
| 334 | 
            +
                    # nothing to do
         | 
| 320 335 | 
             
                  else
         | 
| 321 336 | 
             
                    Einhorn.log_error("Unrecognized ACK mode #{type.inspect}")
         | 
| 322 337 | 
             
                  end
         | 
| @@ -324,24 +339,18 @@ module Einhorn | |
| 324 339 |  | 
| 325 340 | 
             
                def self.prepare_child_environment(index)
         | 
| 326 341 | 
             
                  # This is run from the child
         | 
| 327 | 
            -
                  ENV[ | 
| 328 | 
            -
                  ENV[ | 
| 342 | 
            +
                  ENV["EINHORN_MASTER_PID"] = Process.ppid.to_s
         | 
| 343 | 
            +
                  ENV["EINHORN_SOCK_PATH"] = Einhorn::Command::Interface.socket_path
         | 
| 329 344 | 
             
                  if Einhorn::State.command_socket_as_fd
         | 
| 330 345 | 
             
                    socket = UNIXSocket.open(Einhorn::Command::Interface.socket_path)
         | 
| 331 346 | 
             
                    Einhorn::TransientState.socket_handles << socket
         | 
| 332 | 
            -
                    ENV[ | 
| 347 | 
            +
                    ENV["EINHORN_SOCK_FD"] = socket.fileno.to_s
         | 
| 333 348 | 
             
                  end
         | 
| 334 349 |  | 
| 335 | 
            -
                  ENV[ | 
| 336 | 
            -
                  Einhorn::State.bind_fds.each_with_index {|fd, i| ENV["EINHORN_FD_#{i}"] = fd.to_s}
         | 
| 337 | 
            -
             | 
| 338 | 
            -
                  ENV['EINHORN_CHILD_INDEX'] = index.to_s
         | 
| 350 | 
            +
                  ENV["EINHORN_FD_COUNT"] = Einhorn::State.bind_fds.length.to_s
         | 
| 351 | 
            +
                  Einhorn::State.bind_fds.each_with_index { |fd, i| ENV["EINHORN_FD_#{i}"] = fd.to_s }
         | 
| 339 352 |  | 
| 340 | 
            -
                   | 
| 341 | 
            -
                  # match Upstart's nominal internal support for space-separated
         | 
| 342 | 
            -
                  # FD lists, but nobody uses that in practice, and it makes
         | 
| 343 | 
            -
                  # finding individual FDs more difficult
         | 
| 344 | 
            -
                  ENV['EINHORN_FDS'] = Einhorn::State.bind_fds.map(&:to_s).join(' ')
         | 
| 353 | 
            +
                  ENV["EINHORN_CHILD_INDEX"] = index.to_s
         | 
| 345 354 | 
             
                end
         | 
| 346 355 |  | 
| 347 356 | 
             
                # Reseed common ruby random number generators.
         | 
| @@ -364,11 +373,11 @@ module Einhorn | |
| 364 373 |  | 
| 365 374 | 
             
                  # reseed OpenSSL::Random if it's loaded
         | 
| 366 375 | 
             
                  if defined?(OpenSSL::Random)
         | 
| 367 | 
            -
                    if defined?(Random)
         | 
| 368 | 
            -
                       | 
| 376 | 
            +
                    seed = if defined?(Random)
         | 
| 377 | 
            +
                      Random.new_seed
         | 
| 369 378 | 
             
                    else
         | 
| 370 379 | 
             
                      # Ruby 1.8
         | 
| 371 | 
            -
                       | 
| 380 | 
            +
                      rand
         | 
| 372 381 | 
             
                    end
         | 
| 373 382 | 
             
                    OpenSSL::Random.seed(seed.to_s)
         | 
| 374 383 | 
             
                  end
         | 
| @@ -379,6 +388,24 @@ module Einhorn | |
| 379 388 | 
             
                  Einhorn.renice_self
         | 
| 380 389 | 
             
                end
         | 
| 381 390 |  | 
| 391 | 
            +
                def self.setup_parent_watch(expected_ppid)
         | 
| 392 | 
            +
                  if Einhorn::State.kill_children_on_exit
         | 
| 393 | 
            +
                    begin
         | 
| 394 | 
            +
                      # NB: Having the USR2 signal handler set to terminate (the default) at
         | 
| 395 | 
            +
                      # this point is required. If it's set to a ruby handler, there are
         | 
| 396 | 
            +
                      # race conditions that could cause the worker to leak.
         | 
| 397 | 
            +
             | 
| 398 | 
            +
                      Einhorn::Prctl.set_pdeathsig("USR2")
         | 
| 399 | 
            +
                      if Process.ppid != expected_ppid
         | 
| 400 | 
            +
                        Einhorn.log_error("Parent process died before we set pdeathsig; cowardly refusing to exec child process.")
         | 
| 401 | 
            +
                        exit(1)
         | 
| 402 | 
            +
                      end
         | 
| 403 | 
            +
                    rescue NotImplementedError
         | 
| 404 | 
            +
                      # Unsupported OS; silently continue.
         | 
| 405 | 
            +
                    end
         | 
| 406 | 
            +
                  end
         | 
| 407 | 
            +
                end
         | 
| 408 | 
            +
             | 
| 382 409 | 
             
                # @param options [Hash]
         | 
| 383 410 | 
             
                #
         | 
| 384 411 | 
             
                # @option options [Boolean] :smooth (false) Whether to perform a smooth or
         | 
| @@ -387,18 +414,19 @@ module Einhorn | |
| 387 414 | 
             
                #   upgrade, bring up all the new workers and don't cull any old workers
         | 
| 388 415 | 
             
                #   until they're all up.
         | 
| 389 416 | 
             
                #
         | 
| 390 | 
            -
                def self.full_upgrade(options={})
         | 
| 391 | 
            -
                  options = {: | 
| 417 | 
            +
                def self.full_upgrade(options = {})
         | 
| 418 | 
            +
                  options = {smooth: false}.merge(options)
         | 
| 392 419 |  | 
| 393 420 | 
             
                  Einhorn::State.smooth_upgrade = options.fetch(:smooth)
         | 
| 394 421 | 
             
                  reload_for_upgrade
         | 
| 395 422 | 
             
                end
         | 
| 396 423 |  | 
| 397 424 | 
             
                def self.full_upgrade_smooth
         | 
| 398 | 
            -
                  full_upgrade(: | 
| 425 | 
            +
                  full_upgrade(smooth: true)
         | 
| 399 426 | 
             
                end
         | 
| 427 | 
            +
             | 
| 400 428 | 
             
                def self.full_upgrade_fleet
         | 
| 401 | 
            -
                  full_upgrade(: | 
| 429 | 
            +
                  full_upgrade(smooth: false)
         | 
| 402 430 | 
             
                end
         | 
| 403 431 |  | 
| 404 432 | 
             
                def self.reload_for_upgrade
         | 
| @@ -411,8 +439,8 @@ module Einhorn | |
| 411 439 | 
             
                    Einhorn.log_info("Currently upgrading (#{Einhorn::WorkerPool.ack_count} / #{Einhorn::WorkerPool.ack_target} ACKs; bumping version and starting over)...", :upgrade)
         | 
| 412 440 | 
             
                  else
         | 
| 413 441 | 
             
                    Einhorn::State.upgrading = true
         | 
| 414 | 
            -
                    u_type = Einhorn::State.smooth_upgrade ?  | 
| 415 | 
            -
                    Einhorn.log_info("Starting #{u_type} upgrade from version"  | 
| 442 | 
            +
                    u_type = Einhorn::State.smooth_upgrade ? "smooth" : "fleet"
         | 
| 443 | 
            +
                    Einhorn.log_info("Starting #{u_type} upgrade from version" \
         | 
| 416 444 | 
             
                                     " #{Einhorn::State.version}...", :upgrade)
         | 
| 417 445 | 
             
                  end
         | 
| 418 446 |  | 
| @@ -459,10 +487,45 @@ module Einhorn | |
| 459 487 | 
             
                  end
         | 
| 460 488 |  | 
| 461 489 | 
             
                  if unsignaled > target
         | 
| 462 | 
            -
                    excess = Einhorn::WorkerPool.unsignaled_modern_workers_with_priority[0...(unsignaled-target)]
         | 
| 490 | 
            +
                    excess = Einhorn::WorkerPool.unsignaled_modern_workers_with_priority[0...(unsignaled - target)]
         | 
| 463 491 | 
             
                    Einhorn.log_info("Have too many workers at the current version, so killing off #{excess.length} of them.")
         | 
| 464 492 | 
             
                    signal_all("USR2", excess)
         | 
| 465 493 | 
             
                  end
         | 
| 494 | 
            +
             | 
| 495 | 
            +
                  # Ensure all signaled workers that have outlived signal_timeout get killed.
         | 
| 496 | 
            +
                  kill_expired_signaled_workers if Einhorn::State.signal_timeout
         | 
| 497 | 
            +
                end
         | 
| 498 | 
            +
             | 
| 499 | 
            +
                def self.kill_expired_signaled_workers
         | 
| 500 | 
            +
                  now = Time.now
         | 
| 501 | 
            +
                  children = Einhorn::State.children.select do |_, c|
         | 
| 502 | 
            +
                    # Only interested in USR2 signaled workers
         | 
| 503 | 
            +
                    next unless c[:signaled] && c[:signaled].length > 0
         | 
| 504 | 
            +
                    next unless c[:signaled].include?("USR2")
         | 
| 505 | 
            +
             | 
| 506 | 
            +
                    # Ignore processes that have received KILL since it can't be trapped.
         | 
| 507 | 
            +
                    next if c[:signaled].include?("KILL")
         | 
| 508 | 
            +
             | 
| 509 | 
            +
                    # Filter out those children that have not reached signal_timeout yet.
         | 
| 510 | 
            +
                    next unless c[:last_signaled_at]
         | 
| 511 | 
            +
                    expires_at = c[:last_signaled_at] + Einhorn::State.signal_timeout
         | 
| 512 | 
            +
                    next unless now >= expires_at
         | 
| 513 | 
            +
             | 
| 514 | 
            +
                    true
         | 
| 515 | 
            +
                  end
         | 
| 516 | 
            +
             | 
| 517 | 
            +
                  Einhorn.log_info("#{children.size} expired signaled workers found.") if children.size > 0
         | 
| 518 | 
            +
                  children.each do |pid, child|
         | 
| 519 | 
            +
                    Einhorn.log_info("Child #{pid.inspect} was signaled #{(child[:last_signaled_at] - now).abs.to_i}s ago. Sending SIGKILL as it is still active after #{Einhorn::State.signal_timeout}s timeout.", :upgrade)
         | 
| 520 | 
            +
                    begin
         | 
| 521 | 
            +
                      Process.kill("KILL", pid)
         | 
| 522 | 
            +
                    rescue Errno::ESRCH
         | 
| 523 | 
            +
                      Einhorn.log_debug("Attempted to SIGKILL child #{pid.inspect} but the process does not exist.")
         | 
| 524 | 
            +
                    end
         | 
| 525 | 
            +
             | 
| 526 | 
            +
                    child[:signaled].add("KILL")
         | 
| 527 | 
            +
                    child[:last_signaled_at] = Time.now
         | 
| 528 | 
            +
                  end
         | 
| 466 529 | 
             
                end
         | 
| 467 530 |  | 
| 468 531 | 
             
                def self.stop_respawning
         | 
| @@ -487,7 +550,7 @@ module Einhorn | |
| 487 550 | 
             
                    return
         | 
| 488 551 | 
             
                  end
         | 
| 489 552 | 
             
                  Einhorn.log_info("Launching #{missing} new workers")
         | 
| 490 | 
            -
                  missing.times {spinup}
         | 
| 553 | 
            +
                  missing.times { spinup }
         | 
| 491 554 | 
             
                end
         | 
| 492 555 |  | 
| 493 556 | 
             
                # Unbounded exponential backoff is not a thing: we run into problems if
         | 
| @@ -496,10 +559,12 @@ module Einhorn | |
| 496 559 | 
             
                # don't wait until the heat death of the universe to spin up new capacity.
         | 
| 497 560 | 
             
                MAX_SPINUP_INTERVAL = 30.0
         | 
| 498 561 |  | 
| 499 | 
            -
                def self.replenish_gradually(max_unacked=nil)
         | 
| 562 | 
            +
                def self.replenish_gradually(max_unacked = nil)
         | 
| 500 563 | 
             
                  return if Einhorn::TransientState.has_outstanding_spinup_timer
         | 
| 501 564 | 
             
                  return unless Einhorn::WorkerPool.missing_worker_count > 0
         | 
| 502 565 |  | 
| 566 | 
            +
                  max_unacked ||= Einhorn::State.config[:max_unacked]
         | 
| 567 | 
            +
             | 
| 503 568 | 
             
                  # default to spinning up at most NCPU workers at once
         | 
| 504 569 | 
             
                  unless max_unacked
         | 
| 505 570 | 
             
                    begin
         | 
| @@ -517,16 +582,13 @@ module Einhorn | |
| 517 582 |  | 
| 518 583 | 
             
                  # Exponentially backoff automated spinup if we're just having
         | 
| 519 584 | 
             
                  # things die before ACKing
         | 
| 520 | 
            -
                  spinup_interval = Einhorn::State.config[:seconds] * (1.5 | 
| 585 | 
            +
                  spinup_interval = Einhorn::State.config[:seconds] * (1.5**Einhorn::State.consecutive_deaths_before_ack)
         | 
| 521 586 | 
             
                  spinup_interval = [spinup_interval, MAX_SPINUP_INTERVAL].min
         | 
| 522 587 | 
             
                  seconds_ago = (Time.now - Einhorn::State.last_spinup).to_f
         | 
| 523 588 |  | 
| 524 589 | 
             
                  if seconds_ago > spinup_interval
         | 
| 525 | 
            -
                     | 
| 526 | 
            -
             | 
| 527 | 
            -
                      Einhorn.log_debug("There are #{unacked} unacked new workers, and max_unacked is #{max_unacked}, so not spinning up a new process")
         | 
| 528 | 
            -
                    else
         | 
| 529 | 
            -
                      msg = "Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so spinning up a new process"
         | 
| 590 | 
            +
                    if trigger_spinup?(max_unacked)
         | 
| 591 | 
            +
                      msg = "Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so spinning up a new process."
         | 
| 530 592 |  | 
| 531 593 | 
             
                      if Einhorn::State.consecutive_deaths_before_ack > 0
         | 
| 532 594 | 
             
                        Einhorn.log_info("#{msg} (there have been #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked worker deaths)", :upgrade)
         | 
| @@ -537,7 +599,7 @@ module Einhorn | |
| 537 599 | 
             
                      spinup
         | 
| 538 600 | 
             
                    end
         | 
| 539 601 | 
             
                  else
         | 
| 540 | 
            -
                    Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so not spinning up a new process")
         | 
| 602 | 
            +
                    Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so not spinning up a new process.")
         | 
| 541 603 | 
             
                  end
         | 
| 542 604 |  | 
| 543 605 | 
             
                  Einhorn::TransientState.has_outstanding_spinup_timer = true
         | 
| @@ -547,18 +609,35 @@ module Einhorn | |
| 547 609 | 
             
                  end
         | 
| 548 610 | 
             
                end
         | 
| 549 611 |  | 
| 550 | 
            -
                def self.quieter(log=true)
         | 
| 612 | 
            +
                def self.quieter(log = true)
         | 
| 551 613 | 
             
                  Einhorn::State.verbosity += 1 if Einhorn::State.verbosity < 2
         | 
| 552 614 | 
             
                  output = "Verbosity set to #{Einhorn::State.verbosity}"
         | 
| 553 615 | 
             
                  Einhorn.log_info(output) if log
         | 
| 554 616 | 
             
                  output
         | 
| 555 617 | 
             
                end
         | 
| 556 618 |  | 
| 557 | 
            -
                def self.louder(log=true)
         | 
| 619 | 
            +
                def self.louder(log = true)
         | 
| 558 620 | 
             
                  Einhorn::State.verbosity -= 1 if Einhorn::State.verbosity > 0
         | 
| 559 621 | 
             
                  output = "Verbosity set to #{Einhorn::State.verbosity}"
         | 
| 560 622 | 
             
                  Einhorn.log_info(output) if log
         | 
| 561 623 | 
             
                  output
         | 
| 562 624 | 
             
                end
         | 
| 625 | 
            +
             | 
| 626 | 
            +
                def self.trigger_spinup?(max_unacked)
         | 
| 627 | 
            +
                  unacked = Einhorn::WorkerPool.unacked_unsignaled_modern_workers.length
         | 
| 628 | 
            +
                  if unacked >= max_unacked
         | 
| 629 | 
            +
                    Einhorn.log_info("There are #{unacked} unacked new workers, and max_unacked is #{max_unacked}, so not spinning up a new process.")
         | 
| 630 | 
            +
                    return false
         | 
| 631 | 
            +
                  elsif Einhorn::State.config[:max_upgrade_additional]
         | 
| 632 | 
            +
                    capacity_exceeded = (Einhorn::State.config[:number] + Einhorn::State.config[:max_upgrade_additional]) - Einhorn::WorkerPool.workers_with_state.length
         | 
| 633 | 
            +
                    if capacity_exceeded < 0
         | 
| 634 | 
            +
                      Einhorn.log_info("Over worker capacity by #{capacity_exceeded.abs} during upgrade, #{Einhorn::WorkerPool.modern_workers.length} new workers of #{Einhorn::WorkerPool.workers_with_state.length} total. Waiting for old workers to exit before spinning up a process.")
         | 
| 635 | 
            +
             | 
| 636 | 
            +
                      return false
         | 
| 637 | 
            +
                    end
         | 
| 638 | 
            +
                  end
         | 
| 639 | 
            +
             | 
| 640 | 
            +
                  true
         | 
| 641 | 
            +
                end
         | 
| 563 642 | 
             
              end
         | 
| 564 643 | 
             
            end
         | 
    
        data/lib/einhorn/compat.rb
    CHANGED
    
    | @@ -11,10 +11,10 @@ module Einhorn | |
| 11 11 |  | 
| 12 12 | 
             
                def self.cloexec!(fd, enable)
         | 
| 13 13 | 
             
                  original = fd.fcntl(Fcntl::F_GETFD)
         | 
| 14 | 
            -
                  if enable
         | 
| 15 | 
            -
                     | 
| 14 | 
            +
                  new = if enable
         | 
| 15 | 
            +
                    original | Fcntl::FD_CLOEXEC
         | 
| 16 16 | 
             
                  else
         | 
| 17 | 
            -
                     | 
| 17 | 
            +
                    original & (-Fcntl::FD_CLOEXEC - 1)
         | 
| 18 18 | 
             
                  end
         | 
| 19 19 | 
             
                  fd.fcntl(Fcntl::F_SETFD, new)
         | 
| 20 20 | 
             
                end
         | 
| @@ -24,7 +24,7 @@ module Einhorn | |
| 24 24 | 
             
                end
         | 
| 25 25 |  | 
| 26 26 | 
             
                # Opts are ignored in Ruby 1.8
         | 
| 27 | 
            -
                def self.exec(script, args, opts={})
         | 
| 27 | 
            +
                def self.exec(script, args, opts = {})
         | 
| 28 28 | 
             
                  cmd = [script, script]
         | 
| 29 29 | 
             
                  begin
         | 
| 30 30 | 
             
                    Kernel.exec(cmd, *(args + [opts]))
         | 
| @@ -53,18 +53,18 @@ module Einhorn | |
| 53 53 |  | 
| 54 54 | 
             
                  # linux / friends
         | 
| 55 55 | 
             
                  begin
         | 
| 56 | 
            -
                    return File.read( | 
| 56 | 
            +
                    return File.read("/proc/cpuinfo").scan(/^processor\s*:/).count
         | 
| 57 57 | 
             
                  rescue Errno::ENOENT
         | 
| 58 58 | 
             
                  end
         | 
| 59 59 |  | 
| 60 60 | 
             
                  # OS X
         | 
| 61 | 
            -
                  if RUBY_PLATFORM | 
| 61 | 
            +
                  if RUBY_PLATFORM.match?(/darwin/)
         | 
| 62 62 | 
             
                    return Integer(`sysctl -n hw.logicalcpu`)
         | 
| 63 63 | 
             
                  end
         | 
| 64 64 |  | 
| 65 65 | 
             
                  # windows / friends
         | 
| 66 66 | 
             
                  begin
         | 
| 67 | 
            -
                    require  | 
| 67 | 
            +
                    require "win32ole"
         | 
| 68 68 | 
             
                  rescue LoadError
         | 
| 69 69 | 
             
                  else
         | 
| 70 70 | 
             
                    wmi = WIN32OLE.connect("winmgmts://")
         | 
| @@ -6,7 +6,7 @@ module Einhorn::Event | |
| 6 6 | 
             
                @@instance_counter = 0
         | 
| 7 7 |  | 
| 8 8 | 
             
                def self.open(sock)
         | 
| 9 | 
            -
                   | 
| 9 | 
            +
                  new(sock)
         | 
| 10 10 | 
             
                end
         | 
| 11 11 |  | 
| 12 12 | 
             
                def initialize(sock)
         | 
| @@ -40,24 +40,22 @@ module Einhorn::Event | |
| 40 40 | 
             
                end
         | 
| 41 41 |  | 
| 42 42 | 
             
                def notify_readable
         | 
| 43 | 
            -
                   | 
| 44 | 
            -
                     | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
                     | 
| 48 | 
            -
             | 
| 49 | 
            -
                     | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
                     | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
                     | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
                      process_read_buffer
         | 
| 60 | 
            -
                    end
         | 
| 43 | 
            +
                  loop do
         | 
| 44 | 
            +
                    return if @closed
         | 
| 45 | 
            +
                    chunk = @socket.read_nonblock(1024)
         | 
| 46 | 
            +
                  rescue Errno::EAGAIN
         | 
| 47 | 
            +
                    break
         | 
| 48 | 
            +
                  rescue EOFError, Errno::EPIPE, Errno::ECONNRESET
         | 
| 49 | 
            +
                    close
         | 
| 50 | 
            +
                    break
         | 
| 51 | 
            +
                  rescue => e
         | 
| 52 | 
            +
                    log_error("Caught unrecognized error while reading from socket: #{e} (#{e.class})")
         | 
| 53 | 
            +
                    close
         | 
| 54 | 
            +
                    break
         | 
| 55 | 
            +
                  else
         | 
| 56 | 
            +
                    log_debug("read #{chunk.length} bytes (#{chunk.inspect[0..20]})")
         | 
| 57 | 
            +
                    @read_buffer << chunk
         | 
| 58 | 
            +
                    process_read_buffer
         | 
| 61 59 | 
             
                  end
         | 
| 62 60 | 
             
                end
         | 
| 63 61 |  | 
| @@ -72,19 +70,17 @@ module Einhorn::Event | |
| 72 70 | 
             
                end
         | 
| 73 71 |  | 
| 74 72 | 
             
                def notify_writeable
         | 
| 75 | 
            -
                   | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 79 | 
            -
                   | 
| 80 | 
            -
             | 
| 81 | 
            -
                   | 
| 82 | 
            -
             | 
| 83 | 
            -
             | 
| 84 | 
            -
                   | 
| 85 | 
            -
             | 
| 86 | 
            -
                    @write_buffer = @write_buffer[written..-1]
         | 
| 87 | 
            -
                  end
         | 
| 73 | 
            +
                  return if @closed
         | 
| 74 | 
            +
                  written = @socket.write_nonblock(@write_buffer)
         | 
| 75 | 
            +
                rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR
         | 
| 76 | 
            +
                rescue Errno::EPIPE, Errno::ECONNRESET
         | 
| 77 | 
            +
                  close
         | 
| 78 | 
            +
                rescue => e
         | 
| 79 | 
            +
                  log_error("Caught unrecognized error while writing to socket: #{e} (#{e.class})")
         | 
| 80 | 
            +
                  close
         | 
| 81 | 
            +
                else
         | 
| 82 | 
            +
                  log_debug("wrote #{written} bytes")
         | 
| 83 | 
            +
                  @write_buffer = @write_buffer[written..-1]
         | 
| 88 84 | 
             
                end
         | 
| 89 85 |  | 
| 90 86 | 
             
                def to_io
         | 
| @@ -102,9 +98,9 @@ module Einhorn::Event | |
| 102 98 | 
             
                end
         | 
| 103 99 |  | 
| 104 100 | 
             
                def process_read_buffer
         | 
| 105 | 
            -
                   | 
| 101 | 
            +
                  loop do
         | 
| 106 102 | 
             
                    if @read_buffer.length > 0
         | 
| 107 | 
            -
                      break unless split = parse_record
         | 
| 103 | 
            +
                      break unless (split = parse_record)
         | 
| 108 104 | 
             
                      record, remainder = split
         | 
| 109 105 | 
             
                      log_debug("Read a record of #{record.length} bytes.")
         | 
| 110 106 | 
             
                      @read_buffer = remainder
         | 
| @@ -117,7 +113,7 @@ module Einhorn::Event | |
| 117 113 |  | 
| 118 114 | 
             
                # Override in subclass. This lets you do streaming reads.
         | 
| 119 115 | 
             
                def parse_record
         | 
| 120 | 
            -
                  [@read_buffer,  | 
| 116 | 
            +
                  [@read_buffer, ""]
         | 
| 121 117 | 
             
                end
         | 
| 122 118 |  | 
| 123 119 | 
             
                def consume_record(record)
         |