einhorn 0.7.4 → 1.0.0
- checksums.yaml +7 -0
- data/Changes.md +10 -0
- data/README.md +36 -30
- data/bin/einhorn +17 -2
- data/einhorn.gemspec +23 -21
- data/example/pool_worker.rb +1 -1
- data/example/thin_example +8 -8
- data/example/time_server +5 -5
- data/lib/einhorn/client.rb +8 -9
- data/lib/einhorn/command/interface.rb +100 -95
- data/lib/einhorn/command.rb +167 -88
- data/lib/einhorn/compat.rb +7 -7
- data/lib/einhorn/event/abstract_text_descriptor.rb +31 -35
- data/lib/einhorn/event/ack_timer.rb +2 -2
- data/lib/einhorn/event/command_server.rb +7 -9
- data/lib/einhorn/event/connection.rb +1 -3
- data/lib/einhorn/event/loop_breaker.rb +2 -1
- data/lib/einhorn/event/persistent.rb +2 -2
- data/lib/einhorn/event/timer.rb +4 -4
- data/lib/einhorn/event.rb +29 -20
- data/lib/einhorn/prctl.rb +26 -0
- data/lib/einhorn/prctl_linux.rb +48 -0
- data/lib/einhorn/safe_yaml.rb +17 -0
- data/lib/einhorn/version.rb +1 -1
- data/lib/einhorn/worker.rb +67 -49
- data/lib/einhorn/worker_pool.rb +9 -9
- data/lib/einhorn.rb +155 -126
- metadata +42 -137
- data/.gitignore +0 -17
- data/.travis.yml +0 -10
- data/CONTRIBUTORS +0 -6
- data/Gemfile +0 -11
- data/History.txt +0 -4
- data/README.md.in +0 -76
- data/Rakefile +0 -27
- data/test/_lib.rb +0 -12
- data/test/integration/_lib/fixtures/env_printer/env_printer.rb +0 -26
- data/test/integration/_lib/fixtures/exit_during_upgrade/exiting_server.rb +0 -22
- data/test/integration/_lib/fixtures/exit_during_upgrade/upgrade_reexec.rb +0 -6
- data/test/integration/_lib/fixtures/upgrade_project/upgrading_server.rb +0 -22
- data/test/integration/_lib/helpers/einhorn_helpers.rb +0 -143
- data/test/integration/_lib/helpers.rb +0 -4
- data/test/integration/_lib.rb +0 -6
- data/test/integration/startup.rb +0 -31
- data/test/integration/upgrading.rb +0 -157
- data/test/unit/einhorn/client.rb +0 -88
- data/test/unit/einhorn/command/interface.rb +0 -49
- data/test/unit/einhorn/command.rb +0 -21
- data/test/unit/einhorn/event.rb +0 -89
- data/test/unit/einhorn/worker_pool.rb +0 -39
- data/test/unit/einhorn.rb +0 -58
- /data/{LICENSE → LICENSE.txt} +0 -0
data/lib/einhorn/command.rb
CHANGED
@@ -1,28 +1,25 @@
-require 'pp'
-require 'set'
-require 'tmpdir'
+require "pp"
+require "set"
+require "tmpdir"
 
-require 'einhorn/command/interface'
+require "einhorn/command/interface"
+require "einhorn/prctl"
 
 module Einhorn
   module Command
     def self.reap
-      begin
-        while true
-          Einhorn.log_debug('Going to reap a child process')
-
-          pid = Process.wait(-1, Process::WNOHANG)
-          return unless pid
-          cleanup(pid)
-          Einhorn::Event.break_loop
-        end
-      rescue Errno::ECHILD
-      end
+      loop do
+        Einhorn.log_debug("Going to reap a child process")
+        pid = Process.wait(-1, Process::WNOHANG)
+        return unless pid
+        cleanup(pid)
+        Einhorn::Event.break_loop
+      end
+    rescue Errno::ECHILD
     end
 
-    def self.cleanup(pid)
-      unless spec = Einhorn::State.children[pid]
+    def self.cleanup(pid)
+      unless (spec = Einhorn::State.children[pid])
         Einhorn.log_error("Could not find any config for exited child #{pid.inspect}! This probably indicates a bug in Einhorn.")
         return
       end
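The rewritten reap also shows the idiom this release adopts throughout: since Ruby 2.5 a method body can carry its rescue directly, so the old begin/end wrapper disappears. For reference, a standalone sketch of the same non-blocking reap pattern (illustrative only, not code from the gem): Process.wait(-1, Process::WNOHANG) returns a pid when some child has exited, nil when children remain but none has exited, and raises Errno::ECHILD once no children are left.

    # Illustrative sketch of the reap loop's use of WNOHANG (Unix only).
    pid = fork { exit }
    sleep 0.1
    begin
      loop do
        reaped = Process.wait(-1, Process::WNOHANG)
        break if reaped.nil? # children may remain, but none has exited yet
        puts "reaped child #{reaped}"
      end
    rescue Errno::ECHILD
      puts "no children left to reap"
    end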
@@ -32,7 +29,7 @@ module Einhorn
       # Unacked worker
       if spec[:type] == :worker && !spec[:acked]
         Einhorn::State.consecutive_deaths_before_ack += 1
-        extra = ' before it was ACKed'
+        extra = " before it was ACKed"
       else
         extra = nil
       end
@@ -47,6 +44,16 @@ module Einhorn
       end
     end
 
+    def self.register_ping(pid, request_id)
+      unless (spec = Einhorn::State.children[pid])
+        Einhorn.log_error("Could not find state for PID #{pid.inspect}; ignoring ACK.")
+        return
+      end
+
+      spec[:pinged_at] = Time.now
+      spec[:pinged_request_id] = request_id
+    end
+
     def self.register_manual_ack(pid)
       ack_mode = Einhorn::State.ack_mode
       unless ack_mode[:type] == :manual
@@ -75,7 +82,7 @@ module Einhorn
     end
 
     def self.register_ack(pid)
-      unless spec = Einhorn::State.children[pid]
+      unless (spec = Einhorn::State.children[pid])
         Einhorn.log_error("Could not find state for PID #{pid.inspect}; ignoring ACK.")
         return
       end
@@ -85,10 +92,8 @@ module Einhorn
         return
       end
 
-      if Einhorn::State.consecutive_deaths_before_ack > 0
-        extra = ", breaking the streak of #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked workers dying"
-      else
-        extra = nil
+      extra = if Einhorn::State.consecutive_deaths_before_ack > 0
+        ", breaking the streak of #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked workers dying"
       end
       Einhorn::State.consecutive_deaths_before_ack = 0
 
@@ -98,14 +103,14 @@ module Einhorn
       Einhorn::Event.break_loop
     end
 
-    def self.signal_all(signal, children=nil, record=true)
+    def self.signal_all(signal, children = nil, record = true)
       children ||= Einhorn::WorkerPool.workers
-
       signaled = {}
+
       Einhorn.log_info("Sending #{signal} to #{children.inspect}", :upgrade)
 
       children.each do |child|
-        unless spec = Einhorn::State.children[child]
+        unless (spec = Einhorn::State.children[child])
           Einhorn.log_error("Trying to send #{signal} to dead child #{child.inspect}. The fact we tried this probably indicates a bug in Einhorn.", :upgrade)
           next
         end
@@ -115,11 +120,13 @@ module Einhorn
             Einhorn.log_error("Re-sending #{signal} to already-signaled child #{child.inspect}. It may be slow to spin down, or it may be swallowing #{signal}s.", :upgrade)
           end
           spec[:signaled].add(signal)
+          spec[:last_signaled_at] = Time.now
         end
 
         begin
           Process.kill(signal, child)
         rescue Errno::ESRCH
+          Einhorn.log_debug("Attempted to #{signal} child #{child.inspect} but the process does not exist", :upgrade)
         else
           signaled[child] = spec
         end
@@ -129,7 +136,7 @@ module Einhorn
       Einhorn::Event::Timer.open(Einhorn::State.signal_timeout) do
         children.each do |child|
           spec = Einhorn::State.children[child]
-          next unless spec # Process is already dead and removed by
+          next unless spec # Process is already dead and removed by cleanup
           signaled_spec = signaled[child]
           next unless signaled_spec # We got ESRCH when trying to signal
           if spec[:spinup_time] != signaled_spec[:spinup_time]
@@ -139,15 +146,15 @@ module Einhorn
 
           Einhorn.log_info("Child #{child.inspect} is still active after #{Einhorn::State.signal_timeout}s. Sending SIGKILL.")
           begin
-            Process.kill('KILL', child)
+            Process.kill("KILL", child)
           rescue Errno::ESRCH
           end
-          spec[:signaled].add('KILL')
+          spec[:signaled].add("KILL")
         end
       end
-      end
 
-      "Successfully sent #{signal}s to #{signaled.length} processes: #{signaled.keys}"
+        Einhorn.log_info("Successfully sent #{signal}s to #{signaled.length} processes: #{signaled.keys}")
+      end
     end
 
     def self.increment
@@ -155,14 +162,14 @@ module Einhorn
       old = Einhorn::State.config[:number]
       new = (Einhorn::State.config[:number] += 1)
       output = "Incrementing number of workers from #{old} -> #{new}"
-      $stderr.puts(output)
+      warn(output)
       output
     end
 
     def self.decrement
       if Einhorn::State.config[:number] <= 1
         output = "Can't decrease number of workers (already at #{Einhorn::State.config[:number]}). Run kill #{$$} if you really want to kill einhorn."
-        $stderr.puts(output)
+        warn(output)
         return output
       end
 
@@ -170,7 +177,7 @@ module Einhorn
       old = Einhorn::State.config[:number]
       new = (Einhorn::State.config[:number] -= 1)
       output = "Decrementing number of workers from #{old} -> #{new}"
-      $stderr.puts(output)
+      warn(output)
       output
     end
 
@@ -183,12 +190,12 @@ module Einhorn
       old = Einhorn::State.config[:number]
       Einhorn::State.config[:number] = new
       output = "Altering worker count, #{old} -> #{new}. Will "
-      if old < new
-        output << "spin up additional workers."
+      output << if old < new
+        "spin up additional workers."
       else
-        output << "gracefully terminate workers."
+        "gracefully terminate workers."
       end
-      $stderr.puts(output)
+      warn(output)
       output
     end
 
@@ -199,8 +206,8 @@ module Einhorn
       end
 
       {
-        :state => global_state,
-        :persistent_descriptors => descriptor_state
+        state: global_state,
+        persistent_descriptors: descriptor_state
       }
     end
 
@@ -245,8 +252,8 @@ module Einhorn
 
       begin
         Einhorn.initialize_reload_environment
-        respawn_commandline = Einhorn.upgrade_commandline(['--with-state-fd', read.fileno.to_s])
-        respawn_commandline << {:close_others => false}
+        respawn_commandline = Einhorn.upgrade_commandline(["--with-state-fd", read.fileno.to_s])
+        respawn_commandline << {close_others: false}
         Einhorn.log_info("About to re-exec einhorn master as #{respawn_commandline.inspect}", :reload)
         Einhorn::Compat.exec(*respawn_commandline)
       rescue SystemCallError => e
@@ -263,30 +270,34 @@ module Einhorn
       end
     end
 
-    def self.spinup(cmd=nil)
+    def self.spinup(cmd = nil)
       cmd ||= Einhorn::State.cmd
       index = next_index
-      if Einhorn::State.preloaded
-        pid = fork do
+      expected_ppid = Process.pid
+      pid = if Einhorn::State.preloaded
+        fork do
           Einhorn::TransientState.whatami = :worker
           prepare_child_process
 
-          Einhorn.log_info('About to tear down Einhorn state and run einhorn_main')
+          Einhorn.log_info("About to tear down Einhorn state and run einhorn_main")
           Einhorn::Command::Interface.uninit
           Einhorn::Event.close_all_for_worker
           Einhorn.set_argv(cmd, true)
 
           reseed_random
 
+          setup_parent_watch(expected_ppid)
+
           prepare_child_environment(index)
           einhorn_main
         end
       else
-        pid = fork do
+        fork do
           Einhorn::TransientState.whatami = :worker
           prepare_child_process
 
           Einhorn.log_info("About to exec #{cmd.inspect}")
+          Einhorn::Command::Interface.uninit
           # Here's the only case where cloexec would help. Since we
           # have to track and manually close FDs for other cases, we
           # may as well just reuse close_all rather than also set
@@ -295,20 +306,23 @@ module Einhorn
           # Note that Ruby 1.9's close_others option is useful here.
           Einhorn::Event.close_all_for_worker
 
+          setup_parent_watch(expected_ppid)
+
           prepare_child_environment(index)
-          Einhorn::Compat.exec(cmd[0], cmd[1..-1], :close_others => false)
+          Einhorn::Compat.exec(cmd[0], cmd[1..-1], close_others: false)
         end
       end
 
       Einhorn.log_info("===> Launched #{pid} (index: #{index})", :upgrade)
       Einhorn::State.last_spinup = Time.now
       Einhorn::State.children[pid] = {
-        :type => :worker,
-        :version => Einhorn::State.version,
-        :acked => false,
-        :signaled => Set.new,
-        :index => index,
-        :spinup_time => Einhorn::State.last_spinup
+        type: :worker,
+        version: Einhorn::State.version,
+        acked: false,
+        signaled: Set.new,
+        last_signaled_at: nil,
+        index: index,
+        spinup_time: Einhorn::State.last_spinup
       }
 
       # Set up whatever's needed for ACKing
@@ -317,6 +331,7 @@ module Einhorn
       when :timer
         Einhorn::Event::ACKTimer.open(ack_mode[:timeout], pid)
       when :manual
+        # nothing to do
       else
         Einhorn.log_error("Unrecognized ACK mode #{type.inspect}")
       end
@@ -324,24 +339,18 @@ module Einhorn
 
     def self.prepare_child_environment(index)
       # This is run from the child
-      ENV['EINHORN_MASTER_PID'] = Process.ppid.to_s
-      ENV['EINHORN_SOCK_PATH'] = Einhorn::Command::Interface.socket_path
+      ENV["EINHORN_MASTER_PID"] = Process.ppid.to_s
+      ENV["EINHORN_SOCK_PATH"] = Einhorn::Command::Interface.socket_path
       if Einhorn::State.command_socket_as_fd
         socket = UNIXSocket.open(Einhorn::Command::Interface.socket_path)
         Einhorn::TransientState.socket_handles << socket
-        ENV['EINHORN_SOCK_FD'] = socket.fileno.to_s
+        ENV["EINHORN_SOCK_FD"] = socket.fileno.to_s
       end
 
-      ENV['EINHORN_FD_COUNT'] = Einhorn::State.bind_fds.length.to_s
-      Einhorn::State.bind_fds.each_with_index {|fd, i| ENV["EINHORN_FD_#{i}"] = fd.to_s}
-
-      ENV['EINHORN_CHILD_INDEX'] = index.to_s
+      ENV["EINHORN_FD_COUNT"] = Einhorn::State.bind_fds.length.to_s
+      Einhorn::State.bind_fds.each_with_index { |fd, i| ENV["EINHORN_FD_#{i}"] = fd.to_s }
 
-
-      # match Upstart's nominal internal support for space-separated
-      # FD lists, but nobody uses that in practice, and it makes
-      # finding individual FDs more difficult
-      ENV['EINHORN_FDS'] = Einhorn::State.bind_fds.map(&:to_s).join(' ')
+      ENV["EINHORN_CHILD_INDEX"] = index.to_s
     end
 
     # Reseed common ruby random number generators.
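Note that 1.0.0 drops the legacy space-separated EINHORN_FDS variable and keeps only the indexed form. A hypothetical worker-side sketch (not code from the gem) of consuming the variables set above, where EINHORN_FD_COUNT gives the number of inherited descriptors and EINHORN_FD_0, EINHORN_FD_1, ... give their numbers:

    require "socket"

    # Rebuild Socket objects from the file descriptors einhorn left us.
    count = Integer(ENV.fetch("EINHORN_FD_COUNT", "0"))
    sockets = (0...count).map do |i|
      Socket.for_fd(Integer(ENV.fetch("EINHORN_FD_#{i}")))
    end

    # Which worker slot this process occupies.
    index = ENV["EINHORN_CHILD_INDEX"]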
@@ -364,11 +373,11 @@ module Einhorn
 
       # reseed OpenSSL::Random if it's loaded
       if defined?(OpenSSL::Random)
-        if defined?(Random)
-          seed = Random.new_seed
+        seed = if defined?(Random)
+          Random.new_seed
         else
           # Ruby 1.8
-          seed = rand
+          rand
         end
         OpenSSL::Random.seed(seed.to_s)
       end
@@ -379,6 +388,24 @@ module Einhorn
       Einhorn.renice_self
     end
 
+    def self.setup_parent_watch(expected_ppid)
+      if Einhorn::State.kill_children_on_exit
+        begin
+          # NB: Having the USR2 signal handler set to terminate (the default) at
+          # this point is required. If it's set to a ruby handler, there are
+          # race conditions that could cause the worker to leak.
+
+          Einhorn::Prctl.set_pdeathsig("USR2")
+          if Process.ppid != expected_ppid
+            Einhorn.log_error("Parent process died before we set pdeathsig; cowardly refusing to exec child process.")
+            exit(1)
+          end
+        rescue NotImplementedError
+          # Unsupported OS; silently continue.
+        end
+      end
+    end
+
     # @param options [Hash]
     #
     # @option options [Boolean] :smooth (false) Whether to perform a smooth or
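setup_parent_watch is the consumer of the new einhorn/prctl require: on Linux, Einhorn::Prctl.set_pdeathsig asks the kernel to deliver a signal to the worker when its parent dies, and the Process.ppid re-check closes the race where the master died before the call. A rough standalone sketch of the underlying Linux call, assuming glibc and using Fiddle directly (einhorn ships its own wrapper in the new lib/einhorn/prctl_linux.rb; this is not that code):

    require "fiddle"

    PR_SET_PDEATHSIG = 1 # from <sys/prctl.h>

    libc = Fiddle.dlopen(nil)
    prctl = Fiddle::Function.new(
      libc["prctl"],
      [Fiddle::TYPE_INT, Fiddle::TYPE_LONG, Fiddle::TYPE_LONG, Fiddle::TYPE_LONG, Fiddle::TYPE_LONG],
      Fiddle::TYPE_INT
    )

    # Ask the kernel to send us SIGUSR2 when our parent process exits.
    usr2 = Signal.list.fetch("USR2")
    raise "prctl failed" if prctl.call(PR_SET_PDEATHSIG, usr2, 0, 0, 0) != 0

    # If the parent died before the call above, the signal will never arrive,
    # which is why the diff re-checks Process.ppid afterwards.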
@@ -387,18 +414,19 @@ module Einhorn
     # upgrade, bring up all the new workers and don't cull any old workers
     # until they're all up.
     #
-    def self.full_upgrade(options={})
-      options = {:smooth => false}.merge(options)
+    def self.full_upgrade(options = {})
+      options = {smooth: false}.merge(options)
 
       Einhorn::State.smooth_upgrade = options.fetch(:smooth)
       reload_for_upgrade
     end
 
     def self.full_upgrade_smooth
-      full_upgrade(:smooth => true)
+      full_upgrade(smooth: true)
     end
+
     def self.full_upgrade_fleet
-      full_upgrade(:smooth => false)
+      full_upgrade(smooth: false)
     end
 
     def self.reload_for_upgrade
@@ -411,8 +439,8 @@ module Einhorn
         Einhorn.log_info("Currently upgrading (#{Einhorn::WorkerPool.ack_count} / #{Einhorn::WorkerPool.ack_target} ACKs; bumping version and starting over)...", :upgrade)
       else
         Einhorn::State.upgrading = true
-        u_type = Einhorn::State.smooth_upgrade ? 'smooth' : 'fleet'
-        Einhorn.log_info("Starting #{u_type} upgrade from version" +
+        u_type = Einhorn::State.smooth_upgrade ? "smooth" : "fleet"
+        Einhorn.log_info("Starting #{u_type} upgrade from version" \
           " #{Einhorn::State.version}...", :upgrade)
       end
 
@@ -459,10 +487,45 @@ module Einhorn
       end
 
       if unsignaled > target
-        excess = Einhorn::WorkerPool.unsignaled_modern_workers_with_priority[0...(unsignaled-target)]
+        excess = Einhorn::WorkerPool.unsignaled_modern_workers_with_priority[0...(unsignaled - target)]
         Einhorn.log_info("Have too many workers at the current version, so killing off #{excess.length} of them.")
         signal_all("USR2", excess)
       end
+
+      # Ensure all signaled workers that have outlived signal_timeout get killed.
+      kill_expired_signaled_workers if Einhorn::State.signal_timeout
+    end
+
+    def self.kill_expired_signaled_workers
+      now = Time.now
+      children = Einhorn::State.children.select do |_, c|
+        # Only interested in USR2 signaled workers
+        next unless c[:signaled] && c[:signaled].length > 0
+        next unless c[:signaled].include?("USR2")
+
+        # Ignore processes that have received KILL since it can't be trapped.
+        next if c[:signaled].include?("KILL")
+
+        # Filter out those children that have not reached signal_timeout yet.
+        next unless c[:last_signaled_at]
+        expires_at = c[:last_signaled_at] + Einhorn::State.signal_timeout
+        next unless now >= expires_at
+
+        true
+      end
+
+      Einhorn.log_info("#{children.size} expired signaled workers found.") if children.size > 0
+      children.each do |pid, child|
+        Einhorn.log_info("Child #{pid.inspect} was signaled #{(child[:last_signaled_at] - now).abs.to_i}s ago. Sending SIGKILL as it is still active after #{Einhorn::State.signal_timeout}s timeout.", :upgrade)
+        begin
+          Process.kill("KILL", pid)
+        rescue Errno::ESRCH
+          Einhorn.log_debug("Attempted to SIGKILL child #{pid.inspect} but the process does not exist.")
+        end
+
+        child[:signaled].add("KILL")
+        child[:last_signaled_at] = Time.now
+      end
     end
 
     def self.stop_respawning
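The expiry rule in kill_expired_signaled_workers is simple arithmetic: a worker becomes eligible for SIGKILL once now >= last_signaled_at + signal_timeout. A toy illustration with assumed values (not code from the gem):

    require "set"

    signal_timeout = 30 # seconds
    child = {signaled: Set.new(["USR2"]), last_signaled_at: Time.now - 45}

    expires_at = child[:last_signaled_at] + signal_timeout
    puts Time.now >= expires_at # => true: signaled 45s ago, so SIGKILL is due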
@@ -487,7 +550,7 @@ module Einhorn
         return
       end
       Einhorn.log_info("Launching #{missing} new workers")
-      missing.times {spinup}
+      missing.times { spinup }
     end
 
     # Unbounded exponential backoff is not a thing: we run into problems if
@@ -496,10 +559,12 @@ module Einhorn
     # don't wait until the heat death of the universe to spin up new capacity.
     MAX_SPINUP_INTERVAL = 30.0
 
-    def self.replenish_gradually(max_unacked=nil)
+    def self.replenish_gradually(max_unacked = nil)
       return if Einhorn::TransientState.has_outstanding_spinup_timer
       return unless Einhorn::WorkerPool.missing_worker_count > 0
 
+      max_unacked ||= Einhorn::State.config[:max_unacked]
+
       # default to spinning up at most NCPU workers at once
       unless max_unacked
         begin
@@ -517,16 +582,13 @@ module Einhorn
 
       # Exponentially backoff automated spinup if we're just having
       # things die before ACKing
-      spinup_interval = Einhorn::State.config[:seconds] * (1.5 ** Einhorn::State.consecutive_deaths_before_ack)
+      spinup_interval = Einhorn::State.config[:seconds] * (1.5**Einhorn::State.consecutive_deaths_before_ack)
      spinup_interval = [spinup_interval, MAX_SPINUP_INTERVAL].min
      seconds_ago = (Time.now - Einhorn::State.last_spinup).to_f
 
      if seconds_ago > spinup_interval
-        unacked = Einhorn::WorkerPool.unacked_unsignaled_modern_workers.length
-        if unacked >= max_unacked
-          Einhorn.log_debug("There are #{unacked} unacked new workers, and max_unacked is #{max_unacked}, so not spinning up a new process")
-        else
-          msg = "Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so spinning up a new process"
+        if trigger_spinup?(max_unacked)
+          msg = "Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so spinning up a new process."
 
          if Einhorn::State.consecutive_deaths_before_ack > 0
            Einhorn.log_info("#{msg} (there have been #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked worker deaths)", :upgrade)
@@ -537,7 +599,7 @@ module Einhorn
           spinup
         end
       else
-        Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so not spinning up a new process")
+        Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so not spinning up a new process.")
       end
 
       Einhorn::TransientState.has_outstanding_spinup_timer = true
@@ -547,18 +609,35 @@ module Einhorn
       end
     end
 
-    def self.quieter(log=true)
+    def self.quieter(log = true)
       Einhorn::State.verbosity += 1 if Einhorn::State.verbosity < 2
       output = "Verbosity set to #{Einhorn::State.verbosity}"
       Einhorn.log_info(output) if log
       output
     end
 
-    def self.louder(log=true)
+    def self.louder(log = true)
       Einhorn::State.verbosity -= 1 if Einhorn::State.verbosity > 0
       output = "Verbosity set to #{Einhorn::State.verbosity}"
       Einhorn.log_info(output) if log
       output
     end
+
+    def self.trigger_spinup?(max_unacked)
+      unacked = Einhorn::WorkerPool.unacked_unsignaled_modern_workers.length
+      if unacked >= max_unacked
+        Einhorn.log_info("There are #{unacked} unacked new workers, and max_unacked is #{max_unacked}, so not spinning up a new process.")
+        return false
+      elsif Einhorn::State.config[:max_upgrade_additional]
+        capacity_exceeded = (Einhorn::State.config[:number] + Einhorn::State.config[:max_upgrade_additional]) - Einhorn::WorkerPool.workers_with_state.length
+        if capacity_exceeded < 0
+          Einhorn.log_info("Over worker capacity by #{capacity_exceeded.abs} during upgrade, #{Einhorn::WorkerPool.modern_workers.length} new workers of #{Einhorn::WorkerPool.workers_with_state.length} total. Waiting for old workers to exit before spinning up a process.")
+
+          return false
+        end
+      end
+
+      true
+    end
   end
 end
data/lib/einhorn/compat.rb
CHANGED
@@ -11,10 +11,10 @@ module Einhorn
 
     def self.cloexec!(fd, enable)
       original = fd.fcntl(Fcntl::F_GETFD)
-      if enable
-        new = original | Fcntl::FD_CLOEXEC
+      new = if enable
+        original | Fcntl::FD_CLOEXEC
       else
-        new = original & (-Fcntl::FD_CLOEXEC-1)
+        original & (-Fcntl::FD_CLOEXEC - 1)
       end
       fd.fcntl(Fcntl::F_SETFD, new)
     end
@@ -24,7 +24,7 @@ module Einhorn
     end
 
     # Opts are ignored in Ruby 1.8
-    def self.exec(script, args, opts={})
+    def self.exec(script, args, opts = {})
       cmd = [script, script]
       begin
         Kernel.exec(cmd, *(args + [opts]))
@@ -53,18 +53,18 @@ module Einhorn
 
       # linux / friends
       begin
-        return File.read('/proc/cpuinfo').scan(/^processor\s*:/).count
+        return File.read("/proc/cpuinfo").scan(/^processor\s*:/).count
       rescue Errno::ENOENT
       end
 
       # OS X
-      if RUBY_PLATFORM =~ /darwin/
+      if RUBY_PLATFORM.match?(/darwin/)
         return Integer(`sysctl -n hw.logicalcpu`)
       end
 
       # windows / friends
       begin
-        require 'win32ole'
+        require "win32ole"
       rescue LoadError
       else
         wmi = WIN32OLE.connect("winmgmts://")
data/lib/einhorn/event/abstract_text_descriptor.rb
CHANGED
@@ -6,7 +6,7 @@ module Einhorn::Event
     @@instance_counter = 0
 
     def self.open(sock)
-      self.new(sock)
+      new(sock)
     end
 
     def initialize(sock)
@@ -40,24 +40,22 @@ module Einhorn::Event
     end
 
     def notify_readable
-      while true
-        begin
-          return if @closed
-          chunk = @socket.read_nonblock(1024)
-        rescue Errno::EAGAIN
-          break
-        rescue EOFError, Errno::EPIPE, Errno::ECONNRESET
-          close
-          break
-        rescue => e
-          log_error("Caught unrecognized error while reading from socket: #{e} (#{e.class})")
-          close
-          break
-        else
-          log_debug("read #{chunk.length} bytes (#{chunk.inspect[0..20]})")
-          @read_buffer << chunk
-          process_read_buffer
-        end
+      loop do
+        return if @closed
+        chunk = @socket.read_nonblock(1024)
+      rescue Errno::EAGAIN
+        break
+      rescue EOFError, Errno::EPIPE, Errno::ECONNRESET
+        close
+        break
+      rescue => e
+        log_error("Caught unrecognized error while reading from socket: #{e} (#{e.class})")
+        close
+        break
+      else
+        log_debug("read #{chunk.length} bytes (#{chunk.inspect[0..20]})")
+        @read_buffer << chunk
+        process_read_buffer
       end
     end
 
@@ -72,19 +70,17 @@ module Einhorn::Event
     end
 
     def notify_writeable
-      begin
-        return if @closed
-        written = @socket.write_nonblock(@write_buffer)
-      rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR
-      rescue Errno::EPIPE, Errno::ECONNRESET
-        close
-      rescue => e
-        log_error("Caught unrecognized error while writing to socket: #{e} (#{e.class})")
-        close
-      else
-        log_debug("wrote #{written} bytes")
-        @write_buffer = @write_buffer[written..-1]
-      end
+      return if @closed
+      written = @socket.write_nonblock(@write_buffer)
+    rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR
+    rescue Errno::EPIPE, Errno::ECONNRESET
+      close
+    rescue => e
+      log_error("Caught unrecognized error while writing to socket: #{e} (#{e.class})")
+      close
+    else
+      log_debug("wrote #{written} bytes")
+      @write_buffer = @write_buffer[written..-1]
     end
 
     def to_io
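Both rewrites above lean on the same language feature: since Ruby 2.5, a do...end block (like a method body) may carry rescue/else clauses directly, so the old infinite loops wrapped in begin/end collapse into loop do with inline error handling. A minimal sketch of that control flow:

    results = []
    3.times do |i|
      raise "boom" if i == 1
    rescue RuntimeError
      results << :rescued
    else
      results << :ok
    end
    p results # => [:ok, :rescued, :ok]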
@@ -102,9 +98,9 @@ module Einhorn::Event
     end
 
     def process_read_buffer
-      while true
+      loop do
         if @read_buffer.length > 0
-          break unless split = parse_record
+          break unless (split = parse_record)
           record, remainder = split
           log_debug("Read a record of #{record.length} bytes.")
           @read_buffer = remainder
@@ -117,7 +113,7 @@ module Einhorn::Event
 
     # Override in subclass. This lets you do streaming reads.
     def parse_record
-      [@read_buffer, '']
+      [@read_buffer, ""]
     end
 
     def consume_record(record)