einhorn 0.7.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Changes.md +10 -0
- data/README.md +36 -30
- data/bin/einhorn +17 -2
- data/einhorn.gemspec +23 -21
- data/example/pool_worker.rb +1 -1
- data/example/thin_example +8 -8
- data/example/time_server +5 -5
- data/lib/einhorn/client.rb +8 -9
- data/lib/einhorn/command/interface.rb +100 -95
- data/lib/einhorn/command.rb +167 -88
- data/lib/einhorn/compat.rb +7 -7
- data/lib/einhorn/event/abstract_text_descriptor.rb +31 -35
- data/lib/einhorn/event/ack_timer.rb +2 -2
- data/lib/einhorn/event/command_server.rb +7 -9
- data/lib/einhorn/event/connection.rb +1 -3
- data/lib/einhorn/event/loop_breaker.rb +2 -1
- data/lib/einhorn/event/persistent.rb +2 -2
- data/lib/einhorn/event/timer.rb +4 -4
- data/lib/einhorn/event.rb +29 -20
- data/lib/einhorn/prctl.rb +26 -0
- data/lib/einhorn/prctl_linux.rb +48 -0
- data/lib/einhorn/safe_yaml.rb +17 -0
- data/lib/einhorn/version.rb +1 -1
- data/lib/einhorn/worker.rb +67 -49
- data/lib/einhorn/worker_pool.rb +9 -9
- data/lib/einhorn.rb +155 -126
- metadata +42 -137
- data/.gitignore +0 -17
- data/.travis.yml +0 -10
- data/CONTRIBUTORS +0 -6
- data/Gemfile +0 -11
- data/History.txt +0 -4
- data/README.md.in +0 -76
- data/Rakefile +0 -27
- data/test/_lib.rb +0 -12
- data/test/integration/_lib/fixtures/env_printer/env_printer.rb +0 -26
- data/test/integration/_lib/fixtures/exit_during_upgrade/exiting_server.rb +0 -22
- data/test/integration/_lib/fixtures/exit_during_upgrade/upgrade_reexec.rb +0 -6
- data/test/integration/_lib/fixtures/upgrade_project/upgrading_server.rb +0 -22
- data/test/integration/_lib/helpers/einhorn_helpers.rb +0 -143
- data/test/integration/_lib/helpers.rb +0 -4
- data/test/integration/_lib.rb +0 -6
- data/test/integration/startup.rb +0 -31
- data/test/integration/upgrading.rb +0 -157
- data/test/unit/einhorn/client.rb +0 -88
- data/test/unit/einhorn/command/interface.rb +0 -49
- data/test/unit/einhorn/command.rb +0 -21
- data/test/unit/einhorn/event.rb +0 -89
- data/test/unit/einhorn/worker_pool.rb +0 -39
- data/test/unit/einhorn.rb +0 -58
- /data/{LICENSE → LICENSE.txt} +0 -0
data/lib/einhorn/command.rb
CHANGED
@@ -1,28 +1,25 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "pp"
|
2
|
+
require "set"
|
3
|
+
require "tmpdir"
|
4
4
|
|
5
|
-
require
|
5
|
+
require "einhorn/command/interface"
|
6
|
+
require "einhorn/prctl"
|
6
7
|
|
7
8
|
module Einhorn
|
8
9
|
module Command
|
9
10
|
def self.reap
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
19
|
-
rescue Errno::ECHILD
|
20
|
-
end
|
11
|
+
loop do
|
12
|
+
Einhorn.log_debug("Going to reap a child process")
|
13
|
+
pid = Process.wait(-1, Process::WNOHANG)
|
14
|
+
return unless pid
|
15
|
+
cleanup(pid)
|
16
|
+
Einhorn::Event.break_loop
|
17
|
+
end
|
18
|
+
rescue Errno::ECHILD
|
21
19
|
end
|
22
20
|
|
23
|
-
|
24
|
-
|
25
|
-
unless spec = Einhorn::State.children[pid]
|
21
|
+
def self.cleanup(pid)
|
22
|
+
unless (spec = Einhorn::State.children[pid])
|
26
23
|
Einhorn.log_error("Could not find any config for exited child #{pid.inspect}! This probably indicates a bug in Einhorn.")
|
27
24
|
return
|
28
25
|
end
|
@@ -32,7 +29,7 @@ module Einhorn
|
|
32
29
|
# Unacked worker
|
33
30
|
if spec[:type] == :worker && !spec[:acked]
|
34
31
|
Einhorn::State.consecutive_deaths_before_ack += 1
|
35
|
-
extra =
|
32
|
+
extra = " before it was ACKed"
|
36
33
|
else
|
37
34
|
extra = nil
|
38
35
|
end
|
@@ -47,6 +44,16 @@ module Einhorn
|
|
47
44
|
end
|
48
45
|
end
|
49
46
|
|
47
|
+
def self.register_ping(pid, request_id)
|
48
|
+
unless (spec = Einhorn::State.children[pid])
|
49
|
+
Einhorn.log_error("Could not find state for PID #{pid.inspect}; ignoring ACK.")
|
50
|
+
return
|
51
|
+
end
|
52
|
+
|
53
|
+
spec[:pinged_at] = Time.now
|
54
|
+
spec[:pinged_request_id] = request_id
|
55
|
+
end
|
56
|
+
|
50
57
|
def self.register_manual_ack(pid)
|
51
58
|
ack_mode = Einhorn::State.ack_mode
|
52
59
|
unless ack_mode[:type] == :manual
|
@@ -75,7 +82,7 @@ module Einhorn
|
|
75
82
|
end
|
76
83
|
|
77
84
|
def self.register_ack(pid)
|
78
|
-
unless spec = Einhorn::State.children[pid]
|
85
|
+
unless (spec = Einhorn::State.children[pid])
|
79
86
|
Einhorn.log_error("Could not find state for PID #{pid.inspect}; ignoring ACK.")
|
80
87
|
return
|
81
88
|
end
|
@@ -85,10 +92,8 @@ module Einhorn
|
|
85
92
|
return
|
86
93
|
end
|
87
94
|
|
88
|
-
if Einhorn::State.consecutive_deaths_before_ack > 0
|
89
|
-
|
90
|
-
else
|
91
|
-
extra = nil
|
95
|
+
extra = if Einhorn::State.consecutive_deaths_before_ack > 0
|
96
|
+
", breaking the streak of #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked workers dying"
|
92
97
|
end
|
93
98
|
Einhorn::State.consecutive_deaths_before_ack = 0
|
94
99
|
|
@@ -98,14 +103,14 @@ module Einhorn
|
|
98
103
|
Einhorn::Event.break_loop
|
99
104
|
end
|
100
105
|
|
101
|
-
def self.signal_all(signal, children=nil, record=true)
|
106
|
+
def self.signal_all(signal, children = nil, record = true)
|
102
107
|
children ||= Einhorn::WorkerPool.workers
|
103
|
-
|
104
108
|
signaled = {}
|
109
|
+
|
105
110
|
Einhorn.log_info("Sending #{signal} to #{children.inspect}", :upgrade)
|
106
111
|
|
107
112
|
children.each do |child|
|
108
|
-
unless spec = Einhorn::State.children[child]
|
113
|
+
unless (spec = Einhorn::State.children[child])
|
109
114
|
Einhorn.log_error("Trying to send #{signal} to dead child #{child.inspect}. The fact we tried this probably indicates a bug in Einhorn.", :upgrade)
|
110
115
|
next
|
111
116
|
end
|
@@ -115,11 +120,13 @@ module Einhorn
|
|
115
120
|
Einhorn.log_error("Re-sending #{signal} to already-signaled child #{child.inspect}. It may be slow to spin down, or it may be swallowing #{signal}s.", :upgrade)
|
116
121
|
end
|
117
122
|
spec[:signaled].add(signal)
|
123
|
+
spec[:last_signaled_at] = Time.now
|
118
124
|
end
|
119
125
|
|
120
126
|
begin
|
121
127
|
Process.kill(signal, child)
|
122
128
|
rescue Errno::ESRCH
|
129
|
+
Einhorn.log_debug("Attempted to #{signal} child #{child.inspect} but the process does not exist", :upgrade)
|
123
130
|
else
|
124
131
|
signaled[child] = spec
|
125
132
|
end
|
@@ -129,7 +136,7 @@ module Einhorn
|
|
129
136
|
Einhorn::Event::Timer.open(Einhorn::State.signal_timeout) do
|
130
137
|
children.each do |child|
|
131
138
|
spec = Einhorn::State.children[child]
|
132
|
-
next unless spec # Process is already dead and removed by
|
139
|
+
next unless spec # Process is already dead and removed by cleanup
|
133
140
|
signaled_spec = signaled[child]
|
134
141
|
next unless signaled_spec # We got ESRCH when trying to signal
|
135
142
|
if spec[:spinup_time] != signaled_spec[:spinup_time]
|
@@ -139,15 +146,15 @@ module Einhorn
|
|
139
146
|
|
140
147
|
Einhorn.log_info("Child #{child.inspect} is still active after #{Einhorn::State.signal_timeout}s. Sending SIGKILL.")
|
141
148
|
begin
|
142
|
-
Process.kill(
|
149
|
+
Process.kill("KILL", child)
|
143
150
|
rescue Errno::ESRCH
|
144
151
|
end
|
145
|
-
spec[:signaled].add(
|
152
|
+
spec[:signaled].add("KILL")
|
146
153
|
end
|
147
154
|
end
|
148
|
-
end
|
149
155
|
|
150
|
-
|
156
|
+
Einhorn.log_info("Successfully sent #{signal}s to #{signaled.length} processes: #{signaled.keys}")
|
157
|
+
end
|
151
158
|
end
|
152
159
|
|
153
160
|
def self.increment
|
@@ -155,14 +162,14 @@ module Einhorn
|
|
155
162
|
old = Einhorn::State.config[:number]
|
156
163
|
new = (Einhorn::State.config[:number] += 1)
|
157
164
|
output = "Incrementing number of workers from #{old} -> #{new}"
|
158
|
-
|
165
|
+
warn(output)
|
159
166
|
output
|
160
167
|
end
|
161
168
|
|
162
169
|
def self.decrement
|
163
170
|
if Einhorn::State.config[:number] <= 1
|
164
171
|
output = "Can't decrease number of workers (already at #{Einhorn::State.config[:number]}). Run kill #{$$} if you really want to kill einhorn."
|
165
|
-
|
172
|
+
warn(output)
|
166
173
|
return output
|
167
174
|
end
|
168
175
|
|
@@ -170,7 +177,7 @@ module Einhorn
|
|
170
177
|
old = Einhorn::State.config[:number]
|
171
178
|
new = (Einhorn::State.config[:number] -= 1)
|
172
179
|
output = "Decrementing number of workers from #{old} -> #{new}"
|
173
|
-
|
180
|
+
warn(output)
|
174
181
|
output
|
175
182
|
end
|
176
183
|
|
@@ -183,12 +190,12 @@ module Einhorn
|
|
183
190
|
old = Einhorn::State.config[:number]
|
184
191
|
Einhorn::State.config[:number] = new
|
185
192
|
output = "Altering worker count, #{old} -> #{new}. Will "
|
186
|
-
if old < new
|
187
|
-
|
193
|
+
output << if old < new
|
194
|
+
"spin up additional workers."
|
188
195
|
else
|
189
|
-
|
196
|
+
"gracefully terminate workers."
|
190
197
|
end
|
191
|
-
|
198
|
+
warn(output)
|
192
199
|
output
|
193
200
|
end
|
194
201
|
|
@@ -199,8 +206,8 @@ module Einhorn
|
|
199
206
|
end
|
200
207
|
|
201
208
|
{
|
202
|
-
:
|
203
|
-
:
|
209
|
+
state: global_state,
|
210
|
+
persistent_descriptors: descriptor_state
|
204
211
|
}
|
205
212
|
end
|
206
213
|
|
@@ -245,8 +252,8 @@ module Einhorn
|
|
245
252
|
|
246
253
|
begin
|
247
254
|
Einhorn.initialize_reload_environment
|
248
|
-
respawn_commandline = Einhorn.upgrade_commandline([
|
249
|
-
respawn_commandline << {
|
255
|
+
respawn_commandline = Einhorn.upgrade_commandline(["--with-state-fd", read.fileno.to_s])
|
256
|
+
respawn_commandline << {close_others: false}
|
250
257
|
Einhorn.log_info("About to re-exec einhorn master as #{respawn_commandline.inspect}", :reload)
|
251
258
|
Einhorn::Compat.exec(*respawn_commandline)
|
252
259
|
rescue SystemCallError => e
|
@@ -263,30 +270,34 @@ module Einhorn
|
|
263
270
|
end
|
264
271
|
end
|
265
272
|
|
266
|
-
def self.spinup(cmd=nil)
|
273
|
+
def self.spinup(cmd = nil)
|
267
274
|
cmd ||= Einhorn::State.cmd
|
268
275
|
index = next_index
|
269
|
-
|
270
|
-
|
276
|
+
expected_ppid = Process.pid
|
277
|
+
pid = if Einhorn::State.preloaded
|
278
|
+
fork do
|
271
279
|
Einhorn::TransientState.whatami = :worker
|
272
280
|
prepare_child_process
|
273
281
|
|
274
|
-
Einhorn.log_info(
|
282
|
+
Einhorn.log_info("About to tear down Einhorn state and run einhorn_main")
|
275
283
|
Einhorn::Command::Interface.uninit
|
276
284
|
Einhorn::Event.close_all_for_worker
|
277
285
|
Einhorn.set_argv(cmd, true)
|
278
286
|
|
279
287
|
reseed_random
|
280
288
|
|
289
|
+
setup_parent_watch(expected_ppid)
|
290
|
+
|
281
291
|
prepare_child_environment(index)
|
282
292
|
einhorn_main
|
283
293
|
end
|
284
294
|
else
|
285
|
-
|
295
|
+
fork do
|
286
296
|
Einhorn::TransientState.whatami = :worker
|
287
297
|
prepare_child_process
|
288
298
|
|
289
299
|
Einhorn.log_info("About to exec #{cmd.inspect}")
|
300
|
+
Einhorn::Command::Interface.uninit
|
290
301
|
# Here's the only case where cloexec would help. Since we
|
291
302
|
# have to track and manually close FDs for other cases, we
|
292
303
|
# may as well just reuse close_all rather than also set
|
@@ -295,20 +306,23 @@ module Einhorn
|
|
295
306
|
# Note that Ruby 1.9's close_others option is useful here.
|
296
307
|
Einhorn::Event.close_all_for_worker
|
297
308
|
|
309
|
+
setup_parent_watch(expected_ppid)
|
310
|
+
|
298
311
|
prepare_child_environment(index)
|
299
|
-
Einhorn::Compat.exec(cmd[0], cmd[1..-1], :
|
312
|
+
Einhorn::Compat.exec(cmd[0], cmd[1..-1], close_others: false)
|
300
313
|
end
|
301
314
|
end
|
302
315
|
|
303
316
|
Einhorn.log_info("===> Launched #{pid} (index: #{index})", :upgrade)
|
304
317
|
Einhorn::State.last_spinup = Time.now
|
305
318
|
Einhorn::State.children[pid] = {
|
306
|
-
:
|
307
|
-
:
|
308
|
-
:
|
309
|
-
:
|
310
|
-
:
|
311
|
-
:
|
319
|
+
type: :worker,
|
320
|
+
version: Einhorn::State.version,
|
321
|
+
acked: false,
|
322
|
+
signaled: Set.new,
|
323
|
+
last_signaled_at: nil,
|
324
|
+
index: index,
|
325
|
+
spinup_time: Einhorn::State.last_spinup
|
312
326
|
}
|
313
327
|
|
314
328
|
# Set up whatever's needed for ACKing
|
@@ -317,6 +331,7 @@ module Einhorn
|
|
317
331
|
when :timer
|
318
332
|
Einhorn::Event::ACKTimer.open(ack_mode[:timeout], pid)
|
319
333
|
when :manual
|
334
|
+
# nothing to do
|
320
335
|
else
|
321
336
|
Einhorn.log_error("Unrecognized ACK mode #{type.inspect}")
|
322
337
|
end
|
@@ -324,24 +339,18 @@ module Einhorn
|
|
324
339
|
|
325
340
|
def self.prepare_child_environment(index)
|
326
341
|
# This is run from the child
|
327
|
-
ENV[
|
328
|
-
ENV[
|
342
|
+
ENV["EINHORN_MASTER_PID"] = Process.ppid.to_s
|
343
|
+
ENV["EINHORN_SOCK_PATH"] = Einhorn::Command::Interface.socket_path
|
329
344
|
if Einhorn::State.command_socket_as_fd
|
330
345
|
socket = UNIXSocket.open(Einhorn::Command::Interface.socket_path)
|
331
346
|
Einhorn::TransientState.socket_handles << socket
|
332
|
-
ENV[
|
347
|
+
ENV["EINHORN_SOCK_FD"] = socket.fileno.to_s
|
333
348
|
end
|
334
349
|
|
335
|
-
ENV[
|
336
|
-
Einhorn::State.bind_fds.each_with_index {|fd, i| ENV["EINHORN_FD_#{i}"] = fd.to_s}
|
337
|
-
|
338
|
-
ENV['EINHORN_CHILD_INDEX'] = index.to_s
|
350
|
+
ENV["EINHORN_FD_COUNT"] = Einhorn::State.bind_fds.length.to_s
|
351
|
+
Einhorn::State.bind_fds.each_with_index { |fd, i| ENV["EINHORN_FD_#{i}"] = fd.to_s }
|
339
352
|
|
340
|
-
|
341
|
-
# match Upstart's nominal internal support for space-separated
|
342
|
-
# FD lists, but nobody uses that in practice, and it makes
|
343
|
-
# finding individual FDs more difficult
|
344
|
-
ENV['EINHORN_FDS'] = Einhorn::State.bind_fds.map(&:to_s).join(' ')
|
353
|
+
ENV["EINHORN_CHILD_INDEX"] = index.to_s
|
345
354
|
end
|
346
355
|
|
347
356
|
# Reseed common ruby random number generators.
|
@@ -364,11 +373,11 @@ module Einhorn
|
|
364
373
|
|
365
374
|
# reseed OpenSSL::Random if it's loaded
|
366
375
|
if defined?(OpenSSL::Random)
|
367
|
-
if defined?(Random)
|
368
|
-
|
376
|
+
seed = if defined?(Random)
|
377
|
+
Random.new_seed
|
369
378
|
else
|
370
379
|
# Ruby 1.8
|
371
|
-
|
380
|
+
rand
|
372
381
|
end
|
373
382
|
OpenSSL::Random.seed(seed.to_s)
|
374
383
|
end
|
@@ -379,6 +388,24 @@ module Einhorn
|
|
379
388
|
Einhorn.renice_self
|
380
389
|
end
|
381
390
|
|
391
|
+
def self.setup_parent_watch(expected_ppid)
|
392
|
+
if Einhorn::State.kill_children_on_exit
|
393
|
+
begin
|
394
|
+
# NB: Having the USR2 signal handler set to terminate (the default) at
|
395
|
+
# this point is required. If it's set to a ruby handler, there are
|
396
|
+
# race conditions that could cause the worker to leak.
|
397
|
+
|
398
|
+
Einhorn::Prctl.set_pdeathsig("USR2")
|
399
|
+
if Process.ppid != expected_ppid
|
400
|
+
Einhorn.log_error("Parent process died before we set pdeathsig; cowardly refusing to exec child process.")
|
401
|
+
exit(1)
|
402
|
+
end
|
403
|
+
rescue NotImplementedError
|
404
|
+
# Unsupported OS; silently continue.
|
405
|
+
end
|
406
|
+
end
|
407
|
+
end
|
408
|
+
|
382
409
|
# @param options [Hash]
|
383
410
|
#
|
384
411
|
# @option options [Boolean] :smooth (false) Whether to perform a smooth or
|
@@ -387,18 +414,19 @@ module Einhorn
|
|
387
414
|
# upgrade, bring up all the new workers and don't cull any old workers
|
388
415
|
# until they're all up.
|
389
416
|
#
|
390
|
-
def self.full_upgrade(options={})
|
391
|
-
options = {:
|
417
|
+
def self.full_upgrade(options = {})
|
418
|
+
options = {smooth: false}.merge(options)
|
392
419
|
|
393
420
|
Einhorn::State.smooth_upgrade = options.fetch(:smooth)
|
394
421
|
reload_for_upgrade
|
395
422
|
end
|
396
423
|
|
397
424
|
def self.full_upgrade_smooth
|
398
|
-
full_upgrade(:
|
425
|
+
full_upgrade(smooth: true)
|
399
426
|
end
|
427
|
+
|
400
428
|
def self.full_upgrade_fleet
|
401
|
-
full_upgrade(:
|
429
|
+
full_upgrade(smooth: false)
|
402
430
|
end
|
403
431
|
|
404
432
|
def self.reload_for_upgrade
|
@@ -411,8 +439,8 @@ module Einhorn
|
|
411
439
|
Einhorn.log_info("Currently upgrading (#{Einhorn::WorkerPool.ack_count} / #{Einhorn::WorkerPool.ack_target} ACKs; bumping version and starting over)...", :upgrade)
|
412
440
|
else
|
413
441
|
Einhorn::State.upgrading = true
|
414
|
-
u_type = Einhorn::State.smooth_upgrade ?
|
415
|
-
Einhorn.log_info("Starting #{u_type} upgrade from version"
|
442
|
+
u_type = Einhorn::State.smooth_upgrade ? "smooth" : "fleet"
|
443
|
+
Einhorn.log_info("Starting #{u_type} upgrade from version" \
|
416
444
|
" #{Einhorn::State.version}...", :upgrade)
|
417
445
|
end
|
418
446
|
|
@@ -459,10 +487,45 @@ module Einhorn
|
|
459
487
|
end
|
460
488
|
|
461
489
|
if unsignaled > target
|
462
|
-
excess = Einhorn::WorkerPool.unsignaled_modern_workers_with_priority[0...(unsignaled-target)]
|
490
|
+
excess = Einhorn::WorkerPool.unsignaled_modern_workers_with_priority[0...(unsignaled - target)]
|
463
491
|
Einhorn.log_info("Have too many workers at the current version, so killing off #{excess.length} of them.")
|
464
492
|
signal_all("USR2", excess)
|
465
493
|
end
|
494
|
+
|
495
|
+
# Ensure all signaled workers that have outlived signal_timeout get killed.
|
496
|
+
kill_expired_signaled_workers if Einhorn::State.signal_timeout
|
497
|
+
end
|
498
|
+
|
499
|
+
def self.kill_expired_signaled_workers
|
500
|
+
now = Time.now
|
501
|
+
children = Einhorn::State.children.select do |_, c|
|
502
|
+
# Only interested in USR2 signaled workers
|
503
|
+
next unless c[:signaled] && c[:signaled].length > 0
|
504
|
+
next unless c[:signaled].include?("USR2")
|
505
|
+
|
506
|
+
# Ignore processes that have received KILL since it can't be trapped.
|
507
|
+
next if c[:signaled].include?("KILL")
|
508
|
+
|
509
|
+
# Filter out those children that have not reached signal_timeout yet.
|
510
|
+
next unless c[:last_signaled_at]
|
511
|
+
expires_at = c[:last_signaled_at] + Einhorn::State.signal_timeout
|
512
|
+
next unless now >= expires_at
|
513
|
+
|
514
|
+
true
|
515
|
+
end
|
516
|
+
|
517
|
+
Einhorn.log_info("#{children.size} expired signaled workers found.") if children.size > 0
|
518
|
+
children.each do |pid, child|
|
519
|
+
Einhorn.log_info("Child #{pid.inspect} was signaled #{(child[:last_signaled_at] - now).abs.to_i}s ago. Sending SIGKILL as it is still active after #{Einhorn::State.signal_timeout}s timeout.", :upgrade)
|
520
|
+
begin
|
521
|
+
Process.kill("KILL", pid)
|
522
|
+
rescue Errno::ESRCH
|
523
|
+
Einhorn.log_debug("Attempted to SIGKILL child #{pid.inspect} but the process does not exist.")
|
524
|
+
end
|
525
|
+
|
526
|
+
child[:signaled].add("KILL")
|
527
|
+
child[:last_signaled_at] = Time.now
|
528
|
+
end
|
466
529
|
end
|
467
530
|
|
468
531
|
def self.stop_respawning
|
@@ -487,7 +550,7 @@ module Einhorn
|
|
487
550
|
return
|
488
551
|
end
|
489
552
|
Einhorn.log_info("Launching #{missing} new workers")
|
490
|
-
missing.times {spinup}
|
553
|
+
missing.times { spinup }
|
491
554
|
end
|
492
555
|
|
493
556
|
# Unbounded exponential backoff is not a thing: we run into problems if
|
@@ -496,10 +559,12 @@ module Einhorn
|
|
496
559
|
# don't wait until the heat death of the universe to spin up new capacity.
|
497
560
|
MAX_SPINUP_INTERVAL = 30.0
|
498
561
|
|
499
|
-
def self.replenish_gradually(max_unacked=nil)
|
562
|
+
def self.replenish_gradually(max_unacked = nil)
|
500
563
|
return if Einhorn::TransientState.has_outstanding_spinup_timer
|
501
564
|
return unless Einhorn::WorkerPool.missing_worker_count > 0
|
502
565
|
|
566
|
+
max_unacked ||= Einhorn::State.config[:max_unacked]
|
567
|
+
|
503
568
|
# default to spinning up at most NCPU workers at once
|
504
569
|
unless max_unacked
|
505
570
|
begin
|
@@ -517,16 +582,13 @@ module Einhorn
|
|
517
582
|
|
518
583
|
# Exponentially backoff automated spinup if we're just having
|
519
584
|
# things die before ACKing
|
520
|
-
spinup_interval = Einhorn::State.config[:seconds] * (1.5
|
585
|
+
spinup_interval = Einhorn::State.config[:seconds] * (1.5**Einhorn::State.consecutive_deaths_before_ack)
|
521
586
|
spinup_interval = [spinup_interval, MAX_SPINUP_INTERVAL].min
|
522
587
|
seconds_ago = (Time.now - Einhorn::State.last_spinup).to_f
|
523
588
|
|
524
589
|
if seconds_ago > spinup_interval
|
525
|
-
|
526
|
-
|
527
|
-
Einhorn.log_debug("There are #{unacked} unacked new workers, and max_unacked is #{max_unacked}, so not spinning up a new process")
|
528
|
-
else
|
529
|
-
msg = "Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so spinning up a new process"
|
590
|
+
if trigger_spinup?(max_unacked)
|
591
|
+
msg = "Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so spinning up a new process."
|
530
592
|
|
531
593
|
if Einhorn::State.consecutive_deaths_before_ack > 0
|
532
594
|
Einhorn.log_info("#{msg} (there have been #{Einhorn::State.consecutive_deaths_before_ack} consecutive unacked worker deaths)", :upgrade)
|
@@ -537,7 +599,7 @@ module Einhorn
|
|
537
599
|
spinup
|
538
600
|
end
|
539
601
|
else
|
540
|
-
Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so not spinning up a new process")
|
602
|
+
Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}s, so not spinning up a new process.")
|
541
603
|
end
|
542
604
|
|
543
605
|
Einhorn::TransientState.has_outstanding_spinup_timer = true
|
@@ -547,18 +609,35 @@ module Einhorn
|
|
547
609
|
end
|
548
610
|
end
|
549
611
|
|
550
|
-
def self.quieter(log=true)
|
612
|
+
def self.quieter(log = true)
|
551
613
|
Einhorn::State.verbosity += 1 if Einhorn::State.verbosity < 2
|
552
614
|
output = "Verbosity set to #{Einhorn::State.verbosity}"
|
553
615
|
Einhorn.log_info(output) if log
|
554
616
|
output
|
555
617
|
end
|
556
618
|
|
557
|
-
def self.louder(log=true)
|
619
|
+
def self.louder(log = true)
|
558
620
|
Einhorn::State.verbosity -= 1 if Einhorn::State.verbosity > 0
|
559
621
|
output = "Verbosity set to #{Einhorn::State.verbosity}"
|
560
622
|
Einhorn.log_info(output) if log
|
561
623
|
output
|
562
624
|
end
|
625
|
+
|
626
|
+
def self.trigger_spinup?(max_unacked)
|
627
|
+
unacked = Einhorn::WorkerPool.unacked_unsignaled_modern_workers.length
|
628
|
+
if unacked >= max_unacked
|
629
|
+
Einhorn.log_info("There are #{unacked} unacked new workers, and max_unacked is #{max_unacked}, so not spinning up a new process.")
|
630
|
+
return false
|
631
|
+
elsif Einhorn::State.config[:max_upgrade_additional]
|
632
|
+
capacity_exceeded = (Einhorn::State.config[:number] + Einhorn::State.config[:max_upgrade_additional]) - Einhorn::WorkerPool.workers_with_state.length
|
633
|
+
if capacity_exceeded < 0
|
634
|
+
Einhorn.log_info("Over worker capacity by #{capacity_exceeded.abs} during upgrade, #{Einhorn::WorkerPool.modern_workers.length} new workers of #{Einhorn::WorkerPool.workers_with_state.length} total. Waiting for old workers to exit before spinning up a process.")
|
635
|
+
|
636
|
+
return false
|
637
|
+
end
|
638
|
+
end
|
639
|
+
|
640
|
+
true
|
641
|
+
end
|
563
642
|
end
|
564
643
|
end
|
data/lib/einhorn/compat.rb
CHANGED
@@ -11,10 +11,10 @@ module Einhorn
|
|
11
11
|
|
12
12
|
def self.cloexec!(fd, enable)
|
13
13
|
original = fd.fcntl(Fcntl::F_GETFD)
|
14
|
-
if enable
|
15
|
-
|
14
|
+
new = if enable
|
15
|
+
original | Fcntl::FD_CLOEXEC
|
16
16
|
else
|
17
|
-
|
17
|
+
original & (-Fcntl::FD_CLOEXEC - 1)
|
18
18
|
end
|
19
19
|
fd.fcntl(Fcntl::F_SETFD, new)
|
20
20
|
end
|
@@ -24,7 +24,7 @@ module Einhorn
|
|
24
24
|
end
|
25
25
|
|
26
26
|
# Opts are ignored in Ruby 1.8
|
27
|
-
def self.exec(script, args, opts={})
|
27
|
+
def self.exec(script, args, opts = {})
|
28
28
|
cmd = [script, script]
|
29
29
|
begin
|
30
30
|
Kernel.exec(cmd, *(args + [opts]))
|
@@ -53,18 +53,18 @@ module Einhorn
|
|
53
53
|
|
54
54
|
# linux / friends
|
55
55
|
begin
|
56
|
-
return File.read(
|
56
|
+
return File.read("/proc/cpuinfo").scan(/^processor\s*:/).count
|
57
57
|
rescue Errno::ENOENT
|
58
58
|
end
|
59
59
|
|
60
60
|
# OS X
|
61
|
-
if RUBY_PLATFORM
|
61
|
+
if RUBY_PLATFORM.match?(/darwin/)
|
62
62
|
return Integer(`sysctl -n hw.logicalcpu`)
|
63
63
|
end
|
64
64
|
|
65
65
|
# windows / friends
|
66
66
|
begin
|
67
|
-
require
|
67
|
+
require "win32ole"
|
68
68
|
rescue LoadError
|
69
69
|
else
|
70
70
|
wmi = WIN32OLE.connect("winmgmts://")
|
@@ -6,7 +6,7 @@ module Einhorn::Event
|
|
6
6
|
@@instance_counter = 0
|
7
7
|
|
8
8
|
def self.open(sock)
|
9
|
-
|
9
|
+
new(sock)
|
10
10
|
end
|
11
11
|
|
12
12
|
def initialize(sock)
|
@@ -40,24 +40,22 @@ module Einhorn::Event
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def notify_readable
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
process_read_buffer
|
60
|
-
end
|
43
|
+
loop do
|
44
|
+
return if @closed
|
45
|
+
chunk = @socket.read_nonblock(1024)
|
46
|
+
rescue Errno::EAGAIN
|
47
|
+
break
|
48
|
+
rescue EOFError, Errno::EPIPE, Errno::ECONNRESET
|
49
|
+
close
|
50
|
+
break
|
51
|
+
rescue => e
|
52
|
+
log_error("Caught unrecognized error while reading from socket: #{e} (#{e.class})")
|
53
|
+
close
|
54
|
+
break
|
55
|
+
else
|
56
|
+
log_debug("read #{chunk.length} bytes (#{chunk.inspect[0..20]})")
|
57
|
+
@read_buffer << chunk
|
58
|
+
process_read_buffer
|
61
59
|
end
|
62
60
|
end
|
63
61
|
|
@@ -72,19 +70,17 @@ module Einhorn::Event
|
|
72
70
|
end
|
73
71
|
|
74
72
|
def notify_writeable
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
@write_buffer = @write_buffer[written..-1]
|
87
|
-
end
|
73
|
+
return if @closed
|
74
|
+
written = @socket.write_nonblock(@write_buffer)
|
75
|
+
rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR
|
76
|
+
rescue Errno::EPIPE, Errno::ECONNRESET
|
77
|
+
close
|
78
|
+
rescue => e
|
79
|
+
log_error("Caught unrecognized error while writing to socket: #{e} (#{e.class})")
|
80
|
+
close
|
81
|
+
else
|
82
|
+
log_debug("wrote #{written} bytes")
|
83
|
+
@write_buffer = @write_buffer[written..-1]
|
88
84
|
end
|
89
85
|
|
90
86
|
def to_io
|
@@ -102,9 +98,9 @@ module Einhorn::Event
|
|
102
98
|
end
|
103
99
|
|
104
100
|
def process_read_buffer
|
105
|
-
|
101
|
+
loop do
|
106
102
|
if @read_buffer.length > 0
|
107
|
-
break unless split = parse_record
|
103
|
+
break unless (split = parse_record)
|
108
104
|
record, remainder = split
|
109
105
|
log_debug("Read a record of #{record.length} bytes.")
|
110
106
|
@read_buffer = remainder
|
@@ -117,7 +113,7 @@ module Einhorn::Event
|
|
117
113
|
|
118
114
|
# Override in subclass. This lets you do streaming reads.
|
119
115
|
def parse_record
|
120
|
-
[@read_buffer,
|
116
|
+
[@read_buffer, ""]
|
121
117
|
end
|
122
118
|
|
123
119
|
def consume_record(record)
|