einhorn 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,336 @@
1
+ require 'pp'
2
+ require 'set'
3
+ require 'tmpdir'
4
+ require 'json'
5
+
6
+ require 'einhorn/command/interface'
7
+
8
+ module Einhorn
9
+ module Command
10
# Collect every child process that has already exited, without blocking.
#
# Each reaped pid is passed to +mourn+, and the event loop is asked to break
# after every one. Returns nil once Process.wait reports nothing left to
# collect, or when there are no children at all (Errno::ECHILD).
def self.reap
  loop do
    Einhorn.log_debug('Going to reap a child process')

    # WNOHANG: Process.wait hands back nil instead of blocking when no
    # child has exited yet.
    reaped = Process.wait(-1, Process::WNOHANG)
    return unless reaped

    mourn(reaped)
    Einhorn::Event.break_loop
  end
rescue Errno::ECHILD
  # No child processes exist; nothing to reap.
end
23
+
24
# Mourn the death of your child: forget the exited pid and log what kind of
# process it was.
#
# pid - process id of the exited child (as returned by Process.wait).
#
# Logs an error and leaves the children table untouched when the pid is not
# present in Einhorn::State.children.
def self.mourn(pid)
  spec = Einhorn::State.children[pid]
  unless spec
    Einhorn.log_error("Could not find any config for exited child #{pid.inspect}! This probably indicates a bug in Einhorn.")
    return
  end

  Einhorn::State.children.delete(pid)

  type = spec[:type]
  if type == :worker
    Einhorn.log_info("===> Exited worker #{pid.inspect}")
  elsif type == :state_passer
    Einhorn.log_debug("===> Exited state passing process #{pid.inspect}")
  else
    Einhorn.log_error("===> Exited process #{pid.inspect} has unrecgonized type #{type.inspect}: #{spec.inspect}")
  end
end
42
+
43
# Handle an explicit ACK sent for +pid+.
#
# The ACK is only honoured when the configured ack_mode type is :manual;
# otherwise an error is logged and the ACK is ignored.
def self.register_manual_ack(pid)
  mode = Einhorn::State.ack_mode

  if mode[:type] == :manual
    Einhorn.log_info("Received a manual ACK from #{pid.inspect}")
    register_ack(pid)
  else
    Einhorn.log_error("Received a manual ACK for #{pid.inspect}, but ack_mode is #{mode.inspect}. Ignoring ACK.")
    nil
  end
end
52
+
53
# Handle an ACK produced by a timer firing for +pid+.
#
# time - the timer duration in seconds (only used in the log message).
# pid  - the worker the timer was armed for.
#
# Ignored (with a log line) when ack_mode is not :timer or when the worker
# is no longer present in Einhorn::State.children.
def self.register_timer_ack(time, pid)
  mode = Einhorn::State.ack_mode

  if mode[:type] != :timer
    Einhorn.log_error("Received a timer ACK for #{pid.inspect}, but ack_mode is #{mode.inspect}. Ignoring ACK.")
    nil
  elsif !Einhorn::State.children[pid]
    # TODO: Maybe cancel pending ACK timers upon death?
    Einhorn.log_debug("Worker #{pid.inspect} died before its timer ACK happened.")
    nil
  else
    Einhorn.log_info("Worker #{pid.inspect} has been up for #{time}s, so we are considering it alive.")
    register_ack(pid)
  end
end
69
+
70
# Mark the child +pid+ as ACKed and wake the event loop.
#
# Logs an error and does nothing when the pid is unknown or has already
# been ACKed.
def self.register_ack(pid)
  spec = Einhorn::State.children[pid]

  if !spec
    Einhorn.log_error("Could not find state for PID #{pid.inspect}; ignoring ACK.")
    nil
  elsif spec[:acked]
    Einhorn.log_error("Pid #{pid.inspect} already ACKed; ignoring new ACK.")
    nil
  else
    spec[:acked] = true
    Einhorn.log_info("Up to #{Einhorn::WorkerPool.ack_count} / #{Einhorn::WorkerPool.ack_target} #{Einhorn::State.ack_mode[:type]} ACKs")
    # Could call cull here directly instead, I believe.
    Einhorn::Event.break_loop
  end
end
86
+
87
# Send +signal+ to each pid in +children+, at most once per child.
#
# signal   - signal name or number accepted by Process.kill.
# children - enumerable of child pids.
#
# A child is skipped (with an error log) when it is not present in
# Einhorn::State.children or when it has already been signaled through this
# method. The child's spec[:signaled] set is used as the "already signaled"
# marker; it is created on demand so that specs registered without one
# (e.g. the :state_passer spec) do not raise NoMethodError on nil.
def self.signal_all(signal, children)
  Einhorn.log_info("Sending #{signal} to #{children.inspect}")

  children.each do |child|
    spec = Einhorn::State.children[child]
    unless spec
      Einhorn.log_error("Trying to send #{signal} to dead child #{child.inspect}. The fact we tried this probably indicates a bug in Einhorn.")
      next
    end

    signaled = (spec[:signaled] ||= Set.new)
    if signaled.include?(child)
      Einhorn.log_error("Not sending #{signal} to already-signaled child #{child.inspect}. The fact we tried this probably indicates a bug in Einhorn.")
      next
    end
    signaled.add(child)

    begin
      Process.kill(signal, child)
    rescue Errno::ESRCH
      # The process is already gone; keep this best-effort, but leave a
      # trace instead of swallowing the error silently.
      Einhorn.log_debug("Could not send #{signal} to #{child.inspect}: no such process.")
    end
  end
end
108
+
109
# Raise the configured worker count by one.
#
# Breaks the event loop, prints the change to $stderr and returns the same
# message string.
def self.increment
  Einhorn::Event.break_loop

  before = Einhorn::State.config[:number]
  after = (Einhorn::State.config[:number] += 1)

  message = "Incrementing number of workers from #{before} -> #{after}"
  $stderr.puts(message)
  message
end
117
+
118
# Lower the configured worker count by one, never going below one.
#
# Prints the outcome to $stderr and returns the same message string. When
# the count is already <= 1 nothing is changed and the event loop is not
# broken.
def self.decrement
  current = Einhorn::State.config[:number]

  if current <= 1
    message = "Can't decrease number of workers (already at #{Einhorn::State.config[:number]}). Run kill #{Process.pid} if you really want to kill einhorn."
    $stderr.puts message
    return message
  end

  Einhorn::Event.break_loop

  before = Einhorn::State.config[:number]
  after = (Einhorn::State.config[:number] -= 1)

  message = "Decrementing number of workers from #{before} -> #{after}"
  $stderr.puts(message)
  message
end
132
+
133
# Build the hash that gets serialized when state is handed over.
#
# Returns a hash with:
#   :state                  - Einhorn::State.state
#   :persistent_descriptors - the to_state of every descriptor reported by
#                             Einhorn::Event.persistent_descriptors
def self.dumpable_state
  snapshot = Einhorn::State.state
  descriptors = Einhorn::Event.persistent_descriptors.map { |descriptor| descriptor.to_state }

  {:state => snapshot, :persistent_descriptors => descriptors}
end
144
+
145
# Re-exec the Einhorn script, handing the current state to the new process.
#
# A forked child (marked :state_passer) writes the YAML dump of
# +dumpable_state+ into a pipe. This process then uninitializes the event
# system and execs the script with `--with-state-fd <fd>` naming the read end
# of that pipe, followed by `--` and Einhorn::State.cmd. Does not return when
# exec succeeds.
#
# NOTE(review): YAML is not required in this file; presumably it is loaded
# elsewhere before reload can run -- confirm.
def self.reload
  Einhorn.log_info("Reloading einhorn (#{Einhorn::TransientState.script_name})...")

  # In case there's anything lurking
  $stdout.flush

  # Spawn a child to pass the state through the pipe
  read, write = IO.pipe

  # The read end must survive the exec below. Rubies that open descriptors
  # close-on-exec by default would otherwise close it, leaving the new
  # process with a dead --with-state-fd. Guarded so Rubies without
  # IO#close_on_exec= keep working.
  read.close_on_exec = false if read.respond_to?(:close_on_exec=)

  fork do
    Einhorn::TransientState.whatami = :state_passer
    Einhorn::State.generation += 1
    # Register ourselves in the state we are about to dump.
    Einhorn::State.children[$$] = {
      :type => :state_passer
    }
    read.close

    write.write(YAML.dump(dumpable_state))
    write.close

    exit(0)
  end
  write.close

  Einhorn::Event.uninit

  exec [Einhorn::TransientState.script_name, Einhorn::TransientState.script_name], *(['--with-state-fd', read.fileno.to_s, '--'] + Einhorn::State.cmd)
end
172
+
173
# Fork one new worker and register it.
#
# cmd - command array for the worker; defaults to Einhorn::State.cmd.
#
# When Einhorn::TransientState.preloaded is set, the forked child tears down
# Einhorn's own state and calls +einhorn_main+ in-process; otherwise it execs
# +cmd+. The parent records the child in Einhorn::State.children, stamps
# Einhorn::State.last_spinup, and opens an ACKTimer when ack_mode is :timer.
def self.spinup(cmd=nil)
  cmd ||= Einhorn::State.cmd

  pid =
    if Einhorn::TransientState.preloaded
      fork do
        Einhorn::TransientState.whatami = :worker

        Einhorn.log_info('About to tear down Einhorn state and run einhorn_main')
        Einhorn::Command::Interface.uninit
        Einhorn::Event.close_all_for_worker
        Einhorn.set_argv(cmd, true)

        pass_command_socket_info
        einhorn_main
      end
    else
      fork do
        Einhorn::TransientState.whatami = :worker

        Einhorn.log_info("About to exec #{cmd.inspect}")
        # Here's the only case where cloexec would help. Since we
        # have to track and manually close FDs for other cases, we
        # may as well just reuse close_all rather than also set
        # cloexec on everything.
        Einhorn::Event.close_all_for_worker

        pass_command_socket_info
        exec [cmd[0], cmd[0]], *cmd[1..-1]
      end
    end

  Einhorn.log_info("===> Launched #{pid}")
  Einhorn::State.children[pid] = {
    :type => :worker,
    :version => Einhorn::State.version,
    :acked => false,
    :signaled => Set.new
  }
  Einhorn::State.last_spinup = Time.now

  # Set up whatever's needed for ACKing
  ack_mode = Einhorn::State.ack_mode
  type = ack_mode[:type]
  if type == :timer
    Einhorn::Event::ACKTimer.open(ack_mode[:timeout], pid)
  elsif type != :manual
    Einhorn.log_error("Unrecognized ACK mode #{type.inspect}")
  end
end
222
+
223
# Export the information a worker needs to reach the master's command socket.
#
# This is run from the child. Sets EINHORN_MASTER_PID to the parent pid and
# EINHORN_SOCK_PATH to the command socket path. When
# Einhorn::State.command_socket_as_fd is set, it also opens a connection to
# that socket, keeps a reference in Einhorn::TransientState.socket_handles and
# exports its descriptor number as EINHORN_FD.
def self.pass_command_socket_info
  ENV['EINHORN_MASTER_PID'] = Process.ppid.to_s
  ENV['EINHORN_SOCK_PATH'] = Einhorn::Command::Interface.socket_path
  return unless Einhorn::State.command_socket_as_fd

  socket = UNIXSocket.open(Einhorn::Command::Interface.socket_path)
  # The descriptor number is advertised through the environment, so it has to
  # stay open if the worker is subsequently exec'd. Rubies that open
  # descriptors close-on-exec by default would close it otherwise.
  socket.close_on_exec = false if socket.respond_to?(:close_on_exec=)
  Einhorn::TransientState.socket_handles << socket
  ENV['EINHORN_FD'] = socket.fileno.to_s
end
233
+
234
# Entry point for an upgrade request.
#
# Reloads Einhorn itself first when Einhorn::State.path is set and a
# preload-upgrade reload is not already in progress; otherwise upgrades the
# workers directly.
def self.full_upgrade
  needs_reload = Einhorn::State.path && !Einhorn::State.reloading_for_preload_upgrade
  return reload_for_preload_upgrade if needs_reload

  upgrade_workers
end
241
+
242
# Flag that the upcoming reload is part of a preload upgrade, then reload.
#
# The flag is set before +reload+ is called; +full_upgrade+ checks it to
# avoid reloading again.
def self.reload_for_preload_upgrade
  Einhorn::State.reloading_for_preload_upgrade = true

  reload
end
246
+
247
# Begin (or restart) a worker upgrade: bump the state version and launch
# replacement workers immediately.
#
# When an upgrade is already in progress the current ACK progress is logged
# and the version is bumped again. Otherwise the upgrading flag is set and
# the version being upgraded TO is logged. The version is incremented only
# after logging, so the message uses version + 1; this matches the number the
# completion message reports later.
def self.upgrade_workers
  if Einhorn::State.upgrading
    Einhorn.log_info("Currently upgrading (#{Einhorn::WorkerPool.ack_count} / #{Einhorn::WorkerPool.ack_target} ACKs; bumping version and starting over)...")
  else
    Einhorn::State.upgrading = true
    Einhorn.log_info("Starting upgrade to #{Einhorn::State.version + 1}...")
  end

  Einhorn::State.version += 1
  replenish_immediately
end
258
+
259
# Retire workers that are no longer wanted.
#
# 1. Marks an in-progress upgrade as complete once ack_count reaches
#    ack_target.
# 2. When no upgrade is in progress, sends USR2 to every old worker.
# 3. When more workers are ACKed than the target, sends USR2 to the surplus,
#    taken from the front of acked_unsignaled_modern_workers.
def self.cull
  acked = Einhorn::WorkerPool.ack_count
  target = Einhorn::WorkerPool.ack_target

  upgrade_finished = Einhorn::State.upgrading && acked >= target
  if upgrade_finished
    Einhorn::State.upgrading = false
    Einhorn.log_info("Upgrade to version #{Einhorn::State.version} complete.")
  end

  stale = Einhorn::WorkerPool.old_workers
  unless Einhorn::State.upgrading || stale.length <= 0
    Einhorn.log_info("Killing off #{stale.length} old workers.")
    signal_all("USR2", stale)
  end

  if acked > target
    surplus = acked - target
    excess = Einhorn::WorkerPool.acked_unsignaled_modern_workers[0...surplus]
    Einhorn.log_info("Have too many workers at the current version, so killing off #{excess.length} of them.")
    signal_all("USR2", excess)
  end
end
280
+
281
# Top up the worker pool, if respawning is enabled.
#
# Before the first spinup has been recorded (no Einhorn::State.last_spinup)
# all missing workers are launched at once; afterwards they are launched
# gradually.
def self.replenish
  return unless Einhorn::State.respawn

  if Einhorn::State.last_spinup
    replenish_gradually
  else
    replenish_immediately
  end
end
290
+
291
# Launch every missing worker right now.
#
# Logs an error and launches nothing when the pool reports no missing
# workers, since callers are expected to invoke this only when some are
# missing.
def self.replenish_immediately
  missing = Einhorn::WorkerPool.missing_worker_count

  if missing > 0
    Einhorn.log_info("Launching #{missing} new workers")
    missing.times { spinup }
  else
    Einhorn.log_error("Missing is currently #{missing.inspect}, but should always be > 0 when replenish_immediately is called. This probably indicates a bug in Einhorn.")
    nil
  end
end
300
+
301
# Launch at most one missing worker, then schedule another look.
#
# Does nothing while a spinup timer is already outstanding or when no
# workers are missing. A worker is spun up only if more than
# Einhorn::State.config[:seconds] have elapsed since the last spinup. In
# either case a timer of that interval is opened which clears the
# outstanding-timer flag and calls +replenish+ again.
def self.replenish_gradually
  return if Einhorn::TransientState.has_outstanding_spinup_timer
  return unless Einhorn::WorkerPool.missing_worker_count > 0

  spinup_interval = Einhorn::State.config[:seconds]
  seconds_ago = (Time.now - Einhorn::State.last_spinup).to_f

  due = seconds_ago > spinup_interval
  verdict = due ? 'so spinning up a new process' : 'so not spinning up a new process'
  Einhorn.log_debug("Last spinup was #{seconds_ago}s ago, and spinup_interval is #{spinup_interval}, #{verdict}")
  spinup if due

  Einhorn::TransientState.has_outstanding_spinup_timer = true
  Einhorn::Event::Timer.open(spinup_interval) do
    Einhorn::TransientState.has_outstanding_spinup_timer = false
    replenish
  end
end
321
+
322
# Make logging quieter by raising Einhorn::State.verbosity one step (it is
# not raised past 2).
#
# log - when true, the resulting message is also logged at info level.
#
# Returns the message string describing the current verbosity.
def self.quieter(log=true)
  if Einhorn::State.verbosity < 2
    Einhorn::State.verbosity += 1
  end

  message = "Verbosity set to #{Einhorn::State.verbosity}"
  Einhorn.log_info(message) if log
  message
end
328
+
329
# Make logging louder by lowering Einhorn::State.verbosity one step (it is
# not lowered below 0).
#
# log - when true, the resulting message is also logged at info level.
#
# Returns the message string describing the current verbosity.
def self.louder(log=true)
  if Einhorn::State.verbosity > 0
    Einhorn::State.verbosity -= 1
  end

  message = "Verbosity set to #{Einhorn::State.verbosity}"
  Einhorn.log_info(message) if log
  message
end
335
+ end
336
+ end
@@ -0,0 +1,132 @@
1
module Einhorn::Event
  # Buffered, non-blocking wrapper around a socket that is registered with
  # the Einhorn event loop for both reading and writing.
  #
  # Incoming bytes are accumulated in +read_buffer+ and handed to
  # +consume_record+ one record at a time, as delimited by +parse_record+.
  # Outgoing data is queued in +write_buffer+ and flushed as the socket
  # accepts it. Subclasses must implement +consume_record+ and may override
  # +parse_record+.
  class AbstractTextDescriptor
    attr_accessor :read_buffer, :write_buffer
    attr_reader :client_id

    # Count of descriptors created so far; used to build client ids.
    @@instance_counter = 0

    # Create a descriptor for +sock+ (and register it; see #initialize).
    def self.open(sock)
      self.new(sock)
    end

    # sock - the underlying socket; must respond to fileno, read_nonblock,
    #        write_nonblock and close.
    def initialize(sock)
      @@instance_counter += 1

      @socket = sock
      @client_id = "#{@@instance_counter}:#{sock.fileno}"

      @read_buffer = ""
      @write_buffer = ""

      @closed = false

      register!
    end

    # Mark the descriptor closed, remove it from the event loop and close
    # the socket.
    def close
      @closed = true
      deregister!
      @socket.close
    end

    # API method
    #
    # Register a two-argument block as the read callback, then read whatever
    # is currently available. Raises if a block was already registered, if no
    # block is given, or if the block's arity is not 2.
    def read(&blk)
      raise "Already registered a read block" if @read_blk
      raise "No block provided" unless blk
      raise "Must provide a block that accepts two arguments" unless blk.arity == 2

      @read_blk = blk
      notify_readable # Read what you can
    end

    # Drain the socket in 1024-byte chunks until it would block, feeding each
    # chunk through +process_read_buffer+. Closes the descriptor on EOF or
    # EPIPE. Stops immediately once the descriptor has been closed.
    def notify_readable
      while true
        begin
          return if @closed
          chunk = @socket.read_nonblock(1024)
        rescue Errno::EAGAIN
          break
        rescue EOFError, Errno::EPIPE
          close
          break
        else
          log_debug("read #{chunk.length} bytes (#{chunk.inspect[0..20]})")
          @read_buffer << chunk
          process_read_buffer
        end
      end
    end

    # API method
    #
    # Queue +data+ for sending and try to flush it right away.
    def write(data)
      @write_buffer << data
      notify_writeable # Write what you can
    end

    # True while there is queued data that has not been written yet.
    def write_pending?
      @write_buffer.length > 0
    end

    # Attempt one non-blocking write of the pending buffer and drop whatever
    # was accepted. Would-block/interrupted writes are left for a later
    # attempt; EPIPE closes the descriptor.
    def notify_writeable
      begin
        return if @closed
        written = @socket.write_nonblock(@write_buffer)
      rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR
      rescue Errno::EPIPE
        close
      else
        log_debug("wrote #{written} bytes")
        # write_nonblock reports the number of BYTES written, while String#[]
        # indexes by character on encoding-aware Rubies. Slice by bytes so
        # multibyte data is neither truncated nor turned into nil. Rubies
        # without byteslice index strings by byte already.
        @write_buffer =
          if @write_buffer.respond_to?(:byteslice)
            @write_buffer.byteslice(written..-1)
          else
            @write_buffer[written..-1]
          end
      end
    end

    # The underlying socket, so the descriptor can be passed where an IO is
    # expected.
    def to_io
      @socket
    end

    # Register with the event loop for read and write notifications.
    def register!
      Einhorn::Event.register_readable(self)
      Einhorn::Event.register_writeable(self)
    end

    # Remove from the event loop's read and write sets.
    def deregister!
      Einhorn::Event.deregister_readable(self)
      Einhorn::Event.deregister_writeable(self)
    end

    # Repeatedly split complete records off the front of the read buffer and
    # pass each to +consume_record+, until the buffer is empty or
    # +parse_record+ reports no complete record.
    def process_read_buffer
      while true
        if @read_buffer.length > 0
          break unless split = parse_record
          record, remainder = split
          log_debug("Read a record of #{record.length} bytes.")
          @read_buffer = remainder
          consume_record(record)
        else
          break
        end
      end
    end

    # Override in subclass. This lets you do streaming reads.
    #
    # Returns [record, remainder], or a falsy value when no complete record
    # is buffered. The default treats the whole buffer as one record.
    def parse_record
      [@read_buffer, '']
    end

    # Handle one parsed record. Must be implemented by subclasses.
    def consume_record(record)
      raise NotImplementedError.new
    end

    # Logging helpers that prefix messages with this client's id.
    def log_debug(msg)
      Einhorn.log_debug("[client #{client_id}] #{msg}")
    end

    def log_info(msg)
      Einhorn.log_info("[client #{client_id}] #{msg}")
    end

    def log_error(msg)
      Einhorn.log_error("[client #{client_id}] #{msg}")
    end
  end
end
@@ -0,0 +1,20 @@
1
module Einhorn::Event
  # Timer that reports a timer ACK for a given pid when it rings. It includes
  # Persistent and can be rebuilt from the hash produced by #to_state.
  class ACKTimer < Timer
    include Persistent

    # time  - timer duration, forwarded to Timer.
    # pid   - pid to report to Einhorn::Command.register_timer_ack.
    # start - optional start time, forwarded to Timer.
    def initialize(time, pid, start=nil)
      @pid = pid
      super(time, start) { Einhorn::Command.register_timer_ack(time, pid) }
    end

    # Serializable description of this timer.
    def to_state
      state = {}
      state[:class] = self.class.to_s
      state[:time] = @time
      state[:start] = @start
      state[:pid] = @pid
      state
    end

    # Recreate (and register) a timer from a hash produced by #to_state.
    def self.from_state(state)
      time, pid, start = state.values_at(:time, :pid, :start)
      self.open(time, pid, start)
    end
  end
end
@@ -0,0 +1,58 @@
1
module Einhorn::Event
  # Listening socket for command connections. It registers itself as
  # Einhorn::Command::Interface.command_server and with the event loop for
  # read notifications; each accepted socket is wrapped in a Connection.
  class CommandServer
    include Persistent

    # Wrap +server+ and register the new instance.
    def self.open(server)
      self.new(server)
    end

    # server - the listening socket; must respond to accept_nonblock, fileno
    #          and close.
    def initialize(server)
      @server = server

      @closed = false

      register!
    end

    # Accept every pending connection without blocking.
    #
    # Besides EAGAIN, accept_nonblock can fail transiently with EWOULDBLOCK,
    # ECONNABORTED, EPROTO (all "try again later" conditions) or EINTR. These
    # end this round quietly instead of propagating to the caller; remaining
    # connections are picked up on the next readable notification.
    def notify_readable
      begin
        while true
          return if @closed
          sock = @server.accept_nonblock
          Connection.open(sock)
        end
      rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::ECONNABORTED, Errno::EPROTO, Errno::EINTR
      end
    end

    # The underlying server socket.
    def to_io
      @server
    end

    # Serializable description: class name plus the server's descriptor
    # number.
    def to_state
      {:class => self.class.to_s, :server => @server.fileno}
    end

    # Rebuild a CommandServer around the descriptor number stored by
    # #to_state.
    def self.from_state(state)
      fd = state[:server]
      socket = UNIXServer.for_fd(fd)
      self.open(socket)
    end

    # Mark closed, deregister and close the server socket.
    def close
      @closed = true
      deregister!
      @server.close
    end

    # Publish this instance as the interface's command server and start
    # receiving read notifications.
    def register!
      Einhorn::Command::Interface.command_server = self
      Einhorn::Event.register_readable(self)
    end

    # Undo #register!.
    def deregister!
      Einhorn::Command::Interface.command_server = nil
      Einhorn::Event.deregister_readable(self)
    end
  end
end
@@ -0,0 +1,45 @@
1
module Einhorn::Event
  # A client connection speaking newline-delimited commands. Each complete
  # line is passed to Einhorn::Command::Interface.process_command. It includes
  # Persistent and can be rebuilt from the hash produced by #to_state.
  class Connection < AbstractTextDescriptor
    include Persistent

    # Split the first newline-terminated record off the read buffer.
    #
    # Returns [record, remainder], or nil when the buffer holds no newline.
    def parse_record
      pieces = @read_buffer.split("\n", 2)
      pieces.length > 1 ? pieces : nil
    end

    # Dispatch one command line to the command interface.
    def consume_record(command)
      Einhorn::Command::Interface.process_command(self, command)
    end

    # Serializable description: class name, socket descriptor number and any
    # non-empty buffers.
    def to_state
      # Don't include by default because it's not that pretty
      pending = {}
      pending[:read_buffer] = @read_buffer if @read_buffer.length > 0
      pending[:write_buffer] = @write_buffer if @write_buffer.length > 0

      {:class => self.class.to_s, :socket => @socket.fileno}.merge(pending)
    end

    # Rebuild a connection around the descriptor number stored by #to_state,
    # restoring any saved buffers.
    def self.from_state(state)
      conn = self.open(Socket.for_fd(state[:socket]))

      saved_read = state[:read_buffer]
      conn.read_buffer = saved_read if saved_read

      saved_write = state[:write_buffer]
      conn.write_buffer = saved_write if saved_write

      conn
    end

    # Log the connect, then register with the event loop.
    def register!
      log_info("client connected")
      super
    end

    # Log the disconnect (only when running as the master), then deregister.
    def deregister!
      if Einhorn::TransientState.whatami == :master
        log_info("client disconnected")
      end
      super
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # TODO: set lots of cloexecs
2
module Einhorn::Event
  # Text descriptor whose records are read and then discarded.
  # NOTE(review): presumably the readable end used to wake the event loop --
  # confirm against Einhorn::Event.break_loop.
  class LoopBreaker < AbstractTextDescriptor
    # Intentionally a no-op: the record content is irrelevant.
    def consume_record(record)
    end
  end
end
@@ -0,0 +1,23 @@
1
module Einhorn::Event
  # Mixin marking a descriptor class as restorable from serialized state.
  # Every including class is recorded by name so that Persistent.from_state
  # can dispatch to that class's own from_state.
  module Persistent
    # Registry of including classes, keyed by class name.
    @@persistent = {}

    # Hook: remember each class that includes this module.
    def self.included(other)
      @@persistent[other.to_s] = other
    end

    # Rebuild a descriptor from +state+ by delegating to the class named in
    # state[:class]. Logs an error and returns nil when that class is not
    # registered.
    def self.from_state(state)
      klass_name = state[:class]
      klass = @@persistent[klass_name]
      return klass.from_state(state) if klass

      Einhorn.log_error("Unrecognized persistent descriptor class #{klass_name.inspect}. Ignoring. This most likely indicates that your Einhorn version has upgraded. Everything should still be working, but it may be worth a restart.")
      nil
    end

    # True when +descriptor+ is an instance of any registered class.
    def self.persistent?(descriptor)
      @@persistent.values.any? do |klass|
        descriptor.is_a?(klass)
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
module Einhorn::Event
  # A one-shot timer: when rung it deregisters itself and calls its block.
  class Timer
    attr_reader :time

    # time  - duration added to the start time to get the expiry.
    # start - start time; defaults to the time of construction.
    # blk   - block called by #ring!.
    def initialize(time, start=nil, &blk)
      @time = time
      @blk = blk
      @start = start ? start : Time.now
    end

    # TODO: abstract into some interface
    #
    # Build a timer with the given arguments, register it and return it.
    def self.open(*args, &blk)
      timer = self.new(*args, &blk)
      timer.register!
      timer
    end

    # The moment this timer is due: start time plus duration.
    def expires_at
      @start + @time
    end

    # Fire the timer: log how late it is, deregister, then run the block.
    def ring!
      now = Time.now
      age = now - @start
      overdue = now - expires_at
      Einhorn.log_debug("Ringing timer that was scheduled #{age}s ago and expired #{overdue}s ago")

      deregister!
      @blk.call
    end

    # Add this timer to the event loop.
    def register!
      Einhorn.log_debug("Scheduling a new #{time}s timer")
      Einhorn::Event.register_timer(self)
    end

    # Remove this timer from the event loop.
    def deregister!
      Einhorn.log_debug("Nuking timer that expired #{Time.now - expires_at}s ago")
      Einhorn::Event.deregister_timer(self)
    end
  end
end