einhorn 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,336 @@
1
+ require 'pp'
2
+ require 'set'
3
+ require 'tmpdir'
4
+ require 'json'
5
+
6
+ require 'einhorn/command/interface'
7
+
8
+ module Einhorn
9
+ module Command
10
# Collects every child process that has already exited, without blocking.
#
# Each reaped pid is passed to mourn and the event loop is then asked to
# break. Returns nil once no exited child is pending, or when this process
# has no children at all (Errno::ECHILD).
def self.reap
  loop do
    Einhorn.log_debug('Going to reap a child process')

    # WNOHANG: wait returns nil instead of blocking when nothing has exited.
    reaped = Process.wait(-1, Process::WNOHANG)
    break unless reaped

    mourn(reaped)
    Einhorn::Event.break_loop
  end
rescue Errno::ECHILD
  # There are no children, so there is nothing to reap.
end
23
+
24
# Mourn the death of your child: drop the bookkeeping entry of an exited
# process and log the exit according to the kind of process it was.
#
# pid - the process id that was just reaped.
#
# A pid without an entry in Einhorn::State.children is logged as an error
# and otherwise ignored.
def self.mourn(pid)
  spec = Einhorn::State.children[pid]
  unless spec
    Einhorn.log_error("Could not find any config for exited child #{pid.inspect}! This probably indicates a bug in Einhorn.")
    return
  end

  Einhorn::State.children.delete(pid)

  kind = spec[:type]
  if kind == :worker
    Einhorn.log_info("===> Exited worker #{pid.inspect}")
  elsif kind == :state_passer
    Einhorn.log_debug("===> Exited state passing process #{pid.inspect}")
  else
    Einhorn.log_error("===> Exited process #{pid.inspect} has unrecgonized type #{kind.inspect}: #{spec.inspect}")
  end
end
42
+
43
# Handles an ACK that a worker sent explicitly.
#
# pid - the pid the ACK is for.
#
# The ACK is only honoured when the configured ack_mode type is :manual;
# otherwise an error is logged and nil is returned. On success the result of
# register_ack is returned.
def self.register_manual_ack(pid)
  mode = Einhorn::State.ack_mode
  if mode[:type] != :manual
    Einhorn.log_error("Received a manual ACK for #{pid.inspect}, but ack_mode is #{mode.inspect}. Ignoring ACK.")
    return
  end

  Einhorn.log_info("Received a manual ACK from #{pid.inspect}")
  register_ack(pid)
end
52
+
53
# Handles the ACK produced when a worker's ACK timer fires.
#
# time - the timer duration in seconds (only used for the log message).
# pid  - the pid the timer was opened for.
#
# Ignored (returning nil) when ack_mode is not :timer or when the pid no
# longer has an entry in Einhorn::State.children. Otherwise returns the
# result of register_ack.
def self.register_timer_ack(time, pid)
  mode = Einhorn::State.ack_mode
  if mode[:type] != :timer
    Einhorn.log_error("Received a timer ACK for #{pid.inspect}, but ack_mode is #{mode.inspect}. Ignoring ACK.")
    return
  end

  if !Einhorn::State.children[pid]
    # TODO: Maybe cancel pending ACK timers upon death?
    Einhorn.log_debug("Worker #{pid.inspect} died before its timer ACK happened.")
    return
  end

  Einhorn.log_info("Worker #{pid.inspect} has been up for #{time}s, so we are considering it alive.")
  register_ack(pid)
end
69
+
70
# Marks a child as ACKed and asks the event loop to break.
#
# pid - the pid being acknowledged.
#
# Unknown pids and pids that were already ACKed are logged as errors and
# ignored (nil is returned). Otherwise returns whatever
# Einhorn::Event.break_loop returns.
def self.register_ack(pid)
  spec = Einhorn::State.children[pid]
  if !spec
    Einhorn.log_error("Could not find state for PID #{pid.inspect}; ignoring ACK.")
    return
  end

  if spec[:acked]
    Einhorn.log_error("Pid #{pid.inspect} already ACKed; ignoring new ACK.")
    return
  end

  spec[:acked] = true

  pool = Einhorn::WorkerPool
  Einhorn.log_info("Up to #{pool.ack_count} / #{pool.ack_target} #{Einhorn::State.ack_mode[:type]} ACKs")
  # Could call cull here directly instead, I believe.
  Einhorn::Event.break_loop
end
86
+
87
# Sends a signal to each of the given children, at most once per signal.
#
# signal   - anything Process.kill accepts as a signal (name or number).
# children - an enumerable of child pids.
#
# Every child spec keeps a :signaled set. It records the *signals* that were
# already delivered to that child, so asking for the same signal twice is
# logged as an error and skipped, while a different signal still goes through.
# Pids without an entry in Einhorn::State.children are logged and skipped.
#
# Returns whatever children.each returns.
def self.signal_all(signal, children)
  Einhorn.log_info("Sending #{signal} to #{children.inspect}")

  children.each do |child|
    spec = Einhorn::State.children[child]
    unless spec
      Einhorn.log_error("Trying to send #{signal} to dead child #{child.inspect}. The fact we tried this probably indicates a bug in Einhorn.")
      next
    end

    # Not every spec is created with a :signaled set (the state passer's is
    # not), so create it lazily instead of calling a method on nil.
    delivered = (spec[:signaled] ||= Set.new)
    if delivered.include?(signal)
      Einhorn.log_error("Not sending #{signal} to already-signaled child #{child.inspect}. The fact we tried this probably indicates a bug in Einhorn.")
      next
    end
    delivered.add(signal)

    begin
      Process.kill(signal, child)
    rescue Errno::ESRCH
      # The child is already gone; there is nobody left to signal.
    end
  end
end
108
+
109
# Raises the configured worker count (Einhorn::State.config[:number]) by one.
#
# Asks the event loop to break first, then updates the config, writes a
# human-readable summary to $stderr and returns that summary string.
def self.increment
  Einhorn::Event.break_loop

  config = Einhorn::State.config
  before = config[:number]
  after = before + 1
  config[:number] = after

  message = "Incrementing number of workers from #{before} -> #{after}"
  $stderr.puts(message)
  message
end
117
+
118
# Lowers the configured worker count (Einhorn::State.config[:number]) by one,
# refusing to go below a single worker.
#
# In both cases a human-readable message is written to $stderr and returned.
# The event loop is only asked to break when the count actually changes.
def self.decrement
  config = Einhorn::State.config

  if config[:number] <= 1
    refusal = "Can't decrease number of workers (already at #{config[:number]}). Run kill #{$$} if you really want to kill einhorn."
    $stderr.puts refusal
    return refusal
  end

  Einhorn::Event.break_loop

  before = config[:number]
  after = before - 1
  config[:number] = after

  message = "Decrementing number of workers from #{before} -> #{after}"
  $stderr.puts(message)
  message
end
132
+
133
# Builds the snapshot that is handed over on reload.
#
# Returns a Hash with two keys:
#   :state                  - Einhorn::State.state
#   :persistent_descriptors - the to_state of every descriptor returned by
#                             Einhorn::Event.persistent_descriptors
def self.dumpable_state
  snapshot = {:state => Einhorn::State.state}
  snapshot[:persistent_descriptors] = Einhorn::Event.persistent_descriptors.map(&:to_state)
  snapshot
end
144
+
145
# Replaces the running master with a fresh exec of the einhorn script,
# handing the current state to it through a pipe.
#
# A forked child (the "state passer") writes YAML.dump(dumpable_state) into
# the pipe and exits. The parent closes its copy of the write end, tears down
# the event loop and execs the script with
# `--with-state-fd <read fd> -- <original command>`.
#
# Does not return when the exec succeeds.
def self.reload
  Einhorn.log_info("Reloading einhorn (#{Einhorn::TransientState.script_name})...")

  # In case there's anything lurking
  $stdout.flush

  # Spawn a child to pass the state through the pipe
  state_reader, state_writer = IO.pipe
  fork do
    Einhorn::TransientState.whatami = :state_passer
    Einhorn::State.generation += 1
    # Register ourselves before the dump below is taken.
    Einhorn::State.children[$$] = {:type => :state_passer}
    state_reader.close

    state_writer.write(YAML.dump(dumpable_state))
    state_writer.close

    exit(0)
  end
  # Only the state passer writes; the parent keeps just the read end open.
  state_writer.close

  Einhorn::Event.uninit

  script = Einhorn::TransientState.script_name
  handoff_args = ['--with-state-fd', state_reader.fileno.to_s, '--'] + Einhorn::State.cmd
  # The [path, argv0] pair makes exec run the script directly.
  exec [script, script], *handoff_args
end
172
+
173
# Forks one new worker and records it in Einhorn::State.children.
#
# cmd - the command (argv array) to run; defaults to Einhorn::State.cmd.
#
# When Einhorn::TransientState.preloaded is set, the child tears down the
# master's command interface and descriptors and calls einhorn_main in
# process. Otherwise the child closes the master's descriptors and execs cmd.
#
# In the parent the new pid is registered as an un-ACKed worker of the
# current version, last_spinup is updated, and, in :timer ack mode, an
# ACKTimer is opened for the pid.
def self.spinup(cmd=nil)
  cmd ||= Einhorn::State.cmd

  pid =
    if Einhorn::TransientState.preloaded
      fork do
        Einhorn::TransientState.whatami = :worker

        Einhorn.log_info('About to tear down Einhorn state and run einhorn_main')
        Einhorn::Command::Interface.uninit
        Einhorn::Event.close_all_for_worker
        Einhorn.set_argv(cmd, true)

        pass_command_socket_info
        einhorn_main
      end
    else
      fork do
        Einhorn::TransientState.whatami = :worker

        Einhorn.log_info("About to exec #{cmd.inspect}")
        # Here's the only case where cloexec would help. Since we
        # have to track and manually close FDs for other cases, we
        # may as well just reuse close_all rather than also set
        # cloexec on everything.
        Einhorn::Event.close_all_for_worker

        pass_command_socket_info
        exec [cmd[0], cmd[0]], *cmd[1..-1]
      end
    end

  Einhorn.log_info("===> Launched #{pid}")
  Einhorn::State.children[pid] = {
    :type => :worker,
    :version => Einhorn::State.version,
    :acked => false,
    :signaled => Set.new
  }
  Einhorn::State.last_spinup = Time.now

  # Set up whatever's needed for ACKing
  ack_mode = Einhorn::State.ack_mode
  ack_type = ack_mode[:type]
  if ack_type == :timer
    Einhorn::Event::ACKTimer.open(ack_mode[:timeout], pid)
  elsif ack_type != :manual
    # :manual needs no setup; anything else is unknown.
    Einhorn.log_error("Unrecognized ACK mode #{ack_type.inspect}")
  end
end
222
+
223
# Exports the master's coordinates into the environment. This is run from
# the child.
#
# Always sets EINHORN_MASTER_PID (the parent pid) and EINHORN_SOCK_PATH.
# When Einhorn::State.command_socket_as_fd is set, it also opens a
# connection to the command socket, keeps the handle in
# Einhorn::TransientState.socket_handles and exports its descriptor number
# as EINHORN_FD.
def self.pass_command_socket_info
  interface = Einhorn::Command::Interface

  ENV['EINHORN_MASTER_PID'] = Process.ppid.to_s
  ENV['EINHORN_SOCK_PATH'] = interface.socket_path
  return unless Einhorn::State.command_socket_as_fd

  connection = UNIXSocket.open(interface.socket_path)
  Einhorn::TransientState.socket_handles << connection
  ENV['EINHORN_FD'] = connection.fileno.to_s
end
233
+
234
# Entry point for an upgrade.
#
# When Einhorn::State.path is set and we are not already in the middle of a
# reload for a preload upgrade, the master reloads itself; otherwise only
# the workers are upgraded.
def self.full_upgrade
  needs_reload = Einhorn::State.path && !Einhorn::State.reloading_for_preload_upgrade
  return reload_for_preload_upgrade if needs_reload

  upgrade_workers
end
241
+
242
# Flags the state as "reloading for a preload upgrade" and then reloads the
# master. The flag is set before reload runs, so it is already in place when
# the state is dumped.
def self.reload_for_preload_upgrade
  state = Einhorn::State
  state.reloading_for_preload_upgrade = true
  reload
end
246
+
247
# Starts (or restarts) a rolling upgrade of the workers.
#
# Marks the state as upgrading, bumps Einhorn::State.version by one and
# immediately launches the missing workers. If an upgrade is already in
# progress the version is bumped again and the upgrade starts over.
#
# Returns whatever replenish_immediately returns.
def self.upgrade_workers
  current = Einhorn::State.version
  upcoming = current + 1

  if Einhorn::State.upgrading
    Einhorn.log_info("Currently upgrading (#{Einhorn::WorkerPool.ack_count} / #{Einhorn::WorkerPool.ack_target} ACKs; bumping version and starting over)...")
  else
    Einhorn::State.upgrading = true
    # Name both versions: the bump happens below, so logging the bare
    # current version as the upgrade target would be off by one.
    Einhorn.log_info("Starting upgrade from version #{current} to #{upcoming}...")
  end

  Einhorn::State.version = upcoming
  replenish_immediately
end
258
+
259
# Finishes upgrades and trims the worker pool.
#
# 1. If an upgrade is running and enough workers have ACKed, the upgrade is
#    marked complete.
# 2. If no upgrade is running, every old worker is sent USR2.
# 3. If more workers have ACKed than the target, the surplus (taken from the
#    front of acked_unsignaled_modern_workers) is sent USR2.
def self.cull
  pool = Einhorn::WorkerPool
  acked = pool.ack_count
  target = pool.ack_target

  if Einhorn::State.upgrading && acked >= target
    Einhorn::State.upgrading = false
    Einhorn.log_info("Upgrade to version #{Einhorn::State.version} complete.")
  end

  stale = pool.old_workers
  if !Einhorn::State.upgrading && !stale.empty?
    Einhorn.log_info("Killing off #{stale.length} old workers.")
    signal_all("USR2", stale)
  end

  surplus = acked - target
  if surplus > 0
    excess = pool.acked_unsignaled_modern_workers[0...surplus]
    Einhorn.log_info("Have too many workers at the current version, so killing off #{excess.length} of them.")
    signal_all("USR2", excess)
  end
end
280
+
281
# Tops the worker pool back up, unless respawning is switched off.
#
# Before anything has ever been spun up (no last_spinup recorded) all missing
# workers are launched at once; afterwards they are launched gradually.
def self.replenish
  return unless Einhorn::State.respawn

  Einhorn::State.last_spinup ? replenish_gradually : replenish_immediately
end
290
+
291
# Launches every missing worker right away.
#
# Being called with nothing missing is treated as a bug: an error is logged
# and nil is returned. Otherwise returns the number of workers launched.
def self.replenish_immediately
  shortfall = Einhorn::WorkerPool.missing_worker_count
  unless shortfall > 0
    Einhorn.log_error("Missing is currently #{shortfall.inspect}, but should always be > 0 when replenish_immediately is called. This probably indicates a bug in Einhorn.")
    return
  end

  Einhorn.log_info("Launching #{shortfall} new workers")
  shortfall.times { spinup }
end
300
+
301
# Launches at most one missing worker per spinup interval.
#
# Does nothing while a spinup timer is outstanding or when no worker is
# missing. Otherwise a worker is spun up if the last spinup is more than
# Einhorn::State.config[:seconds] ago, and, either way, a timer is opened
# that calls replenish again after one interval.
def self.replenish_gradually
  return if Einhorn::TransientState.has_outstanding_spinup_timer
  return if Einhorn::WorkerPool.missing_worker_count <= 0

  spinup_interval = Einhorn::State.config[:seconds]
  elapsed = (Time.now - Einhorn::State.last_spinup).to_f
  due = elapsed > spinup_interval

  decision = due ? 'spinning up' : 'not spinning up'
  Einhorn.log_debug("Last spinup was #{elapsed}s ago, and spinup_interval is #{spinup_interval}, so #{decision} a new process")
  spinup if due

  # Re-check after one interval; the flag prevents stacking several timers.
  Einhorn::TransientState.has_outstanding_spinup_timer = true
  Einhorn::Event::Timer.open(spinup_interval) do
    Einhorn::TransientState.has_outstanding_spinup_timer = false
    replenish
  end
end
321
+
322
# Raises Einhorn::State.verbosity by one step, capped at 2.
#
# log - when true (the default) the resulting message is also logged.
#
# Returns the message describing the resulting verbosity.
def self.quieter(log=true)
  level = Einhorn::State.verbosity
  Einhorn::State.verbosity = level + 1 if level < 2

  message = "Verbosity set to #{Einhorn::State.verbosity}"
  Einhorn.log_info(message) if log
  message
end
328
+
329
# Lowers Einhorn::State.verbosity by one step, never going below 0.
#
# log - when true (the default) the resulting message is also logged.
#
# Returns the message describing the resulting verbosity.
def self.louder(log=true)
  level = Einhorn::State.verbosity
  Einhorn::State.verbosity = level - 1 if level > 0

  message = "Verbosity set to #{Einhorn::State.verbosity}"
  Einhorn.log_info(message) if log
  message
end
335
+ end
336
+ end
@@ -0,0 +1,132 @@
1
+ module Einhorn::Event
2
# Base class for event-loop descriptors that exchange buffered text over a
# socket-like object.
#
# Incoming bytes are appended to read_buffer and cut into records by
# parse_record; each record is passed to consume_record, which subclasses
# must implement. Outgoing data is queued in write_buffer and flushed with
# non-blocking writes.
class AbstractTextDescriptor
  attr_accessor :read_buffer, :write_buffer
  attr_reader :client_id

  # Counts every descriptor ever created; used to build client_id.
  @@instance_counter = 0

  # Wraps +sock+ in a new descriptor (which registers itself).
  def self.open(sock)
    new(sock)
  end

  # sock - a socket-like object responding to fileno, read_nonblock,
  #        write_nonblock and close.
  def initialize(sock)
    @@instance_counter += 1

    @socket = sock
    @client_id = "#{@@instance_counter}:#{sock.fileno}"

    @read_buffer = ""
    @write_buffer = ""

    @closed = false

    register!
  end

  # Deregisters the descriptor and closes the socket. Safe to call more than
  # once: later calls are no-ops, so an error path and an explicit close
  # cannot close the underlying socket twice.
  def close
    return if @closed

    @closed = true
    deregister!
    @socket.close
  end

  # API method
  #
  # Stores a two-argument block and then reads whatever is currently
  # available. Raises RuntimeError when a block was already registered, when
  # no block is given, or when the block does not take exactly two arguments.
  def read(&blk)
    raise "Already registered a read block" if @read_blk
    raise "No block provided" unless blk
    raise "Must provide a block that accepts two arguments" unless blk.arity == 2

    @read_blk = blk
    notify_readable # Read what you can
  end

  # Drains the socket without blocking, processing complete records as they
  # arrive. Stops when the socket has no more data for now; closes the
  # descriptor when the peer has gone away.
  def notify_readable
    while true
      begin
        # consume_record may have closed us while handling the last chunk.
        return if @closed
        chunk = @socket.read_nonblock(1024)
      rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR
        # Nothing (more) to read right now.
        break
      rescue EOFError, Errno::EPIPE, Errno::ECONNRESET
        # The peer hung up, possibly abruptly; drop the connection instead of
        # letting the error escape into the event loop.
        close
        break
      else
        log_debug("read #{chunk.length} bytes (#{chunk.inspect[0..20]})")
        @read_buffer << chunk
        process_read_buffer
      end
    end
  end

  # API method
  #
  # Queues +data+ and immediately tries to flush the write buffer.
  def write(data)
    @write_buffer << data
    notify_writeable # Write what you can
  end

  # True while queued data has not been written to the socket yet.
  def write_pending?
    @write_buffer.length > 0
  end

  # Writes as much of the write buffer as the socket accepts without
  # blocking and keeps the rest queued. Closes the descriptor when the peer
  # has gone away.
  def notify_writeable
    begin
      return if @closed
      written = @socket.write_nonblock(@write_buffer)
    rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR
      # Try again on the next writeable notification.
    rescue Errno::EPIPE, Errno::ECONNRESET
      close
    else
      log_debug("wrote #{written} bytes")
      @write_buffer = @write_buffer[written..-1]
    end
  end

  # The underlying socket.
  def to_io
    @socket
  end

  # Registers this descriptor with the event loop for reads and writes.
  def register!
    Einhorn::Event.register_readable(self)
    Einhorn::Event.register_writeable(self)
  end

  # Removes this descriptor from the event loop.
  def deregister!
    Einhorn::Event.deregister_readable(self)
    Einhorn::Event.deregister_writeable(self)
  end

  # Repeatedly cuts records off the front of the read buffer and hands them
  # to consume_record, until the buffer is empty or holds no complete record.
  def process_read_buffer
    while true
      if @read_buffer.length > 0
        break unless split = parse_record
        record, remainder = split
        log_debug("Read a record of #{record.length} bytes.")
        @read_buffer = remainder
        consume_record(record)
      else
        break
      end
    end
  end

  # Override in subclass. This lets you do streaming reads.
  #
  # Returns [record, remainder], or a falsy value when the buffer does not
  # contain a complete record yet. The default treats the whole buffer as
  # one record.
  def parse_record
    [@read_buffer, '']
  end

  # Handles one parsed record. Must be implemented by subclasses.
  def consume_record(record)
    raise NotImplementedError.new
  end

  def log_debug(msg)
    Einhorn.log_debug("[client #{client_id}] #{msg}")
  end

  def log_info(msg)
    Einhorn.log_info("[client #{client_id}] #{msg}")
  end

  def log_error(msg)
    Einhorn.log_error("[client #{client_id}] #{msg}")
  end
end
132
+ end
@@ -0,0 +1,20 @@
1
+ module Einhorn::Event
2
# A persistent timer that, when it rings, registers a timer ACK for a
# worker pid with Einhorn::Command.
class ACKTimer < Timer
  include Persistent

  # time  - the timer duration.
  # pid   - the pid of the worker the ACK will be registered for.
  # start - optional start time, passed on to Timer.
  def initialize(time, pid, start=nil)
    super(time, start) { Einhorn::Command.register_timer_ack(time, pid) }
    @pid = pid
  end

  # Serialises the timer into a Hash that from_state can rebuild it from.
  def to_state
    state = {:class => self.class.to_s}
    state[:time] = @time
    state[:start] = @start
    state[:pid] = @pid
    state
  end

  # Re-opens a timer from a Hash produced by to_state.
  def self.from_state(state)
    open(state[:time], state[:pid], state[:start])
  end
end
20
+ end
@@ -0,0 +1,58 @@
1
+ module Einhorn::Event
2
# Persistent event-loop descriptor around the listening command socket:
# every accepted client is wrapped in a Connection.
class CommandServer
  include Persistent

  # Wraps +server+ in a new, registered CommandServer.
  def self.open(server)
    new(server)
  end

  # server - the listening socket to accept connections from.
  def initialize(server)
    @server = server
    @closed = false

    register!
  end

  # Accepts every connection that is currently pending, without blocking.
  def notify_readable
    until @closed
      Connection.open(@server.accept_nonblock)
    end
  rescue Errno::EAGAIN
    # No more pending connections for now.
  end

  # The underlying listening socket.
  def to_io
    @server
  end

  # Serialises the server as its class name and file descriptor number.
  def to_state
    {:class => self.class.to_s, :server => @server.fileno}
  end

  # Rebuilds a CommandServer around the descriptor number stored by to_state.
  def self.from_state(state)
    open(UNIXServer.for_fd(state[:server]))
  end

  # Deregisters from the event loop and closes the listening socket.
  def close
    @closed = true
    deregister!
    @server.close
  end

  # Publishes this instance as the interface's command server and starts
  # listening for readability.
  def register!
    Einhorn::Command::Interface.command_server = self
    Einhorn::Event.register_readable(self)
  end

  # Clears the interface's command server and stops listening.
  def deregister!
    Einhorn::Command::Interface.command_server = nil
    Einhorn::Event.deregister_readable(self)
  end
end
58
+ end
@@ -0,0 +1,45 @@
1
+ module Einhorn::Event
2
# A persistent, newline-delimited command connection: every complete line
# read from the client is handed to the command interface.
class Connection < AbstractTextDescriptor
  include Persistent

  # Splits the first line off the read buffer.
  #
  # Returns [line, rest], or nil while the buffer holds no newline yet.
  def parse_record
    pieces = @read_buffer.split("\n", 2)
    pieces.length > 1 ? pieces : nil
  end

  # Passes one received line to the command interface.
  def consume_record(command)
    Einhorn::Command::Interface.process_command(self, command)
  end

  # Serialises the connection as its class name and socket descriptor
  # number, plus any non-empty buffers.
  def to_state
    state = {:class => self.class.to_s, :socket => @socket.fileno}
    # Don't include by default because it's not that pretty
    state[:read_buffer] = @read_buffer unless @read_buffer.empty?
    state[:write_buffer] = @write_buffer unless @write_buffer.empty?
    state
  end

  # Rebuilds a connection (and its buffers) from a Hash produced by to_state.
  def self.from_state(state)
    conn = open(Socket.for_fd(state[:socket]))
    conn.read_buffer = state[:read_buffer] if state[:read_buffer]
    conn.write_buffer = state[:write_buffer] if state[:write_buffer]
    conn
  end

  def register!
    log_info("client connected")
    super
  end

  def deregister!
    # Only logged when this process is the master.
    if Einhorn::TransientState.whatami == :master
      log_info("client disconnected")
    end
    super
  end
end
45
+ end
@@ -0,0 +1,6 @@
1
+ # TODO: set lots of cloexecs
2
+ module Einhorn::Event
3
# A text descriptor that reads and discards whatever arrives on its socket.
class LoopBreaker < AbstractTextDescriptor
  # Records carry no information here, so they are simply dropped.
  def consume_record(record)
  end
end
6
+ end
@@ -0,0 +1,23 @@
1
+ module Einhorn::Event
2
# Mixin marking descriptor classes whose state survives a reload.
#
# Every class that includes this module is recorded under its class name, so
# that a serialised state Hash (which carries :class) can be routed back to
# the right class's from_state.
module Persistent
  # Class name => class, for every class that included this module.
  @@persistent = {}

  def self.included(other)
    @@persistent[other.to_s] = other
  end

  # Rebuilds a descriptor from +state+ via the class named in state[:class].
  #
  # Returns the rebuilt descriptor, or nil (after logging an error) when the
  # class name is not registered.
  def self.from_state(state)
    klass_name = state[:class]
    klass = @@persistent[klass_name]
    return klass.from_state(state) if klass

    Einhorn.log_error("Unrecognized persistent descriptor class #{klass_name.inspect}. Ignoring. This most likely indicates that your Einhorn version has upgraded. Everything should still be working, but it may be worth a restart.")
    nil
  end

  # True when +descriptor+ is an instance of any registered class.
  def self.persistent?(descriptor)
    @@persistent.each_value.any? { |klass| descriptor.kind_of?(klass) }
  end
end
23
+ end
@@ -0,0 +1,39 @@
1
+ module Einhorn::Event
2
# A one-shot timer for the event loop: it runs its block when rung and
# removes itself from the loop at that point.
class Timer
  attr_reader :time

  # time  - the duration, added to the start time to get the expiry.
  # start - the start time; defaults to now.
  # blk   - the block to call when the timer rings.
  def initialize(time, start=nil, &blk)
    @time = time
    @start = start || Time.now
    @blk = blk
  end

  # TODO: abstract into some interface
  #
  # Builds a timer and registers it with the event loop. Returns the timer.
  def self.open(*args, &blk)
    new(*args, &blk).tap { |timer| timer.register! }
  end

  # The moment the timer is due.
  def expires_at
    @start + @time
  end

  # Deregisters the timer and runs its block, returning the block's result.
  def ring!
    now = Time.now
    age = now - @start
    overdue = now - expires_at
    Einhorn.log_debug("Ringing timer that was scheduled #{age}s ago and expired #{overdue}s ago")
    deregister!
    @blk.call
  end

  def register!
    Einhorn.log_debug("Scheduling a new #{time}s timer")
    Einhorn::Event.register_timer(self)
  end

  def deregister!
    overdue = Time.now - expires_at
    Einhorn.log_debug("Nuking timer that expired #{overdue}s ago")
    Einhorn::Event.deregister_timer(self)
  end
end
39
+ end