gvarela-bluepill 0.0.27

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,410 @@
1
+ require "state_machine"
2
+ require "daemons"
3
+
4
+ module Bluepill
5
+ class Process
6
# Names of the per-process settings that the constructor accepts through its
# options hash. They are grouped by concern, one group per line.
CONFIGURABLE_ATTRIBUTES = [
  # commands
  :start_command, :stop_command, :restart_command,
  # standard streams
  :stdout, :stderr, :stdin,
  # daemonization and filesystem locations
  :daemonize, :pid_file, :working_dir,
  # grace periods
  :start_grace_time, :stop_grace_time, :restart_grace_time,
  # user / group ids
  :uid, :gid,
  # child process monitoring
  :monitor_children, :child_process_template
]
29
+
30
# Runtime bookkeeping attributes (not part of the configurable list).
attr_accessor :name,
              :watches,
              :triggers,
              :logger,
              :skip_ticks_until
# One reader/writer pair per configurable setting.
attr_accessor(*CONFIGURABLE_ATTRIBUTES)
# The child list is only exposed for reading.
attr_reader :children
33
+
34
# Lifecycle of a monitored process. A new instance starts out :unmonitored.
state_machine :initial => :unmonitored do
  # Idle states: only an event (external or internal) moves the process out
  # of them. :down means we know the process is not running; :unmonitored
  # means we do not care whether it is running.
  state :unmonitored, :up, :down

  # Transitional states: the process is expected to leave them after a
  # certain period of time.
  state :starting, :stopping, :restarting

  event :tick do
    # Each row is [current state, state to enter when the process is dead].
    # A live process always ends up in :up. That includes :stopping: if the
    # process failed to die, the state is changed to reflect reality.
    # Rows are declared in the same order as the original explicit list.
    [
      [:starting,   :down],
      [:up,         :down],
      [:stopping,   :down],
      [:down,       :starting],
      [:restarting, :down]
    ].each do |current_state, dead_state|
      transition current_state => :up, :if => :process_running?
      transition current_state => dead_state, :unless => :process_running?
    end
  end

  event :start do
    transition [:unmonitored, :down] => :starting
  end

  event :stop do
    transition :up => :stopping
  end

  event :unmonitor do
    transition any => :unmonitored
  end

  event :restart do
    transition [:up, :down] => :restarting
  end

  # Triggers hear about every transition before it happens.
  before_transition any => any, :do => :notify_triggers

  # Entering a transitional state runs the matching system command.
  after_transition any => :starting,   :do => :start_process
  after_transition any => :stopping,   :do => :stop_process
  after_transition any => :restarting, :do => :restart_process

  # Bookkeeping after every transition.
  after_transition any => any, :do => :record_transition
end
86
+
87
# Builds a process wrapper.
#
# process_name - stored as the process name.
# options      - hash of settings; keys listed in CONFIGURABLE_ATTRIBUTES are
#                applied through their writers when present.
def initialize(process_name, options = {})
  @name               = process_name
  @event_mutex        = Monitor.new
  @transition_history = Util::RotationalArray.new(10)
  @watches  = []
  @triggers = []
  @children = []

  @monitor_children = options[:monitor_children] || false

  # Grace periods default to 3 when the option is missing (or nil/false).
  [:start_grace_time, :stop_grace_time, :restart_grace_time].each do |grace_key|
    instance_variable_set("@#{grace_key}", options[grace_key] || 3)
  end

  # Explicitly supplied options win over the defaults set above, even when
  # the supplied value is nil.
  CONFIGURABLE_ATTRIBUTES.each do |attribute|
    next unless options.has_key?(attribute)
    send("#{attribute}=", options[attribute])
  end

  # Let state_machine do its initialization stuff
  super()
end
108
+
109
# One monitoring step. Does nothing while ticks are being skipped; otherwise
# fires the state machine's tick event and, if the process ends up :up, runs
# the watches and (when child monitoring is on) ticks every child.
def tick
  return if skipping_ticks?
  self.skip_ticks_until = nil

  # the running-state memo is only valid for a single tick
  @process_running = nil

  # hand over to the state machine's generated tick event
  super

  return unless up?
  run_watches

  return unless monitor_children?
  refresh_children!
  children.each { |child_process| child_process.tick }
end
128
+
129
# Stores the logger and hands the same logger to every watch and trigger
# (watches first, then triggers).
def logger=(logger)
  @logger = logger
  (watches + triggers).each { |listener| listener.logger = logger }
end
134
+
135
+ # State machine methods
136
# Invokes the method named by +event+ on this object while holding
# @event_mutex, and returns whatever that method returns.
def dispatch!(event)
  @event_mutex.synchronize { send(event.to_s) }
end
141
+
142
# Bookkeeping after a transition. Loopbacks (state unchanged) are ignored;
# a real state change sets @transitioned, wipes the history of every watch,
# empties the child list when children are monitored, and logs the change.
def record_transition(transition)
  return if transition.loopback?

  @transitioned = true

  # watch history from the previous state is no longer meaningful
  watches.each { |watch| watch.clear_history! }

  # the child list is emptied on every state change
  if monitor_children?
    logger.warning "Clearing child list"
    children.clear
  end

  logger.info "Going from #{transition.from_name} => #{transition.to_name}"
end
157
+
158
# Passes the given transition to every registered trigger, in order.
def notify_triggers(transition)
  triggers.each do |registered_trigger|
    registered_trigger.notify(transition)
  end
end
161
+
162
+ # Watch related methods
163
# Registers a new ConditionWatch built from +name+ and +options+; the
# process logger is passed along under the :logger key.
def add_watch(name, options = {})
  watch_options = options.merge(:logger => logger)
  watches << ConditionWatch.new(name, watch_options)
end
166
+
167
# Looks up the trigger class registered under +name+, instantiates it for
# this process (process logger passed under :logger) and registers it.
def add_trigger(name, options = {})
  trigger_class   = Trigger[name]
  trigger_options = options.merge(:logger => logger)
  triggers << trigger_class.new(self, trigger_options)
end
170
+
171
# Runs every watch in its own thread against the current pid, then
# dispatches the distinct events the watches produced. Dispatching stops as
# soon as one of the events has caused a state change (@transitioned).
def run_watches
  now = Time.now.to_i

  # start all watches first so they run concurrently
  workers = watches.map do |watch|
    [watch, Thread.new { Thread.current[:events] = watch.run(actual_pid, now) }]
  end

  @transitioned = false

  # collect (and log) the results in watch order
  fired = []
  workers.each do |watch, worker|
    worker.join
    produced = worker[:events]
    next unless produced.size > 0

    logger.info "#{watch.name} dispatched: #{produced.join(',')}"
    fired << produced
  end

  fired.flatten.uniq.each do |event|
    break if @transitioned
    dispatch!(event)
  end
end
192
+
193
# Reacts to a command string: "boot", "start", "stop", "restart" or
# "unmonitor". Any other value is ignored.
def handle_user_command(cmd)
  case cmd
  when "boot"
    # Only used while bluepill itself is starting up: adopt a process that
    # was already running, otherwise mark it as starting.
    self.state = process_running?(true) ? 'up' : 'starting'

  when "start"
    if process_running?(true) && daemonize?
      logger.warning("Refusing to re-run start command on an automatically daemonized process to preserve currently running process pid file.")
      nil
    else
      dispatch!(:start)
    end

  when "stop"
    stop_process
    dispatch!(:unmonitor)

  when "restart"
    restart_process

  when "unmonitor"
    # Reset the triggers first so that any scheduled events get cleared.
    triggers.each { |trigger| trigger.reset! }
    dispatch!(:unmonitor)
  end
end
225
+
226
+ # System Process Methods
227
# Reports whether the monitored process is alive by probing it with signal 0.
#
# The answer is memoized in @process_running until that variable is reset to
# nil; pass force = true to drop the memo and probe again right away.
# A negative answer is memoized too (the previous `||=` re-probed a dead
# process on every single call). Whenever the process is found dead the
# cached pid is cleared.
#
# Returns true or false.
def process_running?(force = false)
  @process_running = nil if force
  @process_running = signal_process(0) if @process_running.nil?
  clear_pid unless @process_running
  @process_running
end
233
+
234
# Runs the start command. When bluepill daemonizes the process itself the
# command is handed to System.daemonize; otherwise it is executed blocking,
# bounded by start_grace_time. Ticks are then skipped for start_grace_time.
def start_process
  logger.warning "Executing start command: #{start_command}"

  if daemonize?
    System.daemonize(start_command, system_command_options)
  else
    # This is a self-daemonizing process
    with_timeout(start_grace_time) do
      outcome = System.execute_blocking(start_command, system_command_options)
      next if outcome[:exit_code].zero?

      logger.warning "Start command execution returned non-zero exit code:"
      logger.warning outcome.inspect
    end
  end

  skip_ticks_for(start_grace_time)
end
254
+
255
# Stops the process. Uses the configured stop command (with {{PID}}
# substituted) when there is one, otherwise sends TERM. Afterwards the pid
# file is removed and ticks are skipped for stop_grace_time.
def stop_process
  if stop_command
    command_line = process_command(stop_command)
    logger.warning "Executing stop command: #{command_line}"

    with_timeout(stop_grace_time) do
      outcome = System.execute_blocking(command_line, system_command_options)
      next if outcome[:exit_code].zero?

      logger.warning "Stop command execution returned non-zero exit code:"
      logger.warning outcome.inspect
    end
  else
    logger.warning "Executing default stop command. Sending TERM signal to #{actual_pid}"
    signal_process("TERM")
  end

  # TODO: we only write the pid file if we daemonize, should we only unlink it if we daemonize?
  unlink_pid

  skip_ticks_for(stop_grace_time)
end
277
+
278
# Restarts the process. Without a restart command the process is merely
# stopped (a later tick is expected to start it again); with one, the
# command is run with {{PID}} substituted, bounded by restart_grace_time,
# and ticks are skipped for restart_grace_time.
def restart_process
  unless restart_command
    logger.warning "No restart_command specified. Must stop and start to restart"
    # the tick will bring it back.
    return stop_process
  end

  command_line = process_command(restart_command)
  logger.warning "Executing restart command: #{command_line}"

  with_timeout(restart_grace_time) do
    outcome = System.execute_blocking(command_line, system_command_options)
    next if outcome[:exit_code].zero?

    logger.warning "Restart command execution returned non-zero exit code:"
    logger.warning outcome.inspect
  end

  skip_ticks_for(restart_grace_time)
end
300
+
301
# True when the daemonize setting is truthy, false otherwise.
def daemonize?
  daemonize ? true : false
end
304
+
305
# True when the monitor_children setting is truthy, false otherwise.
def monitor_children?
  monitor_children ? true : false
end
308
+
309
# Sends signal +code+ to the current pid.
#
# Returns true when ::Process.kill did not raise, false when it raised any
# StandardError.
def signal_process(code)
  begin
    ::Process.kill(code, actual_pid)
  rescue StandardError
    return false
  end
  true
end
315
+
316
# Pid of the monitored process.
#
# Returns the cached pid when there is one; otherwise reads it from pid_file.
# Returns nil when no pid_file is configured, when the file is missing (a
# warning is logged), or when the file does not contain a positive integer.
# The last case matters: a blank or garbage pid file used to yield pid 0,
# and signalling pid 0 addresses the caller's whole process group.
def actual_pid
  @actual_pid ||= begin
    if pid_file
      if File.exist?(pid_file) # File.exists? was removed in Ruby 3.2
        pid = File.read(pid_file).to_i
        pid if pid > 0
      else
        logger.warning("pid_file #{pid_file} does not exist or cannot be read")
        nil
      end
    end
  end
end

# Overrides the cached pid.
def actual_pid=(pid)
  @actual_pid = pid
end

# Forgets the cached pid so the next actual_pid call re-reads the pid file.
def clear_pid
  @actual_pid = nil
end
337
+
338
# Deletes the pid file when one is configured and present on disk.
# Uses File.exist? because File.exists? was removed in Ruby 3.2.
def unlink_pid
  File.unlink(pid_file) if pid_file && File.exist?(pid_file)
end
341
+
342
+ # Internal State Methods
343
# Pushes the "skip ticks until" deadline forward by +seconds+ (converted
# with to_i), counting from the existing deadline when one is set and from
# the current time otherwise.
#
# TODO: should this be additive or longest wins?
# i.e. if two calls for skip_ticks_for come in for 5 and 10, should it skip for 10 or 15?
def skip_ticks_for(seconds)
  starting_point = skip_ticks_until || Time.now.to_i
  self.skip_ticks_until = starting_point + seconds.to_i
end
348
+
349
# Truthy while a skip deadline is set and still lies in the future.
# Returns nil when no deadline is set.
def skipping_ticks?
  deadline = skip_ticks_until
  deadline && deadline > Time.now.to_i
end
352
+
353
# Brings the child list in line with what System.get_children reports for
# the current pid: dead children are dropped and every newly found child pid
# gets a wrapper copied from child_process_template, put straight into "up".
def refresh_children!
  # Drop wrappers whose process is gone
  @children.reject! { |known_child| !known_child.process_running?(true) }

  # Pids reported by the system that we do not track yet
  discovered_pids = System.get_children(actual_pid) - @children.map { |known_child| known_child.actual_pid }

  unless discovered_pids.empty?
    logger.info "Existing children: #{@children.collect{|c| c.actual_pid}.join(",")}. Got new children: #{discovered_pids.inspect} for #{actual_pid}"
  end

  # Wrap each new pid in a copy of the template
  discovered_pids.each do |pid|
    wrapper = child_process_template.deep_copy

    wrapper.name       = "<child(pid:#{pid})>"
    wrapper.actual_pid = pid
    wrapper.logger     = logger.prefix_with(wrapper.name)

    wrapper.initialize_state_machines
    wrapper.state = "up"

    @children << wrapper
  end
end
378
+
379
# Returns an independent copy of this object produced by a Marshal
# dump/load round trip.
def deep_copy
  serialized = Marshal.dump(self)
  Marshal.load(serialized)
end
382
+
383
# Returns +cmd+ as a string with every "{{PID}}" placeholder replaced by the
# current pid.
def process_command(cmd)
  pid_text = actual_pid.to_s
  cmd.to_s.gsub("{{PID}}", pid_text)
end
386
+
387
# Options hash handed to the System command helpers, built from the
# matching attributes of this process.
def system_command_options
  option_keys = [:uid, :gid, :working_dir, :pid_file, :logger, :stdin, :stdout, :stderr]
  option_keys.inject({}) do |command_options, key|
    command_options[key] = send(key)
    command_options
  end
end
399
+
400
# Runs the block under Timeout.timeout with a limit of +secs+ seconds
# (converted with to_f) and returns the block's value. If the limit is hit,
# two error lines are logged and the "unmonitor" event is dispatched.
def with_timeout(secs, &blk)
  begin
    Timeout.timeout(secs.to_f, &blk)
  rescue Timeout::Error
    logger.err "Execution is taking longer than expected. Unmonitoring."
    logger.err "Did you forget to tell bluepill to daemonize this process?"
    dispatch!("unmonitor")
  end
end
408
+ end
409
+ end
410
+
@@ -0,0 +1,17 @@
1
module Bluepill
  module ProcessConditions
    # Condition that never inspects the process: it always measures 1 and
    # its check always passes.
    class AlwaysTrue < ProcessCondition
      # options - hash; :below is stored but not consulted by this condition.
      def initialize(options = {})
        @below = options[:below]
      end

      # Always measures the constant 1, whatever the pid.
      def run(pid)
        1
      end

      # Every measured value passes.
      def check(value)
        true
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Bluepill
  module ProcessConditions
    # Condition on CPU usage: passes while the measured value stays below
    # the :below option.
    class CpuUsage < ProcessCondition
      # options - hash; :below is the threshold compared against in #check.
      def initialize(options = {})
        @below = options[:below]
      end

      # Measures the CPU usage System.cpu_usage reports for +pid+, as a
      # Float (third col in the ps axu output).
      def run(pid)
        usage = System.cpu_usage(pid)
        usage.to_f
      end

      # Passes when the measured value is strictly below the threshold.
      def check(value)
        value < @below
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Bluepill
  module ProcessConditions
    # Condition on memory usage: passes while the measured value, read as
    # kilobytes, stays below the :below option.
    class MemUsage < ProcessCondition
      MB = 1024 ** 2
      FORMAT_STR = "%d%s"
      MB_LABEL = "MB"
      KB_LABEL = "KB"

      # options - hash; :below is the threshold compared against in #check.
      def initialize(options = {})
        @below = options[:below]
      end

      # Measures the memory usage System.memory_usage reports for +pid+, as
      # a Float (rss is on the 5th col).
      def run(pid)
        usage = System.memory_usage(pid)
        usage.to_f
      end

      # Passes when value.kilobytes is strictly below the threshold.
      # NOTE(review): Numeric#kilobytes is not defined in this file;
      # presumably it comes from ActiveSupport. Confirm.
      def check(value)
        value.kilobytes < @below
      end

      # Renders the value with a unit label: whole megabytes once
      # value.kilobytes reaches MB, otherwise the value itself with "KB".
      def format_value(value)
        in_megabytes = value.kilobytes >= MB
        amount, label = in_megabytes ? [(value / 1024).round, MB_LABEL] : [value, KB_LABEL]
        FORMAT_STR % [amount, label]
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Bluepill
  module ProcessConditions
    # Base class for process conditions. Subclasses must provide #run and
    # #check; #format_value may be overridden.
    class ProcessCondition
      # Measures something about the process with the given pid.
      # Raises RuntimeError unless overridden.
      def run(pid)
        raise RuntimeError, "Implement in subclass!"
      end

      # Decides whether a measured value passes.
      # Raises RuntimeError unless overridden.
      def check(value)
        raise RuntimeError, "Implement in subclass!"
      end

      # Display form of a measured value; the value itself by default.
      def format_value(value)
        value
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Bluepill
  module ProcessConditions
    # Resolves a condition name to the constant of the same (camel-cased)
    # name inside this module.
    # NOTE(review): String#camelcase and String#constantize are not defined
    # in this file; presumably they come from ActiveSupport. Confirm.
    def self.[](name)
      class_name = name.to_s.camelcase
      "#{self}::#{class_name}".constantize
    end
  end
end
8
+
9
+ require "bluepill/process_conditions/process_condition"
10
+ Dir["#{File.dirname(__FILE__)}/process_conditions/*.rb"].each do |pc|
11
+ require pc
12
+ end
13
+
@@ -0,0 +1,35 @@
1
+ require 'socket'
2
+
3
module Bluepill
  # Helpers for the UNIX domain sockets kept under <base_dir>/socks.
  module Socket
    TIMEOUT = 10
    extend self

    # Opens a client connection to the socket of the named application.
    def client(base_dir, name)
      UNIXSocket.open(socket_path(base_dir, name))
    end

    # Opens the listening socket for the named application and returns the
    # UNIXServer.
    #
    # When the socket file already exists, a probe connection decides what
    # to do:
    # * connection refused - the file is stale; it is deleted and a fresh
    #   server socket is returned.
    # * connection accepted - another server is alive; an error is reported
    #   and the process exits with status 7.
    #
    # The earlier version returned the probe client socket from inside the
    # inner begin block, which handed callers a UNIXSocket instead of a
    # UNIXServer and made the "already running" branch unreachable.
    def server(base_dir, name)
      path = socket_path(base_dir, name)
      begin
        UNIXServer.open(path)
      rescue Errno::EADDRINUSE
        # if sock file has been created. test to see if there is a server
        begin
          UNIXSocket.open(path).close
        rescue Errno::ECONNREFUSED
          File.delete(path)
          UNIXServer.open(path)
        else
          message = "Server is already running!"
          # No logger is defined in this module itself; fall back to stderr
          # unless one has been made available elsewhere.
          if respond_to?(:logger, true)
            logger.err(message)
          else
            $stderr.puts(message)
          end
          exit(7)
        end
      end
    end

    # Path of the socket file for the named application.
    def socket_path(base_dir, name)
      File.join(base_dir, 'socks', name + ".sock")
    end
  end
end
35
+