kostya-bluepill 0.0.60.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/.gitignore +10 -0
  2. data/.rspec +1 -0
  3. data/DESIGN.md +10 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +22 -0
  6. data/README.md +305 -0
  7. data/Rakefile +38 -0
  8. data/bin/bluepill +104 -0
  9. data/bluepill.gemspec +37 -0
  10. data/examples/example.rb +87 -0
  11. data/examples/new_example.rb +89 -0
  12. data/examples/new_runit_example.rb +29 -0
  13. data/examples/runit_example.rb +26 -0
  14. data/lib/bluepill.rb +38 -0
  15. data/lib/bluepill/application.rb +201 -0
  16. data/lib/bluepill/application/client.rb +8 -0
  17. data/lib/bluepill/application/server.rb +23 -0
  18. data/lib/bluepill/condition_watch.rb +50 -0
  19. data/lib/bluepill/controller.rb +110 -0
  20. data/lib/bluepill/dsl.rb +12 -0
  21. data/lib/bluepill/dsl/app_proxy.rb +25 -0
  22. data/lib/bluepill/dsl/process_factory.rb +122 -0
  23. data/lib/bluepill/dsl/process_proxy.rb +44 -0
  24. data/lib/bluepill/group.rb +72 -0
  25. data/lib/bluepill/process.rb +480 -0
  26. data/lib/bluepill/process_conditions.rb +14 -0
  27. data/lib/bluepill/process_conditions/always_true.rb +18 -0
  28. data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
  29. data/lib/bluepill/process_conditions/http.rb +58 -0
  30. data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
  31. data/lib/bluepill/process_conditions/process_condition.rb +22 -0
  32. data/lib/bluepill/process_statistics.rb +27 -0
  33. data/lib/bluepill/socket.rb +58 -0
  34. data/lib/bluepill/system.rb +236 -0
  35. data/lib/bluepill/trigger.rb +59 -0
  36. data/lib/bluepill/triggers/flapping.rb +56 -0
  37. data/lib/bluepill/util/rotational_array.rb +20 -0
  38. data/lib/bluepill/version.rb +4 -0
  39. data/spec/lib/bluepill/process_statistics_spec.rb +24 -0
  40. data/spec/lib/bluepill/system_spec.rb +36 -0
  41. data/spec/spec_helper.rb +19 -0
  42. metadata +304 -0
@@ -0,0 +1,480 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ # fixes problem with loading on systems with rubyist-aasm installed
4
+ gem "state_machine"
5
+
6
+ require "state_machine"
7
+ require "daemons"
8
+
9
+ module Bluepill
10
+ class Process
11
# Names of the per-process settings that may be passed in the options hash
# given to the constructor. Frozen so the shared list cannot be mutated by
# accident at runtime.
CONFIGURABLE_ATTRIBUTES = [
  # commands
  :start_command,
  :stop_command,
  :restart_command,

  # standard streams
  :stdout,
  :stderr,
  :stdin,

  # execution environment
  :daemonize,
  :pid_file,
  :working_dir,
  :environment,

  # grace periods
  :start_grace_time,
  :stop_grace_time,
  :restart_grace_time,

  # identity
  :uid,
  :gid,

  :cache_actual_pid,

  # child monitoring
  :monitor_children,
  :child_process_factory,

  :pid_command,
  :auto_start,

  :supplementary_groups,

  :stop_signals
].freeze
44
+
45
# Runtime state kept alongside the configurable settings.
attr_accessor :name, :watches, :triggers, :logger, :skip_ticks_until, :process_running
# One reader/writer pair for every entry in CONFIGURABLE_ATTRIBUTES.
attr_accessor(*CONFIGURABLE_ATTRIBUTES)
# Read-only collections.
attr_reader :children, :statistics
48
+
49
state_machine :initial => :unmonitored do
  # Idle states: only an event (external or internal) moves the process out
  # of them. :down means we know the process is not running; :unmonitored
  # means we do not care whether it is running.
  state :unmonitored, :up, :down

  # Transitional states: the process is expected to leave these after a
  # certain period of time.
  state :starting, :stopping, :restarting

  event :tick do
    transition :starting => :up, :if => :process_running?
    transition :starting => :down, :unless => :process_running?

    transition :up => :up, :if => :process_running?
    transition :up => :down, :unless => :process_running?

    # The process failed to die after entering the stopping state, so the
    # state is changed to reflect reality.
    transition :stopping => :up, :if => :process_running?
    transition :stopping => :down, :unless => :process_running?

    transition :down => :up, :if => :process_running?
    transition :down => :starting, :unless => :process_running?

    transition :restarting => :up, :if => :process_running?
    transition :restarting => :down, :unless => :process_running?
  end

  event(:start) { transition [:unmonitored, :down] => :starting }

  event(:stop) { transition :up => :stopping }

  event(:unmonitor) { transition any => :unmonitored }

  event(:restart) { transition [:up, :down] => :restarting }

  # Hooks that run before a transition.
  before_transition any => any, :do => :notify_triggers
  before_transition :stopping => any, :do => :clean_threads

  # Hooks that run after entering a transitional state.
  after_transition any => :starting, :do => :start_process
  after_transition any => :stopping, :do => :stop_process
  after_transition any => :restarting, :do => :restart_process

  after_transition any => any, :do => :record_transition
end
102
+
103
# Builds a process wrapper.
#
# process_name - stored as the process name.
# checks       - enumerable yielding (name, opts) pairs; a name known to
#                Trigger is registered as a trigger, anything else as a watch.
# options      - hash; :actual_pid and :logger are read directly, and every
#                key listed in CONFIGURABLE_ATTRIBUTES is applied through its
#                writer when present.
def initialize(process_name, checks, options = {})
  @name        = process_name
  @event_mutex = Monitor.new
  @watches     = []
  @triggers    = []
  @children    = []
  @threads     = []
  @statistics  = ProcessStatistics.new
  @actual_pid  = options[:actual_pid]
  self.logger  = options[:logger]

  checks.each do |check_name, check_opts|
    Trigger[check_name] ? add_trigger(check_name, check_opts) : add_watch(check_name, check_opts)
  end

  # Defaults; overridden just below when the options hash says otherwise.
  @monitor_children   = false
  @cache_actual_pid   = true
  @start_grace_time   = 3
  @stop_grace_time    = 3
  @restart_grace_time = 3
  @environment        = {}

  CONFIGURABLE_ATTRIBUTES.each do |attribute_name|
    next unless options.has_key?(attribute_name)
    send("#{attribute_name}=", options[attribute_name])
  end

  # Let state_machine do its initialization; the empty argument list is intentional.
  super()
end
135
+
136
# Runs one monitoring cycle: fires the state machine's tick event and, while
# the process is up, runs the watches and ticks monitored children.
def tick
  return if skipping_ticks?
  self.skip_ticks_until = nil

  # The running-state memo is only valid for a single tick.
  @process_running = nil

  # Thread cleanup happens here because the stopping state isn't used.
  clean_threads if unmonitored?

  # Fire the state machine's tick event.
  super

  return unless up?
  run_watches

  return unless monitor_children?
  refresh_children!
  children.each { |child| child.tick }
end
158
+
159
# Stores the logger and hands the same logger to every watch and trigger.
def logger=(logger)
  @logger = logger
  [watches, triggers].each do |observers|
    observers.each { |observer| observer.logger = logger }
  end
end
164
+
165
+ # State machine methods
166
# Records the event in the statistics and then invokes the method named
# after it, all while holding the event monitor.
#
# event  - event name (anything whose to_s is the method to call).
# reason - optional description stored with the statistics entry.
def dispatch!(event, reason = nil)
  @event_mutex.synchronize do
    @statistics.record_event(event, reason)
    send(event.to_s)
  end
end
172
+
173
# Bookkeeping after a state change; loopback transitions are ignored.
def record_transition(transition)
  return if transition.loopback?

  @transitioned = true

  # A state change invalidates whatever the watches remembered.
  watches.each { |watch| watch.clear_history! }

  # It also means the child list has to be re-populated.
  if monitor_children?
    logger.warn "Clearing child list"
    children.clear
  end

  logger.info "Going from #{transition.from_name} => #{transition.to_name}"
end
188
+
189
# Passes the transition to every registered trigger.
def notify_triggers(transition)
  triggers.each do |trigger|
    trigger.notify(transition)
  end
end
192
+
193
+ # Watch related methods
194
# Registers a ConditionWatch built from the name and options, with this
# process's logger merged into the options.
def add_watch(name, options = {})
  watch_options = options.merge(:logger => logger)
  watches << ConditionWatch.new(name, watch_options)
end
197
+
198
# Registers an instance of the trigger class looked up by name, bound to
# this process and given this process's logger.
def add_trigger(name, options = {})
  trigger_options = options.merge(:logger => logger)
  triggers << Trigger[name].new(self, trigger_options)
end
201
+
202
# Runs every watch in its own thread, collects the events they return and
# dispatches them in order until one of them causes a state change.
def run_watches
  now = Time.now.to_i

  # Each thread stashes its watch's result in the :events thread-local.
  running = watches.map do |watch|
    [watch, Thread.new { Thread.current[:events] = watch.run(actual_pid, now) }]
  end

  @transitioned = false

  pending = []
  running.each do |watch, thread|
    thread.join
    fired = thread[:events]
    next if fired.size == 0

    logger.info "#{watch.name} dispatched: #{fired.join(',')}"
    fired.each { |event| pending << [event, watch.to_s] }
  end

  pending.each do |event, reason|
    # record_transition sets the flag; remaining events are dropped.
    break if @transitioned
    dispatch!(event, reason)
  end
end
225
+
226
# Sets the starting state from a fresh liveness check: 'up' when the process
# is running, otherwise 'unmonitored' when auto_start is exactly false and
# 'down' in every other case.
def determine_initial_state
  self.state =
    if process_running?(true)
      'up'
    elsif auto_start == false # must be the literal false, nil does not count
      'unmonitored'
    else
      'down'
    end
end
233
+
234
# Carries out a user-issued command given as a string ("start", "stop",
# "restart" or "unmonitor"); any other value does nothing.
def handle_user_command(cmd)
  case cmd
  when "start"
    if process_running?(true)
      return logger.warn("Refusing to re-run start command on an already running process.")
    end
    dispatch!(:start, "user initiated")
  when "stop"
    stop_process
    dispatch!(:unmonitor, "user initiated")
  when "restart"
    restart_process
  when "unmonitor"
    # Reset the triggers first so that any scheduled events get cleared.
    triggers.each { |trigger| trigger.reset! }
    dispatch!(:unmonitor, "user initiated")
  end
end
254
+
255
+ # System Process Methods
256
# Reports whether the process answers signal 0, memoizing the answer until
# the memo is cleared.
#
# force - when true, discard the memoized answer and probe again.
#
# Returns true or false. Clears the stored pid whenever the answer is false.
def process_running?(force = false)
  @process_running = nil if force # clear existing state if forced

  # Compare against nil rather than using ||= so that a negative answer is
  # memoized as well; otherwise a dead process is signalled again on every call.
  @process_running = signal_process(0) if @process_running.nil?

  # the process isn't running, so we should clear the PID
  clear_pid unless @process_running
  @process_running
end
264
+
265
# Runs the start command, either through System.daemonize or as a blocking
# command bounded by the start grace time, then skips ticks for that long.
def start_process
  logger.warn "Executing start command: #{start_command}"

  if daemonize?
    System.daemonize(start_command, system_command_options)
  else
    # The process daemonizes itself, so the command should return promptly.
    with_timeout(start_grace_time) do
      outcome = System.execute_blocking(start_command, system_command_options)
      next if outcome[:exit_code].zero?

      logger.warn "Start command execution returned non-zero exit code:"
      logger.warn outcome.inspect
    end
  end

  skip_ticks_for(start_grace_time)
end
285
+
286
# Stops the process using, in order of preference: the configured stop
# command, the configured stop signal sequence, or a plain TERM signal.
# Afterwards removes the pid file and skips ticks for the stop grace time.
def stop_process
  if stop_command
    cmd = prepare_command(stop_command)
    logger.warn "Executing stop command: #{cmd}"

    with_timeout(stop_grace_time) do
      outcome = System.execute_blocking(cmd, system_command_options)
      next if outcome[:exit_code].zero?

      logger.warn "Stop command execution returned non-zero exit code:"
      logger.warn outcome.inspect
    end
  elsif stop_signals
    # Send the stop signals from a background thread, with a configurable
    # delay between each; the thread works on a copy of the list.
    logger.warn "Sending stop signals to #{actual_pid}"
    @threads << Thread.new(self, stop_signals.clone) do |process, remaining|
      signal = remaining.shift
      logger.info "Sending signal #{signal} to #{process.actual_pid}"
      process.signal_process(signal) # send first signal

      until remaining.empty?
        # The list is expected to alternate signal, delay, signal, ... and so
        # have an odd length; that validation is not done in this method.
        delay, signal = remaining.shift(2)

        logger.debug "Sleeping for #{delay} seconds"
        sleep delay

        # Stop early once the process can no longer be reached.
        unless process.signal_process(0)
          logger.debug "Process has terminated."
          break
        end

        logger.info "Sending signal #{signal} to #{process.actual_pid}"
        process.signal_process(signal)
      end
    end
  else
    logger.warn "Executing default stop command. Sending TERM signal to #{actual_pid}"
    signal_process("TERM")
  end

  # TODO: we only write the pid file if we daemonize, should we only unlink it if we daemonize?
  unlink_pid

  skip_ticks_for(stop_grace_time)
end
332
+
333
# Restarts the process with the configured restart command; without one it
# only stops the process.
def restart_process
  unless restart_command
    logger.warn "No restart_command specified. Must stop and start to restart"
    # Only stop here; the tick will bring the process back.
    return stop_process
  end

  cmd = prepare_command(restart_command)
  logger.warn "Executing restart command: #{cmd}"

  with_timeout(restart_grace_time) do
    outcome = System.execute_blocking(cmd, system_command_options)
    next if outcome[:exit_code].zero?

    logger.warn "Restart command execution returned non-zero exit code:"
    logger.warn outcome.inspect
  end

  skip_ticks_for(restart_grace_time)
end
355
+
356
# Kills every background thread this process has started and empties the list.
def clean_threads
  @threads.each(&:kill)
  @threads.clear
end
360
+
361
# True when the daemonize setting is truthy, false otherwise.
def daemonize?
  daemonize ? true : false
end
364
+
365
# True when the monitor_children setting is truthy, false otherwise.
def monitor_children?
  monitor_children ? true : false
end
368
+
369
# Sends a signal to the process identified by actual_pid.
#
# code - signal number, or a signal name as String/Symbol (upcased before use).
#
# Returns true when the signal was delivered, false when delivery failed
# (the failure is logged at error level).
def signal_process(code)
  code = code.to_s.upcase if code.is_a?(String) || code.is_a?(Symbol)
  ::Process.kill(code, actual_pid)
  true
rescue StandardError => e
  # Rescue StandardError only: rescuing Exception would also swallow
  # Interrupt, SystemExit and NoMemoryError.
  logger.error "Failed to signal process #{actual_pid} with code #{code}: #{e}"
  false
end
377
+
378
# True when the cache_actual_pid setting is truthy, false otherwise.
def cache_actual_pid?
  @cache_actual_pid ? true : false
end
381
+
382
# Returns the pid obtained from the pid command when one is configured, and
# from the pid file otherwise.
def actual_pid
  return pid_from_command if pid_command
  pid_from_file
end
385
+
386
# Reads the pid from the pid file.
#
# Returns the cached pid when caching is enabled and a pid is already known.
# Otherwise returns the positive integer found in the pid file, or nil when
# no pid file is configured, the file is missing or unreadable (a warning is
# logged), or its content is not a positive integer. The result is stored as
# the current pid.
def pid_from_file
  return @actual_pid if cache_actual_pid? && @actual_pid

  @actual_pid = begin
    if pid_file
      # Read directly instead of checking for existence first: this avoids
      # File.exists? (removed in Ruby 3.2) and the gap between check and read.
      pid = File.read(pid_file).to_i
      # 0 and negative values address process groups when passed to
      # Process.kill, so they must never be treated as a pid.
      pid > 0 ? pid : nil
    end
  rescue SystemCallError
    logger.warn("pid_file #{pid_file} does not exist or cannot be read")
    nil
  end
end
400
+
401
# Runs the configured pid command in a shell and returns its trimmed output
# as an integer when it consists solely of digits; returns nil otherwise.
def pid_from_command
  output = `#{pid_command}`.strip
  output =~ /\A\d+\z/ ? output.to_i : nil
end
405
+
406
# Overrides the stored pid with the given value.
def actual_pid=(pid)
  @actual_pid = pid
end
409
+
410
# Forgets the stored pid.
def clear_pid
  self.actual_pid = nil
end
413
+
414
# Removes the pid file when one is configured. A file that is already gone
# is not an error.
#
# Returns the result of File.unlink on success, nil otherwise.
def unlink_pid
  # No existence check: File.exists? was removed in Ruby 3.2, and the rescue
  # below already covers a missing file.
  File.unlink(pid_file) if pid_file
rescue Errno::ENOENT
  nil
end
418
+
419
+ # Internal State Methods
420
# Pushes the skip deadline forward by the given number of seconds, counting
# from the existing deadline when there is one and from the current time
# otherwise.
def skip_ticks_for(seconds)
  # TODO: should this be additive or longest wins?
  # i.e. if two calls for skip_ticks_for come in for 5 and 10, should it skip for 10 or 15?
  base = skip_ticks_until || Time.now.to_i
  self.skip_ticks_until = base + seconds.to_i
end
425
+
426
# Truthy while a skip deadline is set and still lies in the future.
def skipping_ticks?
  deadline = skip_ticks_until
  deadline && deadline > Time.now.to_i
end
429
+
430
# Brings the child list up to date: drops children that are no longer
# running and wraps newly discovered child pids in process objects.
def refresh_children!
  # Prune dead children first.
  @children.delete_if { |child| !child.process_running?(true) }

  # Whatever the system reports that we do not already track is new.
  discovered = System.get_children(actual_pid)
  new_children_pids = discovered - @children.map { |child| child.actual_pid }

  unless new_children_pids.empty?
    logger.info "Existing children: #{@children.collect{|c| c.actual_pid}.join(",")}. Got new children: #{new_children_pids.inspect} for #{actual_pid}"
  end

  # Build a process wrapper for each newly found child.
  new_children_pids.each do |child_pid|
    child_name = "<child(pid:#{child_pid})>"
    @children << child_process_factory.create_child_process(child_name, child_pid, logger)
  end
end
450
+
451
# Returns the command as a string with every "{{PID}}" placeholder replaced
# by the current pid.
def prepare_command(command)
  template = command.to_s
  template.gsub("{{PID}}", actual_pid.to_s)
end
454
+
455
# Collects the settings handed to System when running a command, keyed by
# setting name.
def system_command_options
  option_names = [
    :uid, :gid, :working_dir, :environment, :pid_file,
    :logger, :stdin, :stdout, :stderr, :supplementary_groups
  ]

  option_names.inject({}) do |collected, option_name|
    collected[option_name] = send(option_name)
    collected
  end
end
469
+
470
# Runs the block under a time limit.
#
# secs - limit in seconds (converted with to_f).
#
# Returns the block's value. When the limit is exceeded, logs the problem
# and dispatches the unmonitor event instead of raising.
def with_timeout(secs, &blk)
  Timeout.timeout(secs.to_f, &blk)
rescue Timeout::Error
  logger.error "Execution is taking longer than expected. Unmonitoring."
  logger.error "Did you forget to tell bluepill to daemonize this process?"
  # Symbol event plus a reason, matching every other dispatch! call site.
  dispatch!(:unmonitor, "execution timed out")
end
478
+ end
479
+ end
480
+