cloud66-bluepill 0.0.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/.rspec +1 -0
- data/DESIGN.md +10 -0
- data/Gemfile +10 -0
- data/LICENSE +22 -0
- data/README.md +349 -0
- data/Rakefile +38 -0
- data/bin/bluepill +124 -0
- data/bin/bpsv +3 -0
- data/bin/sample_forking_server +53 -0
- data/bluepill.gemspec +37 -0
- data/examples/example.rb +87 -0
- data/examples/new_example.rb +89 -0
- data/examples/new_runit_example.rb +29 -0
- data/examples/runit_example.rb +26 -0
- data/lib/bluepill.rb +38 -0
- data/lib/bluepill/application.rb +215 -0
- data/lib/bluepill/application/client.rb +8 -0
- data/lib/bluepill/application/server.rb +23 -0
- data/lib/bluepill/condition_watch.rb +51 -0
- data/lib/bluepill/controller.rb +122 -0
- data/lib/bluepill/dsl.rb +12 -0
- data/lib/bluepill/dsl/app_proxy.rb +25 -0
- data/lib/bluepill/dsl/process_factory.rb +122 -0
- data/lib/bluepill/dsl/process_proxy.rb +44 -0
- data/lib/bluepill/group.rb +72 -0
- data/lib/bluepill/logger.rb +63 -0
- data/lib/bluepill/process.rb +514 -0
- data/lib/bluepill/process_conditions.rb +14 -0
- data/lib/bluepill/process_conditions/always_true.rb +18 -0
- data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
- data/lib/bluepill/process_conditions/file_time.rb +26 -0
- data/lib/bluepill/process_conditions/http.rb +58 -0
- data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
- data/lib/bluepill/process_conditions/process_condition.rb +22 -0
- data/lib/bluepill/process_journal.rb +219 -0
- data/lib/bluepill/process_statistics.rb +27 -0
- data/lib/bluepill/socket.rb +58 -0
- data/lib/bluepill/system.rb +265 -0
- data/lib/bluepill/trigger.rb +60 -0
- data/lib/bluepill/triggers/flapping.rb +56 -0
- data/lib/bluepill/util/rotational_array.rb +20 -0
- data/lib/bluepill/version.rb +4 -0
- data/local-bluepill +129 -0
- data/spec/lib/bluepill/logger_spec.rb +3 -0
- data/spec/lib/bluepill/process_spec.rb +96 -0
- data/spec/lib/bluepill/process_statistics_spec.rb +24 -0
- data/spec/lib/bluepill/system_spec.rb +36 -0
- data/spec/spec_helper.rb +15 -0
- metadata +302 -0
data/lib/bluepill/dsl.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Entry point of the configuration DSL.
  #
  # Builds an AppProxy for +app_name+, evaluates the configuration block
  # against it and then calls +load+ on the proxied application.
  #
  # A block that declares no parameters is evaluated with the proxy as
  # +self+; a block that declares a parameter additionally receives the
  # proxy as its argument.
  #
  # @param app_name [String, Symbol] name handed to AppProxy
  # @param options  [Hash] options handed to AppProxy
  # @raise [ArgumentError] when no configuration block is given (previously
  #   this surfaced as a NoMethodError on +nil.arity+)
  # @return whatever +app_proxy.app.load+ returns
  def self.application(app_name, options = {}, &block)
    raise ArgumentError, "Bluepill.application requires a configuration block" unless block

    app_proxy = AppProxy.new(app_name, options)
    if block.arity == 0
      app_proxy.instance_eval(&block)
    else
      app_proxy.instance_exec(app_proxy, &block)
    end
    app_proxy.app.load
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Receiver of the application-level DSL. It carries application-wide
  # defaults (APP_ATTRIBUTES) and turns each +process+ declaration into a
  # process registered on the wrapped Application.
  class AppProxy
    # Application-level settings that are copied into every process
    # declared through #process.
    APP_ATTRIBUTES = [:working_dir, :uid, :gid, :environment, :auto_start].freeze

    attr_accessor(*APP_ATTRIBUTES)
    attr_reader :app

    # @param app_name [#to_s] application name (stringified before use)
    # @param options  [Hash] passed straight to Application.new
    def initialize(app_name, options)
      @app = Application.new(app_name.to_s, options)
    end

    # Declares a process.
    #
    # The current values of APP_ATTRIBUTES seed the attribute hash given to
    # ProcessFactory; the factory builds the process using the application's
    # pids_dir. The :group attribute is removed from the factory attributes
    # and passed to Application#add_process separately.
    #
    # @param process_name name forwarded to the factory
    # @param process_block configuration block forwarded to the factory
    # @return whatever Application#add_process returns
    def process(process_name, &process_block)
      attributes = {}
      APP_ATTRIBUTES.each { |attr| attributes[attr] = send(attr) }

      process_factory = ProcessFactory.new(attributes, process_block)
      process = process_factory.create_process(process_name, @app.pids_dir)
      group = process_factory.attributes.delete(:group)

      @app.add_process(process, group)
    end
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Builds Process objects (via ProcessProxy) from DSL attributes and
  # validates the resulting configuration. Any validation failure prints a
  # "Config Error" message to $stderr and exits with status 6.
  class ProcessFactory
    attr_reader :attributes

    # Registries shared by every factory so that duplicates are detected
    # across all declared processes.
    @@process_keys = {}
    @@pid_files = {}

    # @param attributes [Hash] attribute hash; it is mutated by this factory
    #   and by the ProcessProxy evaluating +process_block+
    # @param process_block [Proc, nil] DSL block describing the process
    def initialize(attributes, process_block)
      @attributes = attributes
      @process_block = process_block
    end

    # Creates a top-level process.
    #
    # Assigns a default pid file when none was given, evaluates the DSL
    # block through ProcessProxy, attaches a child factory when
    # :monitor_children is set, validates, and converts to a process.
    def create_process(name, pids_dir)
      assign_default_pid_file(name, pids_dir)

      process = ProcessProxy.new(name, @attributes, @process_block)
      child_process_block = @attributes.delete(:child_process_block)
      if @attributes[:monitor_children]
        @attributes[:child_process_factory] = ProcessFactory.new(@attributes, child_process_block)
      end

      validate_process!(process)
      process.to_process
    end

    # Creates a process wrapper for an already running child with the given
    # pid. Only the grace-time attributes are inherited; the child must
    # define a stop command (see #validate_child_process!).
    def create_child_process(name, pid, logger)
      attributes = {}
      [:start_grace_time, :stop_grace_time, :restart_grace_time].each do |key|
        attributes[key] = @attributes[key]
      end
      attributes[:actual_pid] = pid
      attributes[:logger] = logger

      child = ProcessProxy.new(name, attributes, @process_block)
      validate_child_process!(child)

      process = child.to_process
      process.determine_initial_state
      process
    end

    protected

    # Derives "<pids_dir>/<group>_<name>.pid" (characters outside
    # [A-Za-z0-9_-] replaced by "_") unless :pid_file is already present.
    def assign_default_pid_file(process_name, pids_dir)
      return if @attributes.key?(:pid_file)

      group_name = @attributes[:group]
      default_pid_name = [group_name, process_name].compact.join('_').gsub(/[^A-Za-z0-9_\-]/, "_")
      @attributes[:pid_file] = File.join(pids_dir, default_pid_name + ".pid")
    end

    # Runs every configuration check for a top-level process, in order:
    # unique group:name, required attributes, unique pid file, stop_signals.
    def validate_process!(process)
      # validate uniqueness of group:process
      process_key = [process.attributes[:group], process.name].join(":")
      if @@process_keys.key?(process_key)
        message = "Config Error: You have two entries for the process name '#{process.name}'"
        message += " in the group '#{process.attributes[:group]}'" if process.attributes.key?(:group)
        config_error!(message)
      end
      @@process_keys[process_key] = 0

      # validate required attributes
      [:start_command].each do |required_attr|
        next if process.attributes.key?(required_attr)
        config_error!("Config Error: You must specify a #{required_attr} for '#{process.name}'")
      end

      # validate uniqueness of pid files
      pid_key = process.attributes[:pid_file].strip
      if @@pid_files.key?(pid_key)
        config_error!("Config Error: You have two entries with the pid file: #{pid_key}")
      end
      @@pid_files[pid_key] = 0

      validate_stop_signals!(process.attributes[:stop_signals], process.attributes[:stop_grace_time])
    end

    # Checks that +stop_signals+ alternates Symbol, Integer, Symbol, ...,
    # has an odd number of elements, and that +stop_grace_time+ exceeds the
    # sum of the delays. Does nothing when +stop_signals+ is nil.
    #
    # Delays are checked against Integer: Fixnum no longer exists on
    # current Rubies and referencing it raises NameError.
    def validate_stop_signals!(stop_signals, stop_grace_time)
      return if stop_signals.nil?

      # Start with the more helpful error messages before the 'odd number' message.
      delay_sum = 0
      stop_signals.each_with_index do |s_or_d, i|
        if i.even?
          unless s_or_d.is_a?(Symbol)
            config_error!("Config Error: Invalid stop_signals!  Expected a symbol (signal) at position #{i} instead of '#{s_or_d}'.")
          end
        else
          unless s_or_d.is_a?(Integer)
            config_error!("Config Error: Invalid stop_signals!  Expected a number (delay) at position #{i} instead of '#{s_or_d}'.")
          end
          delay_sum += s_or_d
        end
      end

      unless stop_signals.size.odd?
        config_error!("Config Error: Invalid stop_signals!  Expected an odd number of elements.")
      end

      if stop_grace_time.nil? || stop_grace_time <= delay_sum
        config_error!("Config Error: Stop_grace_time should be greater than the sum of stop_signals delays!")
      end
    end

    # A monitored child can only be stopped through an explicit stop command.
    def validate_child_process!(child)
      return if child.attributes.has_key?(:stop_command)

      config_error!("Config Error: Invalid child process monitor for #{child.name}",
                    "You must specify a stop command to monitor child processes.")
    end

    # Prints each line to $stderr and terminates with exit status 6.
    def config_error!(*lines)
      lines.each { |line| $stderr.puts line }
      exit(6)
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Receiver of the per-process DSL. Unknown method calls are recorded as
  # entries of the attribute hash; +checks+ records watches; +to_process+
  # turns the collected data into a Process.
  class ProcessProxy
    attr_reader :attributes, :watches, :name

    # @param process_name name of the process being described
    # @param attributes [Hash] attribute hash, mutated in place by the DSL
    # @param process_block [Proc, nil] DSL block. Evaluated with the proxy
    #   as +self+; if it declares a parameter the proxy is also passed in.
    #   A nil block is accepted and simply leaves the attributes untouched
    #   (previously it raised NoMethodError on +nil.arity+).
    def initialize(process_name, attributes, process_block)
      @name = process_name
      @attributes = attributes
      @watches = {}

      return unless process_block

      if process_block.arity == 0
        instance_eval(&process_block)
      else
        instance_exec(self, &process_block)
      end
    end

    # Attribute DSL:
    #   foo = value / foo value  -> attributes[:foo] = value
    #   foo!                     -> attributes[:foo] = true
    #   foo (no args)            -> reads attributes[:foo] when the key exists
    # Anything else falls through to the default NoMethodError.
    def method_missing(name, *args)
      label = name.to_s
      if args.size == 1 && label =~ /^(.*)=$/
        @attributes[$1.to_sym] = args.first
      elsif args.size == 1
        @attributes[name.to_sym] = args.first
      elsif args.size == 0 && label =~ /^(.*)!$/
        @attributes[$1.to_sym] = true
      elsif args.empty? && @attributes.key?(name.to_sym)
        @attributes[name.to_sym]
      else
        super
      end
    end

    # Keeps respond_to? consistent with method_missing for the names that
    # can be resolved without knowing the argument count: setters, bang
    # flags and attributes that are already set.
    def respond_to_missing?(name, include_private = false)
      return true if name.to_s =~ /[=!]\z/
      @attributes.key?(name.to_sym) || super
    end

    # Registers a watch/trigger called +name+ with the given options.
    def checks(name, options = {})
      @watches[name] = options
    end

    # Enables child monitoring and stores the block that describes how
    # children are to be handled.
    def monitor_children(&child_process_block)
      @attributes[:monitor_children] = true
      @attributes[:child_process_block] = child_process_block
    end

    # Builds the Process from the collected name, watches and attributes.
    def to_process
      Process.new(@name, @watches, @attributes)
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # A named collection of processes. Ticks, initial-state detection and user
  # commands are fanned out to the member processes.
  class Group
    attr_accessor :name, :processes, :logger
    attr_accessor :process_logger

    # @param name group name (may be nil)
    # @param options [Hash] :logger is the logger used to derive
    #   per-process loggers
    def initialize(name, options = {})
      self.name = name
      self.processes = []
      self.logger = options[:logger]
    end

    # Adds +process+, giving it a logger prefixed with the process name.
    def add_process(process)
      process.logger = logger.prefix_with(process.name)
      processes << process
    end

    def tick
      processes.each { |process| process.tick }
    end

    def determine_initial_state
      processes.each { |process| process.determine_initial_state }
    end

    # Proxied events: #start, #unmonitor, #stop and #restart.
    #
    # Each forwards the command (as a String) to every member process, or
    # only to the one called +process_name+, running the handlers in
    # parallel threads and waiting for all of them.
    #
    # @return [Array<String>] "group:process" labels of affected processes
    [:start, :unmonitor, :stop, :restart].each do |event|
      command = event.to_s
      define_method(event) do |process_name = nil|
        threads = []
        affected = []
        processes.each do |process|
          next if process_name && process_name != process.name
          affected << [name, process.name].join(":")
          threads << Thread.new { process.handle_user_command(command) }
        end
        threads.each { |thread| thread.join }
        affected
      end
    end

    # Builds the status report as an array of lines ending with "".
    #
    # Without +process_name+: an optional "group:" header, one line per
    # process and one per monitored child. With +process_name+: the line for
    # that process (no indentation) followed by its statistics.
    def status(process_name = nil)
      lines = []
      if process_name.nil?
        prefix = name ? "  " : ""
        lines << "#{name}:" if name

        processes.each do |process|
          lines << "%s%s(pid:%s): %s" % [prefix, process.name, process.actual_pid, process.state]
          next unless process.monitor_children?
          process.children.each do |child|
            lines << "  %s%s: %s" % [prefix, child.name, child.state]
          end
        end
      else
        # The single-process view is never indented.
        prefix = ""
        processes.each do |process|
          next if process_name != process.name
          lines << "%s%s(pid:%s): %s" % [prefix, process.name, process.actual_pid, process.state]
          lines << process.statistics.to_s
        end
      end
      lines << ""
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Logging front-end with syslog-style level methods. A Logger either
  # writes to a backend (Syslog or a LoggerAdapter on a log file) or wraps
  # another Bluepill::Logger, in which case its prefix is added to the
  # prefix chain and the call is delegated upwards.
  class Logger
    LOG_METHODS = [:emerg, :alert, :crit, :err, :warning, :notice, :info, :debug].freeze

    # @param options [Hash]
    #   :logger   backend or parent Bluepill::Logger (created when absent)
    #   :prefix   prefix contributed by this logger
    #   :stdout   when truthy, every message is also echoed to $stdout
    #   :log_file / :identity are consumed by #create_logger
    def initialize(options = {})
      @options = options
      @logger = options[:logger] || self.create_logger
      @prefix = options[:prefix]
      @stdout = options[:stdout]
      @prefixes = {}
    end

    # One method per level: level(msg, prefix = []).
    # When wrapping another Bluepill::Logger the call is forwarded with this
    # logger's prefix prepended; otherwise the non-nil prefixes are rendered
    # as "[a:b] " in front of the message and written to the backend.
    LOG_METHODS.each do |method|
      define_method(method) do |msg, prefix = []|
        if @logger.is_a?(self.class)
          @logger.public_send(method, msg, [@prefix] + prefix)
        else
          s_prefix = prefix.size > 0 ? "[#{prefix.compact.join(':')}] " : ""
          if @stdout
            $stdout.puts("[#{method}]: #{s_prefix}#{msg}")
            $stdout.flush
          end
          @logger.public_send(method, "#{s_prefix}#{msg}")
        end
      end
    end

    # Returns (and memoizes per prefix) a child logger that delegates to
    # this one with +prefix+ added.
    def prefix_with(prefix)
      @prefixes[prefix] ||= self.class.new(:logger => self, :prefix => prefix)
    end

    # Re-creates the backend; child loggers delegate to their parent.
    def reopen
      if @logger.is_a?(self.class)
        @logger.reopen
      else
        @logger = create_logger
      end
    end

    protected

    # Uses a LoggerAdapter on :log_file when configured, otherwise Syslog
    # (identity defaults to 'bluepilld', facility LOCAL6).
    def create_logger
      if @options[:log_file]
        LoggerAdapter.new(@options[:log_file])
      else
        Syslog.close if Syslog.opened? # need to explicitly close it before reopening it
        Syslog.open(@options[:identity] || 'bluepilld', Syslog::LOG_PID, Syslog::LOG_LOCAL6)
      end
    end

    # ::Logger subclass that also answers to the syslog level names by
    # aliasing them to the closest ::Logger level.
    class LoggerAdapter < ::Logger
      LOGGER_EQUIVALENTS =
        {:debug => :debug, :err => :error, :warning => :warn, :info => :info, :emerg => :fatal, :alert => :warn, :crit => :fatal, :notice => :info}.freeze

      LOG_METHODS.each do |method|
        next if method == LOGGER_EQUIVALENTS[method]
        alias_method method, LOGGER_EQUIVALENTS[method]
      end
    end
  end
end
|
@@ -0,0 +1,514 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# fixes problem with loading on systems with rubyist-aasm installed
|
4
|
+
gem "state_machine"
|
5
|
+
|
6
|
+
require "state_machine"
|
7
|
+
require "daemons"
|
8
|
+
require "bluepill/system"
|
9
|
+
require "bluepill/process_journal"
|
10
|
+
|
11
|
+
module Bluepill
|
12
|
+
class Process
|
13
|
+
# Attributes that can be supplied through the options hash: each one gets an
# accessor, and #initialize assigns every key present in the options.
# Frozen so the shared list cannot be modified at runtime.
CONFIGURABLE_ATTRIBUTES = [
  :pre_start_command,
  :start_command,
  :stop_command,
  :restart_command,

  :stdout,
  :stderr,
  :stdin,

  :daemonize,
  :pid_file,
  :working_dir,
  :environment,

  :start_grace_time,
  :stop_grace_time,
  :restart_grace_time,

  :uid,
  :gid,

  :cache_actual_pid,

  :monitor_children,
  :child_process_factory,

  :pid_command,
  :auto_start,

  :supplementary_groups,

  :stop_signals,

  :on_start_timeout,
].freeze
|
49
|
+
|
50
|
+
attr_accessor :name, :watches, :triggers, :logger, :skip_ticks_until, :process_running
|
51
|
+
attr_accessor *CONFIGURABLE_ATTRIBUTES
|
52
|
+
attr_reader :children, :statistics
|
53
|
+
|
54
|
+
# Process life-cycle. Every transition of the :tick event is guarded by
# #process_running?, so a tick moves the state towards what is observed.
state_machine :initial => :unmonitored do
  # Idle states: only an event (external or internal) triggers a transition.
  # "down" means the process is known not to be running; "unmonitored"
  # means we do not care whether it is running.
  state :unmonitored, :up, :down

  # Transitional states: the process is expected to leave them after a
  # certain period of time.
  state :starting, :stopping, :restarting

  event :tick do
    transition :starting => :up, :if => :process_running?
    transition :starting => :down, :unless => :process_running?

    transition :up => :up, :if => :process_running?
    transition :up => :down, :unless => :process_running?

    # The process failed to die after entering the stopping state; change
    # the state to reflect reality.
    transition :stopping => :up, :if => :process_running?
    transition :stopping => :down, :unless => :process_running?

    transition :down => :up, :if => :process_running?
    transition :down => :starting, :unless => :process_running?

    transition :restarting => :up, :if => :process_running?
    transition :restarting => :down, :unless => :process_running?
  end

  event :start do
    transition [:unmonitored, :down] => :starting
  end

  event :stop do
    transition :up => :stopping
  end

  event :unmonitor do
    transition any => :unmonitored
  end

  event :restart do
    transition [:up, :down] => :restarting
  end

  # Callbacks run before a transition.
  before_transition any => any, :do => :notify_triggers
  before_transition :stopping => any, :do => :clean_threads

  # Entering a transitional state runs the matching system command.
  after_transition any => :starting, :do => :start_process
  after_transition any => :stopping, :do => :stop_process
  after_transition any => :restarting, :do => :restart_process

  after_transition any => any, :do => :record_transition
end
|
107
|
+
|
108
|
+
# Builds a process wrapper.
#
# @param process_name name of the process
# @param checks enumerable of (name, options) pairs; a name known to
#   Trigger becomes a trigger, every other name becomes a watch
# @param options [Hash] :actual_pid, :logger and any key listed in
#   CONFIGURABLE_ATTRIBUTES
def initialize(process_name, checks, options = {})
  @name = process_name
  @event_mutex = Monitor.new
  @watches = []
  @triggers = []
  @children = []
  @threads = []
  @statistics = ProcessStatistics.new
  @actual_pid = options[:actual_pid]
  # logger= walks @watches and @triggers, so both must exist by now.
  self.logger = options[:logger]

  checks.each do |name, opts|
    Trigger[name] ? add_trigger(name, opts) : add_watch(name, opts)
  end

  # Defaults; any of them may be replaced by the options loop below.
  @monitor_children = false
  @cache_actual_pid = true
  @start_grace_time = @stop_grace_time = @restart_grace_time = 3
  @environment = {}
  @on_start_timeout = "start"

  CONFIGURABLE_ATTRIBUTES.each do |attribute_name|
    next unless options.has_key?(attribute_name)
    send("#{attribute_name}=", options[attribute_name])
  end

  # Let state_machine do its initialization; the empty argument list is
  # intentional.
  super()
end
|
141
|
+
|
142
|
+
# One monitoring cycle: unless ticks are currently being skipped, reset the
# per-tick state, fire the state machine's tick event and, when the process
# is up, run the watches and tick the monitored children.
def tick
  return if skipping_ticks?
  self.skip_ticks_until = nil

  # drop the per-tick memo used by process_running?
  @process_running = nil

  # thread cleanup happens here because the stopping state isn't used
  clean_threads if unmonitored?

  # state machine transitions
  super

  return unless up?
  run_watches

  return unless monitor_children?
  refresh_children!
  children.each { |kid| kid.tick }
end
|
164
|
+
|
165
|
+
# Stores the logger and hands the same logger to every watch and trigger.
def logger=(logger)
  @logger = logger
  [watches, triggers].each do |listeners|
    listeners.each { |listener| listener.logger = logger }
  end
end
|
170
|
+
|
171
|
+
# State machine methods
|
172
|
+
# Records +event+ (with its +reason+) in the statistics and then fires the
# event by calling the method of the same name, all while holding the
# event monitor.
#
# @return the result of the event method
def dispatch!(event, reason = nil)
  @event_mutex.synchronize do
    @statistics.record_event(event, reason)
    send(event.to_s)
  end
end
|
178
|
+
|
179
|
+
# after_transition callback. For a real state change (not a loopback) it
# flags the change for run_watches, clears the history of every watch,
# empties the child list when children are monitored and logs the change.
def record_transition(transition)
  return if transition.loopback?

  @transitioned = true

  # a state change invalidates what the watches remembered
  watches.each { |watch| watch.clear_history! }

  # ... and the child list has to be re-populated
  if monitor_children?
    logger.warning "Clearing child list"
    children.clear
  end

  logger.info "Going from #{transition.from_name} => #{transition.to_name}"
end
|
194
|
+
|
195
|
+
# before_transition callback: tells every trigger about the transition.
def notify_triggers(transition)
  triggers.each { |t| t.notify(transition) }
end

# Watch related methods

# Appends a ConditionWatch for +name+; the process logger is merged into
# the watch options.
def add_watch(name, options = {})
  watch_options = options.merge(:logger => logger)
  watches << ConditionWatch.new(name, watch_options)
end

# Appends an instance of the trigger class registered under +name+; the
# process logger is merged into the trigger options.
def add_trigger(name, options = {})
  trigger_options = options.merge(:logger => logger)
  triggers << Trigger[name].new(self, trigger_options)
end
|
207
|
+
|
208
|
+
# Runs every watch in its own thread, gathers the events the watches
# return and dispatches them in watch order. Dispatching stops as soon as
# one event caused a state change (record_transition sets @transitioned).
def run_watches
  now = Time.now.to_i

  runners = watches.map do |watch|
    worker = Thread.new { Thread.current[:events] = watch.run(actual_pid, now) }
    [watch, worker]
  end

  @transitioned = false

  pending = []
  runners.each do |watch, worker|
    worker.join
    fired = worker[:events]
    next unless fired.size > 0

    logger.info "#{watch.name} dispatched: #{fired.join(',')}"
    fired.each { |event| pending << [event, watch.to_s] }
  end

  pending.each do |(event, reason)|
    break if @transitioned
    dispatch!(event, reason)
  end
end
|
231
|
+
|
232
|
+
# Sets the starting state from a forced liveness check: 'up' when the
# process is running; otherwise 'unmonitored' if auto_start is exactly
# false, and 'down' in every other case.
def determine_initial_state
  self.state =
    if process_running?(true)
      'up'
    elsif auto_start == false # nil must not count as "disabled"
      'unmonitored'
    else
      'down'
    end
end
|
239
|
+
|
240
|
+
# Executes a command issued by the user. Recognised commands are the
# strings "start", "stop", "restart" and "unmonitor"; anything else is
# ignored.
def handle_user_command(cmd)
  if "start" === cmd
    if process_running?(true)
      logger.warning("Refusing to re-run start command on an already running process.")
    else
      dispatch!(:start, "user initiated")
    end
  elsif "stop" === cmd
    stop_process
    dispatch!(:unmonitor, "user initiated")
  elsif "restart" === cmd
    restart_process
  elsif "unmonitor" === cmd
    # reset the triggers first so that their scheduled events get cleared
    triggers.each { |trigger| trigger.reset! }
    dispatch!(:unmonitor, "user initiated")
  end
end
|
260
|
+
|
261
|
+
# System Process Methods
|
262
|
+
# System Process Methods

# Reports whether the process answers signal 0.
#
# A truthy result is remembered in @process_running until it is reset
# (see #tick) or +force+ is given; a falsy result is not remembered, so the
# check is repeated on the next call. Whenever the process is found not to
# be running the stored pid is cleared.
#
# @param force [Boolean] discard the remembered result first
def process_running?(force = false)
  @process_running = nil if force # forget the previous answer

  @process_running = signal_process(0) unless @process_running
  # not running, so the pid we hold is no longer valid
  clear_pid unless @process_running
  @process_running
end
|
270
|
+
|
271
|
+
# Starts the process.
#
# Kills whatever the journal still lists for this process, runs the
# optional pre-start command and then either daemonizes the start command
# (journaling the daemon pid and the pids of known children) or, for a
# self-daemonizing process, runs it blocking under a timeout of
# start_grace_time. Finally ticks are skipped for start_grace_time.
def start_process
  ProcessJournal.kill_all_from_journal(name) # be sure nothing else is running from previous runs
  pre_start_process
  logger.warning "Executing start command: #{start_command}"

  if daemonize?
    daemon_id = System.daemonize(start_command, system_command_options)
    if daemon_id > 0
      ProcessJournal.append_pid_to_journal(name, daemon_id)
      if monitor_children?
        # children expose their pid as #actual_pid
        children.each { |child| ProcessJournal.append_pid_to_journal(name, child.actual_pid) }
      end
    end
  else
    # This is a self-daemonizing process
    with_timeout(start_grace_time, on_start_timeout) do
      result = System.execute_blocking(start_command, system_command_options)

      unless result[:exit_code].zero?
        logger.warning "Start command execution returned non-zero exit code:"
        logger.warning result.inspect
      end
    end
  end

  skip_ticks_for(start_grace_time)
end
|
298
|
+
|
299
|
+
# Runs the optional pre-start command (blocking) and logs a warning when it
# exits with a non-zero code. Does nothing without a pre_start_command.
def pre_start_process
  command = pre_start_command
  return unless command

  logger.warning "Executing pre start command: #{command}"
  outcome = System.execute_blocking(command, system_command_options)
  return if outcome[:exit_code].zero?

  logger.warning "Pre start command execution returned non-zero exit code:"
  logger.warning outcome.inspect
end
|
308
|
+
|
309
|
+
# Stops the process using, in order of preference, the configured stop
# command, the configured stop_signals sequence, or a plain TERM signal.
#
# Before stopping, the pids of current children are journaled when child
# monitoring is on. Afterwards everything in the journal is killed, the pid
# file is removed and ticks are skipped for stop_grace_time.
def stop_process
  if monitor_children
    System.get_children(actual_pid).each do |child_pid|
      ProcessJournal.append_pid_to_journal(name, child_pid)
    end
  end

  if stop_command
    cmd = prepare_command(stop_command)
    logger.warning "Executing stop command: #{cmd}"

    with_timeout(stop_grace_time, "stop") do
      outcome = System.execute_blocking(cmd, system_command_options)

      unless outcome[:exit_code].zero?
        logger.warning "Stop command execution returned non-zero exit code:"
        logger.warning outcome.inspect
      end
    end
  elsif stop_signals
    # Signals are sent from a background thread with the configured delay
    # between them; the thread works on its own copy of the sequence.
    logger.warning "Sending stop signals to #{actual_pid}"
    @threads << Thread.new(self, stop_signals.clone) do |process, sequence|
      signal = sequence.shift
      logger.info "Sending signal #{signal} to #{process.actual_pid}"
      process.signal_process(signal) # send first signal

      until sequence.empty?
        # validation guarantees an odd number of items, so the remainder
        # always comes in (delay, signal) pairs
        delay = sequence.shift
        signal = sequence.shift

        logger.debug "Sleeping for #{delay} seconds"
        sleep delay

        # stop as soon as the process can no longer be reached
        unless process.signal_process(0)
          logger.debug "Process has terminated."
          break
        end
        logger.info "Sending signal #{signal} to #{process.actual_pid}"
        process.signal_process(signal)
      end
    end
  else
    logger.warning "Executing default stop command. Sending TERM signal to #{actual_pid}"
    signal_process("TERM")
  end

  ProcessJournal.kill_all_from_journal(name) # finish cleanup
  unlink_pid # TODO: we only write the pid file if we daemonize, should we only unlink it if we daemonize?

  skip_ticks_for(stop_grace_time)
end
|
362
|
+
|
363
|
+
# Restarts the process. With a restart_command the command is run (blocking,
# under a timeout of restart_grace_time) and ticks are then skipped for
# restart_grace_time. Without one the process is merely stopped and the
# next tick brings it back up.
def restart_process
  unless restart_command
    logger.warning "No restart_command specified. Must stop and start to restart"
    return stop_process # the tick will bring it back.
  end

  cmd = prepare_command(restart_command)
  logger.warning "Executing restart command: #{cmd}"

  with_timeout(restart_grace_time, "restart") do
    outcome = System.execute_blocking(cmd, system_command_options)

    unless outcome[:exit_code].zero?
      logger.warning "Restart command execution returned non-zero exit code:"
      logger.warning outcome.inspect
    end
  end

  skip_ticks_for(restart_grace_time)
end
|
385
|
+
|
386
|
+
# Kills every background thread started by this process wrapper and
# forgets them.
def clean_threads
  @threads.each(&:kill)
  @threads.clear
end

# @return [Boolean] whether the daemonize attribute is truthy
def daemonize?
  daemonize ? true : false
end

# @return [Boolean] whether the monitor_children attribute is truthy
def monitor_children?
  monitor_children ? true : false
end
|
398
|
+
|
399
|
+
# Sends a signal to the process.
#
# String and Symbol signal names are upcased first; other values (such as
# the integer 0 used for liveness probes) are passed through unchanged.
#
# Only StandardError is rescued, so Interrupt, SystemExit and other
# non-standard exceptions are no longer swallowed here.
#
# @param code [String, Symbol, Integer] signal to send
# @return [Boolean] true when the signal was delivered, false when
#   delivery failed (the failure is logged at err level)
def signal_process(code)
  code = code.to_s.upcase if code.is_a?(String) || code.is_a?(Symbol)
  ::Process.kill(code, actual_pid)
  true
rescue StandardError => e
  logger.err "Failed to signal process #{actual_pid} with code #{code}: #{e}"
  false
end
|
407
|
+
|
408
|
+
# @return [Boolean] whether @cache_actual_pid is truthy
def cache_actual_pid?
  @cache_actual_pid ? true : false
end

# The pid of the monitored process: obtained from pid_command when one is
# configured, otherwise from the pid file.
def actual_pid
  return pid_from_command if pid_command
  pid_from_file
end
|
415
|
+
|
416
|
+
# Reads the pid from pid_file.
#
# A previously stored pid is returned without touching the file when pid
# caching is enabled. Otherwise the result of the lookup is stored in
# @actual_pid and returned: the file content converted with to_i, or nil
# when no pid_file is configured, the file is missing (a warning is
# logged) or the file is empty.
#
# Uses File.exist?; the old File.exists? alias is gone from current Rubies.
def pid_from_file
  return @actual_pid if cache_actual_pid? && @actual_pid

  @actual_pid =
    if pid_file
      if File.exist?(pid_file)
        contents = File.read(pid_file)
        contents.to_i if contents.size > 0
      else
        logger.warning("pid_file #{pid_file} does not exist or cannot be read")
        nil
      end
    end
end
|
430
|
+
|
431
|
+
# Runs pid_command in a subshell and interprets its stripped output as the
# pid. Returns nil unless the output consists solely of digits.
def pid_from_command
  output = `#{pid_command}`.strip
  output.to_i if output =~ /\A\d+\z/
end
|
435
|
+
|
436
|
+
# Stores +pid+ as the current pid, journaling it first so that the pid is
# always on record.
def actual_pid=(pid)
  ProcessJournal.append_pid_to_journal(name, pid)
  @actual_pid = pid
end

# Forgets the stored pid.
def clear_pid
  @actual_pid = nil
end

# Removes the pid file, if present, via System.delete_if_exists.
def unlink_pid
  System.delete_if_exists(pid_file)
end
|
448
|
+
|
449
|
+
# Internal State Methods
|
450
|
+
# Pushes the "skip ticks until" deadline out by +seconds+ (converted with
# to_i). The extension is additive: it starts from the existing deadline
# when there is one and from the current time otherwise.
# TODO: should this be additive or should the longest request win?
#       i.e. for calls with 5 and 10, skip for 10 or for 15?
def skip_ticks_for(seconds)
  base = skip_ticks_until || Time.now.to_i
  self.skip_ticks_until = base + seconds.to_i
end

# Truthy while a deadline is set and lies in the future.
def skipping_ticks?
  deadline = skip_ticks_until
  deadline && deadline > Time.now.to_i
end
|
459
|
+
|
460
|
+
# Brings @children in line with the children System reports for the
# current pid: children that are no longer running are dropped, and every
# newly discovered child pid is journaled and wrapped in a process built
# by child_process_factory.
def refresh_children!
  # prune the dead children first
  @children.delete_if { |child| !child.process_running?(true) }

  # whatever the system reports beyond the ones we already track is new
  reported = System.get_children(actual_pid)
  new_children_pids = reported - @children.map { |child| child.actual_pid }

  unless new_children_pids.empty?
    logger.info "Existing children: #{@children.collect{|c| c.actual_pid}.join(",")}. Got new children: #{new_children_pids.inspect} for #{actual_pid}"
  end

  # wrap each new child in its own process object
  new_children_pids.each do |child_pid|
    ProcessJournal.append_pid_to_journal(name, child_pid)
    child_name = "<child(pid:#{child_pid})>"
    child_logger = logger.prefix_with(child_name)

    @children << child_process_factory.create_child_process(child_name, child_pid, child_logger)
  end
end
|
481
|
+
|
482
|
+
# Returns +command+ as a String with every "{{PID}}" placeholder replaced
# by the current pid.
def prepare_command(command)
  pid_text = actual_pid.to_s
  template = command.to_s
  template.gsub("{{PID}}", pid_text)
end
|
485
|
+
|
486
|
+
# Collects the attributes that are handed to System when a command is run,
# keyed by attribute name.
def system_command_options
  keys = [:uid, :gid, :working_dir, :environment, :pid_file,
          :logger, :stdin, :stdout, :stderr, :supplementary_groups]
  keys.each_with_object({}) { |key, options| options[key] = send(key) }
end
|
500
|
+
|
501
|
+
# Runs the block under a timeout of +secs+ seconds (converted with to_f).
#
# When the block takes too long, two err-level messages are logged and, if
# +next_state+ was supplied, that event is dispatched. With the default
# +next_state+ of nil nothing is dispatched; dispatching nil used to fail
# with NoMethodError.
#
# @param secs [#to_f] time limit
# @param next_state [String, Symbol, nil] event to dispatch on timeout
# @return the block's result, or the dispatch result / nil after a timeout
def with_timeout(secs, next_state = nil, &blk)
  Timeout.timeout(secs.to_f, &blk)
rescue Timeout::Error
  logger.err "Execution is taking longer than expected."
  logger.err "Did you forget to tell bluepill to daemonize this process?"
  dispatch!(next_state) if next_state
end
|
512
|
+
end
|
513
|
+
end
|
514
|
+
|