bluepill 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO +13 -3
- data/VERSION +1 -1
- data/bluepill.gemspec +2 -3
- data/lib/bluepill.rb +1 -0
- data/lib/bluepill/application.rb +63 -27
- data/lib/bluepill/condition_watch.rb +5 -3
- data/lib/bluepill/dsl.rb +29 -10
- data/lib/bluepill/group.rb +8 -19
- data/lib/bluepill/process.rb +180 -73
- data/lib/bluepill/process_conditions.rb +2 -2
- data/lib/bluepill/process_conditions/cpu_usage.rb +1 -1
- data/lib/bluepill/process_conditions/mem_usage.rb +2 -2
- data/lib/bluepill/system.rb +47 -7
- data/lib/bluepill/trigger.rb +24 -6
- data/lib/bluepill/triggers/flapping.rb +12 -7
- data/lib/example.rb +45 -53
- metadata +3 -3
data/TODO
CHANGED
@@ -7,6 +7,16 @@
|
|
7
7
|
Issues encountered in the wild
|
8
8
|
------------------------------
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
* Whenever bluepill executes user-specified commands (like start_command, stop_command, restart_command), it should execute them in such a way that it does not affect the stability of the bluepill daemon itself.
|
11
|
+
|
12
|
+
* Whenever a command is sent to a process group, execute it in parallel for each process in the group instead of serially.
|
13
|
+
|
14
|
+
* Issuing commands to the running bluepill daemon using the cli can trigger a flapping condition. So, running bluepill restart <blah> can cause a flapping trigger to be fired, depending on internal state.
|
15
|
+
|
16
|
+
* Have validations for easy-to-make mistakes in the config file. For example:
|
17
|
+
+ Accidentally specifying the same pid file for multiple processes.
|
18
|
+
+ Accidentally specifying the same process name for multiple processes.
|
19
|
+
+ Validate the minimum number of config options to set up successful monitoring.
|
20
|
+
|
21
|
+
For example, for a valid "process", the only 2 things that are required are the start command and the pid file. We should tell a user at the time of loading whether the config file is syntactically and semantically valid.
|
22
|
+
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.6
|
data/bluepill.gemspec
CHANGED
@@ -5,12 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bluepill}
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.6"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Arya Asemanfar", "Gary Tsang", "Rohith Ravi"]
|
12
|
-
s.date = %q{2009-10-
|
13
|
-
s.default_executable = %q{bluepill}
|
12
|
+
s.date = %q{2009-10-22}
|
14
13
|
s.description = %q{Bluepill keeps your daemons up while taking up as little resources as possible. After all you probably want the resources of your server to be used by whatever daemons you are running rather than the thing that's supposed to make sure they are brought back up, should they die or misbehave.}
|
15
14
|
s.email = %q{entombedvirus@gmail.com}
|
16
15
|
s.executables = ["bluepill"]
|
data/lib/bluepill.rb
CHANGED
data/lib/bluepill/application.rb
CHANGED
@@ -14,29 +14,60 @@ module Bluepill
|
|
14
14
|
self.pid_file = File.join(self.base_dir, 'pids', self.name + ".pid")
|
15
15
|
|
16
16
|
@server = false
|
17
|
-
signal_trap
|
18
17
|
end
|
19
18
|
|
20
19
|
def load
|
21
|
-
|
20
|
+
begin
|
21
|
+
start_server
|
22
|
+
rescue StandardError => e
|
23
|
+
logger.err("Got exception: %s `%s`" % [e.class.name, e.message])
|
24
|
+
logger.err(e.backtrace.join("\n"))
|
25
|
+
end
|
22
26
|
end
|
23
27
|
|
24
28
|
def status
|
25
29
|
if(@server)
|
26
|
-
buffer =
|
30
|
+
buffer = []
|
31
|
+
depth = 0
|
32
|
+
|
27
33
|
if self.groups.has_key?(nil)
|
28
|
-
self.groups[nil].
|
29
|
-
buffer << "%s: %s
|
34
|
+
self.groups[nil].processes.each do |p|
|
35
|
+
buffer << "%s%s: %s" % [" " * depth, p.name, p.state]
|
36
|
+
|
37
|
+
if p.monitor_children?
|
38
|
+
depth += 2
|
39
|
+
p.children.each do |c|
|
40
|
+
buffer << "%s%s: %s" % [" " * depth, c.name, c.state]
|
41
|
+
end
|
42
|
+
depth -= 2
|
43
|
+
end
|
30
44
|
end
|
31
|
-
buffer << "\n"
|
32
45
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
buffer << "\n"
|
46
|
+
|
47
|
+
self.groups.each do |group_name, group|
|
48
|
+
next if group_name.nil?
|
49
|
+
|
50
|
+
buffer << "\n#{group_name}"
|
51
|
+
|
52
|
+
group.processes.each do |p|
|
53
|
+
depth += 2
|
54
|
+
|
55
|
+
buffer << "%s%s(pid:%d): %s" % [" " * depth, p.name, p.actual_pid, p.state]
|
56
|
+
|
57
|
+
if p.monitor_children?
|
58
|
+
depth += 2
|
59
|
+
p.children.each do |c|
|
60
|
+
buffer << "%s%s: %s" % [" " * depth, c.name, c.state]
|
61
|
+
end
|
62
|
+
depth -= 2
|
63
|
+
end
|
64
|
+
|
65
|
+
depth -= 2
|
66
|
+
end
|
38
67
|
end
|
39
|
-
|
68
|
+
|
69
|
+
buffer.join("\n")
|
70
|
+
|
40
71
|
else
|
41
72
|
send_to_server('status')
|
42
73
|
end
|
@@ -117,8 +148,9 @@ private
|
|
117
148
|
client.write(response)
|
118
149
|
client.close
|
119
150
|
end
|
120
|
-
rescue
|
121
|
-
logger.
|
151
|
+
rescue StandardError => e
|
152
|
+
logger.err(e.inspect)
|
153
|
+
logger.err(e.backtrace.join("\n"))
|
122
154
|
end
|
123
155
|
end
|
124
156
|
end
|
@@ -126,10 +158,15 @@ private
|
|
126
158
|
def worker
|
127
159
|
Thread.new(self) do |app|
|
128
160
|
loop do
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
161
|
+
begin
|
162
|
+
# app.logger.info("Server | worker loop started:")
|
163
|
+
job = app.work_queue.pop
|
164
|
+
send_to_process_or_group(job[0], job[1], false)
|
165
|
+
|
166
|
+
rescue StandardError => e
|
167
|
+
logger.err("Error while trying to execute %s from work_queue" % job.inspect)
|
168
|
+
logger.err("%s: `%s`" % [e.class.name, e.message])
|
169
|
+
end
|
133
170
|
# app.logger.info("Server | worker job processed:")
|
134
171
|
end
|
135
172
|
end
|
@@ -153,11 +190,16 @@ private
|
|
153
190
|
Daemonize.daemonize
|
154
191
|
|
155
192
|
@server = true
|
193
|
+
$0 = "bluepilld: #{self.name}"
|
194
|
+
|
156
195
|
self.work_queue = Queue.new
|
196
|
+
|
157
197
|
self.socket = Bluepill::Socket.new(name, base_dir).server
|
158
198
|
File.open(self.pid_file, 'w') { |x| x.write(::Process.pid) }
|
159
|
-
|
160
|
-
self.groups.each {|
|
199
|
+
|
200
|
+
self.groups.each {|_, group| group.boot! }
|
201
|
+
|
202
|
+
setup_signal_traps
|
161
203
|
listener
|
162
204
|
worker
|
163
205
|
run
|
@@ -173,16 +215,10 @@ private
|
|
173
215
|
sleep 1
|
174
216
|
end
|
175
217
|
end
|
176
|
-
|
177
|
-
def cleanup
|
178
|
-
# self.socket.cleanup
|
179
|
-
end
|
180
218
|
|
181
|
-
def
|
182
|
-
|
219
|
+
def setup_signal_traps
|
183
220
|
terminator = lambda do
|
184
221
|
puts "Terminating..."
|
185
|
-
cleanup
|
186
222
|
::Kernel.exit
|
187
223
|
end
|
188
224
|
|
@@ -1,18 +1,20 @@
|
|
1
1
|
module Bluepill
|
2
2
|
class ConditionWatch
|
3
3
|
attr_accessor :logger, :name
|
4
|
+
EMPTY_ARRAY = [].freeze # no need to recreate one every tick
|
5
|
+
|
4
6
|
def initialize(name, options = {})
|
5
7
|
@name = name
|
6
8
|
|
7
9
|
@logger = options.delete(:logger)
|
8
|
-
@fires = options.has_key?(:fires) ?
|
10
|
+
@fires = options.has_key?(:fires) ? Array(options.delete(:fires)) : [:restart]
|
9
11
|
@every = options.delete(:every)
|
10
12
|
@times = options[:times] || [1,1]
|
11
13
|
@times = [@times, @times] unless @times.is_a?(Array) # handles :times => 5
|
12
14
|
|
13
15
|
self.clear_history!
|
14
16
|
|
15
|
-
@process_condition = ProcessConditions
|
17
|
+
@process_condition = ProcessConditions[@name].new(options)
|
16
18
|
end
|
17
19
|
|
18
20
|
def run(pid, tick_number = Time.now.to_i)
|
@@ -21,7 +23,7 @@ module Bluepill
|
|
21
23
|
self.record_value(@process_condition.run(pid))
|
22
24
|
return @fires if self.fired?
|
23
25
|
end
|
24
|
-
|
26
|
+
EMPTY_ARRAY
|
25
27
|
end
|
26
28
|
|
27
29
|
def record_value(value)
|
data/lib/bluepill/dsl.rb
CHANGED
@@ -23,6 +23,33 @@ module Bluepill
|
|
23
23
|
def checks(name, options = {})
|
24
24
|
@watches[name] = options
|
25
25
|
end
|
26
|
+
|
27
|
+
def monitor_children(&child_process_block)
|
28
|
+
child_proxy = self.class.new
|
29
|
+
|
30
|
+
# Children inherit some properties of the parent
|
31
|
+
child_proxy.start_grace_time = @attributes[:start_grace_time]
|
32
|
+
child_proxy.stop_grace_time = @attributes[:stop_grace_time]
|
33
|
+
child_proxy.restart_grace_time = @attributes[:restart_grace_time]
|
34
|
+
|
35
|
+
child_process_block.call(child_proxy)
|
36
|
+
|
37
|
+
@attributes[:child_process_template] = child_proxy.to_process(nil)
|
38
|
+
# @attributes[:child_process_template].freeze
|
39
|
+
@attributes[:monitor_children] = true
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_process(process_name)
|
43
|
+
process = Bluepill::Process.new(process_name, @attributes)
|
44
|
+
@watches.each do |name, opts|
|
45
|
+
if Bluepill::Trigger[name]
|
46
|
+
process.add_trigger(name, opts)
|
47
|
+
else
|
48
|
+
process.add_watch(name, opts)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
process
|
52
|
+
end
|
26
53
|
end
|
27
54
|
|
28
55
|
app_proxy = Class.new do
|
@@ -33,16 +60,8 @@ module Bluepill
|
|
33
60
|
process_proxy = @@process_proxy.new
|
34
61
|
process_block.call(process_proxy)
|
35
62
|
|
36
|
-
group = process_proxy.attributes.delete(:group)
|
37
|
-
|
38
|
-
process = Bluepill::Process.new(process_name, process_proxy.attributes)
|
39
|
-
process_proxy.watches.each do |name, opts|
|
40
|
-
if Bluepill::Trigger[name]
|
41
|
-
process.add_trigger(name, opts)
|
42
|
-
else
|
43
|
-
process.add_watch(name, opts)
|
44
|
-
end
|
45
|
-
end
|
63
|
+
group = process_proxy.attributes.delete(:group)
|
64
|
+
process = process_proxy.to_process(process_name)
|
46
65
|
|
47
66
|
@@app.add_process(process, group)
|
48
67
|
end
|
data/lib/bluepill/group.rb
CHANGED
@@ -15,34 +15,23 @@ module Bluepill
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def tick
|
18
|
-
self.
|
18
|
+
self.processes.each do |process|
|
19
19
|
process.tick
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
23
|
# proxied events
|
24
|
-
[:start, :unmonitor, :stop, :restart].each do |event|
|
25
|
-
|
24
|
+
[:start, :unmonitor, :stop, :restart, :boot!].each do |event|
|
25
|
+
class_eval <<-END
|
26
26
|
def #{event}(process_name = nil)
|
27
|
-
|
28
|
-
|
27
|
+
threads = []
|
28
|
+
self.processes.each do |process|
|
29
|
+
next if process_name && process_name != process.name
|
30
|
+
threads << Thread.new { process.handle_user_command("#{event}") }
|
29
31
|
end
|
32
|
+
threads.each { |t| t.join }
|
30
33
|
end
|
31
34
|
END
|
32
35
|
end
|
33
|
-
|
34
|
-
def status
|
35
|
-
status = []
|
36
|
-
self.each_process do |process|
|
37
|
-
status << [process.name, process.state]
|
38
|
-
end
|
39
|
-
status
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
protected
|
44
|
-
def each_process(&block)
|
45
|
-
self.processes.each(&block)
|
46
|
-
end
|
47
36
|
end
|
48
37
|
end
|
data/lib/bluepill/process.rb
CHANGED
@@ -7,61 +7,76 @@ module Bluepill
|
|
7
7
|
:start_command,
|
8
8
|
:stop_command,
|
9
9
|
:restart_command,
|
10
|
+
|
10
11
|
:daemonize,
|
11
12
|
:pid_file,
|
13
|
+
|
12
14
|
:start_grace_time,
|
13
15
|
:stop_grace_time,
|
14
16
|
:restart_grace_time,
|
17
|
+
|
15
18
|
:uid,
|
16
|
-
:gid
|
19
|
+
:gid,
|
20
|
+
|
21
|
+
:monitor_children,
|
22
|
+
:child_process_template
|
17
23
|
]
|
18
24
|
|
19
25
|
attr_accessor :name, :watches, :triggers, :logger, :skip_ticks_until
|
20
26
|
attr_accessor *CONFIGURABLE_ATTRIBUTES
|
27
|
+
attr_reader :children
|
21
28
|
|
22
|
-
state_machine :initial => :unmonitored do
|
23
|
-
|
24
|
-
|
29
|
+
state_machine :initial => :unmonitored do
|
30
|
+
# These are the idle states, i.e. only an event (either external or internal) will trigger a transition.
|
31
|
+
# The distinction between down and unmonitored is that down
|
32
|
+
# means we know it is not running, whereas unmonitored means we don't care if it's running.
|
33
|
+
state :unmonitored, :up, :down
|
34
|
+
|
35
|
+
# These are transitionary states, we expect the process to change state after a certain period of time.
|
36
|
+
state :starting, :stopping, :restarting
|
37
|
+
|
25
38
|
event :tick do
|
26
|
-
transition :
|
27
|
-
|
39
|
+
transition :starting => :up, :if => :process_running?
|
40
|
+
transition :starting => :down, :unless => :process_running?
|
41
|
+
|
28
42
|
transition :up => :up, :if => :process_running?
|
29
43
|
transition :up => :down, :unless => :process_running?
|
30
|
-
|
31
|
-
|
44
|
+
|
45
|
+
# The process failed to die after entering the stopping state. Change the state to reflect
|
46
|
+
# reality.
|
47
|
+
transition :stopping => :up, :if => :process_running?
|
48
|
+
transition :stopping => :down, :unless => :process_running?
|
49
|
+
|
50
|
+
transition :down => :up, :if => :process_running?
|
51
|
+
transition :down => :starting, :unless => :process_running?
|
32
52
|
|
33
53
|
transition :restarting => :up, :if => :process_running?
|
34
54
|
transition :restarting => :down, :unless => :process_running?
|
35
55
|
end
|
36
|
-
|
56
|
+
|
37
57
|
event :start do
|
38
|
-
transition :unmonitored
|
39
|
-
transition [:restarting, :up] => :up
|
40
|
-
transition :down => :up, :if => :start_process
|
58
|
+
transition [:unmonitored, :down] => :starting
|
41
59
|
end
|
42
|
-
|
60
|
+
|
43
61
|
event :stop do
|
44
|
-
transition
|
45
|
-
transition [:up, :restarting] => :unmonitored, :if => :stop_process
|
62
|
+
transition :up => :stopping
|
46
63
|
end
|
47
|
-
|
48
|
-
event :restart do
|
49
|
-
transition all => :restarting, :if => :restart_process
|
50
|
-
end
|
51
|
-
|
64
|
+
|
52
65
|
event :unmonitor do
|
53
|
-
transition
|
66
|
+
transition any => :unmonitored
|
54
67
|
end
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
process.record_transition(transition.from_name, transition.to_name)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
after_transition any => any do |process, transition|
|
63
|
-
process.notify_triggers(transition)
|
68
|
+
|
69
|
+
event :restart do
|
70
|
+
transition [:up, :down] => :restarting
|
64
71
|
end
|
72
|
+
|
73
|
+
before_transition any => any, :do => :notify_triggers
|
74
|
+
|
75
|
+
after_transition any => :starting, :do => :start_process
|
76
|
+
after_transition any => :stopping, :do => :stop_process
|
77
|
+
after_transition any => :restarting, :do => :restart_process
|
78
|
+
|
79
|
+
after_transition any => any, :do => :record_transition
|
65
80
|
end
|
66
81
|
|
67
82
|
def initialize(process_name, options = {})
|
@@ -70,15 +85,18 @@ module Bluepill
|
|
70
85
|
@transition_history = Util::RotationalArray.new(10)
|
71
86
|
@watches = []
|
72
87
|
@triggers = []
|
88
|
+
@children = []
|
73
89
|
|
74
|
-
@
|
90
|
+
@monitor_children = options[:monitor_children] || false
|
91
|
+
|
92
|
+
%w(start_grace_time stop_grace_time restart_grace_time).each do |grace|
|
93
|
+
instance_variable_set("@#{grace}", options[grace.to_sym] || 3)
|
94
|
+
end
|
75
95
|
|
76
96
|
CONFIGURABLE_ATTRIBUTES.each do |attribute_name|
|
77
97
|
self.send("#{attribute_name}=", options[attribute_name]) if options.has_key?(attribute_name)
|
78
98
|
end
|
79
99
|
|
80
|
-
raise ArgumentError, "Please specify a pid_file or the demonize option" if pid_file.nil? && !daemonize?
|
81
|
-
|
82
100
|
# Let state_machine do its initialization stuff
|
83
101
|
super()
|
84
102
|
end
|
@@ -89,12 +107,17 @@ module Bluepill
|
|
89
107
|
|
90
108
|
# clear the memoization per tick
|
91
109
|
@process_running = nil
|
92
|
-
|
110
|
+
|
93
111
|
# run state machine transitions
|
94
112
|
super
|
95
|
-
|
96
|
-
if
|
113
|
+
|
114
|
+
if self.up?
|
97
115
|
run_watches
|
116
|
+
|
117
|
+
if monitor_children?
|
118
|
+
refresh_children!
|
119
|
+
children.each {|child| child.tick}
|
120
|
+
end
|
98
121
|
end
|
99
122
|
end
|
100
123
|
|
@@ -111,16 +134,25 @@ module Bluepill
|
|
111
134
|
end
|
112
135
|
end
|
113
136
|
|
114
|
-
def record_transition(
|
115
|
-
|
116
|
-
|
117
|
-
|
137
|
+
def record_transition(transition)
|
138
|
+
unless transition.loopback?
|
139
|
+
@transitioned = true
|
140
|
+
|
141
|
+
# When a process changes state, we should clear the memory of all the watches
|
142
|
+
self.watches.each { |w| w.clear_history! }
|
143
|
+
|
144
|
+
# Also, when a process changes state, we should re-populate its child list
|
145
|
+
if self.monitor_children?
|
146
|
+
self.logger.warning "Clearing child list"
|
147
|
+
self.children.clear
|
148
|
+
end
|
149
|
+
logger.info "Going from #{transition.from_name} => #{transition.to_name}"
|
150
|
+
end
|
118
151
|
end
|
119
152
|
|
120
153
|
def notify_triggers(transition)
|
121
154
|
self.triggers.each {|trigger| trigger.notify(transition)}
|
122
155
|
end
|
123
|
-
|
124
156
|
|
125
157
|
# Watch related methods
|
126
158
|
def add_watch(name, options = {})
|
@@ -153,69 +185,106 @@ module Bluepill
|
|
153
185
|
end
|
154
186
|
end
|
155
187
|
|
188
|
+
def handle_user_command(cmd)
|
189
|
+
case cmd
|
190
|
+
when "boot!"
|
191
|
+
# This is only called when bluepill is initially starting up
|
192
|
+
if process_running?(true)
|
193
|
+
# process was running even before bluepill was
|
194
|
+
self.state = 'up'
|
195
|
+
else
|
196
|
+
self.state = 'starting'
|
197
|
+
end
|
198
|
+
|
199
|
+
when "start"
|
200
|
+
if process_running?(true) && daemonize?
|
201
|
+
logger.warning("Refusing to re-run start command on an automatically daemonized process to preserve currently running process pid file.")
|
202
|
+
return
|
203
|
+
end
|
204
|
+
dispatch!(:start)
|
205
|
+
|
206
|
+
when "stop"
|
207
|
+
stop_process
|
208
|
+
dispatch!(:unmonitor)
|
209
|
+
|
210
|
+
when "restart"
|
211
|
+
restart_process
|
212
|
+
|
213
|
+
when "unmonitor"
|
214
|
+
# When the user issues an unmonitor cmd, reset any triggers so that
|
215
|
+
# scheduled events get cleared
|
216
|
+
triggers.each {|t| t.reset! }
|
217
|
+
dispatch!(:unmonitor)
|
218
|
+
end
|
219
|
+
end
|
156
220
|
|
157
221
|
# System Process Methods
|
158
222
|
def process_running?(force = false)
|
159
223
|
@process_running = nil if force
|
160
224
|
@process_running ||= signal_process(0)
|
225
|
+
self.clear_pid unless @process_running
|
226
|
+
@process_running
|
161
227
|
end
|
162
228
|
|
163
229
|
def start_process
|
230
|
+
logger.warning "Executing start command: #{start_command}"
|
231
|
+
|
164
232
|
if self.daemonize?
|
165
233
|
starter = lambda { drop_privileges; ::Kernel.exec(start_command) }
|
166
234
|
child_pid = Daemonize.call_as_daemon(starter)
|
167
235
|
File.open(pid_file, "w") {|f| f.write(child_pid)}
|
168
236
|
else
|
169
237
|
# This is a self-daemonizing process
|
170
|
-
|
238
|
+
unless System.execute_blocking(start_command)
|
239
|
+
logger.warning "Start command execution returned non-zero exit code"
|
240
|
+
end
|
171
241
|
end
|
172
|
-
|
173
|
-
|
174
|
-
skip_ticks_for(start_grace_time)
|
175
|
-
|
176
|
-
true
|
242
|
+
|
243
|
+
self.skip_ticks_for(start_grace_time)
|
177
244
|
end
|
178
245
|
|
179
|
-
def stop_process
|
246
|
+
def stop_process
|
180
247
|
if stop_command
|
181
|
-
|
248
|
+
cmd = stop_command.to_s.gsub("{{PID}}", actual_pid.to_s)
|
249
|
+
logger.warning "Executing stop command: #{cmd}"
|
250
|
+
|
251
|
+
unless System.execute_blocking(cmd)
|
252
|
+
logger.warning "Stop command execution returned non-zero exit code"
|
253
|
+
end
|
254
|
+
|
182
255
|
else
|
256
|
+
logger.warning "Executing default stop command. Sending TERM signal to #{actual_pid}"
|
183
257
|
signal_process("TERM")
|
184
|
-
|
185
|
-
wait_until = Time.now.to_i + stop_grace_time
|
186
|
-
while process_running?(true)
|
187
|
-
if wait_until <= Time.now.to_i
|
188
|
-
signal_process("KILL")
|
189
|
-
break
|
190
|
-
end
|
191
|
-
sleep 0.2
|
192
|
-
end
|
193
258
|
end
|
194
|
-
self.unlink_pid
|
195
|
-
self.clear_pid
|
196
|
-
|
197
|
-
skip_ticks_for(stop_grace_time)
|
259
|
+
self.unlink_pid # TODO: we only write the pid file if we daemonize, should we only unlink it if we daemonize?
|
198
260
|
|
199
|
-
|
261
|
+
self.skip_ticks_for(stop_grace_time)
|
200
262
|
end
|
201
263
|
|
202
264
|
def restart_process
|
203
265
|
if restart_command
|
204
|
-
|
205
|
-
|
206
|
-
|
266
|
+
logger.warning "Executing restart command: #{restart_command}"
|
267
|
+
|
268
|
+
unless System.execute_blocking(restart_command)
|
269
|
+
logger.warning "Restart command execution returned non-zero exit code"
|
270
|
+
end
|
271
|
+
|
272
|
+
self.skip_ticks_for(restart_grace_time)
|
207
273
|
else
|
208
|
-
|
209
|
-
|
274
|
+
logger.warning "No restart_command specified. Must stop and start to restart"
|
275
|
+
self.stop_process
|
276
|
+
# the tick will bring it back.
|
210
277
|
end
|
211
|
-
|
212
|
-
true
|
213
278
|
end
|
214
279
|
|
215
280
|
def daemonize?
|
216
281
|
!!self.daemonize
|
217
282
|
end
|
218
283
|
|
284
|
+
def monitor_children?
|
285
|
+
!!self.monitor_children
|
286
|
+
end
|
287
|
+
|
219
288
|
def signal_process(code)
|
220
289
|
::Process.kill(code, actual_pid)
|
221
290
|
true
|
@@ -224,7 +293,13 @@ module Bluepill
|
|
224
293
|
end
|
225
294
|
|
226
295
|
def actual_pid
|
227
|
-
@actual_pid ||=
|
296
|
+
@actual_pid ||= begin
|
297
|
+
File.read(pid_file).to_i if pid_file && File.exists?(pid_file)
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
def actual_pid=(pid)
|
302
|
+
@actual_pid = pid
|
228
303
|
end
|
229
304
|
|
230
305
|
def clear_pid
|
@@ -232,7 +307,7 @@ module Bluepill
|
|
232
307
|
end
|
233
308
|
|
234
309
|
def unlink_pid
|
235
|
-
File.unlink(pid_file) if File.exists?(pid_file)
|
310
|
+
File.unlink(pid_file) if pid_file && File.exists?(pid_file)
|
236
311
|
end
|
237
312
|
|
238
313
|
def drop_privileges
|
@@ -254,12 +329,44 @@ module Bluepill
|
|
254
329
|
|
255
330
|
# Internal State Methods
|
256
331
|
def skip_ticks_for(seconds)
|
257
|
-
|
332
|
+
# TODO: should this be additive or longest wins?
|
333
|
+
# i.e. if two calls for skip_ticks_for come in for 5 and 10, should it skip for 10 or 15?
|
334
|
+
self.skip_ticks_until = (self.skip_ticks_until || Time.now.to_i) + seconds.to_i
|
258
335
|
end
|
259
336
|
|
260
337
|
def skipping_ticks?
|
261
338
|
self.skip_ticks_until && self.skip_ticks_until > Time.now.to_i
|
262
339
|
end
|
340
|
+
|
341
|
+
def refresh_children!
|
342
|
+
# First prune the list of dead children
|
343
|
+
@children.delete_if {|child| !child.process_running?(true) }
|
344
|
+
|
345
|
+
# Add newly found children to the list
|
346
|
+
new_children_pids = System.get_children(self.actual_pid) - @children.map {|child| child.actual_pid}
|
347
|
+
|
348
|
+
unless new_children_pids.empty?
|
349
|
+
logger.info "Existing children: #{@children.collect{|c| c.actual_pid}.join(",")}. Got new children: #{new_children_pids.inspect} for #{actual_pid}"
|
350
|
+
end
|
351
|
+
|
352
|
+
# Construct a new process wrapper for each newly found child
|
353
|
+
new_children_pids.each do |child_pid|
|
354
|
+
child = self.child_process_template.deep_copy
|
355
|
+
|
356
|
+
child.name = "<child(pid:#{child_pid})>"
|
357
|
+
child.actual_pid = child_pid
|
358
|
+
child.logger = self.logger.prefix_with(child.name)
|
359
|
+
|
360
|
+
child.initialize_state_machines
|
361
|
+
child.state = "up"
|
362
|
+
|
363
|
+
@children << child
|
364
|
+
end
|
365
|
+
end
|
366
|
+
|
367
|
+
def deep_copy
|
368
|
+
Marshal.load(Marshal.dump(self))
|
369
|
+
end
|
263
370
|
end
|
264
371
|
end
|
265
372
|
|
data/lib/bluepill/system.rb
CHANGED
@@ -1,10 +1,50 @@
|
|
1
|
-
require "singleton"
|
2
1
|
module Bluepill
|
3
2
|
# This class represents the system that bluepill is running on. It's mainly used to memoize
|
4
3
|
# results of running ps auxx etc so that every watch in every process will not result in a fork
|
5
4
|
module System
|
6
5
|
extend self
|
7
6
|
|
7
|
+
# The position of each field in ps output
|
8
|
+
IDX_MAP = {
|
9
|
+
:pid => 0,
|
10
|
+
:ppid => 1,
|
11
|
+
:pcpu => 2,
|
12
|
+
:rss => 3
|
13
|
+
}
|
14
|
+
|
15
|
+
def cpu_usage(pid)
|
16
|
+
ps_axu[pid] && ps_axu[pid][IDX_MAP[:pcpu]].to_f
|
17
|
+
end
|
18
|
+
|
19
|
+
def memory_usage(pid)
|
20
|
+
ps_axu[pid] && ps_axu[pid][IDX_MAP[:rss]].to_f
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_children(parent_pid)
|
24
|
+
returning(Array.new) do |child_pids|
|
25
|
+
ps_axu.each_pair do |pid, chunks|
|
26
|
+
child_pids << chunks[IDX_MAP[:pid]].to_i if chunks[IDX_MAP[:ppid]].to_i == parent_pid.to_i
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def execute_non_blocking(cmd)
|
32
|
+
if Daemonize.safefork
|
33
|
+
# In parent, return immediately
|
34
|
+
return
|
35
|
+
|
36
|
+
else
|
37
|
+
# in child
|
38
|
+
::Kernel.exec(cmd)
|
39
|
+
# execution should not reach here
|
40
|
+
exit
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def execute_blocking(cmd)
|
45
|
+
::Kernel.system(cmd)
|
46
|
+
end
|
47
|
+
|
8
48
|
def store
|
9
49
|
@store ||= Hash.new
|
10
50
|
end
|
@@ -14,16 +54,16 @@ module Bluepill
|
|
14
54
|
end
|
15
55
|
|
16
56
|
def ps_axu
|
57
|
+
# TODO: need a mutex here
|
17
58
|
store[:ps_axu] ||= begin
|
18
59
|
# BSD style ps invocation
|
19
|
-
lines = `ps
|
20
|
-
|
60
|
+
lines = `ps axo pid=,ppid=,pcpu=,rss=`.split("\n")
|
61
|
+
|
21
62
|
lines.inject(Hash.new) do |mem, line|
|
22
|
-
|
23
|
-
chunks
|
24
|
-
pid = chunks[
|
63
|
+
chunks = line.split(/\s+/)
|
64
|
+
chunks.delete_if {|c| c.strip.empty? }
|
65
|
+
pid = chunks[IDX_MAP[:pid]].strip.to_i
|
25
66
|
mem[pid] = chunks
|
26
|
-
|
27
67
|
mem
|
28
68
|
end
|
29
69
|
end
|
data/lib/bluepill/trigger.rb
CHANGED
@@ -9,11 +9,17 @@ module Bluepill
|
|
9
9
|
@implementations[name]
|
10
10
|
end
|
11
11
|
|
12
|
-
attr_accessor :process, :logger
|
12
|
+
attr_accessor :process, :logger, :mutex, :scheduled_events
|
13
13
|
|
14
14
|
def initialize(process, options = {})
|
15
15
|
self.process = process
|
16
16
|
self.logger = options[:logger]
|
17
|
+
self.mutex = Mutex.new
|
18
|
+
self.scheduled_events = []
|
19
|
+
end
|
20
|
+
|
21
|
+
def reset!
|
22
|
+
self.cancel_all_events
|
17
23
|
end
|
18
24
|
|
19
25
|
def notify(transition)
|
@@ -26,16 +32,28 @@ module Bluepill
|
|
26
32
|
|
27
33
|
def schedule_event(event, delay)
|
28
34
|
# TODO: maybe wrap this in a ScheduledEvent class with methods like cancel
|
29
|
-
Thread.new(self) do |trigger|
|
35
|
+
thread = Thread.new(self) do |trigger|
|
30
36
|
begin
|
31
|
-
sleep delay.
|
37
|
+
sleep delay.to_f
|
32
38
|
trigger.logger.info("Retrying from flapping")
|
33
39
|
trigger.process.dispatch!(event)
|
34
|
-
|
35
|
-
|
36
|
-
|
40
|
+
trigger.mutex.synchronize do
|
41
|
+
trigger.scheduled_events.delete_if { |_, thread| thread == Thread.current }
|
42
|
+
end
|
43
|
+
rescue StandardError => e
|
44
|
+
trigger.logger.err(e)
|
45
|
+
trigger.logger.err(e.backtrace.join("\n"))
|
37
46
|
end
|
38
47
|
end
|
48
|
+
|
49
|
+
self.scheduled_events.push([event, thread])
|
50
|
+
end
|
51
|
+
|
52
|
+
def cancel_all_events
|
53
|
+
self.logger.info "Canceling all scheduled events"
|
54
|
+
self.mutex.synchronize do
|
55
|
+
self.scheduled_events.each {|_, thread| thread.kill}
|
56
|
+
end
|
39
57
|
end
|
40
58
|
|
41
59
|
end
|
@@ -1,10 +1,7 @@
|
|
1
1
|
module Bluepill
|
2
2
|
module Triggers
|
3
3
|
class Flapping < Bluepill::Trigger
|
4
|
-
TRIGGER_STATES = [
|
5
|
-
[:up, :down],
|
6
|
-
[:up, :restarting]
|
7
|
-
]
|
4
|
+
TRIGGER_STATES = [:starting, :restarting]
|
8
5
|
|
9
6
|
PARAMS = [:times, :within, :retry_in]
|
10
7
|
|
@@ -23,12 +20,17 @@ module Bluepill
|
|
23
20
|
end
|
24
21
|
|
25
22
|
def notify(transition)
|
26
|
-
if TRIGGER_STATES.include?(
|
23
|
+
if TRIGGER_STATES.include?(transition.to_name)
|
27
24
|
self.timeline << Time.now.to_i
|
28
25
|
self.check_flapping
|
29
26
|
end
|
30
27
|
end
|
31
|
-
|
28
|
+
|
29
|
+
def reset!
|
30
|
+
@timeline.clear
|
31
|
+
super
|
32
|
+
end
|
33
|
+
|
32
34
|
def check_flapping
|
33
35
|
num_occurances = (@timeline.nitems == self.times)
|
34
36
|
|
@@ -44,9 +46,12 @@ module Bluepill
|
|
44
46
|
self.schedule_event(:start, self.retry_in)
|
45
47
|
|
46
48
|
# this happens in the process' thread so we don't have to worry about concurrency issues with this event
|
47
|
-
self.dispatch!(:
|
49
|
+
self.dispatch!(:unmonitor)
|
48
50
|
|
49
51
|
@timeline.clear
|
52
|
+
|
53
|
+
# This will prevent a transition from happening in the process state_machine
|
54
|
+
throw :halt
|
50
55
|
end
|
51
56
|
end
|
52
57
|
end
|
data/lib/example.rb
CHANGED
@@ -3,34 +3,45 @@ require 'bluepill'
|
|
3
3
|
|
4
4
|
ROOT_DIR = "/tmp/bp"
|
5
5
|
|
6
|
-
#
|
7
|
-
#
|
8
|
-
# process = Bluepill::Process.new("hello_world") do |process|
|
9
|
-
# process.start_command = "sleep 5"
|
10
|
-
# process.daemonize = true
|
11
|
-
# process.pid_file = "/tmp/bp/sleep.pid"
|
12
|
-
# end
|
13
|
-
#
|
14
|
-
# process.add_watch("AlwaysTrue", :every => 5)
|
15
|
-
#
|
16
|
-
# application.processes << process
|
17
|
-
# process.dispatch!("start")
|
18
|
-
#
|
19
|
-
# application.start
|
20
|
-
|
21
|
-
|
6
|
+
# Watch with
|
7
|
+
# watch -n0.2 'ps axu | egrep "(CPU|forking|bluepill|sleep)" | grep -v grep | sort'
|
22
8
|
Bluepill.application(:sample_app) do |app|
|
23
|
-
|
9
|
+
2.times do |i|
|
24
10
|
app.process("process_#{i}") do |process|
|
25
|
-
process.start_command = "while true; do echo ''; sleep 0.01; done"
|
26
|
-
process.daemonize = true
|
27
11
|
process.pid_file = "#{ROOT_DIR}/pids/process_#{i}.pid"
|
12
|
+
|
13
|
+
# I could not figure out a portable way to
|
14
|
+
# specify the path to the sample forking server across the different developer laptops.
|
15
|
+
# Since this code is eval'ed we cannot reliably use __FILE__
|
16
|
+
process.start_command = "/Users/rohith/work/bluepill/bin/sample_forking_server #{4242 + i}"
|
17
|
+
process.stop_command = "kill -INT {{PID}}"
|
18
|
+
process.daemonize = true
|
19
|
+
|
20
|
+
process.start_grace_time = 1.seconds
|
21
|
+
process.restart_grace_time = 7.seconds
|
22
|
+
process.stop_grace_time = 7.seconds
|
23
|
+
|
28
24
|
process.uid = "admin"
|
29
25
|
process.gid = "staff"
|
30
26
|
|
31
|
-
|
32
|
-
process.checks :cpu_usage, :every => 5, :below => 0.5, :times => [2, 5]
|
27
|
+
# process.checks :cpu_usage, :every => 10, :below => 0.5, :times => [5, 5]
|
33
28
|
process.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds
|
29
|
+
|
30
|
+
process.monitor_children do |child_process|
|
31
|
+
# child_process.checks :cpu_usage,
|
32
|
+
# :every => 10,
|
33
|
+
# :below => 0.5,
|
34
|
+
# :times => [5, 5]
|
35
|
+
|
36
|
+
# child_process.checks :mem_usage,
|
37
|
+
# :every => 3,
|
38
|
+
# :below => 600.kilobytes,
|
39
|
+
# :times => [3, 5],
|
40
|
+
# :fires => [:stop]
|
41
|
+
|
42
|
+
child_process.stop_command = "kill -QUIT {{PID}}"
|
43
|
+
# child_process.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds
|
44
|
+
end
|
34
45
|
end
|
35
46
|
end
|
36
47
|
|
@@ -39,39 +50,20 @@ Bluepill.application(:sample_app) do |app|
|
|
39
50
|
process.start_command = "sleep #{rand(30) + i}"
|
40
51
|
process.group = "Poopfaced"
|
41
52
|
process.daemonize = true
|
42
|
-
process.pid_file = "#{ROOT_DIR}/pids
|
53
|
+
process.pid_file = "#{ROOT_DIR}/pids/#{process.group}_process_#{i}.pid"
|
43
54
|
|
44
55
|
process.checks :always_true, :every => 10
|
45
56
|
end
|
46
57
|
end
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
#
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
#
|
60
|
-
# checks.cpu_usage :every 10.seconds,
|
61
|
-
# :below => 50.percent,
|
62
|
-
# :fires => :restart
|
63
|
-
#
|
64
|
-
# checks.custom_method :custom_params => :to_be_sent_to_the_custom_condition,
|
65
|
-
# :fires => [:stop, :custom_event, :start]
|
66
|
-
#
|
67
|
-
# checks.deadly_condition :every => 20.seconds,
|
68
|
-
# :fires => :stop
|
69
|
-
# end
|
70
|
-
#
|
71
|
-
# handles(:restart) do |process|
|
72
|
-
# # process has pid
|
73
|
-
# process.transition :down
|
74
|
-
# process.transition :up
|
75
|
-
# run "some commands -P #{process.pid}"
|
76
|
-
# end
|
77
|
-
# end
|
58
|
+
|
59
|
+
0.times do |i|
|
60
|
+
app.process("group_process_#{i}") do |process|
|
61
|
+
process.start_command = "sleep #{rand(30) + i}"
|
62
|
+
process.group = "Poopfaced_2"
|
63
|
+
process.daemonize = true
|
64
|
+
process.pid_file = "#{ROOT_DIR}/pids/#{process.group}_process_#{i}.pid"
|
65
|
+
|
66
|
+
process.checks :always_true, :every => 10
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bluepill
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arya Asemanfar
|
@@ -11,8 +11,8 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2009-10-
|
15
|
-
default_executable:
|
14
|
+
date: 2009-10-22 00:00:00 -07:00
|
15
|
+
default_executable:
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|