bluepill 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/TODO CHANGED
@@ -7,6 +7,16 @@
7
7
  Issues encountered in the wild
8
8
  ------------------------------
9
9
 
10
- 1. There were weird dependencies on xml-simple and builder gems when you try to use the bluepill cli.
11
- 2. Whenever bluepill executes user specified commands (like start_command, stop_command, restart_command), it should execute it in such a way that it does not affect the stability of bluepill daemon itself.
12
- 3. Whenever a command is sent to a process group, execute them in parallel for each process in the group instead of serially.
10
+ * Whenever bluepill executes user specified commands (like start_command, stop_command, restart_command), it should execute it in such a way that it does not affect the stability of bluepill daemon itself.
11
+
12
+ * Whenever a command is sent to a process group, execute them in parallel for each process in the group instead of serially.
13
+
14
+ * Issuing commands to the running bluepill daemon using the cli can trigger a flapping condition. So, running bluepill restart <blah> can cause a flapping trigger to be fired depending on internal state.
15
+
16
+ * Have validations for easy-to-make mistakes in the config file. For example:
17
+ + Accidentally specifying the same pid file for multiple processes.
18
+ + Accidentally specifying the same process name for multiple processes.
19
+ + Validate the minimum number of config options to set up successful monitoring.
20
+
21
+ For example, for a valid "process", the only 2 things that are required are the start command and the pid file. We should tell a user at the time of loading whether the config file is syntactically and semantically valid.
22
+
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.5
1
+ 0.0.6
data/bluepill.gemspec CHANGED
@@ -5,12 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{bluepill}
8
- s.version = "0.0.5"
8
+ s.version = "0.0.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Arya Asemanfar", "Gary Tsang", "Rohith Ravi"]
12
- s.date = %q{2009-10-14}
13
- s.default_executable = %q{bluepill}
12
+ s.date = %q{2009-10-22}
14
13
  s.description = %q{Bluepill keeps your daemons up while taking up as little resources as possible. After all you probably want the resources of your server to be used by whatever daemons you are running rather than the thing that's supposed to make sure they are brought back up, should they die or misbehave.}
15
14
  s.email = %q{entombedvirus@gmail.com}
16
15
  s.executables = ["bluepill"]
data/lib/bluepill.rb CHANGED
@@ -7,6 +7,7 @@ require 'syslog'
7
7
  require 'active_support/inflector'
8
8
  require 'active_support/core_ext/hash'
9
9
  require 'active_support/core_ext/numeric'
10
+ require 'active_support/core_ext/object/misc'
10
11
  require 'active_support/duration'
11
12
 
12
13
  require 'bluepill/application'
@@ -14,29 +14,60 @@ module Bluepill
14
14
  self.pid_file = File.join(self.base_dir, 'pids', self.name + ".pid")
15
15
 
16
16
  @server = false
17
- signal_trap
18
17
  end
19
18
 
20
19
  def load
21
- start_server
20
+ begin
21
+ start_server
22
+ rescue StandardError => e
23
+ logger.err("Got exception: %s `%s`" % [e.class.name, e.message])
24
+ logger.err(e.backtrace.join("\n"))
25
+ end
22
26
  end
23
27
 
24
28
  def status
25
29
  if(@server)
26
- buffer = ""
30
+ buffer = []
31
+ depth = 0
32
+
27
33
  if self.groups.has_key?(nil)
28
- self.groups[nil].status.each do |line|
29
- buffer << "%s: %s\n" % line
34
+ self.groups[nil].processes.each do |p|
35
+ buffer << "%s%s: %s" % [" " * depth, p.name, p.state]
36
+
37
+ if p.monitor_children?
38
+ depth += 2
39
+ p.children.each do |c|
40
+ buffer << "%s%s: %s" % [" " * depth, c.name, c.state]
41
+ end
42
+ depth -= 2
43
+ end
30
44
  end
31
- buffer << "\n"
32
45
  end
33
- self.groups.keys.compact.sort.each do |name|
34
- group = self.groups[name]
35
- buffer << "#{name}:\n"
36
- group.status.each { |line| buffer << " %s: %s\n" % line }
37
- buffer << "\n"
46
+
47
+ self.groups.each do |group_name, group|
48
+ next if group_name.nil?
49
+
50
+ buffer << "\n#{group_name}"
51
+
52
+ group.processes.each do |p|
53
+ depth += 2
54
+
55
+ buffer << "%s%s(pid:%d): %s" % [" " * depth, p.name, p.actual_pid, p.state]
56
+
57
+ if p.monitor_children?
58
+ depth += 2
59
+ p.children.each do |c|
60
+ buffer << "%s%s: %s" % [" " * depth, c.name, c.state]
61
+ end
62
+ depth -= 2
63
+ end
64
+
65
+ depth -= 2
66
+ end
38
67
  end
39
- buffer
68
+
69
+ buffer.join("\n")
70
+
40
71
  else
41
72
  send_to_server('status')
42
73
  end
@@ -117,8 +148,9 @@ private
117
148
  client.write(response)
118
149
  client.close
119
150
  end
120
- rescue Exception => e
121
- logger.info(e.inspect)
151
+ rescue StandardError => e
152
+ logger.err(e.inspect)
153
+ logger.err(e.backtrace.join("\n"))
122
154
  end
123
155
  end
124
156
  end
@@ -126,10 +158,15 @@ private
126
158
  def worker
127
159
  Thread.new(self) do |app|
128
160
  loop do
129
- # app.logger.info("Server | worker loop started:")
130
- job = self.work_queue.pop
131
- # app.logger.info("Server | worker job recieved:")
132
- send_to_process_or_group(job[0], job[1], false)
161
+ begin
162
+ # app.logger.info("Server | worker loop started:")
163
+ job = app.work_queue.pop
164
+ send_to_process_or_group(job[0], job[1], false)
165
+
166
+ rescue StandardError => e
167
+ logger.err("Error while trying to execute %s from work_queue" % job.inspect)
168
+ logger.err("%s: `%s`" % [e.class.name, e.message])
169
+ end
133
170
  # app.logger.info("Server | worker job processed:")
134
171
  end
135
172
  end
@@ -153,11 +190,16 @@ private
153
190
  Daemonize.daemonize
154
191
 
155
192
  @server = true
193
+ $0 = "bluepilld: #{self.name}"
194
+
156
195
  self.work_queue = Queue.new
196
+
157
197
  self.socket = Bluepill::Socket.new(name, base_dir).server
158
198
  File.open(self.pid_file, 'w') { |x| x.write(::Process.pid) }
159
- $0 = "bluepilld: #{self.name}"
160
- self.groups.each {|name, group| group.start }
199
+
200
+ self.groups.each {|_, group| group.boot! }
201
+
202
+ setup_signal_traps
161
203
  listener
162
204
  worker
163
205
  run
@@ -173,16 +215,10 @@ private
173
215
  sleep 1
174
216
  end
175
217
  end
176
-
177
- def cleanup
178
- # self.socket.cleanup
179
- end
180
218
 
181
- def signal_trap
182
-
219
+ def setup_signal_traps
183
220
  terminator = lambda do
184
221
  puts "Terminating..."
185
- cleanup
186
222
  ::Kernel.exit
187
223
  end
188
224
 
@@ -1,18 +1,20 @@
1
1
  module Bluepill
2
2
  class ConditionWatch
3
3
  attr_accessor :logger, :name
4
+ EMPTY_ARRAY = [].freeze # no need to recreate one every tick
5
+
4
6
  def initialize(name, options = {})
5
7
  @name = name
6
8
 
7
9
  @logger = options.delete(:logger)
8
- @fires = options.has_key?(:fires) ? [options.delete(:fires)].flatten : [:restart]
10
+ @fires = options.has_key?(:fires) ? Array(options.delete(:fires)) : [:restart]
9
11
  @every = options.delete(:every)
10
12
  @times = options[:times] || [1,1]
11
13
  @times = [@times, @times] unless @times.is_a?(Array) # handles :times => 5
12
14
 
13
15
  self.clear_history!
14
16
 
15
- @process_condition = ProcessConditions.name_to_class(@name).new(options)
17
+ @process_condition = ProcessConditions[@name].new(options)
16
18
  end
17
19
 
18
20
  def run(pid, tick_number = Time.now.to_i)
@@ -21,7 +23,7 @@ module Bluepill
21
23
  self.record_value(@process_condition.run(pid))
22
24
  return @fires if self.fired?
23
25
  end
24
- []
26
+ EMPTY_ARRAY
25
27
  end
26
28
 
27
29
  def record_value(value)
data/lib/bluepill/dsl.rb CHANGED
@@ -23,6 +23,33 @@ module Bluepill
23
23
  def checks(name, options = {})
24
24
  @watches[name] = options
25
25
  end
26
+
27
+ def monitor_children(&child_process_block)
28
+ child_proxy = self.class.new
29
+
30
+ # Children inherit some properties of the parent
31
+ child_proxy.start_grace_time = @attributes[:start_grace_time]
32
+ child_proxy.stop_grace_time = @attributes[:stop_grace_time]
33
+ child_proxy.restart_grace_time = @attributes[:restart_grace_time]
34
+
35
+ child_process_block.call(child_proxy)
36
+
37
+ @attributes[:child_process_template] = child_proxy.to_process(nil)
38
+ # @attributes[:child_process_template].freeze
39
+ @attributes[:monitor_children] = true
40
+ end
41
+
42
+ def to_process(process_name)
43
+ process = Bluepill::Process.new(process_name, @attributes)
44
+ @watches.each do |name, opts|
45
+ if Bluepill::Trigger[name]
46
+ process.add_trigger(name, opts)
47
+ else
48
+ process.add_watch(name, opts)
49
+ end
50
+ end
51
+ process
52
+ end
26
53
  end
27
54
 
28
55
  app_proxy = Class.new do
@@ -33,16 +60,8 @@ module Bluepill
33
60
  process_proxy = @@process_proxy.new
34
61
  process_block.call(process_proxy)
35
62
 
36
- group = process_proxy.attributes.delete(:group)
37
-
38
- process = Bluepill::Process.new(process_name, process_proxy.attributes)
39
- process_proxy.watches.each do |name, opts|
40
- if Bluepill::Trigger[name]
41
- process.add_trigger(name, opts)
42
- else
43
- process.add_watch(name, opts)
44
- end
45
- end
63
+ group = process_proxy.attributes.delete(:group)
64
+ process = process_proxy.to_process(process_name)
46
65
 
47
66
  @@app.add_process(process, group)
48
67
  end
@@ -15,34 +15,23 @@ module Bluepill
15
15
  end
16
16
 
17
17
  def tick
18
- self.each_process do |process|
18
+ self.processes.each do |process|
19
19
  process.tick
20
20
  end
21
21
  end
22
22
 
23
23
  # proxied events
24
- [:start, :unmonitor, :stop, :restart].each do |event|
25
- eval <<-END
24
+ [:start, :unmonitor, :stop, :restart, :boot!].each do |event|
25
+ class_eval <<-END
26
26
  def #{event}(process_name = nil)
27
- self.each_process do |process|
28
- process.dispatch!("#{event}") if process_name.nil? || process.name == process_name
27
+ threads = []
28
+ self.processes.each do |process|
29
+ next if process_name && process_name != process.name
30
+ threads << Thread.new { process.handle_user_command("#{event}") }
29
31
  end
32
+ threads.each { |t| t.join }
30
33
  end
31
34
  END
32
35
  end
33
-
34
- def status
35
- status = []
36
- self.each_process do |process|
37
- status << [process.name, process.state]
38
- end
39
- status
40
- end
41
-
42
-
43
- protected
44
- def each_process(&block)
45
- self.processes.each(&block)
46
- end
47
36
  end
48
37
  end
@@ -7,61 +7,76 @@ module Bluepill
7
7
  :start_command,
8
8
  :stop_command,
9
9
  :restart_command,
10
+
10
11
  :daemonize,
11
12
  :pid_file,
13
+
12
14
  :start_grace_time,
13
15
  :stop_grace_time,
14
16
  :restart_grace_time,
17
+
15
18
  :uid,
16
- :gid
19
+ :gid,
20
+
21
+ :monitor_children,
22
+ :child_process_template
17
23
  ]
18
24
 
19
25
  attr_accessor :name, :watches, :triggers, :logger, :skip_ticks_until
20
26
  attr_accessor *CONFIGURABLE_ATTRIBUTES
27
+ attr_reader :children
21
28
 
22
- state_machine :initial => :unmonitored do
23
- state :unmonitored, :up, :down, :restarting
24
-
29
+ state_machine :initial => :unmonitored do
30
+ # These are the idle states, i.e. only an event (either external or internal) will trigger a transition.
31
+ # The distinction between down and unmonitored is that down
32
+ # means we know it is not running and unmonitored is that we don't care if it's running.
33
+ state :unmonitored, :up, :down
34
+
35
+ # These are transitionary states, we expect the process to change state after a certain period of time.
36
+ state :starting, :stopping, :restarting
37
+
25
38
  event :tick do
26
- transition :unmonitored => :unmonitored
27
-
39
+ transition :starting => :up, :if => :process_running?
40
+ transition :starting => :down, :unless => :process_running?
41
+
28
42
  transition :up => :up, :if => :process_running?
29
43
  transition :up => :down, :unless => :process_running?
30
-
31
- transition :down => :up, :if => lambda {|process| process.process_running? || process.start_process }
44
+
45
+ # The process failed to die after entering the stopping state. Change the state to reflect
46
+ # reality.
47
+ transition :stopping => :up, :if => :process_running?
48
+ transition :stopping => :down, :unless => :process_running?
49
+
50
+ transition :down => :up, :if => :process_running?
51
+ transition :down => :starting, :unless => :process_running?
32
52
 
33
53
  transition :restarting => :up, :if => :process_running?
34
54
  transition :restarting => :down, :unless => :process_running?
35
55
  end
36
-
56
+
37
57
  event :start do
38
- transition :unmonitored => :up, :if => lambda {|process| process.process_running? || process.start_process }
39
- transition [:restarting, :up] => :up
40
- transition :down => :up, :if => :start_process
58
+ transition [:unmonitored, :down] => :starting
41
59
  end
42
-
60
+
43
61
  event :stop do
44
- transition [:unmonitored, :down] => :unmonitored
45
- transition [:up, :restarting] => :unmonitored, :if => :stop_process
62
+ transition :up => :stopping
46
63
  end
47
-
48
- event :restart do
49
- transition all => :restarting, :if => :restart_process
50
- end
51
-
64
+
52
65
  event :unmonitor do
53
- transition all => :unmonitored
66
+ transition any => :unmonitored
54
67
  end
55
-
56
- after_transition any => any do |process, transition|
57
- unless transition.loopback?
58
- process.record_transition(transition.from_name, transition.to_name)
59
- end
60
- end
61
-
62
- after_transition any => any do |process, transition|
63
- process.notify_triggers(transition)
68
+
69
+ event :restart do
70
+ transition [:up, :down] => :restarting
64
71
  end
72
+
73
+ before_transition any => any, :do => :notify_triggers
74
+
75
+ after_transition any => :starting, :do => :start_process
76
+ after_transition any => :stopping, :do => :stop_process
77
+ after_transition any => :restarting, :do => :restart_process
78
+
79
+ after_transition any => any, :do => :record_transition
65
80
  end
66
81
 
67
82
  def initialize(process_name, options = {})
@@ -70,15 +85,18 @@ module Bluepill
70
85
  @transition_history = Util::RotationalArray.new(10)
71
86
  @watches = []
72
87
  @triggers = []
88
+ @children = []
73
89
 
74
- @stop_grace_time = @start_grace_time = @restart_grace_time = 3
90
+ @monitor_children = options[:monitor_children] || false
91
+
92
+ %w(start_grace_time stop_grace_time restart_grace_time).each do |grace|
93
+ instance_variable_set("@#{grace}", options[grace.to_sym] || 3)
94
+ end
75
95
 
76
96
  CONFIGURABLE_ATTRIBUTES.each do |attribute_name|
77
97
  self.send("#{attribute_name}=", options[attribute_name]) if options.has_key?(attribute_name)
78
98
  end
79
99
 
80
- raise ArgumentError, "Please specify a pid_file or the demonize option" if pid_file.nil? && !daemonize?
81
-
82
100
  # Let state_machine do its initialization stuff
83
101
  super()
84
102
  end
@@ -89,12 +107,17 @@ module Bluepill
89
107
 
90
108
  # clear the memoization per tick
91
109
  @process_running = nil
92
-
110
+
93
111
  # run state machine transitions
94
112
  super
95
-
96
- if process_running?
113
+
114
+ if self.up?
97
115
  run_watches
116
+
117
+ if monitor_children?
118
+ refresh_children!
119
+ children.each {|child| child.tick}
120
+ end
98
121
  end
99
122
  end
100
123
 
@@ -111,16 +134,25 @@ module Bluepill
111
134
  end
112
135
  end
113
136
 
114
- def record_transition(from, to)
115
- @transitioned = true
116
- logger.info "Going from #{from} => #{to}"
117
- self.watches.each { |w| w.clear_history! }
137
+ def record_transition(transition)
138
+ unless transition.loopback?
139
+ @transitioned = true
140
+
141
+ # When a process changes state, we should clear the memory of all the watches
142
+ self.watches.each { |w| w.clear_history! }
143
+
144
+ # Also, when a process changes state, we should re-populate its child list
145
+ if self.monitor_children?
146
+ self.logger.warning "Clearing child list"
147
+ self.children.clear
148
+ end
149
+ logger.info "Going from #{transition.from_name} => #{transition.to_name}"
150
+ end
118
151
  end
119
152
 
120
153
  def notify_triggers(transition)
121
154
  self.triggers.each {|trigger| trigger.notify(transition)}
122
155
  end
123
-
124
156
 
125
157
  # Watch related methods
126
158
  def add_watch(name, options = {})
@@ -153,69 +185,106 @@ module Bluepill
153
185
  end
154
186
  end
155
187
 
188
+ def handle_user_command(cmd)
189
+ case cmd
190
+ when "boot!"
191
+ # This is only called when bluepill is initially starting up
192
+ if process_running?(true)
193
+ # process was running even before bluepill was
194
+ self.state = 'up'
195
+ else
196
+ self.state = 'starting'
197
+ end
198
+
199
+ when "start"
200
+ if process_running?(true) && daemonize?
201
+ logger.warning("Refusing to re-run start command on an automatically daemonized process to preserve currently running process pid file.")
202
+ return
203
+ end
204
+ dispatch!(:start)
205
+
206
+ when "stop"
207
+ stop_process
208
+ dispatch!(:unmonitor)
209
+
210
+ when "restart"
211
+ restart_process
212
+
213
+ when "unmonitor"
214
+ # When the user issues an unmonitor cmd, reset any triggers so that
215
+ # scheduled events gets cleared
216
+ triggers.each {|t| t.reset! }
217
+ dispatch!(:unmonitor)
218
+ end
219
+ end
156
220
 
157
221
  # System Process Methods
158
222
  def process_running?(force = false)
159
223
  @process_running = nil if force
160
224
  @process_running ||= signal_process(0)
225
+ self.clear_pid unless @process_running
226
+ @process_running
161
227
  end
162
228
 
163
229
  def start_process
230
+ logger.warning "Executing start command: #{start_command}"
231
+
164
232
  if self.daemonize?
165
233
  starter = lambda { drop_privileges; ::Kernel.exec(start_command) }
166
234
  child_pid = Daemonize.call_as_daemon(starter)
167
235
  File.open(pid_file, "w") {|f| f.write(child_pid)}
168
236
  else
169
237
  # This is a self-daemonizing process
170
- system(start_command)
238
+ unless System.execute_blocking(start_command)
239
+ logger.warning "Start command execution returned non-zero exit code"
240
+ end
171
241
  end
172
- self.clear_pid
173
-
174
- skip_ticks_for(start_grace_time)
175
-
176
- true
242
+
243
+ self.skip_ticks_for(start_grace_time)
177
244
  end
178
245
 
179
- def stop_process
246
+ def stop_process
180
247
  if stop_command
181
- system(stop_command)
248
+ cmd = stop_command.to_s.gsub("{{PID}}", actual_pid.to_s)
249
+ logger.warning "Executing stop command: #{cmd}"
250
+
251
+ unless System.execute_blocking(cmd)
252
+ logger.warning "Stop command execution returned non-zero exit code"
253
+ end
254
+
182
255
  else
256
+ logger.warning "Executing default stop command. Sending TERM signal to #{actual_pid}"
183
257
  signal_process("TERM")
184
-
185
- wait_until = Time.now.to_i + stop_grace_time
186
- while process_running?(true)
187
- if wait_until <= Time.now.to_i
188
- signal_process("KILL")
189
- break
190
- end
191
- sleep 0.2
192
- end
193
258
  end
194
- self.unlink_pid
195
- self.clear_pid
196
-
197
- skip_ticks_for(stop_grace_time)
259
+ self.unlink_pid # TODO: we only write the pid file if we daemonize, should we only unlink it if we daemonize?
198
260
 
199
- true
261
+ self.skip_ticks_for(stop_grace_time)
200
262
  end
201
263
 
202
264
  def restart_process
203
265
  if restart_command
204
- system(restart_command)
205
- skip_ticks_for(restart_grace_time)
206
- self.clear_pid
266
+ logger.warning "Executing restart command: #{restart_command}"
267
+
268
+ unless System.execute_blocking(restart_command)
269
+ logger.warning "Restart command execution returned non-zero exit code"
270
+ end
271
+
272
+ self.skip_ticks_for(restart_grace_time)
207
273
  else
208
- stop_process
209
- start_process
274
+ logger.warning "No restart_command specified. Must stop and start to restart"
275
+ self.stop_process
276
+ # the tick will bring it back.
210
277
  end
211
-
212
- true
213
278
  end
214
279
 
215
280
  def daemonize?
216
281
  !!self.daemonize
217
282
  end
218
283
 
284
+ def monitor_children?
285
+ !!self.monitor_children
286
+ end
287
+
219
288
  def signal_process(code)
220
289
  ::Process.kill(code, actual_pid)
221
290
  true
@@ -224,7 +293,13 @@ module Bluepill
224
293
  end
225
294
 
226
295
  def actual_pid
227
- @actual_pid ||= File.read(pid_file).to_i if File.exists?(pid_file)
296
+ @actual_pid ||= begin
297
+ File.read(pid_file).to_i if pid_file && File.exists?(pid_file)
298
+ end
299
+ end
300
+
301
+ def actual_pid=(pid)
302
+ @actual_pid = pid
228
303
  end
229
304
 
230
305
  def clear_pid
@@ -232,7 +307,7 @@ module Bluepill
232
307
  end
233
308
 
234
309
  def unlink_pid
235
- File.unlink(pid_file) if File.exists?(pid_file)
310
+ File.unlink(pid_file) if pid_file && File.exists?(pid_file)
236
311
  end
237
312
 
238
313
  def drop_privileges
@@ -254,12 +329,44 @@ module Bluepill
254
329
 
255
330
  # Internal State Methods
256
331
  def skip_ticks_for(seconds)
257
- self.skip_ticks_until = (self.skip_ticks_until || Time.now.to_i) + seconds
332
+ # TODO: should this be additive or longest wins?
333
+ # i.e. if two calls for skip_ticks_for come in for 5 and 10, should it skip for 10 or 15?
334
+ self.skip_ticks_until = (self.skip_ticks_until || Time.now.to_i) + seconds.to_i
258
335
  end
259
336
 
260
337
  def skipping_ticks?
261
338
  self.skip_ticks_until && self.skip_ticks_until > Time.now.to_i
262
339
  end
340
+
341
+ def refresh_children!
342
+ # First prune the list of dead children
343
+ @children.delete_if {|child| !child.process_running?(true) }
344
+
345
+ # Add new found children to the list
346
+ new_children_pids = System.get_children(self.actual_pid) - @children.map {|child| child.actual_pid}
347
+
348
+ unless new_children_pids.empty?
349
+ logger.info "Existing children: #{@children.collect{|c| c.actual_pid}.join(",")}. Got new children: #{new_children_pids.inspect} for #{actual_pid}"
350
+ end
351
+
352
+ # Construct a new process wrapper for each new found children
353
+ new_children_pids.each do |child_pid|
354
+ child = self.child_process_template.deep_copy
355
+
356
+ child.name = "<child(pid:#{child_pid})>"
357
+ child.actual_pid = child_pid
358
+ child.logger = self.logger.prefix_with(child.name)
359
+
360
+ child.initialize_state_machines
361
+ child.state = "up"
362
+
363
+ @children << child
364
+ end
365
+ end
366
+
367
+ def deep_copy
368
+ Marshal.load(Marshal.dump(self))
369
+ end
263
370
  end
264
371
  end
265
372
 
@@ -1,6 +1,6 @@
1
1
  module Bluepill
2
- module ProcessConditions
3
- def self.name_to_class(name)
2
+ module ProcessConditions
3
+ def self.[](name)
4
4
  "#{self}::#{name.to_s.camelcase}".constantize
5
5
  end
6
6
  end
@@ -7,7 +7,7 @@ module Bluepill
7
7
 
8
8
  def run(pid)
9
9
  # third col in the ps axu output
10
- System.ps_axu[pid][2].to_f
10
+ System.cpu_usage(pid).to_f
11
11
  end
12
12
 
13
13
  def check(value)
@@ -7,11 +7,11 @@ module Bluepill
7
7
 
8
8
  def run(pid)
9
9
  # rss is on the 5th col
10
- System.ps_axu[pid][4].to_f
10
+ System.memory_usage(pid).to_f
11
11
  end
12
12
 
13
13
  def check(value)
14
- value < @below
14
+ value.kilobytes < @below
15
15
  end
16
16
  end
17
17
  end
@@ -1,10 +1,50 @@
1
- require "singleton"
2
1
  module Bluepill
3
2
  # This class represents the system that bluepill is running on. It's mainly used to memoize
4
3
  # results of running ps axu etc. so that every watch in every process will not result in a fork
5
4
  module System
6
5
  extend self
7
6
 
7
+ # The position of each field in ps output
8
+ IDX_MAP = {
9
+ :pid => 0,
10
+ :ppid => 1,
11
+ :pcpu => 2,
12
+ :rss => 3
13
+ }
14
+
15
+ def cpu_usage(pid)
16
+ ps_axu[pid] && ps_axu[pid][IDX_MAP[:pcpu]].to_f
17
+ end
18
+
19
+ def memory_usage(pid)
20
+ ps_axu[pid] && ps_axu[pid][IDX_MAP[:rss]].to_f
21
+ end
22
+
23
+ def get_children(parent_pid)
24
+ returning(Array.new) do |child_pids|
25
+ ps_axu.each_pair do |pid, chunks|
26
+ child_pids << chunks[IDX_MAP[:pid]].to_i if chunks[IDX_MAP[:ppid]].to_i == parent_pid.to_i
27
+ end
28
+ end
29
+ end
30
+
31
+ def execute_non_blocking(cmd)
32
+ if Daemonize.safefork
33
+ # In parent, return immediately
34
+ return
35
+
36
+ else
37
+ # in child
38
+ ::Kernel.exec(cmd)
39
+ # execution should not reach here
40
+ exit
41
+ end
42
+ end
43
+
44
+ def execute_blocking(cmd)
45
+ ::Kernel.system(cmd)
46
+ end
47
+
8
48
  def store
9
49
  @store ||= Hash.new
10
50
  end
@@ -14,16 +54,16 @@ module Bluepill
14
54
  end
15
55
 
16
56
  def ps_axu
57
+ # TODO: need a mutex here
17
58
  store[:ps_axu] ||= begin
18
59
  # BSD style ps invocation
19
- lines = `ps axu`.split("\n")
20
-
60
+ lines = `ps axo pid=,ppid=,pcpu=,rss=`.split("\n")
61
+
21
62
  lines.inject(Hash.new) do |mem, line|
22
- # There are 11 cols in the ps ax output. This keeps programs that use spaces in $0 in one chunk
23
- chunks = line.split(/\s+/, 11)
24
- pid = chunks[1].to_i
63
+ chunks = line.split(/\s+/)
64
+ chunks.delete_if {|c| c.strip.empty? }
65
+ pid = chunks[IDX_MAP[:pid]].strip.to_i
25
66
  mem[pid] = chunks
26
-
27
67
  mem
28
68
  end
29
69
  end
@@ -9,11 +9,17 @@ module Bluepill
9
9
  @implementations[name]
10
10
  end
11
11
 
12
- attr_accessor :process, :logger
12
+ attr_accessor :process, :logger, :mutex, :scheduled_events
13
13
 
14
14
  def initialize(process, options = {})
15
15
  self.process = process
16
16
  self.logger = options[:logger]
17
+ self.mutex = Mutex.new
18
+ self.scheduled_events = []
19
+ end
20
+
21
+ def reset!
22
+ self.cancel_all_events
17
23
  end
18
24
 
19
25
  def notify(transition)
@@ -26,16 +32,28 @@ module Bluepill
26
32
 
27
33
  def schedule_event(event, delay)
28
34
  # TODO: maybe wrap this in a ScheduledEvent class with methods like cancel
29
- Thread.new(self) do |trigger|
35
+ thread = Thread.new(self) do |trigger|
30
36
  begin
31
- sleep delay.to_i
37
+ sleep delay.to_f
32
38
  trigger.logger.info("Retrying from flapping")
33
39
  trigger.process.dispatch!(event)
34
- rescue Exception => e
35
- trigger.logger.error(e)
36
- trigger.logger.error(e.backtrace.join("\n"))
40
+ trigger.mutex.synchronize do
41
+ trigger.scheduled_events.delete_if { |_, thread| thread == Thread.current }
42
+ end
43
+ rescue StandardError => e
44
+ trigger.logger.err(e)
45
+ trigger.logger.err(e.backtrace.join("\n"))
37
46
  end
38
47
  end
48
+
49
+ self.scheduled_events.push([event, thread])
50
+ end
51
+
52
+ def cancel_all_events
53
+ self.logger.info "Canceling all scheduled events"
54
+ self.mutex.synchronize do
55
+ self.scheduled_events.each {|_, thread| thread.kill}
56
+ end
39
57
  end
40
58
 
41
59
  end
@@ -1,10 +1,7 @@
1
1
  module Bluepill
2
2
  module Triggers
3
3
  class Flapping < Bluepill::Trigger
4
- TRIGGER_STATES = [
5
- [:up, :down],
6
- [:up, :restarting]
7
- ]
4
+ TRIGGER_STATES = [:starting, :restarting]
8
5
 
9
6
  PARAMS = [:times, :within, :retry_in]
10
7
 
@@ -23,12 +20,17 @@ module Bluepill
23
20
  end
24
21
 
25
22
  def notify(transition)
26
- if TRIGGER_STATES.include?([transition.from_name, transition.to_name])
23
+ if TRIGGER_STATES.include?(transition.to_name)
27
24
  self.timeline << Time.now.to_i
28
25
  self.check_flapping
29
26
  end
30
27
  end
31
-
28
+
29
+ def reset!
30
+ @timeline.clear
31
+ super
32
+ end
33
+
32
34
  def check_flapping
33
35
  num_occurances = (@timeline.nitems == self.times)
34
36
 
@@ -44,9 +46,12 @@ module Bluepill
44
46
  self.schedule_event(:start, self.retry_in)
45
47
 
46
48
  # this happens in the process' thread so we don't have to worry about concurrency issues with this event
47
- self.dispatch!(:stop)
49
+ self.dispatch!(:unmonitor)
48
50
 
49
51
  @timeline.clear
52
+
53
+ # This will prevent a transition from happening in the process state_machine
54
+ throw :halt
50
55
  end
51
56
  end
52
57
  end
data/lib/example.rb CHANGED
@@ -3,34 +3,45 @@ require 'bluepill'
3
3
 
4
4
  ROOT_DIR = "/tmp/bp"
5
5
 
6
- # application = Bluepill::Application.new("poop", 'base_dir' => '/tmp/bp')
7
- #
8
- # process = Bluepill::Process.new("hello_world") do |process|
9
- # process.start_command = "sleep 5"
10
- # process.daemonize = true
11
- # process.pid_file = "/tmp/bp/sleep.pid"
12
- # end
13
- #
14
- # process.add_watch("AlwaysTrue", :every => 5)
15
- #
16
- # application.processes << process
17
- # process.dispatch!("start")
18
- #
19
- # application.start
20
-
21
-
6
+ # Watch with
7
+ # watch -n0.2 'ps axu | egrep "(CPU|forking|bluepill|sleep)" | grep -v grep | sort'
22
8
  Bluepill.application(:sample_app) do |app|
23
- 1.times do |i|
9
+ 2.times do |i|
24
10
  app.process("process_#{i}") do |process|
25
- process.start_command = "while true; do echo ''; sleep 0.01; done"
26
- process.daemonize = true
27
11
  process.pid_file = "#{ROOT_DIR}/pids/process_#{i}.pid"
12
+
13
+ # I could not figure out a portable way to
14
+ # specify the path to the sample forking server across the different developer laptops.
15
+ # Since this code is eval'ed we cannot reliably use __FILE__
16
+ process.start_command = "/Users/rohith/work/bluepill/bin/sample_forking_server #{4242 + i}"
17
+ process.stop_command = "kill -INT {{PID}}"
18
+ process.daemonize = true
19
+
20
+ process.start_grace_time = 1.seconds
21
+ process.restart_grace_time = 7.seconds
22
+ process.stop_grace_time = 7.seconds
23
+
28
24
  process.uid = "admin"
29
25
  process.gid = "staff"
30
26
 
31
-
32
- process.checks :cpu_usage, :every => 5, :below => 0.5, :times => [2, 5]
27
+ # process.checks :cpu_usage, :every => 10, :below => 0.5, :times => [5, 5]
33
28
  process.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds
29
+
30
+ process.monitor_children do |child_process|
31
+ # child_process.checks :cpu_usage,
32
+ # :every => 10,
33
+ # :below => 0.5,
34
+ # :times => [5, 5]
35
+
36
+ # child_process.checks :mem_usage,
37
+ # :every => 3,
38
+ # :below => 600.kilobytes,
39
+ # :times => [3, 5],
40
+ # :fires => [:stop]
41
+
42
+ child_process.stop_command = "kill -QUIT {{PID}}"
43
+ # child_process.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds
44
+ end
34
45
  end
35
46
  end
36
47
 
@@ -39,39 +50,20 @@ Bluepill.application(:sample_app) do |app|
39
50
  process.start_command = "sleep #{rand(30) + i}"
40
51
  process.group = "Poopfaced"
41
52
  process.daemonize = true
42
- process.pid_file = "#{ROOT_DIR}/pids/process_#{i}.pid"
53
+ process.pid_file = "#{ROOT_DIR}/pids/#{process.group}_process_#{i}.pid"
43
54
 
44
55
  process.checks :always_true, :every => 10
45
56
  end
46
57
  end
47
- end
48
-
49
-
50
- # Bluepill.watch do
51
- # start_command "start_process -P file.pid"
52
- # stop_command "stop_process -P file.pid"
53
- # pid_file 'file.pid'
54
- #
55
- # checks do |checks|
56
- # checks.mem_usage :every => 15.minutes,
57
- # :below => 250.megabytes,
58
- # :fires => :restart
59
- #
60
- # checks.cpu_usage :every 10.seconds,
61
- # :below => 50.percent,
62
- # :fires => :restart
63
- #
64
- # checks.custom_method :custom_params => :to_be_sent_to_the_custom_condition,
65
- # :fires => [:stop, :custom_event, :start]
66
- #
67
- # checks.deadly_condition :every => 20.seconds,
68
- # :fires => :stop
69
- # end
70
- #
71
- # handles(:restart) do |process|
72
- # # process has pid
73
- # process.transition :down
74
- # process.transition :up
75
- # run "some commands -P #{process.pid}"
76
- # end
77
- # end
58
+
59
+ 0.times do |i|
60
+ app.process("group_process_#{i}") do |process|
61
+ process.start_command = "sleep #{rand(30) + i}"
62
+ process.group = "Poopfaced_2"
63
+ process.daemonize = true
64
+ process.pid_file = "#{ROOT_DIR}/pids/#{process.group}_process_#{i}.pid"
65
+
66
+ process.checks :always_true, :every => 10
67
+ end
68
+ end
69
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bluepill
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arya Asemanfar
@@ -11,8 +11,8 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2009-10-14 00:00:00 -07:00
15
- default_executable: bluepill
14
+ date: 2009-10-22 00:00:00 -07:00
15
+ default_executable:
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rspec