evented_bluepill 0.0.46

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,229 @@
1
+ require 'etc'
2
+
3
+ module Bluepill
4
+ # This module represents the system that bluepill is running on. It's mainly used to memoize
5
+ # results of running ps axu etc. so that every watch in every process will not result in a fork
6
+ module System
7
+ APPEND_MODE = "a"
8
+ extend self
9
+
10
+ # The position of each field in ps output
11
# Column index of each value within a parsed ps row. Frozen because this
# lookup table is shared by every reader and must not change at runtime.
IDX_MAP = {
  :pid  => 0,
  :ppid => 1,
  :pcpu => 2,
  :rss  => 3
}.freeze
17
+
18
# Reports whether a process with the given pid currently exists.
#
# Signal 0 delivers nothing; the call only performs the existence and
# permission checks.
#
# pid - Integer process id to probe.
#
# Returns true when the process exists, including when it belongs to another
# user and we are not permitted to signal it; false when there is no such
# process.
def pid_alive?(pid)
  ::Process.kill(0, pid)
  true
rescue Errno::EPERM
  # No permission to signal it, but that means the process is there.
  true
rescue Errno::ESRCH
  false
end
26
+
27
# Looks up the pcpu column for +pid+ in the memoized ps snapshot.
#
# Returns the value as a Float, or nil when the snapshot has no row for pid.
def cpu_usage(pid)
  row = ps_axu[pid]
  row && row[IDX_MAP[:pcpu]].to_f
end
30
+
31
# Looks up the rss column for +pid+ in the memoized ps snapshot.
#
# Returns the value as a Float, or nil when the snapshot has no row for pid.
def memory_usage(pid)
  row = ps_axu[pid]
  row && row[IDX_MAP[:rss]].to_f
end
34
+
35
# Collects the pids of every process in the ps snapshot whose ppid column
# equals +parent_pid+ (both sides are compared after to_i).
#
# Returns an Array of Integer pids, empty when nothing matches.
def get_children(parent_pid)
  matching_rows = ps_axu.values.select do |fields|
    fields[IDX_MAP[:ppid]].to_i == parent_pid.to_i
  end

  matching_rows.map { |fields| fields[IDX_MAP[:pid]].to_i }
end
42
+
43
+ # Returns the pid of the child that executes the cmd
44
# Launches +cmd+ as a daemon and reports the daemon's pid to the caller.
#
# A pipe carries the pid back: the forked child asks Daemonize to start the
# daemon, records the pid in options[:pid_file], writes it to the pipe and
# exits; the parent reads it from the other end.
#
# cmd     - command handed to Kernel.exec inside the daemon.
# options - Hash read for :uid, :gid, :pid_file, :logger, :working_dir,
#           :environment, :stdin, :stdout and :stderr.
#
# Returns the daemon pid in the parent, or nil when the child reported
# nothing usable (for instance because the pid file was not writable).
def daemonize(cmd, options = {})
  reader, writer = IO.pipe
  forked_pid = Daemonize.safefork

  if forked_pid
    # parent: we do not wanna create zombies, so detach from the child's exit status
    ::Process.detach(forked_pid)

    writer.close
    reported_pid = reader.read.to_i
    reader.close

    return reported_pid > 0 ? reported_pid : nil
  end

  # child
  reader.close

  drop_privileges(options[:uid], options[:gid])

  # if we cannot write the pid file as the provided user, err out
  exit unless can_write_pid_file(options[:pid_file], options[:logger])

  launcher = lambda do
    # Setting the PWD env emulates bash behavior when dealing with symlinks
    working_dir = options[:working_dir]
    Dir.chdir(ENV["PWD"] = working_dir) if working_dir

    extra_env = options[:environment]
    extra_env.each { |key, value| ENV[key.to_s] = value.to_s } if extra_env

    io_in, io_out, io_err = options.values_at(:stdin, :stdout, :stderr)
    redirect_io(io_in, io_out, io_err)

    ::Kernel.exec(cmd)
    exit
  end

  daemon_pid = Daemonize.call_as_daemon(launcher, nil, cmd)

  File.open(options[:pid_file], "w") { |file| file.write(daemon_pid) }

  writer.write daemon_pid
  writer.close

  exit
end
89
+
90
+ # Returns the stdout, stderr and exit code of the cmd
91
# Runs +cmd+ to completion and captures its output.
#
# Three processes are involved: the caller (parent) forks a child through
# Daemonize.safefork; the child forks a grandchild that drops privileges,
# applies the working directory and environment, points its stdout/stderr at
# pipes and execs cmd. The child collects the output and exit status and
# sends them back to the parent as a Marshal dump over a pipe.
#
# cmd     - command handed to Kernel.exec.
# options - Hash read for :uid, :gid, :working_dir and :environment.
#
# Returns a Hash with :stdout (String), :stderr (String) and :exit_code in
# the parent. Marshal.load raises if the child wrote nothing to the pipe.
def execute_blocking(cmd, options = {})
  rd, wr = IO.pipe

  if child = Daemonize.safefork
    # parent
    wr.close

    cmd_status = rd.read
    rd.close

    ::Process.waitpid(child)

    # The payload was produced by our own child, not by an external source.
    return Marshal.load(cmd_status)

  else
    # child
    rd.close

    # create a child in which we can override the stdin, stdout and stderr
    cmd_out_read, cmd_out_write = IO.pipe
    cmd_err_read, cmd_err_write = IO.pipe

    pid = fork {
      # grandchild
      drop_privileges(options[:uid], options[:gid])

      Dir.chdir(ENV["PWD"] = options[:working_dir]) if options[:working_dir]
      options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]

      # close unused fds so ancestors wont hang. This line is the only reason we are not
      # using something like popen3. If this fd is not closed, the .read call on the parent
      # will never return because "wr" would still be open in the "exec"-ed cmd
      wr.close

      # we do not care about stdin of cmd
      STDIN.reopen("/dev/null")

      # point stdout of cmd to somewhere we can read
      cmd_out_read.close
      STDOUT.reopen(cmd_out_write)
      cmd_out_write.close

      # same thing for stderr
      cmd_err_read.close
      STDERR.reopen(cmd_err_write)
      cmd_err_write.close

      # finally, replace grandchild with cmd
      ::Kernel.exec(cmd)
    }

    # we do not use these ends of the pipes in the child
    cmd_out_write.close
    cmd_err_write.close

    # Drain both pipes at the same time and BEFORE reaping the command. A pipe
    # holds a limited amount of data: if we waited for the command to exit
    # first (or read one stream to EOF before touching the other), a command
    # producing more output than fits would block on write and never exit.
    out_reader = Thread.new { cmd_out_read.read }
    err_reader = Thread.new { cmd_err_read.read }
    captured_out = out_reader.value
    captured_err = err_reader.value

    # acknowledge the command's death; waitpid2 hands back the status directly
    _, status = ::Process.waitpid2(pid)

    # collect stdout, stderr and exitcode
    result = {
      :stdout => captured_out,
      :stderr => captured_err,
      :exit_code => status.exitstatus
    }

    # We're done with these ends of the pipes as well
    cmd_out_read.close
    cmd_err_read.close

    # Time to tell the parent about what went down
    wr.write Marshal.dump(result)
    wr.close

    exit
  end
end
167
+
168
# Lazily created Hash that holds memoized results (ps_axu keeps its snapshot
# here under :ps_axu).
def store
  @store ||= {}
end
171
+
172
# Empties the memoization store so the next reader recomputes its data.
#
# Returns the (now empty) store, or nil when there was nothing to clear.
def reset_data
  cache = store
  return if cache.empty?

  cache.clear
end
175
+
176
# Snapshot of the process table, memoized in store[:ps_axu] so repeated
# lookups do not each shell out to ps.
#
# Returns a Hash mapping Integer pid => Array of the row's whitespace
# separated fields (Strings), in the column order described by IDX_MAP.
def ps_axu
  # TODO: need a mutex here
  store[:ps_axu] ||= begin
    snapshot = Hash.new

    # BSD style ps invocation
    `ps axo pid=,ppid=,pcpu=,rss=`.split("\n").each do |row|
      # right-aligned columns leave blank fragments behind; drop them
      fields = row.split(/\s+/).reject { |field| field.strip.empty? }
      snapshot[fields[IDX_MAP[:pid]].strip.to_i] = fields
    end

    snapshot
  end
end
191
+
192
+ # be sure to call this from a fork otherwise it will modify the attributes
193
+ # of the bluepill daemon
194
# Switches the current process to the given user and group. Does nothing
# unless the effective uid is 0.
#
# uid - user name looked up with Etc.getpwnam, or nil to keep the user.
# gid - group name looked up with Etc.getgrnam, or nil to keep the group.
#
# The group (supplementary list first, then the gid) is changed before the
# uid, in that order.
def drop_privileges(uid, gid)
  return unless ::Process::Sys.geteuid == 0

  uid_num = Etc.getpwnam(uid).uid if uid
  gid_num = Etc.getgrnam(gid).gid if gid

  if gid
    ::Process.groups = [gid_num]
    ::Process::Sys.setgid(gid_num)
  end

  ::Process::Sys.setuid(uid_num) if uid
end
204
+
205
# Probes whether the current user can create +pid_file+ by touching it and
# then removing it again.
#
# NOTE: the probe unlinks the path afterwards, so a file that already existed
# at pid_file is gone once this returns true.
#
# pid_file - path of the pid file to probe.
# logger   - object responding to #warning; receives the error and backtrace
#            when the probe fails.
#
# Returns true when the file could be created and removed, false otherwise.
# Only StandardError is treated as "not writable"; Interrupt, SystemExit and
# other non-standard exceptions propagate instead of being logged and
# swallowed.
def can_write_pid_file(pid_file, logger)
  FileUtils.touch(pid_file)
  File.unlink(pid_file)
  true
rescue StandardError => e
  logger.warning "%s - %s" % [e.class.name, e.message]
  e.backtrace.each { |line| logger.warning line }
  false
end
215
+
216
# Re-points the process' standard streams.
#
# io_in  - target for $stdin, or nil to leave it alone.
# io_out - target for $stdout (opened with APPEND_MODE), or nil.
# io_err - target for $stderr (opened with APPEND_MODE), or nil.
#
# When io_out and io_err are the same target, $stderr is reopened onto
# $stdout rather than opening the target a second time.
def redirect_io(io_in, io_out, io_err)
  $stdin.reopen(io_in) if io_in

  shared_target = !io_out.nil? && !io_err.nil? && io_out == io_err

  if shared_target
    $stdout.reopen(io_out, APPEND_MODE)
    return $stderr.reopen($stdout)
  end

  $stdout.reopen(io_out, APPEND_MODE) if io_out
  $stderr.reopen(io_err, APPEND_MODE) if io_err
end
228
+ end
229
+ end
@@ -0,0 +1,70 @@
1
module Bluepill
  # Base class for triggers. A trigger is told about process transitions via
  # #notify and can schedule events to be dispatched to its process later.
  # Subclasses register themselves by underscored class name and are looked
  # up with Trigger[:name].
  class Trigger
    @implementations = {}

    # Registers each subclass under the underscored last segment of its name.
    def self.inherited(klass)
      @implementations[klass.name.split('::').last.underscore.to_sym] = klass
    end

    # Returns the trigger class registered under +name+ (a Symbol), or nil.
    def self.[](name)
      @implementations[name]
    end

    attr_accessor :process, :logger, :mutex, :scheduled_events

    # process - object responding to dispatch!(event, source_name).
    # options - Hash; only :logger is read here.
    def initialize(process, options = {})
      self.process = process
      self.logger = options[:logger]
      self.mutex = Mutex.new
      self.scheduled_events = []
    end

    def reset!
      self.cancel_all_events
    end

    def notify(transition)
      raise "Implement in subclass"
    end

    # Forwards +event+ to the process, tagged with this trigger's class name.
    def dispatch!(event)
      self.process.dispatch!(event, self.class.name.split("::").last)
    end

    # Returns a Marshal round-tripped copy of this trigger with its own lock.
    #
    # The mutex is detached while dumping and is put back in an ensure, so a
    # failed dump no longer leaves this trigger without a mutex.
    def deep_copy
      # TODO: This is a kludge. Ideally, process templates
      # would be factories, and not a template object.
      original_mutex, @mutex = @mutex, nil
      begin
        clone = Marshal.load(Marshal.dump(self))
      ensure
        @mutex = original_mutex
      end
      # NOTE(review): copies get a Monitor while #initialize creates a Mutex;
      # kept as in the original, both respond to #synchronize.
      clone.instance_variable_set("@mutex", Monitor.new)
      clone
    end

    # Dispatches +event+ from a background thread after +delay+ seconds.
    #
    # The thread is created and registered while holding the mutex, so it
    # cannot try to remove its own entry before that entry has been added.
    # The entry is removed in an ensure, so it also goes away when dispatching
    # fails.
    #
    # Returns the scheduled_events list.
    def schedule_event(event, delay)
      # TODO: maybe wrap this in a ScheduledEvent class with methods like cancel
      self.mutex.synchronize do
        thread = Thread.new(self) do |trigger|
          begin
            sleep delay.to_f
            trigger.logger.info("Retrying from flapping")
            trigger.dispatch!(event)
          rescue StandardError => e
            trigger.logger.err(e)
            trigger.logger.err(e.backtrace.join("\n"))
          ensure
            trigger.mutex.synchronize do
              trigger.scheduled_events.delete_if { |_, scheduled| scheduled == Thread.current }
            end
          end
        end

        self.scheduled_events.push([event, thread])
      end
    end

    # Kills every pending event thread and forgets about them.
    def cancel_all_events
      self.logger.info "Canceling all scheduled events"
      self.mutex.synchronize do
        self.scheduled_events.each { |_, thread| thread.kill }
        # killed threads are of no further use; do not let them pile up
        self.scheduled_events.clear
      end
    end

  end
end
@@ -0,0 +1,59 @@
1
module Bluepill
  module Triggers
    # Trigger that reacts when a process enters one of TRIGGER_STATES +times+
    # times with the first and last of those at most +within+ seconds apart:
    # it schedules a :start after +retry_in+ seconds, unmonitors the process
    # and halts the current transition.
    class Flapping < Bluepill::Trigger
      TRIGGER_STATES = [:starting, :restarting]

      PARAMS = [:times, :within, :retry_in]

      attr_accessor(*PARAMS)
      attr_reader :timeline

      # options - Hash; :times, :within and :retry_in are copied to instance
      #           variables (defaults 5, 1 and 5 are merged into the hash).
      def initialize(process, options = {})
        options.reverse_merge!(:times => 5, :within => 1, :retry_in => 5)

        options.each_pair do |key, value|
          next unless PARAMS.include?(key)

          instance_variable_set("@#{key}", value)
        end

        @timeline = Util::RotationalArray.new(@times)
        super
      end

      # Records the time of every transition into a trigger state and then
      # checks for flapping.
      def notify(transition)
        return unless TRIGGER_STATES.include?(transition.to_name)

        timeline << Time.now.to_i
        check_flapping
      end

      def reset!
        @timeline.clear
        super
      end

      def check_flapping
        # The process has not flapped if we haven't encountered enough incidents
        return unless @timeline.nitems == times

        # Check if the incidents happened within the timeframe
        return unless (@timeline.last - @timeline.first) <= within

        logger.info "Flapping detected: retrying in #{retry_in} seconds"

        schedule_event(:start, retry_in)

        # this happens in the process' thread so we don't have to worry about concurrency issues with this event
        dispatch!(:unmonitor)

        @timeline.clear

        # This will prevent a transition from happening in the process state_machine
        throw :halt
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
module Bluepill
  module Util
    # Fixed-capacity ring buffer built on Array: once +size+ values have been
    # pushed, each further push overwrites the oldest value. #first, #last and
    # #each work in insertion order (oldest to newest).
    class RotationalArray < Array
      # size - Integer capacity; the underlying array starts as +size+ nils.
      def initialize(size)
        super(size)

        @capacity = size
        @counter = 0
      end

      # Stores +value+ in the next slot, overwriting the oldest value when
      # the buffer is full. Returns self so pushes can be chained.
      def push(value)
        idx = rotational_idx(@counter)
        self[idx] = value

        @counter += 1
        self
      end

      alias_method :<<, :push

      # pop, shift and unshift make no sense on a ring buffer. They accept
      # (and ignore) any arguments so that every call reports the intended
      # error instead of an unrelated ArgumentError.
      def pop(*)
        raise "Cannot call pop on a rotational array"
      end

      def shift(*)
        raise "Cannot call shift on a rotational array"
      end

      def unshift(*)
        raise "Cannot call unshift on a rotational array"
      end

      # Most recently pushed value, or nil when nothing has been pushed.
      def last
        return if @counter.zero?

        self[rotational_idx(@counter - 1)]
      end

      # Oldest value still held, or nil when nothing has been pushed.
      def first
        return if @counter.zero?
        return self[0] if @counter <= @capacity

        self[rotational_idx(@counter)]
      end

      # Drops all values and restarts the rotation.
      def clear
        @counter = 0
        super
      end

      # Yields the held values from oldest to newest.
      #
      # Returns self, or an Enumerator when no block is given.
      def each(&block)
        return to_enum(:each) unless block

        held = @counter >= @capacity ? @capacity : @counter
        start = @counter >= @capacity ? rotational_idx(@counter) : 0
        held.times do |i|
          block.call(self[rotational_idx(start + i)])
        end
        self
      end

      # Array#nitems is not available on every Ruby; provide it when missing.
      unless method_defined?(:nitems)
        # Number of non-nil slots.
        def nitems
          compact.length
        end
      end

      private

      def rotational_idx(idx)
        idx % @capacity
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Bluepill
  # Gem version string. Frozen so it cannot be modified in place.
  VERSION = "0.0.44".freeze
end
data/lib/bluepill.rb ADDED
@@ -0,0 +1,31 @@
1
+ require 'rubygems'
2
+
3
+ require 'thread'
4
+ require 'monitor'
5
+ require 'syslog'
6
+ require 'timeout'
7
+ require 'logger'
8
+
9
+ require 'active_support/inflector'
10
+ require 'active_support/core_ext/hash'
11
+ require 'active_support/core_ext/numeric'
12
+ require 'active_support/duration'
13
+
14
+ require 'bluepill/application'
15
+ require 'bluepill/controller'
16
+ require 'bluepill/socket'
17
+ require "bluepill/process"
18
+ require "bluepill/process_statistics"
19
+ require "bluepill/group"
20
+ require "bluepill/logger"
21
+ require "bluepill/condition_watch"
22
+ require 'bluepill/trigger'
23
+ require 'bluepill/triggers/flapping'
24
+ require "bluepill/dsl"
25
+ require "bluepill/system"
26
+
27
+ require "bluepill/process_conditions"
28
+
29
+ require "bluepill/util/rotational_array"
30
+
31
+ require "bluepill/version"
data/lib/example.rb ADDED
@@ -0,0 +1,85 @@
1
#! /usr/bin/env ruby
# Sample bluepill configuration: one daemonized forking server with cpu and
# flapping checks plus monitored children, followed by two process templates
# that are currently disabled (their loops run zero times).
$LOAD_PATH.push('lib/')

require 'rubygems'
require 'bluepill'
require 'logger'

ROOT_DIR = "/tmp/bp"

# Watch with
# watch -n0.2 'ps axu | egrep "(CPU|forking|bluepill|sleep)" | grep -v grep | sort'
Bluepill.application(:sample_app, :foreground => true, :log_file => '/tmp/bp/sample_app.log') do |app|
  1.times do |n|
    app.process("process_#{n}") do |process|
      process.pid_file = "#{ROOT_DIR}/pids/process_#{n}.pid"

      # I could not figure out a portable way to
      # specify the path to the sample forking server across the diff developer laptops.
      # Since this code is eval'ed we cannot reliably use __FILE__
      process.start_command = "/Users/stefan/Documents/projects/bluepill/bin/sample_forking_server #{4242 + n}"
      process.stop_command = "kill -INT {{PID}}"
      process.daemonize = true

      process.start_grace_time = 1.seconds
      process.restart_grace_time = 7.seconds
      process.stop_grace_time = 7.seconds

      process.uid = "stefan"
      process.gid = "staff"
      process.group = "test"

      process.checks(:cpu_usage, :every => 5, :below => 0.5, :times => [5, 5])
      process.checks(:flapping, :times => 2, :within => 30.seconds, :retry_in => 20.seconds)

      process.monitor_children do |child|
        child.checks(:cpu_usage, :every => 10, :below => 0.5, :times => [5, 5])

        child.checks(:mem_usage, :every => 3, :below => 900.kilobytes, :times => [3, 5], :fires => :stop)

        child.stop_command = "kill -QUIT {{PID}}"
        # child.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds
      end
    end
  end

  # Disabled template: a mongrel managed through its own pid file.
  0.times do |n|
    app.process("group_process_#{n}") do |process|
      process.group = "group_1"
      process.pid_file = "/Users/rohith/ffs/tmp/pids/mongrel_#{n}.pid"
      process.start_command = "cd ~/ffs && mongrel_rails start -P #{process.pid_file} -p 3000 -d"

      process.start_grace_time = 10.seconds

      process.uid = "rohith"
      process.gid = "staff"

      # process.checks :always_true, :every => 10
    end
  end

  # Disabled template: stdout and stderr share one log file.
  0.times do |n|
    app.process("group_process_#{n}") do |process|
      process.uid = "rohith"
      process.gid = "wheel"

      process.stderr = "/tmp/err.log"
      process.stdout = "/tmp/err.log"

      process.group = "grouped"
      process.start_command = %Q{cd /tmp && ruby -e '$stderr.puts("hello stderr");$stdout.puts("hello stdout"); $stdout.flush; $stderr.flush; sleep 10'}
      process.daemonize = true
      process.pid_file = "/tmp/noperm/p_#{process.group}_#{n}.pid"

      # process.checks :always_true, :every => 5
    end
  end
end
@@ -0,0 +1,25 @@
1
require 'rubygems'
require 'bluepill'
require 'logger'

# ATTENTION:
# You must declare only one application per config when foreground mode specified
#
# http://github.com/akzhan/runit-man used as example of monitored application.

Bluepill.application(:runit_man, :foreground => true) do |app|
  app.process("runit-man") do |service|
    service.pid_file = "/etc/service/runit-man/supervise/pid"

    # runit's sv drives the service
    service.start_command = "/usr/bin/sv start runit-man"
    service.stop_command = "/usr/bin/sv stop runit-man"
    service.restart_command = "/usr/bin/sv restart runit-man"

    service.start_grace_time = 1.seconds
    service.restart_grace_time = 7.seconds
    service.stop_grace_time = 7.seconds

    service.checks(:http,
                   :within => 30.seconds,
                   :retry_in => 7.seconds,
                   :every => 30.seconds,
                   :url => 'http://localhost:4567/',
                   :kind => :success,
                   :pattern => /html/,
                   :timeout => 3.seconds)
  end
end