evented_bluepill 0.0.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
1
+ require 'etc'
+ require 'fileutils'
2
+
3
+ module Bluepill
4
+ # This module represents the system that bluepill is running on. It's mainly used to memoize the
5
+ # results of running ps, so that every watch in every process does not result in a fork
6
+ module System
7
+ APPEND_MODE = "a"
8
+ extend self
9
+
10
+ # The position of each field in ps output
11
+ IDX_MAP = {
12
+ :pid => 0,
13
+ :ppid => 1,
14
+ :pcpu => 2,
15
+ :rss => 3
16
+ }
17
+
18
# Reports whether a process with the given pid currently exists.
#
# Signal 0 is used as a probe: no signal is delivered, only the existence
# and permission checks are performed.
#
# @param pid [Integer] process id to probe
# @return [Boolean] true when the process exists (including when it exists
#   but we are not permitted to signal it), false when there is no such process
def pid_alive?(pid)
  ::Process.kill(0, pid)
  true
rescue Errno::EPERM
  # The process exists but is owned by someone we may not signal.
  true
rescue Errno::ESRCH
  false
end
26
+
27
# Looks up the pcpu column for +pid+ in the cached ps table.
#
# @param pid [Integer] key into the table returned by ps_axu
# @return [Float, nil] the pcpu field converted with to_f, or the falsy
#   lookup result when the pid is not in the table
def cpu_usage(pid)
  fields = ps_axu[pid]
  fields && fields[IDX_MAP[:pcpu]].to_f
end
30
+
31
# Looks up the rss column for +pid+ in the cached ps table.
#
# @param pid [Integer] key into the table returned by ps_axu
# @return [Float, nil] the rss field converted with to_f, or the falsy
#   lookup result when the pid is not in the table
def memory_usage(pid)
  fields = ps_axu[pid]
  fields && fields[IDX_MAP[:rss]].to_f
end
34
+
35
# Collects the pids of every process in the cached ps table whose ppid
# column equals +parent_pid+.
#
# @param parent_pid [#to_i] pid of the parent process
# @return [Array<Integer>] child pids, in table order (empty when none match)
def get_children(parent_pid)
  offspring = ps_axu.values.select do |fields|
    fields[IDX_MAP[:ppid]].to_i == parent_pid.to_i
  end

  offspring.map { |fields| fields[IDX_MAP[:pid]].to_i }
end
42
+
43
# Starts +cmd+ as a daemon and returns the pid of the process executing it.
#
# A first process is forked via Daemonize.safefork. It drops privileges,
# verifies the pid file is writable, asks Daemonize.call_as_daemon to run
# the command, writes the resulting pid to options[:pid_file] and reports
# it back to the caller through a pipe.
#
# Options read here: :uid, :gid, :pid_file, :logger, :working_dir,
# :environment, :stdin, :stdout, :stderr.
#
# @param cmd [String] command line handed to Kernel.exec
# @param options [Hash] see above
# @return [Integer, nil] the reported daemon pid, or nil when the forked
#   process reported nothing greater than zero
def daemonize(cmd, options = {})
  reader, writer = IO.pipe

  forked_pid = Daemonize.safefork

  unless forked_pid
    # --- forked process: never returns to the caller, always ends in exit ---
    reader.close

    drop_privileges(options[:uid], options[:gid])

    # give up if the pid file cannot be written as the configured user
    exit unless can_write_pid_file(options[:pid_file], options[:logger])

    launcher = lambda do
      # Setting the PWD env emulates bash behavior when dealing with symlinks
      Dir.chdir(ENV["PWD"] = options[:working_dir]) if options[:working_dir]
      options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]

      redirect_io(*options.values_at(:stdin, :stdout, :stderr))

      ::Kernel.exec(cmd)
      exit
    end

    daemon_pid = Daemonize.call_as_daemon(launcher, nil, cmd)

    File.open(options[:pid_file], "w") { |f| f.write(daemon_pid) }

    writer.write daemon_pid
    writer.close

    exit
  end

  # --- calling process ---
  # detach so the forked process does not linger as a zombie
  ::Process.detach(forked_pid)

  writer.close

  reported_pid = reader.read.to_i
  reader.close

  reported_pid if reported_pid > 0
end
89
+
90
# Runs +cmd+ to completion and returns what it printed and how it exited.
#
# Two processes are involved besides the caller: a child forked through
# Daemonize.safefork, and a grandchild forked from that child which drops
# privileges, redirects its standard streams into pipes and exec's +cmd+.
# The child collects the output and exit status and sends them back to the
# caller as a marshalled Hash.
#
# Options read here: :uid, :gid, :working_dir, :environment.
#
# @param cmd [String] command line handed to Kernel.exec
# @param options [Hash] see above
# @return [Hash] { :stdout => String, :stderr => String, :exit_code => Integer or nil }
def execute_blocking(cmd, options = {})
  rd, wr = IO.pipe

  if child = Daemonize.safefork
    # parent
    wr.close

    cmd_status = rd.read
    rd.close

    ::Process.waitpid(child)

    # The payload was produced by our own forked child, not by external input.
    return Marshal.load(cmd_status)

  else
    # child
    rd.close

    # create a child in which we can override the stdin, stdout and stderr
    cmd_out_read, cmd_out_write = IO.pipe
    cmd_err_read, cmd_err_write = IO.pipe

    pid = fork {
      # grandchild
      drop_privileges(options[:uid], options[:gid])

      Dir.chdir(ENV["PWD"] = options[:working_dir]) if options[:working_dir]
      options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]

      # close unused fds so ancestors wont hang. This line is the only reason we are not
      # using something like popen3. If this fd is not closed, the .read call on the parent
      # will never return because "wr" would still be open in the "exec"-ed cmd
      wr.close

      # we do not care about stdin of cmd
      STDIN.reopen("/dev/null")

      # point stdout of cmd to somewhere we can read
      cmd_out_read.close
      STDOUT.reopen(cmd_out_write)
      cmd_out_write.close

      # same thing for stderr
      cmd_err_read.close
      STDERR.reopen(cmd_err_write)
      cmd_err_write.close

      # finally, replace grandchild with cmd
      ::Kernel.exec(cmd)
    }

    # we do not use these ends of the pipes in the child
    cmd_out_write.close
    cmd_err_write.close

    # Drain both pipes concurrently BEFORE reaping the command. Waiting first
    # deadlocks as soon as the command writes more than the pipe can buffer:
    # it blocks on write while we block on waitpid. Reading the two streams
    # in separate threads also avoids stalling on one while the other fills up.
    out_reader = Thread.new { cmd_out_read.read }
    err_reader = Thread.new { cmd_err_read.read }
    captured_out = out_reader.value
    captured_err = err_reader.value

    # both streams hit EOF; acknowledge the command's death
    ::Process.waitpid(pid)

    # collect stdout, stderr and exitcode
    result = {
      :stdout => captured_out,
      :stderr => captured_err,
      :exit_code => $?.exitstatus
    }

    # We're done with these ends of the pipes as well
    cmd_out_read.close
    cmd_err_read.close

    # Time to tell the parent about what went down
    wr.write Marshal.dump(result)
    wr.close

    exit
  end
end
167
+
168
# Lazily created Hash used as the memoization cache for this module.
#
# @return [Hash] the same Hash instance on every call
def store
  @store ||= {}
end
171
+
172
# Empties the memoization cache so the next lookup recomputes its data.
#
# @return [Hash, nil] the cleared cache, or nil when it was already empty
def reset_data
  cache = store
  cache.empty? ? nil : cache.clear
end
175
+
176
# Returns the process table, keyed by pid, running `ps` only when the cache
# in +store+ has no :ps_axu entry yet.
#
# Each value is the array of whitespace-separated fields of one ps line, in
# the column order described by IDX_MAP. Blank lines in the ps output are
# skipped instead of raising.
#
# @return [Hash{Integer => Array<String>}] pid => fields
def ps_axu
  # TODO: need a mutex here
  store[:ps_axu] ||= begin
    # BSD style ps invocation
    output = `ps axo pid=,ppid=,pcpu=,rss=`

    output.split("\n").inject({}) do |table, line|
      # split with no argument separates on whitespace and ignores leading blanks
      fields = line.split
      table[fields[IDX_MAP[:pid]].to_i] = fields unless fields.empty?
      table
    end
  end
end
191
+
192
# Switches the current process to the given user and group.
#
# Be sure to call this from a fork, otherwise it will modify the attributes
# of the bluepill daemon itself. Nothing happens unless the effective uid
# is 0.
#
# @param uid [String, nil] user name resolved through Etc.getpwnam
# @param gid [String, nil] group name resolved through Etc.getgrnam
# @return [Object, nil] result of the final setuid call, or nil when it was skipped
def drop_privileges(uid, gid)
  return unless ::Process::Sys.geteuid.zero?

  # resolve both names before changing anything
  uid_num = Etc.getpwnam(uid).uid if uid
  gid_num = Etc.getgrnam(gid).gid if gid

  if gid
    ::Process.groups = [gid_num]
    ::Process::Sys.setgid(gid_num)
  end

  # the user id is switched last
  ::Process::Sys.setuid(uid_num) if uid
end
204
+
205
# Checks whether the current user can create +pid_file+.
#
# The check touches the file and then unlinks it, so the path does not exist
# afterwards (this also removes a file that was already there). Failures are
# reported through +logger+ when one is given.
#
# Only StandardError is rescued, so interrupts and exit requests still
# propagate to the caller.
#
# @param pid_file [String] path to probe
# @param logger [#warning, nil] receives the error and its backtrace
# @return [Boolean] true when the file could be created and removed
def can_write_pid_file(pid_file, logger)
  FileUtils.touch(pid_file)
  File.unlink(pid_file)
  true
rescue StandardError => e
  if logger
    logger.warning "%s - %s" % [e.class.name, e.message]
    e.backtrace.each { |line| logger.warning line }
  end
  false
end
215
+
216
# Re-points the process' standard streams.
#
# stdin is reopened on +io_in+ when given. When +io_out+ and +io_err+ are
# both given and equal, stdout is reopened on it in append mode and stderr
# is reopened on stdout; otherwise each of stdout and stderr is reopened
# independently (in append mode) when its target is given.
#
# @param io_in [String, IO, nil] new stdin target
# @param io_out [String, IO, nil] new stdout target
# @param io_err [String, IO, nil] new stderr target
def redirect_io(io_in, io_out, io_err)
  $stdin.reopen(io_in) if io_in

  if io_out.nil? || io_err.nil? || !(io_out == io_err)
    # separate (or missing) targets: handle each stream on its own
    $stdout.reopen(io_out, APPEND_MODE) if io_out
    $stderr.reopen(io_err, APPEND_MODE) if io_err
  else
    # one shared target: open it once and point stderr at stdout
    $stdout.reopen(io_out, APPEND_MODE)
    $stderr.reopen($stdout)
  end
end
228
+ end
229
+ end
@@ -0,0 +1,70 @@
1
module Bluepill
  # Base class for triggers attached to a process. A trigger is notified of
  # state transitions (see #notify, implemented by subclasses) and can
  # schedule events that are dispatched to its process after a delay.
  class Trigger
    # Registry of subclasses, filled in by .inherited and read by .[]
    @implementations = {}

    # Registers +klass+ under the underscored, symbolized last segment of its
    # name. String#underscore is not defined in this file.
    def self.inherited(klass)
      @implementations[klass.name.split('::').last.underscore.to_sym] = klass
    end

    # @param name [Symbol] registry key
    # @return [Class, nil] the registered trigger class, if any
    def self.[](name)
      @implementations[name]
    end

    attr_accessor :process, :logger, :mutex, :scheduled_events

    # @param process [Object] receiver of #dispatch! calls
    # @param options [Hash] only :logger is read here
    def initialize(process, options = {})
      self.process = process
      self.logger = options[:logger]
      self.mutex = Mutex.new
      self.scheduled_events = []
    end

    # Cancels everything this trigger has scheduled.
    def reset!
      self.cancel_all_events
    end

    # Hook invoked with a state transition; must be overridden.
    def notify(transition)
      raise "Implement in subclass"
    end

    # Forwards +event+ to the process, tagged with this trigger's class name.
    def dispatch!(event)
      self.process.dispatch!(event, self.class.name.split("::").last)
    end

    # Returns a Marshal round-tripped copy of this trigger with a fresh lock.
    #
    # The mutex cannot be marshalled, so it is removed for the duration of
    # the dump and always put back afterwards, even when the dump raises.
    #
    # @return [Trigger] the copy
    def deep_copy
      # TODO: This is a kludge. Ideally, process templates
      # would be factories, and not a template object.
      saved_mutex = @mutex
      @mutex = nil

      begin
        copy = Marshal.load(Marshal.dump(self))
      ensure
        @mutex = saved_mutex
      end

      copy.instance_variable_set("@mutex", Monitor.new)
      copy
    end

    # Starts a thread that sleeps for +delay+ seconds, dispatches +event+ and
    # then removes itself from scheduled_events. Errors raised in the thread
    # are reported through the logger.
    #
    # @param event [Symbol] event to dispatch
    # @param delay [#to_f] seconds to wait before dispatching
    # @return [Array] the scheduled_events list
    def schedule_event(event, delay)
      # TODO: maybe wrap this in a ScheduledEvent class with methods like cancel
      thread = Thread.new(self) do |trigger|
        begin
          sleep delay.to_f
          trigger.logger.info("Retrying from flapping")
          trigger.dispatch!(event)
          trigger.mutex.synchronize do
            trigger.scheduled_events.delete_if { |_, scheduled| scheduled == Thread.current }
          end
        rescue StandardError => e
          trigger.logger.err(e)
          trigger.logger.err(e.backtrace.join("\n"))
        end
      end

      self.scheduled_events.push([event, thread])
    end

    # Kills the thread of every scheduled event while holding the mutex.
    def cancel_all_events
      self.logger.info "Canceling all scheduled events"
      self.mutex.synchronize do
        self.scheduled_events.each { |_, thread| thread.kill }
      end
    end

  end
end
@@ -0,0 +1,59 @@
1
module Bluepill
  module Triggers
    # Trigger that watches for a process entering a start/restart state too
    # often. When +times+ such transitions are recorded and the newest and
    # oldest are at most +within+ seconds apart, the process is unmonitored
    # and a :start event is scheduled +retry_in+ seconds later.
    class Flapping < Bluepill::Trigger
      # transition targets that count as an incident
      TRIGGER_STATES = [:starting, :restarting]

      # option names that become instance variables / accessors
      PARAMS = [:times, :within, :retry_in]

      attr_accessor(*PARAMS)
      attr_reader :timeline

      # @param process [Object] passed on to Bluepill::Trigger
      # @param options [Hash] :times, :within and :retry_in (defaults 5, 1, 5)
      #   are consumed here; the defaults are merged into the given hash
      def initialize(process, options = {})
        options.reverse_merge!(:times => 5, :within => 1, :retry_in => 5)

        options.each_pair do |name, val|
          next unless PARAMS.include?(name)

          instance_variable_set("@#{name}", val)
        end

        @timeline = Util::RotationalArray.new(@times)
        super
      end

      # Records the current time for relevant transitions and checks for flapping.
      def notify(transition)
        return unless TRIGGER_STATES.include?(transition.to_name)

        self.timeline << Time.now.to_i
        self.check_flapping
      end

      # Forgets all recorded incidents, then performs the base-class reset.
      def reset!
        @timeline.clear
        super
      end

      # Decides whether the recorded incidents amount to flapping and, if so,
      # schedules the retry, unmonitors the process and throws :halt.
      def check_flapping
        # The process has not flapped if we haven't encountered enough incidents
        return unless @timeline.nitems == self.times

        # Nor has it flapped if the incidents are spread over too long a period
        return unless (@timeline.last - @timeline.first) <= self.within

        self.logger.info "Flapping detected: retrying in #{self.retry_in} seconds"

        self.schedule_event(:start, self.retry_in)

        # this happens in the process' thread so we don't have to worry about concurrency issues with this event
        self.dispatch!(:unmonitor)

        @timeline.clear

        # This will prevent a transition from happening in the process state_machine
        throw :halt
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
module Bluepill
  module Util
    # Fixed-capacity array that overwrites its oldest entry once full.
    #
    # Values are written at (number of pushes) modulo capacity, so after more
    # than +size+ pushes only the most recent +size+ values remain. #first,
    # #last and #each follow insertion order rather than storage order.
    class RotationalArray < Array
      # @param size [Integer] capacity; the array starts as +size+ nils
      def initialize(size)
        super(size)

        @capacity = size
        @counter = 0
      end

      # Stores +value+ in the next slot, overwriting the oldest value when full.
      #
      # @return [RotationalArray] self, so calls can be chained
      def push(value)
        self[rotational_idx(@counter)] = value

        @counter += 1
        self
      end

      alias_method :<<, :push

      # Removing or prepending elements would break the rotation bookkeeping.
      # The methods accept (and ignore) any arguments so callers always get
      # the explanatory error rather than an ArgumentError.
      def pop(*)
        raise "Cannot call pop on a rotational array"
      end

      def shift(*)
        raise "Cannot call shift on a rotational array"
      end

      def unshift(*)
        raise "Cannot call unshift on a rotational array"
      end

      # @return [Object, nil] the most recently pushed value, nil when nothing was pushed
      def last
        return if @counter.zero?

        self[rotational_idx(@counter - 1)]
      end

      # @return [Object, nil] the oldest value still held, nil when nothing was pushed
      def first
        return if @counter.zero?
        # until the array wraps around, the oldest value sits in slot 0
        return self[0] if @counter <= @capacity

        self[rotational_idx(@counter)]
      end

      # Drops all values and restarts the rotation.
      def clear
        @counter = 0
        super
      end

      # Yields the held values from oldest to newest.
      #
      # @return [Integer, Enumerator] the number of values yielded, or an
      #   Enumerator when no block is given
      def each(&block)
        return enum_for(:each) unless block

        times = @counter >= @capacity ? @capacity : @counter
        start = @counter >= @capacity ? rotational_idx(@counter) : 0
        times.times do |i|
          block.call(self[rotational_idx(start + i)])
        end
      end

      # Array#nitems is not available on every Ruby version; define it when missing.
      unless method_defined?(:nitems)
        # @return [Integer] number of non-nil entries
        def nitems
          compact.length
        end
      end

      private

      # Maps an ever-growing position onto a slot index.
      def rotational_idx(idx)
        idx % @capacity
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Bluepill
  # Version string exposed by the library.
  VERSION = '0.0.44'
end
data/lib/bluepill.rb ADDED
@@ -0,0 +1,31 @@
1
+ require 'rubygems'
2
+
3
+ require 'thread'
4
+ require 'monitor'
5
+ require 'syslog'
6
+ require 'timeout'
7
+ require 'logger'
8
+
9
+ require 'active_support/inflector'
10
+ require 'active_support/core_ext/hash'
11
+ require 'active_support/core_ext/numeric'
12
+ require 'active_support/duration'
13
+
14
+ require 'bluepill/application'
15
+ require 'bluepill/controller'
16
+ require 'bluepill/socket'
17
+ require "bluepill/process"
18
+ require "bluepill/process_statistics"
19
+ require "bluepill/group"
20
+ require "bluepill/logger"
21
+ require "bluepill/condition_watch"
22
+ require 'bluepill/trigger'
23
+ require 'bluepill/triggers/flapping'
24
+ require "bluepill/dsl"
25
+ require "bluepill/system"
26
+
27
+ require "bluepill/process_conditions"
28
+
29
+ require "bluepill/util/rotational_array"
30
+
31
+ require "bluepill/version"
data/lib/example.rb ADDED
@@ -0,0 +1,85 @@
1
+ #! /usr/bin/env ruby
2
+ $LOAD_PATH << 'lib/'
3
+
4
+ require 'rubygems'
5
+ require 'bluepill'
6
+ require 'logger'
7
+
8
+ ROOT_DIR = "/tmp/bp"
9
+
10
+ # To watch the processes managed by this example while it runs, use:
11
+ # watch -n0.2 'ps axu | egrep "(CPU|forking|bluepill|sleep)" | grep -v grep | sort'
12
+ Bluepill.application(:sample_app, :foreground => true, :log_file => '/tmp/bp/sample_app.log') do |app|
13
+ 1.times do |i|
14
+ app.process("process_#{i}") do |process|
15
+ process.pid_file = "#{ROOT_DIR}/pids/process_#{i}.pid"
16
+
17
+ # NOTE: there is no portable way to locate the sample forking server, so the path
18
+ # below is hard-coded to one developer's checkout and must be adjusted on other machines.
19
+ # Since this code is eval'ed we cannot reliably use __FILE__ to build it.
20
+ process.start_command = "/Users/stefan/Documents/projects/bluepill/bin/sample_forking_server #{4242 + i}"
21
+ process.stop_command = "kill -INT {{PID}}"
22
+ process.daemonize = true
23
+
24
+ process.start_grace_time = 1.seconds
25
+ process.restart_grace_time = 7.seconds
26
+ process.stop_grace_time = 7.seconds
27
+
28
+ process.uid = "stefan"
29
+ process.gid = "staff"
30
+ process.group = "test"
31
+
32
+ process.checks :cpu_usage, :every => 5, :below => 0.5, :times => [5, 5]
33
+ process.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 20.seconds
34
+
35
+ process.monitor_children do |child_process|
36
+ child_process.checks :cpu_usage,
37
+ :every => 10,
38
+ :below => 0.5,
39
+ :times => [5, 5]
40
+
41
+ child_process.checks :mem_usage,
42
+ :every => 3,
43
+ :below => 900.kilobytes,
44
+ :times => [3, 5],
45
+ :fires => :stop
46
+
47
+ child_process.stop_command = "kill -QUIT {{PID}}"
48
+ # child_process.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds
49
+ end
50
+ end
51
+ end
52
+
53
+ 0.times do |i|
54
+ app.process("group_process_#{i}") do |process|
55
+ process.group = "group_1"
56
+ process.pid_file = "/Users/rohith/ffs/tmp/pids/mongrel_#{i}.pid"
57
+ process.start_command = "cd ~/ffs && mongrel_rails start -P #{process.pid_file} -p 3000 -d"
58
+
59
+ process.start_grace_time = 10.seconds
60
+
61
+ process.uid = "rohith"
62
+ process.gid = "staff"
63
+
64
+ # process.checks :always_true, :every => 10
65
+ end
66
+ end
67
+
68
+ 0.times do |i|
69
+ app.process("group_process_#{i}") do |process|
70
+ process.uid = "rohith"
71
+ process.gid = "wheel"
72
+
73
+ process.stderr = "/tmp/err.log"
74
+ process.stdout = "/tmp/err.log"
75
+
76
+
77
+ process.group = "grouped"
78
+ process.start_command = %Q{cd /tmp && ruby -e '$stderr.puts("hello stderr");$stdout.puts("hello stdout"); $stdout.flush; $stderr.flush; sleep 10'}
79
+ process.daemonize = true
80
+ process.pid_file = "/tmp/noperm/p_#{process.group}_#{i}.pid"
81
+
82
+ # process.checks :always_true, :every => 5
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,25 @@
1
+ require 'rubygems'
2
+ require 'bluepill'
3
+ require 'logger'
4
+
5
+ # ATTENTION:
6
+ # Declare only one application per config file when foreground mode is specified.
7
+ #
8
+ # http://github.com/akzhan/runit-man is used as the example monitored application.
9
+
10
+ Bluepill.application(:runit_man, :foreground => true) do |app|
11
+ app.process("runit-man") do |process|
12
+ process.pid_file = "/etc/service/runit-man/supervise/pid"
13
+
14
+ process.start_command = "/usr/bin/sv start runit-man"
15
+ process.stop_command = "/usr/bin/sv stop runit-man"
16
+ process.restart_command = "/usr/bin/sv restart runit-man"
17
+
18
+ process.start_grace_time = 1.seconds
19
+ process.restart_grace_time = 7.seconds
20
+ process.stop_grace_time = 7.seconds
21
+
22
+ process.checks :http, :within => 30.seconds, :retry_in => 7.seconds, :every => 30.seconds,
23
+ :url => 'http://localhost:4567/', :kind => :success, :pattern => /html/, :timeout => 3.seconds
24
+ end
25
+ end
+ end