ryansch-bluepill 0.0.53

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/.gitignore +8 -0
  2. data/DESIGN.md +10 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +22 -0
  5. data/README.md +306 -0
  6. data/Rakefile +9 -0
  7. data/bin/bluepill +111 -0
  8. data/bin/bpsv +3 -0
  9. data/bin/sample_forking_server +53 -0
  10. data/bluepill.gemspec +29 -0
  11. data/examples/example.rb +87 -0
  12. data/examples/new_example.rb +89 -0
  13. data/examples/new_runit_example.rb +29 -0
  14. data/examples/runit_example.rb +26 -0
  15. data/lib/bluepill.rb +36 -0
  16. data/lib/bluepill/application.rb +203 -0
  17. data/lib/bluepill/application/client.rb +8 -0
  18. data/lib/bluepill/application/server.rb +23 -0
  19. data/lib/bluepill/condition_watch.rb +50 -0
  20. data/lib/bluepill/controller.rb +121 -0
  21. data/lib/bluepill/dsl.rb +12 -0
  22. data/lib/bluepill/dsl/app_proxy.rb +25 -0
  23. data/lib/bluepill/dsl/process_factory.rb +122 -0
  24. data/lib/bluepill/dsl/process_proxy.rb +44 -0
  25. data/lib/bluepill/group.rb +72 -0
  26. data/lib/bluepill/logger.rb +63 -0
  27. data/lib/bluepill/process.rb +478 -0
  28. data/lib/bluepill/process_conditions.rb +14 -0
  29. data/lib/bluepill/process_conditions/always_true.rb +18 -0
  30. data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
  31. data/lib/bluepill/process_conditions/http.rb +58 -0
  32. data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
  33. data/lib/bluepill/process_conditions/process_condition.rb +22 -0
  34. data/lib/bluepill/process_statistics.rb +23 -0
  35. data/lib/bluepill/socket.rb +47 -0
  36. data/lib/bluepill/system.rb +235 -0
  37. data/lib/bluepill/trigger.rb +60 -0
  38. data/lib/bluepill/triggers/flapping.rb +56 -0
  39. data/lib/bluepill/util/rotational_array.rb +20 -0
  40. data/lib/bluepill/version.rb +4 -0
  41. metadata +192 -0
data/bin/bpsv ADDED
@@ -0,0 +1,3 @@
#!/bin/bash
# Thin wrapper around the bluepill CLI: runs `bluepill <first-arg> <own-name>`,
# where <own-name> is the basename this script was invoked as.
# NOTE(review): presumably meant to be symlinked under the name of the target
# to control — confirm against the README.

# Quote "$0" so a path containing spaces or glob characters is not word-split.
target=$(basename "$0")

# ${1:+"$1"} expands to nothing when no (or an empty) first argument is given,
# matching the old unquoted behaviour, but keeps a non-empty argument intact.
exec bluepill ${1:+"$1"} "$target"
@@ -0,0 +1,53 @@
#! /usr/bin/env ruby

# This is a modified version of the server found at
# http://tomayko.com/writings/unicorn-is-unix
# It is modified to trigger various states (such as increased memory
# consumption) so that watches can be written for them.

# Instructions for running the test
#
# (1) Edit the example config and fix the path to this file. Around line 16.
# (2) Load up the config and run the bluepill daemon
# (3) Run watch -n0.2 'sudo ruby bin/bluepill status 2>/dev/null; echo; ps ajxu | egrep "(CPU|forking|bluepill|sleep|ruby)" | grep -v grep | sort'
# (4) After verifying that the "sleep" workers are properly being restarted,
#     telnet to localhost 4242 and say something. You should get it echoed back
#     and the worker which answered your request should now be over the allowed
#     memory limit
# (5) Observe the worker being killed in the watch you started in step 3.

require 'socket'

# Port comes from the first CLI argument; a missing or non-numeric argument
# makes to_i return 0, in which case we fall back to 4242.
port = ARGV[0].to_i
port = 4242 if port == 0

acceptor = Socket.new(Socket::AF_INET, Socket::SOCK_STREAM, 0)
address = Socket.pack_sockaddr_in(port, '127.0.0.1')
acceptor.bind(address)
acceptor.listen(10)

children = []
# NOTE(review): this handler is installed before fork, so forked workers
# appear to inherit it together with the pids collected so far — confirm
# whether workers signalling their siblings on exit is intended.
trap('EXIT') { acceptor.close; children.each {|c| Process.kill('QUIT', c)} }


3.times do
  children << fork do
    trap('QUIT') {$0 = "forking_server| QUIT received shutting down gracefully..."; sleep 5; exit}
    trap('INT') {$0 = "forking_server| INT received shutting down UN-gracefully..."; sleep 3; exit}

    puts "child #$$ accepting on shared socket (localhost:#{port})"
    loop do
      socket, _addr = acceptor.accept
      begin
        socket.write "child #$$ echo> "
        socket.flush
        # gets returns nil when the client hangs up without sending a line.
        message = socket.gets
        socket.write message if message
      ensure
        # Always release the connection, even if a write above failed.
        socket.close
      end
      # to_s keeps the worker alive when no line was received (message is nil).
      puts "child #$$ echo'd: '#{message.to_s.strip}'"

      # cause a spike in mem usage
      temp = "*" * (100 * 1024)
    end
  end
end

trap('INT') { puts "\nbailing" ; exit }

Process.waitall
data/bluepill.gemspec ADDED
@@ -0,0 +1,29 @@
# -*- encoding: utf-8 -*-

# Put this checkout's lib/ on the load path so the version file can be required.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_dir)

require "bluepill/version"

# Runtime dependencies as [gem name, version requirement] pairs, in declaration order.
runtime_dependencies = [
  ['daemons',       '~> 1.1.0'],
  ['state_machine', '~> 0.9.4'],
  ['activesupport', '>= 3.0.0'],
  ['i18n',          '>= 0.5.0']
]

Gem::Specification.new do |spec|
  spec.name        = "ryansch-bluepill"
  spec.version     = Bluepill::VERSION.dup
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Arya Asemanfar", "Gary Tsang", "Rohith Ravi", "Ryan Schlesinger"]
  spec.email       = ["ryan@instanceinc.com"]
  spec.homepage    = "http://github.com/ryansch/bluepill"
  spec.summary     = %q{A process monitor written in Ruby with stability and minimalism in mind.}
  spec.description = %q{Bluepill keeps your daemons up while taking up as little resources as possible. After all you probably want the resources of your server to be used by whatever daemons you are running rather than the thing that's supposed to make sure they are brought back up, should they die or misbehave.}

  runtime_dependencies.each do |gem_name, requirement|
    spec.add_dependency gem_name, requirement
  end

  spec.add_development_dependency 'bundler', '>= 1.0.10'

  # File lists are taken from what git tracks in this checkout.
  tracked_files  = `git ls-files`.split("\n")
  tracked_tests  = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_binary = `git ls-files -- bin/*`.split("\n")

  spec.files            = tracked_files
  spec.test_files       = tracked_tests
  spec.executables      = tracked_binary.map { |path| File.basename(path) }
  spec.require_paths    = ["lib"]
  spec.extra_rdoc_files = ["LICENSE", "README.md"]
end
@@ -0,0 +1,87 @@
require 'rubygems'
require 'bluepill'
require 'logger'

ROOT_DIR = "/tmp/bp"

# To observe the processes while experimenting, run:
#   watch -n0.2 'ps axu | egrep "(CPU|forking|bluepill|sleep)" | grep -v grep | sort'
Bluepill.application(:sample_app) do |application|
  # Sample forking servers. The repeat count is 0, so none are registered as written.
  0.times do |index|
    application.process("process_#{index}") do |server|
      server.pid_file = "#{ROOT_DIR}/pids/process_#{index}.pid"

      # The pid_command option can locate a pid instead, e.g. for memcached:
      # server.pid_command = "ps -ef | awk '/memcached$/{ print $2 }'"

      # There is no portable way to express the path to the sample forking
      # server across developer machines, and because this file is eval'ed
      # __FILE__ cannot be relied upon — adjust the path below by hand.
      server.start_command = "/Users/rohith/work/bluepill/bin/sample_forking_server #{4242 + index}"
      server.stop_command = "kill -INT {{PID}}"
      server.daemonize = true

      server.start_grace_time = 1.seconds
      server.restart_grace_time = 7.seconds
      server.stop_grace_time = 7.seconds

      server.uid = "rohith"
      server.gid = "staff"

      # server.checks :cpu_usage, :every => 10, :below => 0.5, :times => [5, 5]
      server.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds

      server.monitor_children do |child|
        # child.checks :cpu_usage,
        #   :every => 10,
        #   :below => 0.5,
        #   :times => [5, 5]

        # child.checks :mem_usage,
        #   :every => 3,
        #   :below => 600.kilobytes,
        #   :times => [3, 5],
        #   :fires => [:stop]

        child.stop_command = "kill -QUIT {{PID}}"
        # child.checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds
      end
    end
  end

  # Grouped mongrel processes. Also repeated 0 times, so not registered as written.
  0.times do |index|
    application.process("group_process_#{index}") do |mongrel|
      mongrel.group = "group_1"
      mongrel.pid_file = "/Users/rohith/ffs/tmp/pids/mongrel_#{index}.pid"
      mongrel.start_command = "cd ~/ffs && mongrel_rails start -P #{mongrel.pid_file} -p 3000 -d"

      mongrel.start_grace_time = 10.seconds

      mongrel.uid = "rohith"
      mongrel.gid = "staff"

      # mongrel.checks :always_true, :every => 10
    end
  end

  # The one definition that is actually registered: a short-lived ruby
  # one-liner that writes to both output streams and then sleeps.
  1.times do |index|
    application.process("group_process_#{index}") do |writer|
      writer.auto_start = false

      writer.uid = "rohith"
      writer.gid = "wheel"

      # Both streams are sent to the same log file.
      writer.stderr = "/tmp/err.log"
      writer.stdout = "/tmp/err.log"


      writer.group = "grouped"
      writer.start_command = %Q{cd /tmp && ruby -e '$stderr.puts("hello stderr");$stdout.puts("hello stdout"); $stdout.flush; $stderr.flush; sleep 10'}
      writer.daemonize = true
      writer.pid_file = "/tmp/noperm/p_#{writer.group}_#{index}.pid"

      # writer.checks :always_true, :every => 5
    end
  end
end
@@ -0,0 +1,89 @@
require 'rubygems'
require 'bluepill'
require 'logger'

# Note: this block-argument-free syntax is supported from bluepill 0.0.50 onwards.

ROOT_DIR = "/tmp/bp"

# To observe the processes while experimenting, run:
#   watch -n0.2 'ps axu | egrep "(CPU|forking|bluepill|sleep)" | grep -v grep | sort'
Bluepill.application(:sample_app) do
  # Sample forking servers. The repeat count is 0, so none are registered as written.
  0.times do |index|
    process("process_#{index}") do
      pid_file "#{ROOT_DIR}/pids/process_#{index}.pid"

      # The pid_command option can locate a pid instead, e.g. for memcached.
      # In this syntax write it as a call (no "="), like the other options:
      # pid_command "ps -ef | awk '/memcached$/{ print $2 }'"

      # There is no portable way to express the path to the sample forking
      # server across developer machines, and because this file is eval'ed
      # __FILE__ cannot be relied upon — adjust the path below by hand.
      start_command "/Users/rohith/work/bluepill/bin/sample_forking_server #{4242 + index}"
      stop_command "kill -INT {{PID}}"
      daemonize!

      start_grace_time 1.seconds
      restart_grace_time 7.seconds
      stop_grace_time 7.seconds

      uid "rohith"
      gid "staff"

      # checks :cpu_usage, :every => 10, :below => 0.5, :times => [5, 5]
      checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds

      monitor_children do
        # checks :cpu_usage,
        #   :every => 10,
        #   :below => 0.5,
        #   :times => [5, 5]

        # checks :mem_usage,
        #   :every => 3,
        #   :below => 600.kilobytes,
        #   :times => [3, 5],
        #   :fires => [:stop]

        stop_command "kill -QUIT {{PID}}"
        # checks :flapping, :times => 2, :within => 30.seconds, :retry_in => 7.seconds
      end
    end
  end

  # Grouped mongrel processes. Also repeated 0 times, so not registered as written.
  0.times do |index|
    process("group_process_#{index}") do
      group "group_1"
      pid_file "/Users/rohith/ffs/tmp/pids/mongrel_#{index}.pid"
      start_command "cd ~/ffs && mongrel_rails start -P #{pid_file} -p 3000 -d"

      start_grace_time 10.seconds

      uid "rohith"
      gid "staff"

      # checks :always_true, :every => 10
    end
  end

  # The one definition that is actually registered: a short-lived ruby
  # one-liner that writes to both output streams and then sleeps.
  1.times do |index|
    process("group_process_#{index}") do
      auto_start false

      uid "rohith"
      gid "wheel"

      # Both streams are sent to the same log file.
      stderr "/tmp/err.log"
      stdout "/tmp/err.log"


      group "grouped"
      start_command %Q{cd /tmp && ruby -e '$stderr.puts("hello stderr");$stdout.puts("hello stdout"); $stdout.flush; $stderr.flush; sleep 10'}
      daemonize!
      pid_file "/tmp/noperm/p_#{group}_#{index}.pid"

      # checks :always_true, :every => 5
    end
  end
end
@@ -0,0 +1,29 @@
#! /usr/bin/env ruby
require 'rubygems'
require 'bluepill'
require 'logger'

# ATTENTION:
# When foreground mode is specified, declare exactly one application per config.
#
# http://github.com/Undev/runit-man is used as the example monitored application.

# Note: this block-argument-free syntax is supported from bluepill 0.0.50 onwards.

Bluepill.application(:runit_man, :foreground => true) do
  process("runit-man") do
    pid_file "/etc/service/runit-man/supervise/pid"

    # Start, stop and restart are all delegated to the sv command.
    start_command "/usr/bin/sv start runit-man"
    stop_command "/usr/bin/sv stop runit-man"
    restart_command "/usr/bin/sv restart runit-man"

    start_grace_time 1.seconds
    restart_grace_time 7.seconds
    stop_grace_time 7.seconds

    # Options for the :http check, collected in one place for readability.
    http_check = {
      :within   => 30.seconds,
      :retry_in => 7.seconds,
      :every    => 30.seconds,
      :url      => 'http://localhost:4567/',
      :kind     => :success,
      :pattern  => /html/,
      :timeout  => 3.seconds
    }
    checks :http, http_check
  end
end
@@ -0,0 +1,26 @@
#! /usr/bin/env ruby
require 'rubygems'
require 'bluepill'
require 'logger'

# ATTENTION:
# When foreground mode is specified, declare exactly one application per config.
#
# http://github.com/Undev/runit-man is used as the example monitored application.

Bluepill.application(:runit_man, :foreground => true) do |application|
  application.process("runit-man") do |service|
    service.pid_file = "/etc/service/runit-man/supervise/pid"

    # Start, stop and restart are all delegated to the sv command.
    service.start_command = "/usr/bin/sv start runit-man"
    service.stop_command = "/usr/bin/sv stop runit-man"
    service.restart_command = "/usr/bin/sv restart runit-man"

    service.start_grace_time = 1.seconds
    service.restart_grace_time = 7.seconds
    service.stop_grace_time = 7.seconds

    # Options for the :http check, collected in one place for readability.
    http_check = {
      :within   => 30.seconds,
      :retry_in => 7.seconds,
      :every    => 30.seconds,
      :url      => 'http://localhost:4567/',
      :kind     => :success,
      :pattern  => /html/,
      :timeout  => 3.seconds
    }
    service.checks :http, http_check
  end
end
data/lib/bluepill.rb ADDED
@@ -0,0 +1,36 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'rubygems'
3
+
4
+ require 'thread'
5
+ require 'monitor'
6
+ require 'syslog'
7
+ require 'timeout'
8
+ require 'logger'
9
+
10
+ require 'active_support/inflector'
11
+ require 'active_support/core_ext/hash'
12
+ require 'active_support/core_ext/numeric'
13
+ require 'active_support/duration'
14
+
15
+ require 'bluepill/dsl/process_proxy'
16
+ require 'bluepill/dsl/process_factory'
17
+ require 'bluepill/dsl/app_proxy'
18
+
19
+ require 'bluepill/application'
20
+ require 'bluepill/controller'
21
+ require 'bluepill/socket'
22
+ require "bluepill/process"
23
+ require "bluepill/process_statistics"
24
+ require "bluepill/group"
25
+ require "bluepill/logger"
26
+ require "bluepill/condition_watch"
27
+ require 'bluepill/trigger'
28
+ require 'bluepill/triggers/flapping'
29
+ require "bluepill/dsl"
30
+ require "bluepill/system"
31
+
32
+ require "bluepill/process_conditions"
33
+
34
+ require "bluepill/util/rotational_array"
35
+
36
+ require "bluepill/version"
@@ -0,0 +1,203 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'thread'
3
+
module Bluepill
  # One bluepill-managed application as seen from the daemon side.
  #
  # It owns the application's process groups, its pid file and pids directory
  # under +base_dir+, the command socket, and the loop that ticks every group
  # once per second until a TERM/INT signal is received.
  class Application
    # Commands exposed as public instance methods; each takes an optional
    # group name and process name and is forwarded to the matching group(s).
    PROCESS_COMMANDS = [:start, :stop, :restart, :unmonitor, :status]

    attr_accessor :name, :logger, :base_dir, :socket, :pid_file
    attr_accessor :groups, :work_queue
    attr_accessor :pids_dir, :log_file

    # name    - application name; used for the pid file, pids dir and log prefix.
    # options - Hash:
    #           :foreground - truthy to skip daemonizing and log to stdout.
    #           :log_file   - passed through to Bluepill::Logger.
    #           :base_dir   - state directory (default '/var/bluepill').
    #
    # Installs signal traps and creates the pids directory as side effects.
    def initialize(name, options = {})
      self.name = name

      @foreground   = options[:foreground]
      self.log_file = options[:log_file]
      self.base_dir = options[:base_dir] || '/var/bluepill'
      self.pid_file = File.join(self.base_dir, 'pids', self.name + ".pid")
      self.pids_dir = File.join(self.base_dir, 'pids', self.name)

      self.groups = {}

      self.logger = Bluepill::Logger.new(:log_file => self.log_file, :stdout => foreground?).prefix_with(self.name)

      self.setup_signal_traps
      self.setup_pids_dir

      @mutex = Mutex.new
    end

    # Returns true when the application was created with :foreground => true.
    def foreground?
      !!@foreground
    end

    # Runs the given block while holding the application-wide mutex.
    def mutex(&b)
      @mutex.synchronize(&b)
    end

    # Starts the server. Any StandardError is reported on stderr and the
    # process exits with status 5.
    def load
      self.start_server
    rescue StandardError => e
      $stderr.puts "Failed to start bluepill:"
      $stderr.puts "%s `%s`" % [e.class.name, e.message]
      $stderr.puts e.backtrace
      exit(5)
    end

    # Defines start/stop/restart/unmonitor/status(group_name = nil, process_name = nil).
    PROCESS_COMMANDS.each do |command|
      class_eval <<-END, __FILE__, __LINE__ + 1
        def #{command}(group_name = nil, process_name = nil)
          self.send_to_process_or_group(:#{command}, group_name, process_name)
        end
      END
    end

    # Adds +process+ to the group called +group_name+ (nil is a valid key),
    # creating the group on first use.
    def add_process(process, group_name = nil)
      group_name = group_name.to_s if group_name

      self.groups[group_name] ||= Group.new(group_name, :logger => self.logger.prefix_with(group_name))
      self.groups[group_name].add_process(process)
    end

    # Returns the bluepill version constant.
    def version
      Bluepill::VERSION
    end

    protected

    # Dispatches +method+ to groups:
    # * no names given         -> every group, results flattened;
    # * group_name is a group  -> that group, narrowed by process_name;
    # * only one name, unknown -> treated as a process name and tried on every group;
    # * otherwise              -> empty array.
    def send_to_process_or_group(method, group_name, process_name)
      if group_name.nil? && process_name.nil?
        self.groups.values.collect { |group| group.send(method) }.flatten
      elsif self.groups.key?(group_name)
        self.groups[group_name].send(method, process_name)
      elsif process_name.nil?
        # they must be targeting just by process name
        process_name = group_name
        self.groups.values.collect { |group| group.send(method, process_name) }.flatten
      else
        []
      end
    end

    # (Re)starts the thread that serves the command socket. Each connection
    # carries one line of the form "command:arg1:arg2"; the result (or the
    # exception raised by the command) is written back as a Marshal dump.
    def start_listener
      @listener_thread.kill if @listener_thread
      @listener_thread = Thread.new do
        loop do
          client = nil
          begin
            client = self.socket.accept
            command, *args = client.readline.strip.split(":")
            response = begin
              # SECURITY(review): `send` will invoke ANY method, including
              # private ones, named by whoever can write to the socket.
              # Consider restricting this to an explicit list of commands once
              # the full set sent by the client side has been confirmed.
              mutex { self.send(command, *args) }
            rescue Exception => e
              # Deliberately broad: the exception object itself is the reply.
              e
            end
            client.write(Marshal.dump(response))
          rescue StandardError => e
            logger.err("Got exception in cmd listener: %s `%s`" % [e.class.name, e.message])
            e.backtrace.each { |l| logger.err(l) }
          ensure
            begin
              # client is nil when accept itself failed; calling close on nil
              # would raise out of the ensure and kill the listener thread.
              client.close if client
            rescue IOError
              # closed stream
            end
          end
        end
      end
    end

    # Replaces any previous daemon, daemonizes (unless in foreground mode),
    # writes the pid file, opens the command socket and enters the run loop.
    def start_server
      self.kill_previous_bluepill

      Daemonize.daemonize unless foreground?

      self.logger.reopen

      $0 = "bluepilld: #{self.name}"

      self.groups.each { |_, group| group.determine_initial_state }

      self.write_pid_file
      self.socket = Bluepill::Socket.server(self.base_dir, self.name)
      self.start_listener

      self.run
    end

    # Ticks every group once per second, under the mutex, until a signal trap
    # clears @running; then removes the socket and pid files.
    def run
      @running = true # set to false by signal trap
      while @running
        mutex do
          System.reset_data
          self.groups.each { |_, group| group.tick }
        end
        sleep 1
      end
      cleanup
    end

    # Removes the socket file and, if present, the pid file.
    def cleanup
      File.unlink(self.socket.path) if self.socket
      # File.exist? rather than File.exists?, which was removed in Ruby 3.2.
      File.unlink(self.pid_file) if File.exist?(self.pid_file)
    end

    # TERM and INT stop the run loop; HUP reopens the logger.
    def setup_signal_traps
      terminator = Proc.new do
        puts "Terminating..."
        @running = false
      end

      Signal.trap("TERM", &terminator)
      Signal.trap("INT", &terminator)

      Signal.trap("HUP") do
        self.logger.reopen if self.logger
      end
    end

    # Creates the pids directory if needed and makes it world-writable.
    def setup_pids_dir
      FileUtils.mkdir_p(self.pids_dir) unless File.exist?(self.pids_dir)
      # we need everybody to be able to write to the pids_dir as processes managed by
      # bluepill will be writing to this dir after they've dropped privileges
      FileUtils.chmod(0777, self.pids_dir)
    end

    # If the pid file names a live process, sends it INT and waits up to
    # ~5 seconds for it to go away. Exits with status 4 when the signal cannot
    # be delivered (other than "no such process") or the old daemon survives.
    def kill_previous_bluepill
      return unless File.exist?(self.pid_file)

      previous_pid = File.read(self.pid_file).to_i
      # An empty or corrupt pid file yields 0 (or possibly a negative number).
      # Process.kill interprets those as "my own process group" / broader
      # targets rather than a single process, so never signal them.
      return unless previous_pid > 0
      return unless System.pid_alive?(previous_pid)

      begin
        ::Process.kill(0, previous_pid) # probe: raises if we cannot signal it
        puts "Killing previous bluepilld[#{previous_pid}]"
        ::Process.kill(2, previous_pid) # 2 == SIGINT
      rescue StandardError => e
        $stderr.puts "Encountered error trying to kill previous bluepill:"
        $stderr.puts "#{e.class}: #{e.message}"
        # ESRCH means it died in the meantime, which is what we wanted.
        exit(4) unless e.is_a?(Errno::ESRCH)
      else
        10.times do
          sleep 0.5
          break unless System.pid_alive?(previous_pid)
        end

        if System.pid_alive?(previous_pid)
          $stderr.puts "Previous bluepilld[#{previous_pid}] didn't die"
          exit(4)
        end
      end
    end

    # Writes the current process id to the application's pid file.
    def write_pid_file
      File.open(self.pid_file, 'w') { |x| x.write(::Process.pid) }
    end
  end
end