cloud66-bluepill 0.0.62
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/.rspec +1 -0
- data/DESIGN.md +10 -0
- data/Gemfile +10 -0
- data/LICENSE +22 -0
- data/README.md +349 -0
- data/Rakefile +38 -0
- data/bin/bluepill +124 -0
- data/bin/bpsv +3 -0
- data/bin/sample_forking_server +53 -0
- data/bluepill.gemspec +37 -0
- data/examples/example.rb +87 -0
- data/examples/new_example.rb +89 -0
- data/examples/new_runit_example.rb +29 -0
- data/examples/runit_example.rb +26 -0
- data/lib/bluepill.rb +38 -0
- data/lib/bluepill/application.rb +215 -0
- data/lib/bluepill/application/client.rb +8 -0
- data/lib/bluepill/application/server.rb +23 -0
- data/lib/bluepill/condition_watch.rb +51 -0
- data/lib/bluepill/controller.rb +122 -0
- data/lib/bluepill/dsl.rb +12 -0
- data/lib/bluepill/dsl/app_proxy.rb +25 -0
- data/lib/bluepill/dsl/process_factory.rb +122 -0
- data/lib/bluepill/dsl/process_proxy.rb +44 -0
- data/lib/bluepill/group.rb +72 -0
- data/lib/bluepill/logger.rb +63 -0
- data/lib/bluepill/process.rb +514 -0
- data/lib/bluepill/process_conditions.rb +14 -0
- data/lib/bluepill/process_conditions/always_true.rb +18 -0
- data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
- data/lib/bluepill/process_conditions/file_time.rb +26 -0
- data/lib/bluepill/process_conditions/http.rb +58 -0
- data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
- data/lib/bluepill/process_conditions/process_condition.rb +22 -0
- data/lib/bluepill/process_journal.rb +219 -0
- data/lib/bluepill/process_statistics.rb +27 -0
- data/lib/bluepill/socket.rb +58 -0
- data/lib/bluepill/system.rb +265 -0
- data/lib/bluepill/trigger.rb +60 -0
- data/lib/bluepill/triggers/flapping.rb +56 -0
- data/lib/bluepill/util/rotational_array.rb +20 -0
- data/lib/bluepill/version.rb +4 -0
- data/local-bluepill +129 -0
- data/spec/lib/bluepill/logger_spec.rb +3 -0
- data/spec/lib/bluepill/process_spec.rb +96 -0
- data/spec/lib/bluepill/process_statistics_spec.rb +24 -0
- data/spec/lib/bluepill/system_spec.rb +36 -0
- data/spec/spec_helper.rb +15 -0
- metadata +302 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Namespace for the process condition classes.
  module ProcessConditions
    # Resolves a condition name to the constant of the same (camel-cased)
    # name inside this module.
    #
    # NOTE(review): String#camelcase is not core Ruby; presumably it comes
    # from ActiveSupport loaded elsewhere -- confirm.
    #
    # name - anything responding to #to_s (e.g. :mem_usage).
    #
    # Returns whatever const_get finds; const_get raises NameError when no
    # such constant exists.
    def self.[](name)
      constant_name = name.to_s.camelcase
      const_get(constant_name)
    end
  end
end
|
9
|
+
|
10
|
+
require "bluepill/process_conditions/process_condition"
|
11
|
+
Dir["#{File.dirname(__FILE__)}/process_conditions/*.rb"].each do |pc|
|
12
|
+
require pc
|
13
|
+
end
|
14
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Trivial condition: every sample is the constant 1 and every check
    # passes, regardless of the process being inspected.
    class AlwaysTrue < ProcessCondition
      # options - Hash; only :below is read (and stored, though neither
      #           #run nor #check consult it).
      def initialize(options = {})
        @below = options[:below]
      end

      # Ignores both arguments and returns the fixed sample value 1.
      def run(pid, include_children)
        1
      end

      # Accepts any sampled value.
      def check(value)
        true
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Condition that passes while the sampled CPU usage stays strictly
    # below the configured :below threshold.
    class CpuUsage < ProcessCondition
      # options - Hash; :below is the threshold compared against in #check.
      def initialize(options = {})
        @below = options[:below]
      end

      # Samples CPU usage for pid via System.cpu_usage and coerces the
      # result to a Float.
      def run(pid, include_children)
        # third col in the ps axu output
        usage = System.cpu_usage(pid, include_children)
        usage.to_f
      end

      # True when the sampled value is strictly less than the threshold.
      def check(value)
        value < @below
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Condition on the age of a file: passes while the file's last
    # modification is more recent than :below seconds ago.
    class FileTime < ProcessCondition
      # options - Hash with:
      #           :below    - maximum allowed age (compared with `<`).
      #           :filename - path of the file whose mtime is inspected.
      def initialize(options = {})
        @below = options[:below]
        @filename = options[:filename]
      end

      # Samples the file's age.
      #
      # Returns the seconds elapsed since the file was last modified, nil
      # when the file does not exist, or the rescued StandardError when the
      # lookup itself fails (the error is returned, not raised).
      def run(pid, include_children)
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        return nil unless File.exist?(@filename)

        Time.now - File.mtime(@filename)
      rescue => e
        e
      end

      # value - the result of #run.
      #
      # Returns false for anything that is not a number (nil for a missing
      # file, or the exception object #run hands back on failure); comparing
      # an exception with `<` would otherwise raise NoMethodError.
      def check(value)
        return false unless value.is_a?(Numeric)

        value < @below
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
module Bluepill
  module ProcessConditions
    # Condition that issues an HTTP(S) GET against a URL and passes when the
    # response is of the expected kind and, optionally, its body matches a
    # pattern.
    class Http < ProcessCondition
      # Message of the NoMethodError that #hide_net_http_bug translates
      # into a connection-refused error.
      NIL_SOCKET_MESSAGE = %q|undefined method `closed?' for nil:NilClass|

      # options - Hash with:
      #           :url          - URL to request (parsed with URI.parse).
      #           :kind         - expected response: an Integer status code,
      #                           or a String/Symbol naming a Net::HTTP*
      #                           class; defaults to Net::HTTPSuccess.
      #           :pattern      - optional object matched (===) against the
      #                           response body.
      #           :open_timeout, :read_timeout, :timeout - seconds; the
      #                           specific key wins over :timeout, default 5.
      def initialize(options = {})
        @uri = URI.parse(options[:url])
        @kind = case options[:kind]
                # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2
                # and Integer matches on every Ruby version.
                when Integer then Net::HTTPResponse::CODE_TO_OBJ[options[:kind].to_s]
                # NOTE(review): String#camelize is not core Ruby; presumably
                # provided by ActiveSupport -- confirm.
                when String, Symbol then Net.const_get("HTTP#{options[:kind].to_s.camelize}")
                else
                  Net::HTTPSuccess
                end
        @pattern = options[:pattern]
        @open_timeout = (options[:open_timeout] || options[:timeout] || 5).to_i
        @read_timeout = (options[:read_timeout] || options[:timeout] || 5).to_i
      end

      # Performs the GET request.
      #
      # Returns the Net::HTTPResponse, or the rescued StandardError when the
      # request fails (the error is returned, not raised, so #check can
      # reject it).
      def run(pid, include_children)
        session = Net::HTTP.new(@uri.host, @uri.port)
        if @uri.scheme == 'https'
          require 'net/https'
          session.use_ssl = true
          # NOTE(review): certificate verification is disabled here, so the
          # peer's identity is not checked. Left unchanged to keep existing
          # checks working -- confirm this is intended.
          session.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end
        session.open_timeout = @open_timeout
        session.read_timeout = @read_timeout
        hide_net_http_bug do
          session.start do |http|
            http.get(@uri.request_uri)
          end
        end
      rescue => e
        e
      end

      # value - the result of #run.
      #
      # Returns true when value is an instance of the expected response
      # class and, if a pattern was configured, the response class permits a
      # body and the pattern matches it.
      def check(value)
        return false unless value.kind_of?(@kind)
        return true unless @pattern
        return false unless value.class.body_permitted?

        @pattern === value.body
      end

      private

      # Runs the block and converts the specific NoMethodError whose message
      # contains NIL_SOCKET_MESSAGE into Errno::ECONNREFUSED; any other
      # NoMethodError is re-raised untouched.
      def hide_net_http_bug
        yield
      rescue NoMethodError => e
        raise unless e.to_s.include?(NIL_SOCKET_MESSAGE)

        raise Errno::ECONNREFUSED, "Connection refused attempting to contact #{@uri.scheme}://#{@uri.host}:#{@uri.port}"
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Condition that passes while the sampled memory usage stays strictly
    # below the configured :below threshold.
    #
    # NOTE(review): Numeric#kilobytes is not core Ruby; presumably provided
    # by ActiveSupport -- confirm.
    class MemUsage < ProcessCondition
      MB = 1024 ** 2
      FORMAT_STR = "%d%s"
      MB_LABEL = "MB"
      KB_LABEL = "KB"

      # options - Hash; :below is the threshold compared against in #check.
      def initialize(options = {})
        @below = options[:below]
      end

      # Samples memory usage for pid via System.memory_usage and coerces the
      # result to a Float.
      def run(pid, include_children)
        # rss is on the 5th col
        usage = System.memory_usage(pid, include_children)
        usage.to_f
      end

      # True when value.kilobytes is strictly less than the threshold.
      def check(value)
        value.kilobytes < @below
      end

      # Renders a sampled value with a unit label: "<n>MB" (value divided by
      # 1024 and rounded) once value.kilobytes reaches MB, "<n>KB" otherwise.
      def format_value(value)
        return FORMAT_STR % [value, KB_LABEL] unless value.kilobytes >= MB

        megabytes = (value / 1024).round
        FORMAT_STR % [megabytes, MB_LABEL]
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Base class for process conditions. Subclasses supply #run (take a
    # sample) and #check (judge a sample); both raise here.
    class ProcessCondition
      # options - Hash of condition settings, stored as given.
      def initialize(options = {})
        @options = options
      end

      # Placeholder sampler; always raises RuntimeError.
      def run(pid, include_children)
        raise "Implement in subclass!"
      end

      # Placeholder judge; always raises RuntimeError.
      def check(value)
        raise "Implement in subclass!"
      end

      # Default formatting for a sampled value: the value itself, unchanged.
      def format_value(value)
        value
      end
    end
  end
end
|
@@ -0,0 +1,219 @@
|
|
1
|
+
require 'bluepill/system'
|
2
|
+
|
3
|
+
module Bluepill
  # Keeps on-disk journals of the pids and process-group ids recorded via
  # append_pid_to_journal / append_pgid_to_journal, and can later signal
  # everything listed in a journal (TERM, then KILL) and delete the journal.
  #
  # Journal files live under journal_base_dir and are guarded by a lock
  # directory named "<journal file>.lock".
  module ProcessJournal
    extend self

    class << self
      attr_reader :logger
      attr_reader :journal_base_dir

      # Sets the logger once; later assignments are ignored (||=).
      def logger=(new_logger)
        @logger ||= new_logger
      end

      # Sets the journal directory to "<base_dir>/journals" (first call
      # wins, ||=), creates it if needed and makes it mode 0777.
      #
      # NOTE(review): 0777 makes the journal directory world-writable --
      # confirm this is required.
      def base_dir=(base_dir)
        @journal_base_dir ||= File.join(base_dir, "journals")
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        FileUtils.mkdir_p(@journal_base_dir) unless File.exist?(@journal_base_dir)
        FileUtils.chmod(0777, @journal_base_dir)
      end
    end

    # True for values that must never be journaled or signalled: anything
    # that is not an Integer, and pids 0, 1 and negatives.
    def skip_pid?(pid)
      !pid.is_a?(Integer) || pid <= 1
    end

    # Same rule as skip_pid?, applied to process-group ids.
    def skip_pgid?(pgid)
      !pgid.is_a?(Integer) || pgid <= 1
    end

    # Runs the given block while holding the lock directory "<name>.lock".
    #
    # atomic operation on POSIX filesystems, since
    # f.flock(File::LOCK_SH) is not available on all platforms
    #
    # name         - path the lock is derived from (".lock" is appended).
    # max_attempts - how many times to try creating the lock directory
    #                (default 10).
    # wait_seconds - pause after each failed attempt (default 1).
    #
    # Returns the block's value. Raises RuntimeError when the lock is still
    # held after max_attempts tries.
    #
    # The retry is scoped to the mkdir alone so that the attempt counter and
    # the lock name are not re-initialised on each retry, and the lock is
    # only removed when this call actually created it -- a timed-out caller
    # must not delete somebody else's lock.
    def acquire_atomic_fs_lock(name, max_attempts = 10, wait_seconds = 1)
      lock_dir = "#{name}.lock"
      attempts = 0

      begin
        Dir.mkdir(lock_dir, 0700)
      rescue Errno::EEXIST
        attempts += 1
        logger.debug("Waiting for lock #{lock_dir}")
        sleep wait_seconds
        retry if attempts < max_attempts

        logger.info("Timeout waiting for lock #{lock_dir}")
        raise "Timeout waiting for lock #{lock_dir}"
      end

      logger.debug("Acquired lock #{lock_dir}")
      begin
        yield
      ensure
        clear_atomic_fs_lock(lock_dir)
      end
    end

    # Deletes lock directories matching ".*.lock" (or
    # ".*.<application_name>.lock" when a name is given).
    #
    # NOTE(review): the glob is relative to the current working directory,
    # not journal_base_dir -- confirm callers chdir appropriately.
    def clear_all_atomic_fs_locks(application_name = nil)
      pattern = application_name.nil? ? '.*.lock' : ".*.#{application_name}.lock"
      Dir[pattern].each do |f|
        System.delete_if_exists(f) if File.directory?(f)
      end
    end

    # Path of the pid journal file for journal_name.
    def pid_journal_filename(journal_name)
      File.join(@journal_base_dir, ".bluepill_pids_journal.#{journal_name}")
    end

    # Path of the pgid journal file for journal_name.
    def pgid_journal_filename(journal_name)
      File.join(@journal_base_dir, ".bluepill_pgids_journal.#{journal_name}")
    end

    # Reads the pids stored in filename (one per line), dropping entries
    # rejected by skip_pid?. Returns [] when the file does not exist.
    def pid_journal(filename)
      logger.debug("pid journal file: #{filename}")
      # File.readlines closes the file; File.open(...).readlines leaked it.
      result = File.readlines(filename).map(&:to_i).reject { |pid| skip_pid?(pid) }
      logger.debug("pid journal = #{result.join(' ')}")
      result
    rescue Errno::ENOENT
      []
    end

    # Reads the pgids stored in filename (one per line), dropping entries
    # rejected by skip_pgid?. Returns [] when the file does not exist.
    def pgid_journal(filename)
      logger.debug("pgid journal file: #{filename}")
      result = File.readlines(filename).map(&:to_i).reject { |pgid| skip_pgid?(pgid) }
      logger.debug("pgid journal = #{result.join(' ')}")
      result
    rescue Errno::ENOENT
      []
    end

    # Removes the lock directory `name` (the full lock path) if it exists.
    def clear_atomic_fs_lock(name)
      if File.directory?(name)
        Dir.rmdir(name)
        logger.debug("Cleared lock #{name}")
      end
    end

    # Runs kill_all_from_journal for every pid journal found by globbing
    # ".bluepill_pids_journal.*", ignoring lock entries.
    #
    # NOTE(review): the glob is relative to the current working directory,
    # not journal_base_dir -- confirm callers chdir appropriately.
    def kill_all_from_all_journals
      Dir[".bluepill_pids_journal.*"].map { |x|
        x.sub(/^\.bluepill_pids_journal\./, "")
      }.reject { |y|
        y =~ /\.lock$/
      }.each do |journal_name|
        kill_all_from_journal(journal_name)
      end
    end

    # Signals every pid, then every process group, recorded for
    # journal_name.
    def kill_all_from_journal(journal_name)
      kill_all_pids_from_journal(journal_name)
      kill_all_pgids_from_journal(journal_name)
    end

    # Signals every process group in the pgid journal (TERM, then KILL if
    # any is still alive) and deletes the journal file.
    def kill_all_pgids_from_journal(journal_name)
      filename = pgid_journal_filename(journal_name)
      kill_journal_entries(filename, pgid_journal(filename), true)
    end

    # Signals every process in the pid journal (TERM, then KILL if any is
    # still alive) and deletes the journal file.
    def kill_all_pids_from_journal(journal_name)
      filename = pid_journal_filename(journal_name)
      kill_journal_entries(filename, pid_journal(filename), false)
    end

    # Records pgid in the pgid journal for journal_name unless it is
    # invalid (skip_pgid?) or already present.
    def append_pgid_to_journal(journal_name, pgid)
      if skip_pgid?(pgid)
        logger.debug("Skipping invalid pgid #{pgid} (class #{pgid.class})")
        return
      end

      filename = pgid_journal_filename(journal_name)
      acquire_atomic_fs_lock(filename) do
        if pgid_journal(filename).include?(pgid)
          logger.debug("Skipping duplicate pgid #{pgid} already in journal #{journal_name}")
        else
          logger.debug("Saving pgid #{pgid} to process journal #{journal_name}")
          File.open(filename, 'a+', 0600) { |f| f.puts(pgid) }
          logger.info("Saved pgid #{pgid} to journal #{journal_name}")
          logger.debug("Journal now = #{File.read(filename)}")
        end
      end
    end

    # Records pid (and the process group it belongs to) in the journals for
    # journal_name unless pid is invalid (skip_pid?) or already present.
    #
    # The pid is validated before its process group is looked up:
    # ::Process.getpgid(0) reports the caller's own group, so looking it up
    # first would journal -- and later signal -- this process's group.
    def append_pid_to_journal(journal_name, pid)
      if skip_pid?(pid)
        logger.debug("Skipping invalid pid #{pid} (class #{pid.class})")
        return
      end

      begin
        append_pgid_to_journal(journal_name, ::Process.getpgid(pid))
      rescue Errno::ESRCH
        # process already gone; there is no group to record
      end

      filename = pid_journal_filename(journal_name)
      acquire_atomic_fs_lock(filename) do
        if pid_journal(filename).include?(pid)
          logger.debug("Skipping duplicate pid #{pid} already in journal #{journal_name}")
        else
          logger.debug("Saving pid #{pid} to process journal #{journal_name}")
          File.open(filename, 'a+', 0600) { |f| f.puts(pid) }
          logger.info("Saved pid #{pid} to journal #{journal_name}")
          logger.debug("Journal now = #{File.read(filename)}")
        end
      end
    end

    private

    # Shared cleanup for pid and pgid journals: under the journal's lock,
    # TERM every id, KILL them all after a one-second grace period if any
    # is still alive, then delete the journal file.
    #
    # group - true when ids are process-group ids (signalled as -id).
    def kill_journal_entries(filename, ids, group)
      if ids.empty?
        logger.debug('No previous process journal - Skipping cleanup')
        return
      end

      noun = group ? 'process group' : 'process'
      acquire_atomic_fs_lock(filename) do
        signal_journal_entries('TERM', ids, group, "Termed old #{noun}", "Unable to term missing #{noun}")

        # Escalate when anything survived TERM; the previous `> 1` check let
        # a single survivor escape the KILL.
        if ids.any? { |id| System.pid_alive?(id) }
          sleep(1)
          signal_journal_entries('KILL', ids, group, "Killed old #{noun}", "Unable to kill missing #{noun}")
        end
        System.delete_if_exists(filename) # reset journal
        logger.debug('Journal cleanup completed')
      end
    end

    # Sends signal to each id (negated for process groups), logging
    # "<success> <id>" on delivery and "<missing> <id>" when the target no
    # longer exists (Errno::ESRCH).
    def signal_journal_entries(signal, ids, group, success, missing)
      ids.each do |id|
        begin
          ::Process.kill(signal, group ? -id : id)
          logger.info("#{success} #{id}")
        rescue Errno::ESRCH
          logger.debug("#{missing} #{id}")
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Keeps a bounded history of events (with reason and timestamp) and
  # renders it as text.
  class ProcessStatistics
    # Timestamp format: month/day/year hour:minute:second (24-hour clock).
    # %M is the minute directive; the previous %I printed the 12-hour hour
    # in the minutes position.
    STRFTIME = "%m/%d/%Y %H:%M:%S".freeze
    EVENTS_TO_PERSIST = 10

    attr_reader :events

    # possibly persist this data.
    def initialize
      @events = Util::RotationalArray.new(EVENTS_TO_PERSIST)
    end

    # Appends [event, reason, Time.now] to the history.
    #
    # event  - what happened.
    # reason - why it happened; may be nil (rendered as "unspecified").
    def record_event(event, reason)
      events.push([event, reason, Time.now])
    end

    # Renders the history in reverse of stored order, one event per line,
    # under an "event history:" header.
    def to_s
      str = events.reverse.map do |(event, reason, time)|
        " #{event} at #{time.strftime(STRFTIME)} - #{reason || "unspecified"}"
      end.join("\n")

      "event history:\n#{str}"
    end
  end
end
|
27
|
+
|