cloud66-bluepill 0.0.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/.rspec +1 -0
- data/DESIGN.md +10 -0
- data/Gemfile +10 -0
- data/LICENSE +22 -0
- data/README.md +349 -0
- data/Rakefile +38 -0
- data/bin/bluepill +124 -0
- data/bin/bpsv +3 -0
- data/bin/sample_forking_server +53 -0
- data/bluepill.gemspec +37 -0
- data/examples/example.rb +87 -0
- data/examples/new_example.rb +89 -0
- data/examples/new_runit_example.rb +29 -0
- data/examples/runit_example.rb +26 -0
- data/lib/bluepill.rb +38 -0
- data/lib/bluepill/application.rb +215 -0
- data/lib/bluepill/application/client.rb +8 -0
- data/lib/bluepill/application/server.rb +23 -0
- data/lib/bluepill/condition_watch.rb +51 -0
- data/lib/bluepill/controller.rb +122 -0
- data/lib/bluepill/dsl.rb +12 -0
- data/lib/bluepill/dsl/app_proxy.rb +25 -0
- data/lib/bluepill/dsl/process_factory.rb +122 -0
- data/lib/bluepill/dsl/process_proxy.rb +44 -0
- data/lib/bluepill/group.rb +72 -0
- data/lib/bluepill/logger.rb +63 -0
- data/lib/bluepill/process.rb +514 -0
- data/lib/bluepill/process_conditions.rb +14 -0
- data/lib/bluepill/process_conditions/always_true.rb +18 -0
- data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
- data/lib/bluepill/process_conditions/file_time.rb +26 -0
- data/lib/bluepill/process_conditions/http.rb +58 -0
- data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
- data/lib/bluepill/process_conditions/process_condition.rb +22 -0
- data/lib/bluepill/process_journal.rb +219 -0
- data/lib/bluepill/process_statistics.rb +27 -0
- data/lib/bluepill/socket.rb +58 -0
- data/lib/bluepill/system.rb +265 -0
- data/lib/bluepill/trigger.rb +60 -0
- data/lib/bluepill/triggers/flapping.rb +56 -0
- data/lib/bluepill/util/rotational_array.rb +20 -0
- data/lib/bluepill/version.rb +4 -0
- data/local-bluepill +129 -0
- data/spec/lib/bluepill/logger_spec.rb +3 -0
- data/spec/lib/bluepill/process_spec.rb +96 -0
- data/spec/lib/bluepill/process_statistics_spec.rb +24 -0
- data/spec/lib/bluepill/system_spec.rb +36 -0
- data/spec/spec_helper.rb +15 -0
- metadata +302 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Namespace holding the process condition classes, plus a lookup helper
  # that resolves a condition class from its underscored name.
  module ProcessConditions
    # Returns the constant whose name is the camel-cased form of +name+.
    #
    # name - String or Symbol such as :cpu_usage.
    #
    # Raises NameError (from const_get) when no matching constant exists.
    def self.[](name)
      const_get(camelize_condition_name(name))
    end

    # Converts an underscored name into a constant name.
    #
    # String#camelcase is not core Ruby (it is provided by ActiveSupport's
    # core extensions); it is used when available so behavior is unchanged in
    # that environment. Otherwise a plain-Ruby conversion is applied, which
    # covers simple underscored names ("cpu_usage" -> "CpuUsage").
    def self.camelize_condition_name(name)
      str = name.to_s
      return str.camelcase if str.respond_to?(:camelcase)

      str.split('_').map(&:capitalize).join
    end
    private_class_method :camelize_condition_name
  end
end
|
9
|
+
|
10
|
+
require "bluepill/process_conditions/process_condition"
|
11
|
+
Dir["#{File.dirname(__FILE__)}/process_conditions/*.rb"].each do |pc|
|
12
|
+
require pc
|
13
|
+
end
|
14
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Condition whose check always succeeds, whatever was sampled.
    class AlwaysTrue < ProcessCondition
      # Stores options[:below]; nothing in this class reads it back.
      def initialize(options = {})
        @below = options[:below]
      end

      # Ignores both arguments and returns the constant sample 1.
      def run(pid, include_children); 1; end

      # Every sampled value passes.
      def check(value); true; end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Condition that passes while the sampled CPU usage is below a threshold.
    class CpuUsage < ProcessCondition
      # options[:below] - the threshold that #check compares samples against.
      def initialize(options = {})
        @below = options[:below]
      end

      # Samples CPU usage for +pid+ through System.cpu_usage and coerces the
      # result to a Float.
      def run(pid, include_children)
        # third col in the ps axu output
        usage = System.cpu_usage(pid, include_children)
        usage.to_f
      end

      # True when the sampled value is strictly below the threshold.
      def check(value)
        value < @below
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Condition based on how long ago a file was last modified.
    class FileTime < ProcessCondition
      # options[:below]    - the age (in seconds, as produced by Time
      #                      subtraction) the file must stay under.
      # options[:filename] - path of the file whose mtime is inspected.
      def initialize(options = {})
        @below = options[:below]
        @filename = options[:filename]
      end

      # Returns the number of seconds since the file was last modified, nil
      # when the file does not exist, or the rescued exception object when
      # inspecting the file fails for any other reason.
      #
      # Both arguments are unused; they are part of the condition interface.
      def run(pid, include_children)
        # File.exists? was removed in Ruby 3.2; File.exist? works on every
        # Ruby version.
        return nil unless File.exist?(@filename)

        Time.now - File.mtime(@filename)
      rescue => e
        e
      end

      # True when the sampled age is below the threshold.
      #
      # #run may hand back nil or an exception object instead of a number;
      # neither can be compared with <, so both count as a failed check.
      def check(value)
        return false unless value.is_a?(Numeric)

        value < @below
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
module Bluepill
  module ProcessConditions
    # Condition that issues an HTTP GET and checks the response class and,
    # optionally, that the response body matches a pattern.
    class Http < ProcessCondition
      # Message Net::HTTP produces on some Ruby versions when the connection
      # could not be established (see #hide_net_http_bug).
      NET_HTTP_NIL_SOCKET_MESSAGE = %q|undefined method `closed?' for nil:NilClass|

      # options[:url]          - URL to request (parsed with URI.parse).
      # options[:kind]         - expected response: an Integer status code
      #                          (looked up in Net::HTTPResponse::CODE_TO_OBJ)
      #                          or a String/Symbol naming a Net::HTTP* class
      #                          (e.g. :success -> Net::HTTPSuccess). Any
      #                          other value selects Net::HTTPSuccess.
      # options[:pattern]      - optional object compared against the body
      #                          with ===.
      # options[:open_timeout],
      # options[:read_timeout] - per-phase timeouts; each falls back to
      #                          options[:timeout] and then to 5.
      def initialize(options = {})
        @uri = URI.parse(options[:url])
        @kind = case options[:kind]
                # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4
                # and removed in 3.2, where referencing it raises NameError.
                when Integer then Net::HTTPResponse::CODE_TO_OBJ[options[:kind].to_s]
                when String, Symbol then Net.const_get("HTTP#{options[:kind].to_s.camelize}")
                else
                  Net::HTTPSuccess
                end
        @pattern = options[:pattern]
        @open_timeout = (options[:open_timeout] || options[:timeout] || 5).to_i
        @read_timeout = (options[:read_timeout] || options[:timeout] || 5).to_i
      end

      # Performs the GET request and returns the response object. Any
      # StandardError raised along the way is returned (not raised), so
      # #check sees it as a value that is not of the expected kind.
      #
      # Both arguments are unused; they are part of the condition interface.
      def run(pid, include_children)
        session = Net::HTTP.new(@uri.host, @uri.port)
        if @uri.scheme == 'https'
          require 'net/https'
          session.use_ssl = true
          # NOTE(review): certificate verification is disabled, so the peer's
          # identity is not checked. Kept as-is to preserve behavior.
          session.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end
        session.open_timeout = @open_timeout
        session.read_timeout = @read_timeout
        hide_net_http_bug do
          session.start do |http|
            http.get(@uri.request_uri)
          end
        end
      rescue => e
        e
      end

      # True when +value+ is a response of the expected kind and, if a
      # pattern was configured, the response may carry a body and the body
      # matches the pattern.
      def check(value)
        return false unless value.kind_of?(@kind)
        return true unless @pattern
        return false unless value.class.body_permitted?

        @pattern === value.body
      end

      private

      # Runs the block and translates the NoMethodError that Net::HTTP raises
      # for a nil socket into Errno::ECONNREFUSED; any other NoMethodError is
      # re-raised untouched.
      def hide_net_http_bug
        yield
      rescue NoMethodError => e
        raise unless e.to_s.include?(NET_HTTP_NIL_SOCKET_MESSAGE)

        raise Errno::ECONNREFUSED, "Connection refused attempting to contact #{@uri.scheme}://#{@uri.host}:#{@uri.port}"
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Condition that passes while the sampled memory usage is below a
    # threshold.
    #
    # NOTE(review): Numeric#kilobytes is not defined in this file; presumably
    # it comes from ActiveSupport's numeric extensions - confirm.
    class MemUsage < ProcessCondition
      MB = 1024 ** 2
      FORMAT_STR = "%d%s"
      MB_LABEL = "MB"
      KB_LABEL = "KB"

      # options[:below] - the threshold that #check compares samples against.
      def initialize(options = {})
        @below = options[:below]
      end

      # Samples memory usage for +pid+ through System.memory_usage and
      # coerces the result to a Float.
      def run(pid, include_children)
        # rss is on the 5th col
        sample = System.memory_usage(pid, include_children)
        sample.to_f
      end

      # True when the sample, converted with #kilobytes, is strictly below
      # the threshold.
      def check(value)
        value.kilobytes < @below
      end

      # Renders the sample with a unit label: values whose #kilobytes
      # conversion reaches MB are divided by 1024, rounded and labelled MB;
      # everything else is labelled KB.
      def format_value(value)
        return FORMAT_STR % [value, KB_LABEL] unless value.kilobytes >= MB

        megabytes = (value / 1024).round
        FORMAT_STR % [megabytes, MB_LABEL]
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module ProcessConditions
    # Base class for process conditions. Subclasses are expected to supply
    # #run (take a sample) and #check (judge a sample); both raise here.
    class ProcessCondition
      # Keeps the options hash it was given in @options.
      def initialize(options = {})
        @options = options
      end

      # Placeholder for taking a sample; always raises RuntimeError.
      def run(pid, include_children)
        raise RuntimeError, "Implement in subclass!"
      end

      # Placeholder for judging a sample; always raises RuntimeError.
      def check(value)
        raise RuntimeError, "Implement in subclass!"
      end

      # Default formatting: the sample is returned unchanged.
      def format_value(value)
        value
      end
    end
  end
end
|
@@ -0,0 +1,219 @@
|
|
1
|
+
require 'bluepill/system'
|
2
|
+
|
3
|
+
module Bluepill
  # Keeps on-disk journals of the pids and process-group ids that were
  # recorded for a named journal, so that those processes can later be
  # signalled (TERM, then KILL) and the journal reset.
  #
  # Journal files live under journal_base_dir; access to a journal file is
  # serialized with a lock directory named "<journal file>.lock".
  module ProcessJournal
    extend self

    class << self
      attr_reader :logger
      attr_reader :journal_base_dir

      # Sets the logger. Only the first assignment takes effect (||=).
      def logger=(new_logger)
        @logger ||= new_logger
      end

      # Sets the journal directory to "<base_dir>/journals" (only the first
      # assignment takes effect), creates it if needed and makes it
      # world-accessible (mode 0777).
      def base_dir=(base_dir)
        @journal_base_dir ||= File.join(base_dir, "journals")
        # File.exists? was removed in Ruby 3.2; File.exist? is the portable form.
        FileUtils.mkdir_p(@journal_base_dir) unless File.exist?(@journal_base_dir)
        FileUtils.chmod(0777, @journal_base_dir)
      end
    end

    # True for anything that must never be journaled or signalled: values
    # that are not Integers, and pids 0 and 1 (and negatives).
    def skip_pid?(pid)
      !pid.is_a?(Integer) || pid <= 1
    end

    # Same rule as skip_pid?, applied to process-group ids.
    def skip_pgid?(pgid)
      !pgid.is_a?(Integer) || pgid <= 1
    end

    # atomic operation on POSIX filesystems, since
    # f.flock(File::LOCK_SH) is not available on all platforms
    #
    # Creates the lock directory "<name>.lock", yields, and removes the lock
    # directory afterwards. While the directory already exists the attempt is
    # repeated once per second; after 10 failed attempts a RuntimeError
    # ("Timeout waiting for lock ...") is raised.
    #
    # The retry is scoped to the mkdir call only. Retrying the whole method
    # body would reset the attempt counter and append ".lock" to the name a
    # second time, so the timeout could never trigger and the second attempt
    # would take a differently named lock.
    #
    # As in the original ensure clause, the lock directory is removed on
    # every exit path, including the timeout.
    # NOTE(review): removing it after a timeout looks like stale-lock
    # recovery, but it also releases a lock held by a slow owner - confirm
    # that this is intended.
    def acquire_atomic_fs_lock(name)
      lock_name = name + '.lock'
      attempts = 0
      begin
        Dir.mkdir lock_name, 0700
      rescue Errno::EEXIST
        attempts += 1
        logger.debug("Waiting for lock #{lock_name}")
        sleep 1
        retry unless attempts >= 10

        logger.info("Timeout waiting for lock #{lock_name}")
        raise "Timeout waiting for lock #{lock_name}"
      end
      logger.debug("Acquired lock #{lock_name}")
      yield
    ensure
      # lock_name is nil only when building it failed (e.g. name was nil).
      clear_atomic_fs_lock(lock_name) if lock_name
    end

    # Removes lock directories matching ".*.lock", or
    # ".*.<application_name>.lock" when an application name is given.
    #
    # Locks are created next to the journal files, so the search happens in
    # the journal directory when one is configured; without one the pattern
    # is resolved against the current working directory.
    def clear_all_atomic_fs_locks(application_name = nil)
      pattern = application_name.nil? ? '.*.lock' : ".*.#{application_name}.lock"
      pattern = File.join(@journal_base_dir, pattern) if @journal_base_dir
      Dir[pattern].each do |f|
        System.delete_if_exists(f) if File.directory?(f)
      end
    end

    # Path of the pid journal file for +journal_name+.
    def pid_journal_filename(journal_name)
      File.join(@journal_base_dir, ".bluepill_pids_journal.#{journal_name}")
    end

    # Path of the pgid journal file for +journal_name+.
    def pgid_journal_filename(journal_name)
      File.join(@journal_base_dir, ".bluepill_pgids_journal.#{journal_name}")
    end

    # Reads the pid journal at +filename+ and returns its usable pids as an
    # Array of Integers (entries rejected by skip_pid? are dropped).
    # Returns [] when the file does not exist.
    def pid_journal(filename)
      logger.debug("pid journal file: #{filename}")
      # File.readlines closes the file; File.open(...).readlines leaked it.
      result = File.readlines(filename).map(&:to_i).reject { |pid| skip_pid?(pid) }
      logger.debug("pid journal = #{result.join(' ')}")
      result
    rescue Errno::ENOENT
      []
    end

    # Reads the pgid journal at +filename+; same contract as pid_journal.
    def pgid_journal(filename)
      logger.debug("pgid journal file: #{filename}")
      result = File.readlines(filename).map(&:to_i).reject { |pgid| skip_pgid?(pgid) }
      logger.debug("pgid journal = #{result.join(' ')}")
      result
    rescue Errno::ENOENT
      []
    end

    # Removes the lock directory +name+ if it exists.
    def clear_atomic_fs_lock(name)
      if File.directory?(name)
        Dir.rmdir(name)
        logger.debug("Cleared lock #{name}")
      end
    end

    # Runs kill_all_from_journal for every pid journal that can be found.
    #
    # Journal files are written under the journal directory, so that is where
    # they are looked up when it is configured; otherwise the current working
    # directory is searched. Lock directories are ignored.
    def kill_all_from_all_journals
      pattern = ".bluepill_pids_journal.*"
      pattern = File.join(@journal_base_dir, pattern) if @journal_base_dir
      journal_names = Dir[pattern].map { |path|
        File.basename(path).sub(/^\.bluepill_pids_journal\./, "")
      }.reject { |journal_name|
        journal_name =~ /\.lock$/
      }
      journal_names.each do |journal_name|
        kill_all_from_journal(journal_name)
      end
    end

    # Signals every journaled pid, then every journaled process group, of
    # +journal_name+.
    def kill_all_from_journal(journal_name)
      kill_all_pids_from_journal(journal_name)
      kill_all_pgids_from_journal(journal_name)
    end

    # Sends TERM (and KILL if anything survives) to every journaled process
    # group of +journal_name+, then deletes the pgid journal.
    def kill_all_pgids_from_journal(journal_name)
      filename = pgid_journal_filename(journal_name)
      terminate_journal_entries(filename, pgid_journal(filename), 'process group', true)
    end

    # Sends TERM (and KILL if anything survives) to every journaled pid of
    # +journal_name+, then deletes the pid journal.
    def kill_all_pids_from_journal(journal_name)
      filename = pid_journal_filename(journal_name)
      terminate_journal_entries(filename, pid_journal(filename), 'process', false)
    end

    # Records +pgid+ in the pgid journal of +journal_name+ unless it is
    # invalid (see skip_pgid?) or already present.
    def append_pgid_to_journal(journal_name, pgid)
      if skip_pgid?(pgid)
        logger.debug("Skipping invalid pgid #{pgid} (class #{pgid.class})")
        return
      end

      filename = pgid_journal_filename(journal_name)
      acquire_atomic_fs_lock(filename) do
        unless pgid_journal(filename).include?(pgid)
          logger.debug("Saving pgid #{pgid} to process journal #{journal_name}")
          File.open(filename, 'a+', 0600) { |f| f.puts(pgid) }
          logger.info("Saved pgid #{pgid} to journal #{journal_name}")
          logger.debug("Journal now = #{File.read(filename)}")
        else
          logger.debug("Skipping duplicate pgid #{pgid} already in journal #{journal_name}")
        end
      end
    end

    # Records +pid+ (and the process group it currently belongs to) in the
    # journals of +journal_name+ unless the pid is invalid (see skip_pid?) or
    # already present.
    #
    # The pid is validated before its process group is looked up: asking for
    # the group of pid 0 would return this process's own group, which must
    # not end up in a journal of processes to kill.
    def append_pid_to_journal(journal_name, pid)
      if skip_pid?(pid)
        logger.debug("Skipping invalid pid #{pid} (class #{pid.class})")
        return
      end

      begin
        append_pgid_to_journal(journal_name, ::Process.getpgid(pid))
      rescue Errno::ESRCH
        # The process is already gone, so there is no group to record.
        logger.debug("Unable to look up pgid of missing process #{pid}")
      end

      filename = pid_journal_filename(journal_name)
      acquire_atomic_fs_lock(filename) do
        unless pid_journal(filename).include?(pid)
          logger.debug("Saving pid #{pid} to process journal #{journal_name}")
          File.open(filename, 'a+', 0600) { |f| f.puts(pid) }
          logger.info("Saved pid #{pid} to journal #{journal_name}")
          logger.debug("Journal now = #{File.read(filename)}")
        else
          logger.debug("Skipping duplicate pid #{pid} already in journal #{journal_name}")
        end
      end
    end

    private

    # Shared TERM-then-KILL sequence for one journal file.
    #
    # filename     - journal file; also the lock name and what is deleted.
    # ids          - ids read from that journal.
    # label        - "process" or "process group", used in log messages.
    # group_signal - when true the signal goes to the negated id, which
    #                addresses the whole process group.
    def terminate_journal_entries(filename, ids, label, group_signal)
      return logger.debug('No previous process journal - Skipping cleanup') if ids.empty?

      acquire_atomic_fs_lock(filename) do
        signal_journal_entries('TERM', ids, label, group_signal, 'Termed', 'term')

        # Escalate when anything at all is still alive. The previous test
        # (length > 1) never escalated for a single survivor.
        if ids.any? { |id| System.pid_alive?(id) }
          sleep(1)
          signal_journal_entries('KILL', ids, label, group_signal, 'Killed', 'kill')
        end
        System.delete_if_exists(filename) # reset journal
        logger.debug('Journal cleanup completed')
      end
    end

    # Sends +signal+ to every id, logging each outcome; ids whose process no
    # longer exists (Errno::ESRCH) are logged and skipped.
    def signal_journal_entries(signal, ids, label, group_signal, done, attempted)
      ids.each do |id|
        begin
          ::Process.kill(signal, group_signal ? -id : id)
          logger.info("#{done} old #{label} #{id}")
        rescue Errno::ESRCH
          logger.debug("Unable to #{attempted} missing #{label} #{id}")
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Bounded history of process events, renderable as a multi-line string.
  class ProcessStatistics
    # Timestamp format for #to_s: month/day/year hour:minute:second.
    # %M is the minute of the hour; %I (used previously) is the hour on a
    # 12-hour clock, which printed the hour twice and never the minutes.
    STRFTIME = "%m/%d/%Y %H:%M:%S".freeze
    EVENTS_TO_PERSIST = 10

    attr_reader :events

    # possibly persist this data.
    #
    # Events are kept in a Util::RotationalArray constructed with
    # EVENTS_TO_PERSIST.
    def initialize
      @events = Util::RotationalArray.new(EVENTS_TO_PERSIST)
    end

    # Appends an [event, reason, timestamp] entry stamped with Time.now.
    def record_event(event, reason)
      events.push([event, reason, Time.now])
    end

    # Renders the history, most recently pushed event first, one event per
    # line; a missing reason is shown as "unspecified".
    def to_s
      str = events.reverse.map do |(event, reason, time)|
        " #{event} at #{time.strftime(STRFTIME)} - #{reason || "unspecified"}"
      end.join("\n")

      "event history:\n#{str}"
    end
  end
end
|
27
|
+
|