cloud66-bluepill 0.0.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/.rspec +1 -0
- data/DESIGN.md +10 -0
- data/Gemfile +10 -0
- data/LICENSE +22 -0
- data/README.md +349 -0
- data/Rakefile +38 -0
- data/bin/bluepill +124 -0
- data/bin/bpsv +3 -0
- data/bin/sample_forking_server +53 -0
- data/bluepill.gemspec +37 -0
- data/examples/example.rb +87 -0
- data/examples/new_example.rb +89 -0
- data/examples/new_runit_example.rb +29 -0
- data/examples/runit_example.rb +26 -0
- data/lib/bluepill.rb +38 -0
- data/lib/bluepill/application.rb +215 -0
- data/lib/bluepill/application/client.rb +8 -0
- data/lib/bluepill/application/server.rb +23 -0
- data/lib/bluepill/condition_watch.rb +51 -0
- data/lib/bluepill/controller.rb +122 -0
- data/lib/bluepill/dsl.rb +12 -0
- data/lib/bluepill/dsl/app_proxy.rb +25 -0
- data/lib/bluepill/dsl/process_factory.rb +122 -0
- data/lib/bluepill/dsl/process_proxy.rb +44 -0
- data/lib/bluepill/group.rb +72 -0
- data/lib/bluepill/logger.rb +63 -0
- data/lib/bluepill/process.rb +514 -0
- data/lib/bluepill/process_conditions.rb +14 -0
- data/lib/bluepill/process_conditions/always_true.rb +18 -0
- data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
- data/lib/bluepill/process_conditions/file_time.rb +26 -0
- data/lib/bluepill/process_conditions/http.rb +58 -0
- data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
- data/lib/bluepill/process_conditions/process_condition.rb +22 -0
- data/lib/bluepill/process_journal.rb +219 -0
- data/lib/bluepill/process_statistics.rb +27 -0
- data/lib/bluepill/socket.rb +58 -0
- data/lib/bluepill/system.rb +265 -0
- data/lib/bluepill/trigger.rb +60 -0
- data/lib/bluepill/triggers/flapping.rb +56 -0
- data/lib/bluepill/util/rotational_array.rb +20 -0
- data/lib/bluepill/version.rb +4 -0
- data/local-bluepill +129 -0
- data/spec/lib/bluepill/logger_spec.rb +3 -0
- data/spec/lib/bluepill/process_spec.rb +96 -0
- data/spec/lib/bluepill/process_statistics_spec.rb +24 -0
- data/spec/lib/bluepill/system_spec.rb +36 -0
- data/spec/spec_helper.rb +15 -0
- metadata +302 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require 'socket'
require 'timeout'
|
3
|
+
|
4
|
+
module Bluepill
  # Helpers around the UNIX domain socket used for client commands.
  # The socket file lives at <base_dir>/socks/<name>.sock.
  module Socket
    TIMEOUT = 60 # Used for client commands
    MAX_ATTEMPTS = 5

    extend self

    # Opens a client connection to the socket for +name+ under +base_dir+.
    # The block, if given, is handed to UNIXSocket.open.
    def client(base_dir, name, &block)
      UNIXSocket.open(socket_path(base_dir, name), &block)
    end

    # Sends +command+ over the socket and returns the unmarshalled reply.
    #
    # Each attempt is limited to TIMEOUT seconds. On EOFError or
    # Timeout::Error the command is retried, up to MAX_ATTEMPTS attempts;
    # after the last failed attempt the process is aborted.
    def client_command(base_dir, name, command)
      res = nil
      MAX_ATTEMPTS.times do |current_attempt|
        begin
          client(base_dir, name) do |socket|
            Timeout.timeout(TIMEOUT) do
              socket.puts command
              # NOTE(review): Marshal.load trusts whatever is listening on the
              # socket file - confirm the socks directory is not writable by
              # untrusted users.
              res = Marshal.load(socket.read)
            end
          end
          break
        rescue EOFError, Timeout::Error
          if current_attempt == MAX_ATTEMPTS - 1
            abort("Socket Timeout: Server may not be responding")
          end
          puts "Retry #{current_attempt + 1} of #{MAX_ATTEMPTS}"
        end
      end
      res
    end

    # Opens and returns the UNIXServer for +name+ under +base_dir+.
    #
    # If the socket file already exists, a probe connection decides what to
    # do: a refused connection means the file is stale, so it is deleted and
    # the server is opened again; a successful connection means another
    # server is listening, so the problem is reported and the process exits
    # with status 7.
    def server(base_dir, name)
      socket_path = self.socket_path(base_dir, name)
      begin
        UNIXServer.open(socket_path)
      rescue Errno::EADDRINUSE
        # if sock file has been created, test to see if there is a server
        begin
          probe = UNIXSocket.open(socket_path)
        rescue Errno::ECONNREFUSED
          File.delete(socket_path)
          return UNIXServer.open(socket_path)
        else
          # the probe was only a liveness check; do not leak its descriptor
          probe.close
          message = "Server is already running!"
          # This module does not define a logger itself; use one only when
          # the receiver actually provides it, otherwise write to stderr.
          if respond_to?(:logger, true)
            logger.err(message)
          else
            $stderr.puts(message)
          end
          exit(7)
        end
      end
    end

    # Builds the path of the socket file for +name+ under +base_dir+.
    def socket_path(base_dir, name)
      File.join(base_dir, 'socks', name + ".sock")
    end
  end
end
|
@@ -0,0 +1,265 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require 'etc'
|
3
|
+
require "shellwords"
|
4
|
+
|
5
|
+
module Bluepill
|
6
|
+
# This module represents the system that bluepill is running on. It's mainly used to memoize
|
7
|
+
# results of running ps auxx etc so that every watch in every process will not result in a fork
|
8
|
+
module System
|
9
|
+
APPEND_MODE = "a"
|
10
|
+
extend self
|
11
|
+
|
12
|
+
# The position of each field in ps output
|
13
|
+
IDX_MAP = {
|
14
|
+
:pid => 0,
|
15
|
+
:ppid => 1,
|
16
|
+
:pcpu => 2,
|
17
|
+
:rss => 3
|
18
|
+
}
|
19
|
+
|
20
|
+
# Tells whether a process with the given pid exists, by sending it signal 0.
# Returns true when the signal is accepted or refused for lack of
# permission, and false when no such process is found.
def pid_alive?(pid)
  ::Process.kill(0, pid)
  true
rescue Errno::ESRCH
  false
rescue Errno::EPERM
  # no permission, but it is definitely alive
  true
end
|
30
|
+
|
31
|
+
# Returns the pcpu value of +pid+ from the ps table as a Float, or nil when
# the pid is not in the table. With +include_children+ set, the pcpu of
# every descendant that is still present in the table is added.
def cpu_usage(pid, include_children)
  table = ps_axu
  own_row = table[pid]
  return unless own_row

  column = IDX_MAP[:pcpu]
  descendants = include_children ? get_children(pid) : []

  descendants.inject(own_row[column].to_f) do |total, child_pid|
    child_row = table[child_pid]
    child_row ? total + child_row[column].to_f : total
  end
end
|
40
|
+
|
41
|
+
# Returns the rss value of +pid+ from the ps table as a Float, or nil when
# the pid is not in the table. With +include_children+ set, the rss of
# every descendant that is still present in the table is added.
def memory_usage(pid, include_children)
  table = ps_axu
  own_row = table[pid]
  return unless own_row

  column = IDX_MAP[:rss]
  descendants = include_children ? get_children(pid) : []

  descendants.inject(own_row[column].to_f) do |total, child_pid|
    child_row = table[child_pid]
    child_row ? total + child_row[column].to_f : total
  end
end
|
50
|
+
|
51
|
+
# Returns the pids of all descendants of +parent_pid+ found in the ps
# table: direct children first, followed by their descendants.
#
# +parent_pid+ is converted with to_i, so numeric strings are accepted.
def get_children(parent_pid)
  parent = parent_pid.to_i
  child_pids = []

  ps_axu.each_pair do |_pid, chunks|
    child = chunks[IDX_MAP[:pid]].to_i
    # A row that names itself as its parent (for instance a pid 0 row whose
    # ppid is also 0) would otherwise be its own child and make the
    # recursion below never terminate.
    next if child == parent

    child_pids << child if chunks[IDX_MAP[:ppid]].to_i == parent
  end

  grand_children = child_pids.map { |pid| get_children(pid) }.flatten
  child_pids.concat grand_children
end
|
59
|
+
|
60
|
+
# Returns the pid of the child that executes the cmd
|
61
|
+
# Starts +cmd+ as a daemon and reports the daemon's pid back through a pipe.
#
# Options read here: :uid, :gid, :supplementary_groups, :pid_file, :logger,
# :working_dir, :environment, :stdin, :stdout, :stderr.
#
# In the calling process this returns the pid read from the pipe when it is
# greater than zero, and nil otherwise. The forked child never returns; it
# exits once the pid has been written to the pid file and to the pipe.
def daemonize(cmd, options = {})
  reader, writer = IO.pipe
  forked_pid = Daemonize.safefork

  if forked_pid
    # we do not wanna create zombies, so detach ourselves from the child exit status
    ::Process.detach(forked_pid)

    # parent: only reads from the pipe
    writer.close

    reported_pid = reader.read.to_i
    reader.close

    return reported_pid if reported_pid > 0
  else
    # child: only writes to the pipe
    reader.close

    drop_privileges(options[:uid], options[:gid], options[:supplementary_groups])

    # if we cannot write the pid file as the provided user, err out
    exit unless can_write_pid_file(options[:pid_file], options[:logger])

    launcher = lambda do
      # Setting end PWD env emulates bash behavior when dealing with symlinks
      if options[:working_dir]
        target_dir = options[:working_dir].to_s
        ENV["PWD"] = target_dir
        Dir.chdir(target_dir)
      end

      if options[:environment]
        options[:environment].each { |name, setting| ENV[name.to_s] = setting.to_s }
      end

      redirect_io(*options.values_at(:stdin, :stdout, :stderr))

      ::Kernel.exec(*Shellwords.shellwords(cmd))
      exit
    end

    started_pid = Daemonize.call_as_daemon(launcher, nil, cmd)

    File.open(options[:pid_file], "w") { |file| file.write(started_pid) }

    writer.write started_pid
    writer.close

    exit
  end
end
|
106
|
+
|
107
|
+
# Removes +filename+ when it is given and exists.
#
# A file that disappears in the meantime is ignored. A permission error is
# retried (three attempts in total) and then reported on stderr instead of
# being raised.
def delete_if_exists(filename)
  tries = 0

  begin
    # File.exist? works on every Ruby version; the File.exists? alias was
    # removed in Ruby 3.2.
    File.unlink(filename) if filename && File.exist?(filename)
  rescue IOError, Errno::ENOENT
    # nothing left to delete
  rescue Errno::EACCES
    retry if (tries += 1) < 3
    $stderr.puts("Warning: permission denied trying to delete #{filename}")
  end
end
|
118
|
+
|
119
|
+
# Returns the stdout, stderr and exit code of the cmd
|
120
|
+
# Runs +cmd+ to completion and returns a hash with :stdout, :stderr and
# :exit_code.
#
# Options read here: :uid, :gid, :supplementary_groups, :working_dir,
# :environment.
#
# Three processes are involved: the caller, a forked child that collects
# the result and marshals it back through a pipe, and a grandchild that
# drops privileges and is replaced by +cmd+. When the child sends nothing
# back, an empty result with exit code 0 is returned.
def execute_blocking(cmd, options = {})
  rd, wr = IO.pipe

  if child = Daemonize.safefork
    # parent
    wr.close

    cmd_status = rd.read
    rd.close

    ::Process.waitpid(child)

    # The payload is raw Marshal data, so only test for emptiness; stripping
    # it as text can raise on byte sequences that are invalid in the
    # string's encoding.
    cmd_status.empty? ? {:exit_code => 0, :stdout => '', :stderr => ''} : Marshal.load(cmd_status)
  else
    # child
    rd.close

    # create a child in which we can override the stdin, stdout and stderr
    cmd_out_read, cmd_out_write = IO.pipe
    cmd_err_read, cmd_err_write = IO.pipe

    pid = fork {
      # grandchild
      drop_privileges(options[:uid], options[:gid], options[:supplementary_groups])

      Dir.chdir(ENV["PWD"] = options[:working_dir].to_s) if options[:working_dir]
      options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]

      # close unused fds so ancestors wont hang. This line is the only reason we are not
      # using something like popen3. If this fd is not closed, the .read call on the parent
      # will never return because "wr" would still be open in the "exec"-ed cmd
      wr.close

      # we do not care about stdin of cmd
      STDIN.reopen("/dev/null")

      # point stdout of cmd to somewhere we can read
      cmd_out_read.close
      STDOUT.reopen(cmd_out_write)
      cmd_out_write.close

      # same thing for stderr
      cmd_err_read.close
      STDERR.reopen(cmd_err_write)
      cmd_err_write.close

      # finally, replace grandchild with cmd
      ::Kernel.exec(*Shellwords.shellwords(cmd))
    }

    # we do not use these ends of the pipes in the child
    cmd_out_write.close
    cmd_err_write.close

    # Drain both pipes while the cmd is running. Waiting for the cmd first
    # would hang as soon as it writes more than a pipe can buffer, because
    # nobody would be reading the other end.
    out_reader = Thread.new { cmd_out_read.read }
    err_reader = Thread.new { cmd_err_read.read }

    # wait for the cmd to finish executing and acknowledge it's death
    ::Process.waitpid(pid)
    exit_code = $?.exitstatus

    # collect stdout, stderr and exitcode
    result = {
      :stdout => out_reader.value,
      :stderr => err_reader.value,
      :exit_code => exit_code
    }

    # We're done with these ends of the pipes as well
    cmd_out_read.close
    cmd_err_read.close

    # Time to tell the parent about what went down
    wr.write Marshal.dump(result)
    wr.close

    exit
  end
end
|
195
|
+
|
196
|
+
# Lazily created hash that holds memoized data (see ps_axu and reset_data).
def store
  @store ||= {}
end
|
199
|
+
|
200
|
+
# Empties the memo store. Returns nil when there was nothing to clear.
def reset_data
  cache = store
  cache.clear unless cache.empty?
end
|
203
|
+
|
204
|
+
# Returns the process table as a hash of pid (Integer) => array of the
# column strings, in the order described by IDX_MAP. The table is read once
# and memoized in the store until reset_data clears it.
def ps_axu
  # TODO: need a mutex here
  store[:ps_axu] ||= begin
    # BSD style ps invocation
    lines = `ps axo pid,ppid,pcpu,rss`.split("\n")

    lines.inject(Hash.new) do |mem, line|
      # split without arguments ignores leading and repeated whitespace
      chunks = line.split
      pid_field = chunks[IDX_MAP[:pid]]

      # Skip the column header and blank lines; otherwise the header would
      # be stored as a bogus entry for pid 0.
      next mem unless pid_field =~ /\A\d+\z/

      mem[pid_field.to_i] = chunks
      mem
    end
  end
end
|
219
|
+
|
220
|
+
# be sure to call this from a fork otherwise it will modify the attributes
|
221
|
+
# of the bluepill daemon
|
222
|
+
# Switches the current process to the given user and groups. Does nothing
# unless the effective uid is 0.
#
# +uid+ and +gid+ are names that get resolved through Etc;
# +supplementary_groups+ is an optional list of group names. When +uid+ is
# given, HOME is pointed at that user's home directory if one is found.
def drop_privileges(uid, gid, supplementary_groups)
  return unless ::Process::Sys.geteuid == 0

  uid_num = Etc.getpwnam(uid).uid if uid
  gid_num = Etc.getgrnam(gid).gid if gid

  extra_gids = (supplementary_groups || []).map { |group| Etc.getgrnam(group).gid }

  # groups are changed before the uid
  ::Process.groups = [gid_num] if gid
  ::Process.groups |= extra_gids unless extra_gids.empty?
  ::Process::Sys.setgid(gid_num) if gid
  ::Process::Sys.setuid(uid_num) if uid
  ENV['HOME'] = Etc.getpwuid(uid_num).try(:dir) || ENV['HOME'] if uid
end
|
240
|
+
|
241
|
+
# Checks that +pid_file+ can be created by touching it and removing it
# again. Returns true on success. On failure the error and its backtrace
# are sent to +logger+ as warnings and false is returned.
def can_write_pid_file(pid_file, logger)
  FileUtils.touch(pid_file)
  File.unlink(pid_file)
  return true

# StandardError covers the file system errors this probe can hit, without
# swallowing signals, interrupts or exit requests the way Exception would.
rescue StandardError => e
  logger.warning "%s - %s" % [e.class.name, e.message]
  e.backtrace.each {|l| logger.warning l}
  return false
end
|
251
|
+
|
252
|
+
# Reopens the standard streams of the current process on the given targets.
# Any argument may be nil, which leaves that stream untouched. Output
# targets are opened with APPEND_MODE; when stdout and stderr name the same
# target, stderr is reopened on stdout rather than opened a second time.
def redirect_io(io_in, io_out, io_err)
  $stdin.reopen(io_in) if io_in

  same_target = !io_out.nil? && !io_err.nil? && io_out == io_err

  if same_target
    $stdout.reopen(io_out, APPEND_MODE)
    $stderr.reopen($stdout)
  else
    $stdout.reopen(io_out, APPEND_MODE) if io_out
    $stderr.reopen(io_err, APPEND_MODE) if io_err
  end
end
|
264
|
+
end
|
265
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  # Base class for triggers. A trigger belongs to a process, is told about
  # transitions through #notify and can schedule events that are dispatched
  # to the process after a delay.
  #
  # Direct subclasses are registered under the underscored, symbolized last
  # part of their class name and can be looked up with Trigger[name].
  class Trigger
    @implementations = {}

    def self.inherited(klass)
      @implementations[klass.name.split('::').last.underscore.to_sym] = klass
    end

    # Looks up a registered trigger class by name; nil when unknown.
    def self.[](name)
      @implementations[name]
    end

    attr_accessor :process, :logger, :mutex, :scheduled_events

    # +options+ is read for :logger only.
    def initialize(process, options = {})
      self.process = process
      self.logger = options[:logger]
      self.mutex = Mutex.new
      self.scheduled_events = []
    end

    def reset!
      self.cancel_all_events
    end

    def notify(transition)
      raise "Implement in subclass"
    end

    # Forwards +event+ to the process, passing the unqualified class name of
    # this trigger as the second argument.
    def dispatch!(event)
      self.process.dispatch!(event, self.class.name.split("::").last)
    end

    # Dispatches +event+ from a new thread after +delay+ seconds. The thread
    # is recorded in scheduled_events until it has fired. Errors raised
    # while dispatching are sent to the logger.
    def schedule_event(event, delay)
      # TODO: maybe wrap this in a ScheduledEvent class with methods like cancel
      #
      # The thread is created and registered while holding the mutex. Its
      # own clean-up needs the same mutex, so it cannot run before the
      # entry exists, which would leave a stale entry behind.
      self.mutex.synchronize do
        worker = Thread.new(self) do |trigger|
          begin
            sleep delay.to_f
            trigger.dispatch!(event)
            trigger.mutex.synchronize do
              trigger.scheduled_events.delete_if { |_, scheduled| scheduled == Thread.current }
            end
          rescue StandardError => e
            trigger.logger.err(e)
            trigger.logger.err(e.backtrace.join("\n"))
          end
        end

        self.scheduled_events.push([event, worker])
      end
    end

    # Kills every scheduled thread and empties scheduled_events.
    def cancel_all_events
      self.logger.info "Canceling all scheduled events"
      self.mutex.synchronize do
        pending = self.scheduled_events.map { |_, scheduled| scheduled }
        # killed threads never reach their own clean-up, so drop them here
        self.scheduled_events.clear

        # If this is called from one of the scheduled threads, kill that one
        # last; killing it first would stop the loop before the others.
        own, others = pending.partition { |scheduled| scheduled == Thread.current }
        others.each { |scheduled| scheduled.kill }
        own.each { |scheduled| scheduled.kill }

        self.scheduled_events
      end
    end

  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module Triggers
    # Trigger that watches how often a process enters one of TRIGGER_STATES.
    #
    # When +times+ such transitions have been recorded and the first and
    # last of them are at most +within+ seconds apart, the transition is
    # halted, an :unmonitor event is scheduled and, unless +retry_in+ is
    # zero, a :start event is scheduled +retry_in+ seconds later.
    class Flapping < Bluepill::Trigger
      TRIGGER_STATES = [:starting, :restarting]

      PARAMS = [:times, :within, :retry_in]

      attr_accessor(*PARAMS)
      attr_reader :timeline

      # Options listed in PARAMS are stored on the instance; defaults are
      # :times => 5, :within => 1, :retry_in => 5.
      def initialize(process, options = {})
        options.reverse_merge!(:times => 5, :within => 1, :retry_in => 5)

        options.each_pair do |key, setting|
          next unless PARAMS.include?(key)
          instance_variable_set("@#{key}", setting)
        end

        @timeline = Util::RotationalArray.new(@times)
        super
      end

      # Records the current time for transitions into TRIGGER_STATES and
      # then checks for flapping; other transitions are ignored.
      def notify(transition)
        return unless TRIGGER_STATES.include?(transition.to_name)

        self.timeline << Time.now.to_i
        self.check_flapping
      end

      def reset!
        @timeline.clear
        super
      end

      def check_flapping
        # The process has not flapped if we haven't encountered enough incidents
        return unless @timeline.compact.length == self.times

        # Check if the incidents happened within the timeframe
        flapped = (@timeline.last - @timeline.first) <= self.within
        return unless flapped

        self.logger.info "Flapping detected: retrying in #{self.retry_in} seconds"

        # retry_in zero means "do not retry, ever"
        self.schedule_event(:start, self.retry_in) unless self.retry_in == 0
        self.schedule_event(:unmonitor, 0)

        @timeline.clear

        # This will prevent a transition from happening in the process state_machine
        throw :halt
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
module Bluepill
  module Util
    # Array that keeps at most a fixed number of elements: pushing onto a
    # full array drops the oldest element from the front.
    class RotationalArray < Array
      # +size+ is the maximum number of elements to keep.
      def initialize(size)
        # no size is passed on to Array - intentionally, the array starts empty
        super()
        @capacity = size
      end

      # Appends +value+, drops the first element when the capacity is
      # exceeded, and returns the array itself so calls can be chained.
      def push(value)
        super(value)
        shift if size > @capacity
        self
      end
      alias_method :<<, :push
    end
  end
end
|