cloud66-bluepill 0.0.62

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. data/.gitignore +10 -0
  2. data/.rspec +1 -0
  3. data/DESIGN.md +10 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +22 -0
  6. data/README.md +349 -0
  7. data/Rakefile +38 -0
  8. data/bin/bluepill +124 -0
  9. data/bin/bpsv +3 -0
  10. data/bin/sample_forking_server +53 -0
  11. data/bluepill.gemspec +37 -0
  12. data/examples/example.rb +87 -0
  13. data/examples/new_example.rb +89 -0
  14. data/examples/new_runit_example.rb +29 -0
  15. data/examples/runit_example.rb +26 -0
  16. data/lib/bluepill.rb +38 -0
  17. data/lib/bluepill/application.rb +215 -0
  18. data/lib/bluepill/application/client.rb +8 -0
  19. data/lib/bluepill/application/server.rb +23 -0
  20. data/lib/bluepill/condition_watch.rb +51 -0
  21. data/lib/bluepill/controller.rb +122 -0
  22. data/lib/bluepill/dsl.rb +12 -0
  23. data/lib/bluepill/dsl/app_proxy.rb +25 -0
  24. data/lib/bluepill/dsl/process_factory.rb +122 -0
  25. data/lib/bluepill/dsl/process_proxy.rb +44 -0
  26. data/lib/bluepill/group.rb +72 -0
  27. data/lib/bluepill/logger.rb +63 -0
  28. data/lib/bluepill/process.rb +514 -0
  29. data/lib/bluepill/process_conditions.rb +14 -0
  30. data/lib/bluepill/process_conditions/always_true.rb +18 -0
  31. data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
  32. data/lib/bluepill/process_conditions/file_time.rb +26 -0
  33. data/lib/bluepill/process_conditions/http.rb +58 -0
  34. data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
  35. data/lib/bluepill/process_conditions/process_condition.rb +22 -0
  36. data/lib/bluepill/process_journal.rb +219 -0
  37. data/lib/bluepill/process_statistics.rb +27 -0
  38. data/lib/bluepill/socket.rb +58 -0
  39. data/lib/bluepill/system.rb +265 -0
  40. data/lib/bluepill/trigger.rb +60 -0
  41. data/lib/bluepill/triggers/flapping.rb +56 -0
  42. data/lib/bluepill/util/rotational_array.rb +20 -0
  43. data/lib/bluepill/version.rb +4 -0
  44. data/local-bluepill +129 -0
  45. data/spec/lib/bluepill/logger_spec.rb +3 -0
  46. data/spec/lib/bluepill/process_spec.rb +96 -0
  47. data/spec/lib/bluepill/process_statistics_spec.rb +24 -0
  48. data/spec/lib/bluepill/system_spec.rb +36 -0
  49. data/spec/spec_helper.rb +15 -0
  50. metadata +302 -0
@@ -0,0 +1,58 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'socket'
3
+
4
module Bluepill
  # Helpers for the UNIX domain socket over which a bluepill client sends
  # commands to a running bluepill server. All methods are callable directly
  # on the module (Bluepill::Socket.client_command, ...).
  module Socket
    TIMEOUT = 60 # Used for client commands
    MAX_ATTEMPTS = 5

    extend self

    # Opens a client connection to the socket of application +name+.
    #
    # base_dir - directory containing the "socks" sub-directory.
    # name     - application name (String); the socket file is "<name>.sock".
    # block    - optional; forwarded to UNIXSocket.open, which yields the
    #            socket and closes it when the block returns.
    #
    # Returns whatever UNIXSocket.open returns.
    def client(base_dir, name, &block)
      UNIXSocket.open(socket_path(base_dir, name), &block)
    end

    # Sends +command+ to the server and returns its unmarshalled reply.
    #
    # Each attempt is limited to TIMEOUT seconds. On EOFError or
    # Timeout::Error the command is retried, up to MAX_ATTEMPTS attempts in
    # total; when the last attempt fails the process is aborted.
    #
    # NOTE(review): the reply is decoded with Marshal.load, which must only be
    # used on trusted data; this relies on the socket file being writable by
    # trusted users only.
    def client_command(base_dir, name, command)
      res = nil
      MAX_ATTEMPTS.times do |current_attempt|
        begin
          client(base_dir, name) do |socket|
            Timeout.timeout(TIMEOUT) do
              socket.puts command
              res = Marshal.load(socket.read)
            end
          end
          break
        rescue EOFError, Timeout::Error
          if current_attempt == MAX_ATTEMPTS - 1
            abort("Socket Timeout: Server may not be responding")
          end
          puts "Retry #{current_attempt + 1} of #{MAX_ATTEMPTS}"
        end
      end
      res
    end

    # Creates the listening socket for application +name+.
    #
    # If the socket file already exists, a probe connection decides whether
    # it is stale (connection refused: the file is deleted and a new server
    # socket is created) or owned by a live server (the problem is reported
    # and the process exits with status 7).
    #
    # Returns the UNIXServer.
    def server(base_dir, name)
      socket_path = self.socket_path(base_dir, name)
      begin
        UNIXServer.open(socket_path)
      rescue Errno::EADDRINUSE
        # if sock file has been created. test to see if there is a server
        begin
          probe = UNIXSocket.open(socket_path)
        rescue Errno::ECONNREFUSED
          File.delete(socket_path)
          return UNIXServer.open(socket_path)
        else
          # Somebody answered: do not leak the probe connection.
          probe.close
          report_already_running
          exit(7)
        end
      end
    end

    # Returns the path of the socket file for application +name+.
    def socket_path(base_dir, name)
      File.join(base_dir, 'socks', name + ".sock")
    end

    # Reports that another server owns the socket. This module defines no
    # +logger+ of its own, so fall back to standard error unless one has been
    # made available to it from elsewhere.
    def report_already_running
      message = "Server is already running!"
      if respond_to?(:logger, true)
        logger.err(message)
      else
        $stderr.puts(message)
      end
    end
    private :report_already_running
  end
end
@@ -0,0 +1,265 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'etc'
3
+ require "shellwords"
4
+
5
module Bluepill
  # This module represents the system that bluepill is running on. It is mainly used to memoize
  # the results of running ps so that every watch in every process does not result in a fork.
  module System
    APPEND_MODE = "a"
    extend self

    # The position of each field in ps output
    IDX_MAP = {
      :pid => 0,
      :ppid => 1,
      :pcpu => 2,
      :rss => 3
    }

    # Returns true when a process with the given pid exists, false otherwise.
    def pid_alive?(pid)
      ::Process.kill(0, pid) # signal 0 only checks that the pid can be signalled
      true
    rescue Errno::EPERM # no permission, but it is definitely alive
      true
    rescue Errno::ESRCH
      false
    end

    # Returns the %CPU reported by ps for +pid+ (plus all of its descendants
    # when +include_children+ is truthy) as a Float, or nil when ps does not
    # list +pid+.
    def cpu_usage(pid, include_children)
      sum_ps_field(:pcpu, pid, include_children)
    end

    # Returns the RSS reported by ps for +pid+ (plus all of its descendants
    # when +include_children+ is truthy) as a Float, or nil when ps does not
    # list +pid+.
    def memory_usage(pid, include_children)
      sum_ps_field(:rss, pid, include_children)
    end

    # Returns the pids of every descendant of +parent_pid+ found in the
    # memoized ps table: direct children first, followed by each child's own
    # descendants.
    def get_children(parent_pid)
      wanted_ppid = parent_pid.to_i
      child_pids = []
      ps_axu.each_pair do |_pid, chunks|
        child_pids << chunks[IDX_MAP[:pid]].to_i if chunks[IDX_MAP[:ppid]].to_i == wanted_ppid
      end
      child_pids.concat(child_pids.map { |pid| get_children(pid) }.flatten)
    end

    # Runs +cmd+ detached from the current process.
    #
    # options - :uid, :gid, :supplementary_groups (privileges to drop to),
    #           :pid_file, :logger, :working_dir, :environment,
    #           :stdin, :stdout, :stderr.
    #
    # Returns the pid of the child that executes the cmd, or nil when no
    # positive pid was reported back by the intermediate process.
    def daemonize(cmd, options = {})
      rd, wr = IO.pipe

      if child = Daemonize.safefork
        # we do not wanna create zombies, so detach ourselves from the child exit status
        ::Process.detach(child)

        # parent
        wr.close

        daemon_id = rd.read.to_i
        rd.close

        return daemon_id if daemon_id > 0

      else
        # child
        rd.close

        drop_privileges(options[:uid], options[:gid], options[:supplementary_groups])

        # if we cannot write the pid file as the provided user, err out
        exit unless can_write_pid_file(options[:pid_file], options[:logger])

        to_daemonize = lambda do
          # Setting end PWD env emulates bash behavior when dealing with symlinks
          Dir.chdir(ENV["PWD"] = options[:working_dir].to_s) if options[:working_dir]
          options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]

          redirect_io(*options.values_at(:stdin, :stdout, :stderr))

          ::Kernel.exec(*Shellwords.shellwords(cmd))
          exit
        end

        daemon_id = Daemonize.call_as_daemon(to_daemonize, nil, cmd)

        File.open(options[:pid_file], "w") { |f| f.write(daemon_id) }

        # hand the daemon's pid back to the waiting parent
        wr.write daemon_id
        wr.close

        exit
      end
    end

    # Deletes +filename+ when it is given and exists. A file that vanishes in
    # the meantime is ignored; a permission error is retried (three attempts
    # in total) before a warning is printed to standard error.
    def delete_if_exists(filename)
      tries = 0

      begin
        # File.exist? rather than File.exists?, which no longer exists in Ruby 3.2+
        File.unlink(filename) if filename && File.exist?(filename)
      rescue IOError, Errno::ENOENT
      rescue Errno::EACCES
        retry if (tries += 1) < 3
        $stderr.puts("Warning: permission denied trying to delete #{filename}")
      end
    end

    # Runs +cmd+ and waits for it to finish.
    #
    # options - :uid, :gid, :supplementary_groups, :working_dir, :environment.
    #
    # Returns a Hash with the :stdout, :stderr and :exit_code of the cmd.
    def execute_blocking(cmd, options = {})
      rd, wr = IO.pipe

      if child = Daemonize.safefork
        # parent
        wr.close

        cmd_status = rd.read
        rd.close

        ::Process.waitpid(child)

        cmd_status.strip != '' ? Marshal.load(cmd_status) : {:exit_code => 0, :stdout => '', :stderr => ''}
      else
        # child
        rd.close

        # create a child in which we can override the stdin, stdout and stderr
        cmd_out_read, cmd_out_write = IO.pipe
        cmd_err_read, cmd_err_write = IO.pipe

        pid = fork {
          # grandchild
          drop_privileges(options[:uid], options[:gid], options[:supplementary_groups])

          Dir.chdir(ENV["PWD"] = options[:working_dir].to_s) if options[:working_dir]
          options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]

          # close unused fds so ancestors wont hang. This line is the only reason we are not
          # using something like popen3. If this fd is not closed, the .read call on the parent
          # will never return because "wr" would still be open in the "exec"-ed cmd
          wr.close

          # we do not care about stdin of cmd
          STDIN.reopen("/dev/null")

          # point stdout of cmd to somewhere we can read
          cmd_out_read.close
          STDOUT.reopen(cmd_out_write)
          cmd_out_write.close

          # same thing for stderr
          cmd_err_read.close
          STDERR.reopen(cmd_err_write)
          cmd_err_write.close

          # finally, replace grandchild with cmd
          ::Kernel.exec(*Shellwords.shellwords(cmd))
        }

        # we do not use these ends of the pipes in the child
        cmd_out_write.close
        cmd_err_write.close

        # Drain both pipes concurrently BEFORE reaping the cmd. If we waited
        # first, a cmd producing more output than a pipe buffer holds would
        # block on write forever while we block in waitpid.
        out_reader = Thread.new { cmd_out_read.read }
        err_reader = Thread.new { cmd_err_read.read }
        captured_out = out_reader.value
        captured_err = err_reader.value

        # wait for the cmd to finish executing and acknowledge its death
        ::Process.waitpid(pid)

        # collect stdout, stderr and exitcode
        result = {
          :stdout => captured_out,
          :stderr => captured_err,
          :exit_code => $?.exitstatus
        }

        # We're done with these ends of the pipes as well
        cmd_out_read.close
        cmd_err_read.close

        # Time to tell the parent about what went down
        wr.write Marshal.dump(result)
        wr.close

        exit
      end
    end

    # Memoization store shared by the methods of this module.
    def store
      @store ||= Hash.new
    end

    # Forgets all memoized data (e.g. the ps table).
    def reset_data
      store.clear unless store.empty?
    end

    # Returns the memoized ps table: a Hash mapping pid (Integer) to the
    # Array of String fields of its ps row, ordered as described by IDX_MAP.
    def ps_axu
      # TODO: need a mutex here
      store[:ps_axu] ||= begin
        # BSD style ps invocation
        lines = `ps axo pid,ppid,pcpu,rss`.split("\n")

        lines.inject(Hash.new) do |mem, line|
          chunks = line.split # splits on whitespace and drops empty fields
          pid_field = chunks[IDX_MAP[:pid]]
          # Skip the header row and blank lines: storing the header under
          # pid 0 (with ppid 0) made get_children(0) recurse without end.
          next mem unless pid_field && pid_field =~ /\A\d+\z/
          mem[pid_field.to_i] = chunks
          mem
        end
      end
    end

    # Switches the current process to the given user, group and
    # supplementary groups (all given by name). Does nothing unless the
    # effective uid is 0.
    #
    # be sure to call this from a fork otherwise it will modify the attributes
    # of the bluepill daemon
    def drop_privileges(uid, gid, supplementary_groups)
      if ::Process::Sys.geteuid == 0
        uid_num = Etc.getpwnam(uid).uid if uid
        gid_num = Etc.getgrnam(gid).gid if gid

        supplementary_groups ||= []

        group_nums = supplementary_groups.map do |group|
          Etc.getgrnam(group).gid
        end

        # groups must be changed while we are still root, i.e. before setuid
        ::Process.groups = [gid_num] if gid
        ::Process.groups |= group_nums unless group_nums.empty?
        ::Process::Sys.setgid(gid_num) if gid
        ::Process::Sys.setuid(uid_num) if uid

        if uid
          passwd = Etc.getpwuid(uid_num)
          ENV['HOME'] = (passwd && passwd.dir) || ENV['HOME']
        end
      end
    end

    # Checks that +pid_file+ can be created (and removed again) by the
    # current user. Failures are reported through +logger+.
    #
    # Returns true or false.
    def can_write_pid_file(pid_file, logger)
      FileUtils.touch(pid_file)
      File.unlink(pid_file)
      return true

    rescue StandardError => e # not Exception: signals and exit requests must propagate
      logger.warning "%s - %s" % [e.class.name, e.message]
      e.backtrace.each { |l| logger.warning l }
      return false
    end

    # Re-opens the standard streams on the given targets; nil targets are
    # left untouched. When stdout and stderr name the same target, stderr is
    # pointed at the re-opened stdout so that both share one file handle.
    def redirect_io(io_in, io_out, io_err)
      $stdin.reopen(io_in) if io_in

      if !io_out.nil? && !io_err.nil? && io_out == io_err
        $stdout.reopen(io_out, APPEND_MODE)
        $stderr.reopen($stdout)

      else
        $stdout.reopen(io_out, APPEND_MODE) if io_out
        $stderr.reopen(io_err, APPEND_MODE) if io_err
      end
    end

    # Sums the ps column +field+ (a key of IDX_MAP) for +pid+ and, when
    # +include_children+ is truthy, for every descendant that ps still lists.
    # Returns nil when ps does not list +pid+.
    def sum_ps_field(field, pid, include_children)
      ps = ps_axu
      return unless ps[pid]

      column = IDX_MAP[field]
      pids = [pid]
      pids.concat(get_children(pid)) if include_children
      pids.inject(0.0) { |total, each_pid| ps[each_pid] ? total + ps[each_pid][column].to_f : total }
    end
    private :sum_ps_field
  end
end
@@ -0,0 +1,60 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  # Base class for triggers: objects that are notified of a process' state
  # transitions and may schedule events to be dispatched to that process
  # after a delay. Subclasses register themselves under their underscored
  # class name and must implement #notify.
  class Trigger
    @implementations = {}

    # Registers every subclass under its underscored, demodulized name.
    def self.inherited(klass)
      @implementations[klass.name.split('::').last.underscore.to_sym] = klass
    end

    # Returns the trigger class registered under +name+ (a Symbol), or nil.
    def self.[](name)
      @implementations[name]
    end

    attr_accessor :process, :logger, :mutex, :scheduled_events

    # process - the object events are dispatched to (must respond to #dispatch!).
    # options - :logger is the only option read here.
    def initialize(process, options = {})
      self.process = process
      self.logger = options[:logger]
      self.mutex = Mutex.new
      self.scheduled_events = []
    end

    # Cancels everything that is still scheduled.
    def reset!
      cancel_all_events
    end

    def notify(transition)
      raise "Implement in subclass"
    end

    # Dispatches +event+ to the process, naming this trigger's class as the reason.
    def dispatch!(event)
      process.dispatch!(event, self.class.name.split("::").last)
    end

    # Dispatches +event+ from a background thread after +delay+ seconds.
    #
    # The thread is created and registered while holding the mutex, so that
    # a thread which finishes immediately cannot try to remove itself from
    # scheduled_events before it has been added (which would leave a stale
    # entry behind).
    def schedule_event(event, delay)
      # TODO: maybe wrap this in a ScheduledEvent class with methods like cancel
      mutex.synchronize do
        worker = Thread.new(self) do |trigger|
          begin
            sleep delay.to_f
            trigger.dispatch!(event)
            trigger.mutex.synchronize do
              trigger.scheduled_events.delete_if { |_, scheduled| scheduled == Thread.current }
            end
          rescue StandardError => e
            trigger.logger.err(e)
            trigger.logger.err(e.backtrace.join("\n"))
          end
        end

        scheduled_events.push([event, worker])
      end
    end

    # Kills every scheduled thread and forgets about them.
    def cancel_all_events
      logger.info "Canceling all scheduled events"

      cancel_self = false
      mutex.synchronize do
        scheduled_events.each do |_, worker|
          if worker == Thread.current
            # Killing ourselves here would stop the iteration half-way;
            # do it last, once everything else has been cleaned up.
            cancel_self = true
          else
            worker.kill
          end
        end
        # Killed threads never reach their own clean-up code.
        scheduled_events.clear
      end

      Thread.current.kill if cancel_self
    end

  end
end
@@ -0,0 +1,56 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  module Triggers
    # Detects a "flapping" process: one that enters a starting/restarting
    # state +times+ times within +within+ seconds. When that happens the
    # pending transition is halted, the process is unmonitored and, unless
    # +retry_in+ is zero, a start is scheduled +retry_in+ seconds later.
    class Flapping < Bluepill::Trigger
      TRIGGER_STATES = [:starting, :restarting]

      PARAMS = [:times, :within, :retry_in]

      attr_accessor(*PARAMS)
      attr_reader :timeline

      # process - forwarded to Bluepill::Trigger.
      # options - :times (default 5), :within (default 1) and :retry_in
      #           (default 5); all options, including the defaults, are
      #           forwarded to Bluepill::Trigger. The caller's hash is left
      #           unmodified.
      def initialize(process, options = {})
        # Merge into a new hash instead of mutating the caller's one.
        options = {:times => 5, :within => 1, :retry_in => 5}.merge(options)

        PARAMS.each do |name|
          instance_variable_set("@#{name}", options[name])
        end

        @timeline = Util::RotationalArray.new(@times)
        super
      end

      # Records the time of every transition into one of TRIGGER_STATES and
      # then checks whether the process is flapping.
      def notify(transition)
        return unless TRIGGER_STATES.include?(transition.to_name)

        timeline << Time.now.to_i
        check_flapping
      end

      # Forgets the recorded incidents and cancels scheduled events.
      def reset!
        @timeline.clear
        super
      end

      def check_flapping
        # The process has not flapped if we haven't encountered enough incidents
        return unless @timeline.compact.length == times

        # Check if the incidents happened within the timeframe
        flapped = (@timeline.last - @timeline.first) <= within
        return unless flapped

        logger.info "Flapping detected: retrying in #{retry_in} seconds"

        schedule_event(:start, retry_in) unless retry_in == 0 # retry_in zero means "do not retry, ever"
        schedule_event(:unmonitor, 0)

        @timeline.clear

        # This will prevent a transition from happening in the process state_machine
        throw :halt
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  module Util
    # An Array that holds at most +size+ elements: appending with #push or
    # #<< beyond that capacity discards the oldest (first) elements.
    class RotationalArray < Array
      # size - maximum number of elements kept by #push / #<<.
      def initialize(size)
        @capacity = size

        super() # no size - intentionally
      end

      # Appends the given values (any number of them, like Array#push) and
      # drops as many elements from the front as needed to get back within
      # capacity.
      #
      # Returns self, so calls can be chained.
      def push(*values)
        super(*values)

        overflow = length - @capacity
        shift(overflow) if overflow > 0
        self
      end
      alias_method :<<, :push
    end
  end
end