cloud66-bluepill 0.0.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. data/.gitignore +10 -0
  2. data/.rspec +1 -0
  3. data/DESIGN.md +10 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +22 -0
  6. data/README.md +349 -0
  7. data/Rakefile +38 -0
  8. data/bin/bluepill +124 -0
  9. data/bin/bpsv +3 -0
  10. data/bin/sample_forking_server +53 -0
  11. data/bluepill.gemspec +37 -0
  12. data/examples/example.rb +87 -0
  13. data/examples/new_example.rb +89 -0
  14. data/examples/new_runit_example.rb +29 -0
  15. data/examples/runit_example.rb +26 -0
  16. data/lib/bluepill.rb +38 -0
  17. data/lib/bluepill/application.rb +215 -0
  18. data/lib/bluepill/application/client.rb +8 -0
  19. data/lib/bluepill/application/server.rb +23 -0
  20. data/lib/bluepill/condition_watch.rb +51 -0
  21. data/lib/bluepill/controller.rb +122 -0
  22. data/lib/bluepill/dsl.rb +12 -0
  23. data/lib/bluepill/dsl/app_proxy.rb +25 -0
  24. data/lib/bluepill/dsl/process_factory.rb +122 -0
  25. data/lib/bluepill/dsl/process_proxy.rb +44 -0
  26. data/lib/bluepill/group.rb +72 -0
  27. data/lib/bluepill/logger.rb +63 -0
  28. data/lib/bluepill/process.rb +514 -0
  29. data/lib/bluepill/process_conditions.rb +14 -0
  30. data/lib/bluepill/process_conditions/always_true.rb +18 -0
  31. data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
  32. data/lib/bluepill/process_conditions/file_time.rb +26 -0
  33. data/lib/bluepill/process_conditions/http.rb +58 -0
  34. data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
  35. data/lib/bluepill/process_conditions/process_condition.rb +22 -0
  36. data/lib/bluepill/process_journal.rb +219 -0
  37. data/lib/bluepill/process_statistics.rb +27 -0
  38. data/lib/bluepill/socket.rb +58 -0
  39. data/lib/bluepill/system.rb +265 -0
  40. data/lib/bluepill/trigger.rb +60 -0
  41. data/lib/bluepill/triggers/flapping.rb +56 -0
  42. data/lib/bluepill/util/rotational_array.rb +20 -0
  43. data/lib/bluepill/version.rb +4 -0
  44. data/local-bluepill +129 -0
  45. data/spec/lib/bluepill/logger_spec.rb +3 -0
  46. data/spec/lib/bluepill/process_spec.rb +96 -0
  47. data/spec/lib/bluepill/process_statistics_spec.rb +24 -0
  48. data/spec/lib/bluepill/system_spec.rb +36 -0
  49. data/spec/spec_helper.rb +15 -0
  50. metadata +302 -0
@@ -0,0 +1,58 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'socket'
3
+
4
module Bluepill
  # Helpers around the UNIX domain socket that lives at
  # <base_dir>/socks/<name>.sock and is used to send commands to a server.
  #
  # NOTE(review): Timeout is referenced below but only 'socket' is required in
  # the visible part of this file -- confirm 'timeout' is loaded elsewhere.
  module Socket
    TIMEOUT = 60 # Used for client commands
    MAX_ATTEMPTS = 5

    extend self

    # Opens a client connection to the socket of application +name+.
    # The block (if any) is forwarded to UNIXSocket.open.
    def client(base_dir, name, &block)
      UNIXSocket.open(socket_path(base_dir, name), &block)
    end

    # Sends +command+ to the server and returns the unmarshalled reply.
    #
    # Each attempt is bounded by TIMEOUT seconds. An empty reply or a timeout
    # is retried up to MAX_ATTEMPTS times; after the last failed attempt the
    # process is aborted with an error message.
    def client_command(base_dir, name, command)
      res = nil
      MAX_ATTEMPTS.times do |current_attempt|
        begin
          client(base_dir, name) do |socket|
            Timeout.timeout(TIMEOUT) do
              socket.puts command
              payload = socket.read
              # Marshal.load("") raises ArgumentError, not EOFError, so an
              # empty reply has to be converted explicitly to reach the retry
              # branch below.
              raise EOFError, "empty response from server" if payload.nil? || payload.empty?
              # NOTE(review): Marshal.load must only ever see trusted data;
              # this relies on the socket being reachable by trusted users only.
              res = Marshal.load(payload)
            end
          end
          break
        rescue EOFError, Timeout::Error
          if current_attempt == MAX_ATTEMPTS - 1
            abort("Socket Timeout: Server may not be responding")
          end
          puts "Retry #{current_attempt + 1} of #{MAX_ATTEMPTS}"
        end
      end
      res
    end

    # Creates and returns the listening UNIXServer for application +name+.
    #
    # If the socket file already exists, a connection attempt decides whether
    # it is stale (connection refused: the file is deleted and the server is
    # re-created) or owned by a live server (the process exits with status 7).
    def server(base_dir, name)
      socket_path = self.socket_path(base_dir, name)
      begin
        UNIXServer.open(socket_path)
      rescue Errno::EADDRINUSE
        # if sock file has been created. test to see if there is a server
        begin
          probe = UNIXSocket.open(socket_path)
        rescue Errno::ECONNREFUSED
          File.delete(socket_path)
          return UNIXServer.open(socket_path)
        else
          probe.close
          # This module defines no logger, so report on stderr directly.
          $stderr.puts("Server is already running!")
          exit(7)
        end
      end
    end

    # Returns the path of the socket file for application +name+.
    # +name+ may be a String or a Symbol.
    def socket_path(base_dir, name)
      File.join(base_dir, 'socks', "#{name}.sock")
    end
  end
end
@@ -0,0 +1,265 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'etc'
3
+ require "shellwords"
4
+
5
module Bluepill
  # This module represents the system that bluepill is running on. It is mainly
  # used to memoize the result of running ps so that every watch of every
  # process does not result in a fork.
  #
  # NOTE(review): FileUtils and Daemonize are referenced below but are not
  # required in the visible part of this file -- confirm they are loaded
  # elsewhere.
  module System
    APPEND_MODE = "a"
    extend self

    # The position of each field in ps output
    IDX_MAP = {
      :pid  => 0,
      :ppid => 1,
      :pcpu => 2,
      :rss  => 3
    }

    # Returns true when a process with +pid+ exists (signal 0 probes without
    # delivering anything), false when there is no such process.
    def pid_alive?(pid)
      ::Process.kill(0, pid)
      true
    rescue Errno::EPERM # no permission, but it is definitely alive
      true
    rescue Errno::ESRCH
      false
    end

    # Returns the %CPU column of +pid+ as a Float, optionally including all of
    # its descendants, or nil when +pid+ is not in the ps snapshot.
    def cpu_usage(pid, include_children)
      summed_ps_field(:pcpu, pid, include_children)
    end

    # Returns the RSS column of +pid+ as a Float, optionally including all of
    # its descendants, or nil when +pid+ is not in the ps snapshot.
    def memory_usage(pid, include_children)
      summed_ps_field(:rss, pid, include_children)
    end

    # Returns the pids of all descendants of +parent_pid+ found in the ps
    # snapshot: direct children first, followed by each child's descendants.
    def get_children(parent_pid)
      by_parent = Hash.new { |index, ppid| index[ppid] = [] }
      ps_axu.each_value do |chunks|
        by_parent[chunks[IDX_MAP[:ppid]].to_i] << chunks[IDX_MAP[:pid]].to_i
      end

      root = parent_pid.to_i
      collect_descendants(by_parent, root, root => true)
    end

    # Returns the pid of the child that executes the cmd
    #
    # Recognised +options+ keys (all read below): :uid, :gid,
    # :supplementary_groups, :pid_file, :logger, :working_dir, :environment,
    # :stdin, :stdout, :stderr.
    # Returns nil in the parent when no positive pid was reported back.
    def daemonize(cmd, options = {})
      rd, wr = IO.pipe

      child = Daemonize.safefork
      if child
        # we do not wanna create zombies, so detach ourselves from the child exit status
        ::Process.detach(child)

        # parent
        wr.close

        daemon_id = rd.read.to_i
        rd.close

        return daemon_id if daemon_id > 0

      else
        # child
        rd.close

        drop_privileges(options[:uid], options[:gid], options[:supplementary_groups])

        # if we cannot write the pid file as the provided user, err out
        exit unless can_write_pid_file(options[:pid_file], options[:logger])

        to_daemonize = lambda do
          apply_process_environment(options)

          redirect_io(*options.values_at(:stdin, :stdout, :stderr))

          ::Kernel.exec(*Shellwords.shellwords(cmd))
          exit
        end

        daemon_id = Daemonize.call_as_daemon(to_daemonize, nil, cmd)

        File.open(options[:pid_file], "w") { |f| f.write(daemon_id) }

        # report the daemon pid back to the parent through the pipe
        wr.write daemon_id
        wr.close

        exit
      end
    end

    # Removes +filename+ if it is set and exists. A permission error is retried
    # (three attempts in total) and then reported on stderr; a file that
    # vanished in the meantime is ignored.
    def delete_if_exists(filename)
      tries = 0

      begin
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        File.unlink(filename) if filename && File.exist?(filename)
      rescue IOError, Errno::ENOENT
      rescue Errno::EACCES
        retry if (tries += 1) < 3
        $stderr.puts("Warning: permission denied trying to delete #{filename}")
      end
    end

    # Returns the stdout, stderr and exit code of the cmd
    #
    # The result is a Hash with the keys :stdout, :stderr and :exit_code.
    # Recognised +options+ keys: :uid, :gid, :supplementary_groups,
    # :working_dir, :environment.
    def execute_blocking(cmd, options = {})
      rd, wr = IO.pipe

      child = Daemonize.safefork
      if child
        # parent
        wr.close

        cmd_status = rd.read
        rd.close

        ::Process.waitpid(child)

        if cmd_status.nil? || cmd_status.empty?
          {:exit_code => 0, :stdout => '', :stderr => ''}
        else
          Marshal.load(cmd_status)
        end
      else
        # child
        rd.close

        # create a child in which we can override the stdin, stdout and stderr
        cmd_out_read, cmd_out_write = IO.pipe
        cmd_err_read, cmd_err_write = IO.pipe

        pid = fork {
          # grandchild
          drop_privileges(options[:uid], options[:gid], options[:supplementary_groups])

          apply_process_environment(options)

          # close unused fds so ancestors wont hang. This line is the only reason we are not
          # using something like popen3. If this fd is not closed, the .read call on the parent
          # will never return because "wr" would still be open in the "exec"-ed cmd
          wr.close

          # we do not care about stdin of cmd
          STDIN.reopen("/dev/null")

          # point stdout of cmd to somewhere we can read
          cmd_out_read.close
          STDOUT.reopen(cmd_out_write)
          cmd_out_write.close

          # same thing for stderr
          cmd_err_read.close
          STDERR.reopen(cmd_err_write)
          cmd_err_write.close

          # finally, replace grandchild with cmd
          ::Kernel.exec(*Shellwords.shellwords(cmd))
        }

        # we do not use these ends of the pipes in the child
        cmd_out_write.close
        cmd_err_write.close

        # Drain both pipes concurrently *before* reaping the command. Waiting
        # first would deadlock as soon as the command writes more than a pipe
        # buffer's worth of output: it would block on write while we block in
        # waitpid.
        out_reader = Thread.new { cmd_out_read.read }
        err_reader = Thread.new { cmd_err_read.read }

        # wait for the cmd to finish executing and acknowledge its death
        _, status = ::Process.waitpid2(pid)

        # collect stdout, stderr and exitcode
        result = {
          :stdout => out_reader.value,
          :stderr => err_reader.value,
          :exit_code => status.exitstatus
        }

        # We're done with these ends of the pipes as well
        cmd_out_read.close
        cmd_err_read.close

        # Time to tell the parent about what went down
        wr.write Marshal.dump(result)
        wr.close

        exit
      end
    end

    # Memoization store shared by the methods of this module.
    def store
      @store ||= Hash.new
    end

    # Drops all memoized data so that the next ps_axu call runs ps again.
    def reset_data
      store.clear
    end

    # Returns the memoized ps snapshot: a Hash mapping each pid (Integer) to
    # the array of column strings of its ps line, indexed as in IDX_MAP.
    def ps_axu
      # TODO: need a mutex here
      store[:ps_axu] ||= begin
        # BSD style ps invocation
        parse_ps_output(`ps axo pid,ppid,pcpu,rss`)
      end
    end

    # be sure to call this from a fork otherwise it will modify the attributes
    # of the bluepill daemon
    #
    # Does nothing unless the effective user is root. +uid+, +gid+ and the
    # entries of +supplementary_groups+ are names (looked up via Etc).
    def drop_privileges(uid, gid, supplementary_groups)
      if ::Process::Sys.geteuid == 0
        uid_num = Etc.getpwnam(uid).uid if uid
        gid_num = Etc.getgrnam(gid).gid if gid

        supplementary_groups ||= []

        group_nums = supplementary_groups.map do |group|
          Etc.getgrnam(group).gid
        end

        ::Process.groups = [gid_num] if gid
        ::Process.groups |= group_nums unless group_nums.empty?
        ::Process::Sys.setgid(gid_num) if gid
        ::Process::Sys.setuid(uid_num) if uid

        if uid
          # keep the current HOME when the passwd entry gives no directory
          passwd = Etc.getpwuid(uid_num)
          ENV['HOME'] = passwd.dir if passwd && passwd.dir
        end
      end
    end

    # Returns true when +pid_file+ can be created (it is touched and removed
    # again); otherwise logs the error through +logger+ and returns false.
    def can_write_pid_file(pid_file, logger)
      FileUtils.touch(pid_file)
      File.unlink(pid_file)
      return true

    rescue StandardError => e # not Exception: never swallow SystemExit/signals
      logger.warning "%s - %s" % [e.class.name, e.message]
      e.backtrace.each {|l| logger.warning l}
      return false
    end

    # Re-opens the standard streams onto the given targets; nil targets are
    # left alone. When stdout and stderr name the same target, stderr is
    # re-opened onto stdout instead of opening that target twice.
    def redirect_io(io_in, io_out, io_err)
      $stdin.reopen(io_in) if io_in

      if !io_out.nil? && !io_err.nil? && io_out == io_err
        $stdout.reopen(io_out, APPEND_MODE)
        $stderr.reopen($stdout)

      else
        $stdout.reopen(io_out, APPEND_MODE) if io_out
        $stderr.reopen(io_err, APPEND_MODE) if io_err
      end
    end

    private

    # Sums one ps column for +pid+ and, optionally, all of its descendants.
    def summed_ps_field(field, pid, include_children)
      ps = ps_axu
      return unless ps[pid]

      column = IDX_MAP[field]
      total = ps[pid][column].to_f
      return total unless include_children

      get_children(pid).inject(total) do |sum, child_pid|
        ps[child_pid] ? sum + ps[child_pid][column].to_f : sum
      end
    end

    # Walks the ppid index depth-first; +seen+ guards against cycles such as a
    # row whose pid equals its ppid.
    def collect_descendants(by_parent, parent_pid, seen)
      children = by_parent.fetch(parent_pid, []).reject { |pid| seen[pid] }
      children.each { |pid| seen[pid] = true }
      children + children.map { |pid| collect_descendants(by_parent, pid, seen) }.flatten
    end

    # Turns raw ps output into the pid => columns table. Lines whose first
    # column is not a number (the header, blank lines) are skipped.
    def parse_ps_output(output)
      output.split("\n").inject(Hash.new) do |table, line|
        chunks = line.split
        pid = chunks[IDX_MAP[:pid]]
        table[pid.to_i] = chunks if pid && pid =~ /\A\d+\z/
        table
      end
    end

    # Applies the :working_dir and :environment options to the current process.
    def apply_process_environment(options)
      # Setting end PWD env emulates bash behavior when dealing with symlinks
      Dir.chdir(ENV["PWD"] = options[:working_dir].to_s) if options[:working_dir]
      options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  # Base class for triggers. Every named subclass is registered under the
  # underscored, symbolized last segment of its class name and can be looked up
  # with Trigger[name].
  class Trigger
    @implementations = {}

    # Registers +klass+. Anonymous classes (no name) are skipped.
    # NOTE(review): String#underscore is not core Ruby; presumably provided by
    # ActiveSupport -- confirm it is loaded before any subclass is defined.
    def self.inherited(klass)
      super
      return if klass.name.nil?
      registry[klass.name.split('::').last.underscore.to_sym] = klass
    end

    # Returns the trigger class registered under +name+ (a Symbol), or nil.
    def self.[](name)
      registry[name]
    end

    # The registry lives on Trigger itself; going through this helper makes
    # lookups and registrations work when invoked on a subclass as well.
    def self.registry
      Trigger.instance_variable_get(:@implementations)
    end
    private_class_method :registry

    attr_accessor :process, :logger, :mutex, :scheduled_events

    # +options[:logger]+ is the only option read here.
    def initialize(process, options = {})
      self.process = process
      self.logger = options[:logger]
      self.mutex = Mutex.new
      self.scheduled_events = []
    end

    # Cancels everything that is still scheduled.
    def reset!
      self.cancel_all_events
    end

    def notify(transition)
      raise "Implement in subclass"
    end

    # Forwards +event+ to the process, tagged with this trigger's class name.
    def dispatch!(event)
      self.process.dispatch!(event, self.class.name.split("::").last)
    end

    # Dispatches +event+ from a background thread after +delay+ seconds.
    # The [event, thread] pair is tracked in scheduled_events until the thread
    # finishes or is cancelled.
    def schedule_event(event, delay)
      # TODO: maybe wrap this in a ScheduledEvent class with methods like cancel
      #
      # The worker is created and registered under the mutex: the worker needs
      # the same mutex to unregister itself, so it can never do that before it
      # has been registered (which would leave a stale entry behind).
      self.mutex.synchronize do
        worker = Thread.new(self) do |trigger|
          begin
            sleep delay.to_f
            trigger.dispatch!(event)
          rescue StandardError => e
            trigger.logger.err(e)
            trigger.logger.err(e.backtrace.join("\n"))
          ensure
            # unregister on success, failure and cancellation alike
            trigger.mutex.synchronize do
              trigger.scheduled_events.delete_if { |_, scheduled| scheduled == Thread.current }
            end
          end
        end

        self.scheduled_events.push([event, worker])
      end
    end

    # Kills every scheduled thread and forgets about it.
    def cancel_all_events
      self.logger.info "Canceling all scheduled events"
      self.mutex.synchronize do
        workers = self.scheduled_events.map { |_, scheduled| scheduled }
        self.scheduled_events.clear

        # If we are running inside one of the scheduled threads, kill it last:
        # killing the current thread stops this method on the spot.
        own, others = workers.partition { |scheduled| scheduled == Thread.current }
        others.each { |scheduled| scheduled.kill }
        own.each { |scheduled| scheduled.kill }
      end
    end

  end
end
@@ -0,0 +1,56 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  module Triggers
    # Detects a process that keeps (re)starting: once +times+ transitions into
    # one of TRIGGER_STATES have been recorded and the oldest and newest of
    # them are at most +within+ seconds apart, the pending transition is halted,
    # the process is unmonitored and, unless +retry_in+ is 0, a start is
    # scheduled +retry_in+ seconds later.
    class Flapping < Bluepill::Trigger
      TRIGGER_STATES = [:starting, :restarting]

      PARAMS = [:times, :within, :retry_in]

      attr_accessor(*PARAMS)
      attr_reader :timeline

      # +options+ may carry :times, :within and :retry_in (defaults 5, 1, 5).
      # The caller's hash is left untouched; the merged hash is what the
      # superclass constructor receives.
      def initialize(process, options = {})
        options = { :times => 5, :within => 1, :retry_in => 5 }.merge(options)

        PARAMS.each do |name|
          instance_variable_set("@#{name}", options[name])
        end

        @timeline = Util::RotationalArray.new(@times)
        super(process, options)
      end

      # Records the time of every transition into a trigger state and checks
      # whether the process is flapping.
      def notify(transition)
        if TRIGGER_STATES.include?(transition.to_name)
          self.timeline << Time.now.to_i
          self.check_flapping
        end
      end

      def reset!
        @timeline.clear
        super
      end

      def check_flapping
        # The process has not flapped if we haven't encountered enough incidents
        return unless @timeline.compact.length == self.times

        # Check if the incidents happened within the timeframe
        flapping = (@timeline.last - @timeline.first) <= self.within
        return unless flapping

        if self.retry_in == 0
          # retry_in zero means "do not retry, ever"
          self.logger.info "Flapping detected: not retrying"
        else
          self.logger.info "Flapping detected: retrying in #{self.retry_in} seconds"
          self.schedule_event(:start, self.retry_in)
        end
        self.schedule_event(:unmonitor, 0)

        @timeline.clear

        # This will prevent a transition from happening in the process state_machine
        throw :halt
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  module Util
    # An Array that keeps at most +size+ elements when filled through #push or
    # #<<: once the capacity is exceeded, the oldest elements are dropped from
    # the front.
    class RotationalArray < Array
      def initialize(size)
        @capacity = size

        super() # no size - intentionally
      end

      # Appends the given value(s), like Array#push, then drops as many
      # elements from the front as needed to get back to the capacity.
      # Returns self so that calls can be chained.
      def push(*values)
        super

        overflow = self.length - @capacity
        self.shift(overflow) if overflow > 0
        self
      end
      alias_method :<<, :push
    end
  end
end