kostya-bluepill 0.0.60.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/.gitignore +10 -0
  2. data/.rspec +1 -0
  3. data/DESIGN.md +10 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +22 -0
  6. data/README.md +305 -0
  7. data/Rakefile +38 -0
  8. data/bin/bluepill +104 -0
  9. data/bluepill.gemspec +37 -0
  10. data/examples/example.rb +87 -0
  11. data/examples/new_example.rb +89 -0
  12. data/examples/new_runit_example.rb +29 -0
  13. data/examples/runit_example.rb +26 -0
  14. data/lib/bluepill.rb +38 -0
  15. data/lib/bluepill/application.rb +201 -0
  16. data/lib/bluepill/application/client.rb +8 -0
  17. data/lib/bluepill/application/server.rb +23 -0
  18. data/lib/bluepill/condition_watch.rb +50 -0
  19. data/lib/bluepill/controller.rb +110 -0
  20. data/lib/bluepill/dsl.rb +12 -0
  21. data/lib/bluepill/dsl/app_proxy.rb +25 -0
  22. data/lib/bluepill/dsl/process_factory.rb +122 -0
  23. data/lib/bluepill/dsl/process_proxy.rb +44 -0
  24. data/lib/bluepill/group.rb +72 -0
  25. data/lib/bluepill/process.rb +480 -0
  26. data/lib/bluepill/process_conditions.rb +14 -0
  27. data/lib/bluepill/process_conditions/always_true.rb +18 -0
  28. data/lib/bluepill/process_conditions/cpu_usage.rb +19 -0
  29. data/lib/bluepill/process_conditions/http.rb +58 -0
  30. data/lib/bluepill/process_conditions/mem_usage.rb +32 -0
  31. data/lib/bluepill/process_conditions/process_condition.rb +22 -0
  32. data/lib/bluepill/process_statistics.rb +27 -0
  33. data/lib/bluepill/socket.rb +58 -0
  34. data/lib/bluepill/system.rb +236 -0
  35. data/lib/bluepill/trigger.rb +59 -0
  36. data/lib/bluepill/triggers/flapping.rb +56 -0
  37. data/lib/bluepill/util/rotational_array.rb +20 -0
  38. data/lib/bluepill/version.rb +4 -0
  39. data/spec/lib/bluepill/process_statistics_spec.rb +24 -0
  40. data/spec/lib/bluepill/system_spec.rb +36 -0
  41. data/spec/spec_helper.rb +19 -0
  42. metadata +304 -0
@@ -0,0 +1,14 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  # Namespace that holds the condition classes a process watch can use.
  module ProcessConditions
    # Resolves a condition class from its name.
    #
    # name - anything responding to #to_s (e.g. :mem_usage). The string is
    #        passed through String#camelcase, which is not defined in this
    #        file (presumably ActiveSupport - confirm), and the result is
    #        looked up as a constant of this module.
    #
    # Returns the constant; raises NameError when no such constant exists.
    def self.[](name)
      class_name = name.to_s.camelcase
      const_get(class_name)
    end
  end
end
9
+
10
# The base class is required explicitly first; the concrete conditions in
# process_conditions/ subclass it and are then loaded via the glob below.
require "bluepill/process_conditions/process_condition"
condition_glob = "#{File.dirname(__FILE__)}/process_conditions/*.rb"
Dir[condition_glob].each { |condition_file| require condition_file }
14
+
@@ -0,0 +1,18 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  module ProcessConditions
    # Condition that samples a constant and whose check never fails.
    class AlwaysTrue < ProcessCondition
      # options - Hash; :below is stored in @below, but nothing in this
      #           class reads it afterwards.
      def initialize(options = {})
        threshold = options[:below]
        @below = threshold
      end

      # The pid is ignored; the sampled value is always 1.
      def run(pid)
        1
      end

      # The sampled value is ignored; the check always passes.
      def check(value)
        true
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  module ProcessConditions
    # Condition that passes while a process's CPU usage stays under a limit.
    class CpuUsage < ProcessCondition
      # options - Hash; :below is the limit the sampled value is compared to.
      def initialize(options = {})
        limit = options[:below]
        @below = limit
      end

      # Samples the CPU usage reported by System.cpu_usage for pid and
      # coerces it with #to_f (so a nil reading becomes 0.0).
      def run(pid)
        reading = System.cpu_usage(pid)
        reading.to_f
      end

      # Returns true when value is strictly less than the configured limit.
      def check(value)
        value < @below
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'net/http'
3
+ require 'uri'
4
+
5
module Bluepill
  module ProcessConditions
    # Condition that issues an HTTP GET and checks the response class and,
    # optionally, the response body.
    class Http < ProcessCondition
      # options - Hash with:
      #   :url          - String URL to request (parsed with URI.parse).
      #   :kind         - expected response: an Integer status code, or a
      #                   String/Symbol naming a Net::HTTP* response class
      #                   (converted with String#camelize, which is not defined
      #                   in this file - presumably ActiveSupport, confirm).
      #                   Defaults to Net::HTTPSuccess.
      #   :pattern      - optional object matched against the body with ===.
      #   :open_timeout, :read_timeout, :timeout - seconds; default 5.
      def initialize(options = {})
        @uri = URI.parse(options[:url])
        @kind = case options[:kind]
                # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2,
                # while Integer matches small integers on every Ruby version.
                when Integer then Net::HTTPResponse::CODE_TO_OBJ[options[:kind].to_s]
                when String, Symbol then Net.const_get("HTTP#{options[:kind].to_s.camelize}")
                else
                  Net::HTTPSuccess
                end
        @pattern = options[:pattern] || nil
        @open_timeout = (options[:open_timeout] || options[:timeout] || 5).to_i
        @read_timeout = (options[:read_timeout] || options[:timeout] || 5).to_i
      end

      # Performs the GET request. The pid argument is not used.
      #
      # Returns the Net::HTTPResponse, or the rescued exception object when
      # the request fails, so that #check can reject it.
      def run(pid)
        session = Net::HTTP.new(@uri.host, @uri.port)
        if @uri.scheme == 'https'
          require 'net/https'
          session.use_ssl = true
          # Certificates are deliberately not verified here.
          session.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end
        session.open_timeout = @open_timeout
        session.read_timeout = @read_timeout
        hide_net_http_bug do
          session.start do |http|
            http.get(request_target)
          end
        end
      rescue => error
        error
      end

      # Returns true when value is an instance of the expected response class
      # and, if a pattern was configured, the body matches it.
      def check(value)
        return false unless value.kind_of?(@kind)
        return true unless @pattern
        return false unless value.class.body_permitted?
        @pattern === value.body
      end

      private

      # Path plus query string to request. URI::HTTP#request_uri keeps the
      # query and yields "/" for an empty path; other URI classes fall back to
      # the plain path.
      def request_target
        @uri.respond_to?(:request_uri) ? @uri.request_uri : @uri.path
      end

      # Re-raises the NoMethodError that Net::HTTP produces for `closed?' on
      # nil as Errno::ECONNREFUSED; any other NoMethodError passes through.
      def hide_net_http_bug
        yield
      rescue NoMethodError => e
        if e.to_s =~ /#{Regexp.escape(%q|undefined method `closed?' for nil:NilClass|)}/
          raise Errno::ECONNREFUSED, "Connection refused attempting to contact #{@uri.scheme}://#{@uri.host}:#{@uri.port}"
        else
          raise
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  module ProcessConditions
    # Condition that passes while a process's memory usage stays under a limit.
    class MemUsage < ProcessCondition
      MB = 1024 ** 2
      FORMAT_STR = "%d%s"
      MB_LABEL = "MB"
      KB_LABEL = "KB"

      # options - Hash; :below is the limit that value.kilobytes is compared to.
      def initialize(options = {})
        limit = options[:below]
        @below = limit
      end

      # Samples System.memory_usage for pid (the rss column of ps) as a Float.
      def run(pid)
        reading = System.memory_usage(pid)
        reading.to_f
      end

      # True when the sample, converted with #kilobytes, is under the limit.
      # #kilobytes is not defined in this file (presumably ActiveSupport's
      # numeric extension - confirm).
      def check(value)
        value.kilobytes < @below
      end

      # Renders the sample with an MB label once value.kilobytes reaches MB,
      # and with a KB label otherwise.
      def format_value(value)
        return FORMAT_STR % [value, KB_LABEL] unless value.kilobytes >= MB

        megabytes = (value / 1024).round
        FORMAT_STR % [megabytes, MB_LABEL]
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  module ProcessConditions
    # Base class for conditions. Subclasses supply #run and #check.
    class ProcessCondition
      # options - Hash kept verbatim in @options.
      def initialize(options = {})
        @options = options
      end

      # Takes a sample for pid. Must be overridden; raises here.
      def run(pid)
        raise RuntimeError, "Implement in subclass!"
      end

      # Judges a sampled value. Must be overridden; raises here.
      def check(value)
        raise RuntimeError, "Implement in subclass!"
      end

      # Presentation hook for a sampled value; the default is the value itself.
      def format_value(value)
        value
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
module Bluepill
  # Keeps a bounded history of events recorded for a process and renders it
  # as text.
  class ProcessStatistics
    # month/day/year hour:minute:second. The minutes field uses %M; %I is the
    # 12-hour hour and would print the hour twice.
    STRFTIME = "%m/%d/%Y %H:%M:%S".freeze
    EVENTS_TO_PERSIST = 10

    attr_reader :events

    # possibly persist this data.
    def initialize
      @events = Util::RotationalArray.new(EVENTS_TO_PERSIST)
    end

    # Appends an [event, reason, Time.now] entry to the history.
    def record_event(event, reason)
      events.push([event, reason, Time.now])
    end

    # Renders the history in reverse of the stored order, one event per line;
    # a nil/false reason is shown as "unspecified".
    def to_s
      str = events.reverse.map do |(event, reason, time)|
        " #{event} at #{time.strftime(STRFTIME)} - #{reason || "unspecified"}"
      end.join("\n")

      "event history:\n#{str}"
    end
  end
end
27
+
@@ -0,0 +1,58 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'socket'
3
+
4
module Bluepill
  # Helpers for the UNIX domain socket that connects the command-line client
  # to a running server. All methods are callable on the module itself.
  module Socket
    TIMEOUT = 60 # Used for client commands
    MAX_ATTEMPTS = 5

    extend self

    # Opens a client connection to the socket for +name+ under +base_dir+ and
    # forwards the block to UNIXSocket.open.
    def client(base_dir, name, &block)
      UNIXSocket.open(socket_path(base_dir, name), &block)
    end

    # Sends +command+ to the server and returns the unmarshalled reply.
    #
    # Each attempt is limited to TIMEOUT seconds. On EOFError or
    # Timeout::Error the command is retried, up to MAX_ATTEMPTS attempts in
    # total; after the last failed attempt the process is aborted.
    #
    # NOTE(review): the reply is decoded with Marshal.load, so whatever
    # listens on the socket must be trusted.
    def client_command(base_dir, name, command)
      res = nil
      MAX_ATTEMPTS.times do |current_attempt|
        begin
          client(base_dir, name) do |socket|
            Timeout.timeout(TIMEOUT) do
              socket.puts command
              res = Marshal.load(socket.read)
            end
          end
          break
        rescue EOFError, Timeout::Error
          if current_attempt == MAX_ATTEMPTS - 1
            abort("Socket Timeout: Server may not be responding")
          end
          puts "Retry #{current_attempt + 1} of #{MAX_ATTEMPTS}"
        end
      end
      res
    end

    # Creates the listening UNIXServer for +name+ under +base_dir+.
    #
    # When the socket file already exists, a probe connection decides whether
    # it is stale: a refused connection means nobody is listening, so the file
    # is removed and a fresh server is bound. A successful probe means another
    # server is live; this is reported on stderr and the process exits with
    # status 7.
    def server(base_dir, name)
      socket_path = self.socket_path(base_dir, name)
      begin
        UNIXServer.open(socket_path)
      rescue Errno::EADDRINUSE
        # if sock file has been created. test to see if there is a server
        begin
          # Close the probe right away so its descriptor is not leaked.
          UNIXSocket.open(socket_path).close
        rescue Errno::ECONNREFUSED
          File.delete(socket_path)
          return UNIXServer.open(socket_path)
        else
          # No logger is defined in this module, so report on stderr.
          $stderr.puts("Server is already running!")
          exit(7)
        end
      end
    end

    # Path of the socket file: <base_dir>/socks/<name>.sock
    def socket_path(base_dir, name)
      File.join(base_dir, 'socks', name + ".sock")
    end
  end
end
@@ -0,0 +1,236 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'etc'
3
+ require "shellwords"
4
+
5
module Bluepill
  # This class represents the system that bluepill is running on. It's mainly used to memoize
  # results of running ps so that every watch in every process will not result in a fork
  module System
    APPEND_MODE = "a"
    extend self

    # The position of each field in ps output
    IDX_MAP = {
      :pid => 0,
      :ppid => 1,
      :pcpu => 2,
      :rss => 3
    }

    # Returns true when a process with the given pid exists.
    #
    # Signal 0 only probes the process. EPERM means the process exists but we
    # may not signal it, so it counts as alive; ESRCH means there is no such
    # process.
    def pid_alive?(pid)
      ::Process.kill(0, pid)
      true
    rescue Errno::EPERM
      true
    rescue Errno::ESRCH
      false
    end

    # CPU column of the memoized ps output for pid as a Float, or nil when the
    # pid is not in the table.
    def cpu_usage(pid)
      ps_axu[pid] && ps_axu[pid][IDX_MAP[:pcpu]].to_f
    end

    # RSS column of the memoized ps output for pid as a Float, or nil when the
    # pid is not in the table.
    def memory_usage(pid)
      ps_axu[pid] && ps_axu[pid][IDX_MAP[:rss]].to_f
    end

    # Returns the pids (Integers) of all processes whose parent is parent_pid.
    def get_children(parent_pid)
      child_pids = Array.new
      ps_axu.each_pair do |pid, chunks|
        child_pids << chunks[IDX_MAP[:pid]].to_i if chunks[IDX_MAP[:ppid]].to_i == parent_pid.to_i
      end
      child_pids
    end

    # Returns the pid of the child that executes the cmd
    #
    # options - Hash; keys read here: :uid, :gid, :supplementary_groups,
    #           :pid_file, :logger, :working_dir, :environment, :stdin,
    #           :stdout, :stderr.
    #
    # The parent returns the daemon pid read from the pipe, or nil when no
    # positive pid was received.
    def daemonize(cmd, options = {})
      rd, wr = IO.pipe

      if child = Daemonize.safefork
        # we do not wanna create zombies, so detach ourselves from the child exit status
        ::Process.detach(child)

        # parent
        wr.close

        daemon_id = rd.read.to_i
        rd.close

        return daemon_id if daemon_id > 0

      else
        # child
        rd.close

        drop_privileges(options[:uid], options[:gid], options[:supplementary_groups])

        # if we cannot write the pid file as the provided user, err out
        exit unless can_write_pid_file(options[:pid_file], options[:logger])

        to_daemonize = lambda do
          # Setting end PWD env emulates bash behavior when dealing with symlinks
          Dir.chdir(ENV["PWD"] = options[:working_dir]) if options[:working_dir]
          options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]

          redirect_io(*options.values_at(:stdin, :stdout, :stderr))

          ::Kernel.exec(*Shellwords.shellwords(cmd))
          exit
        end

        daemon_id = Daemonize.call_as_daemon(to_daemonize, nil, cmd)

        File.open(options[:pid_file], "w") {|f| f.write(daemon_id)}

        wr.write daemon_id
        wr.close

        exit
      end
    end

    # Returns the stdout, stderr and exit code of the cmd
    #
    # The result is a Hash with :stdout, :stderr and :exit_code. When the
    # child reports nothing, an all-empty result with exit code 0 is returned.
    #
    # NOTE(review): the child waits for cmd to exit before it reads cmd's
    # output pipes; a command that writes a lot of output could block on a
    # full pipe - confirm before relying on this for chatty commands.
    def execute_blocking(cmd, options = {})
      rd, wr = IO.pipe

      if child = Daemonize.safefork
        # parent
        wr.close

        cmd_status = rd.read
        rd.close

        ::Process.waitpid(child)

        cmd_status.strip != '' ? Marshal.load(cmd_status) : {:exit_code => 0, :stdout => '', :stderr => ''}
      else
        # child
        rd.close

        # create a child in which we can override the stdin, stdout and stderr
        cmd_out_read, cmd_out_write = IO.pipe
        cmd_err_read, cmd_err_write = IO.pipe

        pid = fork {
          # grandchild
          drop_privileges(options[:uid], options[:gid], options[:supplementary_groups])

          Dir.chdir(ENV["PWD"] = options[:working_dir]) if options[:working_dir]
          options[:environment].each { |key, value| ENV[key.to_s] = value.to_s } if options[:environment]

          # close unused fds so ancestors wont hang. This line is the only reason we are not
          # using something like popen3. If this fd is not closed, the .read call on the parent
          # will never return because "wr" would still be open in the "exec"-ed cmd
          wr.close

          # we do not care about stdin of cmd
          STDIN.reopen("/dev/null")

          # point stdout of cmd to somewhere we can read
          cmd_out_read.close
          STDOUT.reopen(cmd_out_write)
          cmd_out_write.close

          # same thing for stderr
          cmd_err_read.close
          STDERR.reopen(cmd_err_write)
          cmd_err_write.close

          # finally, replace grandchild with cmd
          ::Kernel.exec(*Shellwords.shellwords(cmd))
        }

        # we do not use these ends of the pipes in the child
        cmd_out_write.close
        cmd_err_write.close

        # wait for the cmd to finish executing and acknowledge its death
        ::Process.waitpid(pid)

        # collect stdout, stderr and exitcode
        result = {
          :stdout => cmd_out_read.read,
          :stderr => cmd_err_read.read,
          :exit_code => $?.exitstatus
        }

        # We're done with these ends of the pipes as well
        cmd_out_read.close
        cmd_err_read.close

        # Time to tell the parent about what went down
        wr.write Marshal.dump(result)
        wr.close

        exit
      end
    end

    # Memoization store shared by the helpers in this module.
    def store
      @store ||= Hash.new
    end

    # Drops all memoized data so the next lookup runs ps again.
    def reset_data
      store.clear unless store.empty?
    end

    # Memoized table of ps output: pid (Integer) => array of column strings
    # ordered as in IDX_MAP. The ps header line is parsed like any other line
    # and ends up under key 0.
    def ps_axu
      # TODO: need a mutex here
      store[:ps_axu] ||= begin
        # BSD style ps invocation
        lines = `ps axo pid,ppid,pcpu,rss`.split("\n")

        lines.inject(Hash.new) do |mem, line|
          chunks = line.split(/\s+/)
          chunks.delete_if {|c| c.strip.empty? }
          pid = chunks[IDX_MAP[:pid]].strip.to_i
          mem[pid] = chunks
          mem
        end
      end
    end

    # be sure to call this from a fork otherwise it will modify the attributes
    # of the bluepill daemon
    #
    # Does nothing unless the effective uid is 0. uid, gid and the entries of
    # supplementary_groups are names resolved through Etc.
    def drop_privileges(uid, gid, supplementary_groups)
      if ::Process::Sys.geteuid == 0
        uid_num = Etc.getpwnam(uid).uid if uid
        gid_num = Etc.getgrnam(gid).gid if gid

        supplementary_groups ||= []

        group_nums = supplementary_groups.map do |group|
          Etc.getgrnam(group).gid
        end

        ::Process.groups = [gid_num] if gid
        ::Process.groups |= group_nums unless group_nums.empty?
        # The group is changed before the user, while still privileged.
        ::Process::Sys.setgid(gid_num) if gid
        ::Process::Sys.setuid(uid_num) if uid
        ENV['HOME'] = Etc.getpwuid(uid_num).try(:dir) || ENV['HOME'] if uid
      end
    end

    # Checks that pid_file can be created by the current user by touching and
    # then removing it.
    #
    # Returns true on success. On failure the error is reported through
    # logger.warning when the logger offers it, otherwise on stderr, and
    # false is returned.
    def can_write_pid_file(pid_file, logger)
      FileUtils.touch(pid_file)
      File.unlink(pid_file)
      true
    rescue StandardError => e
      message = "%s - %s" % [e.class.name, e.message]
      if logger.respond_to?(:warning)
        logger.warning(message)
      else
        $stderr.puts(message)
      end
      false
    end

    # Reopens the standard streams onto the given paths; nil leaves a stream
    # alone. When stdout and stderr name the same target, stderr is pointed at
    # the reopened stdout so both share a single file handle.
    def redirect_io(io_in, io_out, io_err)
      $stdin.reopen(io_in) if io_in

      if !io_out.nil? && !io_err.nil? && io_out == io_err
        $stdout.reopen(io_out, APPEND_MODE)
        $stderr.reopen($stdout)

      else
        $stdout.reopen(io_out, APPEND_MODE) if io_out
        $stderr.reopen(io_err, APPEND_MODE) if io_err
      end
    end
  end
end