god 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +23 -0
- data/Manifest.txt +9 -0
- data/README.txt +9 -2
- data/Rakefile +8 -1
- data/bin/god +11 -214
- data/examples/single.god +66 -0
- data/ext/god/netlink_handler.c +16 -3
- data/lib/god.rb +153 -17
- data/lib/god/cli/command.rb +189 -0
- data/lib/god/cli/run.rb +120 -0
- data/lib/god/cli/version.rb +23 -0
- data/lib/god/conditions/complex.rb +86 -0
- data/lib/god/conditions/cpu_usage.rb +27 -0
- data/lib/god/conditions/disk_usage.rb +27 -0
- data/lib/god/conditions/flapping.rb +42 -11
- data/lib/god/conditions/http_response_code.rb +63 -3
- data/lib/god/conditions/memory_usage.rb +30 -1
- data/lib/god/conditions/process_exits.rb +24 -2
- data/lib/god/conditions/process_running.rb +32 -0
- data/lib/god/configurable.rb +5 -3
- data/lib/god/event_handler.rb +2 -2
- data/lib/god/hub.rb +12 -19
- data/lib/god/logger.rb +11 -2
- data/lib/god/process.rb +29 -20
- data/lib/god/socket.rb +41 -5
- data/lib/god/task.rb +6 -9
- data/lib/god/timer.rb +20 -13
- data/lib/god/watch.rb +3 -6
- data/test/configs/child_events/child_events.god +1 -1
- data/test/configs/complex/complex.god +59 -0
- data/test/configs/complex/simple_server.rb +3 -0
- data/test/test_conditions_disk_usage.rb +56 -0
- data/test/test_conditions_http_response_code.rb +15 -21
- data/test/test_god.rb +36 -0
- data/test/test_hub.rb +6 -4
- data/test/test_logger.rb +8 -0
- data/test/test_timer.rb +9 -0
- metadata +12 -2
@@ -1,9 +1,38 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :memory_usage
|
5
|
+
# Type: Poll
|
6
|
+
#
|
7
|
+
# Trigger when the resident memory of a process is above a specified limit.
|
8
|
+
#
|
9
|
+
# Parameters
|
10
|
+
# Required
|
11
|
+
# +pid_file+ is the pid file of the process in question. Automatically
|
12
|
+
# populated for Watches.
|
13
|
+
# +above+ is the amount of resident memory (in kilobytes) above which
|
14
|
+
# the condition should trigger. You can also use the sugar
|
15
|
+
# methods #kilobytes, #megabytes, and #gigabytes to clarify
|
16
|
+
# this amount (see examples).
|
17
|
+
#
|
18
|
+
# Examples
|
19
|
+
#
|
20
|
+
# Trigger if the process is using more than 100 megabytes of resident
|
21
|
+
# memory (from a Watch):
|
22
|
+
#
|
23
|
+
# on.condition(:memory_usage) do |c|
|
24
|
+
# c.above = 100.megabytes
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# Non-Watch Tasks must specify a PID file:
|
28
|
+
#
|
29
|
+
# on.condition(:memory_usage) do |c|
|
30
|
+
# c.above = 100.megabytes
|
31
|
+
# c.pid_file = "/var/run/mongrel.3000.pid"
|
32
|
+
# end
|
4
33
|
class MemoryUsage < PollCondition
|
5
34
|
attr_accessor :above, :times
|
6
|
-
|
35
|
+
|
7
36
|
def initialize
|
8
37
|
super
|
9
38
|
self.above = nil
|
@@ -1,6 +1,27 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :process_exits
|
5
|
+
# Type: Event
|
6
|
+
#
|
7
|
+
# Trigger when a process exits.
|
8
|
+
#
|
9
|
+
# Parameters
|
10
|
+
# Required
|
11
|
+
# +pid_file+ is the pid file of the process in question. Automatically
|
12
|
+
# populated for Watches.
|
13
|
+
#
|
14
|
+
# Examples
|
15
|
+
#
|
16
|
+
# Trigger if process exits (from a Watch):
|
17
|
+
#
|
18
|
+
# on.condition(:process_exits)
|
19
|
+
#
|
20
|
+
# Trigger if process exits:
|
21
|
+
#
|
22
|
+
# on.condition(:process_exits) do |c|
|
23
|
+
# c.pid_file = "/var/run/mongrel.3000.pid"
|
24
|
+
# end
|
4
25
|
class ProcessExits < EventCondition
|
5
26
|
def initialize
|
6
27
|
self.info = "process exited"
|
@@ -16,7 +37,8 @@ module God
|
|
16
37
|
pid = File.read(self.watch.pid_file).strip.to_i
|
17
38
|
|
18
39
|
begin
|
19
|
-
EventHandler.register(pid, :proc_exit) do
|
40
|
+
EventHandler.register(pid, :proc_exit) do |extra|
|
41
|
+
self.info = "process exited #{extra.inspect}"
|
20
42
|
Hub.trigger(self)
|
21
43
|
end
|
22
44
|
rescue StandardError
|
@@ -29,7 +51,7 @@ module God
|
|
29
51
|
pid = File.read(self.watch.pid_file).strip.to_i
|
30
52
|
EventHandler.deregister(pid, :proc_exit)
|
31
53
|
else
|
32
|
-
|
54
|
+
applog(self.watch, :error, "#{self.watch.name} could not deregister: no such PID file #{self.watch.pid_file} (#{self.base_name})")
|
33
55
|
end
|
34
56
|
end
|
35
57
|
end
|
@@ -1,6 +1,38 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :process_running
|
5
|
+
# Type: Poll
|
6
|
+
#
|
7
|
+
# Trigger when a process is running or not running depending on attributes.
|
8
|
+
#
|
9
|
+
# Parameters
|
10
|
+
# Required
|
11
|
+
# +pid_file+ is the pid file of the process in question. Automatically
|
12
|
+
# populated for Watches.
|
13
|
+
# +running+ specifies whether you want to trigger if the process is
|
14
|
+
# running (true) or whether it is not running (false)
|
15
|
+
#
|
16
|
+
# Examples
|
17
|
+
#
|
18
|
+
# Trigger if process IS NOT running (from a Watch):
|
19
|
+
#
|
20
|
+
# on.condition(:process_running) do |c|
|
21
|
+
# c.running = false
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# Trigger if process IS running (from a Watch):
|
25
|
+
#
|
26
|
+
# on.condition(:process_running) do |c|
|
27
|
+
# c.running = true
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# Non-Watch Tasks must specify a PID file:
|
31
|
+
#
|
32
|
+
# on.condition(:process_running) do |c|
|
33
|
+
# c.running = false
|
34
|
+
# c.pid_file = "/var/run/mongrel.3000.pid"
|
35
|
+
# end
|
4
36
|
class ProcessRunning < PollCondition
|
5
37
|
attr_accessor :running
|
6
38
|
|
data/lib/god/configurable.rb
CHANGED
@@ -39,10 +39,12 @@ module God
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def self.complain(text, c = nil)
|
42
|
-
|
42
|
+
watch = c.watch rescue nil
|
43
|
+
msg = ""
|
44
|
+
msg += "#{watch.name}: " if watch
|
45
|
+
msg += text
|
43
46
|
msg += " for #{c.friendly_name}" if c
|
44
|
-
|
45
|
-
puts msg
|
47
|
+
applog(watch, :error, msg)
|
46
48
|
false
|
47
49
|
end
|
48
50
|
|
data/lib/god/event_handler.rb
CHANGED
@@ -50,8 +50,8 @@ module God
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
-
def self.call(pid, event)
|
54
|
-
@@actions[pid][event].call if watching_pid?(pid) && @@actions[pid][event]
|
53
|
+
def self.call(pid, event, extra_data = {})
|
54
|
+
@@actions[pid][event].call(extra_data) if watching_pid?(pid) && @@actions[pid][event]
|
55
55
|
end
|
56
56
|
|
57
57
|
def self.watching_pid?(pid)
|
data/lib/god/hub.rb
CHANGED
@@ -83,8 +83,7 @@ module God
|
|
83
83
|
watch.move(dest)
|
84
84
|
rescue EventRegistrationFailedError
|
85
85
|
msg = watch.name + ' Event registration failed, moving back to previous state'
|
86
|
-
|
87
|
-
LOG.log(watch, :info, msg)
|
86
|
+
applog(watch, :info, msg)
|
88
87
|
|
89
88
|
dest = watch.state
|
90
89
|
retry
|
@@ -95,11 +94,10 @@ module God
|
|
95
94
|
end
|
96
95
|
end
|
97
96
|
end
|
98
|
-
rescue => e
|
97
|
+
rescue Exception => e
|
99
98
|
message = format("Unhandled exception (%s): %s\n%s",
|
100
99
|
e.class, e.message, e.backtrace.join("\n"))
|
101
|
-
|
102
|
-
abort message
|
100
|
+
applog(nil, :fatal, message)
|
103
101
|
end
|
104
102
|
end
|
105
103
|
end
|
@@ -136,11 +134,10 @@ module God
|
|
136
134
|
end
|
137
135
|
end
|
138
136
|
end
|
139
|
-
rescue => e
|
137
|
+
rescue Exception => e
|
140
138
|
message = format("Unhandled exception (%s): %s\n%s",
|
141
139
|
e.class, e.message, e.backtrace.join("\n"))
|
142
|
-
|
143
|
-
abort message
|
140
|
+
applog(nil, :fatal, message)
|
144
141
|
end
|
145
142
|
end
|
146
143
|
end
|
@@ -165,19 +162,16 @@ module God
|
|
165
162
|
if condition.info
|
166
163
|
Array(condition.info).each do |condition_info|
|
167
164
|
messages << "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
|
168
|
-
|
169
|
-
LOG.log(watch, :info, messages.last % [])
|
165
|
+
applog(watch, :info, messages.last)
|
170
166
|
end
|
171
167
|
else
|
172
168
|
messages << "#{watch.name} #{status} (#{condition.base_name})"
|
173
|
-
|
174
|
-
LOG.log(watch, :info, messages.last % [])
|
169
|
+
applog(watch, :info, messages.last)
|
175
170
|
end
|
176
171
|
|
177
172
|
# log
|
178
173
|
debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
|
179
|
-
|
180
|
-
LOG.log(watch, :debug, debug_message)
|
174
|
+
applog(watch, :debug, debug_message)
|
181
175
|
|
182
176
|
messages
|
183
177
|
end
|
@@ -210,18 +204,17 @@ module God
|
|
210
204
|
# warn about unmatched contacts
|
211
205
|
unless unmatched.empty?
|
212
206
|
msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
|
213
|
-
|
207
|
+
applog(condition.watch, :warn, msg)
|
214
208
|
end
|
215
209
|
|
216
210
|
# notify each contact
|
217
211
|
resolved_contacts.each do |c|
|
218
212
|
host = `hostname`.chomp rescue 'none'
|
219
213
|
c.notify(message, Time.now, spec[:priority], spec[:category], host)
|
220
|
-
|
214
|
+
|
221
215
|
msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
|
222
|
-
|
223
|
-
|
224
|
-
LOG.log(condition.watch, :info, msg % [])
|
216
|
+
|
217
|
+
applog(condition.watch, :info, msg % [])
|
225
218
|
end
|
226
219
|
end
|
227
220
|
end
|
data/lib/god/logger.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
module God
|
2
2
|
|
3
3
|
class Logger < ::Logger
|
4
|
+
SYSLOG_EQUIVALENTS = {:fatal => :crit,
|
5
|
+
:error => :err,
|
6
|
+
:warn => :debug,
|
7
|
+
:info => :debug,
|
8
|
+
:debug => :debug}
|
9
|
+
|
4
10
|
attr_accessor :logs
|
5
11
|
|
6
12
|
def initialize
|
@@ -32,7 +38,7 @@ module God
|
|
32
38
|
buf = StringIO.new
|
33
39
|
templog = ::Logger.new(buf)
|
34
40
|
templog.level = Logger::INFO
|
35
|
-
templog.send(level, text)
|
41
|
+
templog.send(level, text % [])
|
36
42
|
@mutex.synchronize do
|
37
43
|
@capture.puts(buf.string) if @capture
|
38
44
|
self.logs[watch.name] << [Time.now, buf.string] if watch
|
@@ -40,7 +46,10 @@ module God
|
|
40
46
|
templog.close
|
41
47
|
|
42
48
|
# send to regular logger
|
43
|
-
self.send(level, text)
|
49
|
+
self.send(level, text % [])
|
50
|
+
|
51
|
+
# send to syslog
|
52
|
+
Syslog.send(SYSLOG_EQUIVALENTS[level], text)
|
44
53
|
end
|
45
54
|
|
46
55
|
def watch_log_since(watch_name, since)
|
data/lib/god/process.rb
CHANGED
@@ -44,13 +44,13 @@ module God
|
|
44
44
|
# a start command must be specified
|
45
45
|
if self.start.nil?
|
46
46
|
valid = false
|
47
|
-
|
47
|
+
applog(self, :error, "No start command was specified")
|
48
48
|
end
|
49
49
|
|
50
50
|
# self-daemonizing processes must specify a stop command
|
51
51
|
if !@tracking_pid && self.stop.nil?
|
52
52
|
valid = false
|
53
|
-
|
53
|
+
applog(self, :error, "No stop command was specified")
|
54
54
|
end
|
55
55
|
|
56
56
|
# uid must exist if specified
|
@@ -59,7 +59,7 @@ module God
|
|
59
59
|
Etc.getpwnam(self.uid)
|
60
60
|
rescue ArgumentError
|
61
61
|
valid = false
|
62
|
-
|
62
|
+
applog(self, :error, "UID for '#{self.uid}' does not exist")
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
@@ -69,38 +69,38 @@ module God
|
|
69
69
|
Etc.getgrnam(self.gid)
|
70
70
|
rescue ArgumentError
|
71
71
|
valid = false
|
72
|
-
|
72
|
+
applog(self, :error, "GID for '#{self.gid}' does not exist")
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
76
|
# pid dir must exist if specified
|
77
77
|
if !@tracking_pid && !File.exist?(File.dirname(self.pid_file))
|
78
78
|
valid = false
|
79
|
-
|
79
|
+
applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' does not exist")
|
80
80
|
end
|
81
81
|
|
82
82
|
# pid dir must be writable if specified
|
83
|
-
if !@tracking_pid && !file_writable?(File.dirname(self.pid_file))
|
83
|
+
if !@tracking_pid && File.exist?(File.dirname(self.pid_file)) && !file_writable?(File.dirname(self.pid_file))
|
84
84
|
valid = false
|
85
|
-
|
85
|
+
applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' is not writable by #{self.uid || Etc.getlogin}")
|
86
86
|
end
|
87
87
|
|
88
88
|
# log dir must exist
|
89
89
|
if !File.exist?(File.dirname(self.log))
|
90
90
|
valid = false
|
91
|
-
|
91
|
+
applog(self, :error, "Log directory '#{File.dirname(self.log)}' does not exist")
|
92
92
|
end
|
93
93
|
|
94
94
|
# log file or dir must be writable
|
95
95
|
if File.exist?(self.log)
|
96
96
|
unless file_writable?(self.log)
|
97
97
|
valid = false
|
98
|
-
|
98
|
+
applog(self, :error, "Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
|
99
99
|
end
|
100
100
|
else
|
101
101
|
unless file_writable?(File.dirname(self.log))
|
102
102
|
valid = false
|
103
|
-
|
103
|
+
applog(self, :error, "Log directory '#{File.dirname(self.log)}' is not writable by #{self.uid || Etc.getlogin}")
|
104
104
|
end
|
105
105
|
end
|
106
106
|
|
@@ -147,6 +147,9 @@ module God
|
|
147
147
|
STDOUT.reopen self.log, "a"
|
148
148
|
STDERR.reopen STDOUT
|
149
149
|
|
150
|
+
# close any other file descriptors
|
151
|
+
3.upto(256){|fd| IO::new(fd).close rescue nil}
|
152
|
+
|
150
153
|
exec command unless command.empty?
|
151
154
|
end
|
152
155
|
end
|
@@ -158,7 +161,7 @@ module God
|
|
158
161
|
pid = File.read(self.pid_file).strip.to_i
|
159
162
|
name = self.name
|
160
163
|
command = lambda do
|
161
|
-
|
164
|
+
applog(self, :info, "#{self.name} stop: default lambda killer")
|
162
165
|
|
163
166
|
::Process.kill('HUP', pid) rescue nil
|
164
167
|
|
@@ -185,16 +188,22 @@ module God
|
|
185
188
|
# double fork god-daemonized processes
|
186
189
|
# we don't want to wait for them to finish
|
187
190
|
r, w = IO.pipe
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
191
|
+
begin
|
192
|
+
opid = fork do
|
193
|
+
STDOUT.reopen(w)
|
194
|
+
r.close
|
195
|
+
pid = self.spawn(command)
|
196
|
+
puts pid.to_s # send pid back to forker
|
197
|
+
end
|
198
|
+
|
199
|
+
::Process.waitpid(opid, 0)
|
200
|
+
w.close
|
201
|
+
pid = r.gets.chomp
|
202
|
+
ensure
|
203
|
+
# make sure the file descriptors get closed no matter what
|
204
|
+
r.close rescue nil
|
205
|
+
w.close rescue nil
|
193
206
|
end
|
194
|
-
|
195
|
-
::Process.waitpid(opid, 0)
|
196
|
-
w.close
|
197
|
-
pid = r.gets.chomp
|
198
207
|
else
|
199
208
|
# single fork self-daemonizing processes
|
200
209
|
# we want to wait for them to finish
|
data/lib/god/socket.rb
CHANGED
@@ -1,57 +1,93 @@
|
|
1
1
|
require 'drb'
|
2
2
|
|
3
|
-
# The God::Server oversees the DRb server which dishes out info on this God daemon.
|
4
|
-
|
5
3
|
module God
|
6
4
|
|
5
|
+
# The God::Server oversees the DRb server which dishes out info on this God daemon.
|
7
6
|
class Socket
|
8
7
|
attr_reader :port
|
9
8
|
|
9
|
+
# The location of the socket for a given port
|
10
|
+
# +port+ is the port number
|
11
|
+
#
|
12
|
+
# Returns String (file location)
|
10
13
|
def self.socket_file(port)
|
11
14
|
"/tmp/god.#{port}.sock"
|
12
15
|
end
|
13
16
|
|
17
|
+
# The address of the socket for a given port
|
18
|
+
# +port+ is the port number
|
19
|
+
#
|
20
|
+
# Returns String (drb address)
|
14
21
|
def self.socket(port)
|
15
22
|
"drbunix://#{self.socket_file(port)}"
|
16
23
|
end
|
17
24
|
|
25
|
+
# The location of the socket for this Server
|
26
|
+
#
|
27
|
+
# Returns String (file location)
|
18
28
|
def socket_file
|
19
29
|
self.class.socket_file(@port)
|
20
30
|
end
|
21
31
|
|
32
|
+
# The address of the socket for this Server
|
33
|
+
#
|
34
|
+
# Returns String (drb address)
|
22
35
|
def socket
|
23
36
|
self.class.socket(@port)
|
24
37
|
end
|
25
38
|
|
39
|
+
# Create a new Server and start the DRb server
|
40
|
+
# +port+ is the port on which to start the DRb service (default nil)
|
26
41
|
def initialize(port = nil)
|
27
42
|
@port = port
|
28
43
|
start
|
29
44
|
end
|
30
45
|
|
46
|
+
# Returns true
|
31
47
|
def ping
|
32
48
|
true
|
33
49
|
end
|
34
50
|
|
51
|
+
# Forward API calls to God
|
52
|
+
#
|
53
|
+
# Returns whatever the forwarded call returns
|
35
54
|
def method_missing(*args, &block)
|
36
55
|
God.send(*args, &block)
|
37
56
|
end
|
38
57
|
|
58
|
+
# Stop the DRb server and delete the socket file
|
59
|
+
#
|
60
|
+
# Returns nothing
|
61
|
+
def stop
|
62
|
+
DRb.stop_service
|
63
|
+
FileUtils.rm_f(self.socket_file)
|
64
|
+
end
|
65
|
+
|
39
66
|
private
|
40
67
|
|
68
|
+
# Start the DRb server. Abort if there is already a running god instance
|
69
|
+
# on the socket.
|
70
|
+
#
|
71
|
+
# Returns nothing
|
41
72
|
def start
|
42
73
|
begin
|
43
74
|
@drb ||= DRb.start_service(self.socket, self)
|
44
|
-
|
75
|
+
applog(nil, :info, "Started on #{DRb.uri}")
|
45
76
|
rescue Errno::EADDRINUSE
|
77
|
+
applog(nil, :info, "Socket already in use")
|
46
78
|
DRb.start_service
|
47
79
|
server = DRbObject.new(nil, self.socket)
|
48
80
|
|
49
81
|
begin
|
50
|
-
|
82
|
+
Timeout.timeout(5) do
|
83
|
+
server.ping
|
84
|
+
end
|
51
85
|
abort "Socket #{self.socket} already in use by another instance of god"
|
52
|
-
rescue
|
86
|
+
rescue StandardError, Timeout::Error
|
87
|
+
applog(nil, :info, "Socket is stale, reopening")
|
53
88
|
File.delete(self.socket_file) rescue nil
|
54
89
|
@drb ||= DRb.start_service(self.socket, self)
|
90
|
+
applog(nil, :info, "Started on #{DRb.uri}")
|
55
91
|
end
|
56
92
|
end
|
57
93
|
end
|