god 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +23 -0
- data/Manifest.txt +9 -0
- data/README.txt +9 -2
- data/Rakefile +8 -1
- data/bin/god +11 -214
- data/examples/single.god +66 -0
- data/ext/god/netlink_handler.c +16 -3
- data/lib/god.rb +153 -17
- data/lib/god/cli/command.rb +189 -0
- data/lib/god/cli/run.rb +120 -0
- data/lib/god/cli/version.rb +23 -0
- data/lib/god/conditions/complex.rb +86 -0
- data/lib/god/conditions/cpu_usage.rb +27 -0
- data/lib/god/conditions/disk_usage.rb +27 -0
- data/lib/god/conditions/flapping.rb +42 -11
- data/lib/god/conditions/http_response_code.rb +63 -3
- data/lib/god/conditions/memory_usage.rb +30 -1
- data/lib/god/conditions/process_exits.rb +24 -2
- data/lib/god/conditions/process_running.rb +32 -0
- data/lib/god/configurable.rb +5 -3
- data/lib/god/event_handler.rb +2 -2
- data/lib/god/hub.rb +12 -19
- data/lib/god/logger.rb +11 -2
- data/lib/god/process.rb +29 -20
- data/lib/god/socket.rb +41 -5
- data/lib/god/task.rb +6 -9
- data/lib/god/timer.rb +20 -13
- data/lib/god/watch.rb +3 -6
- data/test/configs/child_events/child_events.god +1 -1
- data/test/configs/complex/complex.god +59 -0
- data/test/configs/complex/simple_server.rb +3 -0
- data/test/test_conditions_disk_usage.rb +56 -0
- data/test/test_conditions_http_response_code.rb +15 -21
- data/test/test_god.rb +36 -0
- data/test/test_hub.rb +6 -4
- data/test/test_logger.rb +8 -0
- data/test/test_timer.rb +9 -0
- metadata +12 -2
@@ -1,9 +1,38 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :memory_usage
|
5
|
+
# Type: Poll
|
6
|
+
#
|
7
|
+
# Trigger when the resident memory of a process is above a specified limit.
|
8
|
+
#
|
9
|
+
# Parameters
|
10
|
+
# Required
|
11
|
+
# +pid_file+ is the pid file of the process in question. Automatically
|
12
|
+
# populated for Watches.
|
13
|
+
# +above+ is the amount of resident memory (in kilobytes) above which
|
14
|
+
# the condition should trigger. You can also use the sugar
|
15
|
+
# methods #kilobytes, #megabytes, and #gigabytes to clarify
|
16
|
+
# this amount (see examples).
|
17
|
+
#
|
18
|
+
# Examples
|
19
|
+
#
|
20
|
+
# Trigger if the process is using more than 100 megabytes of resident
|
21
|
+
# memory (from a Watch):
|
22
|
+
#
|
23
|
+
# on.condition(:memory_usage) do |c|
|
24
|
+
# c.above = 100.megabytes
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# Non-Watch Tasks must specify a PID file:
|
28
|
+
#
|
29
|
+
# on.condition(:memory_usage) do |c|
|
30
|
+
# c.above = 100.megabytes
|
31
|
+
# c.pid_file = "/var/run/mongrel.3000.pid"
|
32
|
+
# end
|
4
33
|
class MemoryUsage < PollCondition
|
5
34
|
attr_accessor :above, :times
|
6
|
-
|
35
|
+
|
7
36
|
def initialize
|
8
37
|
super
|
9
38
|
self.above = nil
|
@@ -1,6 +1,27 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :process_exits
|
5
|
+
# Type: Event
|
6
|
+
#
|
7
|
+
# Trigger when a process exits.
|
8
|
+
#
|
9
|
+
# Parameters
|
10
|
+
# Required
|
11
|
+
# +pid_file+ is the pid file of the process in question. Automatically
|
12
|
+
# populated for Watches.
|
13
|
+
#
|
14
|
+
# Examples
|
15
|
+
#
|
16
|
+
# Trigger if process exits (from a Watch):
|
17
|
+
#
|
18
|
+
# on.condition(:process_exits)
|
19
|
+
#
|
20
|
+
# Trigger if process exits:
|
21
|
+
#
|
22
|
+
# on.condition(:process_exits) do |c|
|
23
|
+
# c.pid_file = "/var/run/mongrel.3000.pid"
|
24
|
+
# end
|
4
25
|
class ProcessExits < EventCondition
|
5
26
|
def initialize
|
6
27
|
self.info = "process exited"
|
@@ -16,7 +37,8 @@ module God
|
|
16
37
|
pid = File.read(self.watch.pid_file).strip.to_i
|
17
38
|
|
18
39
|
begin
|
19
|
-
EventHandler.register(pid, :proc_exit) do
|
40
|
+
EventHandler.register(pid, :proc_exit) do |extra|
|
41
|
+
self.info = "process exited #{extra.inspect}"
|
20
42
|
Hub.trigger(self)
|
21
43
|
end
|
22
44
|
rescue StandardError
|
@@ -29,7 +51,7 @@ module God
|
|
29
51
|
pid = File.read(self.watch.pid_file).strip.to_i
|
30
52
|
EventHandler.deregister(pid, :proc_exit)
|
31
53
|
else
|
32
|
-
|
54
|
+
applog(self.watch, :error, "#{self.watch.name} could not deregister: no such PID file #{self.watch.pid_file} (#{self.base_name})")
|
33
55
|
end
|
34
56
|
end
|
35
57
|
end
|
@@ -1,6 +1,38 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :process_running
|
5
|
+
# Type: Poll
|
6
|
+
#
|
7
|
+
# Trigger when a process is running or not running depending on attributes.
|
8
|
+
#
|
9
|
+
# Parameters
|
10
|
+
# Required
|
11
|
+
# +pid_file+ is the pid file of the process in question. Automatically
|
12
|
+
# populated for Watches.
|
13
|
+
# +running+ specifies whether you want to trigger if the process is
|
14
|
+
# running (true) or whether it is not running (false)
|
15
|
+
#
|
16
|
+
# Examples
|
17
|
+
#
|
18
|
+
# Trigger if process IS NOT running (from a Watch):
|
19
|
+
#
|
20
|
+
# on.condition(:process_running) do |c|
|
21
|
+
# c.running = false
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# Trigger if process IS running (from a Watch):
|
25
|
+
#
|
26
|
+
# on.condition(:process_running) do |c|
|
27
|
+
# c.running = true
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# Non-Watch Tasks must specify a PID file:
|
31
|
+
#
|
32
|
+
# on.condition(:process_running) do |c|
|
33
|
+
# c.running = false
|
34
|
+
# c.pid_file = "/var/run/mongrel.3000.pid"
|
35
|
+
# end
|
4
36
|
class ProcessRunning < PollCondition
|
5
37
|
attr_accessor :running
|
6
38
|
|
data/lib/god/configurable.rb
CHANGED
@@ -39,10 +39,12 @@ module God
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def self.complain(text, c = nil)
|
42
|
-
|
42
|
+
watch = c.watch rescue nil
|
43
|
+
msg = ""
|
44
|
+
msg += "#{watch.name}: " if watch
|
45
|
+
msg += text
|
43
46
|
msg += " for #{c.friendly_name}" if c
|
44
|
-
|
45
|
-
puts msg
|
47
|
+
applog(watch, :error, msg)
|
46
48
|
false
|
47
49
|
end
|
48
50
|
|
data/lib/god/event_handler.rb
CHANGED
@@ -50,8 +50,8 @@ module God
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
-
def self.call(pid, event)
|
54
|
-
@@actions[pid][event].call if watching_pid?(pid) && @@actions[pid][event]
|
53
|
+
def self.call(pid, event, extra_data = {})
|
54
|
+
@@actions[pid][event].call(extra_data) if watching_pid?(pid) && @@actions[pid][event]
|
55
55
|
end
|
56
56
|
|
57
57
|
def self.watching_pid?(pid)
|
data/lib/god/hub.rb
CHANGED
@@ -83,8 +83,7 @@ module God
|
|
83
83
|
watch.move(dest)
|
84
84
|
rescue EventRegistrationFailedError
|
85
85
|
msg = watch.name + ' Event registration failed, moving back to previous state'
|
86
|
-
|
87
|
-
LOG.log(watch, :info, msg)
|
86
|
+
applog(watch, :info, msg)
|
88
87
|
|
89
88
|
dest = watch.state
|
90
89
|
retry
|
@@ -95,11 +94,10 @@ module God
|
|
95
94
|
end
|
96
95
|
end
|
97
96
|
end
|
98
|
-
rescue => e
|
97
|
+
rescue Exception => e
|
99
98
|
message = format("Unhandled exception (%s): %s\n%s",
|
100
99
|
e.class, e.message, e.backtrace.join("\n"))
|
101
|
-
|
102
|
-
abort message
|
100
|
+
applog(nil, :fatal, message)
|
103
101
|
end
|
104
102
|
end
|
105
103
|
end
|
@@ -136,11 +134,10 @@ module God
|
|
136
134
|
end
|
137
135
|
end
|
138
136
|
end
|
139
|
-
rescue => e
|
137
|
+
rescue Exception => e
|
140
138
|
message = format("Unhandled exception (%s): %s\n%s",
|
141
139
|
e.class, e.message, e.backtrace.join("\n"))
|
142
|
-
|
143
|
-
abort message
|
140
|
+
applog(nil, :fatal, message)
|
144
141
|
end
|
145
142
|
end
|
146
143
|
end
|
@@ -165,19 +162,16 @@ module God
|
|
165
162
|
if condition.info
|
166
163
|
Array(condition.info).each do |condition_info|
|
167
164
|
messages << "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
|
168
|
-
|
169
|
-
LOG.log(watch, :info, messages.last % [])
|
165
|
+
applog(watch, :info, messages.last)
|
170
166
|
end
|
171
167
|
else
|
172
168
|
messages << "#{watch.name} #{status} (#{condition.base_name})"
|
173
|
-
|
174
|
-
LOG.log(watch, :info, messages.last % [])
|
169
|
+
applog(watch, :info, messages.last)
|
175
170
|
end
|
176
171
|
|
177
172
|
# log
|
178
173
|
debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
|
179
|
-
|
180
|
-
LOG.log(watch, :debug, debug_message)
|
174
|
+
applog(watch, :debug, debug_message)
|
181
175
|
|
182
176
|
messages
|
183
177
|
end
|
@@ -210,18 +204,17 @@ module God
|
|
210
204
|
# warn about unmatched contacts
|
211
205
|
unless unmatched.empty?
|
212
206
|
msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
|
213
|
-
|
207
|
+
applog(condition.watch, :warn, msg)
|
214
208
|
end
|
215
209
|
|
216
210
|
# notify each contact
|
217
211
|
resolved_contacts.each do |c|
|
218
212
|
host = `hostname`.chomp rescue 'none'
|
219
213
|
c.notify(message, Time.now, spec[:priority], spec[:category], host)
|
220
|
-
|
214
|
+
|
221
215
|
msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
|
222
|
-
|
223
|
-
|
224
|
-
LOG.log(condition.watch, :info, msg % [])
|
216
|
+
|
217
|
+
applog(condition.watch, :info, msg % [])
|
225
218
|
end
|
226
219
|
end
|
227
220
|
end
|
data/lib/god/logger.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
module God
|
2
2
|
|
3
3
|
class Logger < ::Logger
|
4
|
+
SYSLOG_EQUIVALENTS = {:fatal => :crit,
|
5
|
+
:error => :err,
|
6
|
+
:warn => :debug,
|
7
|
+
:info => :debug,
|
8
|
+
:debug => :debug}
|
9
|
+
|
4
10
|
attr_accessor :logs
|
5
11
|
|
6
12
|
def initialize
|
@@ -32,7 +38,7 @@ module God
|
|
32
38
|
buf = StringIO.new
|
33
39
|
templog = ::Logger.new(buf)
|
34
40
|
templog.level = Logger::INFO
|
35
|
-
templog.send(level, text)
|
41
|
+
templog.send(level, text % [])
|
36
42
|
@mutex.synchronize do
|
37
43
|
@capture.puts(buf.string) if @capture
|
38
44
|
self.logs[watch.name] << [Time.now, buf.string] if watch
|
@@ -40,7 +46,10 @@ module God
|
|
40
46
|
templog.close
|
41
47
|
|
42
48
|
# send to regular logger
|
43
|
-
self.send(level, text)
|
49
|
+
self.send(level, text % [])
|
50
|
+
|
51
|
+
# send to syslog
|
52
|
+
Syslog.send(SYSLOG_EQUIVALENTS[level], text)
|
44
53
|
end
|
45
54
|
|
46
55
|
def watch_log_since(watch_name, since)
|
data/lib/god/process.rb
CHANGED
@@ -44,13 +44,13 @@ module God
|
|
44
44
|
# a start command must be specified
|
45
45
|
if self.start.nil?
|
46
46
|
valid = false
|
47
|
-
|
47
|
+
applog(self, :error, "No start command was specified")
|
48
48
|
end
|
49
49
|
|
50
50
|
# self-daemonizing processes must specify a stop command
|
51
51
|
if !@tracking_pid && self.stop.nil?
|
52
52
|
valid = false
|
53
|
-
|
53
|
+
applog(self, :error, "No stop command was specified")
|
54
54
|
end
|
55
55
|
|
56
56
|
# uid must exist if specified
|
@@ -59,7 +59,7 @@ module God
|
|
59
59
|
Etc.getpwnam(self.uid)
|
60
60
|
rescue ArgumentError
|
61
61
|
valid = false
|
62
|
-
|
62
|
+
applog(self, :error, "UID for '#{self.uid}' does not exist")
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
@@ -69,38 +69,38 @@ module God
|
|
69
69
|
Etc.getgrnam(self.gid)
|
70
70
|
rescue ArgumentError
|
71
71
|
valid = false
|
72
|
-
|
72
|
+
applog(self, :error, "GID for '#{self.gid}' does not exist")
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
76
|
# pid dir must exist if specified
|
77
77
|
if !@tracking_pid && !File.exist?(File.dirname(self.pid_file))
|
78
78
|
valid = false
|
79
|
-
|
79
|
+
applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' does not exist")
|
80
80
|
end
|
81
81
|
|
82
82
|
# pid dir must be writable if specified
|
83
|
-
if !@tracking_pid && !file_writable?(File.dirname(self.pid_file))
|
83
|
+
if !@tracking_pid && File.exist?(File.dirname(self.pid_file)) && !file_writable?(File.dirname(self.pid_file))
|
84
84
|
valid = false
|
85
|
-
|
85
|
+
applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' is not writable by #{self.uid || Etc.getlogin}")
|
86
86
|
end
|
87
87
|
|
88
88
|
# log dir must exist
|
89
89
|
if !File.exist?(File.dirname(self.log))
|
90
90
|
valid = false
|
91
|
-
|
91
|
+
applog(self, :error, "Log directory '#{File.dirname(self.log)}' does not exist")
|
92
92
|
end
|
93
93
|
|
94
94
|
# log file or dir must be writable
|
95
95
|
if File.exist?(self.log)
|
96
96
|
unless file_writable?(self.log)
|
97
97
|
valid = false
|
98
|
-
|
98
|
+
applog(self, :error, "Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
|
99
99
|
end
|
100
100
|
else
|
101
101
|
unless file_writable?(File.dirname(self.log))
|
102
102
|
valid = false
|
103
|
-
|
103
|
+
applog(self, :error, "Log directory '#{File.dirname(self.log)}' is not writable by #{self.uid || Etc.getlogin}")
|
104
104
|
end
|
105
105
|
end
|
106
106
|
|
@@ -147,6 +147,9 @@ module God
|
|
147
147
|
STDOUT.reopen self.log, "a"
|
148
148
|
STDERR.reopen STDOUT
|
149
149
|
|
150
|
+
# close any other file descriptors
|
151
|
+
3.upto(256){|fd| IO::new(fd).close rescue nil}
|
152
|
+
|
150
153
|
exec command unless command.empty?
|
151
154
|
end
|
152
155
|
end
|
@@ -158,7 +161,7 @@ module God
|
|
158
161
|
pid = File.read(self.pid_file).strip.to_i
|
159
162
|
name = self.name
|
160
163
|
command = lambda do
|
161
|
-
|
164
|
+
applog(self, :info, "#{self.name} stop: default lambda killer")
|
162
165
|
|
163
166
|
::Process.kill('HUP', pid) rescue nil
|
164
167
|
|
@@ -185,16 +188,22 @@ module God
|
|
185
188
|
# double fork god-daemonized processes
|
186
189
|
# we don't want to wait for them to finish
|
187
190
|
r, w = IO.pipe
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
191
|
+
begin
|
192
|
+
opid = fork do
|
193
|
+
STDOUT.reopen(w)
|
194
|
+
r.close
|
195
|
+
pid = self.spawn(command)
|
196
|
+
puts pid.to_s # send pid back to forker
|
197
|
+
end
|
198
|
+
|
199
|
+
::Process.waitpid(opid, 0)
|
200
|
+
w.close
|
201
|
+
pid = r.gets.chomp
|
202
|
+
ensure
|
203
|
+
# make sure the file descriptors get closed no matter what
|
204
|
+
r.close rescue nil
|
205
|
+
w.close rescue nil
|
193
206
|
end
|
194
|
-
|
195
|
-
::Process.waitpid(opid, 0)
|
196
|
-
w.close
|
197
|
-
pid = r.gets.chomp
|
198
207
|
else
|
199
208
|
# single fork self-daemonizing processes
|
200
209
|
# we want to wait for them to finish
|
data/lib/god/socket.rb
CHANGED
@@ -1,57 +1,93 @@
|
|
1
1
|
require 'drb'
|
2
2
|
|
3
|
-
# The God::Server oversees the DRb server which dishes out info on this God daemon.
|
4
|
-
|
5
3
|
module God
|
6
4
|
|
5
|
+
# The God::Server oversees the DRb server which dishes out info on this God daemon.
|
7
6
|
class Socket
|
8
7
|
attr_reader :port
|
9
8
|
|
9
|
+
# The location of the socket for a given port
|
10
|
+
# +port+ is the port number
|
11
|
+
#
|
12
|
+
# Returns String (file location)
|
10
13
|
def self.socket_file(port)
|
11
14
|
"/tmp/god.#{port}.sock"
|
12
15
|
end
|
13
16
|
|
17
|
+
# The address of the socket for a given port
|
18
|
+
# +port+ is the port number
|
19
|
+
#
|
20
|
+
# Returns String (drb address)
|
14
21
|
def self.socket(port)
|
15
22
|
"drbunix://#{self.socket_file(port)}"
|
16
23
|
end
|
17
24
|
|
25
|
+
# The location of the socket for this Server
|
26
|
+
#
|
27
|
+
# Returns String (file location)
|
18
28
|
def socket_file
|
19
29
|
self.class.socket_file(@port)
|
20
30
|
end
|
21
31
|
|
32
|
+
# The address of the socket for this Server
|
33
|
+
#
|
34
|
+
# Returns String (drb address)
|
22
35
|
def socket
|
23
36
|
self.class.socket(@port)
|
24
37
|
end
|
25
38
|
|
39
|
+
# Create a new Server and start the DRb server
|
40
|
+
# +port+ is the port on which to start the DRb service (default nil)
|
26
41
|
def initialize(port = nil)
|
27
42
|
@port = port
|
28
43
|
start
|
29
44
|
end
|
30
45
|
|
46
|
+
# Returns true
|
31
47
|
def ping
|
32
48
|
true
|
33
49
|
end
|
34
50
|
|
51
|
+
# Forward API calls to God
|
52
|
+
#
|
53
|
+
# Returns whatever the forwarded call returns
|
35
54
|
def method_missing(*args, &block)
|
36
55
|
God.send(*args, &block)
|
37
56
|
end
|
38
57
|
|
58
|
+
# Stop the DRb server and delete the socket file
|
59
|
+
#
|
60
|
+
# Returns nothing
|
61
|
+
def stop
|
62
|
+
DRb.stop_service
|
63
|
+
FileUtils.rm_f(self.socket_file)
|
64
|
+
end
|
65
|
+
|
39
66
|
private
|
40
67
|
|
68
|
+
# Start the DRb server. Abort if there is already a running god instance
|
69
|
+
# on the socket.
|
70
|
+
#
|
71
|
+
# Returns nothing
|
41
72
|
def start
|
42
73
|
begin
|
43
74
|
@drb ||= DRb.start_service(self.socket, self)
|
44
|
-
|
75
|
+
applog(nil, :info, "Started on #{DRb.uri}")
|
45
76
|
rescue Errno::EADDRINUSE
|
77
|
+
applog(nil, :info, "Socket already in use")
|
46
78
|
DRb.start_service
|
47
79
|
server = DRbObject.new(nil, self.socket)
|
48
80
|
|
49
81
|
begin
|
50
|
-
|
82
|
+
Timeout.timeout(5) do
|
83
|
+
server.ping
|
84
|
+
end
|
51
85
|
abort "Socket #{self.socket} already in use by another instance of god"
|
52
|
-
rescue
|
86
|
+
rescue StandardError, Timeout::Error
|
87
|
+
applog(nil, :info, "Socket is stale, reopening")
|
53
88
|
File.delete(self.socket_file) rescue nil
|
54
89
|
@drb ||= DRb.start_service(self.socket, self)
|
90
|
+
applog(nil, :info, "Started on #{DRb.uri}")
|
55
91
|
end
|
56
92
|
end
|
57
93
|
end
|