god 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +23 -0
- data/Manifest.txt +9 -0
- data/README.txt +9 -2
- data/Rakefile +8 -1
- data/bin/god +11 -214
- data/examples/single.god +66 -0
- data/ext/god/netlink_handler.c +16 -3
- data/lib/god.rb +153 -17
- data/lib/god/cli/command.rb +189 -0
- data/lib/god/cli/run.rb +120 -0
- data/lib/god/cli/version.rb +23 -0
- data/lib/god/conditions/complex.rb +86 -0
- data/lib/god/conditions/cpu_usage.rb +27 -0
- data/lib/god/conditions/disk_usage.rb +27 -0
- data/lib/god/conditions/flapping.rb +42 -11
- data/lib/god/conditions/http_response_code.rb +63 -3
- data/lib/god/conditions/memory_usage.rb +30 -1
- data/lib/god/conditions/process_exits.rb +24 -2
- data/lib/god/conditions/process_running.rb +32 -0
- data/lib/god/configurable.rb +5 -3
- data/lib/god/event_handler.rb +2 -2
- data/lib/god/hub.rb +12 -19
- data/lib/god/logger.rb +11 -2
- data/lib/god/process.rb +29 -20
- data/lib/god/socket.rb +41 -5
- data/lib/god/task.rb +6 -9
- data/lib/god/timer.rb +20 -13
- data/lib/god/watch.rb +3 -6
- data/test/configs/child_events/child_events.god +1 -1
- data/test/configs/complex/complex.god +59 -0
- data/test/configs/complex/simple_server.rb +3 -0
- data/test/test_conditions_disk_usage.rb +56 -0
- data/test/test_conditions_http_response_code.rb +15 -21
- data/test/test_god.rb +36 -0
- data/test/test_hub.rb +6 -4
- data/test/test_logger.rb +8 -0
- data/test/test_timer.rb +9 -0
- metadata +12 -2
data/lib/god/cli/run.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
module God
  module CLI

    # Implements the "run" action of the god command line: starts god either
    # as a detached background process or in the foreground, depending on the
    # parsed command line options.
    class Run
      # Store the options and immediately start god.
      #
      # options - Hash of parsed command line options. The keys read by this
      #           class are :daemonize, :log, :port, :pid and :config.
      def initialize(options)
        @options = options

        dispatch
      end

      # Flag god for startup and pick the foreground or daemonized path
      # based on the :daemonize option.
      def dispatch
        # have at_exit start god
        $run = true

        if @options[:daemonize]
          run_daemonized
        else
          run_in_front
        end
      end

      # Fork a child that loads god and the configuration, then redirects its
      # standard streams (STDOUT/STDERR go to the :log file, or /dev/null when
      # none was given). The parent optionally records the child's pid in the
      # :pid file, detaches from the child and exits.
      def run_daemonized
        # trap and ignore SIGHUP
        Signal.trap('HUP') {}

        pid = fork do
          begin
            require 'god'

            log_file = @options[:log] || "/dev/null"

            warn_if_events_unavailable
            apply_port

            # set pid if requested
            God.pid = @options[:pid] if @options[:pid]

            load_config

            # reset file descriptors
            STDIN.reopen "/dev/null"
            STDOUT.reopen(log_file, "a")
            STDERR.reopen STDOUT
          rescue => e
            # Only StandardError lands here; the SystemExit raised by the
            # aborts in load_config passes straight through.
            puts e.message
            puts e.backtrace.join("\n")
            abort "There was a fatal system error while starting god (see above)"
          end
        end

        if @options[:pid]
          File.open(@options[:pid], 'w') { |f| f.write pid }
        end

        ::Process.detach pid

        exit
      end

      # Load god and the configuration in the current process.
      def run_in_front
        require 'god'

        apply_port
        load_config
      end

      private

      # Print a banner when the event system could not be loaded, so the user
      # knows only poll conditions are available.
      def warn_if_events_unavailable
        return if God::EventHandler.loaded?

        puts
        puts "***********************************************************************"
        puts "*"
        puts "* Event conditions are not available for your installation of god."
        puts "* You may still use and write custom conditions using the poll system"
        puts "*"
        puts "***********************************************************************"
        puts
      end

      # Set the port on God if one was requested on the command line.
      def apply_port
        God.port = @options[:port] if @options[:port]
      end

      # Load the configuration file named by the :config option, if any.
      #
      # Aborts with a message when the file does not exist or when loading it
      # raises. A SystemExit raised by the configuration itself is re-raised
      # untouched so that an explicit exit keeps its status.
      def load_config
        config = @options[:config]
        return unless config

        abort "File not found: #{config}" unless File.exist?(config)

        begin
          load File.expand_path(config)
        rescue SystemExit
          raise
        rescue Exception => e
          # Deliberately broad: a broken config can raise ScriptError
          # subclasses (e.g. SyntaxError), which are not StandardErrors.
          puts e.message
          puts e.backtrace.join("\n")
          abort "There was an error in your configuration file (see above)"
        end
      end
    end # Run

  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module God
  module CLI

    # Implements the version-reporting actions of the god command line.
    # Both methods print to standard output and then exit the process.
    class Version
      # Print the short version string and exit.
      def self.version
        require 'god'

        # print version
        puts "Version #{God::VERSION}"
        exit
      end

      # Print the version together with the available monitoring systems
      # (polls are always enabled; the event system name is reported by
      # God::EventHandler.event_system) and exit.
      def self.version_extended
        # Load god here as well, so this method does not depend on a caller
        # having already required it before God::VERSION is read.
        require 'god'

        puts "Version: #{God::VERSION}"
        puts "Polls: enabled"
        puts "Events: #{God::EventHandler.event_system}"

        exit
      end
    end

  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module God
  module Conditions

    # Poll condition that combines several other conditions with boolean
    # operators.
    #
    # An optional starting condition is set with #this; further conditions
    # are appended with #and, #and_not, #or and #or_not. #test folds them
    # strictly left to right (no operator precedence), short-circuiting the
    # same way Ruby's &&= and ||= do.
    class Complex < PollCondition
      # Operator flags. NOT is combined with AND or OR (e.g. AND | NOT).
      AND = 0x1
      OR = 0x2
      NOT = 0x4

      def initialize()
        super

        # @oper_stack holds the operand conditions and @op_stack the operator
        # flags; entries at the same index belong together.
        @oper_stack = []
        @op_stack = []

        @this = nil
      end

      # Returns true only if every contained condition (the one set with
      # #this, when present, and every operand) is valid. All of them are
      # asked, without short-circuiting.
      def valid?
        all_conditions.inject(true) { |acc, cond| acc & cond.valid? }
      end

      # Prepare every contained condition, including the one set with #this.
      def prepare
        all_conditions.each { |cond| cond.prepare }
      end

      # Generate a condition of the given kind for this watch and record it
      # together with its operator flags.
      #
      # kind - the condition kind passed on to Condition.generate
      # op   - Integer combination of AND / OR / NOT
      #
      # Returns the new condition.
      def new_oper(kind, op)
        oper = Condition.generate(kind, self.watch)
        @oper_stack.push(oper)
        @op_stack.push(op)
        oper
      end

      # Set the starting condition; it is yielded for configuration when a
      # block is given.
      def this(kind)
        @this = Condition.generate(kind, self.watch)
        yield @this if block_given?
      end

      # Append a condition combined as "result AND condition".
      def and(kind)
        oper = new_oper(kind, AND)
        yield oper if block_given?
      end

      # Append a condition combined as "result AND NOT condition".
      def and_not(kind)
        oper = new_oper(kind, AND | NOT)
        yield oper if block_given?
      end

      # Append a condition combined as "result OR condition".
      def or(kind)
        oper = new_oper(kind, OR)
        yield oper if block_given?
      end

      # Append a condition combined as "result OR NOT condition".
      def or_not(kind)
        oper = new_oper(kind, OR | NOT)
        yield oper if block_given?
      end

      # Evaluate the combined condition.
      #
      # Returns the result of folding all operands into the starting value.
      def test
        res =
          if @this
            @this.test
          else
            # Although this() makes sense semantically and therefore
            # encourages easy-to-read conditions, being able to omit it
            # allows for more DRY code in some cases, so we deal with a
            # nil @this here by initially setting res to true or false,
            # depending on whether the first operator used is AND or OR
            # respectively. With no operands at all the result is false.
            first_op = @op_stack.first
            !first_op.nil? && (first_op & AND) != 0
          end

        @op_stack.zip(@oper_stack).each do |op, cond|
          negate = (op & NOT) != 0

          # &&= / ||= only call cond.test when its result can still change
          # the outcome, so skipped operands are never polled.
          if (op & AND) != 0
            res &&= (negate ? !cond.test : cond.test)
          else
            res ||= (negate ? !cond.test : cond.test)
          end
        end

        res
      end

      private

      # The starting condition (when set) followed by every operand.
      def all_conditions
        ([@this] + @oper_stack).compact
      end
    end

  end
end
|
@@ -1,6 +1,33 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :cpu_usage
|
5
|
+
# Type: Poll
|
6
|
+
#
|
7
|
+
# Trigger when the percent of CPU use of a process is above a specified limit.
|
8
|
+
# On multi-core systems, this number could conceivably be above 100.
|
9
|
+
#
|
10
|
+
# Parameters
|
11
|
+
# Required
|
12
|
+
# +pid_file+ is the pid file of the process in question. Automatically
|
13
|
+
# populated for Watches.
|
14
|
+
# +above+ is the percent CPU above which to trigger the condition. You
|
15
|
+
# may use #percent to clarify this amount (see examples).
|
16
|
+
#
|
17
|
+
# Examples
|
18
|
+
#
|
19
|
+
# Trigger if the process is using more than 25 percent of the cpu (from a Watch):
|
20
|
+
#
|
21
|
+
# on.condition(:cpu_usage) do |c|
|
22
|
+
# c.above = 25.percent
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# Non-Watch Tasks must specify a PID file:
|
26
|
+
#
|
27
|
+
# on.condition(:cpu_usage) do |c|
|
28
|
+
# c.above = 25.percent
|
29
|
+
# c.pid_file = "/var/run/mongrel.3000.pid"
|
30
|
+
# end
|
4
31
|
class CpuUsage < PollCondition
|
5
32
|
attr_accessor :above, :times
|
6
33
|
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'shellwords'

module God
  module Conditions

    # Condition Symbol :disk_usage
    # Type: Poll
    #
    # Trigger when the percent of disk space used on a mount point is above
    # a specified limit.
    #
    # Parameters
    #   Required
    #     +mount_point+ is the mount point to check, as shown in the last
    #                   column of the output of df.
    #     +above+ is the percent used above which to trigger the condition.
    #
    # Examples
    #
    # Trigger if the root filesystem is more than 80 percent full:
    #
    #   on.condition(:disk_usage) do |c|
    #     c.mount_point = "/"
    #     c.above = 80
    #   end
    class DiskUsage < PollCondition
      # awk program that prints the capacity column (without the percent
      # sign) of every df line ending in " <mount point>". The mount point is
      # read from the environment and compared literally, so it is never
      # interpreted as a pattern.
      USAGE_FILTER = 'BEGIN { suffix = " " ENVIRON["GOD_MOUNT_POINT"] } ' \
                     'length($0) >= length(suffix) && ' \
                     'substr($0, length($0) - length(suffix) + 1) == suffix ' \
                     '{ sub(/%/, "", $5); print $5 }'

      attr_accessor :above, :mount_point

      def initialize
        super
        self.above = nil
        self.mount_point = nil
      end

      # Both attributes are required; a complaint is registered for each one
      # that is missing.
      def valid?
        valid = true
        valid &= complain("Attribute 'mount_point' must be specified", self) if self.mount_point.nil?
        valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
        valid
      end

      # Returns true when the used percentage reported by df for the mount
      # point is greater than +above+. When the mount point is not found the
      # usage reads as 0.
      def test
        # Escape the mount point so it cannot break out of the shell command.
        mount = Shellwords.escape(self.mount_point.to_s)

        # df -P uses the POSIX output format: one line per filesystem, so a
        # long device name cannot push the capacity out of column 5.
        usage = `df -P | GOD_MOUNT_POINT=#{mount} awk '#{USAGE_FILTER}'`
        usage.to_i > self.above
      end
    end

  end
end
|
@@ -1,8 +1,42 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :flapping
|
5
|
+
# Type: Trigger
|
6
|
+
#
|
7
|
+
# Trigger when a Task transitions to or from a state or states a given number
|
8
|
+
# of times within a given period.
|
9
|
+
#
|
10
|
+
# Parameters
|
11
|
+
# Required
|
12
|
+
# +times+ is the number of times that the Task must transition before
|
13
|
+
# triggering.
|
14
|
+
# +within+ is the number of seconds within which the Task must transition
|
15
|
+
# the specified number of times before triggering. You may use
|
16
|
+
# the sugar methods #seconds, #minutes, #hours, #days to clarify
|
17
|
+
# your code (see examples).
|
18
|
+
# --one or both of--
|
19
|
+
# +from_state+ is the state (as a Symbol) from which the transition must occur.
|
20
|
+
# +to_state+ is the state (as a Symbol) to which the transition must occur.
|
21
|
+
#
|
22
|
+
# Optional:
|
23
|
+
# +retry_in+ is the number of seconds after which to re-monitor the Task after
|
24
|
+
# it has been disabled by the condition.
|
25
|
+
# +retry_times+ is the number of times after which to permanently unmonitor
|
26
|
+
# the Task.
|
27
|
+
# +retry_within+ is the number of seconds within which
|
28
|
+
#
|
29
|
+
# Examples
|
30
|
+
#
|
31
|
+
# Trigger if
|
4
32
|
class Flapping < TriggerCondition
|
5
|
-
attr_accessor :times,
|
33
|
+
attr_accessor :times,
|
34
|
+
:within,
|
35
|
+
:from_state,
|
36
|
+
:to_state,
|
37
|
+
:retry_in,
|
38
|
+
:retry_times,
|
39
|
+
:retry_within
|
6
40
|
|
7
41
|
def initialize
|
8
42
|
self.info = "process is flapping"
|
@@ -64,26 +98,23 @@ module God
|
|
64
98
|
|
65
99
|
# log
|
66
100
|
msg = "#{self.watch.name} giving up"
|
67
|
-
|
68
|
-
LOG.log(self.watch, :info, msg)
|
101
|
+
applog(self.watch, :info, msg)
|
69
102
|
end
|
70
103
|
else
|
71
104
|
# try again later
|
72
105
|
Thread.new do
|
73
106
|
sleep 1
|
74
|
-
|
107
|
+
|
75
108
|
# log
|
76
109
|
msg = "#{self.watch.name} auto-reenable monitoring in #{self.retry_in} seconds"
|
77
|
-
|
78
|
-
|
79
|
-
|
110
|
+
applog(self.watch, :info, msg)
|
111
|
+
|
80
112
|
sleep self.retry_in
|
81
|
-
|
113
|
+
|
82
114
|
# log
|
83
115
|
msg = "#{self.watch.name} auto-reenabling monitoring"
|
84
|
-
|
85
|
-
|
86
|
-
|
116
|
+
applog(self.watch, :info, msg)
|
117
|
+
|
87
118
|
if self.watch.state == :unmonitored
|
88
119
|
self.watch.monitor
|
89
120
|
end
|
@@ -3,6 +3,64 @@ require 'net/http'
|
|
3
3
|
module God
|
4
4
|
module Conditions
|
5
5
|
|
6
|
+
# Condition Symbol :http_response_code
|
7
|
+
# Type: Poll
|
8
|
+
#
|
9
|
+
# Trigger based on the response from an HTTP request.
|
10
|
+
#
|
11
|
+
# Parameters
|
12
|
+
# Required
|
13
|
+
# +host+ is the hostname to connect [required]
|
14
|
+
# --one of code_is or code_is_not--
|
15
|
+
# +code_is+ trigger if the response code IS one of these
|
16
|
+
# e.g. 500 or '500' or [404, 500] or %w{404 500}
|
17
|
+
# +code_is_not+ trigger if the response code IS NOT one of these
|
18
|
+
# e.g. 200 or '200' or [200, 302] or %w{200 302}
|
19
|
+
# Optional
|
20
|
+
# +port+ is the port to connect (default 80)
|
21
|
+
# +path+ is the path to connect (default '/')
|
22
|
+
# +times+ is the number of times after which to trigger (default 1)
|
23
|
+
# e.g. 3 (times in a row) or [3, 5] (three out of five times)
|
24
|
+
# +timeout+ is the time to wait for a connection (default 60.seconds)
|
25
|
+
#
|
26
|
+
# Examples
|
27
|
+
#
|
28
|
+
# Trigger if the response code from www.example.com/foo/bar
|
29
|
+
# is not a 200 (or if the connection is refused or times out):
|
30
|
+
#
|
31
|
+
# on.condition(:http_response_code) do |c|
|
32
|
+
# c.host = 'www.example.com'
|
33
|
+
# c.path = '/foo/bar'
|
34
|
+
# c.code_is_not = 200
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# Trigger if the response code is a 404 or a 500 (will not
|
38
|
+
# be triggered by a connection refusal or timeout):
|
39
|
+
#
|
40
|
+
# on.condition(:http_response_code) do |c|
|
41
|
+
# c.host = 'www.example.com'
|
42
|
+
# c.path = '/foo/bar'
|
43
|
+
# c.code_is = [404, 500]
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# Trigger if the response code is not a 200 five times in a row:
|
47
|
+
#
|
48
|
+
# on.condition(:http_response_code) do |c|
|
49
|
+
# c.host = 'www.example.com'
|
50
|
+
# c.path = '/foo/bar'
|
51
|
+
# c.code_is_not = 200
|
52
|
+
# c.times = 5
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# Trigger if the response code is not a 200 or does not respond
|
56
|
+
# within 10 seconds:
|
57
|
+
#
|
58
|
+
# on.condition(:http_response_code) do |c|
|
59
|
+
# c.host = 'www.example.com'
|
60
|
+
# c.path = '/foo/bar'
|
61
|
+
# c.code_is_not = 200
|
62
|
+
# c.timeout = 10
|
63
|
+
# end
|
6
64
|
class HttpResponseCode < PollCondition
|
7
65
|
attr_accessor :code_is, # e.g. 500 or '500' or [404, 500] or %w{404 500}
|
8
66
|
:code_is_not, # e.g. 200 or '200' or [200, 302] or %w{200 302}
|
@@ -14,7 +72,10 @@ module God
|
|
14
72
|
|
15
73
|
def initialize
|
16
74
|
super
|
75
|
+
self.port = 80
|
76
|
+
self.path = '/'
|
17
77
|
self.times = [1, 1]
|
78
|
+
self.timeout = 60.seconds
|
18
79
|
end
|
19
80
|
|
20
81
|
def prepare
|
@@ -37,11 +98,8 @@ module God
|
|
37
98
|
def valid?
|
38
99
|
valid = true
|
39
100
|
valid &= complain("Attribute 'host' must be specified", self) if self.host.nil?
|
40
|
-
valid &= complain("Attribute 'port' must be specified", self) if self.port.nil?
|
41
|
-
valid &= complain("Attribute 'path' must be specified", self) if self.path.nil?
|
42
101
|
valid &= complain("One (and only one) of attributes 'code_is' and 'code_is_not' must be specified", self) if
|
43
102
|
(self.code_is.nil? && self.code_is_not.nil?) || (self.code_is && self.code_is_not)
|
44
|
-
valid &= complain("Attribute 'timeout' must be specified", self) if self.timeout.nil?
|
45
103
|
valid
|
46
104
|
end
|
47
105
|
|
@@ -61,6 +119,8 @@ module God
|
|
61
119
|
else
|
62
120
|
fail(actual_response_code)
|
63
121
|
end
|
122
|
+
rescue Errno::ECONNREFUSED
|
123
|
+
self.code_is ? fail('Refused') : pass('Refused')
|
64
124
|
rescue Timeout::Error
|
65
125
|
self.code_is ? fail('Timeout') : pass('Timeout')
|
66
126
|
end
|