god 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +23 -0
- data/Manifest.txt +9 -0
- data/README.txt +9 -2
- data/Rakefile +8 -1
- data/bin/god +11 -214
- data/examples/single.god +66 -0
- data/ext/god/netlink_handler.c +16 -3
- data/lib/god.rb +153 -17
- data/lib/god/cli/command.rb +189 -0
- data/lib/god/cli/run.rb +120 -0
- data/lib/god/cli/version.rb +23 -0
- data/lib/god/conditions/complex.rb +86 -0
- data/lib/god/conditions/cpu_usage.rb +27 -0
- data/lib/god/conditions/disk_usage.rb +27 -0
- data/lib/god/conditions/flapping.rb +42 -11
- data/lib/god/conditions/http_response_code.rb +63 -3
- data/lib/god/conditions/memory_usage.rb +30 -1
- data/lib/god/conditions/process_exits.rb +24 -2
- data/lib/god/conditions/process_running.rb +32 -0
- data/lib/god/configurable.rb +5 -3
- data/lib/god/event_handler.rb +2 -2
- data/lib/god/hub.rb +12 -19
- data/lib/god/logger.rb +11 -2
- data/lib/god/process.rb +29 -20
- data/lib/god/socket.rb +41 -5
- data/lib/god/task.rb +6 -9
- data/lib/god/timer.rb +20 -13
- data/lib/god/watch.rb +3 -6
- data/test/configs/child_events/child_events.god +1 -1
- data/test/configs/complex/complex.god +59 -0
- data/test/configs/complex/simple_server.rb +3 -0
- data/test/test_conditions_disk_usage.rb +56 -0
- data/test/test_conditions_http_response_code.rb +15 -21
- data/test/test_god.rb +36 -0
- data/test/test_hub.rb +6 -4
- data/test/test_logger.rb +8 -0
- data/test/test_timer.rb +9 -0
- metadata +12 -2
data/lib/god/cli/run.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
module God
  module CLI

    # Starts god from the command line, either in the foreground or as a
    # forked background process, depending on the parsed options.
    #
    # Option keys read by this class:
    #   :daemonize - truthy to fork into the background
    #   :log       - log file that receives STDOUT/STDERR when daemonized
    #                (defaults to "/dev/null")
    #   :port      - value assigned to God.port when present
    #   :pid       - pid file path; assigned to God.pid in the daemonized
    #                child and written by the parent with the child's pid
    #   :config    - path of the configuration file to load
    class Run
      # options - Hash of parsed command line options (see class comment).
      #
      # Construction immediately dispatches to the requested run mode.
      def initialize(options)
        @options = options

        dispatch
      end

      # Flags god to be started and runs in the mode selected by
      # options[:daemonize].
      def dispatch
        # have at_exit start god
        $run = true

        if @options[:daemonize]
          run_daemonized
        else
          run_in_front
        end
      end

      # Forks a child that loads god and the configuration, then redirects
      # its standard streams. The parent records the child's pid (when a pid
      # file was requested), detaches from the child and exits.
      def run_daemonized
        # trap and ignore SIGHUP
        Signal.trap('HUP') {}

        pid = fork do
          begin
            require 'god'

            log_file = @options[:log] || "/dev/null"

            warn_events_unavailable unless God::EventHandler.loaded?

            apply_port

            # set pid if requested
            if @options[:pid]
              God.pid = @options[:pid]
            end

            load_config

            # reset file descriptors
            STDIN.reopen "/dev/null"
            STDOUT.reopen(log_file, "a")
            STDERR.reopen STDOUT
          rescue => e
            puts e.message
            puts e.backtrace.join("\n")
            abort "There was a fatal system error while starting god (see above)"
          end
        end

        if @options[:pid]
          File.open(@options[:pid], 'w') { |f| f.write pid }
        end

        ::Process.detach pid

        exit
      end

      # Loads god and the configuration in the current process.
      def run_in_front
        require 'god'

        apply_port
        load_config
      end

      private

      # Assigns God.port when a port was requested on the command line.
      def apply_port
        God.port = @options[:port] if @options[:port]
      end

      # Loads the configuration file named by options[:config], if any.
      #
      # Aborts the process when the file does not exist or when loading it
      # raises. Exception (not just StandardError) is rescued on purpose so
      # that errors such as SyntaxError raised by `load` are reported too;
      # a SystemExit raised by the config is passed through untouched.
      def load_config
        config = @options[:config]
        return unless config

        unless File.exist?(config)
          abort "File not found: #{config}"
        end

        begin
          load File.expand_path(config)
        rescue Exception => e
          raise if e.instance_of?(SystemExit)

          puts e.message
          puts e.backtrace.join("\n")
          abort "There was an error in your configuration file (see above)"
        end
      end

      # Prints the banner shown when God::EventHandler reports that it is
      # not loaded.
      def warn_events_unavailable
        puts
        puts "***********************************************************************"
        puts "*"
        puts "* Event conditions are not available for your installation of god."
        puts "* You may still use and write custom conditions using the poll system"
        puts "*"
        puts "***********************************************************************"
        puts
      end
    end # Run

  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module God
  module CLI

    # Command line handlers that print version information and exit.
    class Version
      # Prints the god version and exits the process.
      def self.version
        require 'god'

        # print version
        puts "Version #{God::VERSION}"
        exit
      end

      # Prints the god version together with the polling status and the
      # event system reported by God::EventHandler, then exits the process.
      def self.version_extended
        # Load god here as well (as .version does) so the constants below
        # resolve even when nothing has required god yet; require is a
        # no-op when it is already loaded.
        require 'god'

        puts "Version: #{God::VERSION}"
        puts "Polls: enabled"
        # Interpolation avoids a TypeError should event_system ever return
        # something other than a String.
        puts "Events: #{God::EventHandler.event_system}"

        exit
      end
    end

  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module God
  module Conditions

    # Poll condition that combines other conditions with boolean operators.
    #
    # An optional initial condition is declared with #this; further
    # conditions are chained with #and, #and_not, #or and #or_not. Each of
    # those methods generates the sub-condition via Condition.generate and
    # yields it to the given block for configuration.
    class Complex < PollCondition
      # Operator bit flags. NOT is combined with AND or OR to negate the
      # operand's result.
      AND = 0x1
      OR  = 0x2
      NOT = 0x4

      def initialize
        super

        # Sub-conditions and the operator applied to each one; the two
        # arrays are kept index-aligned by new_oper.
        @oper_stack = []
        @op_stack = []

        @this = nil
      end

      # Returns true only if every sub-condition (including the one declared
      # with #this) is valid. A non short-circuiting & is used so that every
      # sub-condition gets to run its own validation.
      def valid?
        all_conditions.inject(true) { |acc, cond| acc & cond.valid? }
      end

      # Prepares every sub-condition, including the one declared with #this.
      def prepare
        all_conditions.each { |cond| cond.prepare }
      end

      # Generates a condition of the given kind for this watch and records
      # it together with its operator flags.
      #
      # Returns the generated condition.
      def new_oper(kind, op)
        oper = Condition.generate(kind, self.watch)
        @oper_stack.push(oper)
        @op_stack.push(op)
        oper
      end

      # Declares the initial condition of the expression.
      def this(kind)
        @this = Condition.generate(kind, self.watch)
        yield @this if block_given?
      end

      def and(kind)
        oper = new_oper(kind, AND)
        yield oper if block_given?
      end

      def and_not(kind)
        oper = new_oper(kind, AND | NOT)
        yield oper if block_given?
      end

      def or(kind)
        oper = new_oper(kind, OR)
        yield oper if block_given?
      end

      def or_not(kind)
        oper = new_oper(kind, OR | NOT)
        yield oper if block_given?
      end

      # Evaluates the expression left to right, short-circuiting exactly as
      # `&&=` / `||=` do: an operand's test is only run when its result can
      # still change the outcome.
      #
      # Returns the combined result; false when no condition was declared.
      def test
        res =
          if @this
            @this.test
          else
            # Although this() makes sense semantically and therefore
            # encourages easy-to-read conditions, being able to omit it
            # allows for more DRY code in some cases, so we deal with a
            # nil @this here by initially setting res to true or false,
            # depending on whether the first operator used is AND or OR
            # respectively. With no operators at all there is nothing that
            # could trigger, so the result is false.
            first_op = @op_stack.first
            first_op ? (first_op & AND) != 0 : false
          end

        # Pair operators with operands without mutating either array, so an
        # exception raised by one operand cannot leave them misaligned.
        @op_stack.zip(@oper_stack).each do |op, cond|
          negate = (op & NOT) != 0

          if (op & AND) != 0
            res &&= (negate ? !cond.test : cond.test)
          else
            res ||= (negate ? !cond.test : cond.test)
          end
        end

        res
      end

      private

      # Every sub-condition that takes part in the expression: the optional
      # initial condition followed by the chained operands.
      def all_conditions
        ([@this] + @oper_stack).compact
      end
    end

  end
end
|
@@ -1,6 +1,33 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :cpu_usage
|
5
|
+
# Type: Poll
|
6
|
+
#
|
7
|
+
# Trigger when the percent of CPU use of a process is above a specified limit.
|
8
|
+
# On multi-core systems, this number could conceivably be above 100.
|
9
|
+
#
|
10
|
+
# Parameters
|
11
|
+
# Required
|
12
|
+
# +pid_file+ is the pid file of the process in question. Automatically
|
13
|
+
# populated for Watches.
|
14
|
+
# +above+ is the percent CPU above which to trigger the condition. You
|
15
|
+
# may use #percent to clarify this amount (see examples).
|
16
|
+
#
|
17
|
+
# Examples
|
18
|
+
#
|
19
|
+
# Trigger if the process is using more than 25 percent of the cpu (from a Watch):
|
20
|
+
#
|
21
|
+
# on.condition(:cpu_usage) do |c|
|
22
|
+
# c.above = 25.percent
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# Non-Watch Tasks must specify a PID file:
|
26
|
+
#
|
27
|
+
# on.condition(:cpu_usage) do |c|
|
28
|
+
# c.above = 25.percent
|
29
|
+
# c.pid_file = "/var/run/mongrel.3000.pid"
|
30
|
+
# end
|
4
31
|
class CpuUsage < PollCondition
|
5
32
|
attr_accessor :above, :times
|
6
33
|
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module God
  module Conditions

    # Poll condition that triggers when the usage percentage reported by
    # `df` for a mount point is above a limit.
    #
    # Attributes
    #   +mount_point+ is the mount point to look up in the `df` output
    #                 (required).
    #   +above+ is the usage percentage above which to trigger (required).
    class DiskUsage < PollCondition
      attr_accessor :above, :mount_point

      def initialize
        super
        self.above = nil
        self.mount_point = nil
      end

      # Returns a truthy value only when both required attributes are set;
      # a complaint is registered for each missing one.
      def valid?
        valid = true
        valid &= complain("Attribute 'mount_point' must be specified", self) if self.mount_point.nil?
        valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
        valid
      end

      # Returns true when the usage percentage of the mount point is above
      # the configured limit. A mount point that does not appear in the
      # `df` output counts as 0 percent.
      def test
        usage_from(`df`) > self.above
      end

      private

      # Extracts the usage percentage for the mount point from df output.
      #
      # The mount point is matched in Ruby instead of being interpolated
      # into a shell pipeline, so shell and regexp metacharacters in it are
      # treated literally. Matching is case-insensitive, anchored to the end
      # of the line and preceded by a space; the first matching line wins
      # and its fifth whitespace-separated column is read.
      #
      # Returns the percentage as an Integer (0 when nothing matches).
      def usage_from(df_output)
        pattern = / #{Regexp.escape(self.mount_point.to_s)}\z/i
        line = df_output.split("\n").find { |l| l =~ pattern }
        return 0 unless line

        # "45%".to_i is 45; a missing column yields nil.to_i, i.e. 0.
        line.split[4].to_i
      end
    end

  end
end
|
@@ -1,8 +1,42 @@
|
|
1
1
|
module God
|
2
2
|
module Conditions
|
3
3
|
|
4
|
+
# Condition Symbol :flapping
|
5
|
+
# Type: Trigger
|
6
|
+
#
|
7
|
+
# Trigger when a Task transitions to or from a state or states a given number
|
8
|
+
# of times within a given period.
|
9
|
+
#
|
10
|
+
# Parameters
|
11
|
+
# Required
|
12
|
+
# +times+ is the number of times that the Task must transition before
|
13
|
+
# triggering.
|
14
|
+
# +within+ is the number of seconds within which the Task must transition
|
15
|
+
# the specified number of times before triggering. You may use
|
16
|
+
# the sugar methods #seconds, #minutes, #hours, #days to clarify
|
17
|
+
# your code (see examples).
|
18
|
+
# --one or both of--
|
19
|
+
# +from_state+ is the state (as a Symbol) from which the transition must occur.
|
20
|
+
# +to_state+ is the state (as a Symbol) to which the transition must occur.
|
21
|
+
#
|
22
|
+
# Optional:
|
23
|
+
# +retry_in+ is the number of seconds after which to re-monitor the Task after
|
24
|
+
# it has been disabled by the condition.
|
25
|
+
# +retry_times+ is the number of times after which to permanently unmonitor
|
26
|
+
# the Task.
|
27
|
+
# +retry_within+ is the number of seconds within which
|
28
|
+
#
|
29
|
+
# Examples
|
30
|
+
#
|
31
|
+
# Trigger if
|
4
32
|
class Flapping < TriggerCondition
|
5
|
-
attr_accessor :times,
|
33
|
+
attr_accessor :times,
|
34
|
+
:within,
|
35
|
+
:from_state,
|
36
|
+
:to_state,
|
37
|
+
:retry_in,
|
38
|
+
:retry_times,
|
39
|
+
:retry_within
|
6
40
|
|
7
41
|
def initialize
|
8
42
|
self.info = "process is flapping"
|
@@ -64,26 +98,23 @@ module God
|
|
64
98
|
|
65
99
|
# log
|
66
100
|
msg = "#{self.watch.name} giving up"
|
67
|
-
|
68
|
-
LOG.log(self.watch, :info, msg)
|
101
|
+
applog(self.watch, :info, msg)
|
69
102
|
end
|
70
103
|
else
|
71
104
|
# try again later
|
72
105
|
Thread.new do
|
73
106
|
sleep 1
|
74
|
-
|
107
|
+
|
75
108
|
# log
|
76
109
|
msg = "#{self.watch.name} auto-reenable monitoring in #{self.retry_in} seconds"
|
77
|
-
|
78
|
-
|
79
|
-
|
110
|
+
applog(self.watch, :info, msg)
|
111
|
+
|
80
112
|
sleep self.retry_in
|
81
|
-
|
113
|
+
|
82
114
|
# log
|
83
115
|
msg = "#{self.watch.name} auto-reenabling monitoring"
|
84
|
-
|
85
|
-
|
86
|
-
|
116
|
+
applog(self.watch, :info, msg)
|
117
|
+
|
87
118
|
if self.watch.state == :unmonitored
|
88
119
|
self.watch.monitor
|
89
120
|
end
|
@@ -3,6 +3,64 @@ require 'net/http'
|
|
3
3
|
module God
|
4
4
|
module Conditions
|
5
5
|
|
6
|
+
# Condition Symbol :http_response_code
|
7
|
+
# Type: Poll
|
8
|
+
#
|
9
|
+
# Trigger based on the response from an HTTP request.
|
10
|
+
#
|
11
|
+
# Parameters
|
12
|
+
# Required
|
13
|
+
# +host+ is the hostname to connect [required]
|
14
|
+
# --one of code_is or code_is_not--
|
15
|
+
# +code_is+ trigger if the response code IS one of these
|
16
|
+
# e.g. 500 or '500' or [404, 500] or %w{404 500}
|
17
|
+
# +code_is_not+ trigger if the response code IS NOT one of these
|
18
|
+
# e.g. 200 or '200' or [200, 302] or %w{200 302}
|
19
|
+
# Optional
|
20
|
+
# +port+ is the port to connect (default 80)
|
21
|
+
# +path+ is the path to connect (default '/')
|
22
|
+
# +times+ is the number of times after which to trigger (default 1)
|
23
|
+
# e.g. 3 (times in a row) or [3, 5] (three out of five times)
|
24
|
+
# +timeout+ is the time to wait for a connection (default 60.seconds)
|
25
|
+
#
|
26
|
+
# Examples
|
27
|
+
#
|
28
|
+
# Trigger if the response code from www.example.com/foo/bar
|
29
|
+
# is not a 200 (or if the connection is refused or times out):
|
30
|
+
#
|
31
|
+
# on.condition(:http_response_code) do |c|
|
32
|
+
# c.host = 'www.example.com'
|
33
|
+
# c.path = '/foo/bar'
|
34
|
+
# c.code_is_not = 200
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# Trigger if the response code is a 404 or a 500 (will not
|
38
|
+
# be triggered by a connection refusal or timeout):
|
39
|
+
#
|
40
|
+
# on.condition(:http_response_code) do |c|
|
41
|
+
# c.host = 'www.example.com'
|
42
|
+
# c.path = '/foo/bar'
|
43
|
+
# c.code_is = [404, 500]
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# Trigger if the response code is not a 200 five times in a row:
|
47
|
+
#
|
48
|
+
# on.condition(:http_response_code) do |c|
|
49
|
+
# c.host = 'www.example.com'
|
50
|
+
# c.path = '/foo/bar'
|
51
|
+
# c.code_is_not = 200
|
52
|
+
# c.times = 5
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# Trigger if the response code is not a 200 or does not respond
|
56
|
+
# within 10 seconds:
|
57
|
+
#
|
58
|
+
# on.condition(:http_response_code) do |c|
|
59
|
+
# c.host = 'www.example.com'
|
60
|
+
# c.path = '/foo/bar'
|
61
|
+
# c.code_is_not = 200
|
62
|
+
# c.timeout = 10
|
63
|
+
# end
|
6
64
|
class HttpResponseCode < PollCondition
|
7
65
|
attr_accessor :code_is, # e.g. 500 or '500' or [404, 500] or %w{404 500}
|
8
66
|
:code_is_not, # e.g. 200 or '200' or [200, 302] or %w{200 302}
|
@@ -14,7 +72,10 @@ module God
|
|
14
72
|
|
15
73
|
def initialize
|
16
74
|
super
|
75
|
+
self.port = 80
|
76
|
+
self.path = '/'
|
17
77
|
self.times = [1, 1]
|
78
|
+
self.timeout = 60.seconds
|
18
79
|
end
|
19
80
|
|
20
81
|
def prepare
|
@@ -37,11 +98,8 @@ module God
|
|
37
98
|
def valid?
|
38
99
|
valid = true
|
39
100
|
valid &= complain("Attribute 'host' must be specified", self) if self.host.nil?
|
40
|
-
valid &= complain("Attribute 'port' must be specified", self) if self.port.nil?
|
41
|
-
valid &= complain("Attribute 'path' must be specified", self) if self.path.nil?
|
42
101
|
valid &= complain("One (and only one) of attributes 'code_is' and 'code_is_not' must be specified", self) if
|
43
102
|
(self.code_is.nil? && self.code_is_not.nil?) || (self.code_is && self.code_is_not)
|
44
|
-
valid &= complain("Attribute 'timeout' must be specified", self) if self.timeout.nil?
|
45
103
|
valid
|
46
104
|
end
|
47
105
|
|
@@ -61,6 +119,8 @@ module God
|
|
61
119
|
else
|
62
120
|
fail(actual_response_code)
|
63
121
|
end
|
122
|
+
rescue Errno::ECONNREFUSED
|
123
|
+
self.code_is ? fail('Refused') : pass('Refused')
|
64
124
|
rescue Timeout::Error
|
65
125
|
self.code_is ? fail('Timeout') : pass('Timeout')
|
66
126
|
end
|