god 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +43 -7
- data/Manifest.txt +20 -4
- data/Rakefile +1 -1
- data/bin/god +263 -195
- data/examples/events.god +66 -34
- data/examples/gravatar.god +25 -12
- data/init/god +42 -0
- data/lib/god/behavior.rb +9 -29
- data/lib/god/behaviors/clean_pid_file.rb +6 -2
- data/lib/god/behaviors/notify_when_flapping.rb +4 -4
- data/lib/god/condition.rb +48 -6
- data/lib/god/conditions/always.rb +5 -1
- data/lib/god/conditions/cpu_usage.rb +13 -5
- data/lib/god/conditions/degrading_lambda.rb +8 -3
- data/lib/god/conditions/flapping.rb +97 -0
- data/lib/god/conditions/http_response_code.rb +97 -0
- data/lib/god/conditions/lambda.rb +8 -2
- data/lib/god/conditions/memory_usage.rb +13 -5
- data/lib/god/conditions/process_exits.rb +11 -3
- data/lib/god/conditions/process_running.rb +22 -4
- data/lib/god/conditions/tries.rb +16 -5
- data/lib/god/configurable.rb +54 -0
- data/lib/god/contact.rb +106 -0
- data/lib/god/contacts/email.rb +73 -0
- data/lib/god/errors.rb +3 -0
- data/lib/god/hub.rb +138 -33
- data/lib/god/logger.rb +21 -4
- data/lib/god/metric.rb +3 -4
- data/lib/god/process.rb +93 -49
- data/lib/god/socket.rb +60 -0
- data/lib/god/task.rb +233 -0
- data/lib/god/trigger.rb +43 -0
- data/lib/god/watch.rb +48 -114
- data/lib/god.rb +216 -63
- data/test/configs/child_events/child_events.god +20 -1
- data/test/configs/child_polls/child_polls.god +26 -6
- data/test/configs/child_polls/simple_server.rb +10 -1
- data/test/configs/contact/contact.god +74 -0
- data/test/configs/contact/simple_server.rb +3 -0
- data/test/configs/daemon_events/daemon_events.god +5 -2
- data/test/configs/daemon_events/simple_server.rb +2 -0
- data/test/configs/daemon_events/simple_server_stop.rb +9 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +1 -3
- data/test/configs/task/logs/.placeholder +0 -0
- data/test/configs/task/task.god +26 -0
- data/test/helper.rb +19 -11
- data/test/test_conditions_http_response_code.rb +115 -0
- data/test/test_conditions_process_running.rb +2 -2
- data/test/test_conditions_tries.rb +21 -0
- data/test/test_contact.rb +109 -0
- data/test/test_god.rb +101 -17
- data/test/test_hub.rb +64 -1
- data/test/test_process.rb +43 -56
- data/test/{test_server.rb → test_socket.rb} +6 -20
- data/test/test_task.rb +86 -0
- data/test/test_trigger.rb +59 -0
- data/test/test_watch.rb +32 -7
- metadata +27 -8
- data/lib/god/reporter.rb +0 -25
- data/lib/god/server.rb +0 -37
- data/test/test_reporter.rb +0 -18
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
module God
  module Conditions

    # Poll condition that issues an HTTP HEAD request against host:port/path
    # and triggers depending on the numeric response code.
    #
    # Exactly one of +code_is+ / +code_is_not+ must be configured:
    #   * +code_is+     - the check "passes" when the response code is listed
    #   * +code_is_not+ - the check "passes" when the response code is NOT listed
    #
    # The condition itself returns true once at least times[0] of the checks
    # recorded in the timeline have passed. The timeline is created with
    # times[1] as its argument (presumably its maximum length - confirm
    # against Timeline).
    #
    # When no response code can be obtained at all (timeout, refused or reset
    # connection, premature EOF) the check fails if +code_is+ is configured
    # and passes if +code_is_not+ is configured.
    class HttpResponseCode < PollCondition
      attr_accessor :code_is,      # e.g. 500 or '500' or [404, 500] or %w{404 500}
                    :code_is_not,  # e.g. 200 or '200' or [200, 302] or %w{200 302}
                    :times,        # e.g. 3 or [3, 5]
                    :host,         # e.g. www.example.com
                    :port,         # e.g. 8080
                    :timeout,      # e.g. 60.seconds
                    :path          # e.g. '/'

      # Defaults +times+ to [1, 1].
      def initialize
        super
        self.times = [1, 1]
      end

      # Normalize the configured attributes: response codes become Arrays of
      # Integers, an Integer +times+ becomes the pair [times, times], and the
      # two timelines (pass/fail flags and printable history) are created.
      def prepare
        self.code_is = Array(self.code_is).map { |x| x.to_i } if self.code_is
        self.code_is_not = Array(self.code_is_not).map { |x| x.to_i } if self.code_is_not

        if self.times.kind_of?(Integer)
          self.times = [self.times, self.times]
        end

        @timeline = Timeline.new(self.times[1])
        @history = Timeline.new(self.times[1])
      end

      # Forget all recorded results.
      def reset
        @timeline.clear
        @history.clear
      end

      # Returns true when host, port, path and timeout are set and exactly one
      # of code_is / code_is_not is set; complains about every problem found.
      def valid?
        valid = true
        valid &= complain("Attribute 'host' must be specified", self) if self.host.nil?
        valid &= complain("Attribute 'port' must be specified", self) if self.port.nil?
        valid &= complain("Attribute 'path' must be specified", self) if self.path.nil?
        valid &= complain("One (and only one) of attributes 'code_is' and 'code_is_not' must be specified", self) if
          (self.code_is.nil? && self.code_is_not.nil?) || (self.code_is && self.code_is_not)
        valid &= complain("Attribute 'timeout' must be specified", self) if self.timeout.nil?
        valid
      end

      # Perform one HEAD request and record the outcome.
      #
      # Returns true when the condition is satisfied, false otherwise.
      def test
        response = nil

        Net::HTTP.start(self.host, self.port) do |http|
          http.read_timeout = self.timeout
          response = http.head(self.path)
        end

        actual_response_code = response.code.to_i
        if self.code_is && self.code_is.include?(actual_response_code)
          pass(actual_response_code)
        elsif self.code_is_not && !self.code_is_not.include?(actual_response_code)
          pass(actual_response_code)
        else
          fail(actual_response_code)
        end
      rescue Timeout::Error
        no_response('Timeout')
      rescue Errno::ETIMEDOUT
        # OS-level connect timeout; same meaning as Timeout::Error
        no_response('Timeout')
      rescue Errno::ECONNREFUSED
        no_response('Refused')
      rescue Errno::ECONNRESET
        no_response('Reset')
      rescue EOFError
        no_response('EOF')
      end

      private

      # Record a check that produced no response code at all. +reason+ is the
      # label shown in the history. Absence of a code can never match
      # +code_is+, and always satisfies +code_is_not+.
      def no_response(reason)
        self.code_is ? fail(reason) : pass(reason)
      end

      # Record a passing check; returns true once enough checks in the
      # timeline have passed (>= times[0]).
      def pass(code)
        @timeline << true
        if @timeline.select { |x| x }.size >= self.times.first
          self.info = "http response abnormal #{history(code, true)}"
          true
        else
          self.info = "http response nominal #{history(code, true)}"
          false
        end
      end

      # Record a failing check; always returns false.
      # NOTE: intentionally shadows Kernel#fail inside this class.
      def fail(code)
        @timeline << false
        self.info = "http response nominal #{history(code, false)}"
        false
      end

      # Append +code+ to the printable history (prefixed with '*' when the
      # check passed) and return the whole history as "[a, *b, c]".
      def history(code, passed)
        entry = code.to_s.dup
        entry = '*' + entry if passed
        @history << entry
        '[' + @history.join(", ") + ']'
      end

    end

  end
end
|
@@ -6,12 +6,18 @@ module God
|
|
6
6
|
|
7
7
|
def valid?
|
8
8
|
valid = true
|
9
|
-
valid &= complain("
|
9
|
+
valid &= complain("Attribute 'lambda' must be specified", self) if self.lambda.nil?
|
10
10
|
valid
|
11
11
|
end
|
12
12
|
|
13
13
|
def test
|
14
|
-
self.lambda.call()
|
14
|
+
if self.lambda.call()
|
15
|
+
self.info = "lambda condition was satisfied"
|
16
|
+
true
|
17
|
+
else
|
18
|
+
self.info = "lambda condition was not satisfied"
|
19
|
+
false
|
20
|
+
end
|
15
21
|
end
|
16
22
|
end
|
17
23
|
|
@@ -17,24 +17,32 @@ module God
|
|
17
17
|
|
18
18
|
@timeline = Timeline.new(self.times[1])
|
19
19
|
end
|
20
|
-
|
20
|
+
|
21
|
+
def reset
|
22
|
+
@timeline.clear
|
23
|
+
end
|
24
|
+
|
21
25
|
def valid?
|
22
26
|
valid = true
|
23
|
-
valid &= complain("
|
24
|
-
valid &= complain("
|
27
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
28
|
+
valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
|
25
29
|
valid
|
26
30
|
end
|
27
|
-
|
31
|
+
|
28
32
|
def test
|
29
33
|
return false unless File.exist?(self.watch.pid_file)
|
30
34
|
|
31
35
|
pid = File.read(self.watch.pid_file).strip
|
32
36
|
process = System::Process.new(pid)
|
33
37
|
@timeline.push(process.memory)
|
38
|
+
|
39
|
+
history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}kb" }.join(", ") + "]"
|
40
|
+
|
34
41
|
if @timeline.select { |x| x > self.above }.size >= self.times.first
|
35
|
-
|
42
|
+
self.info = "memory out of bounds #{history}"
|
36
43
|
return true
|
37
44
|
else
|
45
|
+
self.info = "memory within bounds #{history}"
|
38
46
|
return false
|
39
47
|
end
|
40
48
|
end
|
@@ -2,9 +2,13 @@ module God
|
|
2
2
|
module Conditions
|
3
3
|
|
4
4
|
class ProcessExits < EventCondition
|
5
|
+
def initialize
|
6
|
+
self.info = "process exited"
|
7
|
+
end
|
8
|
+
|
5
9
|
def valid?
|
6
10
|
valid = true
|
7
|
-
valid &= complain("
|
11
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
8
12
|
valid
|
9
13
|
end
|
10
14
|
|
@@ -21,8 +25,12 @@ module God
|
|
21
25
|
end
|
22
26
|
|
23
27
|
def deregister
|
24
|
-
|
25
|
-
|
28
|
+
if File.exist?(self.watch.pid_file)
|
29
|
+
pid = File.read(self.watch.pid_file).strip.to_i
|
30
|
+
EventHandler.deregister(pid, :proc_exit)
|
31
|
+
else
|
32
|
+
LOG.log(self.watch, :error, "#{self.watch.name} could not deregister: no such PID file #{self.watch.pid_file} (#{self.base_name})")
|
33
|
+
end
|
26
34
|
end
|
27
35
|
end
|
28
36
|
|
@@ -6,18 +6,36 @@ module God
|
|
6
6
|
|
7
7
|
def valid?
|
8
8
|
valid = true
|
9
|
-
valid &= complain("
|
10
|
-
valid &= complain("
|
9
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
10
|
+
valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
|
11
11
|
valid
|
12
12
|
end
|
13
13
|
|
14
14
|
def test
|
15
|
-
|
15
|
+
self.info = []
|
16
|
+
|
17
|
+
unless File.exist?(self.watch.pid_file)
|
18
|
+
self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
|
19
|
+
return !self.running
|
20
|
+
end
|
16
21
|
|
17
22
|
pid = File.read(self.watch.pid_file).strip
|
18
23
|
active = System::Process.new(pid).exists?
|
19
24
|
|
20
|
-
|
25
|
+
if (self.running && active)
|
26
|
+
self.info << "process is running"
|
27
|
+
true
|
28
|
+
elsif (!self.running && !active)
|
29
|
+
self.info << "process is not running"
|
30
|
+
true
|
31
|
+
else
|
32
|
+
if self.running
|
33
|
+
self.info << "process is not running"
|
34
|
+
else
|
35
|
+
self.info << "process is running"
|
36
|
+
end
|
37
|
+
false
|
38
|
+
end
|
21
39
|
end
|
22
40
|
end
|
23
41
|
|
data/lib/god/conditions/tries.rb
CHANGED
@@ -7,23 +7,34 @@ module God
|
|
7
7
|
def prepare
|
8
8
|
@timeline = Timeline.new(self.times)
|
9
9
|
end
|
10
|
-
|
10
|
+
|
11
|
+
def reset
|
12
|
+
@timeline.clear
|
13
|
+
end
|
14
|
+
|
11
15
|
def valid?
|
12
16
|
valid = true
|
13
|
-
valid &= complain("
|
17
|
+
valid &= complain("Attribute 'times' must be specified", self) if self.times.nil?
|
14
18
|
valid
|
15
19
|
end
|
16
|
-
|
20
|
+
|
17
21
|
def test
|
18
22
|
@timeline << Time.now
|
19
23
|
|
20
24
|
concensus = (@timeline.size == self.times)
|
21
|
-
duration = within.nil? || (@timeline.last - @timeline.first) < self.within
|
25
|
+
duration = self.within.nil? || (@timeline.last - @timeline.first) < self.within
|
26
|
+
|
27
|
+
if within
|
28
|
+
history = "[#{@timeline.size}/#{self.times} within #{(@timeline.last - @timeline.first).to_i}s]"
|
29
|
+
else
|
30
|
+
history = "[#{@timeline.size}/#{self.times}]"
|
31
|
+
end
|
22
32
|
|
23
33
|
if concensus && duration
|
24
|
-
|
34
|
+
self.info = "tries exceeded #{history}"
|
25
35
|
return true
|
26
36
|
else
|
37
|
+
self.info = "tries within bounds #{history}"
|
27
38
|
return false
|
28
39
|
end
|
29
40
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module God

  # Mixin with the configuration lifecycle hooks (prepare / reset / valid?)
  # and naming and error-reporting helpers for configurable objects.
  module Configurable
    # Override this method in your Configurable (optional)
    #
    # Called once after the Configurable has been sent to the block and attributes have been
    # set. Do any post-processing on attributes here
    def prepare

    end

    # Override this method in your Configurable (optional)
    #
    # Does nothing by default.
    def reset

    end

    # Override this method in your Configurable (optional)
    #
    # Called once during evaluation of the config file. Return true if valid, false otherwise
    #
    # A convenience method 'complain' is available that will print out a message and return false,
    # making it easy to report multiple validation errors:
    #
    #   def valid?
    #     valid = true
    #     valid &= complain("You must specify the 'pid_file' attribute for :memory_usage") if self.pid_file.nil?
    #     valid &= complain("You must specify the 'above' attribute for :memory_usage") if self.above.nil?
    #     valid
    #   end
    def valid?
      true
    end

    # The class name without its enclosing namespaces,
    # e.g. "God::Contacts::Email" => "Email".
    def base_name
      self.class.name.split('::').last
    end

    # Human-readable name used in messages; defaults to base_name.
    def friendly_name
      base_name
    end

    # Report a configuration problem to syslog and stdout.
    #
    # +text+ is the message
    # +c+ is the (optional) object the message is about; when given,
    #     " for <friendly_name>" is appended to the message
    #
    # Returns false (always), so the result can be and-ed into a validity flag
    def self.complain(text, c = nil)
      msg = text
      msg += " for #{c.friendly_name}" if c
      # Syslog.err takes a printf-style format string; pass the message as an
      # argument so a literal '%' in it (e.g. from a user-chosen name) is not
      # interpreted as a format directive.
      Syslog.err('%s', msg)
      puts msg
      false
    end

    # Instance-level shortcut for Configurable.complain.
    def complain(text, c = nil)
      Configurable.complain(text, c)
    end
  end

end
|
data/lib/god/contact.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
module God

  # Base class for notification contacts. Concrete contacts live in
  # God::Contacts and must override #notify.
  class Contact
    include Configurable

    attr_accessor :name, :group, :info

    # Instantiate the contact class named by +kind+.
    # +kind+ is a Symbol or String such as :email or :my_pager; it is
    #        camel-cased ("MyPager") and looked up in God::Contacts
    #
    # Returns the new contact instance
    # Raises NoSuchContactError if no such constant can be found
    # Aborts if the class found does not descend from God::Contact
    def self.generate(kind)
      sym = kind.to_s.capitalize.gsub(/_(.)/){$1.upcase}.intern

      # Only the constant lookup is guarded: a NameError/NoMethodError raised
      # by the contact's own initializer is a bug in that contact and must not
      # be misreported as "no such contact".
      begin
        klass = God::Contacts.const_get(sym)
      rescue NameError
        raise NoSuchContactError.new("No Contact found with the class name God::Contacts::#{sym}")
      end

      c = klass.new

      unless c.kind_of?(Contact)
        abort "Contact '#{c.class.name}' must subclass God::Contact"
      end

      c
    end

    # Class-level validation common to every contact.
    # +contact+ is the contact to check
    #
    # Returns true if +contact+ has a name, false (after complaining) otherwise
    def self.valid?(contact)
      valid = true
      valid &= Configurable.complain("Attribute 'name' must be specified", contact) if contact.name.nil?
      valid
    end

    # Normalize the given notify specification into canonical form.
    #   +spec+ is the notify spec as a String, Array of Strings, or Hash
    #
    # Canonical form looks like:
    #   {:contacts => ['fred', 'john'], :priority => '1', :category => 'awesome'}
    # Where :contacts will be present and point to an Array of Strings. Both
    # :priority and :category may not be present but if they are, they will each
    # contain a single String.
    #
    # Note that a Hash +spec+ is normalized in place and returned; String and
    # Array specs produce a new Hash.
    #
    # Returns normalized notify spec
    # Raises ArgumentError on invalid spec (message contains details)
    def self.normalize(spec)
      case spec
        when String
          {:contacts => Array(spec)}
        when Array
          unless spec.all? { |x| x.instance_of?(String) }
            raise ArgumentError.new("contains non-String elements")
          end
          {:contacts => spec}
        when Hash
          # validate against a scratch copy so keys can be removed as they
          # are checked without touching the caller's hash
          copy = spec.dup

          # check :contacts
          if contacts = copy.delete(:contacts)
            case contacts
              when String
                # valid
              when Array
                unless contacts.all? { |x| x.instance_of?(String) }
                  raise ArgumentError.new("has a :contacts key containing non-String elements")
                end
                # valid
              else
                raise ArgumentError.new("must have a :contacts key pointing to a String or Array of Strings")
            end
          else
            raise ArgumentError.new("must have a :contacts key")
          end

          # remove priority and category
          copy.delete(:priority)
          copy.delete(:category)

          # check for invalid keys
          unless copy.empty?
            raise ArgumentError.new("contains extra elements: #{copy.inspect}")
          end

          # normalize
          spec[:contacts] &&= Array(spec[:contacts])
          spec[:priority] &&= spec[:priority].to_s
          spec[:category] &&= spec[:category].to_s

          spec
        else
          raise ArgumentError.new("must be a String (contact name), Array (of contact names), or Hash (contact specification)")
      end
    end

    # Abstract
    # Send the message to the external source
    #   +message+ is the message body returned from the condition
    #   +time+ is the Time at which the notification was made
    #   +priority+ is the arbitrary priority String
    #   +category+ is the arbitrary category String
    #   +host+ is the hostname of the server
    def notify(message, time, priority, category, host)
      raise AbstractMethodNotOverriddenError.new("Contact#notify must be overridden in subclasses")
    end

    # Construct the friendly name of this Contact, looks like:
    #
    #   Email Contact 'fred'
    #
    # i.e. the class base name followed by " Contact '<name>'".
    def friendly_name
      super + " Contact '#{self.name}'"
    end
  end

end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'net/smtp'
|
3
|
+
|
4
|
+
module God
  module Contacts

    # Contact that delivers notifications by email over SMTP.
    #
    # Class-level settings (shared by all Email contacts):
    #   message_settings - Hash; :from is the sender address
    #   delivery_method  - defaults to :smtp (not consulted by #notify)
    #   server_settings  - Hash of SMTP connection settings (see below)
    #   format           - lambda that renders the raw message (headers + body)
    class Email < Contact
      class << self
        attr_accessor :message_settings, :delivery_method, :server_settings, :format
      end

      self.message_settings = {:from => 'god@example.com'}

      self.delivery_method = :smtp

      self.server_settings = {:address => 'localhost',
                              :port => 25}
                           # :domain
                           # :user_name
                           # :password
                           # :authentication

      # Default message template. The heredoc body deliberately starts in
      # column 0: leading whitespace would turn the header lines into folded
      # continuation lines.
      self.format = lambda do |name, email, message, time, priority, category, host|
        # Date header reflects when the notification was made; fall back to
        # the current time if no usable Time was supplied.
        sent_at = time.respond_to?(:httpdate) ? time : Time.now
        # A Message-Id must differ from message to message; a fixed value
        # lets mail software treat separate notifications as duplicates.
        message_id = "#{rand(1_000_000_000).to_s(36)}.#{Process.pid}.#{self.message_settings[:from]}"
        <<-EOF
From: god <#{self.message_settings[:from]}>
To: #{name} <#{email}>
Subject: [god] #{message}
Date: #{sent_at.httpdate}
Message-Id: <#{message_id}>

Message: #{message}
Host: #{host}
Priority: #{priority}
Category: #{category}
        EOF
      end

      # Recipient address of this contact.
      attr_accessor :email

      # Returns true if an email address has been configured, false (after
      # complaining) otherwise.
      def valid?
        valid = true
        valid &= complain("Attribute 'email' must be specified", self) if self.email.nil?
        valid
      end

      # Send the notification by email.
      #   +message+ is the message body returned from the condition
      #   +time+ is the Time at which the notification was made
      #   +priority+ is the arbitrary priority String
      #   +category+ is the arbitrary category String
      #   +host+ is the hostname of the server
      #
      # Never raises for delivery problems: the error is printed and the
      # outcome (success or failure) is stored in +info+.
      def notify(message, time, priority, category, host)
        begin
          body = Email.format.call(self.name, self.email, message, time, priority, category, host)

          args = [Email.server_settings[:address], Email.server_settings[:port]]
          # credentials are only passed along when an authentication type is configured
          if Email.server_settings[:authentication]
            args << Email.server_settings[:domain]
            args << Email.server_settings[:user_name]
            args << Email.server_settings[:password]
            args << Email.server_settings[:authentication]
          end

          Net::SMTP.start(*args) do |smtp|
            smtp.send_message body, Email.message_settings[:from], self.email
          end

          self.info = "sent email to #{self.email}"
        rescue => e
          puts e.message
          puts e.backtrace.join("\n")

          self.info = "failed to send email to #{self.email}: #{e.message}"
        end
      end
    end

  end
end
|