god 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +43 -7
- data/Manifest.txt +20 -4
- data/Rakefile +1 -1
- data/bin/god +263 -195
- data/examples/events.god +66 -34
- data/examples/gravatar.god +25 -12
- data/init/god +42 -0
- data/lib/god/behavior.rb +9 -29
- data/lib/god/behaviors/clean_pid_file.rb +6 -2
- data/lib/god/behaviors/notify_when_flapping.rb +4 -4
- data/lib/god/condition.rb +48 -6
- data/lib/god/conditions/always.rb +5 -1
- data/lib/god/conditions/cpu_usage.rb +13 -5
- data/lib/god/conditions/degrading_lambda.rb +8 -3
- data/lib/god/conditions/flapping.rb +97 -0
- data/lib/god/conditions/http_response_code.rb +97 -0
- data/lib/god/conditions/lambda.rb +8 -2
- data/lib/god/conditions/memory_usage.rb +13 -5
- data/lib/god/conditions/process_exits.rb +11 -3
- data/lib/god/conditions/process_running.rb +22 -4
- data/lib/god/conditions/tries.rb +16 -5
- data/lib/god/configurable.rb +54 -0
- data/lib/god/contact.rb +106 -0
- data/lib/god/contacts/email.rb +73 -0
- data/lib/god/errors.rb +3 -0
- data/lib/god/hub.rb +138 -33
- data/lib/god/logger.rb +21 -4
- data/lib/god/metric.rb +3 -4
- data/lib/god/process.rb +93 -49
- data/lib/god/socket.rb +60 -0
- data/lib/god/task.rb +233 -0
- data/lib/god/trigger.rb +43 -0
- data/lib/god/watch.rb +48 -114
- data/lib/god.rb +216 -63
- data/test/configs/child_events/child_events.god +20 -1
- data/test/configs/child_polls/child_polls.god +26 -6
- data/test/configs/child_polls/simple_server.rb +10 -1
- data/test/configs/contact/contact.god +74 -0
- data/test/configs/contact/simple_server.rb +3 -0
- data/test/configs/daemon_events/daemon_events.god +5 -2
- data/test/configs/daemon_events/simple_server.rb +2 -0
- data/test/configs/daemon_events/simple_server_stop.rb +9 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +1 -3
- data/test/configs/task/logs/.placeholder +0 -0
- data/test/configs/task/task.god +26 -0
- data/test/helper.rb +19 -11
- data/test/test_conditions_http_response_code.rb +115 -0
- data/test/test_conditions_process_running.rb +2 -2
- data/test/test_conditions_tries.rb +21 -0
- data/test/test_contact.rb +109 -0
- data/test/test_god.rb +101 -17
- data/test/test_hub.rb +64 -1
- data/test/test_process.rb +43 -56
- data/test/{test_server.rb → test_socket.rb} +6 -20
- data/test/test_task.rb +86 -0
- data/test/test_trigger.rb +59 -0
- data/test/test_watch.rb +32 -7
- metadata +27 -8
- data/lib/god/reporter.rb +0 -25
- data/lib/god/server.rb +0 -37
- data/test/test_reporter.rb +0 -18
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
module God
|
4
|
+
module Conditions
|
5
|
+
|
6
|
+
class HttpResponseCode < PollCondition
|
7
|
+
attr_accessor :code_is, # e.g. 500 or '500' or [404, 500] or %w{404 500}
|
8
|
+
:code_is_not, # e.g. 200 or '200' or [200, 302] or %w{200 302}
|
9
|
+
:times, # e.g. 3 or [3, 5]
|
10
|
+
:host, # e.g. www.example.com
|
11
|
+
:port, # e.g. 8080
|
12
|
+
:timeout, # e.g. 60.seconds
|
13
|
+
:path # e.g. '/'
|
14
|
+
|
15
|
+
def initialize
|
16
|
+
super
|
17
|
+
self.times = [1, 1]
|
18
|
+
end
|
19
|
+
|
20
|
+
def prepare
|
21
|
+
self.code_is = Array(self.code_is).map { |x| x.to_i } if self.code_is
|
22
|
+
self.code_is_not = Array(self.code_is_not).map { |x| x.to_i } if self.code_is_not
|
23
|
+
|
24
|
+
if self.times.kind_of?(Integer)
|
25
|
+
self.times = [self.times, self.times]
|
26
|
+
end
|
27
|
+
|
28
|
+
@timeline = Timeline.new(self.times[1])
|
29
|
+
@history = Timeline.new(self.times[1])
|
30
|
+
end
|
31
|
+
|
32
|
+
def reset
|
33
|
+
@timeline.clear
|
34
|
+
@history.clear
|
35
|
+
end
|
36
|
+
|
37
|
+
def valid?
|
38
|
+
valid = true
|
39
|
+
valid &= complain("Attribute 'host' must be specified", self) if self.host.nil?
|
40
|
+
valid &= complain("Attribute 'port' must be specified", self) if self.port.nil?
|
41
|
+
valid &= complain("Attribute 'path' must be specified", self) if self.path.nil?
|
42
|
+
valid &= complain("One (and only one) of attributes 'code_is' and 'code_is_not' must be specified", self) if
|
43
|
+
(self.code_is.nil? && self.code_is_not.nil?) || (self.code_is && self.code_is_not)
|
44
|
+
valid &= complain("Attribute 'timeout' must be specified", self) if self.timeout.nil?
|
45
|
+
valid
|
46
|
+
end
|
47
|
+
|
48
|
+
# Poll the configured endpoint with a HEAD request and classify the outcome.
#
# A response whose status code is listed in +code_is+, or is missing from
# +code_is_not+, is handed to +pass+; any other response goes to +fail+.
#
# When no response can be obtained at all (timeout, refused or reset
# connection, connection closed early, unresolvable host, other socket level
# errors) there is no status code to compare. In that case the outcome is a
# +fail+ when +code_is+ is set and a +pass+ otherwise, and a short label
# ('Timeout', 'Refused', 'Reset', 'EOF' or the error class name) is recorded
# in place of the code.
#
# Returns whatever +pass+ or +fail+ returns.
def test
  response = nil

  # Configure the connection before starting it so that the timeout bounds
  # the connect phase as well as the read.
  http = Net::HTTP.new(self.host, self.port)
  http.open_timeout = self.timeout
  http.read_timeout = self.timeout
  http.start { |conn| response = conn.head(self.path) }

  actual_response_code = response.code.to_i
  if self.code_is && self.code_is.include?(actual_response_code)
    pass(actual_response_code)
  elsif self.code_is_not && !self.code_is_not.include?(actual_response_code)
    pass(actual_response_code)
  else
    fail(actual_response_code)
  end
rescue Timeout::Error, EOFError, SocketError, SystemCallError => e
  # An unreachable service must not raise out of the poll; treat it as
  # "no acceptable code was returned".
  label = case e
          when Timeout::Error then 'Timeout'
          when Errno::ECONNREFUSED then 'Refused'
          when Errno::ECONNRESET then 'Reset'
          when EOFError then 'EOF'
          else e.class.name.split('::').last
          end
  self.code_is ? fail(label) : pass(label)
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
def pass(code)
|
71
|
+
@timeline << true
|
72
|
+
if @timeline.select { |x| x }.size >= self.times.first
|
73
|
+
self.info = "http response abnormal #{history(code, true)}"
|
74
|
+
true
|
75
|
+
else
|
76
|
+
self.info = "http response nominal #{history(code, true)}"
|
77
|
+
false
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def fail(code)
|
82
|
+
@timeline << false
|
83
|
+
self.info = "http response nominal #{history(code, false)}"
|
84
|
+
false
|
85
|
+
end
|
86
|
+
|
87
|
+
def history(code, passed)
|
88
|
+
entry = code.to_s.dup
|
89
|
+
entry = '*' + entry if passed
|
90
|
+
@history << entry
|
91
|
+
'[' + @history.join(", ") + ']'
|
92
|
+
end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
end
|
@@ -6,12 +6,18 @@ module God
|
|
6
6
|
|
7
7
|
def valid?
|
8
8
|
valid = true
|
9
|
-
valid &= complain("
|
9
|
+
valid &= complain("Attribute 'lambda' must be specified", self) if self.lambda.nil?
|
10
10
|
valid
|
11
11
|
end
|
12
12
|
|
13
13
|
def test
|
14
|
-
self.lambda.call()
|
14
|
+
if self.lambda.call()
|
15
|
+
self.info = "lambda condition was satisfied"
|
16
|
+
true
|
17
|
+
else
|
18
|
+
self.info = "lambda condition was not satisfied"
|
19
|
+
false
|
20
|
+
end
|
15
21
|
end
|
16
22
|
end
|
17
23
|
|
@@ -17,24 +17,32 @@ module God
|
|
17
17
|
|
18
18
|
@timeline = Timeline.new(self.times[1])
|
19
19
|
end
|
20
|
-
|
20
|
+
|
21
|
+
def reset
|
22
|
+
@timeline.clear
|
23
|
+
end
|
24
|
+
|
21
25
|
def valid?
|
22
26
|
valid = true
|
23
|
-
valid &= complain("
|
24
|
-
valid &= complain("
|
27
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
28
|
+
valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
|
25
29
|
valid
|
26
30
|
end
|
27
|
-
|
31
|
+
|
28
32
|
def test
|
29
33
|
return false unless File.exist?(self.watch.pid_file)
|
30
34
|
|
31
35
|
pid = File.read(self.watch.pid_file).strip
|
32
36
|
process = System::Process.new(pid)
|
33
37
|
@timeline.push(process.memory)
|
38
|
+
|
39
|
+
history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}kb" }.join(", ") + "]"
|
40
|
+
|
34
41
|
if @timeline.select { |x| x > self.above }.size >= self.times.first
|
35
|
-
|
42
|
+
self.info = "memory out of bounds #{history}"
|
36
43
|
return true
|
37
44
|
else
|
45
|
+
self.info = "memory within bounds #{history}"
|
38
46
|
return false
|
39
47
|
end
|
40
48
|
end
|
@@ -2,9 +2,13 @@ module God
|
|
2
2
|
module Conditions
|
3
3
|
|
4
4
|
class ProcessExits < EventCondition
|
5
|
+
def initialize
|
6
|
+
self.info = "process exited"
|
7
|
+
end
|
8
|
+
|
5
9
|
def valid?
|
6
10
|
valid = true
|
7
|
-
valid &= complain("
|
11
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
8
12
|
valid
|
9
13
|
end
|
10
14
|
|
@@ -21,8 +25,12 @@ module God
|
|
21
25
|
end
|
22
26
|
|
23
27
|
def deregister
|
24
|
-
|
25
|
-
|
28
|
+
if File.exist?(self.watch.pid_file)
|
29
|
+
pid = File.read(self.watch.pid_file).strip.to_i
|
30
|
+
EventHandler.deregister(pid, :proc_exit)
|
31
|
+
else
|
32
|
+
LOG.log(self.watch, :error, "#{self.watch.name} could not deregister: no such PID file #{self.watch.pid_file} (#{self.base_name})")
|
33
|
+
end
|
26
34
|
end
|
27
35
|
end
|
28
36
|
|
@@ -6,18 +6,36 @@ module God
|
|
6
6
|
|
7
7
|
def valid?
|
8
8
|
valid = true
|
9
|
-
valid &= complain("
|
10
|
-
valid &= complain("
|
9
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
10
|
+
valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
|
11
11
|
valid
|
12
12
|
end
|
13
13
|
|
14
14
|
def test
|
15
|
-
|
15
|
+
self.info = []
|
16
|
+
|
17
|
+
unless File.exist?(self.watch.pid_file)
|
18
|
+
self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
|
19
|
+
return !self.running
|
20
|
+
end
|
16
21
|
|
17
22
|
pid = File.read(self.watch.pid_file).strip
|
18
23
|
active = System::Process.new(pid).exists?
|
19
24
|
|
20
|
-
|
25
|
+
if (self.running && active)
|
26
|
+
self.info << "process is running"
|
27
|
+
true
|
28
|
+
elsif (!self.running && !active)
|
29
|
+
self.info << "process is not running"
|
30
|
+
true
|
31
|
+
else
|
32
|
+
if self.running
|
33
|
+
self.info << "process is not running"
|
34
|
+
else
|
35
|
+
self.info << "process is running"
|
36
|
+
end
|
37
|
+
false
|
38
|
+
end
|
21
39
|
end
|
22
40
|
end
|
23
41
|
|
data/lib/god/conditions/tries.rb
CHANGED
@@ -7,23 +7,34 @@ module God
|
|
7
7
|
def prepare
|
8
8
|
@timeline = Timeline.new(self.times)
|
9
9
|
end
|
10
|
-
|
10
|
+
|
11
|
+
def reset
|
12
|
+
@timeline.clear
|
13
|
+
end
|
14
|
+
|
11
15
|
def valid?
|
12
16
|
valid = true
|
13
|
-
valid &= complain("
|
17
|
+
valid &= complain("Attribute 'times' must be specified", self) if self.times.nil?
|
14
18
|
valid
|
15
19
|
end
|
16
|
-
|
20
|
+
|
17
21
|
def test
|
18
22
|
@timeline << Time.now
|
19
23
|
|
20
24
|
concensus = (@timeline.size == self.times)
|
21
|
-
duration = within.nil? || (@timeline.last - @timeline.first) < self.within
|
25
|
+
duration = self.within.nil? || (@timeline.last - @timeline.first) < self.within
|
26
|
+
|
27
|
+
if within
|
28
|
+
history = "[#{@timeline.size}/#{self.times} within #{(@timeline.last - @timeline.first).to_i}s]"
|
29
|
+
else
|
30
|
+
history = "[#{@timeline.size}/#{self.times}]"
|
31
|
+
end
|
22
32
|
|
23
33
|
if concensus && duration
|
24
|
-
|
34
|
+
self.info = "tries exceeded #{history}"
|
25
35
|
return true
|
26
36
|
else
|
37
|
+
self.info = "tries within bounds #{history}"
|
27
38
|
return false
|
28
39
|
end
|
29
40
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module God

  # Hooks shared by objects that are set up through a block in a config file.
  # Including classes may override +prepare+, +reset+, +valid?+ and
  # +friendly_name+; the defaults defined here do nothing / report success.
  module Configurable
    # Override this method in your Configurable (optional)
    #
    # Called once after the Configurable has been sent to the block and attributes have been
    # set. Do any post-processing on attributes here
    def prepare

    end

    # Override this method in your Configurable (optional)
    #
    # The default does nothing. Implementations that accumulate state between
    # runs can discard it here.
    def reset

    end

    # Override this method in your Configurable (optional)
    #
    # Called once during evaluation of the config file. Return true if valid, false otherwise
    #
    # A convenience method 'complain' is available that will print out a message and return false,
    # making it easy to report multiple validation errors:
    #
    #   def valid?
    #     valid = true
    #     valid &= complain("You must specify the 'pid_file' attribute for :memory_usage") if self.pid_file.nil?
    #     valid &= complain("You must specify the 'above' attribute for :memory_usage") if self.above.nil?
    #     valid
    #   end
    def valid?
      true
    end

    # Returns the class name of this object without its enclosing namespaces,
    # e.g. "Email" for God::Contacts::Email.
    def base_name
      self.class.name.split('::').last
    end

    # Returns the name used for this object in messages. Defaults to +base_name+.
    def friendly_name
      base_name
    end

    # Report a validation problem to syslog and to standard output.
    #
    # +text+ is the message
    # +c+ is an optional object responding to +friendly_name+; when given,
    #     " for <friendly_name>" is appended to the message
    #
    # Returns false, so the result can be and-ed into a validity flag
    def self.complain(text, c = nil)
      msg = text
      msg += " for #{c.friendly_name}" if c
      # Syslog.err interprets its first argument as a format string, so the
      # message is passed as data; a literal '%' in it would otherwise raise
      # or be misformatted.
      Syslog.err('%s', msg)
      puts msg
      false
    end

    # Instance level shortcut for Configurable.complain (same arguments and
    # return value).
    def complain(text, c = nil)
      Configurable.complain(text, c)
    end
  end

end
|
data/lib/god/contact.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
module God
|
2
|
+
|
3
|
+
class Contact
|
4
|
+
include Configurable
|
5
|
+
|
6
|
+
attr_accessor :name, :group, :info
|
7
|
+
|
8
|
+
# Instantiate the Contact class registered under God::Contacts for +kind+.
#
# +kind+ is the underscored contact type as a Symbol or String; it is turned
#        into a constant name by capitalizing it and camel-casing each
#        "_x" (e.g. :email => Email, 'camp_fire' => CampFire)
#
# Returns the new contact instance
# Raises NoSuchContactError if God::Contacts has no such constant
# Aborts if the class found does not descend from Contact
def self.generate(kind)
  sym = kind.to_s.capitalize.gsub(/_(.)/) { $1.upcase }.intern

  # Only the constant lookup is guarded: a NameError (or NoMethodError, its
  # subclass) raised inside the contact's own constructor is a bug in that
  # class and must not be reported as "no such contact".
  begin
    klass = God::Contacts.const_get(sym)
  rescue NameError
    raise NoSuchContactError.new("No Contact found with the class name God::Contacts::#{sym}")
  end

  c = klass.new

  unless c.kind_of?(Contact)
    abort "Contact '#{c.class.name}' must subclass God::Contact"
  end

  c
end
|
20
|
+
|
21
|
+
def self.valid?(contact)
|
22
|
+
valid = true
|
23
|
+
valid &= Configurable.complain("Attribute 'name' must be specified", contact) if contact.name.nil?
|
24
|
+
valid
|
25
|
+
end
|
26
|
+
|
27
|
+
# Normalize the given notify specification into canonical form.
#   +spec+ is the notify spec as a String, Array of Strings, or Hash
#
# Canonical form looks like:
#   {:contacts => ['fred', 'john'], :priority => '1', :category => 'awesome'}
# Where :contacts will be present and point to an Array of Strings. Both
# :priority and :category may not be present but if they are, they will each
# contain a single String.
#
# The given +spec+ is never modified; a Hash spec is normalized on a copy, so
# frozen or shared specs are safe to pass.
#
# Returns normalized notify spec
# Raises ArgumentError on invalid spec (message contains details)
def self.normalize(spec)
  case spec
  when String
    {:contacts => [spec]}
  when Array
    unless spec.all? { |x| x.instance_of?(String) }
      raise ArgumentError.new("contains non-String elements")
    end
    {:contacts => spec}
  when Hash
    # check :contacts
    contacts = spec[:contacts]
    case contacts
    when String
      # valid
    when Array
      unless contacts.all? { |x| x.instance_of?(String) }
        raise ArgumentError.new("has a :contacts key containing non-String elements")
      end
      # valid
    when nil, false
      raise ArgumentError.new("must have a :contacts key")
    else
      raise ArgumentError.new("must have a :contacts key pointing to a String or Array of Strings")
    end

    # check for invalid keys (anything besides contacts, priority and category)
    extra = spec.reject { |key, value| [:contacts, :priority, :category].include?(key) }
    unless extra.empty?
      raise ArgumentError.new("contains extra elements: #{extra.inspect}")
    end

    # normalize on a copy so the caller's Hash is left untouched
    normalized = spec.dup
    normalized[:contacts] = contacts.kind_of?(Array) ? contacts : [contacts]
    normalized[:priority] &&= normalized[:priority].to_s
    normalized[:category] &&= normalized[:category].to_s

    normalized
  else
    raise ArgumentError.new("must be a String (contact name), Array (of contact names), or Hash (contact specification)")
  end
end
|
86
|
+
|
87
|
+
# Abstract
|
88
|
+
# Send the message to the external source
|
89
|
+
# +message+ is the message body returned from the condition
|
90
|
+
# +time+ is the Time at which the notification was made
|
91
|
+
# +priority+ is the arbitrary priority String
|
92
|
+
# +category+ is the arbitrary category String
|
93
|
+
# +host+ is the hostname of the server
|
94
|
+
def notify(message, time, priority, category, host)
|
95
|
+
raise AbstractMethodNotOverriddenError.new("Contact#notify must be overridden in subclasses")
|
96
|
+
end
|
97
|
+
|
98
|
+
# Construct the friendly name of this Contact, looks like:
|
99
|
+
#
|
100
|
+
#   Email Contact 'fred'
|
101
|
+
def friendly_name
|
102
|
+
super + " Contact '#{self.name}'"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'net/smtp'
|
3
|
+
|
4
|
+
module God
  module Contacts

    # Contact that sends each notification as a plain text email through
    # Net::SMTP.
    #
    # Settings are held on the class and therefore shared by every Email
    # contact:
    #
    #   message_settings - Hash; :from is used as the sender address
    #   server_settings  - Hash; :address and :port are always used, and
    #                      :domain, :user_name, :password, :authentication are
    #                      passed along only when :authentication is set
    #   format           - callable that builds the raw message text
    #   delivery_method  - defaults to :smtp
    class Email < Contact
      class << self
        attr_accessor :message_settings, :delivery_method, :server_settings, :format
      end

      self.message_settings = {:from => 'god@example.com'}

      self.delivery_method = :smtp

      self.server_settings = {:address => 'localhost',
                              :port => 25}
                            # :domain
                            # :user_name
                            # :password
                            # :authentication

      # Builds the raw message: headers, a blank line, then the body.
      #
      # The Date header reflects +time+ (the moment the notification was made;
      # the current time is used only if +time+ is nil), and every message gets
      # its own Message-Id built from a random token, the process id and the
      # sender address, so that distinct notifications are not mistaken for
      # duplicates of one another.
      self.format = lambda do |name, email, message, time, priority, category, host|
        from = self.message_settings[:from]
        <<-EOF
From: god <#{from}>
To: #{name} <#{email}>
Subject: [god] #{message}
Date: #{(time || Time.now).rfc2822}
Message-Id: <#{rand(1_000_000_000).to_s(36)}.#{Process.pid}.#{from}>

Message: #{message}
Host: #{host}
Priority: #{priority}
Category: #{category}
        EOF
      end

      attr_accessor :email

      # An Email contact is valid only when a destination address is set.
      def valid?
        valid = true
        valid &= complain("Attribute 'email' must be specified", self) if self.email.nil?
        valid
      end

      # Send the notification to this contact's address.
      #
      # +message+ is the message body returned from the condition
      # +time+ is the Time at which the notification was made
      # +priority+ is the arbitrary priority String
      # +category+ is the arbitrary category String
      # +host+ is the hostname of the server
      #
      # Never raises for delivery problems: on any StandardError the error and
      # its backtrace are printed and the failure is recorded in +info+;
      # on success +info+ records the destination address.
      def notify(message, time, priority, category, host)
        body = Email.format.call(self.name, self.email, message, time, priority, category, host)

        settings = Email.server_settings
        args = [settings[:address], settings[:port]]
        if settings[:authentication]
          args << settings[:domain]
          args << settings[:user_name]
          args << settings[:password]
          args << settings[:authentication]
        end

        Net::SMTP.start(*args) do |smtp|
          smtp.send_message body, Email.message_settings[:from], self.email
        end

        self.info = "sent email to #{self.email}"
      rescue => e
        puts e.message
        puts e.backtrace.join("\n")

        self.info = "failed to send email to #{self.email}: #{e.message}"
      end
    end

  end
end
|