god 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +43 -7
- data/Manifest.txt +20 -4
- data/Rakefile +1 -1
- data/bin/god +263 -195
- data/examples/events.god +66 -34
- data/examples/gravatar.god +25 -12
- data/init/god +42 -0
- data/lib/god/behavior.rb +9 -29
- data/lib/god/behaviors/clean_pid_file.rb +6 -2
- data/lib/god/behaviors/notify_when_flapping.rb +4 -4
- data/lib/god/condition.rb +48 -6
- data/lib/god/conditions/always.rb +5 -1
- data/lib/god/conditions/cpu_usage.rb +13 -5
- data/lib/god/conditions/degrading_lambda.rb +8 -3
- data/lib/god/conditions/flapping.rb +97 -0
- data/lib/god/conditions/http_response_code.rb +97 -0
- data/lib/god/conditions/lambda.rb +8 -2
- data/lib/god/conditions/memory_usage.rb +13 -5
- data/lib/god/conditions/process_exits.rb +11 -3
- data/lib/god/conditions/process_running.rb +22 -4
- data/lib/god/conditions/tries.rb +16 -5
- data/lib/god/configurable.rb +54 -0
- data/lib/god/contact.rb +106 -0
- data/lib/god/contacts/email.rb +73 -0
- data/lib/god/errors.rb +3 -0
- data/lib/god/hub.rb +138 -33
- data/lib/god/logger.rb +21 -4
- data/lib/god/metric.rb +3 -4
- data/lib/god/process.rb +93 -49
- data/lib/god/socket.rb +60 -0
- data/lib/god/task.rb +233 -0
- data/lib/god/trigger.rb +43 -0
- data/lib/god/watch.rb +48 -114
- data/lib/god.rb +216 -63
- data/test/configs/child_events/child_events.god +20 -1
- data/test/configs/child_polls/child_polls.god +26 -6
- data/test/configs/child_polls/simple_server.rb +10 -1
- data/test/configs/contact/contact.god +74 -0
- data/test/configs/contact/simple_server.rb +3 -0
- data/test/configs/daemon_events/daemon_events.god +5 -2
- data/test/configs/daemon_events/simple_server.rb +2 -0
- data/test/configs/daemon_events/simple_server_stop.rb +9 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +1 -3
- data/test/configs/task/logs/.placeholder +0 -0
- data/test/configs/task/task.god +26 -0
- data/test/helper.rb +19 -11
- data/test/test_conditions_http_response_code.rb +115 -0
- data/test/test_conditions_process_running.rb +2 -2
- data/test/test_conditions_tries.rb +21 -0
- data/test/test_contact.rb +109 -0
- data/test/test_god.rb +101 -17
- data/test/test_hub.rb +64 -1
- data/test/test_process.rb +43 -56
- data/test/{test_server.rb → test_socket.rb} +6 -20
- data/test/test_task.rb +86 -0
- data/test/test_trigger.rb +59 -0
- data/test/test_watch.rb +32 -7
- metadata +27 -8
- data/lib/god/reporter.rb +0 -25
- data/lib/god/server.rb +0 -37
- data/test/test_reporter.rb +0 -18
data/examples/events.god
CHANGED
@@ -4,49 +4,81 @@
|
|
4
4
|
# Run with:
|
5
5
|
# god -c /path/to/events.god
|
6
6
|
|
7
|
-
RAILS_ROOT =
|
7
|
+
RAILS_ROOT = ENV['GOD_TEST_RAILS_ROOT']
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
9
|
+
%w{3002}.each do |port|
|
10
|
+
God.watch do |w|
|
11
|
+
w.name = "local-#{port}"
|
12
|
+
w.interval = 5.seconds
|
13
|
+
w.start = "mongrel_rails start -p #{port} -P #{RAILS_ROOT}/log/mongrel.#{port}.pid -c #{RAILS_ROOT} -d"
|
14
|
+
w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.#{port}.pid -c #{RAILS_ROOT}"
|
15
|
+
w.pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid")
|
16
|
+
w.log = File.join(RAILS_ROOT, "log/commands.#{port}.log")
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
+
# clean pid files before start if necessary
|
19
|
+
w.behavior(:clean_pid_file)
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
# determine the state on startup
|
22
|
+
w.transition(:init, { true => :up, false => :start }) do |on|
|
23
|
+
on.condition(:process_running) do |c|
|
24
|
+
c.running = true
|
25
|
+
end
|
23
26
|
end
|
24
|
-
end
|
25
27
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
# determine when process has finished starting
|
29
|
+
w.transition([:start, :restart], :up) do |on|
|
30
|
+
on.condition(:process_running) do |c|
|
31
|
+
c.running = true
|
32
|
+
end
|
33
|
+
|
34
|
+
# failsafe
|
35
|
+
on.condition(:tries) do |c|
|
36
|
+
c.times = 8
|
37
|
+
c.within = 2.minutes
|
38
|
+
c.transition = :start
|
39
|
+
end
|
30
40
|
end
|
31
|
-
end
|
32
41
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
# restart if memory or cpu is too high
|
39
|
-
w.transition(:up, :restart) do |on|
|
40
|
-
on.condition(:memory_usage) do |c|
|
41
|
-
c.interval = 20
|
42
|
-
c.above = (50 * 1024) # 50mb
|
43
|
-
c.times = [3, 5]
|
42
|
+
# start if process is not running
|
43
|
+
w.transition(:up, :start) do |on|
|
44
|
+
on.condition(:process_exits)
|
44
45
|
end
|
46
|
+
|
47
|
+
# restart if memory or cpu is too high
|
48
|
+
w.transition(:up, :restart) do |on|
|
49
|
+
on.condition(:memory_usage) do |c|
|
50
|
+
c.interval = 20
|
51
|
+
c.above = 50.megabytes
|
52
|
+
c.times = [3, 5]
|
53
|
+
end
|
54
|
+
|
55
|
+
on.condition(:cpu_usage) do |c|
|
56
|
+
c.interval = 10
|
57
|
+
c.above = 10.percent
|
58
|
+
c.times = 5
|
59
|
+
end
|
45
60
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
61
|
+
on.condition(:http_response_code) do |c|
|
62
|
+
c.host = 'localhost'
|
63
|
+
c.port = port
|
64
|
+
c.path = '/'
|
65
|
+
c.code_is = 500
|
66
|
+
c.timeout = 10.seconds
|
67
|
+
c.times = [3, 5]
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# lifecycle
|
72
|
+
w.lifecycle do |on|
|
73
|
+
on.condition(:flapping) do |c|
|
74
|
+
c.to_state = [:start, :restart]
|
75
|
+
c.times = 5
|
76
|
+
c.within = 1.minute
|
77
|
+
c.transition = :unmonitored
|
78
|
+
c.retry_in = 10.minutes
|
79
|
+
c.retry_times = 5
|
80
|
+
c.retry_within = 2.hours
|
81
|
+
end
|
50
82
|
end
|
51
83
|
end
|
52
84
|
end
|
data/examples/gravatar.god
CHANGED
@@ -3,38 +3,51 @@
|
|
3
3
|
# This is the actual config file used to keep the mongrels of
|
4
4
|
# gravatar.com running.
|
5
5
|
|
6
|
-
RAILS_ROOT = "/
|
6
|
+
RAILS_ROOT = "/Users/tom/dev/gravatar2"
|
7
7
|
|
8
8
|
%w{8200 8201 8202}.each do |port|
|
9
9
|
God.watch do |w|
|
10
10
|
w.name = "gravatar2-mongrel-#{port}"
|
11
|
-
w.interval = 30 #
|
12
|
-
w.start = "mongrel_rails
|
13
|
-
-
|
14
|
-
w.stop = "mongrel_rails
|
15
|
-
|
16
|
-
w.
|
17
|
-
|
18
|
-
pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid")
|
11
|
+
w.interval = 30.seconds # default
|
12
|
+
w.start = "mongrel_rails start -c #{RAILS_ROOT} -p #{port} \
|
13
|
+
-P #{RAILS_ROOT}/log/mongrel.#{port}.pid -d"
|
14
|
+
w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.#{port}.pid"
|
15
|
+
w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.#{port}.pid"
|
16
|
+
w.start_grace = 10.seconds
|
17
|
+
w.restart_grace = 10.seconds
|
18
|
+
w.pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid")
|
19
19
|
|
20
20
|
w.behavior(:clean_pid_file)
|
21
21
|
|
22
22
|
w.start_if do |start|
|
23
23
|
start.condition(:process_running) do |c|
|
24
|
-
c.interval = 5
|
24
|
+
c.interval = 5.seconds
|
25
25
|
c.running = false
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
29
|
w.restart_if do |restart|
|
30
30
|
restart.condition(:memory_usage) do |c|
|
31
|
-
c.above =
|
31
|
+
c.above = 150.megabytes
|
32
32
|
c.times = [3, 5] # 3 out of 5 intervals
|
33
33
|
end
|
34
34
|
|
35
35
|
restart.condition(:cpu_usage) do |c|
|
36
|
-
c.above = 50
|
36
|
+
c.above = 50.percent
|
37
|
+
c.times = 5
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# lifecycle
|
42
|
+
w.lifecycle do |on|
|
43
|
+
on.condition(:flapping) do |c|
|
44
|
+
c.to_state = [:start, :restart]
|
37
45
|
c.times = 5
|
46
|
+
c.within = 5.minute
|
47
|
+
c.transition = :unmonitored
|
48
|
+
c.retry_in = 10.minutes
|
49
|
+
c.retry_times = 5
|
50
|
+
c.retry_within = 2.hours
|
38
51
|
end
|
39
52
|
end
|
40
53
|
end
|
data/init/god
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# god Startup script for god (http://god.rubyforge.org)
|
4
|
+
#
|
5
|
+
# chkconfig: - 85 15
|
6
|
+
# description: God is an easy to configure, easy to extend monitoring \
|
7
|
+
# framework written in Ruby.
|
8
|
+
#
|
9
|
+
|
10
|
+
CONF_DIR=/etc/god
|
11
|
+
|
12
|
+
RETVAL=0
|
13
|
+
|
14
|
+
# Go no further if config directory is missing.
|
15
|
+
[ -d "$CONF_DIR" ] || exit 0
|
16
|
+
|
17
|
+
case "$1" in
|
18
|
+
start)
|
19
|
+
# Create pid directory
|
20
|
+
ruby /usr/bin/god -c $CONF_DIR/master.conf
|
21
|
+
RETVAL=$?
|
22
|
+
;;
|
23
|
+
stop)
|
24
|
+
ruby /usr/bin/god terminate
|
25
|
+
RETVAL=$?
|
26
|
+
;;
|
27
|
+
restart)
|
28
|
+
ruby /usr/bin/god terminate
|
29
|
+
ruby /usr/bin/god -c $CONF_DIR/master.conf
|
30
|
+
RETVAL=$?
|
31
|
+
;;
|
32
|
+
status)
|
33
|
+
ruby /usr/bin/god status
|
34
|
+
RETVAL=$?
|
35
|
+
;;
|
36
|
+
*)
|
37
|
+
echo "Usage: god {start|stop|restart|status}"
|
38
|
+
exit 1
|
39
|
+
;;
|
40
|
+
esac
|
41
|
+
|
42
|
+
exit $RETVAL
|
data/lib/god/behavior.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
module God
|
2
2
|
|
3
3
|
class Behavior
|
4
|
+
include Configurable
|
5
|
+
|
4
6
|
attr_accessor :watch
|
5
7
|
|
6
|
-
# Generate a Behavior of the given kind. The proper class
|
8
|
+
# Generate a Behavior of the given kind. The proper class is found by camel casing the
|
7
9
|
# kind (which is given as an underscored symbol).
|
8
10
|
# +kind+ is the underscored symbol representing the class (e.g. foo_bar for God::Behaviors::FooBar)
|
9
11
|
def self.generate(kind, watch)
|
@@ -15,31 +17,10 @@ module God
|
|
15
17
|
raise NoSuchBehaviorError.new("No Behavior found with the class name God::Behaviors::#{sym}")
|
16
18
|
end
|
17
19
|
|
18
|
-
# Override this method in your Behaviors (optional)
|
19
|
-
#
|
20
|
-
# Called once after the Condition has been sent to the block and attributes have been
|
21
|
-
# set. Do any post-processing on attributes here
|
22
|
-
def prepare
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
# Override this method in your Behaviors (optional)
|
27
|
-
#
|
28
|
-
# Called once during evaluation of the config file. Return true if valid, false otherwise
|
29
|
-
#
|
30
|
-
# A convenience method 'complain' is available that will print out a message and return false,
|
31
|
-
# making it easy to report multiple validation errors:
|
32
|
-
#
|
33
|
-
# def valid?
|
34
|
-
# valid = true
|
35
|
-
# valid &= complain("You must specify the 'pid_file' attribute for :memory_usage") if self.pid_file.nil?
|
36
|
-
# valid &= complain("You must specify the 'above' attribute for :memory_usage") if self.above.nil?
|
37
|
-
# valid
|
38
|
-
# end
|
39
20
|
def valid?
|
40
21
|
true
|
41
22
|
end
|
42
|
-
|
23
|
+
|
43
24
|
#######
|
44
25
|
|
45
26
|
def before_start
|
@@ -60,12 +41,11 @@ module God
|
|
60
41
|
def after_stop
|
61
42
|
end
|
62
43
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
false
|
44
|
+
# Construct the friendly name of this Behavior, looks like:
|
45
|
+
#
|
46
|
+
# Behavior FooBar on Watch 'baz'
|
47
|
+
def friendly_name
|
48
|
+
"Behavior " + super + " on Watch '#{self.watch.name}'"
|
69
49
|
end
|
70
50
|
end
|
71
51
|
|
@@ -4,12 +4,16 @@ module God
|
|
4
4
|
class CleanPidFile < Behavior
|
5
5
|
def valid?
|
6
6
|
valid = true
|
7
|
-
valid &= complain("
|
7
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
8
8
|
valid
|
9
9
|
end
|
10
10
|
|
11
11
|
def before_start
|
12
|
-
File.delete(self.watch.pid_file)
|
12
|
+
File.delete(self.watch.pid_file)
|
13
|
+
|
14
|
+
"deleted pid file"
|
15
|
+
rescue
|
16
|
+
"no pid file to delete"
|
13
17
|
end
|
14
18
|
end
|
15
19
|
|
@@ -13,13 +13,13 @@ module God
|
|
13
13
|
|
14
14
|
def valid?
|
15
15
|
valid = true
|
16
|
-
valid &= complain("
|
17
|
-
valid &= complain("
|
18
|
-
valid &= complain("
|
16
|
+
valid &= complain("Attribute 'failures' must be specified", self) unless self.failures
|
17
|
+
valid &= complain("Attribute 'seconds' must be specified", self) unless self.seconds
|
18
|
+
valid &= complain("Attribute 'notifier' must be specified", self) unless self.notifier
|
19
19
|
|
20
20
|
# Must take one arg or variable args
|
21
21
|
unless self.notifier.respond_to?(:notify) and [1,-1].include?(self.notifier.method(:notify).arity)
|
22
|
-
valid &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args")
|
22
|
+
valid &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args", self)
|
23
23
|
end
|
24
24
|
|
25
25
|
valid
|
data/lib/god/condition.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
module God
|
2
2
|
|
3
3
|
class Condition < Behavior
|
4
|
+
attr_accessor :transition, :notify, :info
|
5
|
+
|
4
6
|
# Generate a Condition of the given kind. The proper class is found by camel casing the
|
5
7
|
# kind (which is given as an underscored symbol).
|
6
|
-
# +kind+ is the underscored symbol representing the class (e.g. foo_bar for God::Conditions::FooBar)
|
8
|
+
# +kind+ is the underscored symbol representing the class (e.g. :foo_bar for God::Conditions::FooBar)
|
7
9
|
def self.generate(kind, watch)
|
8
10
|
sym = kind.to_s.capitalize.gsub(/_(.)/){$1.upcase}.intern
|
9
11
|
c = God::Conditions.const_get(sym).new
|
10
12
|
|
11
|
-
unless c.kind_of?(PollCondition) || c.kind_of?(EventCondition)
|
12
|
-
abort "Condition '#{c.class.name}' must subclass
|
13
|
+
unless c.kind_of?(PollCondition) || c.kind_of?(EventCondition) || c.kind_of?(TriggerCondition)
|
14
|
+
abort "Condition '#{c.class.name}' must subclass God::PollCondition, God::EventCondition, or God::TriggerCondition"
|
13
15
|
end
|
14
16
|
|
15
17
|
c.watch = watch
|
@@ -17,12 +19,30 @@ module God
|
|
17
19
|
rescue NameError
|
18
20
|
raise NoSuchConditionError.new("No Condition found with the class name God::Conditions::#{sym}")
|
19
21
|
end
|
22
|
+
|
23
|
+
def self.valid?(condition)
|
24
|
+
valid = true
|
25
|
+
if condition.notify
|
26
|
+
begin
|
27
|
+
Contact.normalize(condition.notify)
|
28
|
+
rescue ArgumentError => e
|
29
|
+
valid &= Configurable.complain("Attribute 'notify' " + e.message, condition)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
valid
|
33
|
+
end
|
34
|
+
|
35
|
+
# Construct the friendly name of this Condition, looks like:
|
36
|
+
#
|
37
|
+
# Condition FooBar on Watch 'baz'
|
38
|
+
def friendly_name
|
39
|
+
"Condition #{self.class.name.split('::').last} on Watch '#{self.watch.name}'"
|
40
|
+
end
|
20
41
|
end
|
21
42
|
|
22
43
|
class PollCondition < Condition
|
23
44
|
# all poll conditions can specify a poll interval
|
24
45
|
attr_accessor :interval
|
25
|
-
attr_accessor :transition
|
26
46
|
|
27
47
|
# Override this method in your Conditions (optional)
|
28
48
|
def before
|
@@ -33,7 +53,7 @@ module God
|
|
33
53
|
# Return true if the test passes (everything is ok)
|
34
54
|
# Return false otherwise
|
35
55
|
def test
|
36
|
-
raise AbstractMethodNotOverriddenError.new("
|
56
|
+
raise AbstractMethodNotOverriddenError.new("PollCondition#test must be overridden in subclasses")
|
37
57
|
end
|
38
58
|
|
39
59
|
# Override this method in your Conditions (optional)
|
@@ -43,7 +63,29 @@ module God
|
|
43
63
|
|
44
64
|
class EventCondition < Condition
|
45
65
|
def register
|
46
|
-
|
66
|
+
raise AbstractMethodNotOverriddenError.new("EventCondition#register must be overridden in subclasses")
|
67
|
+
end
|
68
|
+
|
69
|
+
def deregister
|
70
|
+
raise AbstractMethodNotOverriddenError.new("EventCondition#deregister must be overridden in subclasses")
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
class TriggerCondition < Condition
|
75
|
+
def process(event, payload)
|
76
|
+
raise AbstractMethodNotOverriddenError.new("TriggerCondition#process must be overridden in subclasses")
|
77
|
+
end
|
78
|
+
|
79
|
+
def trigger
|
80
|
+
Hub.trigger(self)
|
81
|
+
end
|
82
|
+
|
83
|
+
def register
|
84
|
+
Trigger.register(self)
|
85
|
+
end
|
86
|
+
|
87
|
+
def deregister
|
88
|
+
Trigger.deregister(self)
|
47
89
|
end
|
48
90
|
end
|
49
91
|
|
@@ -4,9 +4,13 @@ module God
|
|
4
4
|
class Always < PollCondition
|
5
5
|
attr_accessor :what
|
6
6
|
|
7
|
+
def initialize
|
8
|
+
self.info = "always"
|
9
|
+
end
|
10
|
+
|
7
11
|
def valid?
|
8
12
|
valid = true
|
9
|
-
valid &= complain("
|
13
|
+
valid &= complain("Attribute 'what' must be specified", self) if self.what.nil?
|
10
14
|
valid
|
11
15
|
end
|
12
16
|
|
@@ -17,24 +17,32 @@ module God
|
|
17
17
|
|
18
18
|
@timeline = Timeline.new(self.times[1])
|
19
19
|
end
|
20
|
-
|
20
|
+
|
21
|
+
def reset
|
22
|
+
@timeline.clear
|
23
|
+
end
|
24
|
+
|
21
25
|
def valid?
|
22
26
|
valid = true
|
23
|
-
valid &= complain("
|
24
|
-
valid &= complain("
|
27
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
28
|
+
valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
|
25
29
|
valid
|
26
30
|
end
|
27
|
-
|
31
|
+
|
28
32
|
def test
|
29
33
|
return false unless File.exist?(self.watch.pid_file)
|
30
34
|
|
31
35
|
pid = File.read(self.watch.pid_file).strip
|
32
36
|
process = System::Process.new(pid)
|
33
37
|
@timeline.push(process.percent_cpu)
|
38
|
+
|
39
|
+
history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}%%" }.join(", ") + "]"
|
40
|
+
|
34
41
|
if @timeline.select { |x| x > self.above }.size >= self.times.first
|
35
|
-
|
42
|
+
self.info = "cpu out of bounds #{history}"
|
36
43
|
return true
|
37
44
|
else
|
45
|
+
self.info = "cpu within bounds #{history}"
|
38
46
|
return false
|
39
47
|
end
|
40
48
|
end
|
@@ -12,21 +12,26 @@ module God
|
|
12
12
|
|
13
13
|
def valid?
|
14
14
|
valid = true
|
15
|
-
valid &= complain("
|
15
|
+
valid &= complain("Attribute 'lambda' must be specified", self) if self.lambda.nil?
|
16
16
|
valid
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
def test
|
20
20
|
puts "Calling test. Interval at #{self.interval}"
|
21
21
|
@original_interval ||= self.interval
|
22
22
|
unless pass?
|
23
|
-
|
23
|
+
if @tries == 2
|
24
|
+
self.info = "lambda condition was satisfied"
|
25
|
+
return true
|
26
|
+
end
|
24
27
|
self.interval = self.interval / 2.0
|
25
28
|
@tries += 1
|
26
29
|
else
|
27
30
|
@tries = 0
|
28
31
|
self.interval = @original_interval
|
29
32
|
end
|
33
|
+
|
34
|
+
self.info = "lambda condition was not satisfied"
|
30
35
|
false
|
31
36
|
end
|
32
37
|
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module God
|
2
|
+
module Conditions
|
3
|
+
|
4
|
+
class Flapping < TriggerCondition
|
5
|
+
attr_accessor :times, :within, :from_state, :to_state, :retry_in, :retry_times, :retry_within
|
6
|
+
|
7
|
+
def initialize
|
8
|
+
self.info = "process is flapping"
|
9
|
+
end
|
10
|
+
|
11
|
+
def prepare
|
12
|
+
@timeline = Timeline.new(self.times)
|
13
|
+
@retry_timeline = Timeline.new(self.retry_times)
|
14
|
+
end
|
15
|
+
|
16
|
+
def valid?
|
17
|
+
valid = true
|
18
|
+
valid &= complain("Attribute 'times' must be specified", self) if self.times.nil?
|
19
|
+
valid &= complain("Attribute 'within' must be specified", self) if self.within.nil?
|
20
|
+
valid &= complain("Attributes 'from_state', 'to_state', or both must be specified", self) if self.from_state.nil? && self.to_state.nil?
|
21
|
+
valid
|
22
|
+
end
|
23
|
+
|
24
|
+
def process(event, payload)
|
25
|
+
begin
|
26
|
+
if event == :state_change
|
27
|
+
event_from_state, event_to_state = *payload
|
28
|
+
|
29
|
+
from_state_match = !self.from_state || self.from_state && Array(self.from_state).include?(event_from_state)
|
30
|
+
to_state_match = !self.to_state || self.to_state && Array(self.to_state).include?(event_to_state)
|
31
|
+
|
32
|
+
if from_state_match && to_state_match
|
33
|
+
@timeline << Time.now
|
34
|
+
|
35
|
+
concensus = (@timeline.size == self.times)
|
36
|
+
duration = (@timeline.last - @timeline.first) < self.within
|
37
|
+
|
38
|
+
if concensus && duration
|
39
|
+
@timeline.clear
|
40
|
+
trigger
|
41
|
+
retry_mechanism
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
rescue => e
|
46
|
+
puts e.message
|
47
|
+
puts e.backtrace.join("\n")
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def retry_mechanism
|
54
|
+
if self.retry_in
|
55
|
+
@retry_timeline << Time.now
|
56
|
+
|
57
|
+
concensus = (@retry_timeline.size == self.retry_times)
|
58
|
+
duration = (@retry_timeline.last - @retry_timeline.first) < self.retry_within
|
59
|
+
|
60
|
+
if concensus && duration
|
61
|
+
# give up
|
62
|
+
Thread.new do
|
63
|
+
sleep 1
|
64
|
+
|
65
|
+
# log
|
66
|
+
msg = "#{self.watch.name} giving up"
|
67
|
+
Syslog.debug(msg)
|
68
|
+
LOG.log(self.watch, :info, msg)
|
69
|
+
end
|
70
|
+
else
|
71
|
+
# try again later
|
72
|
+
Thread.new do
|
73
|
+
sleep 1
|
74
|
+
|
75
|
+
# log
|
76
|
+
msg = "#{self.watch.name} auto-reenable monitoring in #{self.retry_in} seconds"
|
77
|
+
Syslog.debug(msg)
|
78
|
+
LOG.log(self.watch, :info, msg)
|
79
|
+
|
80
|
+
sleep self.retry_in
|
81
|
+
|
82
|
+
# log
|
83
|
+
msg = "#{self.watch.name} auto-reenabling monitoring"
|
84
|
+
Syslog.debug(msg)
|
85
|
+
LOG.log(self.watch, :info, msg)
|
86
|
+
|
87
|
+
if self.watch.state == :unmonitored
|
88
|
+
self.watch.monitor
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
end
|