god 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +43 -7
- data/Manifest.txt +20 -4
- data/Rakefile +1 -1
- data/bin/god +263 -195
- data/examples/events.god +66 -34
- data/examples/gravatar.god +25 -12
- data/init/god +42 -0
- data/lib/god/behavior.rb +9 -29
- data/lib/god/behaviors/clean_pid_file.rb +6 -2
- data/lib/god/behaviors/notify_when_flapping.rb +4 -4
- data/lib/god/condition.rb +48 -6
- data/lib/god/conditions/always.rb +5 -1
- data/lib/god/conditions/cpu_usage.rb +13 -5
- data/lib/god/conditions/degrading_lambda.rb +8 -3
- data/lib/god/conditions/flapping.rb +97 -0
- data/lib/god/conditions/http_response_code.rb +97 -0
- data/lib/god/conditions/lambda.rb +8 -2
- data/lib/god/conditions/memory_usage.rb +13 -5
- data/lib/god/conditions/process_exits.rb +11 -3
- data/lib/god/conditions/process_running.rb +22 -4
- data/lib/god/conditions/tries.rb +16 -5
- data/lib/god/configurable.rb +54 -0
- data/lib/god/contact.rb +106 -0
- data/lib/god/contacts/email.rb +73 -0
- data/lib/god/errors.rb +3 -0
- data/lib/god/hub.rb +138 -33
- data/lib/god/logger.rb +21 -4
- data/lib/god/metric.rb +3 -4
- data/lib/god/process.rb +93 -49
- data/lib/god/socket.rb +60 -0
- data/lib/god/task.rb +233 -0
- data/lib/god/trigger.rb +43 -0
- data/lib/god/watch.rb +48 -114
- data/lib/god.rb +216 -63
- data/test/configs/child_events/child_events.god +20 -1
- data/test/configs/child_polls/child_polls.god +26 -6
- data/test/configs/child_polls/simple_server.rb +10 -1
- data/test/configs/contact/contact.god +74 -0
- data/test/configs/contact/simple_server.rb +3 -0
- data/test/configs/daemon_events/daemon_events.god +5 -2
- data/test/configs/daemon_events/simple_server.rb +2 -0
- data/test/configs/daemon_events/simple_server_stop.rb +9 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +1 -3
- data/test/configs/task/logs/.placeholder +0 -0
- data/test/configs/task/task.god +26 -0
- data/test/helper.rb +19 -11
- data/test/test_conditions_http_response_code.rb +115 -0
- data/test/test_conditions_process_running.rb +2 -2
- data/test/test_conditions_tries.rb +21 -0
- data/test/test_contact.rb +109 -0
- data/test/test_god.rb +101 -17
- data/test/test_hub.rb +64 -1
- data/test/test_process.rb +43 -56
- data/test/{test_server.rb → test_socket.rb} +6 -20
- data/test/test_task.rb +86 -0
- data/test/test_trigger.rb +59 -0
- data/test/test_watch.rb +32 -7
- metadata +27 -8
- data/lib/god/reporter.rb +0 -25
- data/lib/god/server.rb +0 -37
- data/test/test_reporter.rb +0 -18
data/examples/events.god
CHANGED
@@ -4,49 +4,81 @@
|
|
4
4
|
# Run with:
|
5
5
|
# god -c /path/to/events.god
|
6
6
|
|
7
|
-
RAILS_ROOT =
|
7
|
+
RAILS_ROOT = ENV['GOD_TEST_RAILS_ROOT']
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
9
|
+
%w{3002}.each do |port|
|
10
|
+
God.watch do |w|
|
11
|
+
w.name = "local-#{port}"
|
12
|
+
w.interval = 5.seconds
|
13
|
+
w.start = "mongrel_rails start -p #{port} -P #{RAILS_ROOT}/log/mongrel.#{port}.pid -c #{RAILS_ROOT} -d"
|
14
|
+
w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.#{port}.pid -c #{RAILS_ROOT}"
|
15
|
+
w.pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid")
|
16
|
+
w.log = File.join(RAILS_ROOT, "log/commands.#{port}.log")
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
+
# clean pid files before start if necessary
|
19
|
+
w.behavior(:clean_pid_file)
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
# determine the state on startup
|
22
|
+
w.transition(:init, { true => :up, false => :start }) do |on|
|
23
|
+
on.condition(:process_running) do |c|
|
24
|
+
c.running = true
|
25
|
+
end
|
23
26
|
end
|
24
|
-
end
|
25
27
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
# determine when process has finished starting
|
29
|
+
w.transition([:start, :restart], :up) do |on|
|
30
|
+
on.condition(:process_running) do |c|
|
31
|
+
c.running = true
|
32
|
+
end
|
33
|
+
|
34
|
+
# failsafe
|
35
|
+
on.condition(:tries) do |c|
|
36
|
+
c.times = 8
|
37
|
+
c.within = 2.minutes
|
38
|
+
c.transition = :start
|
39
|
+
end
|
30
40
|
end
|
31
|
-
end
|
32
41
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
# restart if memory or cpu is too high
|
39
|
-
w.transition(:up, :restart) do |on|
|
40
|
-
on.condition(:memory_usage) do |c|
|
41
|
-
c.interval = 20
|
42
|
-
c.above = (50 * 1024) # 50mb
|
43
|
-
c.times = [3, 5]
|
42
|
+
# start if process is not running
|
43
|
+
w.transition(:up, :start) do |on|
|
44
|
+
on.condition(:process_exits)
|
44
45
|
end
|
46
|
+
|
47
|
+
# restart if memory or cpu is too high
|
48
|
+
w.transition(:up, :restart) do |on|
|
49
|
+
on.condition(:memory_usage) do |c|
|
50
|
+
c.interval = 20
|
51
|
+
c.above = 50.megabytes
|
52
|
+
c.times = [3, 5]
|
53
|
+
end
|
54
|
+
|
55
|
+
on.condition(:cpu_usage) do |c|
|
56
|
+
c.interval = 10
|
57
|
+
c.above = 10.percent
|
58
|
+
c.times = 5
|
59
|
+
end
|
45
60
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
61
|
+
on.condition(:http_response_code) do |c|
|
62
|
+
c.host = 'localhost'
|
63
|
+
c.port = port
|
64
|
+
c.path = '/'
|
65
|
+
c.code_is = 500
|
66
|
+
c.timeout = 10.seconds
|
67
|
+
c.times = [3, 5]
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# lifecycle
|
72
|
+
w.lifecycle do |on|
|
73
|
+
on.condition(:flapping) do |c|
|
74
|
+
c.to_state = [:start, :restart]
|
75
|
+
c.times = 5
|
76
|
+
c.within = 1.minute
|
77
|
+
c.transition = :unmonitored
|
78
|
+
c.retry_in = 10.minutes
|
79
|
+
c.retry_times = 5
|
80
|
+
c.retry_within = 2.hours
|
81
|
+
end
|
50
82
|
end
|
51
83
|
end
|
52
84
|
end
|
data/examples/gravatar.god
CHANGED
@@ -3,38 +3,51 @@
|
|
3
3
|
# This is the actual config file used to keep the mongrels of
|
4
4
|
# gravatar.com running.
|
5
5
|
|
6
|
-
RAILS_ROOT = "/
|
6
|
+
RAILS_ROOT = "/Users/tom/dev/gravatar2"
|
7
7
|
|
8
8
|
%w{8200 8201 8202}.each do |port|
|
9
9
|
God.watch do |w|
|
10
10
|
w.name = "gravatar2-mongrel-#{port}"
|
11
|
-
w.interval = 30 #
|
12
|
-
w.start = "mongrel_rails
|
13
|
-
-
|
14
|
-
w.stop = "mongrel_rails
|
15
|
-
|
16
|
-
w.
|
17
|
-
|
18
|
-
pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid")
|
11
|
+
w.interval = 30.seconds # default
|
12
|
+
w.start = "mongrel_rails start -c #{RAILS_ROOT} -p #{port} \
|
13
|
+
-P #{RAILS_ROOT}/log/mongrel.#{port}.pid -d"
|
14
|
+
w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.#{port}.pid"
|
15
|
+
w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.#{port}.pid"
|
16
|
+
w.start_grace = 10.seconds
|
17
|
+
w.restart_grace = 10.seconds
|
18
|
+
w.pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid")
|
19
19
|
|
20
20
|
w.behavior(:clean_pid_file)
|
21
21
|
|
22
22
|
w.start_if do |start|
|
23
23
|
start.condition(:process_running) do |c|
|
24
|
-
c.interval = 5
|
24
|
+
c.interval = 5.seconds
|
25
25
|
c.running = false
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
29
|
w.restart_if do |restart|
|
30
30
|
restart.condition(:memory_usage) do |c|
|
31
|
-
c.above =
|
31
|
+
c.above = 150.megabytes
|
32
32
|
c.times = [3, 5] # 3 out of 5 intervals
|
33
33
|
end
|
34
34
|
|
35
35
|
restart.condition(:cpu_usage) do |c|
|
36
|
-
c.above = 50
|
36
|
+
c.above = 50.percent
|
37
|
+
c.times = 5
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# lifecycle
|
42
|
+
w.lifecycle do |on|
|
43
|
+
on.condition(:flapping) do |c|
|
44
|
+
c.to_state = [:start, :restart]
|
37
45
|
c.times = 5
|
46
|
+
c.within = 5.minute
|
47
|
+
c.transition = :unmonitored
|
48
|
+
c.retry_in = 10.minutes
|
49
|
+
c.retry_times = 5
|
50
|
+
c.retry_within = 2.hours
|
38
51
|
end
|
39
52
|
end
|
40
53
|
end
|
data/init/god
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# god Startup script for god (http://god.rubyforge.org)
|
4
|
+
#
|
5
|
+
# chkconfig: - 85 15
|
6
|
+
# description: God is an easy to configure, easy to extend monitoring \
|
7
|
+
# framework written in Ruby.
|
8
|
+
#
|
9
|
+
|
10
|
+
CONF_DIR=/etc/god
|
11
|
+
|
12
|
+
RETVAL=0
|
13
|
+
|
14
|
+
# Go no further if config directory is missing.
|
15
|
+
[ -d "$CONF_DIR" ] || exit 0
|
16
|
+
|
17
|
+
case "$1" in
|
18
|
+
start)
|
19
|
+
# Start god using the master config file
|
20
|
+
ruby /usr/bin/god -c $CONF_DIR/master.conf
|
21
|
+
RETVAL=$?
|
22
|
+
;;
|
23
|
+
stop)
|
24
|
+
ruby /usr/bin/god terminate
|
25
|
+
RETVAL=$?
|
26
|
+
;;
|
27
|
+
restart)
|
28
|
+
ruby /usr/bin/god terminate
|
29
|
+
ruby /usr/bin/god -c $CONF_DIR/master.conf
|
30
|
+
RETVAL=$?
|
31
|
+
;;
|
32
|
+
status)
|
33
|
+
ruby /usr/bin/god status
|
34
|
+
RETVAL=$?
|
35
|
+
;;
|
36
|
+
*)
|
37
|
+
echo "Usage: god {start|stop|restart|status}"
|
38
|
+
exit 1
|
39
|
+
;;
|
40
|
+
esac
|
41
|
+
|
42
|
+
exit $RETVAL
|
data/lib/god/behavior.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
module God
|
2
2
|
|
3
3
|
class Behavior
|
4
|
+
include Configurable
|
5
|
+
|
4
6
|
attr_accessor :watch
|
5
7
|
|
6
|
-
# Generate a Behavior of the given kind. The proper class
|
8
|
+
# Generate a Behavior of the given kind. The proper class is found by camel casing the
|
7
9
|
# kind (which is given as an underscored symbol).
|
8
10
|
# +kind+ is the underscored symbol representing the class (e.g. foo_bar for God::Behaviors::FooBar)
|
9
11
|
def self.generate(kind, watch)
|
@@ -15,31 +17,10 @@ module God
|
|
15
17
|
raise NoSuchBehaviorError.new("No Behavior found with the class name God::Behaviors::#{sym}")
|
16
18
|
end
|
17
19
|
|
18
|
-
# Override this method in your Behaviors (optional)
|
19
|
-
#
|
20
|
-
# Called once after the Condition has been sent to the block and attributes have been
|
21
|
-
# set. Do any post-processing on attributes here
|
22
|
-
def prepare
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
# Override this method in your Behaviors (optional)
|
27
|
-
#
|
28
|
-
# Called once during evaluation of the config file. Return true if valid, false otherwise
|
29
|
-
#
|
30
|
-
# A convenience method 'complain' is available that will print out a message and return false,
|
31
|
-
# making it easy to report multiple validation errors:
|
32
|
-
#
|
33
|
-
# def valid?
|
34
|
-
# valid = true
|
35
|
-
# valid &= complain("You must specify the 'pid_file' attribute for :memory_usage") if self.pid_file.nil?
|
36
|
-
# valid &= complain("You must specify the 'above' attribute for :memory_usage") if self.above.nil?
|
37
|
-
# valid
|
38
|
-
# end
|
39
20
|
def valid?
|
40
21
|
true
|
41
22
|
end
|
42
|
-
|
23
|
+
|
43
24
|
#######
|
44
25
|
|
45
26
|
def before_start
|
@@ -60,12 +41,11 @@ module God
|
|
60
41
|
def after_stop
|
61
42
|
end
|
62
43
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
false
|
44
|
+
# Construct the friendly name of this Behavior, looks like:
|
45
|
+
#
|
46
|
+
# Behavior FooBar on Watch 'baz'
|
47
|
+
def friendly_name
|
48
|
+
"Behavior " + super + " on Watch '#{self.watch.name}'"
|
69
49
|
end
|
70
50
|
end
|
71
51
|
|
@@ -4,12 +4,16 @@ module God
|
|
4
4
|
class CleanPidFile < Behavior
|
5
5
|
def valid?
|
6
6
|
valid = true
|
7
|
-
valid &= complain("
|
7
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
8
8
|
valid
|
9
9
|
end
|
10
10
|
|
11
11
|
def before_start
|
12
|
-
File.delete(self.watch.pid_file)
|
12
|
+
File.delete(self.watch.pid_file)
|
13
|
+
|
14
|
+
"deleted pid file"
|
15
|
+
rescue
|
16
|
+
"no pid file to delete"
|
13
17
|
end
|
14
18
|
end
|
15
19
|
|
@@ -13,13 +13,13 @@ module God
|
|
13
13
|
|
14
14
|
def valid?
|
15
15
|
valid = true
|
16
|
-
valid &= complain("
|
17
|
-
valid &= complain("
|
18
|
-
valid &= complain("
|
16
|
+
valid &= complain("Attribute 'failures' must be specified", self) unless self.failures
|
17
|
+
valid &= complain("Attribute 'seconds' must be specified", self) unless self.seconds
|
18
|
+
valid &= complain("Attribute 'notifier' must be specified", self) unless self.notifier
|
19
19
|
|
20
20
|
# Must take one arg or variable args
|
21
21
|
unless self.notifier.respond_to?(:notify) and [1,-1].include?(self.notifier.method(:notify).arity)
|
22
|
-
valid &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args")
|
22
|
+
valid &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args", self)
|
23
23
|
end
|
24
24
|
|
25
25
|
valid
|
data/lib/god/condition.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
module God
|
2
2
|
|
3
3
|
class Condition < Behavior
|
4
|
+
attr_accessor :transition, :notify, :info
|
5
|
+
|
4
6
|
# Generate a Condition of the given kind. The proper class is found by camel casing the
|
5
7
|
# kind (which is given as an underscored symbol).
|
6
|
-
# +kind+ is the underscored symbol representing the class (e.g. foo_bar for God::Conditions::FooBar)
|
8
|
+
# +kind+ is the underscored symbol representing the class (e.g. :foo_bar for God::Conditions::FooBar)
|
7
9
|
def self.generate(kind, watch)
|
8
10
|
sym = kind.to_s.capitalize.gsub(/_(.)/){$1.upcase}.intern
|
9
11
|
c = God::Conditions.const_get(sym).new
|
10
12
|
|
11
|
-
unless c.kind_of?(PollCondition) || c.kind_of?(EventCondition)
|
12
|
-
abort "Condition '#{c.class.name}' must subclass
|
13
|
+
unless c.kind_of?(PollCondition) || c.kind_of?(EventCondition) || c.kind_of?(TriggerCondition)
|
14
|
+
abort "Condition '#{c.class.name}' must subclass God::PollCondition, God::EventCondition, or God::TriggerCondition"
|
13
15
|
end
|
14
16
|
|
15
17
|
c.watch = watch
|
@@ -17,12 +19,30 @@ module God
|
|
17
19
|
rescue NameError
|
18
20
|
raise NoSuchConditionError.new("No Condition found with the class name God::Conditions::#{sym}")
|
19
21
|
end
|
22
|
+
|
23
|
+
def self.valid?(condition)
|
24
|
+
valid = true
|
25
|
+
if condition.notify
|
26
|
+
begin
|
27
|
+
Contact.normalize(condition.notify)
|
28
|
+
rescue ArgumentError => e
|
29
|
+
valid &= Configurable.complain("Attribute 'notify' " + e.message, condition)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
valid
|
33
|
+
end
|
34
|
+
|
35
|
+
# Construct the friendly name of this Condition, looks like:
|
36
|
+
#
|
37
|
+
# Condition FooBar on Watch 'baz'
|
38
|
+
def friendly_name
|
39
|
+
"Condition #{self.class.name.split('::').last} on Watch '#{self.watch.name}'"
|
40
|
+
end
|
20
41
|
end
|
21
42
|
|
22
43
|
class PollCondition < Condition
|
23
44
|
# all poll conditions can specify a poll interval
|
24
45
|
attr_accessor :interval
|
25
|
-
attr_accessor :transition
|
26
46
|
|
27
47
|
# Override this method in your Conditions (optional)
|
28
48
|
def before
|
@@ -33,7 +53,7 @@ module God
|
|
33
53
|
# Return true if the test passes (everything is ok)
|
34
54
|
# Return false otherwise
|
35
55
|
def test
|
36
|
-
raise AbstractMethodNotOverriddenError.new("
|
56
|
+
raise AbstractMethodNotOverriddenError.new("PollCondition#test must be overridden in subclasses")
|
37
57
|
end
|
38
58
|
|
39
59
|
# Override this method in your Conditions (optional)
|
@@ -43,7 +63,29 @@ module God
|
|
43
63
|
|
44
64
|
class EventCondition < Condition
|
45
65
|
def register
|
46
|
-
|
66
|
+
raise AbstractMethodNotOverriddenError.new("EventCondition#register must be overridden in subclasses")
|
67
|
+
end
|
68
|
+
|
69
|
+
def deregister
|
70
|
+
raise AbstractMethodNotOverriddenError.new("EventCondition#deregister must be overridden in subclasses")
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
class TriggerCondition < Condition
|
75
|
+
def process(event, payload)
|
76
|
+
raise AbstractMethodNotOverriddenError.new("TriggerCondition#process must be overridden in subclasses")
|
77
|
+
end
|
78
|
+
|
79
|
+
def trigger
|
80
|
+
Hub.trigger(self)
|
81
|
+
end
|
82
|
+
|
83
|
+
def register
|
84
|
+
Trigger.register(self)
|
85
|
+
end
|
86
|
+
|
87
|
+
def deregister
|
88
|
+
Trigger.deregister(self)
|
47
89
|
end
|
48
90
|
end
|
49
91
|
|
@@ -4,9 +4,13 @@ module God
|
|
4
4
|
class Always < PollCondition
|
5
5
|
attr_accessor :what
|
6
6
|
|
7
|
+
def initialize
|
8
|
+
self.info = "always"
|
9
|
+
end
|
10
|
+
|
7
11
|
def valid?
|
8
12
|
valid = true
|
9
|
-
valid &= complain("
|
13
|
+
valid &= complain("Attribute 'what' must be specified", self) if self.what.nil?
|
10
14
|
valid
|
11
15
|
end
|
12
16
|
|
@@ -17,24 +17,32 @@ module God
|
|
17
17
|
|
18
18
|
@timeline = Timeline.new(self.times[1])
|
19
19
|
end
|
20
|
-
|
20
|
+
|
21
|
+
def reset
|
22
|
+
@timeline.clear
|
23
|
+
end
|
24
|
+
|
21
25
|
def valid?
|
22
26
|
valid = true
|
23
|
-
valid &= complain("
|
24
|
-
valid &= complain("
|
27
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
28
|
+
valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
|
25
29
|
valid
|
26
30
|
end
|
27
|
-
|
31
|
+
|
28
32
|
def test
|
29
33
|
return false unless File.exist?(self.watch.pid_file)
|
30
34
|
|
31
35
|
pid = File.read(self.watch.pid_file).strip
|
32
36
|
process = System::Process.new(pid)
|
33
37
|
@timeline.push(process.percent_cpu)
|
38
|
+
|
39
|
+
history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}%%" }.join(", ") + "]"
|
40
|
+
|
34
41
|
if @timeline.select { |x| x > self.above }.size >= self.times.first
|
35
|
-
|
42
|
+
self.info = "cpu out of bounds #{history}"
|
36
43
|
return true
|
37
44
|
else
|
45
|
+
self.info = "cpu within bounds #{history}"
|
38
46
|
return false
|
39
47
|
end
|
40
48
|
end
|
@@ -12,21 +12,26 @@ module God
|
|
12
12
|
|
13
13
|
def valid?
|
14
14
|
valid = true
|
15
|
-
valid &= complain("
|
15
|
+
valid &= complain("Attribute 'lambda' must be specified", self) if self.lambda.nil?
|
16
16
|
valid
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
def test
|
20
20
|
puts "Calling test. Interval at #{self.interval}"
|
21
21
|
@original_interval ||= self.interval
|
22
22
|
unless pass?
|
23
|
-
|
23
|
+
if @tries == 2
|
24
|
+
self.info = "lambda condition was satisfied"
|
25
|
+
return true
|
26
|
+
end
|
24
27
|
self.interval = self.interval / 2.0
|
25
28
|
@tries += 1
|
26
29
|
else
|
27
30
|
@tries = 0
|
28
31
|
self.interval = @original_interval
|
29
32
|
end
|
33
|
+
|
34
|
+
self.info = "lambda condition was not satisfied"
|
30
35
|
false
|
31
36
|
end
|
32
37
|
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module God
|
2
|
+
module Conditions
|
3
|
+
|
4
|
+
class Flapping < TriggerCondition
|
5
|
+
attr_accessor :times, :within, :from_state, :to_state, :retry_in, :retry_times, :retry_within
|
6
|
+
|
7
|
+
def initialize
|
8
|
+
self.info = "process is flapping"
|
9
|
+
end
|
10
|
+
|
11
|
+
def prepare
|
12
|
+
@timeline = Timeline.new(self.times)
|
13
|
+
@retry_timeline = Timeline.new(self.retry_times)
|
14
|
+
end
|
15
|
+
|
16
|
+
def valid?
|
17
|
+
valid = true
|
18
|
+
valid &= complain("Attribute 'times' must be specified", self) if self.times.nil?
|
19
|
+
valid &= complain("Attribute 'within' must be specified", self) if self.within.nil?
|
20
|
+
valid &= complain("Attributes 'from_state', 'to_state', or both must be specified", self) if self.from_state.nil? && self.to_state.nil?
|
21
|
+
valid
|
22
|
+
end
|
23
|
+
|
24
|
+
def process(event, payload)
|
25
|
+
begin
|
26
|
+
if event == :state_change
|
27
|
+
event_from_state, event_to_state = *payload
|
28
|
+
|
29
|
+
from_state_match = !self.from_state || self.from_state && Array(self.from_state).include?(event_from_state)
|
30
|
+
to_state_match = !self.to_state || self.to_state && Array(self.to_state).include?(event_to_state)
|
31
|
+
|
32
|
+
if from_state_match && to_state_match
|
33
|
+
@timeline << Time.now
|
34
|
+
|
35
|
+
concensus = (@timeline.size == self.times)
|
36
|
+
duration = (@timeline.last - @timeline.first) < self.within
|
37
|
+
|
38
|
+
if concensus && duration
|
39
|
+
@timeline.clear
|
40
|
+
trigger
|
41
|
+
retry_mechanism
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
rescue => e
|
46
|
+
puts e.message
|
47
|
+
puts e.backtrace.join("\n")
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def retry_mechanism
|
54
|
+
if self.retry_in
|
55
|
+
@retry_timeline << Time.now
|
56
|
+
|
57
|
+
concensus = (@retry_timeline.size == self.retry_times)
|
58
|
+
duration = (@retry_timeline.last - @retry_timeline.first) < self.retry_within
|
59
|
+
|
60
|
+
if concensus && duration
|
61
|
+
# give up
|
62
|
+
Thread.new do
|
63
|
+
sleep 1
|
64
|
+
|
65
|
+
# log
|
66
|
+
msg = "#{self.watch.name} giving up"
|
67
|
+
Syslog.debug(msg)
|
68
|
+
LOG.log(self.watch, :info, msg)
|
69
|
+
end
|
70
|
+
else
|
71
|
+
# try again later
|
72
|
+
Thread.new do
|
73
|
+
sleep 1
|
74
|
+
|
75
|
+
# log
|
76
|
+
msg = "#{self.watch.name} auto-reenable monitoring in #{self.retry_in} seconds"
|
77
|
+
Syslog.debug(msg)
|
78
|
+
LOG.log(self.watch, :info, msg)
|
79
|
+
|
80
|
+
sleep self.retry_in
|
81
|
+
|
82
|
+
# log
|
83
|
+
msg = "#{self.watch.name} auto-reenabling monitoring"
|
84
|
+
Syslog.debug(msg)
|
85
|
+
LOG.log(self.watch, :info, msg)
|
86
|
+
|
87
|
+
if self.watch.state == :unmonitored
|
88
|
+
self.watch.monitor
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
end
|