god 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +26 -0
- data/Manifest.txt +15 -1
- data/Rakefile +2 -7
- data/bin/god +104 -16
- data/lib/god.rb +169 -37
- data/lib/god/behaviors/notify_when_flapping.rb +51 -0
- data/lib/god/condition.rb +1 -0
- data/lib/god/conditions/degrading_lambda.rb +47 -0
- data/lib/god/conditions/process_exits.rb +6 -2
- data/lib/god/conditions/tries.rb +33 -0
- data/lib/god/dependency_graph.rb +41 -0
- data/lib/god/errors.rb +6 -0
- data/lib/god/hub.rb +43 -20
- data/lib/god/logger.rb +44 -0
- data/lib/god/process.rb +91 -19
- data/lib/god/registry.rb +4 -0
- data/lib/god/server.rb +12 -2
- data/lib/god/timeline.rb +36 -0
- data/lib/god/watch.rb +27 -8
- data/test/configs/child_events/child_events.god +7 -2
- data/test/configs/child_polls/child_polls.god +3 -1
- data/test/configs/child_polls/simple_server.rb +1 -1
- data/test/configs/daemon_events/daemon_events.god +7 -3
- data/test/configs/daemon_polls/daemon_polls.god +17 -0
- data/test/configs/daemon_polls/simple_server.rb +6 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +33 -0
- data/test/configs/degrading_lambda/tcp_server.rb +15 -0
- data/test/configs/real.rb +1 -1
- data/test/configs/running_load/running_load.god +16 -0
- data/test/configs/stress/simple_server.rb +3 -0
- data/test/configs/stress/stress.god +15 -0
- data/test/configs/test.rb +14 -2
- data/test/helper.rb +12 -2
- data/test/test_conditions_tries.rb +46 -0
- data/test/test_dependency_graph.rb +62 -0
- data/test/test_god.rb +289 -33
- data/test/test_handlers_kqueue_handler.rb +11 -7
- data/test/test_hub.rb +18 -0
- data/test/test_logger.rb +55 -0
- data/test/test_process.rb +135 -17
- data/test/test_registry.rb +2 -1
- data/test/test_server.rb +35 -4
- data/test/test_timeline.rb +14 -2
- data/test/test_watch.rb +7 -0
- metadata +21 -4
- data/lib/god/conditions/timeline.rb +0 -17
@@ -0,0 +1,51 @@
|
|
1
|
+
module God
  module Behaviors

    # Behavior that records the time of every start/restart and calls the
    # configured notifier once the number of starts recorded inside the last
    # +seconds+ seconds reaches +failures+.
    class NotifyWhenFlapping < Behavior
      attr_accessor :failures # number of failures
      attr_accessor :seconds  # number of seconds
      attr_accessor :notifier # class to notify with

      def initialize
        super
        @startup_times = []
      end

      # Checks that all three attributes are set and that the notifier can
      # be handed a single message.
      #
      # Returns true when the configuration is usable; otherwise reports each
      # problem through +complain+ and returns a falsy value.
      def valid?
        valid = true
        valid &= complain("You must specify the 'failures' attribute for :notify_when_flapping") unless self.failures
        valid &= complain("You must specify the 'seconds' attribute for :notify_when_flapping") unless self.seconds

        if self.notifier
          # Must take one arg or variable args
          unless accepts_single_message?(self.notifier)
            valid &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args")
          end
        else
          # A missing notifier gets exactly one complaint; running the arity
          # check against nil would only add a second, misleading one.
          valid &= complain("You must specify the 'notifier' attribute for :notify_when_flapping")
        end

        valid
      end

      # Hook run before the watch is started.
      def before_start
        record_start
      end

      # Hook run before the watch is restarted.
      def before_restart
        record_start
      end

      private

      # Remembers the current time (whole seconds) as a start attempt and
      # then evaluates the flapping threshold.
      def record_start
        now = Time.now.to_i
        @startup_times << now
        check_for_flapping(now)
      end

      # True when +candidate+ responds to +notify+ and that method can be
      # called with exactly one positional argument: arity 1, arity -1
      # (splat only) or arity -2 (one required argument followed by optional
      # ones, e.g. <tt>notify(message, options = {})</tt>).
      def accepts_single_message?(candidate)
        return false unless candidate.respond_to?(:notify)

        [1, -1, -2].include?(candidate.method(:notify).arity)
      end

      # Drops start times older than the window ending at +now+ and notifies
      # when the remaining count reaches the configured threshold.
      def check_for_flapping(now)
        oldest_allowed = now - self.seconds
        # delete_if mutates in place and is available on every Ruby version.
        @startup_times.delete_if { |time| time < oldest_allowed }

        if @startup_times.length >= self.failures
          self.notifier.notify("#{self.watch.name} has called start/restart #{@startup_times.length} times in #{self.seconds} seconds")
        end
      end
    end

  end
end
|
data/lib/god/condition.rb
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
module God
  module Conditions

    # Poll condition driven by a user-supplied callable.
    #
    # Every failed check halves the polling interval. The third consecutive
    # failure makes +test+ return true. A passing check resets the failure
    # count and restores the interval that was in effect on the first call.
    class DegradingLambda < PollCondition
      attr_accessor :lambda

      def initialize
        super
        @tries = 0
      end

      # The condition is only usable when a callable has been assigned.
      def valid?
        return true unless self.lambda.nil?

        true & complain("You must specify the 'lambda' attribute for :degrading_lambda")
      end

      # Runs one check. Returns true only when the check fails while two
      # earlier consecutive failures are already on record; false otherwise.
      def test
        puts "Calling test. Interval at #{self.interval}"
        @original_interval ||= self.interval

        if pass?
          # healthy again: forget earlier failures and poll at the normal rate
          @tries = 0
          self.interval = @original_interval
          return false
        end

        return true if @tries == 2

        # failed, but not yet often enough: poll twice as fast next time
        self.interval = self.interval / 2.0
        @tries += 1
        false
      end

      private

      # Calls the lambda under a timeout equal to the current interval and
      # returns its result; a timeout counts as a failed check.
      def pass?
        Timeout::timeout(@interval) { self.lambda.call() }
      rescue Timeout::Error
        false
      end
    end

  end
end
|
@@ -11,8 +11,12 @@ module God
|
|
11
11
|
def register
|
12
12
|
pid = File.read(self.watch.pid_file).strip.to_i
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
begin
|
15
|
+
EventHandler.register(pid, :proc_exit) do
|
16
|
+
Hub.trigger(self)
|
17
|
+
end
|
18
|
+
rescue StandardError
|
19
|
+
raise EventRegistrationFailedError.new
|
16
20
|
end
|
17
21
|
end
|
18
22
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module God
  module Conditions

    # Poll condition that triggers once it has been tested +times+ times,
    # optionally requiring those tests to fall inside a +within+ window.
    class Tries < PollCondition
      attr_accessor :times, :within

      # Creates the timeline that collects one timestamp per test.
      # NOTE(review): Timeline is defined elsewhere; presumably it keeps at
      # most +times+ entries -- confirm.
      def prepare
        @timeline = Timeline.new(self.times)
      end

      # +times+ is mandatory; +within+ may be left unset.
      def valid?
        return true unless self.times.nil?

        true & complain("You must specify the 'times' attribute for :tries")
      end

      # Records the current time and returns true when the timeline holds
      # exactly +times+ entries and, if +within+ is set, the first and last
      # entries are less than +within+ apart. The timeline is cleared on a
      # trigger only when no window is configured.
      def test
        @timeline << Time.now

        consensus = @timeline.size == self.times
        in_window = within.nil? || (@timeline.last - @timeline.first) < self.within

        return false unless consensus && in_window

        @timeline.clear if within.nil?
        true
      end
    end

  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module God
  # Directed graph of named nodes. +nodes+ maps each name to its Node.
  class DependencyGraph
    attr_accessor :nodes

    def initialize
      @nodes = {}
    end

    # Records that +a+ depends on +b+, creating and registering a Node for
    # either name that is not yet known.
    #
    # Returns the node registered under +b+.
    def add(a, b)
      dependent  = nodes[a] || Node.new(a)
      dependency = nodes[b] || Node.new(b)

      dependent.add(dependency)

      nodes[a] ||= dependent
      nodes[b] ||= dependency
    end
  end
end
|
20
|
+
|
21
|
+
module God
  class DependencyGraph
    # A named vertex together with the nodes it depends on directly.
    class Node
      attr_accessor :name
      attr_accessor :dependencies

      def initialize(name)
        self.name = name
        self.dependencies = []
      end

      # Adds +node+ as a direct dependency unless it is already present.
      def add(node)
        self.dependencies << node unless self.dependencies.include?(node)
      end

      # Returns true when +node+ is this node or is reachable from it by
      # following dependencies, false otherwise.
      #
      # The walk is iterative and remembers visited nodes, so it terminates
      # on cyclic graphs instead of recursing without bound.
      def has_node?(node)
        visited = {}
        pending = [self]

        until pending.empty?
          current = pending.pop
          return true if current == node

          # identity-based bookkeeping: each node is expanded at most once
          next if visited[current.object_id]
          visited[current.object_id] = true

          pending.concat(current.dependencies)
        end

        false
      end
    end
  end
end
|
data/lib/god/errors.rb
CHANGED
@@ -3,6 +3,9 @@ module God
|
|
3
3
|
class AbstractMethodNotOverriddenError < StandardError
|
4
4
|
end
|
5
5
|
|
6
|
+
class NoSuchWatchError < StandardError
|
7
|
+
end
|
8
|
+
|
6
9
|
class NoSuchConditionError < StandardError
|
7
10
|
end
|
8
11
|
|
@@ -12,4 +15,7 @@ module God
|
|
12
15
|
class InvalidCommandError < StandardError
|
13
16
|
end
|
14
17
|
|
18
|
+
class EventRegistrationFailedError < StandardError
|
19
|
+
end
|
20
|
+
|
15
21
|
end
|
data/lib/god/hub.rb
CHANGED
@@ -51,25 +51,48 @@ module God
|
|
51
51
|
|
52
52
|
# it's possible that the timer will trigger an event before it can be cleared
|
53
53
|
# by an exiting metric, in which case it should be ignored
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
54
|
+
unless metric.nil?
|
55
|
+
watch = metric.watch
|
56
|
+
|
57
|
+
watch.mutex.synchronize do
|
58
|
+
# run the test
|
59
|
+
result = condition.test
|
60
|
+
|
61
|
+
# log
|
62
|
+
msg = watch.name + ' ' + condition.class.name + " [#{result}] " + metric.destination.inspect
|
63
|
+
Syslog.debug(msg)
|
64
|
+
LOG.log(watch, :info, msg)
|
65
|
+
|
66
|
+
# after-condition
|
67
|
+
condition.after
|
68
|
+
|
69
|
+
# get the destination
|
70
|
+
dest =
|
71
|
+
if result && condition.transition
|
72
|
+
# condition override
|
73
|
+
condition.transition
|
74
|
+
else
|
75
|
+
# regular
|
76
|
+
metric.destination[result]
|
77
|
+
end
|
78
|
+
|
79
|
+
# transition or reschedule
|
80
|
+
if dest
|
81
|
+
# transition
|
82
|
+
begin
|
83
|
+
watch.move(dest)
|
84
|
+
rescue EventRegistrationFailedError
|
85
|
+
msg = watch.name + ' Event registration failed, moving back to previous state'
|
86
|
+
Syslog.debug(msg)
|
87
|
+
LOG.log(watch, :info, msg)
|
88
|
+
|
89
|
+
dest = watch.state
|
90
|
+
retry
|
91
|
+
end
|
92
|
+
else
|
93
|
+
# reschedule
|
94
|
+
Timer.get.schedule(condition)
|
95
|
+
end
|
73
96
|
end
|
74
97
|
end
|
75
98
|
rescue => e
|
@@ -89,7 +112,7 @@ module God
|
|
89
112
|
watch.mutex.synchronize do
|
90
113
|
msg = watch.name + ' ' + condition.class.name + " [true] " + metric.destination.inspect
|
91
114
|
Syslog.debug(msg)
|
92
|
-
|
115
|
+
LOG.log(watch, :info, msg)
|
93
116
|
|
94
117
|
dest = metric.destination[true]
|
95
118
|
watch.move(dest)
|
data/lib/god/logger.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
module God

  # Logger that writes to STDOUT and additionally keeps, per watch name, a
  # Timeline of [Time, formatted line] pairs that can be queried later.
  class Logger < ::Logger
    attr_accessor :logs

    def initialize
      super(STDOUT)
      self.logs = {}
      @mutex = Mutex.new
    end

    # Logs +text+ at +level+ for +watch+.
    #
    # watch - object responding to +name+; the name keys the per-watch log
    # level - Symbol naming a Logger severity method (e.g. :info, :error)
    # text  - String message
    #
    # The line is stored in the watch's timeline and then sent to the
    # regular (STDOUT) logger.
    def log(watch, level, text)
      # format the line with a throw-away logger writing into memory
      buf = StringIO.new
      templog = ::Logger.new(buf)
      templog.send(level, text)
      line = buf.string
      templog.close

      # Create-if-missing and append happen under one lock so concurrent
      # callers cannot replace each other's freshly created timeline.
      @mutex.synchronize do
        timeline = (self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT))
        timeline << [Time.now, line]
      end

      # send to regular logger
      self.send(level, text)
    end

    # Returns the concatenated log lines stored for +watch_name+ whose
    # timestamp is strictly later than +since+ (an empty String when there
    # are none). An empty timeline is created for an unknown watch name.
    def watch_log_since(watch_name, since)
      @mutex.synchronize do
        timeline = (self.logs[watch_name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT))

        timeline.select { |entry| entry.first > since }.map { |entry| entry[1] }.join
      end
    end
  end

end
|
data/lib/god/process.rb
CHANGED
@@ -4,14 +4,69 @@ module God
|
|
4
4
|
class Process
|
5
5
|
WRITES_PID = [:start, :restart]
|
6
6
|
|
7
|
-
attr_accessor :name, :uid, :gid, :start, :stop, :restart
|
7
|
+
attr_accessor :name, :uid, :gid, :log, :start, :stop, :restart
|
8
8
|
|
9
|
-
def initialize
|
10
|
-
|
11
|
-
|
9
|
+
def initialize
|
10
|
+
@pid_file = nil
|
11
|
+
@tracking_pid = false
|
12
|
+
end
|
13
|
+
|
14
|
+
def alive?
|
15
|
+
pid = File.read(self.pid_file).strip.to_i
|
16
|
+
System::Process.new(pid).exists?
|
17
|
+
end
|
18
|
+
|
19
|
+
def valid?
|
20
|
+
# determine if we're tracking pid or not
|
21
|
+
self.pid_file
|
22
|
+
|
23
|
+
valid = true
|
24
|
+
|
25
|
+
# a name must be specified
|
26
|
+
if self.name.nil?
|
27
|
+
valid = false
|
28
|
+
LOG.log(self, :error, "No name was specified")
|
12
29
|
end
|
13
30
|
|
14
|
-
|
31
|
+
# a start command must be specified
|
32
|
+
if self.start.nil?
|
33
|
+
valid = false
|
34
|
+
LOG.log(self, :error, "No start command was specified")
|
35
|
+
end
|
36
|
+
|
37
|
+
# self-daemonizing processes must specify a stop command
|
38
|
+
if !@tracking_pid && self.stop.nil?
|
39
|
+
valid = false
|
40
|
+
LOG.log(self, :error, "No stop command was specified")
|
41
|
+
end
|
42
|
+
|
43
|
+
# self-daemonizing processes cannot specify log
|
44
|
+
if !@tracking_pid && self.log
|
45
|
+
valid = false
|
46
|
+
LOG.log(self, :error, "Self-daemonizing processes cannot specify a log file")
|
47
|
+
end
|
48
|
+
|
49
|
+
# uid must exist if specified
|
50
|
+
if self.uid
|
51
|
+
begin
|
52
|
+
Etc.getpwnam(self.uid)
|
53
|
+
rescue ArgumentError
|
54
|
+
valid = false
|
55
|
+
LOG.log(self, :error, "UID for '#{self.uid}' does not exist")
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# gid must exist if specified
|
60
|
+
if self.gid
|
61
|
+
begin
|
62
|
+
Etc.getgrnam(self.gid)
|
63
|
+
rescue ArgumentError
|
64
|
+
valid = false
|
65
|
+
LOG.log(self, :error, "GID for '#{self.gid}' does not exist")
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
valid
|
15
70
|
end
|
16
71
|
|
17
72
|
# DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
|
@@ -43,20 +98,32 @@ module God
|
|
43
98
|
|
44
99
|
def call_action(action)
|
45
100
|
command = send(action)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
101
|
+
|
102
|
+
if action == :stop && command.nil?
|
103
|
+
pid = File.read(self.pid_file).strip.to_i
|
104
|
+
name = self.name
|
105
|
+
command = lambda do
|
106
|
+
LOG.log(self, :info, "#{self.name} stop: default lambda killer")
|
107
|
+
|
108
|
+
::Process.kill('HUP', pid) rescue nil
|
109
|
+
|
110
|
+
# Poll to see if it's dead
|
111
|
+
5.times do
|
112
|
+
begin
|
113
|
+
::Process.kill(0, pid)
|
114
|
+
rescue Errno::ESRCH
|
115
|
+
# It died. Good.
|
116
|
+
return
|
117
|
+
end
|
118
|
+
|
119
|
+
sleep 1
|
53
120
|
end
|
121
|
+
|
122
|
+
::Process.kill('KILL', pid) rescue nil
|
54
123
|
end
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
end
|
59
|
-
|
124
|
+
end
|
125
|
+
|
126
|
+
if command.kind_of?(String)
|
60
127
|
# string command
|
61
128
|
# fork/exec to setuid/gid
|
62
129
|
r, w = IO.pipe
|
@@ -70,9 +137,14 @@ module God
|
|
70
137
|
Dir.chdir "/"
|
71
138
|
$0 = command
|
72
139
|
STDIN.reopen "/dev/null"
|
73
|
-
|
140
|
+
if self.log
|
141
|
+
STDOUT.reopen self.log, "a"
|
142
|
+
else
|
143
|
+
STDOUT.reopen "/dev/null", "a"
|
144
|
+
end
|
74
145
|
STDERR.reopen STDOUT
|
75
|
-
|
146
|
+
|
147
|
+
exec command unless command.empty?
|
76
148
|
end
|
77
149
|
puts pid.to_s
|
78
150
|
end
|