god 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +26 -0
- data/Manifest.txt +15 -1
- data/Rakefile +2 -7
- data/bin/god +104 -16
- data/lib/god.rb +169 -37
- data/lib/god/behaviors/notify_when_flapping.rb +51 -0
- data/lib/god/condition.rb +1 -0
- data/lib/god/conditions/degrading_lambda.rb +47 -0
- data/lib/god/conditions/process_exits.rb +6 -2
- data/lib/god/conditions/tries.rb +33 -0
- data/lib/god/dependency_graph.rb +41 -0
- data/lib/god/errors.rb +6 -0
- data/lib/god/hub.rb +43 -20
- data/lib/god/logger.rb +44 -0
- data/lib/god/process.rb +91 -19
- data/lib/god/registry.rb +4 -0
- data/lib/god/server.rb +12 -2
- data/lib/god/timeline.rb +36 -0
- data/lib/god/watch.rb +27 -8
- data/test/configs/child_events/child_events.god +7 -2
- data/test/configs/child_polls/child_polls.god +3 -1
- data/test/configs/child_polls/simple_server.rb +1 -1
- data/test/configs/daemon_events/daemon_events.god +7 -3
- data/test/configs/daemon_polls/daemon_polls.god +17 -0
- data/test/configs/daemon_polls/simple_server.rb +6 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +33 -0
- data/test/configs/degrading_lambda/tcp_server.rb +15 -0
- data/test/configs/real.rb +1 -1
- data/test/configs/running_load/running_load.god +16 -0
- data/test/configs/stress/simple_server.rb +3 -0
- data/test/configs/stress/stress.god +15 -0
- data/test/configs/test.rb +14 -2
- data/test/helper.rb +12 -2
- data/test/test_conditions_tries.rb +46 -0
- data/test/test_dependency_graph.rb +62 -0
- data/test/test_god.rb +289 -33
- data/test/test_handlers_kqueue_handler.rb +11 -7
- data/test/test_hub.rb +18 -0
- data/test/test_logger.rb +55 -0
- data/test/test_process.rb +135 -17
- data/test/test_registry.rb +2 -1
- data/test/test_server.rb +35 -4
- data/test/test_timeline.rb +14 -2
- data/test/test_watch.rb +7 -0
- metadata +21 -4
- data/lib/god/conditions/timeline.rb +0 -17
@@ -0,0 +1,51 @@
|
|
1
|
+
module God
  module Behaviors

    # Behavior that timestamps every start/restart of its watch and calls the
    # configured notifier when enough of them fall inside a sliding window.
    #
    # Attributes:
    #   failures - number of start/restart calls within the window that
    #              triggers a notification
    #   seconds  - length of the window, in seconds
    #   notifier - object to notify with; must respond to `notify` taking one
    #              argument or variable arguments
    class NotifyWhenFlapping < Behavior
      attr_accessor :failures, :seconds, :notifier

      def initialize
        super
        # Integer epoch timestamps of the start/restart calls still in the window.
        @startup_times = []
      end

      # Checks that all three attributes are set and that the notifier exposes a
      # usable `notify` method. Every problem found is passed to `complain`.
      #
      # Returns the accumulated validity flag.
      def valid?
        ok = true

        %w[failures seconds notifier].each do |attribute|
          next if send(attribute)
          ok &= complain("You must specify the '#{attribute}' attribute for :notify_when_flapping")
        end

        # Must take one arg or variable args
        usable = notifier.respond_to?(:notify) &&
                 [1, -1].include?(notifier.method(:notify).arity)
        ok &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args") unless usable

        ok
      end

      # Hook run before the watch is started.
      def before_start
        register_startup
      end

      # Hook run before the watch is restarted.
      def before_restart
        register_startup
      end

      private

      # Records the current time as a startup and runs the flapping check.
      def register_startup
        stamp = Time.now.to_i
        @startup_times << stamp
        check_for_flapping(stamp)
      end

      # Drops startups older than `seconds` before `now`, then notifies if at
      # least `failures` startups remain in the window.
      def check_for_flapping(now)
        cutoff = now - seconds
        @startup_times.reject! { |started_at| started_at < cutoff }

        count = @startup_times.length
        return unless count >= failures

        notifier.notify("#{watch.name} has called start/restart #{count} times in #{seconds} seconds")
      end
    end

  end
end
|
data/lib/god/condition.rb
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
module God
  module Conditions

    # Poll condition driven by a user-supplied lambda.
    #
    # Each time the lambda fails (returns a falsy value or times out) the
    # polling interval is halved. The third consecutive failure makes `test`
    # return true. Any success resets the failure count and restores the
    # interval that was in effect on the first call to `test`.
    class DegradingLambda < PollCondition
      attr_accessor :lambda

      def initialize
        super
        # Consecutive failures seen since the last success.
        @tries = 0
      end

      # Valid only when a lambda has been supplied; otherwise complains.
      def valid?
        ok = true
        ok &= complain("You must specify the 'lambda' attribute for :degrading_lambda") if self.lambda.nil?
        ok
      end

      # Runs the lambda once and adjusts the interval as described on the class.
      #
      # Returns true on the third consecutive failure, false otherwise.
      def test
        puts "Calling test. Interval at #{interval}"
        @original_interval ||= interval

        if pass?
          # Success: forget earlier failures and go back to the normal pace.
          @tries = 0
          self.interval = @original_interval
          return false
        end

        return true if @tries == 2

        # Failure, but not yet the third one: poll twice as often.
        self.interval = interval / 2.0
        @tries += 1
        false
      end

      private

      # Calls the lambda under a timeout of @interval seconds.
      #
      # Returns the lambda's result, or false if the timeout fired.
      def pass?
        Timeout.timeout(@interval) { self.lambda.call }
      rescue Timeout::Error
        false
      end
    end

  end
end
|
@@ -11,8 +11,12 @@ module God
|
|
11
11
|
def register
|
12
12
|
pid = File.read(self.watch.pid_file).strip.to_i
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
begin
|
15
|
+
EventHandler.register(pid, :proc_exit) do
|
16
|
+
Hub.trigger(self)
|
17
|
+
end
|
18
|
+
rescue StandardError
|
19
|
+
raise EventRegistrationFailedError.new
|
16
20
|
end
|
17
21
|
end
|
18
22
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module God
  module Conditions

    # Poll condition that becomes true once it has been tested `times` times,
    # optionally requiring those tests to fall within `within` seconds.
    class Tries < PollCondition
      attr_accessor :times, :within

      # Creates the timeline that holds the timestamps of recent tests.
      def prepare
        @timeline = Timeline.new(times)
      end

      # Valid only when `times` has been supplied; otherwise complains.
      def valid?
        ok = true
        ok &= complain("You must specify the 'times' attribute for :tries") if times.nil?
        ok
      end

      # Records the current time and reports whether the try budget is used up.
      #
      # Returns true when the timeline holds `times` entries and, if `within`
      # is set, the span between the first and last entry is shorter than
      # `within`. Without `within` the timeline is cleared on a true result.
      def test
        @timeline << Time.now

        enough_tries = (@timeline.size == times)
        inside_window = within.nil? || (@timeline.last - @timeline.first) < within

        return false unless enough_tries && inside_window

        @timeline.clear if within.nil?
        true
      end
    end

  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module God
  # Directed graph of named nodes. `nodes` maps each name to its Node.
  class DependencyGraph
    attr_accessor :nodes

    def initialize
      self.nodes = {}
    end

    # Records an edge from the node named +a+ to the node named +b+, creating
    # and registering either node if it does not exist yet.
    #
    # Each node is registered before it is linked, so add(x, x) produces a
    # self-edge on the single registered node rather than an edge to a second,
    # unregistered node with the same name.
    #
    # Returns the registered node for +b+.
    def add(a, b)
      node_a = (nodes[a] ||= Node.new(a))
      node_b = (nodes[b] ||= Node.new(b))

      node_a.add(node_b)

      node_b
    end

    # A single vertex: a name plus the list of nodes it points at.
    class Node
      attr_accessor :name
      attr_accessor :dependencies

      def initialize(name)
        self.name = name
        self.dependencies = []
      end

      # Appends +node+ to the dependency list unless it is already present.
      def add(node)
        dependencies << node unless dependencies.include?(node)
      end

      # Returns true if +node+ is this node or can be reached by following
      # dependency links from it, false otherwise. Safe on cyclic graphs.
      def has_node?(node)
        reaches?(node, {})
      end

      protected

      # Depth-first search helper for has_node?. +seen+ holds the object ids of
      # nodes already expanded so that a cycle cannot recurse forever.
      def reaches?(target, seen)
        return true if self == target
        return false if seen[object_id]

        seen[object_id] = true
        dependencies.any? { |dependency| dependency.reaches?(target, seen) }
      end
    end
  end
end
|
data/lib/god/errors.rb
CHANGED
@@ -3,6 +3,9 @@ module God
|
|
3
3
|
class AbstractMethodNotOverriddenError < StandardError
|
4
4
|
end
|
5
5
|
|
6
|
+
class NoSuchWatchError < StandardError
|
7
|
+
end
|
8
|
+
|
6
9
|
class NoSuchConditionError < StandardError
|
7
10
|
end
|
8
11
|
|
@@ -12,4 +15,7 @@ module God
|
|
12
15
|
class InvalidCommandError < StandardError
|
13
16
|
end
|
14
17
|
|
18
|
+
class EventRegistrationFailedError < StandardError
|
19
|
+
end
|
20
|
+
|
15
21
|
end
|
data/lib/god/hub.rb
CHANGED
@@ -51,25 +51,48 @@ module God
|
|
51
51
|
|
52
52
|
# it's possible that the timer will trigger an event before it can be cleared
|
53
53
|
# by an exiting metric, in which case it should be ignored
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
54
|
+
unless metric.nil?
|
55
|
+
watch = metric.watch
|
56
|
+
|
57
|
+
watch.mutex.synchronize do
|
58
|
+
# run the test
|
59
|
+
result = condition.test
|
60
|
+
|
61
|
+
# log
|
62
|
+
msg = watch.name + ' ' + condition.class.name + " [#{result}] " + metric.destination.inspect
|
63
|
+
Syslog.debug(msg)
|
64
|
+
LOG.log(watch, :info, msg)
|
65
|
+
|
66
|
+
# after-condition
|
67
|
+
condition.after
|
68
|
+
|
69
|
+
# get the destination
|
70
|
+
dest =
|
71
|
+
if result && condition.transition
|
72
|
+
# condition override
|
73
|
+
condition.transition
|
74
|
+
else
|
75
|
+
# regular
|
76
|
+
metric.destination[result]
|
77
|
+
end
|
78
|
+
|
79
|
+
# transition or reschedule
|
80
|
+
if dest
|
81
|
+
# transition
|
82
|
+
begin
|
83
|
+
watch.move(dest)
|
84
|
+
rescue EventRegistrationFailedError
|
85
|
+
msg = watch.name + ' Event registration failed, moving back to previous state'
|
86
|
+
Syslog.debug(msg)
|
87
|
+
LOG.log(watch, :info, msg)
|
88
|
+
|
89
|
+
dest = watch.state
|
90
|
+
retry
|
91
|
+
end
|
92
|
+
else
|
93
|
+
# reschedule
|
94
|
+
Timer.get.schedule(condition)
|
95
|
+
end
|
73
96
|
end
|
74
97
|
end
|
75
98
|
rescue => e
|
@@ -89,7 +112,7 @@ module God
|
|
89
112
|
watch.mutex.synchronize do
|
90
113
|
msg = watch.name + ' ' + condition.class.name + " [true] " + metric.destination.inspect
|
91
114
|
Syslog.debug(msg)
|
92
|
-
|
115
|
+
LOG.log(watch, :info, msg)
|
93
116
|
|
94
117
|
dest = metric.destination[true]
|
95
118
|
watch.move(dest)
|
data/lib/god/logger.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
module God

  # Logger that writes to STDOUT like a regular ::Logger and additionally keeps
  # a per-watch buffer of recent formatted lines, keyed by watch name in `logs`.
  class Logger < ::Logger
    attr_accessor :logs

    def initialize
      super(STDOUT)
      self.logs = {}
      # Guards every read and write of the per-watch buffers.
      @mutex = Mutex.new
    end

    # Logs +text+ at +level+ on behalf of +watch+.
    #
    # watch - any object responding to `name`
    # level - name of a ::Logger severity method, e.g. :info or :error
    # text  - the message
    #
    # The message is formatted through a throwaway ::Logger writing into a
    # StringIO so that the buffered entry looks like a regular log line, stored
    # as [Time, line] in the watch's buffer, and then sent to this logger.
    #
    # Returns whatever the severity method returns.
    def log(watch, level, text)
      buf = StringIO.new
      templog = ::Logger.new(buf)
      templog.send(level, text)

      # Create-if-missing and append happen under one lock so that concurrent
      # callers cannot each install a fresh buffer and drop each other's lines.
      @mutex.synchronize do
        buffer_for(watch.name) << [Time.now, buf.string]
      end
      templog.close

      # send to regular logger
      send(level, text)
    end

    # Returns the buffered lines for +watch_name+ whose timestamp is later than
    # +since+, joined into one String ("" when there are none).
    def watch_log_since(watch_name, since)
      @mutex.synchronize do
        buffer_for(watch_name).select do |entry|
          entry.first > since
        end.map do |entry|
          entry[1]
        end.join
      end
    end

    private

    # Returns the buffer for +watch_name+, creating it on first use.
    # Callers must hold @mutex.
    def buffer_for(watch_name)
      logs[watch_name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
    end
  end

end
|
data/lib/god/process.rb
CHANGED
@@ -4,14 +4,69 @@ module God
|
|
4
4
|
class Process
|
5
5
|
WRITES_PID = [:start, :restart]
|
6
6
|
|
7
|
-
attr_accessor :name, :uid, :gid, :start, :stop, :restart
|
7
|
+
attr_accessor :name, :uid, :gid, :log, :start, :stop, :restart
|
8
8
|
|
9
|
-
def initialize
|
10
|
-
|
11
|
-
|
9
|
+
def initialize
|
10
|
+
@pid_file = nil
|
11
|
+
@tracking_pid = false
|
12
|
+
end
|
13
|
+
|
14
|
+
def alive?
|
15
|
+
pid = File.read(self.pid_file).strip.to_i
|
16
|
+
System::Process.new(pid).exists?
|
17
|
+
end
|
18
|
+
|
19
|
+
def valid?
|
20
|
+
# determine if we're tracking pid or not
|
21
|
+
self.pid_file
|
22
|
+
|
23
|
+
valid = true
|
24
|
+
|
25
|
+
# a name must be specified
|
26
|
+
if self.name.nil?
|
27
|
+
valid = false
|
28
|
+
LOG.log(self, :error, "No name was specified")
|
12
29
|
end
|
13
30
|
|
14
|
-
|
31
|
+
# a start command must be specified
|
32
|
+
if self.start.nil?
|
33
|
+
valid = false
|
34
|
+
LOG.log(self, :error, "No start command was specified")
|
35
|
+
end
|
36
|
+
|
37
|
+
# self-daemonizing processes must specify a stop command
|
38
|
+
if !@tracking_pid && self.stop.nil?
|
39
|
+
valid = false
|
40
|
+
LOG.log(self, :error, "No stop command was specified")
|
41
|
+
end
|
42
|
+
|
43
|
+
# self-daemonizing processes cannot specify log
|
44
|
+
if !@tracking_pid && self.log
|
45
|
+
valid = false
|
46
|
+
LOG.log(self, :error, "Self-daemonizing processes cannot specify a log file")
|
47
|
+
end
|
48
|
+
|
49
|
+
# uid must exist if specified
|
50
|
+
if self.uid
|
51
|
+
begin
|
52
|
+
Etc.getpwnam(self.uid)
|
53
|
+
rescue ArgumentError
|
54
|
+
valid = false
|
55
|
+
LOG.log(self, :error, "UID for '#{self.uid}' does not exist")
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# gid must exist if specified
|
60
|
+
if self.gid
|
61
|
+
begin
|
62
|
+
Etc.getgrnam(self.gid)
|
63
|
+
rescue ArgumentError
|
64
|
+
valid = false
|
65
|
+
LOG.log(self, :error, "GID for '#{self.gid}' does not exist")
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
valid
|
15
70
|
end
|
16
71
|
|
17
72
|
# DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
|
@@ -43,20 +98,32 @@ module God
|
|
43
98
|
|
44
99
|
def call_action(action)
|
45
100
|
command = send(action)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
101
|
+
|
102
|
+
if action == :stop && command.nil?
|
103
|
+
pid = File.read(self.pid_file).strip.to_i
|
104
|
+
name = self.name
|
105
|
+
command = lambda do
|
106
|
+
LOG.log(self, :info, "#{self.name} stop: default lambda killer")
|
107
|
+
|
108
|
+
::Process.kill('HUP', pid) rescue nil
|
109
|
+
|
110
|
+
# Poll to see if it's dead
|
111
|
+
5.times do
|
112
|
+
begin
|
113
|
+
::Process.kill(0, pid)
|
114
|
+
rescue Errno::ESRCH
|
115
|
+
# It died. Good.
|
116
|
+
return
|
117
|
+
end
|
118
|
+
|
119
|
+
sleep 1
|
53
120
|
end
|
121
|
+
|
122
|
+
::Process.kill('KILL', pid) rescue nil
|
54
123
|
end
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
end
|
59
|
-
|
124
|
+
end
|
125
|
+
|
126
|
+
if command.kind_of?(String)
|
60
127
|
# string command
|
61
128
|
# fork/exec to setuid/gid
|
62
129
|
r, w = IO.pipe
|
@@ -70,9 +137,14 @@ module God
|
|
70
137
|
Dir.chdir "/"
|
71
138
|
$0 = command
|
72
139
|
STDIN.reopen "/dev/null"
|
73
|
-
|
140
|
+
if self.log
|
141
|
+
STDOUT.reopen self.log, "a"
|
142
|
+
else
|
143
|
+
STDOUT.reopen "/dev/null", "a"
|
144
|
+
end
|
74
145
|
STDERR.reopen STDOUT
|
75
|
-
|
146
|
+
|
147
|
+
exec command unless command.empty?
|
76
148
|
end
|
77
149
|
puts pid.to_s
|
78
150
|
end
|