god 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +26 -0
- data/Manifest.txt +15 -1
- data/Rakefile +2 -7
- data/bin/god +104 -16
- data/lib/god.rb +169 -37
- data/lib/god/behaviors/notify_when_flapping.rb +51 -0
- data/lib/god/condition.rb +1 -0
- data/lib/god/conditions/degrading_lambda.rb +47 -0
- data/lib/god/conditions/process_exits.rb +6 -2
- data/lib/god/conditions/tries.rb +33 -0
- data/lib/god/dependency_graph.rb +41 -0
- data/lib/god/errors.rb +6 -0
- data/lib/god/hub.rb +43 -20
- data/lib/god/logger.rb +44 -0
- data/lib/god/process.rb +91 -19
- data/lib/god/registry.rb +4 -0
- data/lib/god/server.rb +12 -2
- data/lib/god/timeline.rb +36 -0
- data/lib/god/watch.rb +27 -8
- data/test/configs/child_events/child_events.god +7 -2
- data/test/configs/child_polls/child_polls.god +3 -1
- data/test/configs/child_polls/simple_server.rb +1 -1
- data/test/configs/daemon_events/daemon_events.god +7 -3
- data/test/configs/daemon_polls/daemon_polls.god +17 -0
- data/test/configs/daemon_polls/simple_server.rb +6 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +33 -0
- data/test/configs/degrading_lambda/tcp_server.rb +15 -0
- data/test/configs/real.rb +1 -1
- data/test/configs/running_load/running_load.god +16 -0
- data/test/configs/stress/simple_server.rb +3 -0
- data/test/configs/stress/stress.god +15 -0
- data/test/configs/test.rb +14 -2
- data/test/helper.rb +12 -2
- data/test/test_conditions_tries.rb +46 -0
- data/test/test_dependency_graph.rb +62 -0
- data/test/test_god.rb +289 -33
- data/test/test_handlers_kqueue_handler.rb +11 -7
- data/test/test_hub.rb +18 -0
- data/test/test_logger.rb +55 -0
- data/test/test_process.rb +135 -17
- data/test/test_registry.rb +2 -1
- data/test/test_server.rb +35 -4
- data/test/test_timeline.rb +14 -2
- data/test/test_watch.rb +7 -0
- metadata +21 -4
- data/lib/god/conditions/timeline.rb +0 -17
data/lib/god/registry.rb
CHANGED
data/lib/god/server.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'drb'
|
2
|
+
require 'drb/acl'
|
2
3
|
|
3
4
|
# The God::Server oversees the DRb server which dishes out info on this God daemon.
|
4
5
|
|
@@ -7,12 +8,18 @@ module God
|
|
7
8
|
class Server
|
8
9
|
attr_reader :host, :port
|
9
10
|
|
10
|
-
def initialize(host = nil, port = nil)
|
11
|
+
def initialize(host = nil, port = nil, allow = [])
|
11
12
|
@host = host
|
12
|
-
@port = port
|
13
|
+
@port = port
|
14
|
+
@acl = %w{deny all} + allow.inject([]) { |acc, a| acc + ['allow', a] }
|
15
|
+
puts "Starting on #{@host}:#{@port}"
|
13
16
|
start
|
14
17
|
end
|
15
18
|
|
19
|
+
def ping
|
20
|
+
true
|
21
|
+
end
|
22
|
+
|
16
23
|
def method_missing(*args, &block)
|
17
24
|
God.send(*args, &block)
|
18
25
|
end
|
@@ -20,6 +27,9 @@ module God
|
|
20
27
|
private
|
21
28
|
|
22
29
|
def start
|
30
|
+
acl = ACL.new(@acl)
|
31
|
+
DRb.install_acl(acl)
|
32
|
+
|
23
33
|
@drb ||= DRb.start_service("druby://#{@host}:#{@port}", self)
|
24
34
|
end
|
25
35
|
end
|
data/lib/god/timeline.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
module God

  # A bounded, insertion-ordered buffer built on Array.
  #
  # Once the Timeline holds max_size elements, every further push (or <<)
  # discards the oldest element before appending the new one, so values
  # added through these two methods never grow it beyond max_size.
  class Timeline < Array
    # max_size - the number of elements to retain
    def initialize(max_size)
      super()
      @max_size = max_size
    end

    # Push a value onto the Timeline, evicting the oldest element first
    # when the Timeline is already full.
    #
    # Implementation note:
    #   The eviction used to be spelled reverse! / pop / reverse!, which
    #   walks the whole array twice on every push. Array#shift removes the
    #   same (first) element directly and leaves the array in the same
    #   state, so it is used instead.
    #
    # Returns the Timeline itself (same as Array#push)
    def push(val)
      shift if size >= @max_size
      super(val)
    end

    # Append a value; behaves exactly like push.
    def <<(val)
      push(val)
    end
  end

end
|
data/lib/god/watch.rb
CHANGED
@@ -17,7 +17,7 @@ module God
|
|
17
17
|
extend Forwardable
|
18
18
|
def_delegators :@process, :name, :uid, :gid, :start, :stop, :restart,
|
19
19
|
:name=, :uid=, :gid=, :start=, :stop=, :restart=,
|
20
|
-
:pid_file, :pid_file
|
20
|
+
:pid_file, :pid_file=, :log, :log=, :alive?
|
21
21
|
|
22
22
|
# api
|
23
23
|
attr_accessor :behaviors, :metrics
|
@@ -46,6 +46,16 @@ module God
|
|
46
46
|
self.mutex = Mutex.new
|
47
47
|
end
|
48
48
|
|
49
|
+
def valid?
|
50
|
+
@process.valid?
|
51
|
+
end
|
52
|
+
|
53
|
+
###########################################################################
|
54
|
+
#
|
55
|
+
# Behavior
|
56
|
+
#
|
57
|
+
###########################################################################
|
58
|
+
|
49
59
|
def behavior(kind)
|
50
60
|
# create the behavior
|
51
61
|
begin
|
@@ -136,7 +146,7 @@ module God
|
|
136
146
|
def move(to_state)
|
137
147
|
msg = "#{self.name} move '#{self.state}' to '#{to_state}'"
|
138
148
|
Syslog.debug(msg)
|
139
|
-
|
149
|
+
LOG.log(self, :info, msg)
|
140
150
|
|
141
151
|
# cleanup from current state
|
142
152
|
from_state = self.state
|
@@ -167,14 +177,16 @@ module God
|
|
167
177
|
def action(a, c = nil)
|
168
178
|
case a
|
169
179
|
when :start
|
170
|
-
|
171
|
-
|
180
|
+
msg = "#{self.name} start: #{self.start.to_s}"
|
181
|
+
Syslog.debug(msg)
|
182
|
+
LOG.log(self, :info, msg)
|
172
183
|
call_action(c, :start)
|
173
184
|
sleep(self.start_grace + self.grace)
|
174
185
|
when :restart
|
175
186
|
if self.restart
|
176
|
-
|
177
|
-
|
187
|
+
msg = "#{self.name} restart: #{self.restart.to_s}"
|
188
|
+
Syslog.debug(msg)
|
189
|
+
LOG.log(self, :info, msg)
|
178
190
|
call_action(c, :restart)
|
179
191
|
else
|
180
192
|
action(:stop, c)
|
@@ -182,8 +194,11 @@ module God
|
|
182
194
|
end
|
183
195
|
sleep(self.restart_grace + self.grace)
|
184
196
|
when :stop
|
185
|
-
|
186
|
-
|
197
|
+
if self.stop
|
198
|
+
msg = "#{self.name} stop: #{self.stop.to_s}"
|
199
|
+
Syslog.debug(msg)
|
200
|
+
LOG.log(self, :info, msg)
|
201
|
+
end
|
187
202
|
call_action(c, :stop)
|
188
203
|
sleep(self.stop_grace + self.grace)
|
189
204
|
end
|
@@ -210,6 +225,10 @@ module God
|
|
210
225
|
def register!
|
211
226
|
God.registry.add(@process)
|
212
227
|
end
|
228
|
+
|
229
|
+
def unregister!
|
230
|
+
God.registry.remove(@process)
|
231
|
+
end
|
213
232
|
end
|
214
233
|
|
215
234
|
end
|
@@ -2,7 +2,6 @@ God.watch do |w|
|
|
2
2
|
w.name = "child-events"
|
3
3
|
w.interval = 5.seconds
|
4
4
|
w.start = File.join(File.dirname(__FILE__), *%w[simple_server.rb])
|
5
|
-
w.stop = ""
|
6
5
|
|
7
6
|
# determine the state on startup
|
8
7
|
w.transition(:init, { true => :up, false => :start }) do |on|
|
@@ -12,10 +11,16 @@ God.watch do |w|
|
|
12
11
|
end
|
13
12
|
|
14
13
|
# determine when process has finished starting
|
15
|
-
w.transition(:start, :up) do |on|
|
14
|
+
w.transition([:start, :restart], :up) do |on|
|
16
15
|
on.condition(:process_running) do |c|
|
17
16
|
c.running = true
|
18
17
|
end
|
18
|
+
|
19
|
+
# failsafe
|
20
|
+
on.condition(:tries) do |c|
|
21
|
+
c.times = 2
|
22
|
+
c.transition = :start
|
23
|
+
end
|
19
24
|
end
|
20
25
|
|
21
26
|
# start if process is not running
|
@@ -1,11 +1,13 @@
|
|
1
1
|
God.watch do |w|
|
2
2
|
w.name = 'child-polls'
|
3
3
|
w.start = File.join(File.dirname(__FILE__), *%w[simple_server.rb])
|
4
|
-
w.stop = ''
|
4
|
+
# w.stop = ''
|
5
5
|
w.interval = 5
|
6
6
|
w.grace = 2
|
7
7
|
w.uid = 'tom'
|
8
8
|
w.gid = 'tom'
|
9
|
+
w.group = 'test'
|
10
|
+
w.log = File.join(File.dirname(__FILE__), *%w[out.log])
|
9
11
|
|
10
12
|
w.start_if do |start|
|
11
13
|
start.condition(:process_running) do |c|
|
@@ -3,8 +3,6 @@ God.watch do |w|
|
|
3
3
|
w.interval = 5.seconds
|
4
4
|
w.start = '/usr/local/bin/ruby ' + File.join(File.dirname(__FILE__), *%w[simple_server.rb]) + ' start'
|
5
5
|
w.stop = '/usr/local/bin/ruby ' + File.join(File.dirname(__FILE__), *%w[simple_server.rb]) + ' stop'
|
6
|
-
w.uid = 'tom'
|
7
|
-
w.gid = 'tom'
|
8
6
|
w.pid_file = '/var/run/daemon-events.pid'
|
9
7
|
|
10
8
|
w.behavior(:clean_pid_file)
|
@@ -17,10 +15,16 @@ God.watch do |w|
|
|
17
15
|
end
|
18
16
|
|
19
17
|
# determine when process has finished starting
|
20
|
-
w.transition(:start, :up) do |on|
|
18
|
+
w.transition([:start, :restart], :up) do |on|
|
21
19
|
on.condition(:process_running) do |c|
|
22
20
|
c.running = true
|
23
21
|
end
|
22
|
+
|
23
|
+
# failsafe
|
24
|
+
on.condition(:tries) do |c|
|
25
|
+
c.times = 2
|
26
|
+
c.transition = :start
|
27
|
+
end
|
24
28
|
end
|
25
29
|
|
26
30
|
# start if process is not running
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Watch definition for the "daemon-polls" test config: simple_server.rb is
# driven through its start/stop arguments and is started whenever the
# process_running condition reports that it is not running.
God.watch do |watch|
  here   = File.dirname(__FILE__)
  server = File.join(here, 'simple_server.rb')

  watch.name        = "daemon-polls"
  watch.interval    = 5.seconds
  watch.start       = "ruby #{server} start"
  watch.stop        = "ruby #{server} stop"
  watch.pid_file    = '/var/run/daemon-polls.pid'
  watch.start_grace = 2.seconds
  watch.log         = File.join(here, 'out.log')

  watch.behavior(:clean_pid_file)

  watch.start_if do |on|
    on.condition(:process_running) { |cond| cond.running = false }
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'socket'

# Watch definition for the "degrading-lambda" test config: tcp_server.rb is
# started when it is not running, and the degrading_lambda condition is fed
# a probe that talks to the server on 127.0.0.1:9090.
God.watch do |w|
  w.name = 'degrading-lambda'
  w.start = File.join(File.dirname(__FILE__), *%w[tcp_server.rb])
  w.interval = 5
  w.grace = 2
  w.uid = 'kev'
  w.gid = 'kev'
  w.group = 'test'

  w.start_if do |start|
    start.condition(:process_running) do |c|
      c.running = false
    end
  end

  w.restart_if do |restart|
    restart.condition(:degrading_lambda) do |c|
      # Probe: send "2\n" and hand back the line the server replies with.
      # Any failure (connection refused, reset, ...) yields false.
      c.lambda = lambda {
        begin
          # The block form closes the socket even when send/gets raises;
          # previously the socket was only closed on the success path.
          TCPSocket.open('127.0.0.1', 9090) do |sock|
            sock.send "2\n", 0
            retval = sock.gets
            puts "Retval is #{retval}"
            retval
          end
        rescue
          false
        end
      }
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'socket'
|
4
|
+
# Test server on 127.0.0.1:9090. It handles one connection at a time: reads
# a line, interprets it as a number of seconds, sleeps that long, replies
# and closes the connection.
listener = TCPServer.new('127.0.0.1', 9090)
loop do
  client = listener.accept
  break unless client

  puts "Found a session"
  line = client.gets
  puts "Request: #{line}"

  seconds = line.to_i
  puts "Sleeping for #{seconds}"
  sleep seconds

  client.print "Slept for #{seconds} seconds"
  client.close
  puts "Session closed"
end
|
data/test/configs/real.rb
CHANGED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Watch definition for the "running-load" test config. It reuses the
# child_polls simple_server.rb script and starts it whenever the
# process_running condition reports that it is not running.
God.watch do |w|
  w.name = 'running-load'
  # Resolve the script relative to this config (as the sibling configs do)
  # instead of an absolute path that only exists on one developer's machine.
  w.start = File.join(File.dirname(__FILE__), *%w[.. child_polls simple_server.rb])
  w.stop = ''
  w.interval = 5
  w.grace = 2
  w.uid = 'tom'
  w.gid = 'tom'
  w.group = 'test'

  w.start_if do |start|
    start.condition(:process_running) do |c|
      c.running = false
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Registers twenty watches, "stress-01" through "stress-20", in the 'test'
# group. Each one runs simple_server.rb and is started whenever the
# process_running condition reports that it is not running.
script = File.join(File.dirname(__FILE__), 'simple_server.rb')

1.upto(20) do |n|
  id = format('%02d', n)

  God.watch do |watch|
    watch.name     = "stress-#{id}"
    watch.start    = "ruby #{script}"
    watch.interval = 1
    watch.grace    = 2
    watch.group    = 'test'

    watch.start_if do |on|
      on.condition(:process_running) { |cond| cond.running = false }
    end
  end
end
|
data/test/configs/test.rb
CHANGED
@@ -12,6 +12,12 @@ God.init do |g|
|
|
12
12
|
# g.pid_file_directory =
|
13
13
|
end
|
14
14
|
|
15
|
+
class SimpleNotifier
|
16
|
+
def self.notify(str)
|
17
|
+
puts "Notifying: #{str}"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
15
21
|
God.watch do |w|
|
16
22
|
w.name = "local-3000"
|
17
23
|
w.interval = 5.seconds
|
@@ -21,14 +27,20 @@ God.watch do |w|
|
|
21
27
|
w.restart_grace = 5.seconds
|
22
28
|
w.stop_grace = 5.seconds
|
23
29
|
w.autostart = true
|
24
|
-
w.uid = '
|
25
|
-
w.gid = '
|
30
|
+
w.uid = 'kev'
|
31
|
+
w.gid = 'kev'
|
26
32
|
w.group = 'mongrels'
|
27
33
|
w.pid_file = File.join(RAILS_ROOT, "log/mongrel.pid")
|
28
34
|
|
29
35
|
# clean pid files before start if necessary
|
30
36
|
w.behavior(:clean_pid_file)
|
31
37
|
|
38
|
+
w.behavior(:notify_when_flapping) do |b|
|
39
|
+
b.failures = 5
|
40
|
+
b.seconds = 60.seconds
|
41
|
+
b.notifier = SimpleNotifier
|
42
|
+
end
|
43
|
+
|
32
44
|
# determine the state on startup
|
33
45
|
w.transition(:init, { true => :up, false => :start }) do |on|
|
34
46
|
on.condition(:process_running) do |c|
|
data/test/helper.rb
CHANGED
@@ -3,6 +3,18 @@ require File.join(File.dirname(__FILE__), *%w[.. lib god])
|
|
3
3
|
require 'test/unit'
|
4
4
|
require 'set'
|
5
5
|
|
6
|
+
include God
|
7
|
+
|
8
|
+
if RUBY_PLATFORM =~ /linux/i && Process.uid != 0
|
9
|
+
abort <<-EOF
|
10
|
+
*********************************************************************
|
11
|
+
* *
|
12
|
+
* You need to run these tests as root (netlink requires it) *
|
13
|
+
* *
|
14
|
+
*********************************************************************
|
15
|
+
EOF
|
16
|
+
end
|
17
|
+
|
6
18
|
begin
|
7
19
|
require 'mocha'
|
8
20
|
rescue LoadError
|
@@ -15,8 +27,6 @@ rescue LoadError
|
|
15
27
|
end
|
16
28
|
end
|
17
29
|
|
18
|
-
include God
|
19
|
-
|
20
30
|
module God
|
21
31
|
module Conditions
|
22
32
|
class FakeCondition < Condition
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/helper'
|
2
|
+
|
3
|
+
# Exercises Conditions::Tries configured with +times+ only (no +within+).
class TestConditionsTries < Test::Unit::TestCase
  def setup
    @c = Conditions::Tries.new
    @c.times = 3
    @c.prepare
  end

  # After prepare, the condition's @timeline should carry a @max_size of 3.
  #
  # This must be assert_equal: `assert 3, value` treats 3 as the (always
  # truthy) condition and the value as the failure message, so it could
  # never fail.
  def test_prepare_should_create_timeline
    timeline = @c.instance_variable_get(:@timeline)
    assert_equal 3, timeline.instance_variable_get(:@max_size)
  end

  # The third consecutive call is the first one expected to pass.
  def test_test_should_return_true_if_called_three_times_within_one_second
    assert !@c.test
    assert !@c.test
    assert @c.test
  end

  # A fourth call, right after the three above, is expected to fail again.
  def test_test_should_return_false_on_fourth_call_if_called_three_times_within_one_second
    3.times { @c.test }
    assert !@c.test
  end
end
|
25
|
+
|
26
|
+
# Exercises Conditions::Tries configured with both +times+ and +within+.
class TestConditionsTriesWithin < Test::Unit::TestCase
  def setup
    @c = Conditions::Tries.new
    @c.times = 3
    @c.within = 1.seconds
    @c.prepare
  end

  # Three back-to-back calls: only the third is expected to pass.
  def test_test_should_return_true_if_called_three_times_within_one_second
    assert !@c.test
    assert !@c.test
    assert @c.test
  end

  # Same three calls, but with a pause longer than +within+ before the
  # third, which is then expected to fail.
  def test_test_should_return_false_if_called_three_times_within_two_seconds
    assert !@c.test
    assert !@c.test
    # Only a delay; asserting on sleep's return value checked nothing.
    sleep(1.1)
    assert !@c.test
  end
end
|