god 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +67 -1
- data/Manifest.txt +3 -4
- data/Rakefile +1 -1
- data/bin/god +19 -1
- data/lib/god.rb +86 -49
- data/lib/god/cli/command.rb +7 -1
- data/lib/god/cli/run.rb +58 -0
- data/lib/god/condition.rb +6 -2
- data/lib/god/conditions/cpu_usage.rb +7 -6
- data/lib/god/conditions/http_response_code.rb +5 -1
- data/lib/god/conditions/memory_usage.rb +7 -6
- data/lib/god/conditions/process_exits.rb +15 -10
- data/lib/god/conditions/process_running.rb +17 -13
- data/lib/god/diagnostics.rb +37 -0
- data/lib/god/driver.rb +108 -0
- data/lib/god/event_handler.rb +41 -1
- data/lib/god/logger.rb +69 -19
- data/lib/god/metric.rb +2 -2
- data/lib/god/process.rb +84 -27
- data/lib/god/task.rb +286 -29
- data/lib/god/timeline.rb +20 -31
- data/lib/god/watch.rb +26 -15
- data/test/configs/child_events/child_events.god +0 -5
- data/test/configs/child_polls/simple_server.rb +1 -1
- data/test/configs/daemon_events/simple_server_stop.rb +2 -0
- data/test/configs/stress/stress.god +1 -1
- data/test/configs/test.rb +12 -28
- data/test/test_condition.rb +8 -0
- data/test/test_conditions_http_response_code.rb +5 -5
- data/test/test_conditions_process_running.rb +6 -4
- data/test/test_driver.rb +11 -0
- data/test/test_event_handler.rb +7 -0
- data/test/test_god.rb +63 -62
- data/test/test_metric.rb +0 -16
- data/test/test_process.rb +29 -1
- data/test/test_task.rb +177 -1
- data/test/test_timeline.rb +2 -1
- data/test/test_watch.rb +24 -6
- metadata +6 -8
- data/lib/god/hub.rb +0 -222
- data/lib/god/timer.rb +0 -87
- data/test/test_hub.rb +0 -240
- data/test/test_timer.rb +0 -69
data/lib/god/condition.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module God
|
2
2
|
|
3
3
|
class Condition < Behavior
|
4
|
-
attr_accessor :transition, :notify, :info
|
4
|
+
attr_accessor :transition, :notify, :info, :phase
|
5
5
|
|
6
6
|
# Generate a Condition of the given kind. The proper class is found by camel casing the
|
7
7
|
# kind (which is given as an underscored symbol).
|
@@ -14,6 +14,10 @@ module God
|
|
14
14
|
abort "Condition '#{c.class.name}' must subclass God::PollCondition, God::EventCondition, or God::TriggerCondition"
|
15
15
|
end
|
16
16
|
|
17
|
+
if !EventHandler.loaded? && c.kind_of?(EventCondition)
|
18
|
+
abort "Condition '#{c.class.name}' requires an event system but none has been loaded"
|
19
|
+
end
|
20
|
+
|
17
21
|
c.watch = watch
|
18
22
|
c
|
19
23
|
rescue NameError
|
@@ -77,7 +81,7 @@ module God
|
|
77
81
|
end
|
78
82
|
|
79
83
|
def trigger
|
80
|
-
|
84
|
+
self.watch.trigger(self)
|
81
85
|
end
|
82
86
|
|
83
87
|
def register
|
@@ -29,7 +29,7 @@ module God
|
|
29
29
|
# c.pid_file = "/var/run/mongrel.3000.pid"
|
30
30
|
# end
|
31
31
|
class CpuUsage < PollCondition
|
32
|
-
attr_accessor :above, :times
|
32
|
+
attr_accessor :above, :times, :pid_file
|
33
33
|
|
34
34
|
def initialize
|
35
35
|
super
|
@@ -49,18 +49,19 @@ module God
|
|
49
49
|
@timeline.clear
|
50
50
|
end
|
51
51
|
|
52
|
+
def pid
|
53
|
+
self.watch.pid || File.read(self.pid_file).strip.to_i
|
54
|
+
end
|
55
|
+
|
52
56
|
def valid?
|
53
57
|
valid = true
|
54
|
-
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
58
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
|
55
59
|
valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
|
56
60
|
valid
|
57
61
|
end
|
58
62
|
|
59
63
|
def test
|
60
|
-
|
61
|
-
|
62
|
-
pid = File.read(self.watch.pid_file).strip
|
63
|
-
process = System::Process.new(pid)
|
64
|
+
process = System::Process.new(self.pid)
|
64
65
|
@timeline.push(process.percent_cpu)
|
65
66
|
|
66
67
|
history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}%%" }.join(", ") + "]"
|
@@ -108,7 +108,7 @@ module God
|
|
108
108
|
|
109
109
|
Net::HTTP.start(self.host, self.port) do |http|
|
110
110
|
http.read_timeout = self.timeout
|
111
|
-
response = http.
|
111
|
+
response = http.get(self.path)
|
112
112
|
end
|
113
113
|
|
114
114
|
actual_response_code = response.code.to_i
|
@@ -121,6 +121,10 @@ module God
|
|
121
121
|
end
|
122
122
|
rescue Errno::ECONNREFUSED
|
123
123
|
self.code_is ? fail('Refused') : pass('Refused')
|
124
|
+
rescue Errno::ECONNRESET
|
125
|
+
self.code_is ? fail('Reset') : pass('Reset')
|
126
|
+
rescue EOFError
|
127
|
+
self.code_is ? fail('EOF') : pass('EOF')
|
124
128
|
rescue Timeout::Error
|
125
129
|
self.code_is ? fail('Timeout') : pass('Timeout')
|
126
130
|
end
|
@@ -31,7 +31,7 @@ module God
|
|
31
31
|
# c.pid_file = "/var/run/mongrel.3000.pid"
|
32
32
|
# end
|
33
33
|
class MemoryUsage < PollCondition
|
34
|
-
attr_accessor :above, :times
|
34
|
+
attr_accessor :above, :times, :pid_file
|
35
35
|
|
36
36
|
def initialize
|
37
37
|
super
|
@@ -51,18 +51,19 @@ module God
|
|
51
51
|
@timeline.clear
|
52
52
|
end
|
53
53
|
|
54
|
+
def pid
|
55
|
+
self.watch.pid || File.read(self.pid_file).strip.to_i
|
56
|
+
end
|
57
|
+
|
54
58
|
def valid?
|
55
59
|
valid = true
|
56
|
-
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
60
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
|
57
61
|
valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
|
58
62
|
valid
|
59
63
|
end
|
60
64
|
|
61
65
|
def test
|
62
|
-
|
63
|
-
|
64
|
-
pid = File.read(self.watch.pid_file).strip
|
65
|
-
process = System::Process.new(pid)
|
66
|
+
process = System::Process.new(self.pid)
|
66
67
|
@timeline.push(process.memory)
|
67
68
|
|
68
69
|
history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}kb" }.join(", ") + "]"
|
@@ -28,30 +28,35 @@ module God
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def valid?
|
31
|
-
|
32
|
-
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
33
|
-
valid
|
31
|
+
true
|
34
32
|
end
|
35
|
-
|
33
|
+
|
36
34
|
def register
|
37
|
-
pid =
|
35
|
+
pid = self.watch.pid
|
38
36
|
|
39
37
|
begin
|
40
38
|
EventHandler.register(pid, :proc_exit) do |extra|
|
41
|
-
|
42
|
-
|
39
|
+
formatted_extra = extra.size > 0 ? " #{extra.inspect}" : ""
|
40
|
+
self.info = "process #{pid} exited#{formatted_extra}"
|
41
|
+
self.watch.trigger(self)
|
43
42
|
end
|
43
|
+
|
44
|
+
msg = "#{self.watch.name} registered 'proc_exit' event for pid #{pid}"
|
45
|
+
applog(self.watch, :info, msg)
|
44
46
|
rescue StandardError
|
45
47
|
raise EventRegistrationFailedError.new
|
46
48
|
end
|
47
49
|
end
|
48
50
|
|
49
51
|
def deregister
|
50
|
-
|
51
|
-
|
52
|
+
pid = self.watch.pid
|
53
|
+
if pid
|
52
54
|
EventHandler.deregister(pid, :proc_exit)
|
55
|
+
|
56
|
+
msg = "#{self.watch.name} deregistered 'proc_exit' event for pid #{pid}"
|
57
|
+
applog(self.watch, :info, msg)
|
53
58
|
else
|
54
|
-
applog(self.watch, :error, "#{self.watch.name} could not deregister: no
|
59
|
+
applog(self.watch, :error, "#{self.watch.name} could not deregister: no cached PID or PID file #{self.watch.pid_file} (#{self.base_name})")
|
55
60
|
end
|
56
61
|
end
|
57
62
|
end
|
@@ -34,37 +34,41 @@ module God
|
|
34
34
|
# c.pid_file = "/var/run/mongrel.3000.pid"
|
35
35
|
# end
|
36
36
|
class ProcessRunning < PollCondition
|
37
|
-
attr_accessor :running
|
37
|
+
attr_accessor :running, :pid_file
|
38
|
+
|
39
|
+
def pid
|
40
|
+
self.watch.pid || File.read(self.pid_file).strip.to_i
|
41
|
+
end
|
38
42
|
|
39
43
|
def valid?
|
40
44
|
valid = true
|
41
|
-
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
45
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
|
42
46
|
valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
|
43
47
|
valid
|
44
48
|
end
|
45
|
-
|
49
|
+
|
46
50
|
def test
|
47
51
|
self.info = []
|
48
52
|
|
49
|
-
unless File.exist?(self.watch.pid_file)
|
50
|
-
|
51
|
-
|
52
|
-
end
|
53
|
+
# unless File.exist?(self.watch.pid_file)
|
54
|
+
# self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
|
55
|
+
# return !self.running
|
56
|
+
# end
|
53
57
|
|
54
|
-
pid =
|
55
|
-
active = System::Process.new(pid).exists?
|
58
|
+
pid = self.watch.pid
|
59
|
+
active = pid && System::Process.new(pid).exists?
|
56
60
|
|
57
61
|
if (self.running && active)
|
58
|
-
self.info
|
62
|
+
self.info.concat(["process is running"])
|
59
63
|
true
|
60
64
|
elsif (!self.running && !active)
|
61
|
-
self.info
|
65
|
+
self.info.concat(["process is not running"])
|
62
66
|
true
|
63
67
|
else
|
64
68
|
if self.running
|
65
|
-
self.info
|
69
|
+
self.info.concat(["process is not running"])
|
66
70
|
else
|
67
|
-
self.info
|
71
|
+
self.info.concat(["process is running"])
|
68
72
|
end
|
69
73
|
false
|
70
74
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
def start_dike
|
2
|
+
require 'dike'
|
3
|
+
Thread.new do
|
4
|
+
Dike.logfactory File.join(File.dirname(__FILE__), *%w[.. .. logs])
|
5
|
+
loop do
|
6
|
+
Dike.finger
|
7
|
+
sleep(1)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
class BleakHouseDiagnostic
|
13
|
+
LOG_FILE = File.join(File.dirname(__FILE__), *%w[.. .. logs bleak.log])
|
14
|
+
|
15
|
+
class << self
|
16
|
+
attr_accessor :logger
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.install
|
20
|
+
require 'bleak_house'
|
21
|
+
self.logger = BleakHouse::Logger.new
|
22
|
+
File.delete(LOG_FILE) rescue nil
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.snapshot
|
26
|
+
self.logger.snapshot(LOG_FILE, "timer", false) if self.logger
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.spin(delay = 1)
|
30
|
+
Thread.new do
|
31
|
+
loop do
|
32
|
+
self.snapshot
|
33
|
+
sleep(delay)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/god/driver.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
module God
|
2
|
+
|
3
|
+
class DriverEvent
|
4
|
+
attr_accessor :condition, :at
|
5
|
+
|
6
|
+
# Instantiate a new DriverEvent that will be triggered after the specified delay
|
7
|
+
# +condition+ is the Condition
|
8
|
+
# +delay+ is the number of seconds from now at which to trigger
|
9
|
+
#
|
10
|
+
# Returns DriverEvent
|
11
|
+
def initialize(condition, delay)
|
12
|
+
self.condition = condition
|
13
|
+
self.at = Time.now + delay
|
14
|
+
end
|
15
|
+
|
16
|
+
def due?
|
17
|
+
Time.now >= self.at
|
18
|
+
end
|
19
|
+
end # DriverEvent
|
20
|
+
|
21
|
+
class Driver
|
22
|
+
attr_reader :thread
|
23
|
+
|
24
|
+
INTERVAL = 0.25
|
25
|
+
|
26
|
+
# Instantiate a new Driver and start the scheduler loop to handle events
|
27
|
+
# +task+ is the Task this Driver belongs to
|
28
|
+
#
|
29
|
+
# Returns Driver
|
30
|
+
def initialize(task)
|
31
|
+
@task = task
|
32
|
+
@events = []
|
33
|
+
@ops = Queue.new
|
34
|
+
|
35
|
+
@thread = Thread.new do
|
36
|
+
loop do
|
37
|
+
begin
|
38
|
+
if !@ops.empty?
|
39
|
+
self.handle_op
|
40
|
+
elsif !@events.empty?
|
41
|
+
self.handle_event
|
42
|
+
else
|
43
|
+
sleep INTERVAL
|
44
|
+
end
|
45
|
+
rescue Exception => e
|
46
|
+
message = format("Unhandled exception in driver loop - (%s): %s\n%s",
|
47
|
+
e.class, e.message, e.backtrace.join("\n"))
|
48
|
+
applog(nil, :fatal, message)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# Handle the next queued operation that was issued asynchronously
|
55
|
+
#
|
56
|
+
# Returns nothing
|
57
|
+
def handle_op
|
58
|
+
command = @ops.pop
|
59
|
+
@task.send(command[0], *command[1])
|
60
|
+
end
|
61
|
+
|
62
|
+
# Handle the next event (poll condition) that is due
|
63
|
+
#
|
64
|
+
# Returns nothing
|
65
|
+
def handle_event
|
66
|
+
if @events.first.due?
|
67
|
+
event = @events.shift
|
68
|
+
@task.handle_poll(event.condition)
|
69
|
+
end
|
70
|
+
|
71
|
+
# don't sleep if there is a pending event and it is due
|
72
|
+
unless @events.first && @events.first.due?
|
73
|
+
sleep INTERVAL
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# Clear all events for this Driver
|
78
|
+
#
|
79
|
+
# Returns nothing
|
80
|
+
def clear_events
|
81
|
+
@events.clear
|
82
|
+
end
|
83
|
+
|
84
|
+
# Queue an asynchronous message
|
85
|
+
# +name+ is the Symbol name of the operation
|
86
|
+
# +args+ is an optional Array of arguments
|
87
|
+
#
|
88
|
+
# Returns nothing
|
89
|
+
def message(name, args = [])
|
90
|
+
@ops.push([name, args])
|
91
|
+
end
|
92
|
+
|
93
|
+
# Create and schedule a new DriverEvent
|
94
|
+
# +condition+ is the Condition
|
95
|
+
# +delay+ is the number of seconds to delay (default: interval defined in condition)
|
96
|
+
#
|
97
|
+
# Returns nothing
|
98
|
+
def schedule(condition, delay = condition.interval)
|
99
|
+
applog(nil, :debug, "driver schedule #{condition} in #{delay} seconds")
|
100
|
+
|
101
|
+
@events.concat([DriverEvent.new(condition, delay)])
|
102
|
+
|
103
|
+
# sort events
|
104
|
+
@events.sort! { |x, y| x.at <=> y.at }
|
105
|
+
end
|
106
|
+
end # Driver
|
107
|
+
|
108
|
+
end # God
|
data/lib/god/event_handler.rb
CHANGED
@@ -61,9 +61,49 @@ module God
|
|
61
61
|
def self.start
|
62
62
|
Thread.new do
|
63
63
|
loop do
|
64
|
-
|
64
|
+
begin
|
65
|
+
@@handler.handle_events
|
66
|
+
rescue Exception => e
|
67
|
+
message = format("Unhandled exception (%s): %s\n%s",
|
68
|
+
e.class, e.message, e.backtrace.join("\n"))
|
69
|
+
applog(nil, :fatal, message)
|
70
|
+
end
|
65
71
|
end
|
66
72
|
end
|
73
|
+
|
74
|
+
# do a real test to make sure events are working properly
|
75
|
+
@@loaded = self.operational?
|
76
|
+
end
|
77
|
+
|
78
|
+
def self.operational?
|
79
|
+
com = [false]
|
80
|
+
|
81
|
+
Thread.new do
|
82
|
+
begin
|
83
|
+
event_system = God::EventHandler.event_system
|
84
|
+
|
85
|
+
pid = fork do
|
86
|
+
loop { sleep(1) }
|
87
|
+
end
|
88
|
+
|
89
|
+
self.register(pid, :proc_exit) do
|
90
|
+
com[0] = true
|
91
|
+
end
|
92
|
+
|
93
|
+
::Process.kill('KILL', pid)
|
94
|
+
|
95
|
+
sleep(0.1)
|
96
|
+
|
97
|
+
self.deregister(pid, :proc_exit) rescue nil
|
98
|
+
rescue => e
|
99
|
+
puts e.message
|
100
|
+
puts e.backtrace.join("\n")
|
101
|
+
end
|
102
|
+
end.join
|
103
|
+
|
104
|
+
sleep(0.1)
|
105
|
+
|
106
|
+
com.first
|
67
107
|
end
|
68
108
|
|
69
109
|
end
|
data/lib/god/logger.rb
CHANGED
@@ -9,49 +9,76 @@ module God
|
|
9
9
|
|
10
10
|
attr_accessor :logs
|
11
11
|
|
12
|
+
class << self
|
13
|
+
attr_accessor :syslog
|
14
|
+
end
|
15
|
+
|
16
|
+
self.syslog ||= true
|
17
|
+
|
18
|
+
# Instantiate a new Logger object
|
12
19
|
def initialize
|
13
20
|
super($stdout)
|
14
21
|
self.logs = {}
|
15
22
|
@mutex = Mutex.new
|
16
23
|
@capture = nil
|
24
|
+
@templogio = StringIO.new
|
25
|
+
@templog = ::Logger.new(@templogio)
|
26
|
+
@templog.level = Logger::INFO
|
27
|
+
load_syslog
|
17
28
|
end
|
18
29
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
+
# If Logger.syslog is true then attempt to load the syslog bindings. If syslog
|
31
|
+
# cannot be loaded, then set Logger.syslog to false and continue.
|
32
|
+
#
|
33
|
+
# Returns nothing
|
34
|
+
def load_syslog
|
35
|
+
return unless Logger.syslog
|
36
|
+
|
37
|
+
begin
|
38
|
+
require 'syslog'
|
39
|
+
|
40
|
+
# Ensure that Syslog is open
|
41
|
+
begin
|
42
|
+
Syslog.open('god')
|
43
|
+
rescue RuntimeError
|
44
|
+
Syslog.reopen('god')
|
45
|
+
end
|
46
|
+
rescue Exception
|
47
|
+
Logger.syslog = false
|
30
48
|
end
|
31
49
|
end
|
32
50
|
|
51
|
+
# Log a message
|
52
|
+
# +watch+ is the Watch whose log should receive the message (may be nil if no Watch is applicable)
|
53
|
+
# +level+ is the log level [:debug|:info|:warn|:error|:fatal]
|
54
|
+
# +text+ is the String message
|
55
|
+
#
|
56
|
+
# Returns nothing
|
33
57
|
def log(watch, level, text)
|
34
58
|
# initialize watch log if necessary
|
35
59
|
self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT) if watch
|
36
60
|
|
37
61
|
# push onto capture and timeline for the given watch
|
38
|
-
|
39
|
-
|
40
|
-
templog.level
|
41
|
-
templog.send(level, text % [])
|
62
|
+
@templogio.truncate(0)
|
63
|
+
@templogio.rewind
|
64
|
+
@templog.send(level, text % [])
|
42
65
|
@mutex.synchronize do
|
43
|
-
@capture.puts(
|
44
|
-
self.logs[watch.name] << [Time.now,
|
66
|
+
@capture.puts(@templogio.string.dup) if @capture
|
67
|
+
self.logs[watch.name] << [Time.now, @templogio.string.dup] if watch
|
45
68
|
end
|
46
|
-
templog.close
|
47
69
|
|
48
70
|
# send to regular logger
|
49
71
|
self.send(level, text % [])
|
50
72
|
|
51
73
|
# send to syslog
|
52
|
-
Syslog.send(SYSLOG_EQUIVALENTS[level], text)
|
74
|
+
Syslog.send(SYSLOG_EQUIVALENTS[level], text) if Logger.syslog
|
53
75
|
end
|
54
76
|
|
77
|
+
# Get all log output for a given Watch since a certain Time.
|
78
|
+
# +watch_name+ is the String name of the Watch
|
79
|
+
# +since+ is the Time since which to fetch log lines
|
80
|
+
#
|
81
|
+
# Returns String
|
55
82
|
def watch_log_since(watch_name, since)
|
56
83
|
# initialize watch log if necessary
|
57
84
|
self.logs[watch_name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
|
@@ -65,6 +92,29 @@ module God
|
|
65
92
|
end.join
|
66
93
|
end
|
67
94
|
end
|
95
|
+
|
96
|
+
# private
|
97
|
+
|
98
|
+
# Enable capturing of log
|
99
|
+
#
|
100
|
+
# Returns nothing
|
101
|
+
def start_capture
|
102
|
+
@mutex.synchronize do
|
103
|
+
@capture = StringIO.new
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
# Disable capturing of log and return what was captured since
|
108
|
+
# capturing was enabled with Logger#start_capture
|
109
|
+
#
|
110
|
+
# Returns String
|
111
|
+
def finish_capture
|
112
|
+
@mutex.synchronize do
|
113
|
+
cap = @capture.string
|
114
|
+
@capture = nil
|
115
|
+
cap
|
116
|
+
end
|
117
|
+
end
|
68
118
|
end
|
69
119
|
|
70
120
|
end
|