god 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +67 -1
- data/Manifest.txt +3 -4
- data/Rakefile +1 -1
- data/bin/god +19 -1
- data/lib/god.rb +86 -49
- data/lib/god/cli/command.rb +7 -1
- data/lib/god/cli/run.rb +58 -0
- data/lib/god/condition.rb +6 -2
- data/lib/god/conditions/cpu_usage.rb +7 -6
- data/lib/god/conditions/http_response_code.rb +5 -1
- data/lib/god/conditions/memory_usage.rb +7 -6
- data/lib/god/conditions/process_exits.rb +15 -10
- data/lib/god/conditions/process_running.rb +17 -13
- data/lib/god/diagnostics.rb +37 -0
- data/lib/god/driver.rb +108 -0
- data/lib/god/event_handler.rb +41 -1
- data/lib/god/logger.rb +69 -19
- data/lib/god/metric.rb +2 -2
- data/lib/god/process.rb +84 -27
- data/lib/god/task.rb +286 -29
- data/lib/god/timeline.rb +20 -31
- data/lib/god/watch.rb +26 -15
- data/test/configs/child_events/child_events.god +0 -5
- data/test/configs/child_polls/simple_server.rb +1 -1
- data/test/configs/daemon_events/simple_server_stop.rb +2 -0
- data/test/configs/stress/stress.god +1 -1
- data/test/configs/test.rb +12 -28
- data/test/test_condition.rb +8 -0
- data/test/test_conditions_http_response_code.rb +5 -5
- data/test/test_conditions_process_running.rb +6 -4
- data/test/test_driver.rb +11 -0
- data/test/test_event_handler.rb +7 -0
- data/test/test_god.rb +63 -62
- data/test/test_metric.rb +0 -16
- data/test/test_process.rb +29 -1
- data/test/test_task.rb +177 -1
- data/test/test_timeline.rb +2 -1
- data/test/test_watch.rb +24 -6
- metadata +6 -8
- data/lib/god/hub.rb +0 -222
- data/lib/god/timer.rb +0 -87
- data/test/test_hub.rb +0 -240
- data/test/test_timer.rb +0 -69
data/lib/god/condition.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module God
|
2
2
|
|
3
3
|
class Condition < Behavior
|
4
|
-
attr_accessor :transition, :notify, :info
|
4
|
+
attr_accessor :transition, :notify, :info, :phase
|
5
5
|
|
6
6
|
# Generate a Condition of the given kind. The proper class is found by camel casing the
|
7
7
|
# kind (which is given as an underscored symbol).
|
@@ -14,6 +14,10 @@ module God
|
|
14
14
|
abort "Condition '#{c.class.name}' must subclass God::PollCondition, God::EventCondition, or God::TriggerCondition"
|
15
15
|
end
|
16
16
|
|
17
|
+
if !EventHandler.loaded? && c.kind_of?(EventCondition)
|
18
|
+
abort "Condition '#{c.class.name}' requires an event system but none has been loaded"
|
19
|
+
end
|
20
|
+
|
17
21
|
c.watch = watch
|
18
22
|
c
|
19
23
|
rescue NameError
|
@@ -77,7 +81,7 @@ module God
|
|
77
81
|
end
|
78
82
|
|
79
83
|
def trigger
|
80
|
-
|
84
|
+
self.watch.trigger(self)
|
81
85
|
end
|
82
86
|
|
83
87
|
def register
|
@@ -29,7 +29,7 @@ module God
|
|
29
29
|
# c.pid_file = "/var/run/mongrel.3000.pid"
|
30
30
|
# end
|
31
31
|
class CpuUsage < PollCondition
|
32
|
-
attr_accessor :above, :times
|
32
|
+
attr_accessor :above, :times, :pid_file
|
33
33
|
|
34
34
|
def initialize
|
35
35
|
super
|
@@ -49,18 +49,19 @@ module God
|
|
49
49
|
@timeline.clear
|
50
50
|
end
|
51
51
|
|
52
|
+
def pid
|
53
|
+
self.watch.pid || File.read(self.pid_file).strip.to_i
|
54
|
+
end
|
55
|
+
|
52
56
|
def valid?
|
53
57
|
valid = true
|
54
|
-
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
58
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
|
55
59
|
valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
|
56
60
|
valid
|
57
61
|
end
|
58
62
|
|
59
63
|
def test
|
60
|
-
|
61
|
-
|
62
|
-
pid = File.read(self.watch.pid_file).strip
|
63
|
-
process = System::Process.new(pid)
|
64
|
+
process = System::Process.new(self.pid)
|
64
65
|
@timeline.push(process.percent_cpu)
|
65
66
|
|
66
67
|
history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}%%" }.join(", ") + "]"
|
@@ -108,7 +108,7 @@ module God
|
|
108
108
|
|
109
109
|
Net::HTTP.start(self.host, self.port) do |http|
|
110
110
|
http.read_timeout = self.timeout
|
111
|
-
response = http.
|
111
|
+
response = http.get(self.path)
|
112
112
|
end
|
113
113
|
|
114
114
|
actual_response_code = response.code.to_i
|
@@ -121,6 +121,10 @@ module God
|
|
121
121
|
end
|
122
122
|
rescue Errno::ECONNREFUSED
|
123
123
|
self.code_is ? fail('Refused') : pass('Refused')
|
124
|
+
rescue Errno::ECONNRESET
|
125
|
+
self.code_is ? fail('Reset') : pass('Reset')
|
126
|
+
rescue EOFError
|
127
|
+
self.code_is ? fail('EOF') : pass('EOF')
|
124
128
|
rescue Timeout::Error
|
125
129
|
self.code_is ? fail('Timeout') : pass('Timeout')
|
126
130
|
end
|
@@ -31,7 +31,7 @@ module God
|
|
31
31
|
# c.pid_file = "/var/run/mongrel.3000.pid"
|
32
32
|
# end
|
33
33
|
class MemoryUsage < PollCondition
|
34
|
-
attr_accessor :above, :times
|
34
|
+
attr_accessor :above, :times, :pid_file
|
35
35
|
|
36
36
|
def initialize
|
37
37
|
super
|
@@ -51,18 +51,19 @@ module God
|
|
51
51
|
@timeline.clear
|
52
52
|
end
|
53
53
|
|
54
|
+
def pid
|
55
|
+
self.watch.pid || File.read(self.pid_file).strip.to_i
|
56
|
+
end
|
57
|
+
|
54
58
|
def valid?
|
55
59
|
valid = true
|
56
|
-
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
60
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
|
57
61
|
valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
|
58
62
|
valid
|
59
63
|
end
|
60
64
|
|
61
65
|
def test
|
62
|
-
|
63
|
-
|
64
|
-
pid = File.read(self.watch.pid_file).strip
|
65
|
-
process = System::Process.new(pid)
|
66
|
+
process = System::Process.new(self.pid)
|
66
67
|
@timeline.push(process.memory)
|
67
68
|
|
68
69
|
history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}kb" }.join(", ") + "]"
|
@@ -28,30 +28,35 @@ module God
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def valid?
|
31
|
-
|
32
|
-
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
33
|
-
valid
|
31
|
+
true
|
34
32
|
end
|
35
|
-
|
33
|
+
|
36
34
|
def register
|
37
|
-
pid =
|
35
|
+
pid = self.watch.pid
|
38
36
|
|
39
37
|
begin
|
40
38
|
EventHandler.register(pid, :proc_exit) do |extra|
|
41
|
-
|
42
|
-
|
39
|
+
formatted_extra = extra.size > 0 ? " #{extra.inspect}" : ""
|
40
|
+
self.info = "process #{pid} exited#{formatted_extra}"
|
41
|
+
self.watch.trigger(self)
|
43
42
|
end
|
43
|
+
|
44
|
+
msg = "#{self.watch.name} registered 'proc_exit' event for pid #{pid}"
|
45
|
+
applog(self.watch, :info, msg)
|
44
46
|
rescue StandardError
|
45
47
|
raise EventRegistrationFailedError.new
|
46
48
|
end
|
47
49
|
end
|
48
50
|
|
49
51
|
def deregister
|
50
|
-
|
51
|
-
|
52
|
+
pid = self.watch.pid
|
53
|
+
if pid
|
52
54
|
EventHandler.deregister(pid, :proc_exit)
|
55
|
+
|
56
|
+
msg = "#{self.watch.name} deregistered 'proc_exit' event for pid #{pid}"
|
57
|
+
applog(self.watch, :info, msg)
|
53
58
|
else
|
54
|
-
applog(self.watch, :error, "#{self.watch.name} could not deregister: no
|
59
|
+
applog(self.watch, :error, "#{self.watch.name} could not deregister: no cached PID or PID file #{self.watch.pid_file} (#{self.base_name})")
|
55
60
|
end
|
56
61
|
end
|
57
62
|
end
|
@@ -34,37 +34,41 @@ module God
|
|
34
34
|
# c.pid_file = "/var/run/mongrel.3000.pid"
|
35
35
|
# end
|
36
36
|
class ProcessRunning < PollCondition
|
37
|
-
attr_accessor :running
|
37
|
+
attr_accessor :running, :pid_file
|
38
|
+
|
39
|
+
def pid
|
40
|
+
self.watch.pid || File.read(self.pid_file).strip.to_i
|
41
|
+
end
|
38
42
|
|
39
43
|
def valid?
|
40
44
|
valid = true
|
41
|
-
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
|
45
|
+
valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
|
42
46
|
valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
|
43
47
|
valid
|
44
48
|
end
|
45
|
-
|
49
|
+
|
46
50
|
def test
|
47
51
|
self.info = []
|
48
52
|
|
49
|
-
unless File.exist?(self.watch.pid_file)
|
50
|
-
|
51
|
-
|
52
|
-
end
|
53
|
+
# unless File.exist?(self.watch.pid_file)
|
54
|
+
# self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
|
55
|
+
# return !self.running
|
56
|
+
# end
|
53
57
|
|
54
|
-
pid =
|
55
|
-
active = System::Process.new(pid).exists?
|
58
|
+
pid = self.watch.pid
|
59
|
+
active = pid && System::Process.new(pid).exists?
|
56
60
|
|
57
61
|
if (self.running && active)
|
58
|
-
self.info
|
62
|
+
self.info.concat(["process is running"])
|
59
63
|
true
|
60
64
|
elsif (!self.running && !active)
|
61
|
-
self.info
|
65
|
+
self.info.concat(["process is not running"])
|
62
66
|
true
|
63
67
|
else
|
64
68
|
if self.running
|
65
|
-
self.info
|
69
|
+
self.info.concat(["process is not running"])
|
66
70
|
else
|
67
|
-
self.info
|
71
|
+
self.info.concat(["process is running"])
|
68
72
|
end
|
69
73
|
false
|
70
74
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Spawn a background thread that calls Dike.finger once per second.
# The 'dike' library is required lazily, when this method is first called,
# and is handed the "logs" directory located two levels above this file's
# directory via Dike.logfactory.
#
# Returns the Thread
def start_dike
  require 'dike'

  Thread.new do
    log_dir = File.join(File.dirname(__FILE__), '..', '..', 'logs')
    Dike.logfactory(log_dir)

    loop do
      Dike.finger
      sleep(1)
    end
  end
end
|
11
|
+
|
12
|
+
# Helper around BleakHouse::Logger that writes snapshots to LOG_FILE.
# The 'bleak_house' library is only required when .install is called, so
# merely loading this class has no dependency on it.
class BleakHouseDiagnostic
  # Snapshot destination: logs/bleak.log, two levels above this file's directory
  LOG_FILE = File.join(File.dirname(__FILE__), *%w[.. .. logs bleak.log])

  class << self
    # The BleakHouse::Logger created by .install (nil until then)
    attr_accessor :logger
  end

  # Load bleak_house, create the logger and remove any existing log file.
  def self.install
    require 'bleak_house'
    self.logger = BleakHouse::Logger.new

    begin
      File.delete(LOG_FILE)
    rescue SystemCallError
      # best effort: a missing or undeletable log file must not abort the
      # install, but only filesystem errors are ignored here
      nil
    end
  end

  # Take one snapshot into LOG_FILE. Does nothing (and returns nil) when
  # .install has not been called, i.e. when no logger is set.
  def self.snapshot
    self.logger.snapshot(LOG_FILE, "timer", false) if self.logger
  end

  # Take a snapshot every +delay+ seconds (default: 1) on a background thread.
  #
  # Returns the Thread
  def self.spin(delay = 1)
    Thread.new do
      loop do
        self.snapshot
        sleep(delay)
      end
    end
  end
end
|
data/lib/god/driver.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
module God

  # A poll condition paired with the time at which it becomes due.
  class DriverEvent
    attr_accessor :condition, :at

    # Instantiate a new DriverEvent that becomes due after the specified delay
    #   +condition+ is the Condition
    #   +delay+ is the number of seconds from now at which to trigger
    #
    # Returns DriverEvent
    def initialize(condition, delay)
      self.condition = condition
      self.at = Time.now + delay
    end

    # Returns true once the current time has reached +at+, false otherwise
    def due?
      Time.now >= self.at
    end
  end # DriverEvent

  # Runs a scheduler thread for one Task: queued operations are dispatched
  # first, then due events; otherwise the thread sleeps for INTERVAL.
  class Driver
    attr_reader :thread

    # Seconds the scheduler sleeps when there is nothing to do right now
    INTERVAL = 0.25

    # Instantiate a new Driver and start the scheduler loop to handle events
    #   +task+ is the Task this Driver belongs to
    #
    # Returns Driver
    def initialize(task)
      @task = task
      @events = []
      @ops = Queue.new

      @thread = Thread.new do
        loop do
          begin
            if !@ops.empty?
              self.handle_op
            elsif !@events.empty?
              self.handle_event
            else
              sleep INTERVAL
            end
          rescue Exception => e
            # rescued inside the loop (Exception, not only StandardError) so
            # that any failure is logged and the loop keeps going
            message = format("Unhandled exception in driver loop - (%s): %s\n%s",
                             e.class, e.message, e.backtrace.join("\n"))
            applog(nil, :fatal, message)
          end
        end
      end
    end

    # Handle the next queued operation that was issued asynchronously
    #
    # Returns nothing
    def handle_op
      command = @ops.pop
      @task.send(command[0], *command[1])
    end

    # Handle the next event (poll condition) that is due
    #
    # Returns nothing
    def handle_event
      head = @events.first

      # Nothing is scheduled, e.g. when clear_events ran after the scheduler
      # loop's emptiness check or this method was called directly. Return
      # instead of calling due? on nil.
      return if head.nil?

      if head.due?
        event = @events.shift
        @task.handle_poll(event.condition) if event
      end

      # don't sleep if there is a pending event and it is due
      upcoming = @events.first
      sleep INTERVAL unless upcoming && upcoming.due?
    end

    # Clear all events for this Driver
    #
    # Returns nothing
    def clear_events
      @events.clear
    end

    # Queue an asynchronous message
    #   +name+ is the Symbol name of the operation
    #   +args+ is an optional Array of arguments
    #
    # Returns nothing
    def message(name, args = [])
      @ops.push([name, args])
    end

    # Create and schedule a new DriverEvent
    #   +condition+ is the Condition
    #   +delay+ is the number of seconds to delay (default: interval defined in condition)
    #
    # Returns nothing
    def schedule(condition, delay = condition.interval)
      applog(nil, :debug, "driver schedule #{condition} in #{delay} seconds")

      @events.concat([DriverEvent.new(condition, delay)])

      # keep the list ordered so the earliest event is always first
      @events.sort! { |x, y| x.at <=> y.at }
    end
  end # Driver

end # God
|
data/lib/god/event_handler.rb
CHANGED
@@ -61,9 +61,49 @@ module God
|
|
61
61
|
# Start a background thread that calls handle_events on the class-level
# handler in an endless loop, then record in @@loaded whether the event
# system passed the operational? check.
def self.start
  Thread.new do
    loop do
      begin
        @@handler.handle_events
      rescue Exception => error
        # log and keep looping; the rescue sits inside the loop
        trace = error.backtrace.join("\n")
        applog(nil, :fatal, format("Unhandled exception (%s): %s\n%s", error.class, error.message, trace))
      end
    end
  end

  # do a real test to make sure events are working properly
  @@loaded = self.operational?
end
|
77
|
+
|
78
|
+
# Check that the event system really delivers events: fork a throwaway
# child, register for its :proc_exit event, kill the child and report
# whether the registered callback fired.
#
# Any StandardError raised during the probe is printed to stdout and the
# probe is treated as failed unless the callback had already run.
#
# Returns true if the proc_exit callback ran, false otherwise
def self.operational?
  com = [false]

  Thread.new do
    begin
      # NOTE(review): the result is unused; the call is kept in case the
      # accessor raises when no event system is available - confirm
      God::EventHandler.event_system

      pid = fork do
        loop { sleep(1) }
      end

      begin
        self.register(pid, :proc_exit) do
          com[0] = true
        end
      ensure
        # Always kill the child, even if registration failed, so it is not
        # left sleeping forever; then reap it so no zombie remains.
        ::Process.kill('KILL', pid)
        begin
          ::Process.waitpid(pid)
        rescue Errno::ECHILD
          # the child was already reaped elsewhere; nothing left to clean up
        end
      end

      # give the event system a moment to deliver the exit event
      sleep(0.1)

      self.deregister(pid, :proc_exit) rescue nil
    rescue => e
      puts e.message
      puts e.backtrace.join("\n")
    end
  end.join

  sleep(0.1)

  com.first
end
|
68
108
|
|
69
109
|
end
|
data/lib/god/logger.rb
CHANGED
@@ -9,49 +9,76 @@ module God
|
|
9
9
|
|
10
10
|
attr_accessor :logs
|
11
11
|
|
12
|
+
class << self
|
13
|
+
attr_accessor :syslog
|
14
|
+
end
|
15
|
+
|
16
|
+
self.syslog ||= true
|
17
|
+
|
18
|
+
# Instantiate a new Logger object
|
12
19
|
def initialize
|
13
20
|
super($stdout)
|
14
21
|
self.logs = {}
|
15
22
|
@mutex = Mutex.new
|
16
23
|
@capture = nil
|
24
|
+
@templogio = StringIO.new
|
25
|
+
@templog = ::Logger.new(@templogio)
|
26
|
+
@templog.level = Logger::INFO
|
27
|
+
load_syslog
|
17
28
|
end
|
18
29
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
+
# If Logger.syslog is true then attempt to load the syslog bindings and make
# sure Syslog is open under the 'god' identity. If syslog cannot be loaded
# or opened, then set Logger.syslog to false and continue.
#
# Returns nothing
def load_syslog
  return unless Logger.syslog

  begin
    require 'syslog'

    # Ensure that Syslog is open; open raises RuntimeError when it already is
    begin
      Syslog.open('god')
    rescue RuntimeError
      Syslog.reopen('god')
    end
  rescue ScriptError, StandardError
    # ScriptError covers LoadError from the require. Signals and exit
    # requests are deliberately not swallowed here.
    Logger.syslog = false
  end
end
|
32
50
|
|
51
|
+
# Log a message
#   +watch+ is the Watch the message belongs to; it must respond to #name
#           (may be nil if no Watch is applicable)
#   +level+ is the log level [:debug|:info|:warn|:error|:fatal]
#   +text+ is the String message. It is used as a format string with no
#          arguments, so literal percent signs must be written as %%
#
# Returns nothing
def log(watch, level, text)
  # initialize watch log if necessary
  self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT) if watch

  formatted = text % []

  # The scratch buffer is shared by every caller, so it is reset, written
  # and read back under the mutex. Otherwise concurrent calls could clobber
  # each other's line before it is copied out.
  @mutex.synchronize do
    @templogio.truncate(0)
    @templogio.rewind
    @templog.send(level, formatted)

    line = @templogio.string.dup

    # push onto capture and timeline for the given watch
    @capture.puts(line) if @capture
    self.logs[watch.name] << [Time.now, line] if watch
  end

  # send to regular logger
  self.send(level, formatted)

  # send to syslog (the raw text is passed on, as Syslog formats it itself)
  Syslog.send(SYSLOG_EQUIVALENTS[level], text) if Logger.syslog
end
|
54
76
|
|
77
|
+
# Get all log output for a given Watch since a certain Time.
|
78
|
+
# +watch_name+ is the String name of the Watch
|
79
|
+
# +since+ is the Time since which to fetch log lines
|
80
|
+
#
|
81
|
+
# Returns String
|
55
82
|
def watch_log_since(watch_name, since)
|
56
83
|
# initialize watch log if necessary
|
57
84
|
self.logs[watch_name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
|
@@ -65,6 +92,29 @@ module God
|
|
65
92
|
end.join
|
66
93
|
end
|
67
94
|
end
|
95
|
+
|
96
|
+
# private
|
97
|
+
|
98
|
+
# Begin capturing log output by installing a fresh, empty in-memory buffer
# (replacing any buffer from an earlier capture)
#
# Returns nothing
def start_capture
  @mutex.synchronize { @capture = StringIO.new }
end
|
106
|
+
|
107
|
+
# Disable capturing of log and return what was captured since
# capturing was enabled with Logger#start_capture. If capturing was
# never enabled, an empty String is returned instead of raising.
#
# Returns String
def finish_capture
  @mutex.synchronize do
    captured = @capture ? @capture.string : ''
    @capture = nil
    captured
  end
end
|
68
118
|
end
|
69
119
|
|
70
120
|
end
|