mojombo-god 0.7.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +255 -0
- data/Manifest.txt +107 -0
- data/README.txt +59 -0
- data/Rakefile +35 -0
- data/bin/god +127 -0
- data/examples/events.god +84 -0
- data/examples/gravatar.god +54 -0
- data/examples/single.god +66 -0
- data/ext/god/extconf.rb +55 -0
- data/ext/god/kqueue_handler.c +123 -0
- data/ext/god/netlink_handler.c +167 -0
- data/init/god +42 -0
- data/lib/god.rb +644 -0
- data/lib/god/behavior.rb +52 -0
- data/lib/god/behaviors/clean_pid_file.rb +21 -0
- data/lib/god/behaviors/clean_unix_socket.rb +21 -0
- data/lib/god/behaviors/notify_when_flapping.rb +51 -0
- data/lib/god/cli/command.rb +206 -0
- data/lib/god/cli/run.rb +177 -0
- data/lib/god/cli/version.rb +23 -0
- data/lib/god/condition.rb +96 -0
- data/lib/god/conditions/always.rb +23 -0
- data/lib/god/conditions/complex.rb +86 -0
- data/lib/god/conditions/cpu_usage.rb +80 -0
- data/lib/god/conditions/degrading_lambda.rb +52 -0
- data/lib/god/conditions/disk_usage.rb +27 -0
- data/lib/god/conditions/flapping.rb +128 -0
- data/lib/god/conditions/http_response_code.rb +168 -0
- data/lib/god/conditions/lambda.rb +25 -0
- data/lib/god/conditions/memory_usage.rb +82 -0
- data/lib/god/conditions/process_exits.rb +72 -0
- data/lib/god/conditions/process_running.rb +74 -0
- data/lib/god/conditions/tries.rb +44 -0
- data/lib/god/configurable.rb +57 -0
- data/lib/god/contact.rb +106 -0
- data/lib/god/contacts/email.rb +95 -0
- data/lib/god/dependency_graph.rb +41 -0
- data/lib/god/diagnostics.rb +37 -0
- data/lib/god/driver.rb +108 -0
- data/lib/god/errors.rb +24 -0
- data/lib/god/event_handler.rb +111 -0
- data/lib/god/event_handlers/dummy_handler.rb +13 -0
- data/lib/god/event_handlers/kqueue_handler.rb +17 -0
- data/lib/god/event_handlers/netlink_handler.rb +13 -0
- data/lib/god/logger.rb +120 -0
- data/lib/god/metric.rb +59 -0
- data/lib/god/process.rb +325 -0
- data/lib/god/registry.rb +32 -0
- data/lib/god/simple_logger.rb +53 -0
- data/lib/god/socket.rb +96 -0
- data/lib/god/sugar.rb +47 -0
- data/lib/god/system/portable_poller.rb +42 -0
- data/lib/god/system/process.rb +42 -0
- data/lib/god/system/slash_proc_poller.rb +82 -0
- data/lib/god/task.rb +487 -0
- data/lib/god/timeline.rb +25 -0
- data/lib/god/trigger.rb +43 -0
- data/lib/god/watch.rb +183 -0
- data/test/configs/child_events/child_events.god +44 -0
- data/test/configs/child_events/simple_server.rb +3 -0
- data/test/configs/child_polls/child_polls.god +37 -0
- data/test/configs/child_polls/simple_server.rb +12 -0
- data/test/configs/complex/complex.god +59 -0
- data/test/configs/complex/simple_server.rb +3 -0
- data/test/configs/contact/contact.god +74 -0
- data/test/configs/contact/simple_server.rb +3 -0
- data/test/configs/daemon_events/daemon_events.god +37 -0
- data/test/configs/daemon_events/simple_server.rb +8 -0
- data/test/configs/daemon_events/simple_server_stop.rb +11 -0
- data/test/configs/daemon_polls/daemon_polls.god +17 -0
- data/test/configs/daemon_polls/simple_server.rb +6 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
- data/test/configs/degrading_lambda/tcp_server.rb +15 -0
- data/test/configs/matias/matias.god +50 -0
- data/test/configs/real.rb +59 -0
- data/test/configs/running_load/running_load.god +16 -0
- data/test/configs/stress/simple_server.rb +3 -0
- data/test/configs/stress/stress.god +15 -0
- data/test/configs/task/logs/.placeholder +0 -0
- data/test/configs/task/task.god +26 -0
- data/test/configs/test.rb +61 -0
- data/test/helper.rb +151 -0
- data/test/suite.rb +6 -0
- data/test/test_behavior.rb +21 -0
- data/test/test_condition.rb +50 -0
- data/test/test_conditions_disk_usage.rb +56 -0
- data/test/test_conditions_http_response_code.rb +109 -0
- data/test/test_conditions_process_running.rb +44 -0
- data/test/test_conditions_tries.rb +67 -0
- data/test/test_contact.rb +109 -0
- data/test/test_dependency_graph.rb +62 -0
- data/test/test_driver.rb +11 -0
- data/test/test_event_handler.rb +80 -0
- data/test/test_god.rb +598 -0
- data/test/test_handlers_kqueue_handler.rb +16 -0
- data/test/test_logger.rb +63 -0
- data/test/test_metric.rb +72 -0
- data/test/test_process.rb +246 -0
- data/test/test_registry.rb +15 -0
- data/test/test_socket.rb +42 -0
- data/test/test_sugar.rb +42 -0
- data/test/test_system_portable_poller.rb +17 -0
- data/test/test_system_process.rb +30 -0
- data/test/test_task.rb +262 -0
- data/test/test_timeline.rb +37 -0
- data/test/test_trigger.rb +59 -0
- data/test/test_watch.rb +279 -0
- metadata +186 -0
data/lib/god/registry.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module God
  class << self
    # The process-wide Registry, created on first access.
    #
    # Returns Registry
    def registry
      @registry ||= Registry.new
    end
  end

  # A name-indexed store of items. Anything that responds to +name+ can be
  # stored; the type check below was left disabled by the original author.
  class Registry
    def initialize
      @storage = Hash.new
    end

    # Store +item+ under its name, replacing any item already using that name
    #   +item+ is any object responding to +name+
    #
    # Returns the item
    def add(item)
      # raise TypeError unless item.is_a? God::Process
      @storage.store(item.name, item)
    end

    # Drop the entry stored under the name of +item+
    #
    # Returns the removed item, or nil if nothing was stored under that name
    def remove(item)
      key = item.name
      @storage.delete(key)
    end

    # Returns the Integer number of stored items
    def size
      @storage.length
    end

    # Look up an item
    #   +name+ is the name the item was registered under
    #
    # Returns the item, or nil if the name is unknown
    def [](name)
      @storage[name]
    end

    # Forget every stored item
    def reset
      @storage.clear
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module God

  # A minimal severity-filtered logger that prints one formatted line per
  # message to the IO it was given.
  class SimpleLogger
    DEBUG = 2
    INFO = 4
    WARN = 8
    ERROR = 16
    FATAL = 32

    # Severity constant -> label printed in each log line
    SEV_LABEL = {DEBUG => 'DEBUG',
                 INFO => 'INFO',
                 WARN => 'WARN',
                 ERROR => 'ERROR',
                 FATAL => 'FATAL'}.freeze

    # Label printed for a severity that has no entry in SEV_LABEL
    UNKNOWN_LABEL = 'ANY'

    attr_accessor :datetime_format, :level

    # Create a logger that starts at INFO level
    #   +io+ is the object lines are written to (must respond to +print+)
    def initialize(io)
      @io = io
      @level = INFO
      @datetime_format = "%Y-%m-%d %H:%M:%S"
    end

    # Write +msg+ if +level+ is at or above the logger's current level.
    # Each line looks like "I [2008-01-01 12:00:00]  INFO: message".
    #   +level+ is the Integer severity (normally one of the constants above)
    #   +msg+ is the message to write
    #
    # Returns nothing meaningful
    def output(level, msg)
      return if level < self.level

      time = Time.now.strftime(self.datetime_format)
      # a severity outside SEV_LABEL used to blow up on nil; label it instead
      label = SEV_LABEL[level] || UNKNOWN_LABEL
      @io.print("#{label[0..0]} [#{time}] #{label.rjust(5)}: #{msg}\n")
    end

    # Log +msg+ at FATAL severity
    def fatal(msg)
      self.output(FATAL, msg)
    end

    # Log +msg+ at ERROR severity
    def error(msg)
      self.output(ERROR, msg)
    end

    # Log +msg+ at WARN severity
    def warn(msg)
      self.output(WARN, msg)
    end

    # Log +msg+ at INFO severity
    def info(msg)
      self.output(INFO, msg)
    end

    # Log +msg+ at DEBUG severity
    def debug(msg)
      self.output(DEBUG, msg)
    end
  end

end
|
data/lib/god/socket.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'drb'
|
2
|
+
|
3
|
+
module God

  # God::Socket owns the DRb server through which this god daemon answers
  # requests, listening on a unix socket whose path is derived from a port
  # number.
  class Socket
    attr_reader :port

    class << self
      # Path of the unix socket file used for a given port
      #   +port+ is the port number
      #
      # Returns String (file location)
      def socket_file(port)
        "/tmp/god.#{port}.sock"
      end

      # DRb address of the unix socket used for a given port
      #   +port+ is the port number
      #
      # Returns String (drb address)
      def socket(port)
        "drbunix://#{socket_file(port)}"
      end
    end

    # Path of the unix socket file for this instance
    #
    # Returns String (file location)
    def socket_file
      self.class.socket_file(@port)
    end

    # DRb address of the unix socket for this instance
    #
    # Returns String (drb address)
    def socket
      self.class.socket(@port)
    end

    # Create a new Socket and start the DRb server
    #   +port+ is the port on which to start the DRb service (default nil)
    def initialize(port = nil)
      @port = port
      start
    end

    # Liveness probe; #start calls it on a remote instance to find out
    # whether an existing socket is still being served.
    #
    # Returns true
    def ping
      true
    end

    # Forward any call this object does not define to the God module
    #
    # Returns whatever the forwarded call returns
    def method_missing(*args, &block)
      God.send(*args, &block)
    end

    # Stop the DRb server and delete the socket file
    #
    # Returns nothing
    def stop
      DRb.stop_service
      FileUtils.rm_f(socket_file)
    end

    private

    # Start the DRb server. If the address is already taken, ping whoever
    # holds it: abort when a live god instance answers, otherwise treat the
    # socket file as stale, delete it and start again.
    #
    # Returns nothing
    def start
      @drb ||= DRb.start_service(socket, self)
      applog(nil, :info, "Started on #{DRb.uri}")
    rescue Errno::EADDRINUSE
      applog(nil, :info, "Socket already in use")
      DRb.start_service
      server = DRbObject.new(nil, socket)

      begin
        Timeout.timeout(5) { server.ping }
        # reaching this line means the ping was answered
        abort "Socket #{socket} already in use by another instance of god"
      rescue StandardError, Timeout::Error
        applog(nil, :info, "Socket is stale, reopening")
        begin
          File.delete(socket_file)
        rescue StandardError
          nil
        end
        @drb ||= DRb.start_service(socket, self)
        applog(nil, :info, "Started on #{DRb.uri}")
      end
    end
  end

end
|
data/lib/god/sugar.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Unit helpers for config files. Durations come out in seconds and sizes in
# kilobytes, so "5.minutes" is 300 and "2.megabytes" is 2048.
class Numeric
  # Already in seconds; returned unchanged.
  def seconds
    self
  end
  alias_method :second, :seconds

  # Minutes expressed in seconds.
  def minutes
    self * 60
  end
  alias_method :minute, :minutes

  # Hours expressed in seconds.
  def hours
    self * 3_600
  end
  alias_method :hour, :hours

  # Days expressed in seconds.
  def days
    self * 86_400
  end
  alias_method :day, :days

  # Already in kilobytes; returned unchanged.
  def kilobytes
    self
  end
  alias_method :kilobyte, :kilobytes

  # Megabytes expressed in kilobytes.
  def megabytes
    self * 1_024
  end
  alias_method :megabyte, :megabytes

  # Gigabytes expressed in kilobytes.
  def gigabytes
    self * 1_048_576
  end
  alias_method :gigabyte, :gigabytes

  # Reads nicely in configs ("50.percent"); the value is returned unchanged.
  def percent
    self
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module God
  module System
    # Reads per-process statistics by shelling out to `ps` for the pid given
    # at construction.
    class PortablePoller
      #   +pid+ is the process id handed to `ps -p`
      def initialize(pid)
        @pid = pid
      end

      # Memory usage in kilobytes (resident set size)
      def memory
        ps_int('rss')
      end

      # Percentage memory usage
      def percent_memory
        ps_float('%mem')
      end

      # Percentage CPU usage
      def percent_cpu
        ps_float('%cpu')
      end

      private

      # The `ps` column +keyword+ for this pid, as an Integer
      def ps_int(keyword)
        ps_string(keyword).to_i
      end

      # The `ps` column +keyword+ for this pid, as a Float
      def ps_float(keyword)
        ps_string(keyword).to_f
      end

      # The `ps` column +keyword+ for this pid, with surrounding whitespace
      # removed
      def ps_string(keyword)
        %x(ps -o #{keyword}= -p #{@pid}).strip
      end

      # Convert a "minutes:seconds<any char>hundredths" string to whole
      # seconds; the trailing two digits are matched but ignored.
      #   +text+ is the String to parse
      #
      # Returns Integer (0 when +text+ does not match)
      def time_string_to_seconds(text)
        found = text.match(/(\d+):(\d{2}).(\d{2})/)
        return 0 if found.nil?

        found[1].to_i * 60 + found[2].to_i
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module God
  module System

    # A handle on one operating-system process, identified by pid. Existence
    # is checked with signal 0; resource figures are delegated to whichever
    # poller class is usable on this machine.
    class Process
      #   +pid+ is the process id (anything responding to +to_i+)
      def initialize(pid)
        @pid = pid.to_i
        @poller = fetch_system_poller.new(@pid)
      end

      # Return true if this process is running, false otherwise
      def exists?
        # pid 0 and negative pids address process groups, not a single
        # process, so kill(0, ...) would "succeed" for them
        return false unless @pid > 0

        ::Process.kill(0, @pid)
        true
      rescue Errno::EPERM
        # the process is there, we are just not allowed to signal it
        true
      rescue StandardError
        false
      end

      # Memory usage in kilobytes (resident set size)
      def memory
        @poller.memory
      end

      # Percentage memory usage
      def percent_memory
        @poller.percent_memory
      end

      # Percentage CPU usage
      def percent_cpu
        @poller.percent_cpu
      end

      private

      # Choose the poller class: SlashProcPoller when it reports itself
      # usable, PortablePoller otherwise.
      #
      # Returns Class
      def fetch_system_poller
        if SlashProcPoller.usable?
          SlashProcPoller
        else
          PortablePoller
        end
      end
    end

  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module God
  module System
    # Reads per-process statistics straight from /proc instead of shelling
    # out to `ps`.
    class SlashProcPoller < PortablePoller
      @@kb_per_page = 4 # TODO: Need to make this portable
      @@hertz = 100
      @@total_mem = nil

      MeminfoPath = '/proc/meminfo'
      UptimePath = '/proc/uptime'

      RequiredPaths = [MeminfoPath, UptimePath]

      # Names for the whitespace-separated fields of /proc/<pid>/stat, in
      # file order. The nil entry is a field that is read but not stored.
      StatFields = [
        :pid, :comm, :state, :ppid, :pgrp,
        :session, :tty_nr, :tpgid, :flags,
        :minflt, :cminflt, :majflt, :cmajflt,
        :utime, :stime, :cutime, :cstime,
        :priority, :nice, nil, :itrealvalue,
        :starttime, :vsize, :rss, :rlim,
        :startcode, :endcode, :startstack, :kstkesp,
        :kstkeip, :signal, :blocked, :sigignore,
        :sigcatch, :wchan, :nswap, :cnswap,
        :exit_signal, :processor, :rt_priority,
        :policy
      ]

      # FreeBSD has /proc by default, but nothing mounted there!
      # So we should check for the actual required paths!
      # Returns true if +RequiredPaths+ are readable.
      def self.usable?
        RequiredPaths.all? do |path|
          File.readable?(path)
        end
      end

      # Remember the pid and, the first time any poller is created, cache
      # the second field of the first line of /proc/meminfo as total memory.
      #   +pid+ is the process id whose /proc entry is read
      def initialize(pid)
        super(pid)

        unless @@total_mem # in K
          File.open(MeminfoPath) do |f|
            @@total_mem = f.gets.split[1]
          end
        end
      end

      # Memory usage in kilobytes: rss pages times @@kb_per_page
      def memory
        stat[:rss].to_i * @@kb_per_page
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      # Memory usage as a percentage of the cached total memory
      def percent_memory
        (memory / @@total_mem.to_f) * 100
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      # CPU time (user + system) consumed since the process started, as a
      # percentage of the time elapsed since then.
      #
      # TODO: Change this to calculate the wma instead
      def percent_cpu
        stats = stat
        total_time = stats[:utime].to_i + stats[:stime].to_i # in jiffies
        # starttime is in jiffies; the integer division yields whole seconds
        seconds = uptime - stats[:starttime].to_i / @@hertz
        if seconds == 0
          0
        else
          ((total_time * 1000 / @@hertz) / seconds) / 10
        end
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      private

      # in seconds
      def uptime
        File.read(UptimePath).split[0].to_f
      end

      # Parse /proc/<pid>/stat into a Hash keyed by the StatFields names;
      # values are the raw Strings (nil for fields the file does not have).
      #
      # The second field is the command name wrapped in parentheses and it
      # may itself contain spaces, so it is cut out as a unit (first "(" to
      # last ")") before the remainder is split on whitespace. Splitting the
      # whole line blindly would shift every later field.
      #
      # Returns Hash
      def stat
        raw = File.read("/proc/#{@pid}/stat")
        open_at = raw.index('(')
        close_at = raw.rindex(')')

        fields =
          if open_at && close_at && open_at < close_at
            [raw[0...open_at].strip, raw[open_at..close_at]] + raw[(close_at + 1)..-1].split
          else
            # unexpected layout; fall back to a plain split
            raw.split
          end

        stats = {}
        StatFields.each_with_index do |key, index|
          stats[key] = fields[index] if key
        end
        stats
      end
    end
  end
end
|
data/lib/god/task.rb
ADDED
@@ -0,0 +1,487 @@
|
|
1
|
+
module God
|
2
|
+
|
3
|
+
class Task
|
4
|
+
attr_accessor :name, :interval, :group, :valid_states, :initial_state, :driver
|
5
|
+
|
6
|
+
attr_writer :autostart
|
7
|
+
def autostart?; @autostart; end
|
8
|
+
|
9
|
+
# api
|
10
|
+
attr_accessor :state, :behaviors, :metrics, :directory
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@autostart ||= true
|
14
|
+
|
15
|
+
# initial state is unmonitored
|
16
|
+
self.state = :unmonitored
|
17
|
+
|
18
|
+
# the list of behaviors
|
19
|
+
self.behaviors = []
|
20
|
+
|
21
|
+
# the list of conditions for each action
|
22
|
+
self.metrics = {nil => [], :unmonitored => [], :stop => []}
|
23
|
+
|
24
|
+
# the condition -> metric lookup
|
25
|
+
self.directory = {}
|
26
|
+
|
27
|
+
# driver
|
28
|
+
self.driver = Driver.new(self)
|
29
|
+
end
|
30
|
+
|
31
|
+
def prepare
|
32
|
+
self.valid_states.each do |state|
|
33
|
+
self.metrics[state] ||= []
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def valid?
|
38
|
+
valid = true
|
39
|
+
|
40
|
+
# a name must be specified
|
41
|
+
if self.name.nil?
|
42
|
+
valid = false
|
43
|
+
applog(self, :error, "No name was specified")
|
44
|
+
end
|
45
|
+
|
46
|
+
# valid_states must be specified
|
47
|
+
if self.valid_states.nil?
|
48
|
+
valid = false
|
49
|
+
applog(self, :error, "No valid_states array was specified")
|
50
|
+
end
|
51
|
+
|
52
|
+
# valid_states must be specified
|
53
|
+
if self.initial_state.nil?
|
54
|
+
valid = false
|
55
|
+
applog(self, :error, "No initial_state was specified")
|
56
|
+
end
|
57
|
+
|
58
|
+
valid
|
59
|
+
end
|
60
|
+
|
61
|
+
###########################################################################
|
62
|
+
#
|
63
|
+
# Advanced mode
|
64
|
+
#
|
65
|
+
###########################################################################
|
66
|
+
|
67
|
+
def canonical_hash_form(to)
|
68
|
+
to.instance_of?(Symbol) ? {true => to} : to
|
69
|
+
end
|
70
|
+
|
71
|
+
# Define a transition handler which consists of a set of conditions
|
72
|
+
def transition(start_states, end_states)
|
73
|
+
# convert end_states into canonical hash form
|
74
|
+
canonical_end_states = canonical_hash_form(end_states)
|
75
|
+
|
76
|
+
Array(start_states).each do |start_state|
|
77
|
+
# validate start state
|
78
|
+
unless self.valid_states.include?(start_state)
|
79
|
+
abort "Invalid state :#{start_state}. Must be one of the symbols #{self.valid_states.map{|x| ":#{x}"}.join(', ')}"
|
80
|
+
end
|
81
|
+
|
82
|
+
# create a new metric to hold the watch, end states, and conditions
|
83
|
+
m = Metric.new(self, canonical_end_states)
|
84
|
+
|
85
|
+
if block_given?
|
86
|
+
# let the config file define some conditions on the metric
|
87
|
+
yield(m)
|
88
|
+
else
|
89
|
+
# add an :always condition if no block
|
90
|
+
m.condition(:always) do |c|
|
91
|
+
c.what = true
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
# populate the condition -> metric directory
|
96
|
+
m.conditions.each do |c|
|
97
|
+
self.directory[c] = m
|
98
|
+
end
|
99
|
+
|
100
|
+
# record the metric
|
101
|
+
self.metrics[start_state] ||= []
|
102
|
+
self.metrics[start_state] << m
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def lifecycle
|
107
|
+
# create a new metric to hold the watch and conditions
|
108
|
+
m = Metric.new(self)
|
109
|
+
|
110
|
+
# let the config file define some conditions on the metric
|
111
|
+
yield(m)
|
112
|
+
|
113
|
+
# populate the condition -> metric directory
|
114
|
+
m.conditions.each do |c|
|
115
|
+
self.directory[c] = m
|
116
|
+
end
|
117
|
+
|
118
|
+
# record the metric
|
119
|
+
self.metrics[nil] << m
|
120
|
+
end
|
121
|
+
|
122
|
+
###########################################################################
|
123
|
+
#
|
124
|
+
# Lifecycle
|
125
|
+
#
|
126
|
+
###########################################################################
|
127
|
+
|
128
|
+
# Enable monitoring
|
129
|
+
#
|
130
|
+
# Returns nothing
|
131
|
+
def monitor
|
132
|
+
self.move(self.initial_state)
|
133
|
+
end
|
134
|
+
|
135
|
+
# Disable monitoring
|
136
|
+
#
|
137
|
+
# Returns nothing
|
138
|
+
def unmonitor
|
139
|
+
self.move(:unmonitored)
|
140
|
+
end
|
141
|
+
|
142
|
+
# Move to the givent state
|
143
|
+
# +to_state+ is the Symbol representing the state to move to
|
144
|
+
#
|
145
|
+
# Returns Task (self)
|
146
|
+
def move(to_state)
|
147
|
+
if Thread.current != self.driver.thread
|
148
|
+
# called from outside Driver
|
149
|
+
|
150
|
+
# send an async message to Driver
|
151
|
+
self.driver.message(:move, [to_state])
|
152
|
+
else
|
153
|
+
# called from within Driver
|
154
|
+
|
155
|
+
# record original info
|
156
|
+
orig_to_state = to_state
|
157
|
+
from_state = self.state
|
158
|
+
|
159
|
+
# log
|
160
|
+
msg = "#{self.name} move '#{from_state}' to '#{to_state}'"
|
161
|
+
applog(self, :info, msg)
|
162
|
+
|
163
|
+
# cleanup from current state
|
164
|
+
self.driver.clear_events
|
165
|
+
self.metrics[from_state].each { |m| m.disable }
|
166
|
+
if to_state == :unmonitored
|
167
|
+
self.metrics[nil].each { |m| m.disable }
|
168
|
+
end
|
169
|
+
|
170
|
+
# perform action
|
171
|
+
self.action(to_state)
|
172
|
+
|
173
|
+
# enable simple mode
|
174
|
+
if [:start, :restart].include?(to_state) && self.metrics[to_state].empty?
|
175
|
+
to_state = :up
|
176
|
+
end
|
177
|
+
|
178
|
+
# move to new state
|
179
|
+
self.metrics[to_state].each { |m| m.enable }
|
180
|
+
|
181
|
+
# if no from state, enable lifecycle metric
|
182
|
+
if from_state == :unmonitored
|
183
|
+
self.metrics[nil].each { |m| m.enable }
|
184
|
+
end
|
185
|
+
|
186
|
+
# set state
|
187
|
+
self.state = to_state
|
188
|
+
|
189
|
+
# broadcast to interested TriggerConditions
|
190
|
+
Trigger.broadcast(self, :state_change, [from_state, orig_to_state])
|
191
|
+
|
192
|
+
# log
|
193
|
+
msg = "#{self.name} moved '#{from_state}' to '#{to_state}'"
|
194
|
+
applog(self, :info, msg)
|
195
|
+
end
|
196
|
+
|
197
|
+
self
|
198
|
+
end
|
199
|
+
|
200
|
+
# Notify the Driver that an EventCondition has triggered
|
201
|
+
#
|
202
|
+
# Returns nothing
|
203
|
+
def trigger(condition)
|
204
|
+
self.driver.message(:handle_event, [condition])
|
205
|
+
end
|
206
|
+
|
207
|
+
###########################################################################
|
208
|
+
#
|
209
|
+
# Actions
|
210
|
+
#
|
211
|
+
###########################################################################
|
212
|
+
|
213
|
+
def method_missing(sym, *args)
|
214
|
+
unless (sym.to_s =~ /=$/)
|
215
|
+
super
|
216
|
+
end
|
217
|
+
|
218
|
+
base = sym.to_s.chop.intern
|
219
|
+
|
220
|
+
unless self.valid_states.include?(base)
|
221
|
+
super
|
222
|
+
end
|
223
|
+
|
224
|
+
self.class.send(:attr_accessor, base)
|
225
|
+
self.send(sym, *args)
|
226
|
+
end
|
227
|
+
|
228
|
+
# Perform the given action
|
229
|
+
# +a+ is the action Symbol
|
230
|
+
# +c+ is the Condition
|
231
|
+
#
|
232
|
+
# Returns Task (self)
|
233
|
+
def action(a, c = nil)
|
234
|
+
if Thread.current != self.driver.thread
|
235
|
+
# called from outside Driver
|
236
|
+
|
237
|
+
# send an async message to Driver
|
238
|
+
self.driver.message(:action, [a, c])
|
239
|
+
else
|
240
|
+
# called from within Driver
|
241
|
+
|
242
|
+
if self.respond_to?(a)
|
243
|
+
command = self.send(a)
|
244
|
+
|
245
|
+
case command
|
246
|
+
when String
|
247
|
+
msg = "#{self.name} #{a}: #{command}"
|
248
|
+
applog(self, :info, msg)
|
249
|
+
|
250
|
+
system(command)
|
251
|
+
when Proc
|
252
|
+
msg = "#{self.name} #{a}: lambda"
|
253
|
+
applog(self, :info, msg)
|
254
|
+
|
255
|
+
command.call
|
256
|
+
else
|
257
|
+
raise NotImplementedError
|
258
|
+
end
|
259
|
+
end
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
###########################################################################
|
264
|
+
#
|
265
|
+
# Events
|
266
|
+
#
|
267
|
+
###########################################################################
|
268
|
+
|
269
|
+
def attach(condition)
|
270
|
+
case condition
|
271
|
+
when PollCondition
|
272
|
+
self.driver.schedule(condition, 0)
|
273
|
+
when EventCondition, TriggerCondition
|
274
|
+
condition.register
|
275
|
+
end
|
276
|
+
end
|
277
|
+
|
278
|
+
def detach(condition)
|
279
|
+
case condition
|
280
|
+
when PollCondition
|
281
|
+
condition.reset
|
282
|
+
when EventCondition, TriggerCondition
|
283
|
+
condition.deregister
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
###########################################################################
|
288
|
+
#
|
289
|
+
# Registration
|
290
|
+
#
|
291
|
+
###########################################################################
|
292
|
+
|
293
|
+
def register!
|
294
|
+
# override if necessary
|
295
|
+
end
|
296
|
+
|
297
|
+
def unregister!
|
298
|
+
# override if necessary
|
299
|
+
end
|
300
|
+
|
301
|
+
###########################################################################
|
302
|
+
#
|
303
|
+
# Handlers
|
304
|
+
#
|
305
|
+
###########################################################################
|
306
|
+
|
307
|
+
# Evaluate and handle the given poll condition. Handles logging
|
308
|
+
# notifications, and moving to the new state if necessary
|
309
|
+
# +condition+ is the Condition to handle
|
310
|
+
#
|
311
|
+
# Returns nothing
|
312
|
+
def handle_poll(condition)
|
313
|
+
# lookup metric
|
314
|
+
metric = self.directory[condition]
|
315
|
+
|
316
|
+
# run the test
|
317
|
+
result = condition.test
|
318
|
+
|
319
|
+
# log
|
320
|
+
messages = self.log_line(self, metric, condition, result)
|
321
|
+
|
322
|
+
# notify
|
323
|
+
if condition.notify && self.trigger?(metric, result)
|
324
|
+
self.notify(condition, messages.last)
|
325
|
+
end
|
326
|
+
|
327
|
+
# after-condition
|
328
|
+
condition.after
|
329
|
+
|
330
|
+
# get the destination
|
331
|
+
dest =
|
332
|
+
if result && condition.transition
|
333
|
+
# condition override
|
334
|
+
condition.transition
|
335
|
+
else
|
336
|
+
# regular
|
337
|
+
metric.destination && metric.destination[result]
|
338
|
+
end
|
339
|
+
|
340
|
+
# transition or reschedule
|
341
|
+
if dest
|
342
|
+
# transition
|
343
|
+
begin
|
344
|
+
self.move(dest)
|
345
|
+
rescue EventRegistrationFailedError
|
346
|
+
msg = self.name + ' Event registration failed, moving back to previous state'
|
347
|
+
applog(self, :info, msg)
|
348
|
+
|
349
|
+
dest = self.state
|
350
|
+
retry
|
351
|
+
end
|
352
|
+
else
|
353
|
+
# reschedule
|
354
|
+
self.driver.schedule(condition)
|
355
|
+
end
|
356
|
+
end
|
357
|
+
|
358
|
+
# Asynchronously evaluate and handle the given event condition. Handles logging
|
359
|
+
# notifications, and moving to the new state if necessary
|
360
|
+
# +condition+ is the Condition to handle
|
361
|
+
#
|
362
|
+
# Returns nothing
|
363
|
+
def handle_event(condition)
|
364
|
+
# lookup metric
|
365
|
+
metric = self.directory[condition]
|
366
|
+
|
367
|
+
# log
|
368
|
+
messages = self.log_line(self, metric, condition, true)
|
369
|
+
|
370
|
+
# notify
|
371
|
+
if condition.notify && self.trigger?(metric, true)
|
372
|
+
self.notify(condition, messages.last)
|
373
|
+
end
|
374
|
+
|
375
|
+
# get the destination
|
376
|
+
dest =
|
377
|
+
if condition.transition
|
378
|
+
# condition override
|
379
|
+
condition.transition
|
380
|
+
else
|
381
|
+
# regular
|
382
|
+
metric.destination && metric.destination[true]
|
383
|
+
end
|
384
|
+
|
385
|
+
if dest
|
386
|
+
self.move(dest)
|
387
|
+
end
|
388
|
+
end
|
389
|
+
|
390
|
+
# Determine whether a trigger happened
|
391
|
+
# +metric+ is the Metric
|
392
|
+
# +result+ is the result from the condition's test
|
393
|
+
#
|
394
|
+
# Returns Boolean
|
395
|
+
def trigger?(metric, result)
|
396
|
+
metric.destination && metric.destination[result]
|
397
|
+
end
|
398
|
+
|
399
|
+
# Log info about the condition and return the list of messages logged
|
400
|
+
# +watch+ is the Watch
|
401
|
+
# +metric+ is the Metric
|
402
|
+
# +condition+ is the Condition
|
403
|
+
# +result+ is the Boolean result of the condition test evaluation
|
404
|
+
#
|
405
|
+
# Returns String[]
|
406
|
+
def log_line(watch, metric, condition, result)
|
407
|
+
status =
|
408
|
+
if self.trigger?(metric, result)
|
409
|
+
"[trigger]"
|
410
|
+
else
|
411
|
+
"[ok]"
|
412
|
+
end
|
413
|
+
|
414
|
+
messages = []
|
415
|
+
|
416
|
+
# log info if available
|
417
|
+
if condition.info
|
418
|
+
Array(condition.info).each do |condition_info|
|
419
|
+
messages << "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
|
420
|
+
applog(watch, :info, messages.last)
|
421
|
+
end
|
422
|
+
else
|
423
|
+
messages << "#{watch.name} #{status} (#{condition.base_name})"
|
424
|
+
applog(watch, :info, messages.last)
|
425
|
+
end
|
426
|
+
|
427
|
+
# log
|
428
|
+
debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
|
429
|
+
applog(watch, :debug, debug_message)
|
430
|
+
|
431
|
+
messages
|
432
|
+
end
|
433
|
+
|
434
|
+
# Format the destination specification for use in debug logging
|
435
|
+
# +metric+ is the Metric
|
436
|
+
# +condition+ is the Condition
|
437
|
+
#
|
438
|
+
# Returns String
|
439
|
+
def dest_desc(metric, condition)
|
440
|
+
if condition.transition
|
441
|
+
{true => condition.transition}.inspect
|
442
|
+
else
|
443
|
+
if metric.destination
|
444
|
+
metric.destination.inspect
|
445
|
+
else
|
446
|
+
'none'
|
447
|
+
end
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
451
|
+
# Notify all recipeients of the given condition with the specified message
|
452
|
+
# +condition+ is the Condition
|
453
|
+
# +message+ is the String message to send
|
454
|
+
#
|
455
|
+
# Returns nothing
|
456
|
+
def notify(condition, message)
|
457
|
+
spec = Contact.normalize(condition.notify)
|
458
|
+
unmatched = []
|
459
|
+
|
460
|
+
# resolve contacts
|
461
|
+
resolved_contacts =
|
462
|
+
spec[:contacts].inject([]) do |acc, contact_name_or_group|
|
463
|
+
cons = Array(God.contacts[contact_name_or_group] || God.contact_groups[contact_name_or_group])
|
464
|
+
unmatched << contact_name_or_group if cons.empty?
|
465
|
+
acc += cons
|
466
|
+
acc
|
467
|
+
end
|
468
|
+
|
469
|
+
# warn about unmatched contacts
|
470
|
+
unless unmatched.empty?
|
471
|
+
msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
|
472
|
+
applog(condition.watch, :warn, msg)
|
473
|
+
end
|
474
|
+
|
475
|
+
# notify each contact
|
476
|
+
resolved_contacts.each do |c|
|
477
|
+
host = `hostname`.chomp rescue 'none'
|
478
|
+
c.notify(message, Time.now, spec[:priority], spec[:category], host)
|
479
|
+
|
480
|
+
msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
|
481
|
+
|
482
|
+
applog(condition.watch, :info, msg % [])
|
483
|
+
end
|
484
|
+
end
|
485
|
+
end
|
486
|
+
|
487
|
+
end
|