dosire-god 0.7.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +261 -0
- data/Manifest.txt +107 -0
- data/README.txt +59 -0
- data/Rakefile +35 -0
- data/bin/god +127 -0
- data/examples/events.god +84 -0
- data/examples/gravatar.god +54 -0
- data/examples/single.god +66 -0
- data/ext/god/extconf.rb +55 -0
- data/ext/god/kqueue_handler.c +123 -0
- data/ext/god/netlink_handler.c +167 -0
- data/init/god +42 -0
- data/lib/god/behavior.rb +52 -0
- data/lib/god/behaviors/clean_pid_file.rb +21 -0
- data/lib/god/behaviors/clean_unix_socket.rb +21 -0
- data/lib/god/behaviors/notify_when_flapping.rb +51 -0
- data/lib/god/cli/command.rb +206 -0
- data/lib/god/cli/run.rb +177 -0
- data/lib/god/cli/version.rb +23 -0
- data/lib/god/condition.rb +96 -0
- data/lib/god/conditions/always.rb +23 -0
- data/lib/god/conditions/complex.rb +86 -0
- data/lib/god/conditions/cpu_usage.rb +80 -0
- data/lib/god/conditions/degrading_lambda.rb +52 -0
- data/lib/god/conditions/disk_usage.rb +27 -0
- data/lib/god/conditions/flapping.rb +128 -0
- data/lib/god/conditions/http_response_code.rb +168 -0
- data/lib/god/conditions/lambda.rb +25 -0
- data/lib/god/conditions/memory_usage.rb +82 -0
- data/lib/god/conditions/process_exits.rb +72 -0
- data/lib/god/conditions/process_running.rb +74 -0
- data/lib/god/conditions/tries.rb +44 -0
- data/lib/god/configurable.rb +57 -0
- data/lib/god/contact.rb +106 -0
- data/lib/god/contacts/email.rb +95 -0
- data/lib/god/dependency_graph.rb +41 -0
- data/lib/god/diagnostics.rb +37 -0
- data/lib/god/driver.rb +206 -0
- data/lib/god/errors.rb +24 -0
- data/lib/god/event_handler.rb +111 -0
- data/lib/god/event_handlers/dummy_handler.rb +13 -0
- data/lib/god/event_handlers/kqueue_handler.rb +17 -0
- data/lib/god/event_handlers/netlink_handler.rb +13 -0
- data/lib/god/logger.rb +120 -0
- data/lib/god/metric.rb +59 -0
- data/lib/god/process.rb +327 -0
- data/lib/god/registry.rb +32 -0
- data/lib/god/simple_logger.rb +53 -0
- data/lib/god/socket.rb +96 -0
- data/lib/god/sugar.rb +47 -0
- data/lib/god/system/portable_poller.rb +42 -0
- data/lib/god/system/process.rb +42 -0
- data/lib/god/system/slash_proc_poller.rb +82 -0
- data/lib/god/task.rb +487 -0
- data/lib/god/timeline.rb +25 -0
- data/lib/god/trigger.rb +43 -0
- data/lib/god/watch.rb +183 -0
- data/lib/god.rb +644 -0
- data/test/configs/child_events/child_events.god +44 -0
- data/test/configs/child_events/simple_server.rb +3 -0
- data/test/configs/child_polls/child_polls.god +37 -0
- data/test/configs/child_polls/simple_server.rb +12 -0
- data/test/configs/complex/complex.god +59 -0
- data/test/configs/complex/simple_server.rb +3 -0
- data/test/configs/contact/contact.god +74 -0
- data/test/configs/contact/simple_server.rb +3 -0
- data/test/configs/daemon_events/daemon_events.god +37 -0
- data/test/configs/daemon_events/simple_server.rb +8 -0
- data/test/configs/daemon_events/simple_server_stop.rb +11 -0
- data/test/configs/daemon_polls/daemon_polls.god +17 -0
- data/test/configs/daemon_polls/simple_server.rb +6 -0
- data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
- data/test/configs/degrading_lambda/tcp_server.rb +15 -0
- data/test/configs/matias/matias.god +50 -0
- data/test/configs/real.rb +59 -0
- data/test/configs/running_load/running_load.god +16 -0
- data/test/configs/stress/simple_server.rb +3 -0
- data/test/configs/stress/stress.god +15 -0
- data/test/configs/task/logs/.placeholder +0 -0
- data/test/configs/task/task.god +26 -0
- data/test/configs/test.rb +61 -0
- data/test/helper.rb +151 -0
- data/test/suite.rb +6 -0
- data/test/test_behavior.rb +21 -0
- data/test/test_condition.rb +50 -0
- data/test/test_conditions_disk_usage.rb +56 -0
- data/test/test_conditions_http_response_code.rb +109 -0
- data/test/test_conditions_process_running.rb +44 -0
- data/test/test_conditions_tries.rb +67 -0
- data/test/test_contact.rb +109 -0
- data/test/test_dependency_graph.rb +62 -0
- data/test/test_driver.rb +11 -0
- data/test/test_event_handler.rb +80 -0
- data/test/test_god.rb +598 -0
- data/test/test_handlers_kqueue_handler.rb +16 -0
- data/test/test_logger.rb +63 -0
- data/test/test_metric.rb +72 -0
- data/test/test_process.rb +246 -0
- data/test/test_registry.rb +15 -0
- data/test/test_socket.rb +42 -0
- data/test/test_sugar.rb +42 -0
- data/test/test_system_portable_poller.rb +17 -0
- data/test/test_system_process.rb +30 -0
- data/test/test_task.rb +262 -0
- data/test/test_timeline.rb +37 -0
- data/test/test_trigger.rb +59 -0
- data/test/test_watch.rb +279 -0
- metadata +186 -0
data/lib/god/sugar.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Readability helpers so numbers can be written with a unit suffix,
# e.g. 30.seconds, 5.minutes, 150.megabytes, 80.percent.
#
# Durations are expressed in seconds and sizes in kilobytes, so the
# base-unit helpers (seconds, kilobytes, percent) return the receiver as-is.
class Numeric
  # Base unit for durations: the value unchanged.
  def seconds
    self
  end
  alias second seconds

  # Minutes expressed in seconds.
  def minutes
    self * 60
  end
  alias minute minutes

  # Hours expressed in seconds.
  def hours
    self * (60 * 60)
  end
  alias hour hours

  # Days expressed in seconds.
  def days
    self * (24 * 60 * 60)
  end
  alias day days

  # Base unit for sizes: the value unchanged.
  def kilobytes
    self
  end
  alias kilobyte kilobytes

  # Megabytes expressed in kilobytes.
  def megabytes
    self * 1024
  end
  alias megabyte megabytes

  # Gigabytes expressed in kilobytes.
  def gigabytes
    self * (1024 * 1024)
  end
  alias gigabyte gigabytes

  # Percentages are passed through unchanged.
  def percent
    self
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module God
  module System
    # Reads per-process statistics by running `ps` for the pid given at
    # construction time.
    class PortablePoller
      # +pid+ is interpolated verbatim into the ps command line.
      def initialize(pid)
        @pid = pid
      end

      # Memory usage in kilobytes (resident set size)
      def memory
        ps_int('rss')
      end

      # Percentage memory usage
      def percent_memory
        ps_float('%mem')
      end

      # Percentage CPU usage
      def percent_cpu
        ps_float('%cpu')
      end

      private

      # ps column +keyword+ converted with to_i
      def ps_int(keyword)
        ps_output(keyword).to_i
      end

      # ps column +keyword+ converted with to_f
      def ps_float(keyword)
        ps_output(keyword).to_f
      end

      # ps column +keyword+ with surrounding whitespace removed
      def ps_string(keyword)
        ps_output(keyword).strip
      end

      # Raw output of ps for a single column of this pid; the trailing '='
      # on the column name asks ps to print no header line.
      def ps_output(keyword)
        `ps -o #{keyword}= -p #{@pid}`
      end

      # Convert text shaped like "MM:SS.cc" into whole seconds. The third
      # group is matched but ignored; text that does not match yields 0.
      def time_string_to_seconds(text)
        parts = text.match(/(\d+):(\d{2}).(\d{2})/).to_a
        (parts[1].to_i * 60) + parts[2].to_i
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module God
  module System

    # Facade over a system poller for a single pid. The poller class is
    # SlashProcPoller when it reports itself usable, PortablePoller otherwise.
    class Process
      # +pid+ is coerced with to_i before being handed to the poller.
      def initialize(pid)
        @pid = pid.to_i
        @poller = fetch_system_poller.new(@pid)
      end

      # Return true if this process is running, false otherwise.
      #
      # Signal 0 performs the existence/permission check without delivering
      # a signal. Errno::EPERM means the process exists but we may not signal
      # it, so it still counts as running. Any other StandardError (e.g.
      # Errno::ESRCH for a missing pid) is reported as not running.
      def exists?
        ::Process.kill(0, @pid)
        true
      rescue Errno::EPERM
        true
      rescue StandardError
        false
      end

      # Memory usage in kilobytes (resident set size)
      def memory
        @poller.memory
      end

      # Percentage memory usage
      def percent_memory
        @poller.percent_memory
      end

      # Percentage CPU usage
      def percent_cpu
        @poller.percent_cpu
      end

      private

      # Pick the poller class to instantiate for this process.
      def fetch_system_poller
        if SlashProcPoller.usable?
          SlashProcPoller
        else
          PortablePoller
        end
      end
    end

  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module God
  module System
    # Poller that reads process statistics from the /proc filesystem
    # (/proc/meminfo, /proc/uptime and /proc/<pid>/stat).
    class SlashProcPoller < PortablePoller
      @@kb_per_page = 4 # TODO: Need to make this portable
      @@hertz = 100
      @@total_mem = nil

      MeminfoPath = '/proc/meminfo'
      UptimePath = '/proc/uptime'

      RequiredPaths = [MeminfoPath, UptimePath]

      # Names for the whitespace-separated fields of /proc/<pid>/stat, in
      # file order. The nil entry marks a field that is read but not kept.
      StatFields = [
        :pid, :comm, :state, :ppid, :pgrp,
        :session, :tty_nr, :tpgid, :flags,
        :minflt, :cminflt, :majflt, :cmajflt,
        :utime, :stime, :cutime, :cstime,
        :priority, :nice, nil, :itrealvalue,
        :starttime, :vsize, :rss, :rlim,
        :startcode, :endcode, :startstack, :kstkesp,
        :kstkeip, :signal, :blocked, :sigignore,
        :sigcatch, :wchan, :nswap, :cnswap,
        :exit_signal, :processor, :rt_priority,
        :policy
      ]

      # FreeBSD has /proc by default, but nothing mounted there!
      # So we should check for the actual required paths!
      # Returns true if +RequiredPaths+ are readable.
      def self.usable?
        RequiredPaths.all? { |path| File.readable?(path) }
      end

      def initialize(pid)
        super(pid)

        # Total memory is read once and cached for every poller instance.
        unless @@total_mem # in K
          File.open(MeminfoPath) do |f|
            @@total_mem = f.gets.split[1]
          end
        end
      end

      # Resident set size: pages from the stat file times @@kb_per_page.
      def memory
        stat[:rss].to_i * @@kb_per_page
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      # Resident memory as a percentage of the cached total.
      def percent_memory
        (memory / @@total_mem.to_f) * 100
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      # TODO: Change this to calculate the wma instead
      #
      # CPU time used (utime + stime) relative to the seconds elapsed since
      # the process start time.
      def percent_cpu
        stats = stat
        total_time = stats[:utime].to_i + stats[:stime].to_i # in jiffies
        seconds = uptime - stats[:starttime].to_i / @@hertz
        if seconds == 0
          0
        else
          ((total_time * 1000 / @@hertz) / seconds) / 10
        end
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      private

      # in seconds
      def uptime
        File.read(UptimePath).split[0].to_f
      end

      # Parse /proc/<pid>/stat into a Hash keyed by the StatFields names.
      # Values are the raw String fields; missing trailing fields are nil.
      def stat
        fields = split_stat_line(File.read("/proc/#{@pid}/stat"))

        stats = {}
        StatFields.each_with_index do |key, index|
          stats[key] = fields[index] if key
        end
        stats
      end

      # Split a stat line into fields without being confused by the command
      # name. The second field is wrapped in parentheses and may itself
      # contain spaces or ')' characters, so everything between the first
      # '(' and the last ')' is kept as one field (parentheses included) and
      # only the remainder is split on whitespace.
      def split_stat_line(raw)
        open_at = raw.index('(')
        close_at = raw.rindex(')')
        return raw.split unless open_at && close_at && open_at < close_at

        [raw[0...open_at].strip, raw[open_at..close_at]] + raw[(close_at + 1)..-1].split
      end
    end
  end
end
|
data/lib/god/task.rb
ADDED
@@ -0,0 +1,487 @@
|
|
1
|
+
module God
|
2
|
+
|
3
|
+
class Task
|
4
|
+
attr_accessor :name, :interval, :group, :valid_states, :initial_state, :driver
|
5
|
+
|
6
|
+
attr_writer :autostart
|
7
|
+
# Reader for the autostart flag (the value stored in @autostart).
def autostart?
  @autostart
end
|
8
|
+
|
9
|
+
# api
|
10
|
+
attr_accessor :state, :behaviors, :metrics, :directory
|
11
|
+
|
12
|
+
# Build a task in its default configuration.
def initialize
  # keep a value that is already set, otherwise default to true
  @autostart = true unless @autostart

  # every task starts out unmonitored
  self.state = :unmonitored

  # behaviors attached to this task
  self.behaviors = []

  # metrics (sets of conditions) keyed by state; the nil key, :unmonitored
  # and :stop always have a list
  self.metrics = { nil => [], :unmonitored => [], :stop => [] }

  # lookup table from a condition to the metric it belongs to
  self.directory = {}

  # the driver bound to this task
  self.driver = Driver.new(self)
end
|
30
|
+
|
31
|
+
# Make sure every entry of valid_states has a metrics list, creating an
# empty one where none exists yet.
def prepare
  valid_states.each { |known_state| metrics[known_state] ||= [] }
end
|
36
|
+
|
37
|
+
# Check that the mandatory settings are present, logging an error through
# applog for each one that is nil.
#
# Returns true only when name, valid_states and initial_state are all set.
def valid?
  required = [
    [self.name,          "No name was specified"],
    [self.valid_states,  "No valid_states array was specified"],
    [self.initial_state, "No initial_state was specified"]
  ]

  missing = required.select { |value, _complaint| value.nil? }
  missing.each { |_value, complaint| applog(self, :error, complaint) }

  missing.empty?
end
|
60
|
+
|
61
|
+
###########################################################################
|
62
|
+
#
|
63
|
+
# Advanced mode
|
64
|
+
#
|
65
|
+
###########################################################################
|
66
|
+
|
67
|
+
# Normalise a destination spec: a bare Symbol becomes {true => Symbol};
# anything else is returned untouched.
def canonical_hash_form(to)
  return to unless to.instance_of?(Symbol)

  { true => to }
end
|
70
|
+
|
71
|
+
# Define a transition handler which consists of a set of conditions
#   +start_states+ is one state or a list of states to attach the handler to
#   +end_states+ is the destination spec (a Symbol or a Hash; a Symbol is
#                converted with canonical_hash_form)
#
# One Metric is created per start state. When a block is given it receives
# the metric so conditions can be added; without a block an :always
# condition with what = true is installed. Aborts the process when a start
# state is not listed in valid_states.
def transition(start_states, end_states)
  destinations = canonical_hash_form(end_states)

  Array(start_states).each do |origin|
    # refuse states the task does not know about
    unless self.valid_states.include?(origin)
      abort "Invalid state :#{origin}. Must be one of the symbols #{self.valid_states.map{|x| ":#{x}"}.join(', ')}"
    end

    # the metric carries the task, the end states and the conditions
    metric = Metric.new(self, destinations)

    if block_given?
      yield(metric)
    else
      metric.condition(:always) { |always| always.what = true }
    end

    # remember which metric owns each condition
    metric.conditions.each { |cond| self.directory[cond] = metric }

    # file the metric under its start state
    (self.metrics[origin] ||= []) << metric
  end
end
|
105
|
+
|
106
|
+
# Define lifecycle conditions: a Metric without end states is created,
# yielded to the caller's block for configuration, indexed in the
# condition -> metric directory and stored under the nil metrics key.
def lifecycle
  metric = Metric.new(self)

  # the block adds the conditions
  yield(metric)

  metric.conditions.each { |cond| self.directory[cond] = metric }

  self.metrics[nil] << metric
end
|
121
|
+
|
122
|
+
###########################################################################
|
123
|
+
#
|
124
|
+
# Lifecycle
|
125
|
+
#
|
126
|
+
###########################################################################
|
127
|
+
|
128
|
+
# Enable monitoring by moving the task to its initial state
#
# Returns the result of #move
def monitor
  move(initial_state)
end
|
134
|
+
|
135
|
+
# Disable monitoring by moving the task to the :unmonitored state
#
# Returns the result of #move
def unmonitor
  move(:unmonitored)
end
|
141
|
+
|
142
|
+
# Move to the given state
#   +to_state+ is the Symbol representing the state to move to
#
# When the calling thread is not the driver's thread the move is not
# performed here; it is queued on the driver as a :move message instead.
#
# Returns Task (self)
def move(to_state)
  if Thread.current != self.driver.thread
    # called from outside Driver: send an async message to Driver
    self.driver.message(:move, [to_state])
    return self
  end

  # called from within Driver

  # record original info
  orig_to_state = to_state
  from_state = self.state

  # log
  msg = "#{self.name} move '#{from_state}' to '#{to_state}'"
  applog(self, :info, msg)

  # cleanup from current state
  self.driver.clear_events
  self.metrics[from_state].each { |m| m.disable }
  if to_state == :unmonitored
    self.metrics[nil].each { |m| m.disable }
  end

  # perform action
  self.action(to_state)

  # enable simple mode: :start/:restart without metrics of their own
  # land directly in :up
  if [:start, :restart].include?(to_state) && self.metrics[to_state].empty?
    to_state = :up
  end

  # move to new state
  self.metrics[to_state].each { |m| m.enable }

  # when leaving :unmonitored, enable the lifecycle metrics -- but not when
  # the destination is :unmonitored as well, otherwise the lifecycle metrics
  # that were just disabled above would be switched back on
  if from_state == :unmonitored && to_state != :unmonitored
    self.metrics[nil].each { |m| m.enable }
  end

  # set state
  self.state = to_state

  # broadcast to interested TriggerConditions (with the state originally
  # requested, not the simple-mode substitute)
  Trigger.broadcast(self, :state_change, [from_state, orig_to_state])

  # log
  msg = "#{self.name} moved '#{from_state}' to '#{to_state}'"
  applog(self, :info, msg)

  self
end
|
199
|
+
|
200
|
+
# Notify the Driver that an EventCondition has triggered by queueing a
# :handle_event message for +condition+
#
# Returns the result of the driver's message call
def trigger(condition)
  driver.message(:handle_event, [condition])
end
|
206
|
+
|
207
|
+
###########################################################################
|
208
|
+
#
|
209
|
+
# Actions
|
210
|
+
#
|
211
|
+
###########################################################################
|
212
|
+
|
213
|
+
# Lazily create accessors for per-state settings. Calling an undefined
# setter such as `start=` whose base name (:start) is listed in
# valid_states defines an attr_accessor for it on the class and then
# performs the assignment. Anything else is passed on to super.
def method_missing(sym, *args)
  setter = sym.to_s
  return super unless setter =~ /=$/

  base = setter.chop.intern

  # Array() keeps an unset valid_states from raising on nil here, so the
  # caller gets the usual error for the missing setter instead
  return super unless Array(self.valid_states).include?(base)

  self.class.send(:attr_accessor, base)
  self.send(sym, *args)
end

# Keep respond_to? in step with method_missing: setters for valid states
# are reported as available even before their accessor has been defined.
def respond_to_missing?(sym, include_private = false)
  setter = sym.to_s
  return super unless setter =~ /=$/

  Array(self.valid_states).include?(setter.chop.intern) || super
end
|
227
|
+
|
228
|
+
# Perform the given action
#   +a+ is the action Symbol
#   +c+ is the Condition
#
# Outside the driver's thread the action is only queued on the driver as an
# :action message. Inside it, the task is asked for the command stored
# under +a+: a String is run with system, a Proc is called, and any other
# value raises NotImplementedError. Nothing happens when the task does not
# respond to +a+.
#
# Returns the driver's message result when queued, the result of
# system/call when run, or nil when there is nothing to run
def action(a, c = nil)
  # called from outside Driver: send an async message to Driver
  return self.driver.message(:action, [a, c]) if Thread.current != self.driver.thread

  # called from within Driver
  return nil unless self.respond_to?(a)

  command = self.send(a)

  case command
  when String
    applog(self, :info, "#{self.name} #{a}: #{command}")
    system(command)
  when Proc
    applog(self, :info, "#{self.name} #{a}: lambda")
    command.call
  else
    raise NotImplementedError
  end
end
|
262
|
+
|
263
|
+
###########################################################################
|
264
|
+
#
|
265
|
+
# Events
|
266
|
+
#
|
267
|
+
###########################################################################
|
268
|
+
|
269
|
+
# Activate a condition: a PollCondition is scheduled on the driver with a
# delay of 0, an EventCondition or TriggerCondition is registered. Other
# objects are ignored (nil is returned).
def attach(condition)
  case condition
  when PollCondition then self.driver.schedule(condition, 0)
  when EventCondition, TriggerCondition then condition.register
  end
end
|
277
|
+
|
278
|
+
# Deactivate a condition: a PollCondition is reset, an EventCondition or
# TriggerCondition is deregistered. Other objects are ignored (nil is
# returned).
def detach(condition)
  case condition
  when PollCondition then condition.reset
  when EventCondition, TriggerCondition then condition.deregister
  end
end
|
286
|
+
|
287
|
+
###########################################################################
|
288
|
+
#
|
289
|
+
# Registration
|
290
|
+
#
|
291
|
+
###########################################################################
|
292
|
+
|
293
|
+
# Registration hook; does nothing here. Override if necessary.
def register!
  nil
end
|
296
|
+
|
297
|
+
# Unregistration hook; does nothing here. Override if necessary.
def unregister!
  nil
end
|
300
|
+
|
301
|
+
###########################################################################
|
302
|
+
#
|
303
|
+
# Handlers
|
304
|
+
#
|
305
|
+
###########################################################################
|
306
|
+
|
307
|
+
# Evaluate and handle the given poll condition. Handles logging
# notifications, and moving to the new state if necessary
#   +condition+ is the Condition to handle
#
# The destination is the condition's own transition when the test passed
# and one is set; otherwise it is looked up in the metric's destination
# table using the test result. Without a destination the condition is
# rescheduled on the driver.
#
# Returns the result of #move or of the driver's schedule call
def handle_poll(condition)
  # lookup metric
  metric = self.directory[condition]

  # run the test
  result = condition.test

  # log
  messages = self.log_line(self, metric, condition, result)

  # notify
  self.notify(condition, messages.last) if condition.notify && self.trigger?(metric, result)

  # after-condition
  condition.after

  # condition override first, regular metric destination second
  dest = (result && condition.transition) ||
         (metric.destination && metric.destination[result])

  # nothing to move to: reschedule
  return self.driver.schedule(condition) unless dest

  # transition
  # NOTE(review): the retry below is unbounded -- if moving back to the
  # current state raises EventRegistrationFailedError as well, this keeps
  # looping; confirm that is acceptable.
  begin
    self.move(dest)
  rescue EventRegistrationFailedError
    msg = self.name + ' Event registration failed, moving back to previous state'
    applog(self, :info, msg)

    dest = self.state
    retry
  end
end
|
357
|
+
|
358
|
+
# Asynchronously evaluate and handle the given event condition. Handles logging
# notifications, and moving to the new state if necessary
#   +condition+ is the Condition to handle
#
# An event counts as a positive (true) result. The destination is the
# condition's own transition when set, otherwise the metric's destination
# for true.
#
# Returns the result of #move, or nil when there is no destination
def handle_event(condition)
  # lookup metric
  metric = self.directory[condition]

  # log
  messages = self.log_line(self, metric, condition, true)

  # notify
  self.notify(condition, messages.last) if condition.notify && self.trigger?(metric, true)

  # condition override first, regular metric destination second
  dest = condition.transition || (metric.destination && metric.destination[true])

  self.move(dest) if dest
end
|
389
|
+
|
390
|
+
# Determine whether a trigger happened
#   +metric+ is the Metric
#   +result+ is the result from the condition's test
#
# Returns the destination registered for +result+ (truthy) when there is
# one, otherwise nil/false
def trigger?(metric, result)
  destinations = metric.destination
  destinations && destinations[result]
end
|
398
|
+
|
399
|
+
# Log info about the condition and return the list of messages logged
#   +watch+ is the Watch
#   +metric+ is the Metric
#   +condition+ is the Condition
#   +result+ is the Boolean result of the condition test evaluation
#
# One :info line is logged per entry of the condition's info (or a single
# line without detail when info is not set), followed by one :debug line
# that is not part of the returned list.
#
# Returns String[]
def log_line(watch, metric, condition, result)
  status = self.trigger?(metric, result) ? "[trigger]" : "[ok]"

  info = condition.info
  messages =
    if info
      Array(info).map do |detail|
        "#{watch.name} #{status} #{detail} (#{condition.base_name})"
      end
    else
      ["#{watch.name} #{status} (#{condition.base_name})"]
    end

  messages.each { |line| applog(watch, :info, line) }

  # debug line: condition name, raw result and where it would lead
  debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
  applog(watch, :debug, debug_message)

  messages
end
|
433
|
+
|
434
|
+
# Format the destination specification for use in debug logging
#   +metric+ is the Metric
#   +condition+ is the Condition
#
# The condition's own transition wins and is shown as {true => transition};
# otherwise the metric's destination table is shown, or 'none' when the
# metric has no destination.
#
# Returns String
def dest_desc(metric, condition)
  if condition.transition
    { true => condition.transition }.inspect
  elsif metric.destination
    metric.destination.inspect
  else
    'none'
  end
end
|
450
|
+
|
451
|
+
# Notify all recipeients of the given condition with the specified message
|
452
|
+
# +condition+ is the Condition
|
453
|
+
# +message+ is the String message to send
|
454
|
+
#
|
455
|
+
# Returns nothing
|
456
|
+
def notify(condition, message)
|
457
|
+
spec = Contact.normalize(condition.notify)
|
458
|
+
unmatched = []
|
459
|
+
|
460
|
+
# resolve contacts
|
461
|
+
resolved_contacts =
|
462
|
+
spec[:contacts].inject([]) do |acc, contact_name_or_group|
|
463
|
+
cons = Array(God.contacts[contact_name_or_group] || God.contact_groups[contact_name_or_group])
|
464
|
+
unmatched << contact_name_or_group if cons.empty?
|
465
|
+
acc += cons
|
466
|
+
acc
|
467
|
+
end
|
468
|
+
|
469
|
+
# warn about unmatched contacts
|
470
|
+
unless unmatched.empty?
|
471
|
+
msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
|
472
|
+
applog(condition.watch, :warn, msg)
|
473
|
+
end
|
474
|
+
|
475
|
+
# notify each contact
|
476
|
+
resolved_contacts.each do |c|
|
477
|
+
host = `hostname`.chomp rescue 'none'
|
478
|
+
c.notify(message, Time.now, spec[:priority], spec[:category], host)
|
479
|
+
|
480
|
+
msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
|
481
|
+
|
482
|
+
applog(condition.watch, :info, msg % [])
|
483
|
+
end
|
484
|
+
end
|
485
|
+
end
|
486
|
+
|
487
|
+
end
|