samhendley-god 0.7.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. data/History.txt +293 -0
  2. data/Manifest.txt +114 -0
  3. data/README.txt +60 -0
  4. data/Rakefile +35 -0
  5. data/bin/god +128 -0
  6. data/examples/events.god +84 -0
  7. data/examples/gravatar.god +54 -0
  8. data/examples/single.god +66 -0
  9. data/ext/god/extconf.rb +55 -0
  10. data/ext/god/kqueue_handler.c +123 -0
  11. data/ext/god/netlink_handler.c +167 -0
  12. data/init/god +42 -0
  13. data/lib/god.rb +667 -0
  14. data/lib/god/behavior.rb +52 -0
  15. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  16. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  17. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  18. data/lib/god/cli/command.rb +229 -0
  19. data/lib/god/cli/run.rb +176 -0
  20. data/lib/god/cli/version.rb +23 -0
  21. data/lib/god/condition.rb +96 -0
  22. data/lib/god/conditions/always.rb +23 -0
  23. data/lib/god/conditions/complex.rb +86 -0
  24. data/lib/god/conditions/cpu_usage.rb +80 -0
  25. data/lib/god/conditions/degrading_lambda.rb +52 -0
  26. data/lib/god/conditions/disk_usage.rb +27 -0
  27. data/lib/god/conditions/file_mtime.rb +28 -0
  28. data/lib/god/conditions/flapping.rb +128 -0
  29. data/lib/god/conditions/http_response_code.rb +168 -0
  30. data/lib/god/conditions/lambda.rb +25 -0
  31. data/lib/god/conditions/memory_usage.rb +82 -0
  32. data/lib/god/conditions/process_exits.rb +72 -0
  33. data/lib/god/conditions/process_running.rb +74 -0
  34. data/lib/god/conditions/tries.rb +44 -0
  35. data/lib/god/configurable.rb +57 -0
  36. data/lib/god/contact.rb +106 -0
  37. data/lib/god/contacts/campfire.rb +82 -0
  38. data/lib/god/contacts/email.rb +95 -0
  39. data/lib/god/contacts/jabber.rb +65 -0
  40. data/lib/god/contacts/twitter.rb +39 -0
  41. data/lib/god/contacts/webhook.rb +47 -0
  42. data/lib/god/dependency_graph.rb +41 -0
  43. data/lib/god/diagnostics.rb +37 -0
  44. data/lib/god/driver.rb +206 -0
  45. data/lib/god/errors.rb +24 -0
  46. data/lib/god/event_handler.rb +111 -0
  47. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  48. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  49. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  50. data/lib/god/logger.rb +120 -0
  51. data/lib/god/metric.rb +59 -0
  52. data/lib/god/process.rb +342 -0
  53. data/lib/god/registry.rb +32 -0
  54. data/lib/god/simple_logger.rb +53 -0
  55. data/lib/god/socket.rb +96 -0
  56. data/lib/god/sugar.rb +47 -0
  57. data/lib/god/system/portable_poller.rb +42 -0
  58. data/lib/god/system/process.rb +42 -0
  59. data/lib/god/system/slash_proc_poller.rb +92 -0
  60. data/lib/god/task.rb +491 -0
  61. data/lib/god/timeline.rb +25 -0
  62. data/lib/god/trigger.rb +43 -0
  63. data/lib/god/watch.rb +184 -0
  64. data/test/configs/child_events/child_events.god +44 -0
  65. data/test/configs/child_events/simple_server.rb +3 -0
  66. data/test/configs/child_polls/child_polls.god +37 -0
  67. data/test/configs/child_polls/simple_server.rb +12 -0
  68. data/test/configs/complex/complex.god +59 -0
  69. data/test/configs/complex/simple_server.rb +3 -0
  70. data/test/configs/contact/contact.god +84 -0
  71. data/test/configs/contact/simple_server.rb +3 -0
  72. data/test/configs/daemon_events/daemon_events.god +37 -0
  73. data/test/configs/daemon_events/simple_server.rb +8 -0
  74. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  75. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  76. data/test/configs/daemon_polls/simple_server.rb +6 -0
  77. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  78. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  79. data/test/configs/matias/matias.god +50 -0
  80. data/test/configs/real.rb +59 -0
  81. data/test/configs/running_load/running_load.god +16 -0
  82. data/test/configs/stress/simple_server.rb +3 -0
  83. data/test/configs/stress/stress.god +15 -0
  84. data/test/configs/task/logs/.placeholder +0 -0
  85. data/test/configs/task/task.god +26 -0
  86. data/test/configs/test.rb +61 -0
  87. data/test/helper.rb +151 -0
  88. data/test/suite.rb +6 -0
  89. data/test/test_behavior.rb +21 -0
  90. data/test/test_campfire.rb +41 -0
  91. data/test/test_condition.rb +50 -0
  92. data/test/test_conditions_disk_usage.rb +56 -0
  93. data/test/test_conditions_http_response_code.rb +109 -0
  94. data/test/test_conditions_process_running.rb +44 -0
  95. data/test/test_conditions_tries.rb +67 -0
  96. data/test/test_contact.rb +109 -0
  97. data/test/test_dependency_graph.rb +62 -0
  98. data/test/test_driver.rb +11 -0
  99. data/test/test_email.rb +45 -0
  100. data/test/test_event_handler.rb +80 -0
  101. data/test/test_god.rb +598 -0
  102. data/test/test_handlers_kqueue_handler.rb +16 -0
  103. data/test/test_logger.rb +63 -0
  104. data/test/test_metric.rb +72 -0
  105. data/test/test_process.rb +246 -0
  106. data/test/test_registry.rb +15 -0
  107. data/test/test_socket.rb +42 -0
  108. data/test/test_sugar.rb +42 -0
  109. data/test/test_system_portable_poller.rb +17 -0
  110. data/test/test_system_process.rb +30 -0
  111. data/test/test_task.rb +262 -0
  112. data/test/test_timeline.rb +37 -0
  113. data/test/test_trigger.rb +59 -0
  114. data/test/test_watch.rb +279 -0
  115. metadata +193 -0
@@ -0,0 +1,32 @@
1
module God
  class << self
    # Lazily build and memoize the process-wide Registry.
    #
    # Returns the shared Registry instance
    def registry
      return @registry if @registry
      @registry = Registry.new
    end
  end

  # A name-indexed store of items. Anything that responds to +name+ can be
  # stored; adding a second item with the same name replaces the first.
  class Registry
    def initialize
      @storage = Hash.new
    end

    # Store +item+ under its name.
    #
    # Returns the item
    def add(item)
      # raise TypeError unless item.is_a? God::Process
      @storage.store(item.name, item)
    end

    # Drop the entry stored under +item+'s name.
    #
    # Returns the removed item, or nil if nothing was stored under that name
    def remove(item)
      key = item.name
      @storage.delete(key)
    end

    # Returns the Integer number of stored items
    def size
      @storage.length
    end

    # Look up an item by +name+.
    #
    # Returns the item, or nil if there is none
    def [](name)
      @storage.fetch(name, nil)
    end

    # Forget every stored item.
    #
    # Returns the (now empty) backing Hash
    def reset
      @storage.clear
    end
  end
end
@@ -0,0 +1,53 @@
1
module God

  # A minimal severity-filtered logger that prints formatted lines to an IO.
  #
  # Each emitted line looks like:
  #   I [2009-01-01 12:00:00]  INFO: message
  class SimpleLogger
    # Severity levels; a message is printed when its level is at least +level+.
    DEBUG = 2
    INFO  = 4
    WARN  = 8
    ERROR = 16
    FATAL = 32

    # Severity value -> label used in the output line.
    SEV_LABEL = {
      DEBUG => 'DEBUG',
      INFO  => 'INFO',
      WARN  => 'WARN',
      ERROR => 'ERROR',
      FATAL => 'FATAL'
    }

    attr_accessor :datetime_format, :level

    # +io+ is the object that receives output (must respond to +print+)
    def initialize(io)
      @io = io
      @level = INFO
      @datetime_format = "%Y-%m-%d %H:%M:%S"
    end

    # Print +msg+ if +level+ is not below the logger's threshold.
    # +level+ is one of the severity constants
    # +msg+ is the text to log
    #
    # Returns nil
    def output(level, msg)
      unless level < self.level
        time = Time.now.strftime(datetime_format)
        label = SEV_LABEL[level]
        line = "#{label[0..0]} [#{time}] #{label.rjust(5)}: #{msg}\n"
        @io.print(line)
      end
    end

    # Log +msg+ at FATAL severity
    def fatal(msg)
      output(FATAL, msg)
    end

    # Log +msg+ at ERROR severity
    def error(msg)
      output(ERROR, msg)
    end

    # Log +msg+ at WARN severity
    def warn(msg)
      output(WARN, msg)
    end

    # Log +msg+ at INFO severity
    def info(msg)
      output(INFO, msg)
    end

    # Log +msg+ at DEBUG severity
    def debug(msg)
      output(DEBUG, msg)
    end
  end

end
@@ -0,0 +1,96 @@
1
+ require 'drb'
2
+
3
module God

  # The God::Socket oversees the DRb server which dishes out info on this God daemon.
  class Socket
    attr_reader :port

    class << self
      # The location of the socket for a given port
      #   +port+ is the port number
      #
      # Returns String (file location)
      def socket_file(port)
        "/tmp/god.#{port}.sock"
      end

      # The address of the socket for a given port
      #   +port+ is the port number
      #
      # Returns String (drb address)
      def socket(port)
        "drbunix://#{socket_file(port)}"
      end
    end

    # The location of the socket for this Server
    #
    # Returns String (file location)
    def socket_file
      self.class.socket_file(@port)
    end

    # The address of the socket for this Server
    #
    # Returns String (drb address)
    def socket
      self.class.socket(@port)
    end

    # Create a new Server and start the DRb server
    #   +port+ is the port on which to start the DRb service (default nil)
    def initialize(port = nil)
      @port = port
      start
    end

    # Liveness probe used to detect a running instance.
    #
    # Returns true
    def ping
      true
    end

    # Forward API calls to God. Every message this object does not define
    # itself is sent to the God module with the same arguments and block.
    #
    # Returns whatever the forwarded call returns
    def method_missing(*args, &block)
      God.send(*args, &block)
    end

    # Stop the DRb server and delete the socket file
    #
    # Returns nothing
    def stop
      DRb.stop_service
      FileUtils.rm_f(socket_file)
    end

    private

    # Start the DRb server. Abort if there is already a running god instance
    # on the socket.
    #
    # If the address is in use, the existing socket is pinged (5 second
    # limit). A successful ping means another god owns it and we abort; any
    # error or timeout means the socket file is stale, so it is deleted and
    # the service is started again.
    #
    # Returns nothing
    def start
      @drb ||= DRb.start_service(socket, self)
      applog(nil, :info, "Started on #{DRb.uri}")
    rescue Errno::EADDRINUSE
      applog(nil, :info, "Socket already in use")
      # a local DRb service is needed before we can talk to the remote object
      DRb.start_service
      server = DRbObject.new(nil, socket)

      begin
        Timeout.timeout(5) { server.ping }
        abort "Socket #{socket} already in use by another instance of god"
      rescue StandardError, Timeout::Error
        applog(nil, :info, "Socket is stale, reopening")
        begin
          File.delete(socket_file)
        rescue StandardError
          nil
        end
        @drb ||= DRb.start_service(socket, self)
        applog(nil, :info, "Started on #{DRb.uri}")
      end
    end
  end

end
@@ -0,0 +1,47 @@
1
# Unit helpers on Numeric so configuration can read like 5.minutes or
# 100.megabytes. Durations are expressed in seconds and sizes in kilobytes.
class Numeric
  # Seconds are the base duration unit, so the receiver is returned as-is.
  def seconds
    self
  end
  alias_method :second, :seconds

  # Receiver interpreted as minutes, converted to seconds.
  def minutes
    self * 60
  end
  alias_method :minute, :minutes

  # Receiver interpreted as hours, converted to seconds.
  def hours
    self * 3_600
  end
  alias_method :hour, :hours

  # Receiver interpreted as days, converted to seconds.
  def days
    self * 86_400
  end
  alias_method :day, :days

  # Kilobytes are the base size unit, so the receiver is returned as-is.
  def kilobytes
    self
  end
  alias_method :kilobyte, :kilobytes

  # Receiver interpreted as megabytes, converted to kilobytes.
  def megabytes
    self * 1_024
  end
  alias_method :megabyte, :megabytes

  # Receiver interpreted as gigabytes, converted to kilobytes.
  def gigabytes
    self * (1_024 * 1_024)
  end
  alias_method :gigabyte, :gigabytes

  # Purely for readability (e.g. 80.percent); returns the receiver.
  def percent
    self
  end
end
@@ -0,0 +1,42 @@
1
module God
  module System
    # Reads per-process statistics by shelling out to `ps` for a single pid.
    class PortablePoller
      # +pid+ is the process id that will be passed to `ps -p`
      def initialize(pid)
        @pid = pid
      end

      # Memory usage in kilobytes (resident set size)
      def memory
        ps_int('rss')
      end

      # Percentage memory usage
      def percent_memory
        ps_float('%mem')
      end

      # Percentage CPU usage
      def percent_cpu
        ps_float('%cpu')
      end

      private

      # Raw `ps` output for one column of this pid. The trailing "=" on the
      # column name asks ps to print no header line.
      def ps_value(keyword)
        %x(ps -o #{keyword}= -p #{@pid})
      end

      # The +keyword+ column as an Integer (0 when ps prints nothing numeric)
      def ps_int(keyword)
        ps_value(keyword).to_i
      end

      # The +keyword+ column as a Float (0.0 when ps prints nothing numeric)
      def ps_float(keyword)
        ps_value(keyword).to_f
      end

      # The +keyword+ column as a String with surrounding whitespace removed
      def ps_string(keyword)
        ps_value(keyword).strip
      end

      # Convert a "M:SS.xx"-style time string into whole seconds. The
      # trailing two-digit part is matched but ignored; text that does not
      # match yields 0.
      def time_string_to_seconds(text)
        parts = text.match(/(\d+):(\d{2}).(\d{2})/).to_a
        whole_minutes = parts[1].to_i
        whole_seconds = parts[2].to_i
        (whole_minutes * 60) + whole_seconds
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
module God
  module System

    # Thin wrapper around a pid that answers liveness and resource-usage
    # questions, delegating the measurements to a poller class chosen at
    # construction time.
    class Process
      # +pid+ is anything that responds to +to_i+; it is coerced to an Integer
      def initialize(pid)
        @pid = pid.to_i
        @poller = fetch_system_poller.new(@pid)
      end

      # Return true if this process is running, false otherwise
      #
      # Signal 0 performs only the existence/permission check. EPERM means
      # the process exists but belongs to someone we may not signal, so it
      # still counts as running. Non-positive pids are rejected up front:
      # 0 (what nil.to_i or an empty pid file yields) and negative values
      # address process groups rather than a single process, and
      # kill(0, 0) would always "succeed" against our own group.
      def exists?
        return false unless @pid > 0

        ::Process.kill(0, @pid)
        true
      rescue Errno::EPERM
        true
      rescue StandardError
        false
      end

      # Memory usage in kilobytes (resident set size)
      def memory
        @poller.memory
      end

      # Percentage memory usage
      def percent_memory
        @poller.percent_memory
      end

      # Percentage CPU usage
      def percent_cpu
        @poller.percent_cpu
      end

      private

      # Pick the poller implementation: SlashProcPoller when it reports
      # itself usable, PortablePoller otherwise.
      #
      # Returns the poller Class (not an instance)
      def fetch_system_poller
        if SlashProcPoller.usable?
          SlashProcPoller
        else
          PortablePoller
        end
      end
    end

  end
end
@@ -0,0 +1,92 @@
1
module God
  module System
    # Reads per-process statistics straight from the /proc filesystem
    # instead of shelling out to `ps`.
    class SlashProcPoller < PortablePoller
      @@kb_per_page = 4 # TODO: Need to make this portable
      @@hertz = 100
      @@total_mem = nil

      MeminfoPath = '/proc/meminfo'
      UptimePath = '/proc/uptime'

      RequiredPaths = [MeminfoPath, UptimePath]

      # Names for the whitespace-separated fields of /proc/<pid>/stat, in
      # file order. The nil entry marks a field that is read but not kept.
      StatFields = [
        :pid, :comm, :state, :ppid, :pgrp,
        :session, :tty_nr, :tpgid, :flags,
        :minflt, :cminflt, :majflt, :cmajflt,
        :utime, :stime, :cutime, :cstime,
        :priority, :nice, nil, :itrealvalue,
        :starttime, :vsize, :rss, :rlim,
        :startcode, :endcode, :startstack, :kstkesp,
        :kstkeip, :signal, :blocked, :sigignore,
        :sigcatch, :wchan, :nswap, :cnswap,
        :exit_signal, :processor, :rt_priority,
        :policy
      ]

      # FreeBSD has /proc by default, but nothing mounted there!
      # So we should check for the actual required paths!
      # Returns true if +RequiredPaths+ are readable.
      def self.usable?
        RequiredPaths.all? do |path|
          test(?r, path) && readable?(path)
        end
      end

      # Some systems (CentOS?) have a /proc, but they can hang when trying to
      # read from them. Try to use this sparingly as it is expensive.
      #
      # Returns true if +path+ could be read within one second, false if the
      # read timed out. Other read errors propagate to the caller.
      def self.readable?(path)
        # Timeout.timeout rather than the legacy top-level `timeout` helper,
        # matching how the DRb socket code in this project calls it.
        Timeout.timeout(1) { File.read(path) }
        true
      rescue Timeout::Error
        false
      end

      # +pid+ is the process id to inspect. The first instance also caches
      # the second token of the first line of /proc/meminfo as the total
      # memory figure (in K).
      def initialize(pid)
        super(pid)

        unless @@total_mem # in K
          File.open(MeminfoPath) do |f|
            @@total_mem = f.gets.split[1]
          end
        end
      end

      # Resident set size in kilobytes (pages * @@kb_per_page)
      def memory
        stat[:rss].to_i * @@kb_per_page
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      # Resident memory as a percentage of the cached total
      def percent_memory
        (memory / @@total_mem.to_f) * 100
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      # Average CPU usage over the life of the process, as a percentage
      # TODO: Change this to calculate the wma instead
      def percent_cpu
        stats = stat
        total_time = stats[:utime].to_i + stats[:stime].to_i # in jiffies
        seconds = uptime - stats[:starttime].to_i / @@hertz
        if seconds == 0
          0
        else
          ((total_time * 1000 / @@hertz) / seconds) / 10
        end
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      private

      # in seconds
      def uptime
        File.read(UptimePath).split[0].to_f
      end

      # Parse /proc/<pid>/stat into a Hash keyed by the StatFields names.
      # Values are the raw String tokens; fields missing from the file map
      # to nil.
      def stat
        values = stat_values(File.read("/proc/#{@pid}/stat"))

        stats = {}
        StatFields.each_with_index do |field, index|
          stats[field] = values[index] if field
        end
        stats
      end

      # Split the contents of a stat file into field tokens.
      #
      # The second field is the command name wrapped in parentheses and may
      # itself contain spaces or parentheses, so a plain whitespace split
      # would shift every later field. Everything up to the LAST ')' is
      # treated as "pid (comm)" and only the remainder is split on
      # whitespace.
      def stat_values(raw)
        head, paren, tail = raw.rpartition(')')
        return raw.split if paren.empty?

        pid, comm = head.split(' ', 2)
        [pid, "#{comm})"] + tail.split
      end
    end
  end
end
@@ -0,0 +1,491 @@
1
+ module God
2
+
3
+ class Task
4
+ attr_accessor :name, :interval, :group, :valid_states, :initial_state, :driver
5
+
6
+ attr_writer :autostart
7
+ def autostart?; @autostart; end
8
+
9
+ # api
10
+ attr_accessor :state, :behaviors, :metrics, :directory
11
+
12
+ def initialize
13
+ @autostart ||= true
14
+
15
+ # initial state is unmonitored
16
+ self.state = :unmonitored
17
+
18
+ # the list of behaviors
19
+ self.behaviors = []
20
+
21
+ # the list of conditions for each action
22
+ self.metrics = {nil => [], :unmonitored => [], :stop => []}
23
+
24
+ # the condition -> metric lookup
25
+ self.directory = {}
26
+
27
+ # driver
28
+ self.driver = Driver.new(self)
29
+ end
30
+
31
+ def prepare
32
+ self.valid_states.each do |state|
33
+ self.metrics[state] ||= []
34
+ end
35
+ end
36
+
37
+ def valid?
38
+ valid = true
39
+
40
+ # a name must be specified
41
+ if self.name.nil?
42
+ valid = false
43
+ applog(self, :error, "No name was specified")
44
+ end
45
+
46
+ # valid_states must be specified
47
+ if self.valid_states.nil?
48
+ valid = false
49
+ applog(self, :error, "No valid_states array was specified")
50
+ end
51
+
52
+ # valid_states must be specified
53
+ if self.initial_state.nil?
54
+ valid = false
55
+ applog(self, :error, "No initial_state was specified")
56
+ end
57
+
58
+ valid
59
+ end
60
+
61
+ ###########################################################################
62
+ #
63
+ # Advanced mode
64
+ #
65
+ ###########################################################################
66
+
67
+ def canonical_hash_form(to)
68
+ to.instance_of?(Symbol) ? {true => to} : to
69
+ end
70
+
71
+ # Define a transition handler which consists of a set of conditions
72
+ def transition(start_states, end_states)
73
+ # convert end_states into canonical hash form
74
+ canonical_end_states = canonical_hash_form(end_states)
75
+
76
+ Array(start_states).each do |start_state|
77
+ # validate start state
78
+ unless self.valid_states.include?(start_state)
79
+ abort "Invalid state :#{start_state}. Must be one of the symbols #{self.valid_states.map{|x| ":#{x}"}.join(', ')}"
80
+ end
81
+
82
+ # create a new metric to hold the watch, end states, and conditions
83
+ m = Metric.new(self, canonical_end_states)
84
+
85
+ if block_given?
86
+ # let the config file define some conditions on the metric
87
+ yield(m)
88
+ else
89
+ # add an :always condition if no block
90
+ m.condition(:always) do |c|
91
+ c.what = true
92
+ end
93
+ end
94
+
95
+ # populate the condition -> metric directory
96
+ m.conditions.each do |c|
97
+ self.directory[c] = m
98
+ end
99
+
100
+ # record the metric
101
+ self.metrics[start_state] ||= []
102
+ self.metrics[start_state] << m
103
+ end
104
+ end
105
+
106
+ def lifecycle
107
+ # create a new metric to hold the watch and conditions
108
+ m = Metric.new(self)
109
+
110
+ # let the config file define some conditions on the metric
111
+ yield(m)
112
+
113
+ # populate the condition -> metric directory
114
+ m.conditions.each do |c|
115
+ self.directory[c] = m
116
+ end
117
+
118
+ # record the metric
119
+ self.metrics[nil] << m
120
+ end
121
+
122
+ ###########################################################################
123
+ #
124
+ # Lifecycle
125
+ #
126
+ ###########################################################################
127
+
128
+ # Enable monitoring
129
+ #
130
+ # Returns nothing
131
+ def monitor
132
+ self.move(self.initial_state)
133
+ end
134
+
135
+ # Disable monitoring
136
+ #
137
+ # Returns nothing
138
+ def unmonitor
139
+ self.move(:unmonitored)
140
+ end
141
+
142
+ # Move to the givent state
143
+ # +to_state+ is the Symbol representing the state to move to
144
+ #
145
+ # Returns Task (self)
146
+ def move(to_state)
147
+ if Thread.current != self.driver.thread
148
+ # called from outside Driver
149
+
150
+ # send an async message to Driver
151
+ self.driver.message(:move, [to_state])
152
+ else
153
+ # called from within Driver
154
+
155
+ # record original info
156
+ orig_to_state = to_state
157
+ from_state = self.state
158
+
159
+ # log
160
+ msg = "#{self.name} move '#{from_state}' to '#{to_state}'"
161
+ applog(self, :info, msg)
162
+
163
+ # cleanup from current state
164
+ self.driver.clear_events
165
+ self.metrics[from_state].each { |m| m.disable }
166
+ if to_state == :unmonitored
167
+ self.metrics[nil].each { |m| m.disable }
168
+ end
169
+
170
+ # perform action
171
+ self.action(to_state)
172
+
173
+ # enable simple mode
174
+ if [:start, :restart].include?(to_state) && self.metrics[to_state].empty?
175
+ to_state = :up
176
+ end
177
+
178
+ # move to new state
179
+ self.metrics[to_state].each { |m| m.enable }
180
+
181
+ # if no from state, enable lifecycle metric
182
+ if from_state == :unmonitored
183
+ self.metrics[nil].each { |m| m.enable }
184
+ end
185
+
186
+ # set state
187
+ self.state = to_state
188
+
189
+ # broadcast to interested TriggerConditions
190
+ Trigger.broadcast(self, :state_change, [from_state, orig_to_state])
191
+
192
+ # log
193
+ msg = "#{self.name} moved '#{from_state}' to '#{to_state}'"
194
+ applog(self, :info, msg)
195
+ end
196
+
197
+ self
198
+ end
199
+
200
+ # Notify the Driver that an EventCondition has triggered
201
+ #
202
+ # Returns nothing
203
+ def trigger(condition)
204
+ self.driver.message(:handle_event, [condition])
205
+ end
206
+
207
+ def signal(sig)
208
+ # noop
209
+ end
210
+
211
+ ###########################################################################
212
+ #
213
+ # Actions
214
+ #
215
+ ###########################################################################
216
+
217
+ def method_missing(sym, *args)
218
+ unless (sym.to_s =~ /=$/)
219
+ super
220
+ end
221
+
222
+ base = sym.to_s.chop.intern
223
+
224
+ unless self.valid_states.include?(base)
225
+ super
226
+ end
227
+
228
+ self.class.send(:attr_accessor, base)
229
+ self.send(sym, *args)
230
+ end
231
+
232
+ # Perform the given action
233
+ # +a+ is the action Symbol
234
+ # +c+ is the Condition
235
+ #
236
+ # Returns Task (self)
237
+ def action(a, c = nil)
238
+ if Thread.current != self.driver.thread
239
+ # called from outside Driver
240
+
241
+ # send an async message to Driver
242
+ self.driver.message(:action, [a, c])
243
+ else
244
+ # called from within Driver
245
+
246
+ if self.respond_to?(a)
247
+ command = self.send(a)
248
+
249
+ case command
250
+ when String
251
+ msg = "#{self.name} #{a}: #{command}"
252
+ applog(self, :info, msg)
253
+
254
+ system(command)
255
+ when Proc
256
+ msg = "#{self.name} #{a}: lambda"
257
+ applog(self, :info, msg)
258
+
259
+ command.call
260
+ else
261
+ raise NotImplementedError
262
+ end
263
+ end
264
+ end
265
+ end
266
+
267
+ ###########################################################################
268
+ #
269
+ # Events
270
+ #
271
+ ###########################################################################
272
+
273
+ def attach(condition)
274
+ case condition
275
+ when PollCondition
276
+ self.driver.schedule(condition, 0)
277
+ when EventCondition, TriggerCondition
278
+ condition.register
279
+ end
280
+ end
281
+
282
+ def detach(condition)
283
+ case condition
284
+ when PollCondition
285
+ condition.reset
286
+ when EventCondition, TriggerCondition
287
+ condition.deregister
288
+ end
289
+ end
290
+
291
+ ###########################################################################
292
+ #
293
+ # Registration
294
+ #
295
+ ###########################################################################
296
+
297
+ def register!
298
+ # override if necessary
299
+ end
300
+
301
+ def unregister!
302
+ # override if necessary
303
+ end
304
+
305
+ ###########################################################################
306
+ #
307
+ # Handlers
308
+ #
309
+ ###########################################################################
310
+
311
+ # Evaluate and handle the given poll condition. Handles logging
312
+ # notifications, and moving to the new state if necessary
313
+ # +condition+ is the Condition to handle
314
+ #
315
+ # Returns nothing
316
+ def handle_poll(condition)
317
+ # lookup metric
318
+ metric = self.directory[condition]
319
+
320
+ # run the test
321
+ result = condition.test
322
+
323
+ # log
324
+ messages = self.log_line(self, metric, condition, result)
325
+
326
+ # notify
327
+ if result && condition.notify
328
+ self.notify(condition, messages.last)
329
+ end
330
+
331
+ # after-condition
332
+ condition.after
333
+
334
+ # get the destination
335
+ dest =
336
+ if result && condition.transition
337
+ # condition override
338
+ condition.transition
339
+ else
340
+ # regular
341
+ metric.destination && metric.destination[result]
342
+ end
343
+
344
+ # transition or reschedule
345
+ if dest
346
+ # transition
347
+ begin
348
+ self.move(dest)
349
+ rescue EventRegistrationFailedError
350
+ msg = self.name + ' Event registration failed, moving back to previous state'
351
+ applog(self, :info, msg)
352
+
353
+ dest = self.state
354
+ retry
355
+ end
356
+ else
357
+ # reschedule
358
+ self.driver.schedule(condition)
359
+ end
360
+ end
361
+
362
+ # Asynchronously evaluate and handle the given event condition. Handles logging
363
+ # notifications, and moving to the new state if necessary
364
+ # +condition+ is the Condition to handle
365
+ #
366
+ # Returns nothing
367
+ def handle_event(condition)
368
+ # lookup metric
369
+ metric = self.directory[condition]
370
+
371
+ # log
372
+ messages = self.log_line(self, metric, condition, true)
373
+
374
+ # notify
375
+ if condition.notify
376
+ self.notify(condition, messages.last)
377
+ end
378
+
379
+ # get the destination
380
+ dest =
381
+ if condition.transition
382
+ # condition override
383
+ condition.transition
384
+ else
385
+ # regular
386
+ metric.destination && metric.destination[true]
387
+ end
388
+
389
+ if dest
390
+ self.move(dest)
391
+ end
392
+ end
393
+
394
+ # Determine whether a trigger happened
395
+ # +metric+ is the Metric
396
+ # +result+ is the result from the condition's test
397
+ #
398
+ # Returns Boolean
399
+ def trigger?(metric, result)
400
+ metric.destination && metric.destination[result]
401
+ end
402
+
403
+ # Log info about the condition and return the list of messages logged
404
+ # +watch+ is the Watch
405
+ # +metric+ is the Metric
406
+ # +condition+ is the Condition
407
+ # +result+ is the Boolean result of the condition test evaluation
408
+ #
409
+ # Returns String[]
410
+ def log_line(watch, metric, condition, result)
411
+ status =
412
+ if self.trigger?(metric, result)
413
+ "[trigger]"
414
+ else
415
+ "[ok]"
416
+ end
417
+
418
+ messages = []
419
+
420
+ # log info if available
421
+ if condition.info
422
+ Array(condition.info).each do |condition_info|
423
+ messages << "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
424
+ applog(watch, :info, messages.last)
425
+ end
426
+ else
427
+ messages << "#{watch.name} #{status} (#{condition.base_name})"
428
+ applog(watch, :info, messages.last)
429
+ end
430
+
431
+ # log
432
+ debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
433
+ applog(watch, :debug, debug_message)
434
+
435
+ messages
436
+ end
437
+
438
+ # Format the destination specification for use in debug logging
439
+ # +metric+ is the Metric
440
+ # +condition+ is the Condition
441
+ #
442
+ # Returns String
443
+ def dest_desc(metric, condition)
444
+ if condition.transition
445
+ {true => condition.transition}.inspect
446
+ else
447
+ if metric.destination
448
+ metric.destination.inspect
449
+ else
450
+ 'none'
451
+ end
452
+ end
453
+ end
454
+
455
+ # Notify all recipeients of the given condition with the specified message
456
+ # +condition+ is the Condition
457
+ # +message+ is the String message to send
458
+ #
459
+ # Returns nothing
460
+ def notify(condition, message)
461
+ spec = Contact.normalize(condition.notify)
462
+ unmatched = []
463
+
464
+ # resolve contacts
465
+ resolved_contacts =
466
+ spec[:contacts].inject([]) do |acc, contact_name_or_group|
467
+ cons = Array(God.contacts[contact_name_or_group] || God.contact_groups[contact_name_or_group])
468
+ unmatched << contact_name_or_group if cons.empty?
469
+ acc += cons
470
+ acc
471
+ end
472
+
473
+ # warn about unmatched contacts
474
+ unless unmatched.empty?
475
+ msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
476
+ applog(condition.watch, :warn, msg)
477
+ end
478
+
479
+ # notify each contact
480
+ resolved_contacts.each do |c|
481
+ host = `hostname`.chomp rescue 'none'
482
+ c.notify(message, Time.now, spec[:priority], spec[:category], host)
483
+
484
+ msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
485
+
486
+ applog(condition.watch, :info, msg % [])
487
+ end
488
+ end
489
+ end
490
+
491
+ end