god 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61)
  1. data/History.txt +43 -7
  2. data/Manifest.txt +20 -4
  3. data/Rakefile +1 -1
  4. data/bin/god +263 -195
  5. data/examples/events.god +66 -34
  6. data/examples/gravatar.god +25 -12
  7. data/init/god +42 -0
  8. data/lib/god/behavior.rb +9 -29
  9. data/lib/god/behaviors/clean_pid_file.rb +6 -2
  10. data/lib/god/behaviors/notify_when_flapping.rb +4 -4
  11. data/lib/god/condition.rb +48 -6
  12. data/lib/god/conditions/always.rb +5 -1
  13. data/lib/god/conditions/cpu_usage.rb +13 -5
  14. data/lib/god/conditions/degrading_lambda.rb +8 -3
  15. data/lib/god/conditions/flapping.rb +97 -0
  16. data/lib/god/conditions/http_response_code.rb +97 -0
  17. data/lib/god/conditions/lambda.rb +8 -2
  18. data/lib/god/conditions/memory_usage.rb +13 -5
  19. data/lib/god/conditions/process_exits.rb +11 -3
  20. data/lib/god/conditions/process_running.rb +22 -4
  21. data/lib/god/conditions/tries.rb +16 -5
  22. data/lib/god/configurable.rb +54 -0
  23. data/lib/god/contact.rb +106 -0
  24. data/lib/god/contacts/email.rb +73 -0
  25. data/lib/god/errors.rb +3 -0
  26. data/lib/god/hub.rb +138 -33
  27. data/lib/god/logger.rb +21 -4
  28. data/lib/god/metric.rb +3 -4
  29. data/lib/god/process.rb +93 -49
  30. data/lib/god/socket.rb +60 -0
  31. data/lib/god/task.rb +233 -0
  32. data/lib/god/trigger.rb +43 -0
  33. data/lib/god/watch.rb +48 -114
  34. data/lib/god.rb +216 -63
  35. data/test/configs/child_events/child_events.god +20 -1
  36. data/test/configs/child_polls/child_polls.god +26 -6
  37. data/test/configs/child_polls/simple_server.rb +10 -1
  38. data/test/configs/contact/contact.god +74 -0
  39. data/test/configs/contact/simple_server.rb +3 -0
  40. data/test/configs/daemon_events/daemon_events.god +5 -2
  41. data/test/configs/daemon_events/simple_server.rb +2 -0
  42. data/test/configs/daemon_events/simple_server_stop.rb +9 -0
  43. data/test/configs/degrading_lambda/degrading_lambda.god +1 -3
  44. data/test/configs/task/logs/.placeholder +0 -0
  45. data/test/configs/task/task.god +26 -0
  46. data/test/helper.rb +19 -11
  47. data/test/test_conditions_http_response_code.rb +115 -0
  48. data/test/test_conditions_process_running.rb +2 -2
  49. data/test/test_conditions_tries.rb +21 -0
  50. data/test/test_contact.rb +109 -0
  51. data/test/test_god.rb +101 -17
  52. data/test/test_hub.rb +64 -1
  53. data/test/test_process.rb +43 -56
  54. data/test/{test_server.rb → test_socket.rb} +6 -20
  55. data/test/test_task.rb +86 -0
  56. data/test/test_trigger.rb +59 -0
  57. data/test/test_watch.rb +32 -7
  58. metadata +27 -8
  59. data/lib/god/reporter.rb +0 -25
  60. data/lib/god/server.rb +0 -37
  61. data/test/test_reporter.rb +0 -18
data/lib/god/hub.rb CHANGED
@@ -3,44 +3,41 @@ module God
3
3
  class Hub
4
4
  class << self
5
5
  # directory to hold conditions and their corresponding metric
6
- # key: condition
7
- # val: metric
6
+ # {condition => metric}
8
7
  attr_accessor :directory
9
8
  end
10
9
 
11
10
  self.directory = {}
12
11
 
13
12
  def self.attach(condition, metric)
14
- # add the condition to the directory
15
13
  self.directory[condition] = metric
14
+ condition.reset
16
15
 
17
- # schedule poll condition
18
- # register event condition
19
- if condition.kind_of?(PollCondition)
20
- Timer.get.schedule(condition, 0)
21
- else
22
- condition.register
16
+ case condition
17
+ when PollCondition
18
+ Timer.get.schedule(condition, 0)
19
+ when EventCondition, TriggerCondition
20
+ condition.register
23
21
  end
24
22
  end
25
23
 
26
24
  def self.detach(condition)
27
- # remove the condition from the directory
28
25
  self.directory.delete(condition)
29
26
 
30
- # unschedule any pending polls
31
- Timer.get.unschedule(condition)
32
-
33
- # deregister event condition
34
- if condition.kind_of?(EventCondition)
35
- condition.deregister
27
+ case condition
28
+ when PollCondition
29
+ Timer.get.unschedule(condition)
30
+ when EventCondition, TriggerCondition
31
+ condition.deregister
36
32
  end
37
33
  end
38
34
 
39
35
  def self.trigger(condition)
40
- if condition.kind_of?(PollCondition)
41
- self.handle_poll(condition)
42
- elsif condition.kind_of?(EventCondition)
43
- self.handle_event(condition)
36
+ case condition
37
+ when PollCondition
38
+ self.handle_poll(condition)
39
+ when EventCondition, TriggerCondition
40
+ self.handle_event(condition)
44
41
  end
45
42
  end
46
43
 
@@ -59,9 +56,12 @@ module God
59
56
  result = condition.test
60
57
 
61
58
  # log
62
- msg = watch.name + ' ' + condition.class.name + " [#{result}] " + metric.destination.inspect
63
- Syslog.debug(msg)
64
- LOG.log(watch, :info, msg)
59
+ messages = self.log(watch, metric, condition, result)
60
+
61
+ # notify
62
+ if condition.notify && self.trigger?(metric, result)
63
+ self.notify(condition, messages.last)
64
+ end
65
65
 
66
66
  # after-condition
67
67
  condition.after
@@ -73,7 +73,7 @@ module God
73
73
  condition.transition
74
74
  else
75
75
  # regular
76
- metric.destination[result]
76
+ metric.destination && metric.destination[result]
77
77
  end
78
78
 
79
79
  # transition or reschedule
@@ -106,19 +106,124 @@ module God
106
106
 
107
107
  def self.handle_event(condition)
108
108
  Thread.new do
109
- metric = self.directory[condition]
110
- watch = metric.watch
111
-
112
- watch.mutex.synchronize do
113
- msg = watch.name + ' ' + condition.class.name + " [true] " + metric.destination.inspect
114
- Syslog.debug(msg)
115
- LOG.log(watch, :info, msg)
109
+ begin
110
+ metric = self.directory[condition]
116
111
 
117
- dest = metric.destination[true]
118
- watch.move(dest)
112
+ unless metric.nil?
113
+ watch = metric.watch
114
+
115
+ watch.mutex.synchronize do
116
+ # log
117
+ messages = self.log(watch, metric, condition, true)
118
+
119
+ # notify
120
+ if condition.notify && self.trigger?(metric, true)
121
+ self.notify(condition, messages.last)
122
+ end
123
+
124
+ # get the destination
125
+ dest =
126
+ if condition.transition
127
+ # condition override
128
+ condition.transition
129
+ else
130
+ # regular
131
+ metric.destination && metric.destination[true]
132
+ end
133
+
134
+ if dest
135
+ watch.move(dest)
136
+ end
137
+ end
138
+ end
139
+ rescue => e
140
+ message = format("Unhandled exception (%s): %s\n%s",
141
+ e.class, e.message, e.backtrace.join("\n"))
142
+ Syslog.crit message
143
+ abort message
144
+ end
145
+ end
146
+ end
147
+
148
+ # helpers
149
+
150
+ def self.trigger?(metric, result)
151
+ (metric.destination && metric.destination.keys.size == 2) || result == true
152
+ end
153
+
154
+ def self.log(watch, metric, condition, result)
155
+ status =
156
+ if self.trigger?(metric, result)
157
+ "[trigger]"
158
+ else
159
+ "[ok]"
160
+ end
161
+
162
+ messages = []
163
+
164
+ # log info if available
165
+ if condition.info
166
+ Array(condition.info).each do |condition_info|
167
+ messages << "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
168
+ Syslog.debug(messages.last)
169
+ LOG.log(watch, :info, messages.last % [])
170
+ end
171
+ else
172
+ messages << "#{watch.name} #{status} (#{condition.base_name})"
173
+ Syslog.debug(messages.last)
174
+ LOG.log(watch, :info, messages.last % [])
175
+ end
176
+
177
+ # log
178
+ debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
179
+ Syslog.debug(debug_message)
180
+ LOG.log(watch, :debug, debug_message)
181
+
182
+ messages
183
+ end
184
+
185
+ def self.dest_desc(metric, condition)
186
+ if condition.transition
187
+ {true => condition.transition}.inspect
188
+ else
189
+ if metric.destination
190
+ metric.destination.inspect
191
+ else
192
+ 'none'
119
193
  end
120
194
  end
121
195
  end
196
+
197
+ def self.notify(condition, message)
198
+ spec = Contact.normalize(condition.notify)
199
+ unmatched = []
200
+
201
+ # resolve contacts
202
+ resolved_contacts =
203
+ spec[:contacts].inject([]) do |acc, contact_name_or_group|
204
+ cons = Array(God.contacts[contact_name_or_group] || God.contact_groups[contact_name_or_group])
205
+ unmatched << contact_name_or_group if cons.empty?
206
+ acc += cons
207
+ acc
208
+ end
209
+
210
+ # warn about unmatched contacts
211
+ unless unmatched.empty?
212
+ msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
213
+ LOG.log(condition.watch, :warn, msg)
214
+ end
215
+
216
+ # notify each contact
217
+ resolved_contacts.each do |c|
218
+ host = `hostname`.chomp rescue 'none'
219
+ c.notify(message, Time.now, spec[:priority], spec[:category], host)
220
+
221
+ msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
222
+
223
+ Syslog.debug(msg)
224
+ LOG.log(condition.watch, :info, msg % [])
225
+ end
226
+ end
122
227
  end
123
228
 
124
229
  end
data/lib/god/logger.rb CHANGED
@@ -4,21 +4,38 @@ module God
4
4
  attr_accessor :logs
5
5
 
6
6
  def initialize
7
- super(STDOUT)
7
+ super($stdout)
8
8
  self.logs = {}
9
9
  @mutex = Mutex.new
10
+ @capture = nil
11
+ end
12
+
13
+ def start_capture
14
+ @mutex.synchronize do
15
+ @capture = StringIO.new
16
+ end
17
+ end
18
+
19
+ def finish_capture
20
+ @mutex.synchronize do
21
+ cap = @capture.string
22
+ @capture = nil
23
+ cap
24
+ end
10
25
  end
11
26
 
12
27
  def log(watch, level, text)
13
28
  # initialize watch log if necessary
14
- self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
29
+ self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT) if watch
15
30
 
16
- # push onto timeline for the given watch
31
+ # push onto capture and timeline for the given watch
17
32
  buf = StringIO.new
18
33
  templog = ::Logger.new(buf)
34
+ templog.level = Logger::INFO
19
35
  templog.send(level, text)
20
36
  @mutex.synchronize do
21
- self.logs[watch.name] << [Time.now, buf.string]
37
+ @capture.puts(buf.string) if @capture
38
+ self.logs[watch.name] << [Time.now, buf.string] if watch
22
39
  end
23
40
  templog.close
24
41
 
data/lib/god/metric.rb CHANGED
@@ -3,7 +3,7 @@ module God
3
3
  class Metric
4
4
  attr_accessor :watch, :destination, :conditions
5
5
 
6
- def initialize(watch, destination)
6
+ def initialize(watch, destination = nil)
7
7
  self.watch = watch
8
8
  self.destination = destination
9
9
  self.conditions = []
@@ -25,9 +25,8 @@ module God
25
25
  # call prepare on the condition
26
26
  c.prepare
27
27
 
28
- # abort if the Condition is invalid, the Condition will have printed
29
- # out its own error messages by now
30
- unless c.valid?
28
+ # test generic and specific validity
29
+ unless Condition.valid?(c) && c.valid?
31
30
  abort "Exiting on invalid condition"
32
31
  end
33
32
 
data/lib/god/process.rb CHANGED
@@ -7,8 +7,11 @@ module God
7
7
  attr_accessor :name, :uid, :gid, :log, :start, :stop, :restart
8
8
 
9
9
  def initialize
10
+ self.log = '/dev/null'
11
+
10
12
  @pid_file = nil
11
- @tracking_pid = false
13
+ @tracking_pid = true
14
+ @user_log = false
12
15
  end
13
16
 
14
17
  def alive?
@@ -20,18 +23,24 @@ module God
20
23
  end
21
24
  end
22
25
 
26
+ def file_writable?(file)
27
+ pid = fork do
28
+ ::Process::Sys.setgid(Etc.getgrnam(self.gid).gid) if self.gid
29
+ ::Process::Sys.setuid(Etc.getpwnam(self.uid).uid) if self.uid
30
+
31
+ File.writable?(file) ? exit(0) : exit(1)
32
+ end
33
+
34
+ wpid, status = ::Process.waitpid2(pid)
35
+ status.exitstatus == 0 ? true : false
36
+ end
37
+
23
38
  def valid?
24
39
  # determine if we're tracking pid or not
25
40
  self.pid_file
26
41
 
27
42
  valid = true
28
43
 
29
- # a name must be specified
30
- if self.name.nil?
31
- valid = false
32
- LOG.log(self, :error, "No name was specified")
33
- end
34
-
35
44
  # a start command must be specified
36
45
  if self.start.nil?
37
46
  valid = false
@@ -44,12 +53,6 @@ module God
44
53
  LOG.log(self, :error, "No stop command was specified")
45
54
  end
46
55
 
47
- # self-daemonizing processes cannot specify log
48
- if !@tracking_pid && self.log
49
- valid = false
50
- LOG.log(self, :error, "Self-daemonizing processes cannot specify a log file")
51
- end
52
-
53
56
  # uid must exist if specified
54
57
  if self.uid
55
58
  begin
@@ -70,22 +73,55 @@ module God
70
73
  end
71
74
  end
72
75
 
76
+ # pid dir must exist if specified
77
+ if !@tracking_pid && !File.exist?(File.dirname(self.pid_file))
78
+ valid = false
79
+ LOG.log(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' does not exist")
80
+ end
81
+
82
+ # pid dir must be writable if specified
83
+ if !@tracking_pid && !file_writable?(File.dirname(self.pid_file))
84
+ valid = false
85
+ LOG.log(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' is not writable by #{self.uid || Etc.getlogin}")
86
+ end
87
+
88
+ # log dir must exist
89
+ if !File.exist?(File.dirname(self.log))
90
+ valid = false
91
+ LOG.log(self, :error, "Log directory '#{File.dirname(self.log)}' does not exist")
92
+ end
93
+
94
+ # log file or dir must be writable
95
+ if File.exist?(self.log)
96
+ unless file_writable?(self.log)
97
+ valid = false
98
+ LOG.log(self, :error, "Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
99
+ end
100
+ else
101
+ unless file_writable?(File.dirname(self.log))
102
+ valid = false
103
+ LOG.log(self, :error, "Log directory '#{File.dirname(self.log)}' is not writable by #{self.uid || Etc.getlogin}")
104
+ end
105
+ end
106
+
73
107
  valid
74
108
  end
75
109
 
76
110
  # DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
77
111
  # No really, trust me. Use the instance variable.
78
112
  def pid_file=(value)
79
- @tracking_pid = false
113
+ # if value is nil, do the right thing
114
+ if value
115
+ @tracking_pid = false
116
+ else
117
+ @tracking_pid = true
118
+ end
119
+
80
120
  @pid_file = value
81
121
  end
82
122
 
83
123
  def pid_file
84
- if @pid_file.nil?
85
- @tracking_pid = true
86
- @pid_file = default_pid_file
87
- end
88
- @pid_file
124
+ @pid_file ||= default_pid_file
89
125
  end
90
126
 
91
127
  def start!
@@ -100,6 +136,21 @@ module God
100
136
  call_action(:restart)
101
137
  end
102
138
 
139
+ def spawn(command)
140
+ fork do
141
+ ::Process.setsid
142
+ ::Process::Sys.setgid(Etc.getgrnam(self.gid).gid) if self.gid
143
+ ::Process::Sys.setuid(Etc.getpwnam(self.uid).uid) if self.uid
144
+ Dir.chdir "/"
145
+ $0 = command
146
+ STDIN.reopen "/dev/null"
147
+ STDOUT.reopen self.log, "a"
148
+ STDERR.reopen STDOUT
149
+
150
+ exec command unless command.empty?
151
+ end
152
+ end
153
+
103
154
  def call_action(action)
104
155
  command = send(action)
105
156
 
@@ -110,7 +161,7 @@ module God
110
161
  LOG.log(self, :info, "#{self.name} stop: default lambda killer")
111
162
 
112
163
  ::Process.kill('HUP', pid) rescue nil
113
-
164
+
114
165
  # Poll to see if it's dead
115
166
  5.times do
116
167
  begin
@@ -119,44 +170,38 @@ module God
119
170
  # It died. Good.
120
171
  return
121
172
  end
122
-
173
+
123
174
  sleep 1
124
175
  end
125
-
176
+
126
177
  ::Process.kill('KILL', pid) rescue nil
127
178
  end
128
179
  end
129
180
 
130
181
  if command.kind_of?(String)
131
- # string command
132
- # fork/exec to setuid/gid
133
- r, w = IO.pipe
134
- opid = fork do
135
- STDOUT.reopen(w)
136
- r.close
137
- pid = fork do
138
- ::Process.setsid
139
- ::Process::Sys.setgid(Etc.getgrnam(self.gid).gid) if self.gid
140
- ::Process::Sys.setuid(Etc.getpwnam(self.uid).uid) if self.uid
141
- Dir.chdir "/"
142
- $0 = command
143
- STDIN.reopen "/dev/null"
144
- if self.log
145
- STDOUT.reopen self.log, "a"
146
- else
147
- STDOUT.reopen "/dev/null", "a"
148
- end
149
- STDERR.reopen STDOUT
150
-
151
- exec command unless command.empty?
182
+ pid = nil
183
+
184
+ if @tracking_pid
185
+ # double fork god-daemonized processes
186
+ # we don't want to wait for them to finish
187
+ r, w = IO.pipe
188
+ opid = fork do
189
+ STDOUT.reopen(w)
190
+ r.close
191
+ pid = self.spawn(command)
192
+ puts pid.to_s
152
193
  end
153
- puts pid.to_s
194
+
195
+ ::Process.waitpid(opid, 0)
196
+ w.close
197
+ pid = r.gets.chomp
198
+ else
199
+ # single fork self-daemonizing processes
200
+ # we want to wait for them to finish
201
+ pid = self.spawn(command)
202
+ ::Process.waitpid(pid, 0)
154
203
  end
155
204
 
156
- ::Process.waitpid(opid, 0)
157
- w.close
158
- pid = r.gets.chomp
159
-
160
205
  if @tracking_pid or (@pid_file.nil? and WRITES_PID.include?(action))
161
206
  File.open(default_pid_file, 'w') do |f|
162
207
  f.write pid
@@ -165,7 +210,6 @@ module God
165
210
  @tracking_pid = true
166
211
  @pid_file = default_pid_file
167
212
  end
168
-
169
213
  elsif command.kind_of?(Proc)
170
214
  # lambda command
171
215
  command.call
data/lib/god/socket.rb ADDED
@@ -0,0 +1,60 @@
1
+ require 'drb'
2
+
3
+ # The God::Server oversees the DRb server which dishes out info on this God daemon.
4
+
5
+ module God
6
+
7
+ class Socket
8
+ attr_reader :port
9
+
10
+ def self.socket_file(port)
11
+ "/tmp/god.#{port}.sock"
12
+ end
13
+
14
+ def self.socket(port)
15
+ "drbunix://#{self.socket_file(port)}"
16
+ end
17
+
18
+ def socket_file
19
+ self.class.socket_file(@port)
20
+ end
21
+
22
+ def socket
23
+ self.class.socket(@port)
24
+ end
25
+
26
+ def initialize(port = nil)
27
+ @port = port
28
+ start
29
+ end
30
+
31
+ def ping
32
+ true
33
+ end
34
+
35
+ def method_missing(*args, &block)
36
+ God.send(*args, &block)
37
+ end
38
+
39
+ private
40
+
41
+ def start
42
+ begin
43
+ @drb ||= DRb.start_service(self.socket, self)
44
+ LOG.log(nil, :info, "Started on #{DRb.uri}")
45
+ rescue Errno::EADDRINUSE
46
+ DRb.start_service
47
+ server = DRbObject.new(nil, self.socket)
48
+
49
+ begin
50
+ server.ping
51
+ abort "Socket #{self.socket} already in use by another instance of god"
52
+ rescue
53
+ File.delete(self.socket_file) rescue nil
54
+ @drb ||= DRb.start_service(self.socket, self)
55
+ end
56
+ end
57
+ end
58
+ end
59
+
60
+ end