god 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. data/History.txt +43 -7
  2. data/Manifest.txt +20 -4
  3. data/Rakefile +1 -1
  4. data/bin/god +263 -195
  5. data/examples/events.god +66 -34
  6. data/examples/gravatar.god +25 -12
  7. data/init/god +42 -0
  8. data/lib/god/behavior.rb +9 -29
  9. data/lib/god/behaviors/clean_pid_file.rb +6 -2
  10. data/lib/god/behaviors/notify_when_flapping.rb +4 -4
  11. data/lib/god/condition.rb +48 -6
  12. data/lib/god/conditions/always.rb +5 -1
  13. data/lib/god/conditions/cpu_usage.rb +13 -5
  14. data/lib/god/conditions/degrading_lambda.rb +8 -3
  15. data/lib/god/conditions/flapping.rb +97 -0
  16. data/lib/god/conditions/http_response_code.rb +97 -0
  17. data/lib/god/conditions/lambda.rb +8 -2
  18. data/lib/god/conditions/memory_usage.rb +13 -5
  19. data/lib/god/conditions/process_exits.rb +11 -3
  20. data/lib/god/conditions/process_running.rb +22 -4
  21. data/lib/god/conditions/tries.rb +16 -5
  22. data/lib/god/configurable.rb +54 -0
  23. data/lib/god/contact.rb +106 -0
  24. data/lib/god/contacts/email.rb +73 -0
  25. data/lib/god/errors.rb +3 -0
  26. data/lib/god/hub.rb +138 -33
  27. data/lib/god/logger.rb +21 -4
  28. data/lib/god/metric.rb +3 -4
  29. data/lib/god/process.rb +93 -49
  30. data/lib/god/socket.rb +60 -0
  31. data/lib/god/task.rb +233 -0
  32. data/lib/god/trigger.rb +43 -0
  33. data/lib/god/watch.rb +48 -114
  34. data/lib/god.rb +216 -63
  35. data/test/configs/child_events/child_events.god +20 -1
  36. data/test/configs/child_polls/child_polls.god +26 -6
  37. data/test/configs/child_polls/simple_server.rb +10 -1
  38. data/test/configs/contact/contact.god +74 -0
  39. data/test/configs/contact/simple_server.rb +3 -0
  40. data/test/configs/daemon_events/daemon_events.god +5 -2
  41. data/test/configs/daemon_events/simple_server.rb +2 -0
  42. data/test/configs/daemon_events/simple_server_stop.rb +9 -0
  43. data/test/configs/degrading_lambda/degrading_lambda.god +1 -3
  44. data/test/configs/task/logs/.placeholder +0 -0
  45. data/test/configs/task/task.god +26 -0
  46. data/test/helper.rb +19 -11
  47. data/test/test_conditions_http_response_code.rb +115 -0
  48. data/test/test_conditions_process_running.rb +2 -2
  49. data/test/test_conditions_tries.rb +21 -0
  50. data/test/test_contact.rb +109 -0
  51. data/test/test_god.rb +101 -17
  52. data/test/test_hub.rb +64 -1
  53. data/test/test_process.rb +43 -56
  54. data/test/{test_server.rb → test_socket.rb} +6 -20
  55. data/test/test_task.rb +86 -0
  56. data/test/test_trigger.rb +59 -0
  57. data/test/test_watch.rb +32 -7
  58. metadata +27 -8
  59. data/lib/god/reporter.rb +0 -25
  60. data/lib/god/server.rb +0 -37
  61. data/test/test_reporter.rb +0 -18
data/lib/god/hub.rb CHANGED
@@ -3,44 +3,41 @@ module God
3
3
  class Hub
4
4
  class << self
5
5
  # directory to hold conditions and their corresponding metric
6
- # key: condition
7
- # val: metric
6
+ # {condition => metric}
8
7
  attr_accessor :directory
9
8
  end
10
9
 
11
10
  self.directory = {}
12
11
 
13
12
  def self.attach(condition, metric)
14
- # add the condition to the directory
15
13
  self.directory[condition] = metric
14
+ condition.reset
16
15
 
17
- # schedule poll condition
18
- # register event condition
19
- if condition.kind_of?(PollCondition)
20
- Timer.get.schedule(condition, 0)
21
- else
22
- condition.register
16
+ case condition
17
+ when PollCondition
18
+ Timer.get.schedule(condition, 0)
19
+ when EventCondition, TriggerCondition
20
+ condition.register
23
21
  end
24
22
  end
25
23
 
26
24
  def self.detach(condition)
27
- # remove the condition from the directory
28
25
  self.directory.delete(condition)
29
26
 
30
- # unschedule any pending polls
31
- Timer.get.unschedule(condition)
32
-
33
- # deregister event condition
34
- if condition.kind_of?(EventCondition)
35
- condition.deregister
27
+ case condition
28
+ when PollCondition
29
+ Timer.get.unschedule(condition)
30
+ when EventCondition, TriggerCondition
31
+ condition.deregister
36
32
  end
37
33
  end
38
34
 
39
35
  def self.trigger(condition)
40
- if condition.kind_of?(PollCondition)
41
- self.handle_poll(condition)
42
- elsif condition.kind_of?(EventCondition)
43
- self.handle_event(condition)
36
+ case condition
37
+ when PollCondition
38
+ self.handle_poll(condition)
39
+ when EventCondition, TriggerCondition
40
+ self.handle_event(condition)
44
41
  end
45
42
  end
46
43
 
@@ -59,9 +56,12 @@ module God
59
56
  result = condition.test
60
57
 
61
58
  # log
62
- msg = watch.name + ' ' + condition.class.name + " [#{result}] " + metric.destination.inspect
63
- Syslog.debug(msg)
64
- LOG.log(watch, :info, msg)
59
+ messages = self.log(watch, metric, condition, result)
60
+
61
+ # notify
62
+ if condition.notify && self.trigger?(metric, result)
63
+ self.notify(condition, messages.last)
64
+ end
65
65
 
66
66
  # after-condition
67
67
  condition.after
@@ -73,7 +73,7 @@ module God
73
73
  condition.transition
74
74
  else
75
75
  # regular
76
- metric.destination[result]
76
+ metric.destination && metric.destination[result]
77
77
  end
78
78
 
79
79
  # transition or reschedule
@@ -106,19 +106,124 @@ module God
106
106
 
107
107
  def self.handle_event(condition)
108
108
  Thread.new do
109
- metric = self.directory[condition]
110
- watch = metric.watch
111
-
112
- watch.mutex.synchronize do
113
- msg = watch.name + ' ' + condition.class.name + " [true] " + metric.destination.inspect
114
- Syslog.debug(msg)
115
- LOG.log(watch, :info, msg)
109
+ begin
110
+ metric = self.directory[condition]
116
111
 
117
- dest = metric.destination[true]
118
- watch.move(dest)
112
+ unless metric.nil?
113
+ watch = metric.watch
114
+
115
+ watch.mutex.synchronize do
116
+ # log
117
+ messages = self.log(watch, metric, condition, true)
118
+
119
+ # notify
120
+ if condition.notify && self.trigger?(metric, true)
121
+ self.notify(condition, messages.last)
122
+ end
123
+
124
+ # get the destination
125
+ dest =
126
+ if condition.transition
127
+ # condition override
128
+ condition.transition
129
+ else
130
+ # regular
131
+ metric.destination && metric.destination[true]
132
+ end
133
+
134
+ if dest
135
+ watch.move(dest)
136
+ end
137
+ end
138
+ end
139
+ rescue => e
140
+ message = format("Unhandled exception (%s): %s\n%s",
141
+ e.class, e.message, e.backtrace.join("\n"))
142
+ Syslog.crit message
143
+ abort message
144
+ end
145
+ end
146
+ end
147
+
148
+ # helpers
149
+
150
+ def self.trigger?(metric, result)
151
+ (metric.destination && metric.destination.keys.size == 2) || result == true
152
+ end
153
+
154
+ def self.log(watch, metric, condition, result)
155
+ status =
156
+ if self.trigger?(metric, result)
157
+ "[trigger]"
158
+ else
159
+ "[ok]"
160
+ end
161
+
162
+ messages = []
163
+
164
+ # log info if available
165
+ if condition.info
166
+ Array(condition.info).each do |condition_info|
167
+ messages << "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
168
+ Syslog.debug(messages.last)
169
+ LOG.log(watch, :info, messages.last % [])
170
+ end
171
+ else
172
+ messages << "#{watch.name} #{status} (#{condition.base_name})"
173
+ Syslog.debug(messages.last)
174
+ LOG.log(watch, :info, messages.last % [])
175
+ end
176
+
177
+ # log
178
+ debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
179
+ Syslog.debug(debug_message)
180
+ LOG.log(watch, :debug, debug_message)
181
+
182
+ messages
183
+ end
184
+
185
+ def self.dest_desc(metric, condition)
186
+ if condition.transition
187
+ {true => condition.transition}.inspect
188
+ else
189
+ if metric.destination
190
+ metric.destination.inspect
191
+ else
192
+ 'none'
119
193
  end
120
194
  end
121
195
  end
196
+
197
+ def self.notify(condition, message)
198
+ spec = Contact.normalize(condition.notify)
199
+ unmatched = []
200
+
201
+ # resolve contacts
202
+ resolved_contacts =
203
+ spec[:contacts].inject([]) do |acc, contact_name_or_group|
204
+ cons = Array(God.contacts[contact_name_or_group] || God.contact_groups[contact_name_or_group])
205
+ unmatched << contact_name_or_group if cons.empty?
206
+ acc += cons
207
+ acc
208
+ end
209
+
210
+ # warn about unmatched contacts
211
+ unless unmatched.empty?
212
+ msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
213
+ LOG.log(condition.watch, :warn, msg)
214
+ end
215
+
216
+ # notify each contact
217
+ resolved_contacts.each do |c|
218
+ host = `hostname`.chomp rescue 'none'
219
+ c.notify(message, Time.now, spec[:priority], spec[:category], host)
220
+
221
+ msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
222
+
223
+ Syslog.debug(msg)
224
+ LOG.log(condition.watch, :info, msg % [])
225
+ end
226
+ end
122
227
  end
123
228
 
124
229
  end
data/lib/god/logger.rb CHANGED
@@ -4,21 +4,38 @@ module God
4
4
  attr_accessor :logs
5
5
 
6
6
  def initialize
7
- super(STDOUT)
7
+ super($stdout)
8
8
  self.logs = {}
9
9
  @mutex = Mutex.new
10
+ @capture = nil
11
+ end
12
+
13
+ def start_capture
14
+ @mutex.synchronize do
15
+ @capture = StringIO.new
16
+ end
17
+ end
18
+
19
+ def finish_capture
20
+ @mutex.synchronize do
21
+ cap = @capture.string
22
+ @capture = nil
23
+ cap
24
+ end
10
25
  end
11
26
 
12
27
  def log(watch, level, text)
13
28
  # initialize watch log if necessary
14
- self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
29
+ self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT) if watch
15
30
 
16
- # push onto timeline for the given watch
31
+ # push onto capture and timeline for the given watch
17
32
  buf = StringIO.new
18
33
  templog = ::Logger.new(buf)
34
+ templog.level = Logger::INFO
19
35
  templog.send(level, text)
20
36
  @mutex.synchronize do
21
- self.logs[watch.name] << [Time.now, buf.string]
37
+ @capture.puts(buf.string) if @capture
38
+ self.logs[watch.name] << [Time.now, buf.string] if watch
22
39
  end
23
40
  templog.close
24
41
 
data/lib/god/metric.rb CHANGED
@@ -3,7 +3,7 @@ module God
3
3
  class Metric
4
4
  attr_accessor :watch, :destination, :conditions
5
5
 
6
- def initialize(watch, destination)
6
+ def initialize(watch, destination = nil)
7
7
  self.watch = watch
8
8
  self.destination = destination
9
9
  self.conditions = []
@@ -25,9 +25,8 @@ module God
25
25
  # call prepare on the condition
26
26
  c.prepare
27
27
 
28
- # abort if the Condition is invalid, the Condition will have printed
29
- # out its own error messages by now
30
- unless c.valid?
28
+ # test generic and specific validity
29
+ unless Condition.valid?(c) && c.valid?
31
30
  abort "Exiting on invalid condition"
32
31
  end
33
32
 
data/lib/god/process.rb CHANGED
@@ -7,8 +7,11 @@ module God
7
7
  attr_accessor :name, :uid, :gid, :log, :start, :stop, :restart
8
8
 
9
9
  def initialize
10
+ self.log = '/dev/null'
11
+
10
12
  @pid_file = nil
11
- @tracking_pid = false
13
+ @tracking_pid = true
14
+ @user_log = false
12
15
  end
13
16
 
14
17
  def alive?
@@ -20,18 +23,24 @@ module God
20
23
  end
21
24
  end
22
25
 
26
+ def file_writable?(file)
27
+ pid = fork do
28
+ ::Process::Sys.setgid(Etc.getgrnam(self.gid).gid) if self.gid
29
+ ::Process::Sys.setuid(Etc.getpwnam(self.uid).uid) if self.uid
30
+
31
+ File.writable?(file) ? exit(0) : exit(1)
32
+ end
33
+
34
+ wpid, status = ::Process.waitpid2(pid)
35
+ status.exitstatus == 0 ? true : false
36
+ end
37
+
23
38
  def valid?
24
39
  # determine if we're tracking pid or not
25
40
  self.pid_file
26
41
 
27
42
  valid = true
28
43
 
29
- # a name must be specified
30
- if self.name.nil?
31
- valid = false
32
- LOG.log(self, :error, "No name was specified")
33
- end
34
-
35
44
  # a start command must be specified
36
45
  if self.start.nil?
37
46
  valid = false
@@ -44,12 +53,6 @@ module God
44
53
  LOG.log(self, :error, "No stop command was specified")
45
54
  end
46
55
 
47
- # self-daemonizing processes cannot specify log
48
- if !@tracking_pid && self.log
49
- valid = false
50
- LOG.log(self, :error, "Self-daemonizing processes cannot specify a log file")
51
- end
52
-
53
56
  # uid must exist if specified
54
57
  if self.uid
55
58
  begin
@@ -70,22 +73,55 @@ module God
70
73
  end
71
74
  end
72
75
 
76
+ # pid dir must exist if specified
77
+ if !@tracking_pid && !File.exist?(File.dirname(self.pid_file))
78
+ valid = false
79
+ LOG.log(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' does not exist")
80
+ end
81
+
82
+ # pid dir must be writable if specified
83
+ if !@tracking_pid && !file_writable?(File.dirname(self.pid_file))
84
+ valid = false
85
+ LOG.log(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' is not writable by #{self.uid || Etc.getlogin}")
86
+ end
87
+
88
+ # log dir must exist
89
+ if !File.exist?(File.dirname(self.log))
90
+ valid = false
91
+ LOG.log(self, :error, "Log directory '#{File.dirname(self.log)}' does not exist")
92
+ end
93
+
94
+ # log file or dir must be writable
95
+ if File.exist?(self.log)
96
+ unless file_writable?(self.log)
97
+ valid = false
98
+ LOG.log(self, :error, "Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
99
+ end
100
+ else
101
+ unless file_writable?(File.dirname(self.log))
102
+ valid = false
103
+ LOG.log(self, :error, "Log directory '#{File.dirname(self.log)}' is not writable by #{self.uid || Etc.getlogin}")
104
+ end
105
+ end
106
+
73
107
  valid
74
108
  end
75
109
 
76
110
  # DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
77
111
  # No really, trust me. Use the instance variable.
78
112
  def pid_file=(value)
79
- @tracking_pid = false
113
+ # if value is nil, do the right thing
114
+ if value
115
+ @tracking_pid = false
116
+ else
117
+ @tracking_pid = true
118
+ end
119
+
80
120
  @pid_file = value
81
121
  end
82
122
 
83
123
  def pid_file
84
- if @pid_file.nil?
85
- @tracking_pid = true
86
- @pid_file = default_pid_file
87
- end
88
- @pid_file
124
+ @pid_file ||= default_pid_file
89
125
  end
90
126
 
91
127
  def start!
@@ -100,6 +136,21 @@ module God
100
136
  call_action(:restart)
101
137
  end
102
138
 
139
+ def spawn(command)
140
+ fork do
141
+ ::Process.setsid
142
+ ::Process::Sys.setgid(Etc.getgrnam(self.gid).gid) if self.gid
143
+ ::Process::Sys.setuid(Etc.getpwnam(self.uid).uid) if self.uid
144
+ Dir.chdir "/"
145
+ $0 = command
146
+ STDIN.reopen "/dev/null"
147
+ STDOUT.reopen self.log, "a"
148
+ STDERR.reopen STDOUT
149
+
150
+ exec command unless command.empty?
151
+ end
152
+ end
153
+
103
154
  def call_action(action)
104
155
  command = send(action)
105
156
 
@@ -110,7 +161,7 @@ module God
110
161
  LOG.log(self, :info, "#{self.name} stop: default lambda killer")
111
162
 
112
163
  ::Process.kill('HUP', pid) rescue nil
113
-
164
+
114
165
  # Poll to see if it's dead
115
166
  5.times do
116
167
  begin
@@ -119,44 +170,38 @@ module God
119
170
  # It died. Good.
120
171
  return
121
172
  end
122
-
173
+
123
174
  sleep 1
124
175
  end
125
-
176
+
126
177
  ::Process.kill('KILL', pid) rescue nil
127
178
  end
128
179
  end
129
180
 
130
181
  if command.kind_of?(String)
131
- # string command
132
- # fork/exec to setuid/gid
133
- r, w = IO.pipe
134
- opid = fork do
135
- STDOUT.reopen(w)
136
- r.close
137
- pid = fork do
138
- ::Process.setsid
139
- ::Process::Sys.setgid(Etc.getgrnam(self.gid).gid) if self.gid
140
- ::Process::Sys.setuid(Etc.getpwnam(self.uid).uid) if self.uid
141
- Dir.chdir "/"
142
- $0 = command
143
- STDIN.reopen "/dev/null"
144
- if self.log
145
- STDOUT.reopen self.log, "a"
146
- else
147
- STDOUT.reopen "/dev/null", "a"
148
- end
149
- STDERR.reopen STDOUT
150
-
151
- exec command unless command.empty?
182
+ pid = nil
183
+
184
+ if @tracking_pid
185
+ # double fork god-daemonized processes
186
+ # we don't want to wait for them to finish
187
+ r, w = IO.pipe
188
+ opid = fork do
189
+ STDOUT.reopen(w)
190
+ r.close
191
+ pid = self.spawn(command)
192
+ puts pid.to_s
152
193
  end
153
- puts pid.to_s
194
+
195
+ ::Process.waitpid(opid, 0)
196
+ w.close
197
+ pid = r.gets.chomp
198
+ else
199
+ # single fork self-daemonizing processes
200
+ # we want to wait for them to finish
201
+ pid = self.spawn(command)
202
+ ::Process.waitpid(pid, 0)
154
203
  end
155
204
 
156
- ::Process.waitpid(opid, 0)
157
- w.close
158
- pid = r.gets.chomp
159
-
160
205
  if @tracking_pid or (@pid_file.nil? and WRITES_PID.include?(action))
161
206
  File.open(default_pid_file, 'w') do |f|
162
207
  f.write pid
@@ -165,7 +210,6 @@ module God
165
210
  @tracking_pid = true
166
211
  @pid_file = default_pid_file
167
212
  end
168
-
169
213
  elsif command.kind_of?(Proc)
170
214
  # lambda command
171
215
  command.call
data/lib/god/socket.rb ADDED
@@ -0,0 +1,60 @@
1
+ require 'drb'
2
+
3
+ # The God::Server oversees the DRb server which dishes out info on this God daemon.
4
+
5
+ module God
6
+
7
+ class Socket
8
+ attr_reader :port
9
+
10
+ def self.socket_file(port)
11
+ "/tmp/god.#{port}.sock"
12
+ end
13
+
14
+ def self.socket(port)
15
+ "drbunix://#{self.socket_file(port)}"
16
+ end
17
+
18
+ def socket_file
19
+ self.class.socket_file(@port)
20
+ end
21
+
22
+ def socket
23
+ self.class.socket(@port)
24
+ end
25
+
26
+ def initialize(port = nil)
27
+ @port = port
28
+ start
29
+ end
30
+
31
+ def ping
32
+ true
33
+ end
34
+
35
+ def method_missing(*args, &block)
36
+ God.send(*args, &block)
37
+ end
38
+
39
+ private
40
+
41
+ def start
42
+ begin
43
+ @drb ||= DRb.start_service(self.socket, self)
44
+ LOG.log(nil, :info, "Started on #{DRb.uri}")
45
+ rescue Errno::EADDRINUSE
46
+ DRb.start_service
47
+ server = DRbObject.new(nil, self.socket)
48
+
49
+ begin
50
+ server.ping
51
+ abort "Socket #{self.socket} already in use by another instance of god"
52
+ rescue
53
+ File.delete(self.socket_file) rescue nil
54
+ @drb ||= DRb.start_service(self.socket, self)
55
+ end
56
+ end
57
+ end
58
+ end
59
+
60
+ end