god 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,38 @@
1
1
  module God
2
2
  module Conditions
3
3
 
4
+ # Condition Symbol :memory_usage
5
+ # Type: Poll
6
+ #
7
+ # Trigger when the resident memory of a process is above a specified limit.
8
+ #
9
+ # Parameters
10
+ # Required
11
+ # +pid_file+ is the pid file of the process in question. Automatically
12
+ # populated for Watches.
13
+ # +above+ is the amount of resident memory (in kilobytes) above which
14
+ # the condition should trigger. You can also use the sugar
15
+ # methods #kilobytes, #megabytes, and #gigabytes to clarify
16
+ # this amount (see examples).
17
+ #
18
+ # Examples
19
+ #
20
+ # Trigger if the process is using more than 100 megabytes of resident
21
+ # memory (from a Watch):
22
+ #
23
+ # on.condition(:memory_usage) do |c|
24
+ # c.above = 100.megabytes
25
+ # end
26
+ #
27
+ # Non-Watch Tasks must specify a PID file:
28
+ #
29
+ # on.condition(:memory_usage) do |c|
30
+ # c.above = 100.megabytes
31
+ # c.pid_file = "/var/run/mongrel.3000.pid"
32
+ # end
4
33
  class MemoryUsage < PollCondition
5
34
  attr_accessor :above, :times
6
-
35
+
7
36
  def initialize
8
37
  super
9
38
  self.above = nil
@@ -1,6 +1,27 @@
1
1
  module God
2
2
  module Conditions
3
3
 
4
+ # Condition Symbol :process_exits
5
+ # Type: Event
6
+ #
7
+ # Trigger when a process exits.
8
+ #
9
+ # Parameters
10
+ # Required
11
+ # +pid_file+ is the pid file of the process in question. Automatically
12
+ # populated for Watches.
13
+ #
14
+ # Examples
15
+ #
16
+ # Trigger if process exits (from a Watch):
17
+ #
18
+ # on.condition(:process_exits)
19
+ #
20
+ # Trigger if process exits:
21
+ #
22
+ # on.condition(:process_exits) do |c|
23
+ # c.pid_file = "/var/run/mongrel.3000.pid"
24
+ # end
4
25
  class ProcessExits < EventCondition
5
26
  def initialize
6
27
  self.info = "process exited"
@@ -16,7 +37,8 @@ module God
16
37
  pid = File.read(self.watch.pid_file).strip.to_i
17
38
 
18
39
  begin
19
- EventHandler.register(pid, :proc_exit) do
40
+ EventHandler.register(pid, :proc_exit) do |extra|
41
+ self.info = "process exited #{extra.inspect}"
20
42
  Hub.trigger(self)
21
43
  end
22
44
  rescue StandardError
@@ -29,7 +51,7 @@ module God
29
51
  pid = File.read(self.watch.pid_file).strip.to_i
30
52
  EventHandler.deregister(pid, :proc_exit)
31
53
  else
32
- LOG.log(self.watch, :error, "#{self.watch.name} could not deregister: no such PID file #{self.watch.pid_file} (#{self.base_name})")
54
+ applog(self.watch, :error, "#{self.watch.name} could not deregister: no such PID file #{self.watch.pid_file} (#{self.base_name})")
33
55
  end
34
56
  end
35
57
  end
@@ -1,6 +1,38 @@
1
1
  module God
2
2
  module Conditions
3
3
 
4
+ # Condition Symbol :process_running
5
+ # Type: Poll
6
+ #
7
+ # Trigger when a process is running or not running depending on attributes.
8
+ #
9
+ # Parameters
10
+ # Required
11
+ # +pid_file+ is the pid file of the process in question. Automatically
12
+ # populated for Watches.
13
+ # +running+ specifies whether you want to trigger if the process is
14
+ # running (true) or whether it is not running (false)
15
+ #
16
+ # Examples
17
+ #
18
+ # Trigger if process IS NOT running (from a Watch):
19
+ #
20
+ # on.condition(:process_running) do |c|
21
+ # c.running = false
22
+ # end
23
+ #
24
+ # Trigger if process IS running (from a Watch):
25
+ #
26
+ # on.condition(:process_running) do |c|
27
+ # c.running = true
28
+ # end
29
+ #
30
+ # Non-Watch Tasks must specify a PID file:
31
+ #
32
+ # on.condition(:process_running) do |c|
33
+ # c.running = false
34
+ # c.pid_file = "/var/run/mongrel.3000.pid"
35
+ # end
4
36
  class ProcessRunning < PollCondition
5
37
  attr_accessor :running
6
38
 
@@ -39,10 +39,12 @@ module God
39
39
  end
40
40
 
41
41
  def self.complain(text, c = nil)
42
- msg = text
42
+ watch = c.watch rescue nil
43
+ msg = ""
44
+ msg += "#{watch.name}: " if watch
45
+ msg += text
43
46
  msg += " for #{c.friendly_name}" if c
44
- Syslog.err(msg)
45
- puts msg
47
+ applog(watch, :error, msg)
46
48
  false
47
49
  end
48
50
 
@@ -50,8 +50,8 @@ module God
50
50
  end
51
51
  end
52
52
 
53
- def self.call(pid, event)
54
- @@actions[pid][event].call if watching_pid?(pid) && @@actions[pid][event]
53
+ def self.call(pid, event, extra_data = {})
54
+ @@actions[pid][event].call(extra_data) if watching_pid?(pid) && @@actions[pid][event]
55
55
  end
56
56
 
57
57
  def self.watching_pid?(pid)
@@ -83,8 +83,7 @@ module God
83
83
  watch.move(dest)
84
84
  rescue EventRegistrationFailedError
85
85
  msg = watch.name + ' Event registration failed, moving back to previous state'
86
- Syslog.debug(msg)
87
- LOG.log(watch, :info, msg)
86
+ applog(watch, :info, msg)
88
87
 
89
88
  dest = watch.state
90
89
  retry
@@ -95,11 +94,10 @@ module God
95
94
  end
96
95
  end
97
96
  end
98
- rescue => e
97
+ rescue Exception => e
99
98
  message = format("Unhandled exception (%s): %s\n%s",
100
99
  e.class, e.message, e.backtrace.join("\n"))
101
- Syslog.crit message
102
- abort message
100
+ applog(nil, :fatal, message)
103
101
  end
104
102
  end
105
103
  end
@@ -136,11 +134,10 @@ module God
136
134
  end
137
135
  end
138
136
  end
139
- rescue => e
137
+ rescue Exception => e
140
138
  message = format("Unhandled exception (%s): %s\n%s",
141
139
  e.class, e.message, e.backtrace.join("\n"))
142
- Syslog.crit message
143
- abort message
140
+ applog(nil, :fatal, message)
144
141
  end
145
142
  end
146
143
  end
@@ -165,19 +162,16 @@ module God
165
162
  if condition.info
166
163
  Array(condition.info).each do |condition_info|
167
164
  messages << "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
168
- Syslog.debug(messages.last)
169
- LOG.log(watch, :info, messages.last % [])
165
+ applog(watch, :info, messages.last)
170
166
  end
171
167
  else
172
168
  messages << "#{watch.name} #{status} (#{condition.base_name})"
173
- Syslog.debug(messages.last)
174
- LOG.log(watch, :info, messages.last % [])
169
+ applog(watch, :info, messages.last)
175
170
  end
176
171
 
177
172
  # log
178
173
  debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
179
- Syslog.debug(debug_message)
180
- LOG.log(watch, :debug, debug_message)
174
+ applog(watch, :debug, debug_message)
181
175
 
182
176
  messages
183
177
  end
@@ -210,18 +204,17 @@ module God
210
204
  # warn about unmatched contacts
211
205
  unless unmatched.empty?
212
206
  msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
213
- LOG.log(condition.watch, :warn, msg)
207
+ applog(condition.watch, :warn, msg)
214
208
  end
215
209
 
216
210
  # notify each contact
217
211
  resolved_contacts.each do |c|
218
212
  host = `hostname`.chomp rescue 'none'
219
213
  c.notify(message, Time.now, spec[:priority], spec[:category], host)
220
-
214
+
221
215
  msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
222
-
223
- Syslog.debug(msg)
224
- LOG.log(condition.watch, :info, msg % [])
216
+
217
+ applog(condition.watch, :info, msg % [])
225
218
  end
226
219
  end
227
220
  end
@@ -1,6 +1,12 @@
1
1
  module God
2
2
 
3
3
  class Logger < ::Logger
4
+ SYSLOG_EQUIVALENTS = {:fatal => :crit,
5
+ :error => :err,
6
+ :warn => :debug,
7
+ :info => :debug,
8
+ :debug => :debug}
9
+
4
10
  attr_accessor :logs
5
11
 
6
12
  def initialize
@@ -32,7 +38,7 @@ module God
32
38
  buf = StringIO.new
33
39
  templog = ::Logger.new(buf)
34
40
  templog.level = Logger::INFO
35
- templog.send(level, text)
41
+ templog.send(level, text % [])
36
42
  @mutex.synchronize do
37
43
  @capture.puts(buf.string) if @capture
38
44
  self.logs[watch.name] << [Time.now, buf.string] if watch
@@ -40,7 +46,10 @@ module God
40
46
  templog.close
41
47
 
42
48
  # send to regular logger
43
- self.send(level, text)
49
+ self.send(level, text % [])
50
+
51
+ # send to syslog
52
+ Syslog.send(SYSLOG_EQUIVALENTS[level], text)
44
53
  end
45
54
 
46
55
  def watch_log_since(watch_name, since)
@@ -44,13 +44,13 @@ module God
44
44
  # a start command must be specified
45
45
  if self.start.nil?
46
46
  valid = false
47
- LOG.log(self, :error, "No start command was specified")
47
+ applog(self, :error, "No start command was specified")
48
48
  end
49
49
 
50
50
  # self-daemonizing processes must specify a stop command
51
51
  if !@tracking_pid && self.stop.nil?
52
52
  valid = false
53
- LOG.log(self, :error, "No stop command was specified")
53
+ applog(self, :error, "No stop command was specified")
54
54
  end
55
55
 
56
56
  # uid must exist if specified
@@ -59,7 +59,7 @@ module God
59
59
  Etc.getpwnam(self.uid)
60
60
  rescue ArgumentError
61
61
  valid = false
62
- LOG.log(self, :error, "UID for '#{self.uid}' does not exist")
62
+ applog(self, :error, "UID for '#{self.uid}' does not exist")
63
63
  end
64
64
  end
65
65
 
@@ -69,38 +69,38 @@ module God
69
69
  Etc.getgrnam(self.gid)
70
70
  rescue ArgumentError
71
71
  valid = false
72
- LOG.log(self, :error, "GID for '#{self.gid}' does not exist")
72
+ applog(self, :error, "GID for '#{self.gid}' does not exist")
73
73
  end
74
74
  end
75
75
 
76
76
  # pid dir must exist if specified
77
77
  if !@tracking_pid && !File.exist?(File.dirname(self.pid_file))
78
78
  valid = false
79
- LOG.log(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' does not exist")
79
+ applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' does not exist")
80
80
  end
81
81
 
82
82
  # pid dir must be writable if specified
83
- if !@tracking_pid && !file_writable?(File.dirname(self.pid_file))
83
+ if !@tracking_pid && File.exist?(File.dirname(self.pid_file)) && !file_writable?(File.dirname(self.pid_file))
84
84
  valid = false
85
- LOG.log(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' is not writable by #{self.uid || Etc.getlogin}")
85
+ applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' is not writable by #{self.uid || Etc.getlogin}")
86
86
  end
87
87
 
88
88
  # log dir must exist
89
89
  if !File.exist?(File.dirname(self.log))
90
90
  valid = false
91
- LOG.log(self, :error, "Log directory '#{File.dirname(self.log)}' does not exist")
91
+ applog(self, :error, "Log directory '#{File.dirname(self.log)}' does not exist")
92
92
  end
93
93
 
94
94
  # log file or dir must be writable
95
95
  if File.exist?(self.log)
96
96
  unless file_writable?(self.log)
97
97
  valid = false
98
- LOG.log(self, :error, "Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
98
+ applog(self, :error, "Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
99
99
  end
100
100
  else
101
101
  unless file_writable?(File.dirname(self.log))
102
102
  valid = false
103
- LOG.log(self, :error, "Log directory '#{File.dirname(self.log)}' is not writable by #{self.uid || Etc.getlogin}")
103
+ applog(self, :error, "Log directory '#{File.dirname(self.log)}' is not writable by #{self.uid || Etc.getlogin}")
104
104
  end
105
105
  end
106
106
 
@@ -147,6 +147,9 @@ module God
147
147
  STDOUT.reopen self.log, "a"
148
148
  STDERR.reopen STDOUT
149
149
 
150
+ # close any other file descriptors
151
+ 3.upto(256){|fd| IO::new(fd).close rescue nil}
152
+
150
153
  exec command unless command.empty?
151
154
  end
152
155
  end
@@ -158,7 +161,7 @@ module God
158
161
  pid = File.read(self.pid_file).strip.to_i
159
162
  name = self.name
160
163
  command = lambda do
161
- LOG.log(self, :info, "#{self.name} stop: default lambda killer")
164
+ applog(self, :info, "#{self.name} stop: default lambda killer")
162
165
 
163
166
  ::Process.kill('HUP', pid) rescue nil
164
167
 
@@ -185,16 +188,22 @@ module God
185
188
  # double fork god-daemonized processes
186
189
  # we don't want to wait for them to finish
187
190
  r, w = IO.pipe
188
- opid = fork do
189
- STDOUT.reopen(w)
190
- r.close
191
- pid = self.spawn(command)
192
- puts pid.to_s
191
+ begin
192
+ opid = fork do
193
+ STDOUT.reopen(w)
194
+ r.close
195
+ pid = self.spawn(command)
196
+ puts pid.to_s # send pid back to forker
197
+ end
198
+
199
+ ::Process.waitpid(opid, 0)
200
+ w.close
201
+ pid = r.gets.chomp
202
+ ensure
203
+ # make sure the file descriptors get closed no matter what
204
+ r.close rescue nil
205
+ w.close rescue nil
193
206
  end
194
-
195
- ::Process.waitpid(opid, 0)
196
- w.close
197
- pid = r.gets.chomp
198
207
  else
199
208
  # single fork self-daemonizing processes
200
209
  # we want to wait for them to finish
@@ -1,57 +1,93 @@
1
1
  require 'drb'
2
2
 
3
- # The God::Server oversees the DRb server which dishes out info on this God daemon.
4
-
5
3
  module God
6
4
 
5
+ # The God::Server oversees the DRb server which dishes out info on this God daemon.
7
6
  class Socket
8
7
  attr_reader :port
9
8
 
9
+ # The location of the socket for a given port
10
+ # +port+ is the port number
11
+ #
12
+ # Returns String (file location)
10
13
  def self.socket_file(port)
11
14
  "/tmp/god.#{port}.sock"
12
15
  end
13
16
 
17
+ # The address of the socket for a given port
18
+ # +port+ is the port number
19
+ #
20
+ # Returns String (drb address)
14
21
  def self.socket(port)
15
22
  "drbunix://#{self.socket_file(port)}"
16
23
  end
17
24
 
25
+ # The location of the socket for this Server
26
+ #
27
+ # Returns String (file location)
18
28
  def socket_file
19
29
  self.class.socket_file(@port)
20
30
  end
21
31
 
32
+ # The address of the socket for this Server
33
+ #
34
+ # Returns String (drb address)
22
35
  def socket
23
36
  self.class.socket(@port)
24
37
  end
25
38
 
39
+ # Create a new Server and start the DRb server
40
+ # +port+ is the port on which to start the DRb service (default nil)
26
41
  def initialize(port = nil)
27
42
  @port = port
28
43
  start
29
44
  end
30
45
 
46
+ # Returns true
31
47
  def ping
32
48
  true
33
49
  end
34
50
 
51
+ # Forward API calls to God
52
+ #
53
+ # Returns whatever the forwarded call returns
35
54
  def method_missing(*args, &block)
36
55
  God.send(*args, &block)
37
56
  end
38
57
 
58
+ # Stop the DRb server and delete the socket file
59
+ #
60
+ # Returns nothing
61
+ def stop
62
+ DRb.stop_service
63
+ FileUtils.rm_f(self.socket_file)
64
+ end
65
+
39
66
  private
40
67
 
68
+ # Start the DRb server. Abort if there is already a running god instance
69
+ # on the socket.
70
+ #
71
+ # Returns nothing
41
72
  def start
42
73
  begin
43
74
  @drb ||= DRb.start_service(self.socket, self)
44
- LOG.log(nil, :info, "Started on #{DRb.uri}")
75
+ applog(nil, :info, "Started on #{DRb.uri}")
45
76
  rescue Errno::EADDRINUSE
77
+ applog(nil, :info, "Socket already in use")
46
78
  DRb.start_service
47
79
  server = DRbObject.new(nil, self.socket)
48
80
 
49
81
  begin
50
- server.ping
82
+ Timeout.timeout(5) do
83
+ server.ping
84
+ end
51
85
  abort "Socket #{self.socket} already in use by another instance of god"
52
- rescue
86
+ rescue StandardError, Timeout::Error
87
+ applog(nil, :info, "Socket is stale, reopening")
53
88
  File.delete(self.socket_file) rescue nil
54
89
  @drb ||= DRb.start_service(self.socket, self)
90
+ applog(nil, :info, "Started on #{DRb.uri}")
55
91
  end
56
92
  end
57
93
  end