god 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/History.txt +67 -1
  2. data/Manifest.txt +3 -4
  3. data/Rakefile +1 -1
  4. data/bin/god +19 -1
  5. data/lib/god.rb +86 -49
  6. data/lib/god/cli/command.rb +7 -1
  7. data/lib/god/cli/run.rb +58 -0
  8. data/lib/god/condition.rb +6 -2
  9. data/lib/god/conditions/cpu_usage.rb +7 -6
  10. data/lib/god/conditions/http_response_code.rb +5 -1
  11. data/lib/god/conditions/memory_usage.rb +7 -6
  12. data/lib/god/conditions/process_exits.rb +15 -10
  13. data/lib/god/conditions/process_running.rb +17 -13
  14. data/lib/god/diagnostics.rb +37 -0
  15. data/lib/god/driver.rb +108 -0
  16. data/lib/god/event_handler.rb +41 -1
  17. data/lib/god/logger.rb +69 -19
  18. data/lib/god/metric.rb +2 -2
  19. data/lib/god/process.rb +84 -27
  20. data/lib/god/task.rb +286 -29
  21. data/lib/god/timeline.rb +20 -31
  22. data/lib/god/watch.rb +26 -15
  23. data/test/configs/child_events/child_events.god +0 -5
  24. data/test/configs/child_polls/simple_server.rb +1 -1
  25. data/test/configs/daemon_events/simple_server_stop.rb +2 -0
  26. data/test/configs/stress/stress.god +1 -1
  27. data/test/configs/test.rb +12 -28
  28. data/test/test_condition.rb +8 -0
  29. data/test/test_conditions_http_response_code.rb +5 -5
  30. data/test/test_conditions_process_running.rb +6 -4
  31. data/test/test_driver.rb +11 -0
  32. data/test/test_event_handler.rb +7 -0
  33. data/test/test_god.rb +63 -62
  34. data/test/test_metric.rb +0 -16
  35. data/test/test_process.rb +29 -1
  36. data/test/test_task.rb +177 -1
  37. data/test/test_timeline.rb +2 -1
  38. data/test/test_watch.rb +24 -6
  39. metadata +6 -8
  40. data/lib/god/hub.rb +0 -222
  41. data/lib/god/timer.rb +0 -87
  42. data/test/test_hub.rb +0 -240
  43. data/test/test_timer.rb +0 -69
@@ -1,7 +1,7 @@
1
1
  module God
2
2
 
3
3
  class Condition < Behavior
4
- attr_accessor :transition, :notify, :info
4
+ attr_accessor :transition, :notify, :info, :phase
5
5
 
6
6
  # Generate a Condition of the given kind. The proper class is found by camel casing the
7
7
  # kind (which is given as an underscored symbol).
@@ -14,6 +14,10 @@ module God
14
14
  abort "Condition '#{c.class.name}' must subclass God::PollCondition, God::EventCondition, or God::TriggerCondition"
15
15
  end
16
16
 
17
+ if !EventHandler.loaded? && c.kind_of?(EventCondition)
18
+ abort "Condition '#{c.class.name}' requires an event system but none has been loaded"
19
+ end
20
+
17
21
  c.watch = watch
18
22
  c
19
23
  rescue NameError
@@ -77,7 +81,7 @@ module God
77
81
  end
78
82
 
79
83
  def trigger
80
- Hub.trigger(self)
84
+ self.watch.trigger(self)
81
85
  end
82
86
 
83
87
  def register
@@ -29,7 +29,7 @@ module God
29
29
  # c.pid_file = "/var/run/mongrel.3000.pid"
30
30
  # end
31
31
  class CpuUsage < PollCondition
32
- attr_accessor :above, :times
32
+ attr_accessor :above, :times, :pid_file
33
33
 
34
34
  def initialize
35
35
  super
@@ -49,18 +49,19 @@ module God
49
49
  @timeline.clear
50
50
  end
51
51
 
52
+ def pid
53
+ self.watch.pid || File.read(self.pid_file).strip.to_i
54
+ end
55
+
52
56
  def valid?
53
57
  valid = true
54
- valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
58
+ valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
55
59
  valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
56
60
  valid
57
61
  end
58
62
 
59
63
  def test
60
- return false unless File.exist?(self.watch.pid_file)
61
-
62
- pid = File.read(self.watch.pid_file).strip
63
- process = System::Process.new(pid)
64
+ process = System::Process.new(self.pid)
64
65
  @timeline.push(process.percent_cpu)
65
66
 
66
67
  history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}%%" }.join(", ") + "]"
@@ -108,7 +108,7 @@ module God
108
108
 
109
109
  Net::HTTP.start(self.host, self.port) do |http|
110
110
  http.read_timeout = self.timeout
111
- response = http.head(self.path)
111
+ response = http.get(self.path)
112
112
  end
113
113
 
114
114
  actual_response_code = response.code.to_i
@@ -121,6 +121,10 @@ module God
121
121
  end
122
122
  rescue Errno::ECONNREFUSED
123
123
  self.code_is ? fail('Refused') : pass('Refused')
124
+ rescue Errno::ECONNRESET
125
+ self.code_is ? fail('Reset') : pass('Reset')
126
+ rescue EOFError
127
+ self.code_is ? fail('EOF') : pass('EOF')
124
128
  rescue Timeout::Error
125
129
  self.code_is ? fail('Timeout') : pass('Timeout')
126
130
  end
@@ -31,7 +31,7 @@ module God
31
31
  # c.pid_file = "/var/run/mongrel.3000.pid"
32
32
  # end
33
33
  class MemoryUsage < PollCondition
34
- attr_accessor :above, :times
34
+ attr_accessor :above, :times, :pid_file
35
35
 
36
36
  def initialize
37
37
  super
@@ -51,18 +51,19 @@ module God
51
51
  @timeline.clear
52
52
  end
53
53
 
54
+ def pid
55
+ self.watch.pid || File.read(self.pid_file).strip.to_i
56
+ end
57
+
54
58
  def valid?
55
59
  valid = true
56
- valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
60
+ valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
57
61
  valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
58
62
  valid
59
63
  end
60
64
 
61
65
  def test
62
- return false unless File.exist?(self.watch.pid_file)
63
-
64
- pid = File.read(self.watch.pid_file).strip
65
- process = System::Process.new(pid)
66
+ process = System::Process.new(self.pid)
66
67
  @timeline.push(process.memory)
67
68
 
68
69
  history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}kb" }.join(", ") + "]"
@@ -28,30 +28,35 @@ module God
28
28
  end
29
29
 
30
30
  def valid?
31
- valid = true
32
- valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
33
- valid
31
+ true
34
32
  end
35
-
33
+
36
34
  def register
37
- pid = File.read(self.watch.pid_file).strip.to_i
35
+ pid = self.watch.pid
38
36
 
39
37
  begin
40
38
  EventHandler.register(pid, :proc_exit) do |extra|
41
- self.info = "process exited #{extra.inspect}"
42
- Hub.trigger(self)
39
+ formatted_extra = extra.size > 0 ? " #{extra.inspect}" : ""
40
+ self.info = "process #{pid} exited#{formatted_extra}"
41
+ self.watch.trigger(self)
43
42
  end
43
+
44
+ msg = "#{self.watch.name} registered 'proc_exit' event for pid #{pid}"
45
+ applog(self.watch, :info, msg)
44
46
  rescue StandardError
45
47
  raise EventRegistrationFailedError.new
46
48
  end
47
49
  end
48
50
 
49
51
  def deregister
50
- if File.exist?(self.watch.pid_file)
51
- pid = File.read(self.watch.pid_file).strip.to_i
52
+ pid = self.watch.pid
53
+ if pid
52
54
  EventHandler.deregister(pid, :proc_exit)
55
+
56
+ msg = "#{self.watch.name} deregistered 'proc_exit' event for pid #{pid}"
57
+ applog(self.watch, :info, msg)
53
58
  else
54
- applog(self.watch, :error, "#{self.watch.name} could not deregister: no such PID file #{self.watch.pid_file} (#{self.base_name})")
59
+ applog(self.watch, :error, "#{self.watch.name} could not deregister: no cached PID or PID file #{self.watch.pid_file} (#{self.base_name})")
55
60
  end
56
61
  end
57
62
  end
@@ -34,37 +34,41 @@ module God
34
34
  # c.pid_file = "/var/run/mongrel.3000.pid"
35
35
  # end
36
36
  class ProcessRunning < PollCondition
37
- attr_accessor :running
37
+ attr_accessor :running, :pid_file
38
+
39
+ def pid
40
+ self.watch.pid || File.read(self.pid_file).strip.to_i
41
+ end
38
42
 
39
43
  def valid?
40
44
  valid = true
41
- valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
45
+ valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
42
46
  valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
43
47
  valid
44
48
  end
45
-
49
+
46
50
  def test
47
51
  self.info = []
48
52
 
49
- unless File.exist?(self.watch.pid_file)
50
- self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
51
- return !self.running
52
- end
53
+ # unless File.exist?(self.watch.pid_file)
54
+ # self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
55
+ # return !self.running
56
+ # end
53
57
 
54
- pid = File.read(self.watch.pid_file).strip
55
- active = System::Process.new(pid).exists?
58
+ pid = self.watch.pid
59
+ active = pid && System::Process.new(pid).exists?
56
60
 
57
61
  if (self.running && active)
58
- self.info << "process is running"
62
+ self.info.concat(["process is running"])
59
63
  true
60
64
  elsif (!self.running && !active)
61
- self.info << "process is not running"
65
+ self.info.concat(["process is not running"])
62
66
  true
63
67
  else
64
68
  if self.running
65
- self.info << "process is not running"
69
+ self.info.concat(["process is not running"])
66
70
  else
67
- self.info << "process is running"
71
+ self.info.concat(["process is running"])
68
72
  end
69
73
  false
70
74
  end
@@ -0,0 +1,37 @@
1
+ def start_dike
2
+ require 'dike'
3
+ Thread.new do
4
+ Dike.logfactory File.join(File.dirname(__FILE__), *%w[.. .. logs])
5
+ loop do
6
+ Dike.finger
7
+ sleep(1)
8
+ end
9
+ end
10
+ end
11
+
12
+ class BleakHouseDiagnostic
13
+ LOG_FILE = File.join(File.dirname(__FILE__), *%w[.. .. logs bleak.log])
14
+
15
+ class << self
16
+ attr_accessor :logger
17
+ end
18
+
19
+ def self.install
20
+ require 'bleak_house'
21
+ self.logger = BleakHouse::Logger.new
22
+ File.delete(LOG_FILE) rescue nil
23
+ end
24
+
25
+ def self.snapshot
26
+ self.logger.snapshot(LOG_FILE, "timer", false) if self.logger
27
+ end
28
+
29
+ def self.spin(delay = 1)
30
+ Thread.new do
31
+ loop do
32
+ self.snapshot
33
+ sleep(delay)
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,108 @@
1
+ module God
2
+
3
+ class DriverEvent
4
+ attr_accessor :condition, :at
5
+
6
+ # Instantiate a new DriverEvent that will be triggered after the specified delay
7
+ # +condition+ is the Condition
8
+ # +delay+ is the number of seconds from now at which to trigger
9
+ #
10
+ # Returns DriverEvent
11
+ def initialize(condition, delay)
12
+ self.condition = condition
13
+ self.at = Time.now + delay
14
+ end
15
+
16
+ def due?
17
+ Time.now >= self.at
18
+ end
19
+ end # DriverEvent
20
+
21
+ class Driver
22
+ attr_reader :thread
23
+
24
+ INTERVAL = 0.25
25
+
26
+ # Instantiate a new Driver and start the scheduler loop to handle events
27
+ # +task+ is the Task this Driver belongs to
28
+ #
29
+ # Returns Driver
30
+ def initialize(task)
31
+ @task = task
32
+ @events = []
33
+ @ops = Queue.new
34
+
35
+ @thread = Thread.new do
36
+ loop do
37
+ begin
38
+ if !@ops.empty?
39
+ self.handle_op
40
+ elsif !@events.empty?
41
+ self.handle_event
42
+ else
43
+ sleep INTERVAL
44
+ end
45
+ rescue Exception => e
46
+ message = format("Unhandled exception in driver loop - (%s): %s\n%s",
47
+ e.class, e.message, e.backtrace.join("\n"))
48
+ applog(nil, :fatal, message)
49
+ end
50
+ end
51
+ end
52
+ end
53
+
54
+ # Handle the next queued operation that was issued asynchronously
55
+ #
56
+ # Returns nothing
57
+ def handle_op
58
+ command = @ops.pop
59
+ @task.send(command[0], *command[1])
60
+ end
61
+
62
+ # Handle the next event (poll condition) that is due
63
+ #
64
+ # Returns nothing
65
+ def handle_event
66
+ if @events.first.due?
67
+ event = @events.shift
68
+ @task.handle_poll(event.condition)
69
+ end
70
+
71
+ # don't sleep if there is a pending event and it is due
72
+ unless @events.first && @events.first.due?
73
+ sleep INTERVAL
74
+ end
75
+ end
76
+
77
+ # Clear all events for this Driver
78
+ #
79
+ # Returns nothing
80
+ def clear_events
81
+ @events.clear
82
+ end
83
+
84
+ # Queue an asynchronous message
85
+ # +name+ is the Symbol name of the operation
86
+ # +args+ is an optional Array of arguments
87
+ #
88
+ # Returns nothing
89
+ def message(name, args = [])
90
+ @ops.push([name, args])
91
+ end
92
+
93
+ # Create and schedule a new DriverEvent
94
+ # +condition+ is the Condition
95
+ # +delay+ is the number of seconds to delay (default: interval defined in condition)
96
+ #
97
+ # Returns nothing
98
+ def schedule(condition, delay = condition.interval)
99
+ applog(nil, :debug, "driver schedule #{condition} in #{delay} seconds")
100
+
101
+ @events.concat([DriverEvent.new(condition, delay)])
102
+
103
+ # sort events
104
+ @events.sort! { |x, y| x.at <=> y.at }
105
+ end
106
+ end # Driver
107
+
108
+ end # God
@@ -61,9 +61,49 @@ module God
61
61
  def self.start
62
62
  Thread.new do
63
63
  loop do
64
- @@handler.handle_events
64
+ begin
65
+ @@handler.handle_events
66
+ rescue Exception => e
67
+ message = format("Unhandled exception (%s): %s\n%s",
68
+ e.class, e.message, e.backtrace.join("\n"))
69
+ applog(nil, :fatal, message)
70
+ end
65
71
  end
66
72
  end
73
+
74
+ # do a real test to make sure events are working properly
75
+ @@loaded = self.operational?
76
+ end
77
+
78
+ def self.operational?
79
+ com = [false]
80
+
81
+ Thread.new do
82
+ begin
83
+ event_system = God::EventHandler.event_system
84
+
85
+ pid = fork do
86
+ loop { sleep(1) }
87
+ end
88
+
89
+ self.register(pid, :proc_exit) do
90
+ com[0] = true
91
+ end
92
+
93
+ ::Process.kill('KILL', pid)
94
+
95
+ sleep(0.1)
96
+
97
+ self.deregister(pid, :proc_exit) rescue nil
98
+ rescue => e
99
+ puts e.message
100
+ puts e.backtrace.join("\n")
101
+ end
102
+ end.join
103
+
104
+ sleep(0.1)
105
+
106
+ com.first
67
107
  end
68
108
 
69
109
  end
@@ -9,49 +9,76 @@ module God
9
9
 
10
10
  attr_accessor :logs
11
11
 
12
+ class << self
13
+ attr_accessor :syslog
14
+ end
15
+
16
+ self.syslog ||= true
17
+
18
+ # Instantiate a new Logger object
12
19
  def initialize
13
20
  super($stdout)
14
21
  self.logs = {}
15
22
  @mutex = Mutex.new
16
23
  @capture = nil
24
+ @templogio = StringIO.new
25
+ @templog = ::Logger.new(@templogio)
26
+ @templog.level = Logger::INFO
27
+ load_syslog
17
28
  end
18
29
 
19
- def start_capture
20
- @mutex.synchronize do
21
- @capture = StringIO.new
22
- end
23
- end
24
-
25
- def finish_capture
26
- @mutex.synchronize do
27
- cap = @capture.string
28
- @capture = nil
29
- cap
30
+ # If Logger.syslog is true then attempt to load the syslog bindings. If syslog
31
+ # cannot be loaded, then set Logger.syslog to false and continue.
32
+ #
33
+ # Returns nothing
34
+ def load_syslog
35
+ return unless Logger.syslog
36
+
37
+ begin
38
+ require 'syslog'
39
+
40
+ # Ensure that Syslog is open
41
+ begin
42
+ Syslog.open('god')
43
+ rescue RuntimeError
44
+ Syslog.reopen('god')
45
+ end
46
+ rescue Exception
47
+ Logger.syslog = false
30
48
  end
31
49
  end
32
50
 
51
+ # Log a message
52
+ # +watch+ is the Watch whose name keys the per-watch log (may be nil if no Watch is applicable)
53
+ # +level+ is the log level [:debug|:info|:warn|:error|:fatal]
54
+ # +text+ is the String message
55
+ #
56
+ # Returns nothing
33
57
  def log(watch, level, text)
34
58
  # initialize watch log if necessary
35
59
  self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT) if watch
36
60
 
37
61
  # push onto capture and timeline for the given watch
38
- buf = StringIO.new
39
- templog = ::Logger.new(buf)
40
- templog.level = Logger::INFO
41
- templog.send(level, text % [])
62
+ @templogio.truncate(0)
63
+ @templogio.rewind
64
+ @templog.send(level, text % [])
42
65
  @mutex.synchronize do
43
- @capture.puts(buf.string) if @capture
44
- self.logs[watch.name] << [Time.now, buf.string] if watch
66
+ @capture.puts(@templogio.string.dup) if @capture
67
+ self.logs[watch.name] << [Time.now, @templogio.string.dup] if watch
45
68
  end
46
- templog.close
47
69
 
48
70
  # send to regular logger
49
71
  self.send(level, text % [])
50
72
 
51
73
  # send to syslog
52
- Syslog.send(SYSLOG_EQUIVALENTS[level], text)
74
+ Syslog.send(SYSLOG_EQUIVALENTS[level], text) if Logger.syslog
53
75
  end
54
76
 
77
+ # Get all log output for a given Watch since a certain Time.
78
+ # +watch_name+ is the String name of the Watch
79
+ # +since+ is the Time since which to fetch log lines
80
+ #
81
+ # Returns String
55
82
  def watch_log_since(watch_name, since)
56
83
  # initialize watch log if necessary
57
84
  self.logs[watch_name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
@@ -65,6 +92,29 @@ module God
65
92
  end.join
66
93
  end
67
94
  end
95
+
96
+ # private
97
+
98
+ # Enable capturing of log
99
+ #
100
+ # Returns nothing
101
+ def start_capture
102
+ @mutex.synchronize do
103
+ @capture = StringIO.new
104
+ end
105
+ end
106
+
107
+ # Disable capturing of log and return what was captured since
108
+ # capturing was enabled with Logger#start_capture
109
+ #
110
+ # Returns String
111
+ def finish_capture
112
+ @mutex.synchronize do
113
+ cap = @capture.string
114
+ @capture = nil
115
+ cap
116
+ end
117
+ end
68
118
  end
69
119
 
70
120
  end