god 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. data/History.txt +67 -1
  2. data/Manifest.txt +3 -4
  3. data/Rakefile +1 -1
  4. data/bin/god +19 -1
  5. data/lib/god.rb +86 -49
  6. data/lib/god/cli/command.rb +7 -1
  7. data/lib/god/cli/run.rb +58 -0
  8. data/lib/god/condition.rb +6 -2
  9. data/lib/god/conditions/cpu_usage.rb +7 -6
  10. data/lib/god/conditions/http_response_code.rb +5 -1
  11. data/lib/god/conditions/memory_usage.rb +7 -6
  12. data/lib/god/conditions/process_exits.rb +15 -10
  13. data/lib/god/conditions/process_running.rb +17 -13
  14. data/lib/god/diagnostics.rb +37 -0
  15. data/lib/god/driver.rb +108 -0
  16. data/lib/god/event_handler.rb +41 -1
  17. data/lib/god/logger.rb +69 -19
  18. data/lib/god/metric.rb +2 -2
  19. data/lib/god/process.rb +84 -27
  20. data/lib/god/task.rb +286 -29
  21. data/lib/god/timeline.rb +20 -31
  22. data/lib/god/watch.rb +26 -15
  23. data/test/configs/child_events/child_events.god +0 -5
  24. data/test/configs/child_polls/simple_server.rb +1 -1
  25. data/test/configs/daemon_events/simple_server_stop.rb +2 -0
  26. data/test/configs/stress/stress.god +1 -1
  27. data/test/configs/test.rb +12 -28
  28. data/test/test_condition.rb +8 -0
  29. data/test/test_conditions_http_response_code.rb +5 -5
  30. data/test/test_conditions_process_running.rb +6 -4
  31. data/test/test_driver.rb +11 -0
  32. data/test/test_event_handler.rb +7 -0
  33. data/test/test_god.rb +63 -62
  34. data/test/test_metric.rb +0 -16
  35. data/test/test_process.rb +29 -1
  36. data/test/test_task.rb +177 -1
  37. data/test/test_timeline.rb +2 -1
  38. data/test/test_watch.rb +24 -6
  39. metadata +6 -8
  40. data/lib/god/hub.rb +0 -222
  41. data/lib/god/timer.rb +0 -87
  42. data/test/test_hub.rb +0 -240
  43. data/test/test_timer.rb +0 -69
@@ -1,7 +1,7 @@
1
1
  module God
2
2
 
3
3
  class Condition < Behavior
4
- attr_accessor :transition, :notify, :info
4
+ attr_accessor :transition, :notify, :info, :phase
5
5
 
6
6
  # Generate a Condition of the given kind. The proper class is found by camel casing the
7
7
  # kind (which is given as an underscored symbol).
@@ -14,6 +14,10 @@ module God
14
14
  abort "Condition '#{c.class.name}' must subclass God::PollCondition, God::EventCondition, or God::TriggerCondition"
15
15
  end
16
16
 
17
+ if !EventHandler.loaded? && c.kind_of?(EventCondition)
18
+ abort "Condition '#{c.class.name}' requires an event system but none has been loaded"
19
+ end
20
+
17
21
  c.watch = watch
18
22
  c
19
23
  rescue NameError
@@ -77,7 +81,7 @@ module God
77
81
  end
78
82
 
79
83
  def trigger
80
- Hub.trigger(self)
84
+ self.watch.trigger(self)
81
85
  end
82
86
 
83
87
  def register
@@ -29,7 +29,7 @@ module God
29
29
  # c.pid_file = "/var/run/mongrel.3000.pid"
30
30
  # end
31
31
  class CpuUsage < PollCondition
32
- attr_accessor :above, :times
32
+ attr_accessor :above, :times, :pid_file
33
33
 
34
34
  def initialize
35
35
  super
@@ -49,18 +49,19 @@ module God
49
49
  @timeline.clear
50
50
  end
51
51
 
52
+ def pid
53
+ self.watch.pid || File.read(self.pid_file).strip.to_i
54
+ end
55
+
52
56
  def valid?
53
57
  valid = true
54
- valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
58
+ valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
55
59
  valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
56
60
  valid
57
61
  end
58
62
 
59
63
  def test
60
- return false unless File.exist?(self.watch.pid_file)
61
-
62
- pid = File.read(self.watch.pid_file).strip
63
- process = System::Process.new(pid)
64
+ process = System::Process.new(self.pid)
64
65
  @timeline.push(process.percent_cpu)
65
66
 
66
67
  history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}%%" }.join(", ") + "]"
@@ -108,7 +108,7 @@ module God
108
108
 
109
109
  Net::HTTP.start(self.host, self.port) do |http|
110
110
  http.read_timeout = self.timeout
111
- response = http.head(self.path)
111
+ response = http.get(self.path)
112
112
  end
113
113
 
114
114
  actual_response_code = response.code.to_i
@@ -121,6 +121,10 @@ module God
121
121
  end
122
122
  rescue Errno::ECONNREFUSED
123
123
  self.code_is ? fail('Refused') : pass('Refused')
124
+ rescue Errno::ECONNRESET
125
+ self.code_is ? fail('Reset') : pass('Reset')
126
+ rescue EOFError
127
+ self.code_is ? fail('EOF') : pass('EOF')
124
128
  rescue Timeout::Error
125
129
  self.code_is ? fail('Timeout') : pass('Timeout')
126
130
  end
@@ -31,7 +31,7 @@ module God
31
31
  # c.pid_file = "/var/run/mongrel.3000.pid"
32
32
  # end
33
33
  class MemoryUsage < PollCondition
34
- attr_accessor :above, :times
34
+ attr_accessor :above, :times, :pid_file
35
35
 
36
36
  def initialize
37
37
  super
@@ -51,18 +51,19 @@ module God
51
51
  @timeline.clear
52
52
  end
53
53
 
54
+ def pid
55
+ self.watch.pid || File.read(self.pid_file).strip.to_i
56
+ end
57
+
54
58
  def valid?
55
59
  valid = true
56
- valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
60
+ valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
57
61
  valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
58
62
  valid
59
63
  end
60
64
 
61
65
  def test
62
- return false unless File.exist?(self.watch.pid_file)
63
-
64
- pid = File.read(self.watch.pid_file).strip
65
- process = System::Process.new(pid)
66
+ process = System::Process.new(self.pid)
66
67
  @timeline.push(process.memory)
67
68
 
68
69
  history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}kb" }.join(", ") + "]"
@@ -28,30 +28,35 @@ module God
28
28
  end
29
29
 
30
30
  def valid?
31
- valid = true
32
- valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
33
- valid
31
+ true
34
32
  end
35
-
33
+
36
34
  def register
37
- pid = File.read(self.watch.pid_file).strip.to_i
35
+ pid = self.watch.pid
38
36
 
39
37
  begin
40
38
  EventHandler.register(pid, :proc_exit) do |extra|
41
- self.info = "process exited #{extra.inspect}"
42
- Hub.trigger(self)
39
+ formatted_extra = extra.size > 0 ? " #{extra.inspect}" : ""
40
+ self.info = "process #{pid} exited#{formatted_extra}"
41
+ self.watch.trigger(self)
43
42
  end
43
+
44
+ msg = "#{self.watch.name} registered 'proc_exit' event for pid #{pid}"
45
+ applog(self.watch, :info, msg)
44
46
  rescue StandardError
45
47
  raise EventRegistrationFailedError.new
46
48
  end
47
49
  end
48
50
 
49
51
  def deregister
50
- if File.exist?(self.watch.pid_file)
51
- pid = File.read(self.watch.pid_file).strip.to_i
52
+ pid = self.watch.pid
53
+ if pid
52
54
  EventHandler.deregister(pid, :proc_exit)
55
+
56
+ msg = "#{self.watch.name} deregistered 'proc_exit' event for pid #{pid}"
57
+ applog(self.watch, :info, msg)
53
58
  else
54
- applog(self.watch, :error, "#{self.watch.name} could not deregister: no such PID file #{self.watch.pid_file} (#{self.base_name})")
59
+ applog(self.watch, :error, "#{self.watch.name} could not deregister: no cached PID or PID file #{self.watch.pid_file} (#{self.base_name})")
55
60
  end
56
61
  end
57
62
  end
@@ -34,37 +34,41 @@ module God
34
34
  # c.pid_file = "/var/run/mongrel.3000.pid"
35
35
  # end
36
36
  class ProcessRunning < PollCondition
37
- attr_accessor :running
37
+ attr_accessor :running, :pid_file
38
+
39
+ def pid
40
+ self.watch.pid || File.read(self.pid_file).strip.to_i
41
+ end
38
42
 
39
43
  def valid?
40
44
  valid = true
41
- valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
45
+ valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil? && self.pid_file.nil?
42
46
  valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
43
47
  valid
44
48
  end
45
-
49
+
46
50
  def test
47
51
  self.info = []
48
52
 
49
- unless File.exist?(self.watch.pid_file)
50
- self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
51
- return !self.running
52
- end
53
+ # unless File.exist?(self.watch.pid_file)
54
+ # self.info << "#{self.watch.name} #{self.class.name}: no such pid file: #{self.watch.pid_file}"
55
+ # return !self.running
56
+ # end
53
57
 
54
- pid = File.read(self.watch.pid_file).strip
55
- active = System::Process.new(pid).exists?
58
+ pid = self.watch.pid
59
+ active = pid && System::Process.new(pid).exists?
56
60
 
57
61
  if (self.running && active)
58
- self.info << "process is running"
62
+ self.info.concat(["process is running"])
59
63
  true
60
64
  elsif (!self.running && !active)
61
- self.info << "process is not running"
65
+ self.info.concat(["process is not running"])
62
66
  true
63
67
  else
64
68
  if self.running
65
- self.info << "process is not running"
69
+ self.info.concat(["process is not running"])
66
70
  else
67
- self.info << "process is running"
71
+ self.info.concat(["process is running"])
68
72
  end
69
73
  false
70
74
  end
@@ -0,0 +1,37 @@
1
+ def start_dike
2
+ require 'dike'
3
+ Thread.new do
4
+ Dike.logfactory File.join(File.dirname(__FILE__), *%w[.. .. logs])
5
+ loop do
6
+ Dike.finger
7
+ sleep(1)
8
+ end
9
+ end
10
+ end
11
+
12
+ class BleakHouseDiagnostic
13
+ LOG_FILE = File.join(File.dirname(__FILE__), *%w[.. .. logs bleak.log])
14
+
15
+ class << self
16
+ attr_accessor :logger
17
+ end
18
+
19
+ def self.install
20
+ require 'bleak_house'
21
+ self.logger = BleakHouse::Logger.new
22
+ File.delete(LOG_FILE) rescue nil
23
+ end
24
+
25
+ def self.snapshot
26
+ self.logger.snapshot(LOG_FILE, "timer", false) if self.logger
27
+ end
28
+
29
+ def self.spin(delay = 1)
30
+ Thread.new do
31
+ loop do
32
+ self.snapshot
33
+ sleep(delay)
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,108 @@
1
+ module God
2
+
3
+ class DriverEvent
4
+ attr_accessor :condition, :at
5
+
6
+ # Instantiate a new DriverEvent that will be triggered after the specified delay
7
+ # +condition+ is the Condition
8
+ # +delay+ is the number of seconds from now at which to trigger
9
+ #
10
+ # Returns DriverEvent
11
+ def initialize(condition, delay)
12
+ self.condition = condition
13
+ self.at = Time.now + delay
14
+ end
15
+
16
+ def due?
17
+ Time.now >= self.at
18
+ end
19
+ end # DriverEvent
20
+
21
+ class Driver
22
+ attr_reader :thread
23
+
24
+ INTERVAL = 0.25
25
+
26
+ # Instantiate a new Driver and start the scheduler loop to handle events
27
+ # +task+ is the Task this Driver belongs to
28
+ #
29
+ # Returns Driver
30
+ def initialize(task)
31
+ @task = task
32
+ @events = []
33
+ @ops = Queue.new
34
+
35
+ @thread = Thread.new do
36
+ loop do
37
+ begin
38
+ if !@ops.empty?
39
+ self.handle_op
40
+ elsif !@events.empty?
41
+ self.handle_event
42
+ else
43
+ sleep INTERVAL
44
+ end
45
+ rescue Exception => e
46
+ message = format("Unhandled exception in driver loop - (%s): %s\n%s",
47
+ e.class, e.message, e.backtrace.join("\n"))
48
+ applog(nil, :fatal, message)
49
+ end
50
+ end
51
+ end
52
+ end
53
+
54
+ # Handle the next queued operation that was issued asynchronously
55
+ #
56
+ # Returns nothing
57
+ def handle_op
58
+ command = @ops.pop
59
+ @task.send(command[0], *command[1])
60
+ end
61
+
62
+ # Handle the next event (poll condition) that is due
63
+ #
64
+ # Returns nothing
65
+ def handle_event
66
+ if @events.first.due?
67
+ event = @events.shift
68
+ @task.handle_poll(event.condition)
69
+ end
70
+
71
+ # don't sleep if there is a pending event and it is due
72
+ unless @events.first && @events.first.due?
73
+ sleep INTERVAL
74
+ end
75
+ end
76
+
77
+ # Clear all events for this Driver
78
+ #
79
+ # Returns nothing
80
+ def clear_events
81
+ @events.clear
82
+ end
83
+
84
+ # Queue an asynchronous message
85
+ # +name+ is the Symbol name of the operation
86
+ # +args+ is an optional Array of arguments
87
+ #
88
+ # Returns nothing
89
+ def message(name, args = [])
90
+ @ops.push([name, args])
91
+ end
92
+
93
+ # Create and schedule a new DriverEvent
94
+ # +condition+ is the Condition
95
+ # +delay+ is the number of seconds to delay (default: interval defined in condition)
96
+ #
97
+ # Returns nothing
98
+ def schedule(condition, delay = condition.interval)
99
+ applog(nil, :debug, "driver schedule #{condition} in #{delay} seconds")
100
+
101
+ @events.concat([DriverEvent.new(condition, delay)])
102
+
103
+ # sort events
104
+ @events.sort! { |x, y| x.at <=> y.at }
105
+ end
106
+ end # Driver
107
+
108
+ end # God
@@ -61,9 +61,49 @@ module God
61
61
  def self.start
62
62
  Thread.new do
63
63
  loop do
64
- @@handler.handle_events
64
+ begin
65
+ @@handler.handle_events
66
+ rescue Exception => e
67
+ message = format("Unhandled exception (%s): %s\n%s",
68
+ e.class, e.message, e.backtrace.join("\n"))
69
+ applog(nil, :fatal, message)
70
+ end
65
71
  end
66
72
  end
73
+
74
+ # do a real test to make sure events are working properly
75
+ @@loaded = self.operational?
76
+ end
77
+
78
+ def self.operational?
79
+ com = [false]
80
+
81
+ Thread.new do
82
+ begin
83
+ event_system = God::EventHandler.event_system
84
+
85
+ pid = fork do
86
+ loop { sleep(1) }
87
+ end
88
+
89
+ self.register(pid, :proc_exit) do
90
+ com[0] = true
91
+ end
92
+
93
+ ::Process.kill('KILL', pid)
94
+
95
+ sleep(0.1)
96
+
97
+ self.deregister(pid, :proc_exit) rescue nil
98
+ rescue => e
99
+ puts e.message
100
+ puts e.backtrace.join("\n")
101
+ end
102
+ end.join
103
+
104
+ sleep(0.1)
105
+
106
+ com.first
67
107
  end
68
108
 
69
109
  end
@@ -9,49 +9,76 @@ module God
9
9
 
10
10
  attr_accessor :logs
11
11
 
12
+ class << self
13
+ attr_accessor :syslog
14
+ end
15
+
16
+ self.syslog ||= true
17
+
18
+ # Instantiate a new Logger object
12
19
  def initialize
13
20
  super($stdout)
14
21
  self.logs = {}
15
22
  @mutex = Mutex.new
16
23
  @capture = nil
24
+ @templogio = StringIO.new
25
+ @templog = ::Logger.new(@templogio)
26
+ @templog.level = Logger::INFO
27
+ load_syslog
17
28
  end
18
29
 
19
- def start_capture
20
- @mutex.synchronize do
21
- @capture = StringIO.new
22
- end
23
- end
24
-
25
- def finish_capture
26
- @mutex.synchronize do
27
- cap = @capture.string
28
- @capture = nil
29
- cap
30
+ # If Logger.syslog is true then attempt to load the syslog bindings. If syslog
31
+ # cannot be loaded, then set Logger.syslog to false and continue.
32
+ #
33
+ # Returns nothing
34
+ def load_syslog
35
+ return unless Logger.syslog
36
+
37
+ begin
38
+ require 'syslog'
39
+
40
+ # Ensure that Syslog is open
41
+ begin
42
+ Syslog.open('god')
43
+ rescue RuntimeError
44
+ Syslog.reopen('god')
45
+ end
46
+ rescue Exception
47
+ Logger.syslog = false
30
48
  end
31
49
  end
32
50
 
51
+ # Log a message
52
+ # +watch+ is the Watch (may be nil if no Watch is applicable)
53
+ # +level+ is the log level [:debug|:info|:warn|:error|:fatal]
54
+ # +text+ is the String message
55
+ #
56
+ # Returns nothing
33
57
  def log(watch, level, text)
34
58
  # initialize watch log if necessary
35
59
  self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT) if watch
36
60
 
37
61
  # push onto capture and timeline for the given watch
38
- buf = StringIO.new
39
- templog = ::Logger.new(buf)
40
- templog.level = Logger::INFO
41
- templog.send(level, text % [])
62
+ @templogio.truncate(0)
63
+ @templogio.rewind
64
+ @templog.send(level, text % [])
42
65
  @mutex.synchronize do
43
- @capture.puts(buf.string) if @capture
44
- self.logs[watch.name] << [Time.now, buf.string] if watch
66
+ @capture.puts(@templogio.string.dup) if @capture
67
+ self.logs[watch.name] << [Time.now, @templogio.string.dup] if watch
45
68
  end
46
- templog.close
47
69
 
48
70
  # send to regular logger
49
71
  self.send(level, text % [])
50
72
 
51
73
  # send to syslog
52
- Syslog.send(SYSLOG_EQUIVALENTS[level], text)
74
+ Syslog.send(SYSLOG_EQUIVALENTS[level], text) if Logger.syslog
53
75
  end
54
76
 
77
+ # Get all log output for a given Watch since a certain Time.
78
+ # +watch_name+ is the String name of the Watch
79
+ # +since+ is the Time since which to fetch log lines
80
+ #
81
+ # Returns String
55
82
  def watch_log_since(watch_name, since)
56
83
  # initialize watch log if necessary
57
84
  self.logs[watch_name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
@@ -65,6 +92,29 @@ module God
65
92
  end.join
66
93
  end
67
94
  end
95
+
96
+ # private
97
+
98
+ # Enable capturing of log
99
+ #
100
+ # Returns nothing
101
+ def start_capture
102
+ @mutex.synchronize do
103
+ @capture = StringIO.new
104
+ end
105
+ end
106
+
107
+ # Disable capturing of log and return what was captured since
108
+ # capturing was enabled with Logger#start_capture
109
+ #
110
+ # Returns String
111
+ def finish_capture
112
+ @mutex.synchronize do
113
+ cap = @capture.string
114
+ @capture = nil
115
+ cap
116
+ end
117
+ end
68
118
  end
69
119
 
70
120
  end