god 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. data/Announce.txt +6 -6
  2. data/Gemfile +2 -0
  3. data/History.txt +19 -2
  4. data/{README.txt → LICENSE} +0 -37
  5. data/README.md +31 -0
  6. data/Rakefile +80 -38
  7. data/bin/god +21 -21
  8. data/doc/god.asciidoc +1487 -0
  9. data/doc/intro.asciidoc +20 -0
  10. data/ext/god/extconf.rb +3 -3
  11. data/ext/god/kqueue_handler.c +18 -18
  12. data/ext/god/netlink_handler.c +31 -31
  13. data/god.gemspec +24 -16
  14. data/lib/god.rb +261 -204
  15. data/lib/god/behavior.rb +14 -14
  16. data/lib/god/behaviors/clean_pid_file.rb +5 -5
  17. data/lib/god/behaviors/clean_unix_socket.rb +10 -10
  18. data/lib/god/behaviors/notify_when_flapping.rb +12 -12
  19. data/lib/god/cli/command.rb +59 -46
  20. data/lib/god/cli/run.rb +33 -37
  21. data/lib/god/cli/version.rb +6 -6
  22. data/lib/god/compat19.rb +1 -4
  23. data/lib/god/condition.rb +21 -21
  24. data/lib/god/conditions/always.rb +19 -6
  25. data/lib/god/conditions/complex.rb +18 -18
  26. data/lib/god/conditions/cpu_usage.rb +14 -14
  27. data/lib/god/conditions/degrading_lambda.rb +8 -8
  28. data/lib/god/conditions/disk_usage.rb +5 -5
  29. data/lib/god/conditions/flapping.rb +23 -23
  30. data/lib/god/conditions/http_response_code.rb +35 -19
  31. data/lib/god/conditions/lambda.rb +2 -2
  32. data/lib/god/conditions/memory_usage.rb +13 -13
  33. data/lib/god/conditions/process_exits.rb +14 -20
  34. data/lib/god/conditions/process_running.rb +16 -25
  35. data/lib/god/conditions/socket_responding.rb +132 -0
  36. data/lib/god/conditions/tries.rb +10 -10
  37. data/lib/god/configurable.rb +10 -10
  38. data/lib/god/contact.rb +20 -20
  39. data/lib/god/contacts/email.rb +7 -4
  40. data/lib/god/contacts/jabber.rb +1 -1
  41. data/lib/god/driver.rb +96 -64
  42. data/lib/god/errors.rb +9 -9
  43. data/lib/god/event_handler.rb +19 -19
  44. data/lib/god/event_handlers/dummy_handler.rb +4 -4
  45. data/lib/god/event_handlers/kqueue_handler.rb +3 -3
  46. data/lib/god/event_handlers/netlink_handler.rb +2 -2
  47. data/lib/god/logger.rb +13 -13
  48. data/lib/god/metric.rb +50 -22
  49. data/lib/god/process.rb +53 -52
  50. data/lib/god/registry.rb +7 -7
  51. data/lib/god/simple_logger.rb +14 -14
  52. data/lib/god/socket.rb +11 -11
  53. data/lib/god/sugar.rb +30 -15
  54. data/lib/god/sys_logger.rb +2 -2
  55. data/lib/god/system/portable_poller.rb +8 -8
  56. data/lib/god/system/process.rb +8 -8
  57. data/lib/god/system/slash_proc_poller.rb +13 -13
  58. data/lib/god/task.rb +237 -188
  59. data/lib/god/timeline.rb +5 -5
  60. data/lib/god/trigger.rb +11 -11
  61. data/lib/god/watch.rb +205 -53
  62. data/test/configs/child_events/child_events.god +5 -5
  63. data/test/configs/child_events/simple_server.rb +1 -1
  64. data/test/configs/child_polls/child_polls.god +4 -4
  65. data/test/configs/child_polls/simple_server.rb +4 -4
  66. data/test/configs/complex/complex.god +7 -7
  67. data/test/configs/complex/simple_server.rb +1 -1
  68. data/test/configs/contact/contact.god +1 -1
  69. data/test/configs/contact/simple_server.rb +1 -1
  70. data/test/configs/daemon_events/daemon_events.god +5 -5
  71. data/test/configs/daemon_events/simple_server.rb +1 -1
  72. data/test/configs/daemon_events/simple_server_stop.rb +1 -1
  73. data/test/configs/daemon_polls/daemon_polls.god +3 -3
  74. data/test/configs/daemon_polls/simple_server.rb +1 -1
  75. data/test/configs/degrading_lambda/degrading_lambda.god +3 -3
  76. data/test/configs/keepalive/keepalive.god +9 -0
  77. data/test/configs/keepalive/keepalive.rb +12 -0
  78. data/test/configs/lifecycle/lifecycle.god +2 -2
  79. data/test/configs/matias/matias.god +6 -6
  80. data/test/configs/real.rb +7 -7
  81. data/test/configs/running_load/running_load.god +2 -2
  82. data/test/configs/stop_options/simple_server.rb +1 -1
  83. data/test/configs/stress/simple_server.rb +1 -1
  84. data/test/configs/stress/stress.god +2 -2
  85. data/test/configs/task/task.god +5 -5
  86. data/test/configs/test.rb +7 -7
  87. data/test/helper.rb +8 -8
  88. data/test/test_behavior.rb +3 -3
  89. data/test/test_campfire.rb +1 -2
  90. data/test/test_condition.rb +10 -10
  91. data/test/test_conditions_disk_usage.rb +12 -12
  92. data/test/test_conditions_http_response_code.rb +24 -24
  93. data/test/test_conditions_process_running.rb +7 -7
  94. data/test/test_conditions_socket_responding.rb +122 -0
  95. data/test/test_conditions_tries.rb +12 -12
  96. data/test/test_contact.rb +19 -19
  97. data/test/test_driver.rb +17 -3
  98. data/test/test_event_handler.rb +12 -12
  99. data/test/test_god.rb +195 -117
  100. data/test/test_handlers_kqueue_handler.rb +4 -4
  101. data/test/test_jabber.rb +1 -1
  102. data/test/test_logger.rb +17 -17
  103. data/test/test_metric.rb +16 -16
  104. data/test/test_process.rb +47 -41
  105. data/test/test_prowl.rb +1 -1
  106. data/test/test_registry.rb +2 -2
  107. data/test/test_socket.rb +3 -3
  108. data/test/test_sugar.rb +7 -7
  109. data/test/test_system_portable_poller.rb +1 -1
  110. data/test/test_system_process.rb +5 -5
  111. data/test/test_task.rb +57 -57
  112. data/test/test_timeline.rb +8 -8
  113. data/test/test_trigger.rb +16 -16
  114. data/test/test_watch.rb +69 -62
  115. metadata +182 -69
  116. data/lib/god/dependency_graph.rb +0 -41
  117. data/lib/god/diagnostics.rb +0 -37
  118. data/test/test_dependency_graph.rb +0 -62
@@ -1,64 +1,58 @@
1
1
  module God
2
2
  module Conditions
3
-
4
- # Condition Symbol :process_exits
5
- # Type: Event
6
- #
7
3
  # Trigger when a process exits.
8
4
  #
9
- # Paramaters
10
- # Required
11
5
  # +pid_file+ is the pid file of the process in question. Automatically
12
6
  # populated for Watches.
13
7
  #
14
8
  # Examples
15
9
  #
16
- # Trigger if process exits (from a Watch):
17
- #
10
+ # # Trigger if process exits (from a Watch).
18
11
  # on.condition(:process_exits)
19
12
  #
20
- # Trigger if process exits:
21
- #
13
+ # # Trigger if process exits (non-Watch).
22
14
  # on.condition(:process_exits) do |c|
23
15
  # c.pid_file = "/var/run/mongrel.3000.pid"
24
16
  # end
25
17
  class ProcessExits < EventCondition
18
+ # The String PID file location of the process in question. Automatically
19
+ # populated for Watches.
26
20
  attr_accessor :pid_file
27
-
21
+
28
22
  def initialize
29
23
  self.info = "process exited"
30
24
  end
31
-
25
+
32
26
  def valid?
33
27
  true
34
28
  end
35
-
29
+
36
30
  def pid
37
31
  self.pid_file ? File.read(self.pid_file).strip.to_i : self.watch.pid
38
32
  end
39
-
33
+
40
34
  def register
41
35
  pid = self.pid
42
-
36
+
43
37
  begin
44
38
  EventHandler.register(pid, :proc_exit) do |extra|
45
39
  formatted_extra = extra.size > 0 ? " #{extra.inspect}" : ""
46
40
  self.info = "process #{pid} exited#{formatted_extra}"
47
41
  self.watch.trigger(self)
48
42
  end
49
-
43
+
50
44
  msg = "#{self.watch.name} registered 'proc_exit' event for pid #{pid}"
51
45
  applog(self.watch, :info, msg)
52
46
  rescue StandardError
53
47
  raise EventRegistrationFailedError.new
54
48
  end
55
49
  end
56
-
50
+
57
51
  def deregister
58
52
  pid = self.pid
59
53
  if pid
60
54
  EventHandler.deregister(pid, :proc_exit)
61
-
55
+
62
56
  msg = "#{self.watch.name} deregistered 'proc_exit' event for pid #{pid}"
63
57
  applog(self.watch, :info, msg)
64
58
  else
@@ -67,6 +61,6 @@ module God
67
61
  end
68
62
  end
69
63
  end
70
-
64
+
71
65
  end
72
- end
66
+ end
@@ -1,58 +1,50 @@
1
1
  module God
2
2
  module Conditions
3
-
4
- # Condition Symbol :process_running
5
- # Type: Poll
6
- #
7
3
  # Trigger when a process is running or not running depending on attributes.
8
4
  #
9
- # Paramaters
10
- # Required
11
- # +pid_file+ is the pid file of the process in question. Automatically
12
- # populated for Watches.
13
- # +running" specifies whether you want to trigger if the process is
14
- # running (true) or whether it is not running (false)
15
- #
16
5
  # Examples
17
6
  #
18
- # Trigger if process IS NOT running (from a Watch):
19
- #
7
+ # # Trigger if process IS NOT running.
20
8
  # on.condition(:process_running) do |c|
21
9
  # c.running = false
22
10
  # end
23
11
  #
24
- # Trigger if process IS running (from a Watch):
25
- #
12
+ # # Trigger if process IS running.
26
13
  # on.condition(:process_running) do |c|
27
14
  # c.running = true
28
15
  # end
29
16
  #
30
- # Non-Watch Tasks must specify a PID file:
31
- #
17
+ # # Non-Watch Tasks must specify a PID file.
32
18
  # on.condition(:process_running) do |c|
33
19
  # c.running = false
34
20
  # c.pid_file = "/var/run/mongrel.3000.pid"
35
21
  # end
36
22
  class ProcessRunning < PollCondition
37
- attr_accessor :running, :pid_file
38
-
23
+ # Public: The Boolean specifying whether you want to trigger if the
24
+ # process is running (true) or if it is not running (false).
25
+ attr_accessor :running
26
+
27
+ # Public: The String PID file location of the process in question.
28
+ # Automatically populated for Watches.
29
+ attr_accessor :pid_file
30
+
39
31
  def pid
40
32
  self.pid_file ? File.read(self.pid_file).strip.to_i : self.watch.pid
41
33
  end
42
-
34
+
43
35
  def valid?
44
36
  valid = true
45
37
  valid &= complain("Attribute 'pid_file' must be specified", self) if self.pid_file.nil? && self.watch.pid_file.nil?
46
38
  valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
47
39
  valid
48
40
  end
49
-
41
+
50
42
  def test
51
43
  self.info = []
52
-
44
+
53
45
  pid = self.pid
54
46
  active = pid && System::Process.new(pid).exists?
55
-
47
+
56
48
  if (self.running && active)
57
49
  self.info.concat(["process is running"])
58
50
  true
@@ -69,6 +61,5 @@ module God
69
61
  end
70
62
  end
71
63
  end
72
-
73
64
  end
74
- end
65
+ end
@@ -0,0 +1,132 @@
1
+ require 'socket'
2
+ include Socket::Constants
3
+
4
+ module God
5
+ module Conditions
6
+ # Condition Symbol :socket_responding
7
+ # Type: Poll
8
+ #
9
+ # Trigger when a TCP or UNIX socket is not responding
10
+ #
11
+ # Parameters
12
+ # Required
13
+ # +family+ is the family of socket: either 'tcp' or 'unix'
14
+ # --one of port or path--
15
+ # +port+ is the port (required if +family+ is 'tcp')
16
+ # +path+ is the path (required if +family+ is 'unix')
17
+ #
18
+ # Examples
19
+ #
20
+ # Trigger if the TCP socket on port 80 is not responding or the connection is refused
21
+ #
22
+ # on.condition(:socket_responding) do |c|
23
+ # c.family = 'tcp'
24
+ # c.port = '80'
25
+ # end
26
+ #
27
+ # Trigger if the socket is not responding or the connection is refused (use alternate compact +socket+ interface)
28
+ #
29
+ # on.condition(:socket_responding) do |c|
30
+ # c.socket = 'tcp:80'
31
+ # end
32
+ #
33
+ # Trigger if the socket is not responding or the connection is refused 5 times in a row
34
+ #
35
+ # on.condition(:socket_responding) do |c|
36
+ # c.socket = 'tcp:80'
37
+ # c.times = 5
38
+ # end
39
+ #
40
+ # Trigger if the Unix socket on path '/tmp/sock' is not responding or non-existent
41
+ #
42
+ # on.condition(:socket_responding) do |c|
43
+ # c.family = 'unix'
44
+ # c.path = '/tmp/sock'
45
+ # end
46
+ #
47
+
48
+
49
+ class SocketResponding < PollCondition
50
+ attr_accessor :family, :addr, :port, :path, :times
51
+
52
+ def initialize
53
+ super
54
+ # default to tcp on the localhost
55
+ self.family = 'tcp'
56
+ self.addr = '127.0.0.1'
57
+ # Set these to nil/0 values
58
+ self.port = 0
59
+ self.path = nil
60
+
61
+ self.times = [1, 1]
62
+ end
63
+
64
+ def prepare
65
+ if self.times.kind_of?(Integer)
66
+ self.times = [self.times, self.times]
67
+ end
68
+
69
+ @timeline = Timeline.new(self.times[1])
70
+ @history = Timeline.new(self.times[1])
71
+ end
72
+
73
+ def reset
74
+ @timeline.clear
75
+ @history.clear
76
+ end
77
+
78
+ def socket=(s)
79
+ components = s.split(':')
80
+ if components.size == 3
81
+ @family,@addr,@port = components
82
+ @port = @port.to_i
83
+ elsif components[0] =~ /^tcp$/
84
+ @family = components[0]
85
+ @port = components[1].to_i
86
+ elsif components[0] =~ /^unix$/
87
+ @family = components[0]
88
+ @path = components[1]
89
+ end
90
+ end
91
+
92
+ def valid?
93
+ valid = true
94
+ if self.family == 'tcp' and @port == 0
95
+ valid &= complain("Attribute 'port' must be specified for tcp sockets", self)
96
+ end
97
+ if self.family == 'unix' and self.path.nil?
98
+ valid &= complain("Attribute 'path' must be specified for unix sockets", self)
99
+ end
100
+ valid = false unless %w{tcp unix}.member?(self.family)
101
+ valid
102
+ end
103
+
104
+ def test
105
+ if self.family == 'tcp'
106
+ begin
107
+ s = TCPSocket.new(self.addr, self.port)
108
+ rescue SystemCallError
109
+ end
110
+ status = s.nil?
111
+ elsif self.family == 'unix'
112
+ begin
113
+ s = UNIXSocket.new(self.path)
114
+ rescue SystemCallError
115
+ end
116
+ status = s.nil?
117
+ else
118
+ status = false
119
+ end
120
+ @timeline.push(status)
121
+ history = "[" + @timeline.map {|t| t ? '*' : ''}.join(',') + "]"
122
+ if @timeline.select { |x| x }.size >= self.times.first
123
+ self.info = "socket out of bounds #{history}"
124
+ return true
125
+ else
126
+ self.info = "socket within bounds #{history}"
127
+ return false
128
+ end
129
+ end
130
+ end
131
+ end
132
+ end
@@ -1,35 +1,35 @@
1
1
  module God
2
2
  module Conditions
3
-
3
+
4
4
  class Tries < PollCondition
5
5
  attr_accessor :times, :within
6
-
6
+
7
7
  def prepare
8
8
  @timeline = Timeline.new(self.times)
9
9
  end
10
-
10
+
11
11
  def reset
12
12
  @timeline.clear
13
13
  end
14
-
14
+
15
15
  def valid?
16
16
  valid = true
17
17
  valid &= complain("Attribute 'times' must be specified", self) if self.times.nil?
18
18
  valid
19
19
  end
20
-
20
+
21
21
  def test
22
22
  @timeline << Time.now
23
-
23
+
24
24
  concensus = (@timeline.size == self.times)
25
25
  duration = self.within.nil? || (@timeline.last - @timeline.first) < self.within
26
-
26
+
27
27
  if within
28
28
  history = "[#{@timeline.size}/#{self.times} within #{(@timeline.last - @timeline.first).to_i}s]"
29
29
  else
30
30
  history = "[#{@timeline.size}/#{self.times}]"
31
31
  end
32
-
32
+
33
33
  if concensus && duration
34
34
  self.info = "tries exceeded #{history}"
35
35
  return true
@@ -39,6 +39,6 @@ module God
39
39
  end
40
40
  end
41
41
  end
42
-
42
+
43
43
  end
44
- end
44
+ end
@@ -1,18 +1,18 @@
1
1
  module God
2
-
2
+
3
3
  module Configurable
4
4
  # Override this method in your Configurable (optional)
5
5
  #
6
6
  # Called once after the Configurable has been sent to the block and attributes have been
7
7
  # set. Do any post-processing on attributes here
8
8
  def prepare
9
-
9
+
10
10
  end
11
-
11
+
12
12
  def reset
13
-
13
+
14
14
  end
15
-
15
+
16
16
  # Override this method in your Configurable (optional)
17
17
  #
18
18
  # Called once during evaluation of the config file. Return true if valid, false otherwise
@@ -29,16 +29,16 @@ module God
29
29
  def valid?
30
30
  true
31
31
  end
32
-
32
+
33
33
  def base_name
34
34
  x = 1 # fix for MRI's local scope optimization bug DO NOT REMOVE!
35
35
  @base_name ||= self.class.name.split('::').last
36
36
  end
37
-
37
+
38
38
  def friendly_name
39
39
  base_name
40
40
  end
41
-
41
+
42
42
  def self.complain(text, c = nil)
43
43
  watch = c.watch rescue nil
44
44
  msg = ""
@@ -48,10 +48,10 @@ module God
48
48
  applog(watch, :error, msg)
49
49
  false
50
50
  end
51
-
51
+
52
52
  def complain(text, c = nil)
53
53
  Configurable.complain(text, c)
54
54
  end
55
55
  end
56
-
56
+
57
57
  end
@@ -1,37 +1,37 @@
1
1
  module God
2
-
2
+
3
3
  class Contact
4
4
  include Configurable
5
-
5
+
6
6
  attr_accessor :name, :group, :info
7
-
7
+
8
8
  def self.generate(kind)
9
9
  sym = kind.to_s.capitalize.gsub(/_(.)/){$1.upcase}.intern
10
10
  c = God::Contacts.const_get(sym).new
11
-
11
+
12
12
  unless c.kind_of?(Contact)
13
- abort "Contact '#{c.class.name}' must subclass God::Contact"
13
+ abort "Contact '#{c.class.name}' must subclass God::Contact"
14
14
  end
15
-
15
+
16
16
  c
17
17
  rescue NameError
18
18
  raise NoSuchContactError.new("No Contact found with the class name God::Contacts::#{sym}")
19
19
  end
20
-
20
+
21
21
  def self.valid?(contact)
22
22
  valid = true
23
23
  valid &= Configurable.complain("Attribute 'name' must be specified", contact) if contact.name.nil?
24
24
  valid
25
25
  end
26
-
26
+
27
27
  def self.defaults
28
28
  yield self
29
29
  end
30
-
30
+
31
31
  def arg(name)
32
32
  self.instance_variable_get("@#{name}") || self.class.instance_variable_get("@#{name}")
33
33
  end
34
-
34
+
35
35
  # Normalize the given notify specification into canonical form.
36
36
  # +spec+ is the notify spec as a String, Array of Strings, or Hash
37
37
  #
@@ -40,7 +40,7 @@ module God
40
40
  # Where :contacts will be present and point to an Array of Strings. Both
41
41
  # :priority and :category may not be present but if they are, they will each
42
42
  # contain a single String.
43
- #
43
+ #
44
44
  # Returns normalized notify spec
45
45
  # Raises ArgumentError on invalid spec (message contains details)
46
46
  def self.normalize(spec)
@@ -54,7 +54,7 @@ module God
54
54
  {:contacts => spec}
55
55
  when Hash
56
56
  copy = spec.dup
57
-
57
+
58
58
  # check :contacts
59
59
  if contacts = copy.delete(:contacts)
60
60
  case contacts
@@ -71,27 +71,27 @@ module God
71
71
  else
72
72
  raise ArgumentError.new("must have a :contacts key")
73
73
  end
74
-
74
+
75
75
  # remove priority and category
76
76
  copy.delete(:priority)
77
77
  copy.delete(:category)
78
-
78
+
79
79
  # check for invalid keys
80
80
  unless copy.empty?
81
81
  raise ArgumentError.new("contains extra elements: #{copy.inspect}")
82
82
  end
83
-
83
+
84
84
  # normalize
85
85
  spec[:contacts] &&= Array(spec[:contacts])
86
86
  spec[:priority] &&= spec[:priority].to_s
87
87
  spec[:category] &&= spec[:category].to_s
88
-
88
+
89
89
  spec
90
90
  else
91
91
  raise ArgumentError.new("must be a String (contact name), Array (of contact names), or Hash (contact specification)")
92
92
  end
93
93
  end
94
-
94
+
95
95
  # Abstract
96
96
  # Send the message to the external source
97
97
  # +message+ is the message body returned from the condition
@@ -102,7 +102,7 @@ module God
102
102
  def notify(message, time, priority, category, host)
103
103
  raise AbstractMethodNotOverriddenError.new("Contact#notify must be overridden in subclasses")
104
104
  end
105
-
105
+
106
106
  # Construct the friendly name of this Contact, looks like:
107
107
  #
108
108
  # Contact FooBar
@@ -110,5 +110,5 @@ module God
110
110
  super + " Contact '#{self.name}'"
111
111
  end
112
112
  end
113
-
114
- end
113
+
114
+ end