god 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. data/Announce.txt +6 -6
  2. data/Gemfile +2 -0
  3. data/History.txt +19 -2
  4. data/{README.txt → LICENSE} +0 -37
  5. data/README.md +31 -0
  6. data/Rakefile +80 -38
  7. data/bin/god +21 -21
  8. data/doc/god.asciidoc +1487 -0
  9. data/doc/intro.asciidoc +20 -0
  10. data/ext/god/extconf.rb +3 -3
  11. data/ext/god/kqueue_handler.c +18 -18
  12. data/ext/god/netlink_handler.c +31 -31
  13. data/god.gemspec +24 -16
  14. data/lib/god.rb +261 -204
  15. data/lib/god/behavior.rb +14 -14
  16. data/lib/god/behaviors/clean_pid_file.rb +5 -5
  17. data/lib/god/behaviors/clean_unix_socket.rb +10 -10
  18. data/lib/god/behaviors/notify_when_flapping.rb +12 -12
  19. data/lib/god/cli/command.rb +59 -46
  20. data/lib/god/cli/run.rb +33 -37
  21. data/lib/god/cli/version.rb +6 -6
  22. data/lib/god/compat19.rb +1 -4
  23. data/lib/god/condition.rb +21 -21
  24. data/lib/god/conditions/always.rb +19 -6
  25. data/lib/god/conditions/complex.rb +18 -18
  26. data/lib/god/conditions/cpu_usage.rb +14 -14
  27. data/lib/god/conditions/degrading_lambda.rb +8 -8
  28. data/lib/god/conditions/disk_usage.rb +5 -5
  29. data/lib/god/conditions/flapping.rb +23 -23
  30. data/lib/god/conditions/http_response_code.rb +35 -19
  31. data/lib/god/conditions/lambda.rb +2 -2
  32. data/lib/god/conditions/memory_usage.rb +13 -13
  33. data/lib/god/conditions/process_exits.rb +14 -20
  34. data/lib/god/conditions/process_running.rb +16 -25
  35. data/lib/god/conditions/socket_responding.rb +132 -0
  36. data/lib/god/conditions/tries.rb +10 -10
  37. data/lib/god/configurable.rb +10 -10
  38. data/lib/god/contact.rb +20 -20
  39. data/lib/god/contacts/email.rb +7 -4
  40. data/lib/god/contacts/jabber.rb +1 -1
  41. data/lib/god/driver.rb +96 -64
  42. data/lib/god/errors.rb +9 -9
  43. data/lib/god/event_handler.rb +19 -19
  44. data/lib/god/event_handlers/dummy_handler.rb +4 -4
  45. data/lib/god/event_handlers/kqueue_handler.rb +3 -3
  46. data/lib/god/event_handlers/netlink_handler.rb +2 -2
  47. data/lib/god/logger.rb +13 -13
  48. data/lib/god/metric.rb +50 -22
  49. data/lib/god/process.rb +53 -52
  50. data/lib/god/registry.rb +7 -7
  51. data/lib/god/simple_logger.rb +14 -14
  52. data/lib/god/socket.rb +11 -11
  53. data/lib/god/sugar.rb +30 -15
  54. data/lib/god/sys_logger.rb +2 -2
  55. data/lib/god/system/portable_poller.rb +8 -8
  56. data/lib/god/system/process.rb +8 -8
  57. data/lib/god/system/slash_proc_poller.rb +13 -13
  58. data/lib/god/task.rb +237 -188
  59. data/lib/god/timeline.rb +5 -5
  60. data/lib/god/trigger.rb +11 -11
  61. data/lib/god/watch.rb +205 -53
  62. data/test/configs/child_events/child_events.god +5 -5
  63. data/test/configs/child_events/simple_server.rb +1 -1
  64. data/test/configs/child_polls/child_polls.god +4 -4
  65. data/test/configs/child_polls/simple_server.rb +4 -4
  66. data/test/configs/complex/complex.god +7 -7
  67. data/test/configs/complex/simple_server.rb +1 -1
  68. data/test/configs/contact/contact.god +1 -1
  69. data/test/configs/contact/simple_server.rb +1 -1
  70. data/test/configs/daemon_events/daemon_events.god +5 -5
  71. data/test/configs/daemon_events/simple_server.rb +1 -1
  72. data/test/configs/daemon_events/simple_server_stop.rb +1 -1
  73. data/test/configs/daemon_polls/daemon_polls.god +3 -3
  74. data/test/configs/daemon_polls/simple_server.rb +1 -1
  75. data/test/configs/degrading_lambda/degrading_lambda.god +3 -3
  76. data/test/configs/keepalive/keepalive.god +9 -0
  77. data/test/configs/keepalive/keepalive.rb +12 -0
  78. data/test/configs/lifecycle/lifecycle.god +2 -2
  79. data/test/configs/matias/matias.god +6 -6
  80. data/test/configs/real.rb +7 -7
  81. data/test/configs/running_load/running_load.god +2 -2
  82. data/test/configs/stop_options/simple_server.rb +1 -1
  83. data/test/configs/stress/simple_server.rb +1 -1
  84. data/test/configs/stress/stress.god +2 -2
  85. data/test/configs/task/task.god +5 -5
  86. data/test/configs/test.rb +7 -7
  87. data/test/helper.rb +8 -8
  88. data/test/test_behavior.rb +3 -3
  89. data/test/test_campfire.rb +1 -2
  90. data/test/test_condition.rb +10 -10
  91. data/test/test_conditions_disk_usage.rb +12 -12
  92. data/test/test_conditions_http_response_code.rb +24 -24
  93. data/test/test_conditions_process_running.rb +7 -7
  94. data/test/test_conditions_socket_responding.rb +122 -0
  95. data/test/test_conditions_tries.rb +12 -12
  96. data/test/test_contact.rb +19 -19
  97. data/test/test_driver.rb +17 -3
  98. data/test/test_event_handler.rb +12 -12
  99. data/test/test_god.rb +195 -117
  100. data/test/test_handlers_kqueue_handler.rb +4 -4
  101. data/test/test_jabber.rb +1 -1
  102. data/test/test_logger.rb +17 -17
  103. data/test/test_metric.rb +16 -16
  104. data/test/test_process.rb +47 -41
  105. data/test/test_prowl.rb +1 -1
  106. data/test/test_registry.rb +2 -2
  107. data/test/test_socket.rb +3 -3
  108. data/test/test_sugar.rb +7 -7
  109. data/test/test_system_portable_poller.rb +1 -1
  110. data/test/test_system_process.rb +5 -5
  111. data/test/test_task.rb +57 -57
  112. data/test/test_timeline.rb +8 -8
  113. data/test/test_trigger.rb +16 -16
  114. data/test/test_watch.rb +69 -62
  115. metadata +182 -69
  116. data/lib/god/dependency_graph.rb +0 -41
  117. data/lib/god/diagnostics.rb +0 -37
  118. data/test/test_dependency_graph.rb +0 -62
@@ -1,9 +1,9 @@
1
1
  module God
2
2
  module Conditions
3
-
3
+
4
4
  # Condition Symbol :cpu_usage
5
5
  # Type: Poll
6
- #
6
+ #
7
7
  # Trigger when the percent of CPU use of a process is above a specified limit.
8
8
  # On multi-core systems, this number could conceivably be above 100.
9
9
  #
@@ -11,7 +11,7 @@ module God
11
11
  # Required
12
12
  # +pid_file+ is the pid file of the process in question. Automatically
13
13
  # populated for Watches.
14
- # +above+ is the percent CPU above which to trigger the condition. You
14
+ # +above+ is the percent CPU above which to trigger the condition. You
15
15
  # may use #percent to clarify this amount (see examples).
16
16
  #
17
17
  # Examples
@@ -30,42 +30,42 @@ module God
30
30
  # end
31
31
  class CpuUsage < PollCondition
32
32
  attr_accessor :above, :times, :pid_file
33
-
33
+
34
34
  def initialize
35
35
  super
36
36
  self.above = nil
37
37
  self.times = [1, 1]
38
38
  end
39
-
39
+
40
40
  def prepare
41
41
  if self.times.kind_of?(Integer)
42
42
  self.times = [self.times, self.times]
43
43
  end
44
-
44
+
45
45
  @timeline = Timeline.new(self.times[1])
46
46
  end
47
-
47
+
48
48
  def reset
49
49
  @timeline.clear
50
50
  end
51
-
51
+
52
52
  def pid
53
53
  self.pid_file ? File.read(self.pid_file).strip.to_i : self.watch.pid
54
54
  end
55
-
55
+
56
56
  def valid?
57
57
  valid = true
58
58
  valid &= complain("Attribute 'pid_file' must be specified", self) if self.pid_file.nil? && self.watch.pid_file.nil?
59
59
  valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
60
60
  valid
61
61
  end
62
-
62
+
63
63
  def test
64
64
  process = System::Process.new(self.pid)
65
65
  @timeline.push(process.percent_cpu)
66
-
66
+
67
67
  history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}%%" }.join(", ") + "]"
68
-
68
+
69
69
  if @timeline.select { |x| x > self.above }.size >= self.times.first
70
70
  self.info = "cpu out of bounds #{history}"
71
71
  return true
@@ -75,6 +75,6 @@ module God
75
75
  end
76
76
  end
77
77
  end
78
-
78
+
79
79
  end
80
- end
80
+ end
@@ -1,21 +1,21 @@
1
1
  module God
2
2
  module Conditions
3
-
3
+
4
4
  # This condition degrades its interval by a factor of two for 3 tries before failing
5
5
  class DegradingLambda < PollCondition
6
6
  attr_accessor :lambda
7
-
7
+
8
8
  def initialize
9
9
  super
10
10
  @tries = 0
11
11
  end
12
-
12
+
13
13
  def valid?
14
14
  valid = true
15
15
  valid &= complain("Attribute 'lambda' must be specified", self) if self.lambda.nil?
16
16
  valid
17
17
  end
18
-
18
+
19
19
  def test
20
20
  puts "Calling test. Interval at #{self.interval}"
21
21
  @original_interval ||= self.interval
@@ -30,13 +30,13 @@ module God
30
30
  @tries = 0
31
31
  self.interval = @original_interval
32
32
  end
33
-
33
+
34
34
  self.info = "lambda condition was not satisfied"
35
35
  false
36
36
  end
37
-
37
+
38
38
  private
39
-
39
+
40
40
  def pass?
41
41
  begin
42
42
  Timeout::timeout(@interval) {
@@ -49,4 +49,4 @@ module God
49
49
  end
50
50
 
51
51
  end
52
- end
52
+ end
@@ -1,22 +1,22 @@
1
1
  module God
2
2
  module Conditions
3
-
3
+
4
4
  class DiskUsage < PollCondition
5
5
  attr_accessor :above, :mount_point
6
-
6
+
7
7
  def initialize
8
8
  super
9
9
  self.above = nil
10
10
  self.mount_point = nil
11
11
  end
12
-
12
+
13
13
  def valid?
14
14
  valid = true
15
15
  valid &= complain("Attribute 'mount_point' must be specified", self) if self.mount_point.nil?
16
16
  valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
17
17
  valid
18
18
  end
19
-
19
+
20
20
  def test
21
21
  usage = `df -P | grep -i " #{self.mount_point}$" | awk '{print $5}' | sed 's/%//'`
22
22
  if usage.to_i > self.above
@@ -29,4 +29,4 @@ module God
29
29
  end
30
30
  end
31
31
  end
32
- end
32
+ end
@@ -1,11 +1,11 @@
1
1
  module God
2
2
  module Conditions
3
-
3
+
4
4
  # Condition Symbol :flapping
5
5
  # Type: Trigger
6
- #
6
+ #
7
7
  # Trigger when a Task transitions to or from a state or states a given number
8
- # of times within a given period.
8
+ # of times within a given period.
9
9
  #
10
10
  # Parameters
11
11
  # Required
@@ -24,7 +24,7 @@ module God
24
24
  # it has been disabled by the condition.
25
25
  # +retry_times+ is the number of times after which to permanently unmonitor
26
26
  # the Task.
27
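- # +retry_within+ is the number of seconds within which +retry_times+ retries must occur to permanently unmonitor the Task.
27
+ # +retry_within+ is the number of seconds within which +retry_times+ retries must occur to permanently unmonitor the Task.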
28
28
  #
29
29
  # Examples
30
30
  #
@@ -37,16 +37,16 @@ module God
37
37
  :retry_in,
38
38
  :retry_times,
39
39
  :retry_within
40
-
40
+
41
41
  def initialize
42
42
  self.info = "process is flapping"
43
43
  end
44
-
44
+
45
45
  def prepare
46
46
  @timeline = Timeline.new(self.times)
47
47
  @retry_timeline = Timeline.new(self.retry_times)
48
48
  end
49
-
49
+
50
50
  def valid?
51
51
  valid = true
52
52
  valid &= complain("Attribute 'times' must be specified", self) if self.times.nil?
@@ -54,21 +54,21 @@ module God
54
54
  valid &= complain("Attributes 'from_state', 'to_state', or both must be specified", self) if self.from_state.nil? && self.to_state.nil?
55
55
  valid
56
56
  end
57
-
57
+
58
58
  def process(event, payload)
59
59
  begin
60
60
  if event == :state_change
61
61
  event_from_state, event_to_state = *payload
62
-
62
+
63
63
  from_state_match = !self.from_state || self.from_state && Array(self.from_state).include?(event_from_state)
64
64
  to_state_match = !self.to_state || self.to_state && Array(self.to_state).include?(event_to_state)
65
-
65
+
66
66
  if from_state_match && to_state_match
67
67
  @timeline << Time.now
68
-
68
+
69
69
  concensus = (@timeline.size == self.times)
70
70
  duration = (@timeline.last - @timeline.first) < self.within
71
-
71
+
72
72
  if concensus && duration
73
73
  @timeline.clear
74
74
  trigger
@@ -81,21 +81,21 @@ module God
81
81
  puts e.backtrace.join("\n")
82
82
  end
83
83
  end
84
-
84
+
85
85
  private
86
-
86
+
87
87
  def retry_mechanism
88
88
  if self.retry_in
89
89
  @retry_timeline << Time.now
90
-
90
+
91
91
  concensus = (@retry_timeline.size == self.retry_times)
92
92
  duration = (@retry_timeline.last - @retry_timeline.first) < self.retry_within
93
-
93
+
94
94
  if concensus && duration
95
95
  # give up
96
96
  Thread.new do
97
97
  sleep 1
98
-
98
+
99
99
  # log
100
100
  msg = "#{self.watch.name} giving up"
101
101
  applog(self.watch, :info, msg)
@@ -104,17 +104,17 @@ module God
104
104
  # try again later
105
105
  Thread.new do
106
106
  sleep 1
107
-
107
+
108
108
  # log
109
109
  msg = "#{self.watch.name} auto-reenable monitoring in #{self.retry_in} seconds"
110
110
  applog(self.watch, :info, msg)
111
-
111
+
112
112
  sleep self.retry_in
113
-
113
+
114
114
  # log
115
115
  msg = "#{self.watch.name} auto-reenabling monitoring"
116
116
  applog(self.watch, :info, msg)
117
-
117
+
118
118
  if self.watch.state == :unmonitored
119
119
  self.watch.monitor
120
120
  end
@@ -123,6 +123,6 @@ module God
123
123
  end
124
124
  end
125
125
  end
126
-
126
+
127
127
  end
128
- end
128
+ end
@@ -1,11 +1,12 @@
1
1
  require 'net/http'
2
+ require 'net/https'
2
3
 
3
4
  module God
4
5
  module Conditions
5
-
6
+
6
7
  # Condition Symbol :http_response_code
7
8
  # Type: Poll
8
- #
9
+ #
9
10
  # Trigger based on the response from an HTTP request.
10
11
  #
11
12
  # Parameters
@@ -23,6 +24,7 @@ module God
23
24
  # +times+ is the number of times after which to trigger (default 1)
24
25
  # e.g. 3 (times in a row) or [3, 5] (three out of five times)
25
26
  # +timeout+ is the time to wait for a connection (default 60.seconds)
27
+ # +ssl+ is whether the connection should use SSL (default false; always enabled when +port+ is 443)
26
28
  #
27
29
  # Examples
28
30
  #
@@ -68,10 +70,12 @@ module God
68
70
  :times, # e.g. 3 or [3, 5]
69
71
  :host, # e.g. www.example.com
70
72
  :port, # e.g. 8080
73
+ :ssl, # e.g. true or false
74
+ :ca_file, # e.g /path/to/pem_file for ssl verification (checkout http://curl.haxx.se/ca/cacert.pem)
71
75
  :timeout, # e.g. 60.seconds
72
76
  :path, # e.g. '/'
73
77
  :headers # e.g. {'Host' => 'myvirtual.mydomain.com'}
74
-
78
+
75
79
  def initialize
76
80
  super
77
81
  self.port = 80
@@ -79,25 +83,27 @@ module God
79
83
  self.headers = {}
80
84
  self.times = [1, 1]
81
85
  self.timeout = 60.seconds
86
+ self.ssl = false
87
+ self.ca_file = nil
82
88
  end
83
-
89
+
84
90
  def prepare
85
91
  self.code_is = Array(self.code_is).map { |x| x.to_i } if self.code_is
86
92
  self.code_is_not = Array(self.code_is_not).map { |x| x.to_i } if self.code_is_not
87
-
93
+
88
94
  if self.times.kind_of?(Integer)
89
95
  self.times = [self.times, self.times]
90
96
  end
91
-
97
+
92
98
  @timeline = Timeline.new(self.times[1])
93
99
  @history = Timeline.new(self.times[1])
94
100
  end
95
-
101
+
96
102
  def reset
97
103
  @timeline.clear
98
104
  @history.clear
99
105
  end
100
-
106
+
101
107
  def valid?
102
108
  valid = true
103
109
  valid &= complain("Attribute 'host' must be specified", self) if self.host.nil?
@@ -105,15 +111,25 @@ module God
105
111
  (self.code_is.nil? && self.code_is_not.nil?) || (self.code_is && self.code_is_not)
106
112
  valid
107
113
  end
108
-
114
+
109
115
  def test
110
116
  response = nil
111
-
112
- Net::HTTP.start(self.host, self.port) do |http|
117
+
118
+ connection = Net::HTTP.new(self.host, self.port)
119
+ connection.use_ssl = self.port == 443 ? true : self.ssl
120
+ connection.verify_mode = OpenSSL::SSL::VERIFY_NONE if connection.use_ssl?
121
+
122
+ if connection.use_ssl? && self.ca_file
123
+ pem = File.read(self.ca_file)
124
+ connection.ca_file = self.ca_file
125
+ connection.verify_mode = OpenSSL::SSL::VERIFY_PEER
126
+ end
127
+
128
+ connection.start do |http|
113
129
  http.read_timeout = self.timeout
114
130
  response = http.get(self.path, self.headers)
115
131
  end
116
-
132
+
117
133
  actual_response_code = response.code.to_i
118
134
  if self.code_is && self.code_is.include?(actual_response_code)
119
135
  pass(actual_response_code)
@@ -135,9 +151,9 @@ module God
135
151
  rescue Exception => failure
136
152
  self.code_is ? fail(failure.class.name) : pass(failure.class.name)
137
153
  end
138
-
154
+
139
155
  private
140
-
156
+
141
157
  def pass(code)
142
158
  @timeline << true
143
159
  if @timeline.select { |x| x }.size >= self.times.first
@@ -148,21 +164,21 @@ module God
148
164
  false
149
165
  end
150
166
  end
151
-
167
+
152
168
  def fail(code)
153
169
  @timeline << false
154
170
  self.info = "http response nominal #{history(code, false)}"
155
171
  false
156
172
  end
157
-
173
+
158
174
  def history(code, passed)
159
175
  entry = code.to_s.dup
160
176
  entry = '*' + entry if passed
161
177
  @history << entry
162
178
  '[' + @history.join(", ") + ']'
163
179
  end
164
-
180
+
165
181
  end
166
-
182
+
167
183
  end
168
- end
184
+ end
@@ -1,6 +1,6 @@
1
1
  module God
2
2
  module Conditions
3
-
3
+
4
4
  class Lambda < PollCondition
5
5
  attr_accessor :lambda
6
6
 
@@ -22,4 +22,4 @@ module God
22
22
  end
23
23
 
24
24
  end
25
- end
25
+ end
@@ -1,9 +1,9 @@
1
1
  module God
2
2
  module Conditions
3
-
3
+
4
4
  # Condition Symbol :memory_usage
5
5
  # Type: Poll
6
- #
6
+ #
7
7
  # Trigger when the resident memory of a process is above a specified limit.
8
8
  #
9
9
  # Parameters
@@ -32,42 +32,42 @@ module God
32
32
  # end
33
33
  class MemoryUsage < PollCondition
34
34
  attr_accessor :above, :times, :pid_file
35
-
35
+
36
36
  def initialize
37
37
  super
38
38
  self.above = nil
39
39
  self.times = [1, 1]
40
40
  end
41
-
41
+
42
42
  def prepare
43
43
  if self.times.kind_of?(Integer)
44
44
  self.times = [self.times, self.times]
45
45
  end
46
-
46
+
47
47
  @timeline = Timeline.new(self.times[1])
48
48
  end
49
-
49
+
50
50
  def reset
51
51
  @timeline.clear
52
52
  end
53
-
53
+
54
54
  def pid
55
55
  self.pid_file ? File.read(self.pid_file).strip.to_i : self.watch.pid
56
56
  end
57
-
57
+
58
58
  def valid?
59
59
  valid = true
60
60
  valid &= complain("Attribute 'pid_file' must be specified", self) if self.pid_file.nil? && self.watch.pid_file.nil?
61
61
  valid &= complain("Attribute 'above' must be specified", self) if self.above.nil?
62
62
  valid
63
63
  end
64
-
64
+
65
65
  def test
66
66
  process = System::Process.new(self.pid)
67
67
  @timeline.push(process.memory)
68
-
68
+
69
69
  history = "[" + @timeline.map { |x| "#{x > self.above ? '*' : ''}#{x}kb" }.join(", ") + "]"
70
-
70
+
71
71
  if @timeline.select { |x| x > self.above }.size >= self.times.first
72
72
  self.info = "memory out of bounds #{history}"
73
73
  return true
@@ -77,6 +77,6 @@ module God
77
77
  end
78
78
  end
79
79
  end
80
-
80
+
81
81
  end
82
- end
82
+ end