god 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Announce.txt +6 -6
- data/Gemfile +2 -0
- data/History.txt +19 -2
- data/{README.txt → LICENSE} +0 -37
- data/README.md +31 -0
- data/Rakefile +80 -38
- data/bin/god +21 -21
- data/doc/god.asciidoc +1487 -0
- data/doc/intro.asciidoc +20 -0
- data/ext/god/extconf.rb +3 -3
- data/ext/god/kqueue_handler.c +18 -18
- data/ext/god/netlink_handler.c +31 -31
- data/god.gemspec +24 -16
- data/lib/god.rb +261 -204
- data/lib/god/behavior.rb +14 -14
- data/lib/god/behaviors/clean_pid_file.rb +5 -5
- data/lib/god/behaviors/clean_unix_socket.rb +10 -10
- data/lib/god/behaviors/notify_when_flapping.rb +12 -12
- data/lib/god/cli/command.rb +59 -46
- data/lib/god/cli/run.rb +33 -37
- data/lib/god/cli/version.rb +6 -6
- data/lib/god/compat19.rb +1 -4
- data/lib/god/condition.rb +21 -21
- data/lib/god/conditions/always.rb +19 -6
- data/lib/god/conditions/complex.rb +18 -18
- data/lib/god/conditions/cpu_usage.rb +14 -14
- data/lib/god/conditions/degrading_lambda.rb +8 -8
- data/lib/god/conditions/disk_usage.rb +5 -5
- data/lib/god/conditions/flapping.rb +23 -23
- data/lib/god/conditions/http_response_code.rb +35 -19
- data/lib/god/conditions/lambda.rb +2 -2
- data/lib/god/conditions/memory_usage.rb +13 -13
- data/lib/god/conditions/process_exits.rb +14 -20
- data/lib/god/conditions/process_running.rb +16 -25
- data/lib/god/conditions/socket_responding.rb +132 -0
- data/lib/god/conditions/tries.rb +10 -10
- data/lib/god/configurable.rb +10 -10
- data/lib/god/contact.rb +20 -20
- data/lib/god/contacts/email.rb +7 -4
- data/lib/god/contacts/jabber.rb +1 -1
- data/lib/god/driver.rb +96 -64
- data/lib/god/errors.rb +9 -9
- data/lib/god/event_handler.rb +19 -19
- data/lib/god/event_handlers/dummy_handler.rb +4 -4
- data/lib/god/event_handlers/kqueue_handler.rb +3 -3
- data/lib/god/event_handlers/netlink_handler.rb +2 -2
- data/lib/god/logger.rb +13 -13
- data/lib/god/metric.rb +50 -22
- data/lib/god/process.rb +53 -52
- data/lib/god/registry.rb +7 -7
- data/lib/god/simple_logger.rb +14 -14
- data/lib/god/socket.rb +11 -11
- data/lib/god/sugar.rb +30 -15
- data/lib/god/sys_logger.rb +2 -2
- data/lib/god/system/portable_poller.rb +8 -8
- data/lib/god/system/process.rb +8 -8
- data/lib/god/system/slash_proc_poller.rb +13 -13
- data/lib/god/task.rb +237 -188
- data/lib/god/timeline.rb +5 -5
- data/lib/god/trigger.rb +11 -11
- data/lib/god/watch.rb +205 -53
- data/test/configs/child_events/child_events.god +5 -5
- data/test/configs/child_events/simple_server.rb +1 -1
- data/test/configs/child_polls/child_polls.god +4 -4
- data/test/configs/child_polls/simple_server.rb +4 -4
- data/test/configs/complex/complex.god +7 -7
- data/test/configs/complex/simple_server.rb +1 -1
- data/test/configs/contact/contact.god +1 -1
- data/test/configs/contact/simple_server.rb +1 -1
- data/test/configs/daemon_events/daemon_events.god +5 -5
- data/test/configs/daemon_events/simple_server.rb +1 -1
- data/test/configs/daemon_events/simple_server_stop.rb +1 -1
- data/test/configs/daemon_polls/daemon_polls.god +3 -3
- data/test/configs/daemon_polls/simple_server.rb +1 -1
- data/test/configs/degrading_lambda/degrading_lambda.god +3 -3
- data/test/configs/keepalive/keepalive.god +9 -0
- data/test/configs/keepalive/keepalive.rb +12 -0
- data/test/configs/lifecycle/lifecycle.god +2 -2
- data/test/configs/matias/matias.god +6 -6
- data/test/configs/real.rb +7 -7
- data/test/configs/running_load/running_load.god +2 -2
- data/test/configs/stop_options/simple_server.rb +1 -1
- data/test/configs/stress/simple_server.rb +1 -1
- data/test/configs/stress/stress.god +2 -2
- data/test/configs/task/task.god +5 -5
- data/test/configs/test.rb +7 -7
- data/test/helper.rb +8 -8
- data/test/test_behavior.rb +3 -3
- data/test/test_campfire.rb +1 -2
- data/test/test_condition.rb +10 -10
- data/test/test_conditions_disk_usage.rb +12 -12
- data/test/test_conditions_http_response_code.rb +24 -24
- data/test/test_conditions_process_running.rb +7 -7
- data/test/test_conditions_socket_responding.rb +122 -0
- data/test/test_conditions_tries.rb +12 -12
- data/test/test_contact.rb +19 -19
- data/test/test_driver.rb +17 -3
- data/test/test_event_handler.rb +12 -12
- data/test/test_god.rb +195 -117
- data/test/test_handlers_kqueue_handler.rb +4 -4
- data/test/test_jabber.rb +1 -1
- data/test/test_logger.rb +17 -17
- data/test/test_metric.rb +16 -16
- data/test/test_process.rb +47 -41
- data/test/test_prowl.rb +1 -1
- data/test/test_registry.rb +2 -2
- data/test/test_socket.rb +3 -3
- data/test/test_sugar.rb +7 -7
- data/test/test_system_portable_poller.rb +1 -1
- data/test/test_system_process.rb +5 -5
- data/test/test_task.rb +57 -57
- data/test/test_timeline.rb +8 -8
- data/test/test_trigger.rb +16 -16
- data/test/test_watch.rb +69 -62
- metadata +182 -69
- data/lib/god/dependency_graph.rb +0 -41
- data/lib/god/diagnostics.rb +0 -37
- data/test/test_dependency_graph.rb +0 -62
data/lib/god/timeline.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module God
|
2
|
-
|
2
|
+
|
3
3
|
class Timeline < Array
|
4
4
|
# Instantiate a new Timeline
|
5
5
|
# +max_size+ is the maximum size to which the timeline should grow
|
@@ -9,7 +9,7 @@ module God
|
|
9
9
|
super()
|
10
10
|
@max_size = max_size
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
# Push a value onto the Timeline
|
14
14
|
# +val+ is the value to push
|
15
15
|
#
|
@@ -18,8 +18,8 @@ module God
|
|
18
18
|
self.concat([val])
|
19
19
|
shift if size > @max_size
|
20
20
|
end
|
21
|
-
|
21
|
+
|
22
22
|
alias_method :<<, :push
|
23
23
|
end
|
24
|
-
|
25
|
-
end
|
24
|
+
|
25
|
+
end
|
data/lib/god/trigger.rb
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
module God
|
2
|
-
|
2
|
+
|
3
3
|
class Trigger
|
4
|
-
|
4
|
+
|
5
5
|
class << self
|
6
6
|
attr_accessor :triggers # {task.name => condition}
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
# init
|
10
10
|
self.triggers = {}
|
11
11
|
@mutex = Mutex.new
|
12
|
-
|
12
|
+
|
13
13
|
def self.register(condition)
|
14
14
|
@mutex.synchronize do
|
15
15
|
self.triggers[condition.watch.name] ||= []
|
16
16
|
self.triggers[condition.watch.name] << condition
|
17
17
|
end
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def self.deregister(condition)
|
21
21
|
@mutex.synchronize do
|
22
22
|
self.triggers[condition.watch.name].delete(condition)
|
23
23
|
self.triggers.delete(condition.watch.name) if self.triggers[condition.watch.name].empty?
|
24
24
|
end
|
25
25
|
end
|
26
|
-
|
26
|
+
|
27
27
|
def self.broadcast(task, message, payload)
|
28
28
|
return unless self.triggers[task.name]
|
29
|
-
|
29
|
+
|
30
30
|
@mutex.synchronize do
|
31
31
|
self.triggers[task.name].each do |t|
|
32
32
|
t.process(message, payload)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
end
|
36
|
-
|
36
|
+
|
37
37
|
def self.reset
|
38
38
|
self.triggers.clear
|
39
39
|
end
|
40
|
-
|
40
|
+
|
41
41
|
end
|
42
|
-
|
43
|
-
end
|
42
|
+
|
43
|
+
end
|
data/lib/god/watch.rb
CHANGED
@@ -2,14 +2,28 @@ require 'etc'
|
|
2
2
|
require 'forwardable'
|
3
3
|
|
4
4
|
module God
|
5
|
-
|
5
|
+
# The Watch class is a specialized Task that handles standard process
|
6
|
+
# workflows. It has four states: init, up, start, and restart.
|
6
7
|
class Watch < Task
|
8
|
+
# The Array of Symbol valid task states.
|
7
9
|
VALID_STATES = [:init, :up, :start, :restart]
|
10
|
+
|
11
|
+
# The Symbol initial state.
|
8
12
|
INITIAL_STATE = :init
|
9
|
-
|
10
|
-
#
|
11
|
-
attr_accessor :grace
|
12
|
-
|
13
|
+
|
14
|
+
# Public: The grace period for this process (seconds).
|
15
|
+
attr_accessor :grace
|
16
|
+
|
17
|
+
# Public: The start grace period (seconds).
|
18
|
+
attr_accessor :start_grace
|
19
|
+
|
20
|
+
# Public: The stop grace period (seconds).
|
21
|
+
attr_accessor :stop_grace
|
22
|
+
|
23
|
+
# Public: The restart grace period (seconds).
|
24
|
+
attr_accessor :restart_grace
|
25
|
+
|
26
|
+
# Public: God::Process delegators. See lib/god/process.rb for docs.
|
13
27
|
extend Forwardable
|
14
28
|
def_delegators :@process, :name, :uid, :gid, :start, :stop, :restart, :dir,
|
15
29
|
:name=, :uid=, :gid=, :start=, :stop=, :restart=,
|
@@ -18,104 +32,230 @@ module God
|
|
18
32
|
:err_log_cmd, :err_log_cmd=, :alive?, :pid,
|
19
33
|
:unix_socket, :unix_socket=, :chroot, :chroot=,
|
20
34
|
:env, :env=, :signal, :stop_timeout=,
|
21
|
-
:stop_signal=
|
22
|
-
|
35
|
+
:stop_signal=, :umask, :umask=
|
36
|
+
|
37
|
+
# Initialize a new Watch instance.
|
23
38
|
def initialize
|
24
39
|
super
|
25
|
-
|
40
|
+
|
41
|
+
# This God::Process instance holds information specific to the process.
|
26
42
|
@process = God::Process.new
|
27
|
-
|
28
|
-
#
|
43
|
+
|
44
|
+
# Valid states.
|
29
45
|
self.valid_states = VALID_STATES
|
30
46
|
self.initial_state = INITIAL_STATE
|
31
|
-
|
32
|
-
#
|
47
|
+
|
48
|
+
# No grace period by default.
|
33
49
|
self.grace = self.start_grace = self.stop_grace = self.restart_grace = 0
|
34
50
|
end
|
35
|
-
|
51
|
+
|
52
|
+
# Is this Watch valid?
|
53
|
+
#
|
54
|
+
# Returns true if the Watch is valid, false if not.
|
36
55
|
def valid?
|
37
56
|
super && @process.valid?
|
38
57
|
end
|
39
|
-
|
58
|
+
|
40
59
|
###########################################################################
|
41
60
|
#
|
42
61
|
# Behavior
|
43
62
|
#
|
44
63
|
###########################################################################
|
45
|
-
|
64
|
+
|
65
|
+
# Public: Add a behavior to this Watch. See lib/god/behavior.rb.
|
66
|
+
#
|
67
|
+
# kind - The Symbol name of the Behavior to add.
|
68
|
+
#
|
69
|
+
# Yields the newly instantiated Behavior.
|
70
|
+
#
|
71
|
+
# Returns nothing.
|
46
72
|
def behavior(kind)
|
47
|
-
#
|
73
|
+
# Create the behavior.
|
48
74
|
begin
|
49
75
|
b = Behavior.generate(kind, self)
|
50
76
|
rescue NoSuchBehaviorError => e
|
51
77
|
abort e.message
|
52
78
|
end
|
53
|
-
|
54
|
-
#
|
79
|
+
|
80
|
+
# Send to block so config can set attributes.
|
55
81
|
yield(b) if block_given?
|
56
|
-
|
57
|
-
#
|
58
|
-
# out its own error messages by now
|
82
|
+
|
83
|
+
# Abort if the Behavior is invalid, the Behavior will have printed
|
84
|
+
# out its own error messages by now.
|
59
85
|
abort unless b.valid?
|
60
|
-
|
86
|
+
|
61
87
|
self.behaviors << b
|
62
88
|
end
|
63
|
-
|
89
|
+
|
90
|
+
###########################################################################
|
91
|
+
#
|
92
|
+
# Quickstart mode
|
93
|
+
#
|
94
|
+
###########################################################################
|
95
|
+
|
96
|
+
# Default Integer interval at which keepalive will run poll checks.
|
97
|
+
DEFAULT_KEEPALIVE_INTERVAL = 5.seconds
|
98
|
+
|
99
|
+
# Default Integer or Array of Integers specification of how many times the
|
100
|
+
# memory condition must fail before triggering.
|
101
|
+
DEFAULT_KEEPALIVE_MEMORY_TIMES = [3, 5]
|
102
|
+
|
103
|
+
# Default Integer or Array of Integers specification of how many times the
|
104
|
+
# CPU condition must fail before triggering.
|
105
|
+
DEFAULT_KEEPALIVE_CPU_TIMES = [3, 5]
|
106
|
+
|
107
|
+
# Public: A set of conditions for easily getting started with simple watch
|
108
|
+
# scenarios. Keepalive is intended for use by beginners or on processes
|
109
|
+
# that do not need very sophisticated monitoring.
|
110
|
+
#
|
111
|
+
# If events are enabled, it will use the :process_exits event to determine
|
112
|
+
# if a process fails. Otherwise it will use the :process_running poll.
|
113
|
+
#
|
114
|
+
# options - The option Hash. Possible values are:
|
115
|
+
# :interval - The Integer number of seconds on which to poll
|
116
|
+
# for process status. Affects CPU, memory, and
|
117
|
+
# :process_running conditions (if used).
|
118
|
+
# Default: 5.seconds.
|
119
|
+
# :memory_max - The Integer memory max. A bare integer means
|
120
|
+
# kilobytes. You may use Numeric#kilobytes,
|
121
|
+
# Numeric#megabytes, and Numeric#gigabytes to
|
122
|
+
# make things more clear.
|
123
|
+
# :memory_times - If :memory_max is set, :memory_times can be
|
124
|
+
# set to either an Integer or a 2 element
|
125
|
+
# Integer Array to specify the number of times
|
126
|
+
# the memory condition must fail. Examples:
|
127
|
+
# 3 (three times), [3, 5] (three out of any five
|
128
|
+
# checks). Default: [3, 5].
|
129
|
+
# :cpu_max - The Integer CPU percentage max. Range is
|
130
|
+
# 0 to 100. You may use the Numeric#percent
|
131
|
+
# sugar to clarify e.g. 50.percent.
|
132
|
+
# :cpu_times - If :cpu_max is set, :cpu_times can be
|
133
|
+
# set to either an Integer or a 2 element
|
134
|
+
# Integer Array to specify the number of times
|
135
|
+
# the CPU condition must fail. Examples:
|
136
|
+
# 3 (three times), [3, 5] (three out of any five
|
137
|
+
# checks). Default: [3, 5].
|
138
|
+
def keepalive(options = {})
|
139
|
+
if God::EventHandler.loaded?
|
140
|
+
self.transition(:init, { true => :up, false => :start }) do |on|
|
141
|
+
on.condition(:process_running) do |c|
|
142
|
+
c.interval = options[:interval] || DEFAULT_KEEPALIVE_INTERVAL
|
143
|
+
c.running = true
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
self.transition([:start, :restart], :up) do |on|
|
148
|
+
on.condition(:process_running) do |c|
|
149
|
+
c.interval = options[:interval] || DEFAULT_KEEPALIVE_INTERVAL
|
150
|
+
c.running = true
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
self.transition(:up, :start) do |on|
|
155
|
+
on.condition(:process_exits)
|
156
|
+
end
|
157
|
+
else
|
158
|
+
self.start_if do |start|
|
159
|
+
start.condition(:process_running) do |c|
|
160
|
+
c.interval = options[:interval] || DEFAULT_KEEPALIVE_INTERVAL
|
161
|
+
c.running = false
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
self.restart_if do |restart|
|
167
|
+
if options[:memory_max]
|
168
|
+
restart.condition(:memory_usage) do |c|
|
169
|
+
c.interval = options[:interval] || DEFAULT_KEEPALIVE_INTERVAL
|
170
|
+
c.above = options[:memory_max]
|
171
|
+
c.times = options[:memory_times] || DEFAULT_KEEPALIVE_MEMORY_TIMES
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
if options[:cpu_max]
|
176
|
+
restart.condition(:cpu_usage) do |c|
|
177
|
+
c.interval = options[:interval] || DEFAULT_KEEPALIVE_INTERVAL
|
178
|
+
c.above = options[:cpu_max]
|
179
|
+
c.times = options[:cpu_times] || DEFAULT_KEEPALIVE_CPU_TIMES
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
64
185
|
###########################################################################
|
65
186
|
#
|
66
187
|
# Simple mode
|
67
188
|
#
|
68
189
|
###########################################################################
|
69
|
-
|
190
|
+
|
191
|
+
# Public: Start the process if any of the given conditions are triggered.
|
192
|
+
#
|
193
|
+
# Yields the Metric upon which conditions can be added.
|
194
|
+
#
|
195
|
+
# Returns nothing.
|
70
196
|
def start_if
|
71
197
|
self.transition(:up, :start) do |on|
|
72
198
|
yield(on)
|
73
199
|
end
|
74
200
|
end
|
75
|
-
|
201
|
+
|
202
|
+
# Public: Restart the process if any of the given conditions are triggered.
|
203
|
+
#
|
204
|
+
# Yields the Metric upon which conditions can be added.
|
205
|
+
#
|
206
|
+
# Returns nothing.
|
76
207
|
def restart_if
|
77
208
|
self.transition(:up, :restart) do |on|
|
78
209
|
yield(on)
|
79
210
|
end
|
80
211
|
end
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
212
|
+
|
213
|
+
# Public: Stop the process if any of the given conditions are triggered.
|
214
|
+
#
|
215
|
+
# Yields the Metric upon which conditions can be added.
|
216
|
+
#
|
217
|
+
# Returns nothing.
|
218
|
+
def stop_if
|
219
|
+
self.transition(:up, :stop) do |on|
|
220
|
+
yield(on)
|
221
|
+
end
|
86
222
|
end
|
87
|
-
|
223
|
+
|
88
224
|
###########################################################################
|
89
225
|
#
|
90
226
|
# Lifecycle
|
91
227
|
#
|
92
228
|
###########################################################################
|
93
|
-
|
94
|
-
# Enable monitoring
|
229
|
+
|
230
|
+
# Enable monitoring. Start at the first available of the init or up states.
|
231
|
+
#
|
232
|
+
# Returns nothing.
|
95
233
|
def monitor
|
96
|
-
# start monitoring at the first available of the init or up states
|
97
234
|
if !self.metrics[:init].empty?
|
98
235
|
self.move(:init)
|
99
236
|
else
|
100
237
|
self.move(:up)
|
101
238
|
end
|
102
239
|
end
|
103
|
-
|
240
|
+
|
104
241
|
###########################################################################
|
105
242
|
#
|
106
243
|
# Actions
|
107
244
|
#
|
108
245
|
###########################################################################
|
109
|
-
|
246
|
+
|
247
|
+
# Perform an action.
|
248
|
+
#
|
249
|
+
# a - The Symbol action to perform. One of :start, :restart, :stop.
|
250
|
+
# c - The Condition.
|
251
|
+
#
|
252
|
+
# Returns this Watch.
|
110
253
|
def action(a, c = nil)
|
111
254
|
if !self.driver.in_driver_context?
|
112
|
-
#
|
113
|
-
|
114
|
-
# send an async message to Driver
|
255
|
+
# Called from outside Driver. Send an async message to Driver.
|
115
256
|
self.driver.message(:action, [a, c])
|
116
257
|
else
|
117
|
-
#
|
118
|
-
|
258
|
+
# Called from within Driver.
|
119
259
|
case a
|
120
260
|
when :start
|
121
261
|
call_action(c, :start)
|
@@ -133,12 +273,18 @@ module God
|
|
133
273
|
sleep(self.stop_grace + self.grace)
|
134
274
|
end
|
135
275
|
end
|
136
|
-
|
276
|
+
|
137
277
|
self
|
138
278
|
end
|
139
|
-
|
279
|
+
|
280
|
+
# Perform the specifics of the action.
|
281
|
+
#
|
282
|
+
# condition - The Condition.
|
283
|
+
# action - The Symbol action.
|
284
|
+
#
|
285
|
+
# Returns nothing.
|
140
286
|
def call_action(condition, action)
|
141
|
-
#
|
287
|
+
# Before.
|
142
288
|
before_items = self.behaviors
|
143
289
|
before_items += [condition] if condition
|
144
290
|
before_items.each do |b|
|
@@ -148,16 +294,17 @@ module God
|
|
148
294
|
applog(self, :info, msg)
|
149
295
|
end
|
150
296
|
end
|
151
|
-
|
152
|
-
#
|
297
|
+
|
298
|
+
# Log.
|
153
299
|
if self.send(action)
|
154
300
|
msg = "#{self.name} #{action}: #{self.send(action).to_s}"
|
155
301
|
applog(self, :info, msg)
|
156
302
|
end
|
157
|
-
|
303
|
+
|
304
|
+
# Execute.
|
158
305
|
@process.call_action(action)
|
159
|
-
|
160
|
-
#
|
306
|
+
|
307
|
+
# After.
|
161
308
|
after_items = self.behaviors
|
162
309
|
after_items += [condition] if condition
|
163
310
|
after_items.each do |b|
|
@@ -168,21 +315,26 @@ module God
|
|
168
315
|
end
|
169
316
|
end
|
170
317
|
end
|
171
|
-
|
318
|
+
|
172
319
|
###########################################################################
|
173
320
|
#
|
174
321
|
# Registration
|
175
322
|
#
|
176
323
|
###########################################################################
|
177
|
-
|
324
|
+
|
325
|
+
# Register the Process in the global process registry.
|
326
|
+
#
|
327
|
+
# Returns nothing.
|
178
328
|
def register!
|
179
329
|
God.registry.add(@process)
|
180
330
|
end
|
181
|
-
|
331
|
+
|
332
|
+
# Unregister the Process from the global process registry.
|
333
|
+
#
|
334
|
+
# Returns nothing.
|
182
335
|
def unregister!
|
183
336
|
God.registry.remove(@process)
|
184
337
|
super
|
185
338
|
end
|
186
339
|
end
|
187
|
-
|
188
340
|
end
|