god 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Announce.txt +6 -6
- data/Gemfile +2 -0
- data/History.txt +19 -2
- data/{README.txt → LICENSE} +0 -37
- data/README.md +31 -0
- data/Rakefile +80 -38
- data/bin/god +21 -21
- data/doc/god.asciidoc +1487 -0
- data/doc/intro.asciidoc +20 -0
- data/ext/god/extconf.rb +3 -3
- data/ext/god/kqueue_handler.c +18 -18
- data/ext/god/netlink_handler.c +31 -31
- data/god.gemspec +24 -16
- data/lib/god.rb +261 -204
- data/lib/god/behavior.rb +14 -14
- data/lib/god/behaviors/clean_pid_file.rb +5 -5
- data/lib/god/behaviors/clean_unix_socket.rb +10 -10
- data/lib/god/behaviors/notify_when_flapping.rb +12 -12
- data/lib/god/cli/command.rb +59 -46
- data/lib/god/cli/run.rb +33 -37
- data/lib/god/cli/version.rb +6 -6
- data/lib/god/compat19.rb +1 -4
- data/lib/god/condition.rb +21 -21
- data/lib/god/conditions/always.rb +19 -6
- data/lib/god/conditions/complex.rb +18 -18
- data/lib/god/conditions/cpu_usage.rb +14 -14
- data/lib/god/conditions/degrading_lambda.rb +8 -8
- data/lib/god/conditions/disk_usage.rb +5 -5
- data/lib/god/conditions/flapping.rb +23 -23
- data/lib/god/conditions/http_response_code.rb +35 -19
- data/lib/god/conditions/lambda.rb +2 -2
- data/lib/god/conditions/memory_usage.rb +13 -13
- data/lib/god/conditions/process_exits.rb +14 -20
- data/lib/god/conditions/process_running.rb +16 -25
- data/lib/god/conditions/socket_responding.rb +132 -0
- data/lib/god/conditions/tries.rb +10 -10
- data/lib/god/configurable.rb +10 -10
- data/lib/god/contact.rb +20 -20
- data/lib/god/contacts/email.rb +7 -4
- data/lib/god/contacts/jabber.rb +1 -1
- data/lib/god/driver.rb +96 -64
- data/lib/god/errors.rb +9 -9
- data/lib/god/event_handler.rb +19 -19
- data/lib/god/event_handlers/dummy_handler.rb +4 -4
- data/lib/god/event_handlers/kqueue_handler.rb +3 -3
- data/lib/god/event_handlers/netlink_handler.rb +2 -2
- data/lib/god/logger.rb +13 -13
- data/lib/god/metric.rb +50 -22
- data/lib/god/process.rb +53 -52
- data/lib/god/registry.rb +7 -7
- data/lib/god/simple_logger.rb +14 -14
- data/lib/god/socket.rb +11 -11
- data/lib/god/sugar.rb +30 -15
- data/lib/god/sys_logger.rb +2 -2
- data/lib/god/system/portable_poller.rb +8 -8
- data/lib/god/system/process.rb +8 -8
- data/lib/god/system/slash_proc_poller.rb +13 -13
- data/lib/god/task.rb +237 -188
- data/lib/god/timeline.rb +5 -5
- data/lib/god/trigger.rb +11 -11
- data/lib/god/watch.rb +205 -53
- data/test/configs/child_events/child_events.god +5 -5
- data/test/configs/child_events/simple_server.rb +1 -1
- data/test/configs/child_polls/child_polls.god +4 -4
- data/test/configs/child_polls/simple_server.rb +4 -4
- data/test/configs/complex/complex.god +7 -7
- data/test/configs/complex/simple_server.rb +1 -1
- data/test/configs/contact/contact.god +1 -1
- data/test/configs/contact/simple_server.rb +1 -1
- data/test/configs/daemon_events/daemon_events.god +5 -5
- data/test/configs/daemon_events/simple_server.rb +1 -1
- data/test/configs/daemon_events/simple_server_stop.rb +1 -1
- data/test/configs/daemon_polls/daemon_polls.god +3 -3
- data/test/configs/daemon_polls/simple_server.rb +1 -1
- data/test/configs/degrading_lambda/degrading_lambda.god +3 -3
- data/test/configs/keepalive/keepalive.god +9 -0
- data/test/configs/keepalive/keepalive.rb +12 -0
- data/test/configs/lifecycle/lifecycle.god +2 -2
- data/test/configs/matias/matias.god +6 -6
- data/test/configs/real.rb +7 -7
- data/test/configs/running_load/running_load.god +2 -2
- data/test/configs/stop_options/simple_server.rb +1 -1
- data/test/configs/stress/simple_server.rb +1 -1
- data/test/configs/stress/stress.god +2 -2
- data/test/configs/task/task.god +5 -5
- data/test/configs/test.rb +7 -7
- data/test/helper.rb +8 -8
- data/test/test_behavior.rb +3 -3
- data/test/test_campfire.rb +1 -2
- data/test/test_condition.rb +10 -10
- data/test/test_conditions_disk_usage.rb +12 -12
- data/test/test_conditions_http_response_code.rb +24 -24
- data/test/test_conditions_process_running.rb +7 -7
- data/test/test_conditions_socket_responding.rb +122 -0
- data/test/test_conditions_tries.rb +12 -12
- data/test/test_contact.rb +19 -19
- data/test/test_driver.rb +17 -3
- data/test/test_event_handler.rb +12 -12
- data/test/test_god.rb +195 -117
- data/test/test_handlers_kqueue_handler.rb +4 -4
- data/test/test_jabber.rb +1 -1
- data/test/test_logger.rb +17 -17
- data/test/test_metric.rb +16 -16
- data/test/test_process.rb +47 -41
- data/test/test_prowl.rb +1 -1
- data/test/test_registry.rb +2 -2
- data/test/test_socket.rb +3 -3
- data/test/test_sugar.rb +7 -7
- data/test/test_system_portable_poller.rb +1 -1
- data/test/test_system_process.rb +5 -5
- data/test/test_task.rb +57 -57
- data/test/test_timeline.rb +8 -8
- data/test/test_trigger.rb +16 -16
- data/test/test_watch.rb +69 -62
- metadata +182 -69
- data/lib/god/dependency_graph.rb +0 -41
- data/lib/god/diagnostics.rb +0 -37
- data/test/test_dependency_graph.rb +0 -62
data/lib/god/contacts/email.rb
CHANGED
@@ -10,10 +10,13 @@
|
|
10
10
|
# === SMTP Options (when delivery_method = :smtp) ===
|
11
11
|
# server_host - The String hostname of the SMTP server (default: localhost).
|
12
12
|
# server_port - The Integer port of the SMTP server (default: 25).
|
13
|
-
# server_auth - The
|
14
|
-
#
|
13
|
+
# server_auth - The Symbol authentication method. Possible values:
|
14
|
+
# [ nil | :plain | :login | :cram_md5 ]
|
15
|
+
# The default is nil, which means no authentication. To
|
16
|
+
# enable authentication, pass the appropriate symbol and
|
17
|
+
# then pass the appropriate SMTP Auth Options (below).
|
15
18
|
#
|
16
|
-
# === SMTP Auth Options (when server_auth
|
19
|
+
# === SMTP Auth Options (when server_auth != nil) ===
|
17
20
|
# server_domain - The String domain.
|
18
21
|
# server_user - The String username.
|
19
22
|
# server_password - The String password.
|
@@ -41,7 +44,7 @@ module God
|
|
41
44
|
self.from_email = 'god@example.com'
|
42
45
|
self.from_name = 'God Process Monitoring'
|
43
46
|
self.delivery_method = :smtp
|
44
|
-
self.server_auth =
|
47
|
+
self.server_auth = nil
|
45
48
|
self.server_host = 'localhost'
|
46
49
|
self.server_port = 25
|
47
50
|
self.sendmail_path = '/usr/sbin/sendmail'
|
data/lib/god/contacts/jabber.rb
CHANGED
data/lib/god/driver.rb
CHANGED
@@ -1,78 +1,107 @@
|
|
1
1
|
require 'monitor'
|
2
2
|
|
3
|
-
#
|
3
|
+
# Ruby 1.9 specific fixes.
|
4
4
|
unless RUBY_VERSION < '1.9'
|
5
5
|
require 'god/compat19'
|
6
6
|
end
|
7
7
|
|
8
8
|
module God
|
9
|
+
# The TimedEvent class represents an event in the future. This class is used
|
10
|
+
# by the drivers to schedule upcoming conditional tests and other scheduled
|
11
|
+
# events.
|
9
12
|
class TimedEvent
|
10
13
|
include Comparable
|
11
14
|
|
15
|
+
# The Time at which this event is due.
|
12
16
|
attr_accessor :at
|
13
|
-
|
14
|
-
# Instantiate a new TimedEvent that will be triggered after the specified
|
15
|
-
#
|
17
|
+
|
18
|
+
# Instantiate a new TimedEvent that will be triggered after the specified
|
19
|
+
# delay.
|
16
20
|
#
|
17
|
-
#
|
21
|
+
# delay - The optional Numeric number of seconds from now at which to
|
22
|
+
# trigger (default: 0).
|
18
23
|
def initialize(delay = 0)
|
19
24
|
self.at = Time.now + delay
|
20
25
|
end
|
21
26
|
|
27
|
+
# Is the current event due (current time >= event time)?
|
28
|
+
#
|
29
|
+
# Returns true if the event is due, false if not.
|
22
30
|
def due?
|
23
31
|
Time.now >= self.at
|
24
32
|
end
|
25
33
|
|
34
|
+
# Compare this event to another.
|
35
|
+
#
|
36
|
+
# other - The other TimedEvent.
|
37
|
+
#
|
38
|
+
# Returns -1 if this event is before the other, 0 if the two events are
|
39
|
+
# due at the same time, 1 if this event is later than the other.
|
26
40
|
def <=>(other)
|
27
41
|
self.at <=> other.at
|
28
42
|
end
|
29
|
-
end
|
43
|
+
end
|
30
44
|
|
45
|
+
# A DriverEvent is a TimedEvent with an associated Task and Condition. This
|
46
|
+
# is the primary mechanism for poll conditions to be scheduled.
|
31
47
|
class DriverEvent < TimedEvent
|
32
|
-
|
33
|
-
|
48
|
+
# Initialize a new DriverEvent.
|
49
|
+
#
|
50
|
+
# delay - The Numeric delay for this event.
|
51
|
+
# task - The Task associated with this event.
|
52
|
+
# condition - The Condition associated with this event.
|
34
53
|
def initialize(delay, task, condition)
|
35
|
-
super
|
36
|
-
|
37
|
-
|
54
|
+
super(delay)
|
55
|
+
@task = task
|
56
|
+
@condition = condition
|
38
57
|
end
|
39
|
-
|
58
|
+
|
59
|
+
# Handle this event by invoking the underlying condition on the associated
|
60
|
+
# task.
|
61
|
+
#
|
62
|
+
# Returns nothing.
|
40
63
|
def handle_event
|
41
64
|
@task.handle_poll(@condition)
|
42
65
|
end
|
43
|
-
end
|
66
|
+
end
|
44
67
|
|
68
|
+
# A DriverOperation is a TimedEvent that is due as soon as possible. It is
|
69
|
+
# used to execute an arbitrary method on the associated Task.
|
45
70
|
class DriverOperation < TimedEvent
|
46
|
-
|
47
|
-
|
71
|
+
# Initialize a new DriverOperation.
|
72
|
+
#
|
73
|
+
# task - The Task upon which to operate.
|
74
|
+
# name - The Symbol name of the method to call.
|
75
|
+
# args - The Array of arguments to send to the method.
|
48
76
|
def initialize(task, name, args)
|
49
77
|
super(0)
|
50
|
-
|
51
|
-
|
52
|
-
|
78
|
+
@task = task
|
79
|
+
@name = name
|
80
|
+
@args = args
|
53
81
|
end
|
54
|
-
|
55
|
-
# Handle the
|
82
|
+
|
83
|
+
# Handle the operation that was issued asynchronously.
|
56
84
|
#
|
57
|
-
# Returns nothing
|
85
|
+
# Returns nothing.
|
58
86
|
def handle_event
|
59
87
|
@task.send(@name, *@args)
|
60
88
|
end
|
61
89
|
end
|
62
90
|
|
91
|
+
# The DriverEventQueue is a simple queue that holds TimedEvent instances in
|
92
|
+
# order to maintain the schedule of upcoming events.
|
63
93
|
class DriverEventQueue
|
94
|
+
# Initialize a DriverEventQueue.
|
64
95
|
def initialize
|
65
96
|
@shutdown = false
|
66
97
|
@events = []
|
67
98
|
@monitor = Monitor.new
|
68
99
|
@resource = @monitor.new_cond
|
69
|
-
@events.taint
|
70
|
-
self.taint
|
71
100
|
end
|
72
101
|
|
102
|
+
# Wake any sleeping threads after setting the sentinel.
|
73
103
|
#
|
74
|
-
#
|
75
|
-
#
|
104
|
+
# Returns nothing.
|
76
105
|
def shutdown
|
77
106
|
@shutdown = true
|
78
107
|
@monitor.synchronize do
|
@@ -80,15 +109,16 @@ module God
|
|
80
109
|
end
|
81
110
|
end
|
82
111
|
|
112
|
+
# Wait until the queue has something due, pop it off the queue, and return
|
113
|
+
# it.
|
83
114
|
#
|
84
|
-
#
|
85
|
-
#
|
115
|
+
# Returns the popped event.
|
86
116
|
def pop
|
87
117
|
@monitor.synchronize do
|
88
118
|
if @events.empty?
|
89
119
|
raise ThreadError, "queue empty" if @shutdown
|
90
120
|
@resource.wait
|
91
|
-
else
|
121
|
+
else
|
92
122
|
delay = @events.first.at - Time.now
|
93
123
|
@resource.wait(delay) if delay > 0
|
94
124
|
end
|
@@ -97,35 +127,34 @@ module God
|
|
97
127
|
end
|
98
128
|
end
|
99
129
|
|
100
|
-
alias shift pop
|
101
|
-
alias deq pop
|
102
|
-
|
103
|
-
#
|
104
130
|
# Add an event to the queue, wake any waiters if what we added needs to
|
105
|
-
# happen sooner than the next pending event
|
131
|
+
# happen sooner than the next pending event.
|
106
132
|
#
|
133
|
+
# Returns nothing.
|
107
134
|
def push(event)
|
108
135
|
@monitor.synchronize do
|
109
136
|
@events << event
|
110
137
|
@events.sort!
|
111
138
|
|
112
139
|
# If we've sorted the events and found the one we're adding is at
|
113
|
-
# the front, it will likely need to run before the next due date
|
140
|
+
# the front, it will likely need to run before the next due date.
|
114
141
|
@resource.signal if @events.first == event
|
115
142
|
end
|
116
143
|
end
|
117
144
|
|
118
|
-
|
119
|
-
alias enq push
|
120
|
-
|
145
|
+
# Returns true if the queue is empty, false if not.
|
121
146
|
def empty?
|
122
147
|
@events.empty?
|
123
148
|
end
|
124
149
|
|
150
|
+
# Clear the queue.
|
151
|
+
#
|
152
|
+
# Returns nothing.
|
125
153
|
def clear
|
126
154
|
@events.clear
|
127
155
|
end
|
128
156
|
|
157
|
+
# Returns the Integer length of the queue.
|
129
158
|
def length
|
130
159
|
@events.length
|
131
160
|
end
|
@@ -133,18 +162,19 @@ module God
|
|
133
162
|
alias size length
|
134
163
|
end
|
135
164
|
|
136
|
-
|
165
|
+
# The Driver class is responsible for scheduling all of the events for a
|
166
|
+
# given Task.
|
137
167
|
class Driver
|
168
|
+
# The Thread running the driver loop.
|
138
169
|
attr_reader :thread
|
139
170
|
|
140
|
-
# Instantiate a new Driver and start the scheduler loop to handle events
|
141
|
-
# +task+ is the Task this Driver belongs to
|
171
|
+
# Instantiate a new Driver and start the scheduler loop to handle events.
|
142
172
|
#
|
143
|
-
#
|
173
|
+
# task - The Task this Driver belongs to.
|
144
174
|
def initialize(task)
|
145
175
|
@task = task
|
146
176
|
@events = God::DriverEventQueue.new
|
147
|
-
|
177
|
+
|
148
178
|
@thread = Thread.new do
|
149
179
|
loop do
|
150
180
|
begin
|
@@ -160,47 +190,49 @@ module God
|
|
160
190
|
end
|
161
191
|
end
|
162
192
|
end
|
163
|
-
|
164
|
-
# Check if we're in the driver context
|
193
|
+
|
194
|
+
# Check if we're in the driver context.
|
165
195
|
#
|
166
|
-
# Returns true if in driver thread
|
196
|
+
# Returns true if in driver thread, false if not.
|
167
197
|
def in_driver_context?
|
168
198
|
Thread.current == @thread
|
169
199
|
end
|
170
200
|
|
171
|
-
# Clear all events for this Driver
|
172
|
-
#
|
173
|
-
# Returns nothing
|
201
|
+
# Clear all events for this Driver.
|
202
|
+
#
|
203
|
+
# Returns nothing.
|
174
204
|
def clear_events
|
175
205
|
@events.clear
|
176
206
|
end
|
177
207
|
|
178
|
-
# Shutdown the DriverEventQueue threads
|
208
|
+
# Shut down the DriverEventQueue threads.
|
179
209
|
#
|
180
|
-
# Returns nothing
|
210
|
+
# Returns nothing.
|
181
211
|
def shutdown
|
182
212
|
@events.shutdown
|
183
213
|
end
|
184
|
-
|
185
|
-
# Queue an asynchronous message
|
186
|
-
#
|
187
|
-
#
|
214
|
+
|
215
|
+
# Queue an asynchronous message.
|
216
|
+
#
|
217
|
+
# name - The Symbol name of the operation.
|
218
|
+
# args - An optional Array of arguments.
|
188
219
|
#
|
189
|
-
# Returns nothing
|
220
|
+
# Returns nothing.
|
190
221
|
def message(name, args = [])
|
191
222
|
@events.push(DriverOperation.new(@task, name, args))
|
192
223
|
end
|
193
|
-
|
194
|
-
# Create and schedule a new DriverEvent
|
195
|
-
#
|
196
|
-
#
|
224
|
+
|
225
|
+
# Create and schedule a new DriverEvent.
|
226
|
+
#
|
227
|
+
# condition - The Condition.
|
228
|
+
# delay - The Numeric number of seconds to delay (default: interval
|
229
|
+
# defined in condition).
|
197
230
|
#
|
198
|
-
# Returns nothing
|
231
|
+
# Returns nothing.
|
199
232
|
def schedule(condition, delay = condition.interval)
|
200
233
|
applog(nil, :debug, "driver schedule #{condition} in #{delay} seconds")
|
201
|
-
|
234
|
+
|
202
235
|
@events.push(DriverEvent.new(delay, @task, condition))
|
203
236
|
end
|
204
|
-
end
|
205
|
-
|
206
|
-
end # God
|
237
|
+
end
|
238
|
+
end
|
data/lib/god/errors.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
module God
|
2
|
-
|
2
|
+
|
3
3
|
class AbstractMethodNotOverriddenError < StandardError
|
4
4
|
end
|
5
|
-
|
5
|
+
|
6
6
|
class NoSuchWatchError < StandardError
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
class NoSuchConditionError < StandardError
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
class NoSuchBehaviorError < StandardError
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
class NoSuchContactError < StandardError
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
class InvalidCommandError < StandardError
|
19
19
|
end
|
20
|
-
|
20
|
+
|
21
21
|
class EventRegistrationFailedError < StandardError
|
22
22
|
end
|
23
|
-
|
24
|
-
end
|
23
|
+
|
24
|
+
end
|
data/lib/god/event_handler.rb
CHANGED
@@ -3,15 +3,15 @@ module God
|
|
3
3
|
@@actions = {}
|
4
4
|
@@handler = nil
|
5
5
|
@@loaded = false
|
6
|
-
|
6
|
+
|
7
7
|
def self.loaded?
|
8
8
|
@@loaded
|
9
9
|
end
|
10
|
-
|
10
|
+
|
11
11
|
def self.event_system
|
12
12
|
@@handler::EVENT_SYSTEM
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
def self.load
|
16
16
|
begin
|
17
17
|
case RUBY_PLATFORM
|
@@ -31,13 +31,13 @@ module God
|
|
31
31
|
@@loaded = false
|
32
32
|
end
|
33
33
|
end
|
34
|
-
|
34
|
+
|
35
35
|
def self.register(pid, event, &block)
|
36
36
|
@@actions[pid] ||= {}
|
37
37
|
@@actions[pid][event] = block
|
38
38
|
@@handler.register_process(pid, @@actions[pid].keys)
|
39
39
|
end
|
40
|
-
|
40
|
+
|
41
41
|
def self.deregister(pid, event)
|
42
42
|
if watching_pid? pid
|
43
43
|
running = ::Process.kill(0, pid.to_i) rescue false
|
@@ -46,15 +46,15 @@ module God
|
|
46
46
|
@@actions.delete(pid) if @@actions[pid].empty?
|
47
47
|
end
|
48
48
|
end
|
49
|
-
|
49
|
+
|
50
50
|
def self.call(pid, event, extra_data = {})
|
51
51
|
@@actions[pid][event].call(extra_data) if watching_pid?(pid) && @@actions[pid][event]
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
def self.watching_pid?(pid)
|
55
55
|
@@actions[pid]
|
56
56
|
end
|
57
|
-
|
57
|
+
|
58
58
|
def self.start
|
59
59
|
Thread.new do
|
60
60
|
loop do
|
@@ -67,42 +67,42 @@ module God
|
|
67
67
|
end
|
68
68
|
end
|
69
69
|
end
|
70
|
-
|
70
|
+
|
71
71
|
# do a real test to make sure events are working properly
|
72
72
|
@@loaded = self.operational?
|
73
73
|
end
|
74
|
-
|
74
|
+
|
75
75
|
def self.operational?
|
76
76
|
com = [false]
|
77
|
-
|
77
|
+
|
78
78
|
Thread.new do
|
79
79
|
begin
|
80
80
|
event_system = God::EventHandler.event_system
|
81
|
-
|
81
|
+
|
82
82
|
pid = fork do
|
83
83
|
loop { sleep(1) }
|
84
84
|
end
|
85
|
-
|
85
|
+
|
86
86
|
self.register(pid, :proc_exit) do
|
87
87
|
com[0] = true
|
88
88
|
end
|
89
|
-
|
89
|
+
|
90
90
|
::Process.kill('KILL', pid)
|
91
91
|
::Process.waitpid(pid)
|
92
|
-
|
92
|
+
|
93
93
|
sleep(0.1)
|
94
|
-
|
94
|
+
|
95
95
|
self.deregister(pid, :proc_exit) rescue nil
|
96
96
|
rescue => e
|
97
97
|
puts e.message
|
98
98
|
puts e.backtrace.join("\n")
|
99
99
|
end
|
100
100
|
end.join
|
101
|
-
|
101
|
+
|
102
102
|
sleep(0.1)
|
103
|
-
|
103
|
+
|
104
104
|
com.first
|
105
105
|
end
|
106
|
-
|
106
|
+
|
107
107
|
end
|
108
108
|
end
|