god 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +23 -0
- data/Manifest.txt +9 -0
- data/README.txt +9 -2
- data/Rakefile +8 -1
- data/bin/god +11 -214
- data/examples/single.god +66 -0
- data/ext/god/netlink_handler.c +16 -3
- data/lib/god.rb +153 -17
- data/lib/god/cli/command.rb +189 -0
- data/lib/god/cli/run.rb +120 -0
- data/lib/god/cli/version.rb +23 -0
- data/lib/god/conditions/complex.rb +86 -0
- data/lib/god/conditions/cpu_usage.rb +27 -0
- data/lib/god/conditions/disk_usage.rb +27 -0
- data/lib/god/conditions/flapping.rb +42 -11
- data/lib/god/conditions/http_response_code.rb +63 -3
- data/lib/god/conditions/memory_usage.rb +30 -1
- data/lib/god/conditions/process_exits.rb +24 -2
- data/lib/god/conditions/process_running.rb +32 -0
- data/lib/god/configurable.rb +5 -3
- data/lib/god/event_handler.rb +2 -2
- data/lib/god/hub.rb +12 -19
- data/lib/god/logger.rb +11 -2
- data/lib/god/process.rb +29 -20
- data/lib/god/socket.rb +41 -5
- data/lib/god/task.rb +6 -9
- data/lib/god/timer.rb +20 -13
- data/lib/god/watch.rb +3 -6
- data/test/configs/child_events/child_events.god +1 -1
- data/test/configs/complex/complex.god +59 -0
- data/test/configs/complex/simple_server.rb +3 -0
- data/test/test_conditions_disk_usage.rb +56 -0
- data/test/test_conditions_http_response_code.rb +15 -21
- data/test/test_god.rb +36 -0
- data/test/test_hub.rb +6 -4
- data/test/test_logger.rb +8 -0
- data/test/test_timer.rb +9 -0
- metadata +12 -2
data/History.txt
CHANGED
@@ -1,3 +1,26 @@
|
|
1
|
+
== 0.6.0 /
|
2
|
+
|
3
|
+
* Minor Enhancement
|
4
|
+
* Move Syslog calls into God::Logger and clean up all calling code
|
5
|
+
* Remove god's pid file on user requested termination
|
6
|
+
* Better handling and cleanup of DRb server's unix domain socket
|
7
|
+
* Allow shorthand for requesting a god log
|
8
|
+
* Add `god check` to make it easier to diagnose event problems
|
9
|
+
* Refactor god binary into class/method structure
|
10
|
+
* Implement `god remove` to remove a Task altogether
|
11
|
+
* New Conditions
|
12
|
+
* DiskUsage < PollCondition - trigger if disk usage is above limit on mount [Rudy Desjardins]
|
13
|
+
|
14
|
+
== 0.5.2 / 2007-10-10
|
15
|
+
|
16
|
+
* Minor Enhancement
|
17
|
+
* Allow extra args to pass through to config file
|
18
|
+
|
19
|
+
== 0.5.1 / 2007-10-08
|
20
|
+
|
21
|
+
* Bug Fixes
|
22
|
+
* Rescue connection refused in http response code condition
|
23
|
+
|
1
24
|
== 0.5.0 / 2007-10-05
|
2
25
|
|
3
26
|
* Major Enhancements
|
data/Manifest.txt
CHANGED
@@ -5,6 +5,7 @@ Rakefile
|
|
5
5
|
bin/god
|
6
6
|
examples/events.god
|
7
7
|
examples/gravatar.god
|
8
|
+
examples/single.god
|
8
9
|
ext/god/extconf.rb
|
9
10
|
ext/god/kqueue_handler.c
|
10
11
|
ext/god/netlink_handler.c
|
@@ -13,10 +14,15 @@ lib/god.rb
|
|
13
14
|
lib/god/behavior.rb
|
14
15
|
lib/god/behaviors/clean_pid_file.rb
|
15
16
|
lib/god/behaviors/notify_when_flapping.rb
|
17
|
+
lib/god/cli/command.rb
|
18
|
+
lib/god/cli/run.rb
|
19
|
+
lib/god/cli/version.rb
|
16
20
|
lib/god/condition.rb
|
17
21
|
lib/god/conditions/always.rb
|
22
|
+
lib/god/conditions/complex.rb
|
18
23
|
lib/god/conditions/cpu_usage.rb
|
19
24
|
lib/god/conditions/degrading_lambda.rb
|
25
|
+
lib/god/conditions/disk_usage.rb
|
20
26
|
lib/god/conditions/flapping.rb
|
21
27
|
lib/god/conditions/http_response_code.rb
|
22
28
|
lib/god/conditions/lambda.rb
|
@@ -50,6 +56,8 @@ test/configs/child_events/child_events.god
|
|
50
56
|
test/configs/child_events/simple_server.rb
|
51
57
|
test/configs/child_polls/child_polls.god
|
52
58
|
test/configs/child_polls/simple_server.rb
|
59
|
+
test/configs/complex/complex.god
|
60
|
+
test/configs/complex/simple_server.rb
|
53
61
|
test/configs/contact/contact.god
|
54
62
|
test/configs/contact/simple_server.rb
|
55
63
|
test/configs/daemon_events/daemon_events.god
|
@@ -70,6 +78,7 @@ test/helper.rb
|
|
70
78
|
test/suite.rb
|
71
79
|
test/test_behavior.rb
|
72
80
|
test/test_condition.rb
|
81
|
+
test/test_conditions_disk_usage.rb
|
73
82
|
test/test_conditions_http_response_code.rb
|
74
83
|
test/test_conditions_process_running.rb
|
75
84
|
test/test_conditions_tries.rb
|
data/README.txt
CHANGED
@@ -6,14 +6,21 @@ god
|
|
6
6
|
|
7
7
|
== DESCRIPTION:
|
8
8
|
|
9
|
-
God is an easy to configure, easy to extend monitoring framework written
|
9
|
+
God is an easy to configure, easy to extend monitoring framework written
|
10
|
+
in Ruby.
|
10
11
|
|
11
|
-
Keeping your server processes and tasks running should be a simple part of
|
12
|
+
Keeping your server processes and tasks running should be a simple part of
|
13
|
+
your deployment process. God aims to be the simplest, most powerful
|
14
|
+
monitoring application available.
|
12
15
|
|
13
16
|
== DOCUMENTATION:
|
14
17
|
|
15
18
|
See online documentation at http://god.rubyforge.org
|
16
19
|
|
20
|
+
== COMMUNITY:
|
21
|
+
|
22
|
+
Sign up for the god mailing list at http://groups.google.com/group/god-rb
|
23
|
+
|
17
24
|
== INSTALL:
|
18
25
|
|
19
26
|
$ sudo gem install god
|
data/Rakefile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'hoe'
|
3
3
|
|
4
|
-
Hoe.new('god', '0.5.0') do |p|
|
4
|
+
Hoe.new('god', '0.6.0') do |p|
|
5
5
|
p.rubyforge_name = 'god'
|
6
6
|
p.author = 'Tom Preston-Werner'
|
7
7
|
p.email = 'tom@rubyisawesome.com'
|
@@ -25,4 +25,11 @@ end
|
|
25
25
|
desc "Upload site to Rubyforge"
|
26
26
|
task :site_edge do
|
27
27
|
sh "scp -r site/* mojombo@god.rubyforge.org:/var/www/gforge-projects/god/edge"
|
28
|
+
end
|
29
|
+
|
30
|
+
desc "Run rcov"
|
31
|
+
task :coverage do
|
32
|
+
`rm -fr coverage`
|
33
|
+
`rcov test/test_*.rb`
|
34
|
+
`open coverage/index.html`
|
28
35
|
end
|
data/bin/god
CHANGED
@@ -15,7 +15,7 @@ begin
|
|
15
15
|
opts.banner = <<-EOF
|
16
16
|
Usage:
|
17
17
|
Starting:
|
18
|
-
god -c <config file> [-p <port> | -b] [-P <file>] [-l <file>] [-D]
|
18
|
+
god [-c <config file>] [-p <port> | -b] [-P <file>] [-l <file>] [-D]
|
19
19
|
|
20
20
|
Querying:
|
21
21
|
god <command> <argument> [-p <port>]
|
@@ -29,11 +29,13 @@ begin
|
|
29
29
|
stop <task or group name> stop task or group
|
30
30
|
monitor <task or group name> monitor task or group
|
31
31
|
unmonitor <task or group name> unmonitor task or group
|
32
|
+
remove <task or group name> remove task or group from god
|
32
33
|
load <file> load a config into a running god
|
33
34
|
log <task name> show realtime log for given task
|
34
35
|
status show status of each task
|
35
36
|
quit stop god
|
36
37
|
terminate stop god and all tasks
|
38
|
+
check run self diagnostic
|
37
39
|
|
38
40
|
Options:
|
39
41
|
EOF
|
@@ -73,223 +75,18 @@ begin
|
|
73
75
|
|
74
76
|
opts.parse!
|
75
77
|
|
76
|
-
if options[:version]
|
78
|
+
if !options[:config] && options[:version]
|
77
79
|
require 'god'
|
78
|
-
|
79
|
-
|
80
|
-
puts "Version #{God::VERSION}"
|
81
|
-
exit
|
82
|
-
elsif options[:info]
|
80
|
+
God::CLI::Version.version
|
81
|
+
elsif !options[:config] && options[:info]
|
83
82
|
require 'god'
|
84
|
-
|
85
|
-
|
86
|
-
puts "Polls: enabled"
|
87
|
-
puts "Events: " + God::EventHandler.event_system
|
88
|
-
|
89
|
-
exit
|
90
|
-
elsif command = ARGV[0]
|
83
|
+
God::CLI::Version.version_extended
|
84
|
+
elsif !options[:config] && command = ARGV[0]
|
91
85
|
require 'god'
|
92
|
-
|
93
|
-
# a command was specified
|
94
|
-
|
95
|
-
# connect to drb unix socket
|
96
|
-
DRb.start_service
|
97
|
-
server = DRbObject.new(nil, God::Socket.socket(options[:port]))
|
98
|
-
|
99
|
-
begin
|
100
|
-
server.ping
|
101
|
-
rescue DRb::DRbConnError
|
102
|
-
puts "The server is not available (or you do not have permissions to access it)"
|
103
|
-
abort
|
104
|
-
end
|
105
|
-
|
106
|
-
if command == 'load'
|
107
|
-
file = ARGV[1]
|
108
|
-
|
109
|
-
puts "Sending '#{command}' command"
|
110
|
-
puts
|
111
|
-
|
112
|
-
unless File.exist?(file)
|
113
|
-
abort "File not found: #{file}"
|
114
|
-
end
|
115
|
-
|
116
|
-
names, errors = *server.running_load(File.read(file), File.expand_path(file))
|
117
|
-
|
118
|
-
# output response
|
119
|
-
unless names.empty?
|
120
|
-
puts 'The following tasks were affected:'
|
121
|
-
names.each do |w|
|
122
|
-
puts ' ' + w
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
unless errors.empty?
|
127
|
-
puts errors
|
128
|
-
exit(1)
|
129
|
-
end
|
130
|
-
elsif command == 'status'
|
131
|
-
watches = server.status
|
132
|
-
watches.keys.sort.each do |name|
|
133
|
-
state = watches[name][:state]
|
134
|
-
puts "#{name}: #{state}"
|
135
|
-
end
|
136
|
-
elsif command == 'log'
|
137
|
-
begin
|
138
|
-
Signal.trap('INT') { exit }
|
139
|
-
name = ARGV[1]
|
140
|
-
t = Time.at(0)
|
141
|
-
loop do
|
142
|
-
print server.running_log(name, t)
|
143
|
-
t = Time.now
|
144
|
-
sleep 1
|
145
|
-
end
|
146
|
-
rescue God::NoSuchWatchError
|
147
|
-
puts "No such watch"
|
148
|
-
rescue DRb::DRbConnError
|
149
|
-
puts "The server went away"
|
150
|
-
end
|
151
|
-
elsif command == 'quit'
|
152
|
-
begin
|
153
|
-
server.terminate
|
154
|
-
abort 'Could not stop god'
|
155
|
-
rescue DRb::DRbConnError
|
156
|
-
puts 'Stopped god'
|
157
|
-
end
|
158
|
-
elsif command == 'terminate'
|
159
|
-
t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
|
160
|
-
if server.stop_all
|
161
|
-
t.kill; STDOUT.puts
|
162
|
-
puts 'Stopped all watches'
|
163
|
-
else
|
164
|
-
t.kill; STDOUT.puts
|
165
|
-
puts 'Could not stop all watches within 10 seconds'
|
166
|
-
end
|
167
|
-
|
168
|
-
begin
|
169
|
-
server.terminate
|
170
|
-
abort 'Could not stop god'
|
171
|
-
rescue DRb::DRbConnError
|
172
|
-
puts 'Stopped god'
|
173
|
-
end
|
174
|
-
else
|
175
|
-
# get the name of the watch/group
|
176
|
-
name = ARGV[1]
|
177
|
-
|
178
|
-
begin
|
179
|
-
puts "Sending '#{command}' command"
|
180
|
-
|
181
|
-
t = Thread.new { loop { sleep(1); STDOUT.print('.'); STDOUT.flush; sleep(1) } }
|
182
|
-
|
183
|
-
# send command
|
184
|
-
watches = server.control(name, command)
|
185
|
-
|
186
|
-
# output response
|
187
|
-
t.kill; STDOUT.puts
|
188
|
-
unless watches.empty?
|
189
|
-
puts 'The following watches were affected:'
|
190
|
-
watches.each do |w|
|
191
|
-
puts ' ' + w
|
192
|
-
end
|
193
|
-
else
|
194
|
-
puts 'No matching task or group'
|
195
|
-
end
|
196
|
-
rescue God::InvalidCommandError
|
197
|
-
t.kill rescue nil; STDOUT.puts
|
198
|
-
abort "Command '#{command}' is not valid. Run 'god --help' for usage"
|
199
|
-
end
|
200
|
-
end
|
86
|
+
God::CLI::Command.new(command, options, ARGV)
|
201
87
|
else
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
if !options[:daemonize]
|
206
|
-
require 'god'
|
207
|
-
|
208
|
-
if options[:port]
|
209
|
-
God.port = options[:port]
|
210
|
-
end
|
211
|
-
|
212
|
-
if options[:config]
|
213
|
-
unless File.exist?(options[:config])
|
214
|
-
abort "File not found: #{options[:config]}"
|
215
|
-
end
|
216
|
-
|
217
|
-
begin
|
218
|
-
load File.expand_path(options[:config])
|
219
|
-
rescue Exception => e
|
220
|
-
if e.instance_of?(SystemExit)
|
221
|
-
raise
|
222
|
-
else
|
223
|
-
puts e.message
|
224
|
-
puts e.backtrace.join("\n")
|
225
|
-
abort "There was an error in your configuration file (see above)"
|
226
|
-
end
|
227
|
-
end
|
228
|
-
end
|
229
|
-
else
|
230
|
-
# trap and ignore SIGHUP
|
231
|
-
Signal.trap('HUP') {}
|
232
|
-
|
233
|
-
pid = fork do
|
234
|
-
begin
|
235
|
-
require 'god'
|
236
|
-
|
237
|
-
log_file = options[:log] || "/dev/null"
|
238
|
-
|
239
|
-
unless God::EventHandler.loaded?
|
240
|
-
puts
|
241
|
-
puts "***********************************************************************"
|
242
|
-
puts "*"
|
243
|
-
puts "* Event conditions are not available for your installation of god."
|
244
|
-
puts "* You may still use and write custom conditions using the poll system"
|
245
|
-
puts "*"
|
246
|
-
puts "***********************************************************************"
|
247
|
-
puts
|
248
|
-
end
|
249
|
-
|
250
|
-
# set port if requested
|
251
|
-
if options[:port]
|
252
|
-
God.port = options[:port]
|
253
|
-
end
|
254
|
-
|
255
|
-
# load config
|
256
|
-
if options[:config]
|
257
|
-
unless File.exist?(options[:config])
|
258
|
-
abort "File not found: #{options[:config]}"
|
259
|
-
end
|
260
|
-
|
261
|
-
begin
|
262
|
-
load File.expand_path(options[:config])
|
263
|
-
rescue Exception => e
|
264
|
-
if e.instance_of?(SystemExit)
|
265
|
-
raise
|
266
|
-
else
|
267
|
-
puts e.message
|
268
|
-
puts e.backtrace.join("\n")
|
269
|
-
abort "There was an error in your configuration file (see above)"
|
270
|
-
end
|
271
|
-
end
|
272
|
-
end
|
273
|
-
|
274
|
-
# reset file descriptors
|
275
|
-
STDIN.reopen "/dev/null"
|
276
|
-
STDOUT.reopen(log_file, "a")
|
277
|
-
STDERR.reopen STDOUT
|
278
|
-
rescue => e
|
279
|
-
puts e.message
|
280
|
-
puts e.backtrace.join("\n")
|
281
|
-
abort "There was a fatal system error while starting god (see above)"
|
282
|
-
end
|
283
|
-
end
|
284
|
-
|
285
|
-
if options[:pid]
|
286
|
-
File.open(options[:pid], 'w') { |f| f.write pid }
|
287
|
-
end
|
288
|
-
|
289
|
-
::Process.detach pid
|
290
|
-
|
291
|
-
exit
|
292
|
-
end
|
88
|
+
require 'god/cli/run'
|
89
|
+
God::CLI::Run.new(options)
|
293
90
|
end
|
294
91
|
rescue Exception => e
|
295
92
|
if e.instance_of?(SystemExit)
|
data/examples/single.god
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
RAILS_ROOT = "/Users/tom/dev/gravatar2"
|
2
|
+
|
3
|
+
God.watch do |w|
|
4
|
+
w.name = "local-3000"
|
5
|
+
w.interval = 5.seconds # default
|
6
|
+
w.start = "mongrel_rails start -c #{RAILS_ROOT} -P #{RAILS_ROOT}/log/mongrel.pid -p 3000 -d"
|
7
|
+
w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.pid"
|
8
|
+
w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.pid"
|
9
|
+
w.pid_file = File.join(RAILS_ROOT, "log/mongrel.pid")
|
10
|
+
|
11
|
+
# clean pid files before start if necessary
|
12
|
+
w.behavior(:clean_pid_file)
|
13
|
+
|
14
|
+
# determine the state on startup
|
15
|
+
w.transition(:init, { true => :up, false => :start }) do |on|
|
16
|
+
on.condition(:process_running) do |c|
|
17
|
+
c.running = true
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# determine when process has finished starting
|
22
|
+
w.transition([:start, :restart], :up) do |on|
|
23
|
+
on.condition(:process_running) do |c|
|
24
|
+
c.running = true
|
25
|
+
end
|
26
|
+
|
27
|
+
# failsafe
|
28
|
+
on.condition(:tries) do |c|
|
29
|
+
c.times = 5
|
30
|
+
c.transition = :start
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# start if process is not running
|
35
|
+
w.transition(:up, :start) do |on|
|
36
|
+
on.condition(:process_exits)
|
37
|
+
end
|
38
|
+
|
39
|
+
# restart if memory or cpu is too high
|
40
|
+
w.transition(:up, :restart) do |on|
|
41
|
+
on.condition(:memory_usage) do |c|
|
42
|
+
c.interval = 20
|
43
|
+
c.above = 50.megabytes
|
44
|
+
c.times = [3, 5]
|
45
|
+
end
|
46
|
+
|
47
|
+
on.condition(:cpu_usage) do |c|
|
48
|
+
c.interval = 10
|
49
|
+
c.above = 10.percent
|
50
|
+
c.times = [3, 5]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# lifecycle
|
55
|
+
w.lifecycle do |on|
|
56
|
+
on.condition(:flapping) do |c|
|
57
|
+
c.to_state = [:start, :restart]
|
58
|
+
c.times = 5
|
59
|
+
c.within = 5.minute
|
60
|
+
c.transition = :unmonitored
|
61
|
+
c.retry_in = 10.minutes
|
62
|
+
c.retry_times = 5
|
63
|
+
c.retry_within = 2.hours
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/ext/god/netlink_handler.c
CHANGED
@@ -28,6 +28,8 @@ nlh_handle_events()
|
|
28
28
|
struct nlmsghdr *hdr;
|
29
29
|
struct proc_event *event;
|
30
30
|
|
31
|
+
VALUE extra_data;
|
32
|
+
|
31
33
|
fd_set fds;
|
32
34
|
|
33
35
|
FD_ZERO(&fds);
|
@@ -61,16 +63,27 @@ nlh_handle_events()
|
|
61
63
|
return INT2FIX(0);
|
62
64
|
}
|
63
65
|
|
64
|
-
|
66
|
+
extra_data = rb_hash_new();
|
67
|
+
rb_hash_aset(extra_data, ID2SYM(rb_intern("pid")), INT2FIX(event->event_data.exit.process_pid));
|
68
|
+
rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_code")), INT2FIX(event->event_data.exit.exit_code));
|
69
|
+
rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_signal")), INT2FIX(event->event_data.exit.exit_signal));
|
70
|
+
rb_hash_aset(extra_data, ID2SYM(rb_intern("thread_group_id")), INT2FIX(event->event_data.exit.process_tgid));
|
71
|
+
|
72
|
+
rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.exit.process_pid), ID2SYM(proc_exit), extra_data);
|
65
73
|
return INT2FIX(1);
|
66
74
|
|
67
|
-
/* TODO: On fork, call and pass pid of child */
|
68
75
|
case PROC_EVENT_FORK:
|
69
76
|
if (Qnil == rb_funcall(cEventHandler, m_watching_pid, 1, INT2FIX(event->event_data.fork.parent_pid))) {
|
70
77
|
return INT2FIX(0);
|
71
78
|
}
|
79
|
+
|
80
|
+
extra_data = rb_hash_new();
|
81
|
+
rb_hash_aset(extra_data, rb_intern("parent_pid"), INT2FIX(event->event_data.fork.parent_pid));
|
82
|
+
rb_hash_aset(extra_data, rb_intern("parent_thread_group_id"), INT2FIX(event->event_data.fork.parent_tgid));
|
83
|
+
rb_hash_aset(extra_data, rb_intern("child_pid"), INT2FIX(event->event_data.fork.child_pid));
|
84
|
+
rb_hash_aset(extra_data, rb_intern("child_thread_group_id"), INT2FIX(event->event_data.fork.child_tgid));
|
72
85
|
|
73
|
-
rb_funcall(cEventHandler, m_call,
|
86
|
+
rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork), extra_data);
|
74
87
|
return INT2FIX(1);
|
75
88
|
|
76
89
|
case PROC_EVENT_NONE:
|