god 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +23 -0
- data/Manifest.txt +9 -0
- data/README.txt +9 -2
- data/Rakefile +8 -1
- data/bin/god +11 -214
- data/examples/single.god +66 -0
- data/ext/god/netlink_handler.c +16 -3
- data/lib/god.rb +153 -17
- data/lib/god/cli/command.rb +189 -0
- data/lib/god/cli/run.rb +120 -0
- data/lib/god/cli/version.rb +23 -0
- data/lib/god/conditions/complex.rb +86 -0
- data/lib/god/conditions/cpu_usage.rb +27 -0
- data/lib/god/conditions/disk_usage.rb +27 -0
- data/lib/god/conditions/flapping.rb +42 -11
- data/lib/god/conditions/http_response_code.rb +63 -3
- data/lib/god/conditions/memory_usage.rb +30 -1
- data/lib/god/conditions/process_exits.rb +24 -2
- data/lib/god/conditions/process_running.rb +32 -0
- data/lib/god/configurable.rb +5 -3
- data/lib/god/event_handler.rb +2 -2
- data/lib/god/hub.rb +12 -19
- data/lib/god/logger.rb +11 -2
- data/lib/god/process.rb +29 -20
- data/lib/god/socket.rb +41 -5
- data/lib/god/task.rb +6 -9
- data/lib/god/timer.rb +20 -13
- data/lib/god/watch.rb +3 -6
- data/test/configs/child_events/child_events.god +1 -1
- data/test/configs/complex/complex.god +59 -0
- data/test/configs/complex/simple_server.rb +3 -0
- data/test/test_conditions_disk_usage.rb +56 -0
- data/test/test_conditions_http_response_code.rb +15 -21
- data/test/test_god.rb +36 -0
- data/test/test_hub.rb +6 -4
- data/test/test_logger.rb +8 -0
- data/test/test_timer.rb +9 -0
- metadata +12 -2
data/History.txt
CHANGED
@@ -1,3 +1,26 @@
|
|
1
|
+
== 0.6.0 /
|
2
|
+
|
3
|
+
* Minor Enhancement
|
4
|
+
* Move Syslog calls into God::Logger and clean up all calling code
|
5
|
+
* Remove god's pid file on user requested termination
|
6
|
+
* Better handling and cleanup of DRb server's unix domain socket
|
7
|
+
* Allow shorthand for requesting a god log
|
8
|
+
* Add `god check` to make it easier to diagnose event problems
|
9
|
+
* Refactor god binary into class/method structure
|
10
|
+
* Implement `god remove` to remove a Task altogether
|
11
|
+
* New Conditions
|
12
|
+
* DiskUsage < PollCondition - trigger if disk usage is above limit on mount [Rudy Desjardins]
|
13
|
+
|
14
|
+
== 0.5.2 / 2007-10-10
|
15
|
+
|
16
|
+
* Minor Enhancement
|
17
|
+
* Allow extra args to pass through to config file
|
18
|
+
|
19
|
+
== 0.5.1 / 2007-10-08
|
20
|
+
|
21
|
+
* Bug Fixes
|
22
|
+
* Rescue connection refused in http response code condition
|
23
|
+
|
1
24
|
== 0.5.0 / 2007-10-05
|
2
25
|
|
3
26
|
* Major Enhancements
|
data/Manifest.txt
CHANGED
@@ -5,6 +5,7 @@ Rakefile
|
|
5
5
|
bin/god
|
6
6
|
examples/events.god
|
7
7
|
examples/gravatar.god
|
8
|
+
examples/single.god
|
8
9
|
ext/god/extconf.rb
|
9
10
|
ext/god/kqueue_handler.c
|
10
11
|
ext/god/netlink_handler.c
|
@@ -13,10 +14,15 @@ lib/god.rb
|
|
13
14
|
lib/god/behavior.rb
|
14
15
|
lib/god/behaviors/clean_pid_file.rb
|
15
16
|
lib/god/behaviors/notify_when_flapping.rb
|
17
|
+
lib/god/cli/command.rb
|
18
|
+
lib/god/cli/run.rb
|
19
|
+
lib/god/cli/version.rb
|
16
20
|
lib/god/condition.rb
|
17
21
|
lib/god/conditions/always.rb
|
22
|
+
lib/god/conditions/complex.rb
|
18
23
|
lib/god/conditions/cpu_usage.rb
|
19
24
|
lib/god/conditions/degrading_lambda.rb
|
25
|
+
lib/god/conditions/disk_usage.rb
|
20
26
|
lib/god/conditions/flapping.rb
|
21
27
|
lib/god/conditions/http_response_code.rb
|
22
28
|
lib/god/conditions/lambda.rb
|
@@ -50,6 +56,8 @@ test/configs/child_events/child_events.god
|
|
50
56
|
test/configs/child_events/simple_server.rb
|
51
57
|
test/configs/child_polls/child_polls.god
|
52
58
|
test/configs/child_polls/simple_server.rb
|
59
|
+
test/configs/complex/complex.god
|
60
|
+
test/configs/complex/simple_server.rb
|
53
61
|
test/configs/contact/contact.god
|
54
62
|
test/configs/contact/simple_server.rb
|
55
63
|
test/configs/daemon_events/daemon_events.god
|
@@ -70,6 +78,7 @@ test/helper.rb
|
|
70
78
|
test/suite.rb
|
71
79
|
test/test_behavior.rb
|
72
80
|
test/test_condition.rb
|
81
|
+
test/test_conditions_disk_usage.rb
|
73
82
|
test/test_conditions_http_response_code.rb
|
74
83
|
test/test_conditions_process_running.rb
|
75
84
|
test/test_conditions_tries.rb
|
data/README.txt
CHANGED
@@ -6,14 +6,21 @@ god
|
|
6
6
|
|
7
7
|
== DESCRIPTION:
|
8
8
|
|
9
|
-
God is an easy to configure, easy to extend monitoring framework written in Ruby.
|
9
|
+
God is an easy to configure, easy to extend monitoring framework written
|
10
|
+
in Ruby.
|
10
11
|
|
11
|
-
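Keeping your server processes and tasks running should be a simple part of your deployment process. God aims to be the simplest, most powerful monitoring application available.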
|
12
|
+
Keeping your server processes and tasks running should be a simple part of
|
13
|
+
your deployment process. God aims to be the simplest, most powerful
|
14
|
+
monitoring application available.
|
12
15
|
|
13
16
|
== DOCUMENTATION:
|
14
17
|
|
15
18
|
See online documentation at http://god.rubyforge.org
|
16
19
|
|
20
|
+
== COMMUNITY:
|
21
|
+
|
22
|
+
Sign up for the god mailing list at http://groups.google.com/group/god-rb
|
23
|
+
|
17
24
|
== INSTALL:
|
18
25
|
|
19
26
|
$ sudo gem install god
|
data/Rakefile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'hoe'
|
3
3
|
|
4
|
-
Hoe.new('god', '0.5.0') do |p|
|
4
|
+
Hoe.new('god', '0.6.0') do |p|
|
5
5
|
p.rubyforge_name = 'god'
|
6
6
|
p.author = 'Tom Preston-Werner'
|
7
7
|
p.email = 'tom@rubyisawesome.com'
|
@@ -25,4 +25,11 @@ end
|
|
25
25
|
desc "Upload site to Rubyforge"
|
26
26
|
task :site_edge do
|
27
27
|
sh "scp -r site/* mojombo@god.rubyforge.org:/var/www/gforge-projects/god/edge"
|
28
|
+
end
|
29
|
+
|
30
|
+
desc "Run rcov"
|
31
|
+
task :coverage do
|
32
|
+
`rm -fr coverage`
|
33
|
+
`rcov test/test_*.rb`
|
34
|
+
`open coverage/index.html`
|
28
35
|
end
|
data/bin/god
CHANGED
@@ -15,7 +15,7 @@ begin
|
|
15
15
|
opts.banner = <<-EOF
|
16
16
|
Usage:
|
17
17
|
Starting:
|
18
|
-
god -c <config file> [-p <port> | -b] [-P <file>] [-l <file>] [-D]
|
18
|
+
god [-c <config file>] [-p <port> | -b] [-P <file>] [-l <file>] [-D]
|
19
19
|
|
20
20
|
Querying:
|
21
21
|
god <command> <argument> [-p <port>]
|
@@ -29,11 +29,13 @@ begin
|
|
29
29
|
stop <task or group name> stop task or group
|
30
30
|
monitor <task or group name> monitor task or group
|
31
31
|
unmonitor <task or group name> unmonitor task or group
|
32
|
+
remove <task or group name> remove task or group from god
|
32
33
|
load <file> load a config into a running god
|
33
34
|
log <task name> show realtime log for given task
|
34
35
|
status show status of each task
|
35
36
|
quit stop god
|
36
37
|
terminate stop god and all tasks
|
38
|
+
check run self diagnostic
|
37
39
|
|
38
40
|
Options:
|
39
41
|
EOF
|
@@ -73,223 +75,18 @@ begin
|
|
73
75
|
|
74
76
|
opts.parse!
|
75
77
|
|
76
|
-
if options[:version]
|
78
|
+
if !options[:config] && options[:version]
|
77
79
|
require 'god'
|
78
|
-
|
79
|
-
|
80
|
-
puts "Version #{God::VERSION}"
|
81
|
-
exit
|
82
|
-
elsif options[:info]
|
80
|
+
God::CLI::Version.version
|
81
|
+
elsif !options[:config] && options[:info]
|
83
82
|
require 'god'
|
84
|
-
|
85
|
-
|
86
|
-
puts "Polls: enabled"
|
87
|
-
puts "Events: " + God::EventHandler.event_system
|
88
|
-
|
89
|
-
exit
|
90
|
-
elsif command = ARGV[0]
|
83
|
+
God::CLI::Version.version_extended
|
84
|
+
elsif !options[:config] && command = ARGV[0]
|
91
85
|
require 'god'
|
92
|
-
|
93
|
-
# a command was specified
|
94
|
-
|
95
|
-
# connect to drb unix socket
|
96
|
-
DRb.start_service
|
97
|
-
server = DRbObject.new(nil, God::Socket.socket(options[:port]))
|
98
|
-
|
99
|
-
begin
|
100
|
-
server.ping
|
101
|
-
rescue DRb::DRbConnError
|
102
|
-
puts "The server is not available (or you do not have permissions to access it)"
|
103
|
-
abort
|
104
|
-
end
|
105
|
-
|
106
|
-
if command == 'load'
|
107
|
-
file = ARGV[1]
|
108
|
-
|
109
|
-
puts "Sending '#{command}' command"
|
110
|
-
puts
|
111
|
-
|
112
|
-
unless File.exist?(file)
|
113
|
-
abort "File not found: #{file}"
|
114
|
-
end
|
115
|
-
|
116
|
-
names, errors = *server.running_load(File.read(file), File.expand_path(file))
|
117
|
-
|
118
|
-
# output response
|
119
|
-
unless names.empty?
|
120
|
-
puts 'The following tasks were affected:'
|
121
|
-
names.each do |w|
|
122
|
-
puts ' ' + w
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
unless errors.empty?
|
127
|
-
puts errors
|
128
|
-
exit(1)
|
129
|
-
end
|
130
|
-
elsif command == 'status'
|
131
|
-
watches = server.status
|
132
|
-
watches.keys.sort.each do |name|
|
133
|
-
state = watches[name][:state]
|
134
|
-
puts "#{name}: #{state}"
|
135
|
-
end
|
136
|
-
elsif command == 'log'
|
137
|
-
begin
|
138
|
-
Signal.trap('INT') { exit }
|
139
|
-
name = ARGV[1]
|
140
|
-
t = Time.at(0)
|
141
|
-
loop do
|
142
|
-
print server.running_log(name, t)
|
143
|
-
t = Time.now
|
144
|
-
sleep 1
|
145
|
-
end
|
146
|
-
rescue God::NoSuchWatchError
|
147
|
-
puts "No such watch"
|
148
|
-
rescue DRb::DRbConnError
|
149
|
-
puts "The server went away"
|
150
|
-
end
|
151
|
-
elsif command == 'quit'
|
152
|
-
begin
|
153
|
-
server.terminate
|
154
|
-
abort 'Could not stop god'
|
155
|
-
rescue DRb::DRbConnError
|
156
|
-
puts 'Stopped god'
|
157
|
-
end
|
158
|
-
elsif command == 'terminate'
|
159
|
-
t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
|
160
|
-
if server.stop_all
|
161
|
-
t.kill; STDOUT.puts
|
162
|
-
puts 'Stopped all watches'
|
163
|
-
else
|
164
|
-
t.kill; STDOUT.puts
|
165
|
-
puts 'Could not stop all watches within 10 seconds'
|
166
|
-
end
|
167
|
-
|
168
|
-
begin
|
169
|
-
server.terminate
|
170
|
-
abort 'Could not stop god'
|
171
|
-
rescue DRb::DRbConnError
|
172
|
-
puts 'Stopped god'
|
173
|
-
end
|
174
|
-
else
|
175
|
-
# get the name of the watch/group
|
176
|
-
name = ARGV[1]
|
177
|
-
|
178
|
-
begin
|
179
|
-
puts "Sending '#{command}' command"
|
180
|
-
|
181
|
-
t = Thread.new { loop { sleep(1); STDOUT.print('.'); STDOUT.flush; sleep(1) } }
|
182
|
-
|
183
|
-
# send command
|
184
|
-
watches = server.control(name, command)
|
185
|
-
|
186
|
-
# output response
|
187
|
-
t.kill; STDOUT.puts
|
188
|
-
unless watches.empty?
|
189
|
-
puts 'The following watches were affected:'
|
190
|
-
watches.each do |w|
|
191
|
-
puts ' ' + w
|
192
|
-
end
|
193
|
-
else
|
194
|
-
puts 'No matching task or group'
|
195
|
-
end
|
196
|
-
rescue God::InvalidCommandError
|
197
|
-
t.kill rescue nil; STDOUT.puts
|
198
|
-
abort "Command '#{command}' is not valid. Run 'god --help' for usage"
|
199
|
-
end
|
200
|
-
end
|
86
|
+
God::CLI::Command.new(command, options, ARGV)
|
201
87
|
else
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
if !options[:daemonize]
|
206
|
-
require 'god'
|
207
|
-
|
208
|
-
if options[:port]
|
209
|
-
God.port = options[:port]
|
210
|
-
end
|
211
|
-
|
212
|
-
if options[:config]
|
213
|
-
unless File.exist?(options[:config])
|
214
|
-
abort "File not found: #{options[:config]}"
|
215
|
-
end
|
216
|
-
|
217
|
-
begin
|
218
|
-
load File.expand_path(options[:config])
|
219
|
-
rescue Exception => e
|
220
|
-
if e.instance_of?(SystemExit)
|
221
|
-
raise
|
222
|
-
else
|
223
|
-
puts e.message
|
224
|
-
puts e.backtrace.join("\n")
|
225
|
-
abort "There was an error in your configuration file (see above)"
|
226
|
-
end
|
227
|
-
end
|
228
|
-
end
|
229
|
-
else
|
230
|
-
# trap and ignore SIGHUP
|
231
|
-
Signal.trap('HUP') {}
|
232
|
-
|
233
|
-
pid = fork do
|
234
|
-
begin
|
235
|
-
require 'god'
|
236
|
-
|
237
|
-
log_file = options[:log] || "/dev/null"
|
238
|
-
|
239
|
-
unless God::EventHandler.loaded?
|
240
|
-
puts
|
241
|
-
puts "***********************************************************************"
|
242
|
-
puts "*"
|
243
|
-
puts "* Event conditions are not available for your installation of god."
|
244
|
-
puts "* You may still use and write custom conditions using the poll system"
|
245
|
-
puts "*"
|
246
|
-
puts "***********************************************************************"
|
247
|
-
puts
|
248
|
-
end
|
249
|
-
|
250
|
-
# set port if requested
|
251
|
-
if options[:port]
|
252
|
-
God.port = options[:port]
|
253
|
-
end
|
254
|
-
|
255
|
-
# load config
|
256
|
-
if options[:config]
|
257
|
-
unless File.exist?(options[:config])
|
258
|
-
abort "File not found: #{options[:config]}"
|
259
|
-
end
|
260
|
-
|
261
|
-
begin
|
262
|
-
load File.expand_path(options[:config])
|
263
|
-
rescue Exception => e
|
264
|
-
if e.instance_of?(SystemExit)
|
265
|
-
raise
|
266
|
-
else
|
267
|
-
puts e.message
|
268
|
-
puts e.backtrace.join("\n")
|
269
|
-
abort "There was an error in your configuration file (see above)"
|
270
|
-
end
|
271
|
-
end
|
272
|
-
end
|
273
|
-
|
274
|
-
# reset file descriptors
|
275
|
-
STDIN.reopen "/dev/null"
|
276
|
-
STDOUT.reopen(log_file, "a")
|
277
|
-
STDERR.reopen STDOUT
|
278
|
-
rescue => e
|
279
|
-
puts e.message
|
280
|
-
puts e.backtrace.join("\n")
|
281
|
-
abort "There was a fatal system error while starting god (see above)"
|
282
|
-
end
|
283
|
-
end
|
284
|
-
|
285
|
-
if options[:pid]
|
286
|
-
File.open(options[:pid], 'w') { |f| f.write pid }
|
287
|
-
end
|
288
|
-
|
289
|
-
::Process.detach pid
|
290
|
-
|
291
|
-
exit
|
292
|
-
end
|
88
|
+
require 'god/cli/run'
|
89
|
+
God::CLI::Run.new(options)
|
293
90
|
end
|
294
91
|
rescue Exception => e
|
295
92
|
if e.instance_of?(SystemExit)
|
data/examples/single.god
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
RAILS_ROOT = "/Users/tom/dev/gravatar2"
|
2
|
+
|
3
|
+
God.watch do |w|
|
4
|
+
w.name = "local-3000"
|
5
|
+
w.interval = 5.seconds # default
|
6
|
+
w.start = "mongrel_rails start -c #{RAILS_ROOT} -P #{RAILS_ROOT}/log/mongrel.pid -p 3000 -d"
|
7
|
+
w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.pid"
|
8
|
+
w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.pid"
|
9
|
+
w.pid_file = File.join(RAILS_ROOT, "log/mongrel.pid")
|
10
|
+
|
11
|
+
# clean pid files before start if necessary
|
12
|
+
w.behavior(:clean_pid_file)
|
13
|
+
|
14
|
+
# determine the state on startup
|
15
|
+
w.transition(:init, { true => :up, false => :start }) do |on|
|
16
|
+
on.condition(:process_running) do |c|
|
17
|
+
c.running = true
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# determine when process has finished starting
|
22
|
+
w.transition([:start, :restart], :up) do |on|
|
23
|
+
on.condition(:process_running) do |c|
|
24
|
+
c.running = true
|
25
|
+
end
|
26
|
+
|
27
|
+
# failsafe
|
28
|
+
on.condition(:tries) do |c|
|
29
|
+
c.times = 5
|
30
|
+
c.transition = :start
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# start if process is not running
|
35
|
+
w.transition(:up, :start) do |on|
|
36
|
+
on.condition(:process_exits)
|
37
|
+
end
|
38
|
+
|
39
|
+
# restart if memory or cpu is too high
|
40
|
+
w.transition(:up, :restart) do |on|
|
41
|
+
on.condition(:memory_usage) do |c|
|
42
|
+
c.interval = 20
|
43
|
+
c.above = 50.megabytes
|
44
|
+
c.times = [3, 5]
|
45
|
+
end
|
46
|
+
|
47
|
+
on.condition(:cpu_usage) do |c|
|
48
|
+
c.interval = 10
|
49
|
+
c.above = 10.percent
|
50
|
+
c.times = [3, 5]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# lifecycle
|
55
|
+
w.lifecycle do |on|
|
56
|
+
on.condition(:flapping) do |c|
|
57
|
+
c.to_state = [:start, :restart]
|
58
|
+
c.times = 5
|
59
|
+
c.within = 5.minute
|
60
|
+
c.transition = :unmonitored
|
61
|
+
c.retry_in = 10.minutes
|
62
|
+
c.retry_times = 5
|
63
|
+
c.retry_within = 2.hours
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/ext/god/netlink_handler.c
CHANGED
@@ -28,6 +28,8 @@ nlh_handle_events()
|
|
28
28
|
struct nlmsghdr *hdr;
|
29
29
|
struct proc_event *event;
|
30
30
|
|
31
|
+
VALUE extra_data;
|
32
|
+
|
31
33
|
fd_set fds;
|
32
34
|
|
33
35
|
FD_ZERO(&fds);
|
@@ -61,16 +63,27 @@ nlh_handle_events()
|
|
61
63
|
return INT2FIX(0);
|
62
64
|
}
|
63
65
|
|
64
|
-
|
66
|
+
extra_data = rb_hash_new();
|
67
|
+
rb_hash_aset(extra_data, ID2SYM(rb_intern("pid")), INT2FIX(event->event_data.exit.process_pid));
|
68
|
+
rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_code")), INT2FIX(event->event_data.exit.exit_code));
|
69
|
+
rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_signal")), INT2FIX(event->event_data.exit.exit_signal));
|
70
|
+
rb_hash_aset(extra_data, ID2SYM(rb_intern("thread_group_id")), INT2FIX(event->event_data.exit.process_tgid));
|
71
|
+
|
72
|
+
rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.exit.process_pid), ID2SYM(proc_exit), extra_data);
|
65
73
|
return INT2FIX(1);
|
66
74
|
|
67
|
-
/* TODO: On fork, call and pass pid of child */
|
68
75
|
case PROC_EVENT_FORK:
|
69
76
|
if (Qnil == rb_funcall(cEventHandler, m_watching_pid, 1, INT2FIX(event->event_data.fork.parent_pid))) {
|
70
77
|
return INT2FIX(0);
|
71
78
|
}
|
79
|
+
|
80
|
+
extra_data = rb_hash_new();
|
81
|
+
rb_hash_aset(extra_data, rb_intern("parent_pid"), INT2FIX(event->event_data.fork.parent_pid));
|
82
|
+
rb_hash_aset(extra_data, rb_intern("parent_thread_group_id"), INT2FIX(event->event_data.fork.parent_tgid));
|
83
|
+
rb_hash_aset(extra_data, rb_intern("child_pid"), INT2FIX(event->event_data.fork.child_pid));
|
84
|
+
rb_hash_aset(extra_data, rb_intern("child_thread_group_id"), INT2FIX(event->event_data.fork.child_tgid));
|
72
85
|
|
73
|
-
rb_funcall(cEventHandler, m_call, 2, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork));
|
86
|
+
rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork), extra_data);
|
74
87
|
return INT2FIX(1);
|
75
88
|
|
76
89
|
case PROC_EVENT_NONE:
|