god 0.5.0 → 0.6.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,3 +1,26 @@
1
+ == 0.6.0 /
2
+
3
+ * Minor Enhancement
4
+ * Move Syslog calls into God::Logger and clean up all calling code
5
+ * Remove god's pid file on user requested termination
6
+ * Better handling and cleanup of DRb server's unix domain socket
7
+ * Allow shorthand for requesting a god log
8
+ * Add `god check` to make it easier to diagnose event problems
9
+ * Refactor god binary into class/method structure
10
+ * Implement `god remove` to remove a Task altogether
11
+ * New Conditions
12
+ * DiskUsage < PollCondition - trigger if disk usage is above limit on mount [Rudy Desjardins]
13
+
14
+ == 0.5.2 / 2007-10-10
15
+
16
+ * Minor Enhancement
17
+ * Allow extra args to pass through to config file
18
+
19
+ == 0.5.1 / 2007-10-08
20
+
21
+ * Bug Fixes
22
+ * Rescue connection refused in http response code condition
23
+
1
24
  == 0.5.0 / 2007-10-05
2
25
 
3
26
  * Major Enhancements
@@ -5,6 +5,7 @@ Rakefile
5
5
  bin/god
6
6
  examples/events.god
7
7
  examples/gravatar.god
8
+ examples/single.god
8
9
  ext/god/extconf.rb
9
10
  ext/god/kqueue_handler.c
10
11
  ext/god/netlink_handler.c
@@ -13,10 +14,15 @@ lib/god.rb
13
14
  lib/god/behavior.rb
14
15
  lib/god/behaviors/clean_pid_file.rb
15
16
  lib/god/behaviors/notify_when_flapping.rb
17
+ lib/god/cli/command.rb
18
+ lib/god/cli/run.rb
19
+ lib/god/cli/version.rb
16
20
  lib/god/condition.rb
17
21
  lib/god/conditions/always.rb
22
+ lib/god/conditions/complex.rb
18
23
  lib/god/conditions/cpu_usage.rb
19
24
  lib/god/conditions/degrading_lambda.rb
25
+ lib/god/conditions/disk_usage.rb
20
26
  lib/god/conditions/flapping.rb
21
27
  lib/god/conditions/http_response_code.rb
22
28
  lib/god/conditions/lambda.rb
@@ -50,6 +56,8 @@ test/configs/child_events/child_events.god
50
56
  test/configs/child_events/simple_server.rb
51
57
  test/configs/child_polls/child_polls.god
52
58
  test/configs/child_polls/simple_server.rb
59
+ test/configs/complex/complex.god
60
+ test/configs/complex/simple_server.rb
53
61
  test/configs/contact/contact.god
54
62
  test/configs/contact/simple_server.rb
55
63
  test/configs/daemon_events/daemon_events.god
@@ -70,6 +78,7 @@ test/helper.rb
70
78
  test/suite.rb
71
79
  test/test_behavior.rb
72
80
  test/test_condition.rb
81
+ test/test_conditions_disk_usage.rb
73
82
  test/test_conditions_http_response_code.rb
74
83
  test/test_conditions_process_running.rb
75
84
  test/test_conditions_tries.rb
data/README.txt CHANGED
@@ -6,14 +6,21 @@ god
6
6
 
7
7
  == DESCRIPTION:
8
8
 
9
- God is an easy to configure, easy to extend monitoring framework written in Ruby.
9
+ God is an easy to configure, easy to extend monitoring framework written
10
+ in Ruby.
10
11
 
11
- Keeping your server processes and tasks running should be a simple part of your deployment process. God aims to be the simplest, most powerful monitoring application available.
12
+ Keeping your server processes and tasks running should be a simple part of
13
+ your deployment process. God aims to be the simplest, most powerful
14
+ monitoring application available.
12
15
 
13
16
  == DOCUMENTATION:
14
17
 
15
18
  See online documentation at http://god.rubyforge.org
16
19
 
20
+ == COMMUNITY:
21
+
22
+ Sign up for the god mailing list at http://groups.google.com/group/god-rb
23
+
17
24
  == INSTALL:
18
25
 
19
26
  $ sudo gem install god
data/Rakefile CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'hoe'
3
3
 
4
- Hoe.new('god', '0.5.0') do |p|
4
+ Hoe.new('god', '0.6.0') do |p|
5
5
  p.rubyforge_name = 'god'
6
6
  p.author = 'Tom Preston-Werner'
7
7
  p.email = 'tom@rubyisawesome.com'
@@ -25,4 +25,11 @@ end
25
25
  desc "Upload site to Rubyforge"
26
26
  task :site_edge do
27
27
  sh "scp -r site/* mojombo@god.rubyforge.org:/var/www/gforge-projects/god/edge"
28
+ end
29
+
30
+ desc "Run rcov"
31
+ task :coverage do
32
+ `rm -fr coverage`
33
+ `rcov test/test_*.rb`
34
+ `open coverage/index.html`
28
35
  end
data/bin/god CHANGED
@@ -15,7 +15,7 @@ begin
15
15
  opts.banner = <<-EOF
16
16
  Usage:
17
17
  Starting:
18
- god -c <config file> [-p <port> | -b] [-P <file>] [-l <file>] [-D]
18
+ god [-c <config file>] [-p <port> | -b] [-P <file>] [-l <file>] [-D]
19
19
 
20
20
  Querying:
21
21
  god <command> <argument> [-p <port>]
@@ -29,11 +29,13 @@ begin
29
29
  stop <task or group name> stop task or group
30
30
  monitor <task or group name> monitor task or group
31
31
  unmonitor <task or group name> unmonitor task or group
32
+ remove <task or group name> remove task or group from god
32
33
  load <file> load a config into a running god
33
34
  log <task name> show realtime log for given task
34
35
  status show status of each task
35
36
  quit stop god
36
37
  terminate stop god and all tasks
38
+ check run self diagnostic
37
39
 
38
40
  Options:
39
41
  EOF
@@ -73,223 +75,18 @@ begin
73
75
 
74
76
  opts.parse!
75
77
 
76
- if options[:version]
78
+ if !options[:config] && options[:version]
77
79
  require 'god'
78
-
79
- # print version
80
- puts "Version #{God::VERSION}"
81
- exit
82
- elsif options[:info]
80
+ God::CLI::Version.version
81
+ elsif !options[:config] && options[:info]
83
82
  require 'god'
84
-
85
- puts "Version: #{God::VERSION}"
86
- puts "Polls: enabled"
87
- puts "Events: " + God::EventHandler.event_system
88
-
89
- exit
90
- elsif command = ARGV[0]
83
+ God::CLI::Version.version_extended
84
+ elsif !options[:config] && command = ARGV[0]
91
85
  require 'god'
92
-
93
- # a command was specified
94
-
95
- # connect to drb unix socket
96
- DRb.start_service
97
- server = DRbObject.new(nil, God::Socket.socket(options[:port]))
98
-
99
- begin
100
- server.ping
101
- rescue DRb::DRbConnError
102
- puts "The server is not available (or you do not have permissions to access it)"
103
- abort
104
- end
105
-
106
- if command == 'load'
107
- file = ARGV[1]
108
-
109
- puts "Sending '#{command}' command"
110
- puts
111
-
112
- unless File.exist?(file)
113
- abort "File not found: #{file}"
114
- end
115
-
116
- names, errors = *server.running_load(File.read(file), File.expand_path(file))
117
-
118
- # output response
119
- unless names.empty?
120
- puts 'The following tasks were affected:'
121
- names.each do |w|
122
- puts ' ' + w
123
- end
124
- end
125
-
126
- unless errors.empty?
127
- puts errors
128
- exit(1)
129
- end
130
- elsif command == 'status'
131
- watches = server.status
132
- watches.keys.sort.each do |name|
133
- state = watches[name][:state]
134
- puts "#{name}: #{state}"
135
- end
136
- elsif command == 'log'
137
- begin
138
- Signal.trap('INT') { exit }
139
- name = ARGV[1]
140
- t = Time.at(0)
141
- loop do
142
- print server.running_log(name, t)
143
- t = Time.now
144
- sleep 1
145
- end
146
- rescue God::NoSuchWatchError
147
- puts "No such watch"
148
- rescue DRb::DRbConnError
149
- puts "The server went away"
150
- end
151
- elsif command == 'quit'
152
- begin
153
- server.terminate
154
- abort 'Could not stop god'
155
- rescue DRb::DRbConnError
156
- puts 'Stopped god'
157
- end
158
- elsif command == 'terminate'
159
- t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
160
- if server.stop_all
161
- t.kill; STDOUT.puts
162
- puts 'Stopped all watches'
163
- else
164
- t.kill; STDOUT.puts
165
- puts 'Could not stop all watches within 10 seconds'
166
- end
167
-
168
- begin
169
- server.terminate
170
- abort 'Could not stop god'
171
- rescue DRb::DRbConnError
172
- puts 'Stopped god'
173
- end
174
- else
175
- # get the name of the watch/group
176
- name = ARGV[1]
177
-
178
- begin
179
- puts "Sending '#{command}' command"
180
-
181
- t = Thread.new { loop { sleep(1); STDOUT.print('.'); STDOUT.flush; sleep(1) } }
182
-
183
- # send command
184
- watches = server.control(name, command)
185
-
186
- # output response
187
- t.kill; STDOUT.puts
188
- unless watches.empty?
189
- puts 'The following watches were affected:'
190
- watches.each do |w|
191
- puts ' ' + w
192
- end
193
- else
194
- puts 'No matching task or group'
195
- end
196
- rescue God::InvalidCommandError
197
- t.kill rescue nil; STDOUT.puts
198
- abort "Command '#{command}' is not valid. Run 'god --help' for usage"
199
- end
200
- end
86
+ God::CLI::Command.new(command, options, ARGV)
201
87
  else
202
- # start god
203
- $run = true
204
-
205
- if !options[:daemonize]
206
- require 'god'
207
-
208
- if options[:port]
209
- God.port = options[:port]
210
- end
211
-
212
- if options[:config]
213
- unless File.exist?(options[:config])
214
- abort "File not found: #{options[:config]}"
215
- end
216
-
217
- begin
218
- load File.expand_path(options[:config])
219
- rescue Exception => e
220
- if e.instance_of?(SystemExit)
221
- raise
222
- else
223
- puts e.message
224
- puts e.backtrace.join("\n")
225
- abort "There was an error in your configuration file (see above)"
226
- end
227
- end
228
- end
229
- else
230
- # trap and ignore SIGHUP
231
- Signal.trap('HUP') {}
232
-
233
- pid = fork do
234
- begin
235
- require 'god'
236
-
237
- log_file = options[:log] || "/dev/null"
238
-
239
- unless God::EventHandler.loaded?
240
- puts
241
- puts "***********************************************************************"
242
- puts "*"
243
- puts "* Event conditions are not available for your installation of god."
244
- puts "* You may still use and write custom conditions using the poll system"
245
- puts "*"
246
- puts "***********************************************************************"
247
- puts
248
- end
249
-
250
- # set port if requested
251
- if options[:port]
252
- God.port = options[:port]
253
- end
254
-
255
- # load config
256
- if options[:config]
257
- unless File.exist?(options[:config])
258
- abort "File not found: #{options[:config]}"
259
- end
260
-
261
- begin
262
- load File.expand_path(options[:config])
263
- rescue Exception => e
264
- if e.instance_of?(SystemExit)
265
- raise
266
- else
267
- puts e.message
268
- puts e.backtrace.join("\n")
269
- abort "There was an error in your configuration file (see above)"
270
- end
271
- end
272
- end
273
-
274
- # reset file descriptors
275
- STDIN.reopen "/dev/null"
276
- STDOUT.reopen(log_file, "a")
277
- STDERR.reopen STDOUT
278
- rescue => e
279
- puts e.message
280
- puts e.backtrace.join("\n")
281
- abort "There was a fatal system error while starting god (see above)"
282
- end
283
- end
284
-
285
- if options[:pid]
286
- File.open(options[:pid], 'w') { |f| f.write pid }
287
- end
288
-
289
- ::Process.detach pid
290
-
291
- exit
292
- end
88
+ require 'god/cli/run'
89
+ God::CLI::Run.new(options)
293
90
  end
294
91
  rescue Exception => e
295
92
  if e.instance_of?(SystemExit)
@@ -0,0 +1,66 @@
1
+ RAILS_ROOT = "/Users/tom/dev/gravatar2"
2
+
3
+ God.watch do |w|
4
+ w.name = "local-3000"
5
+ w.interval = 5.seconds # default
6
+ w.start = "mongrel_rails start -c #{RAILS_ROOT} -P #{RAILS_ROOT}/log/mongrel.pid -p 3000 -d"
7
+ w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.pid"
8
+ w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.pid"
9
+ w.pid_file = File.join(RAILS_ROOT, "log/mongrel.pid")
10
+
11
+ # clean pid files before start if necessary
12
+ w.behavior(:clean_pid_file)
13
+
14
+ # determine the state on startup
15
+ w.transition(:init, { true => :up, false => :start }) do |on|
16
+ on.condition(:process_running) do |c|
17
+ c.running = true
18
+ end
19
+ end
20
+
21
+ # determine when process has finished starting
22
+ w.transition([:start, :restart], :up) do |on|
23
+ on.condition(:process_running) do |c|
24
+ c.running = true
25
+ end
26
+
27
+ # failsafe
28
+ on.condition(:tries) do |c|
29
+ c.times = 5
30
+ c.transition = :start
31
+ end
32
+ end
33
+
34
+ # start if process is not running
35
+ w.transition(:up, :start) do |on|
36
+ on.condition(:process_exits)
37
+ end
38
+
39
+ # restart if memory or cpu is too high
40
+ w.transition(:up, :restart) do |on|
41
+ on.condition(:memory_usage) do |c|
42
+ c.interval = 20
43
+ c.above = 50.megabytes
44
+ c.times = [3, 5]
45
+ end
46
+
47
+ on.condition(:cpu_usage) do |c|
48
+ c.interval = 10
49
+ c.above = 10.percent
50
+ c.times = [3, 5]
51
+ end
52
+ end
53
+
54
+ # lifecycle
55
+ w.lifecycle do |on|
56
+ on.condition(:flapping) do |c|
57
+ c.to_state = [:start, :restart]
58
+ c.times = 5
59
+ c.within = 5.minute
60
+ c.transition = :unmonitored
61
+ c.retry_in = 10.minutes
62
+ c.retry_times = 5
63
+ c.retry_within = 2.hours
64
+ end
65
+ end
66
+ end
@@ -28,6 +28,8 @@ nlh_handle_events()
28
28
  struct nlmsghdr *hdr;
29
29
  struct proc_event *event;
30
30
 
31
+ VALUE extra_data;
32
+
31
33
  fd_set fds;
32
34
 
33
35
  FD_ZERO(&fds);
@@ -61,16 +63,27 @@ nlh_handle_events()
61
63
  return INT2FIX(0);
62
64
  }
63
65
 
64
- rb_funcall(cEventHandler, m_call, 2, INT2FIX(event->event_data.exit.process_pid), ID2SYM(proc_exit));
66
+ extra_data = rb_hash_new();
67
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("pid")), INT2FIX(event->event_data.exit.process_pid));
68
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_code")), INT2FIX(event->event_data.exit.exit_code));
69
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_signal")), INT2FIX(event->event_data.exit.exit_signal));
70
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("thread_group_id")), INT2FIX(event->event_data.exit.process_tgid));
71
+
72
+ rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.exit.process_pid), ID2SYM(proc_exit), extra_data);
65
73
  return INT2FIX(1);
66
74
 
67
- /* TODO: On fork, call and pass pid of child */
68
75
  case PROC_EVENT_FORK:
69
76
  if (Qnil == rb_funcall(cEventHandler, m_watching_pid, 1, INT2FIX(event->event_data.fork.parent_pid))) {
70
77
  return INT2FIX(0);
71
78
  }
79
+
80
+ extra_data = rb_hash_new();
81
+ rb_hash_aset(extra_data, rb_intern("parent_pid"), INT2FIX(event->event_data.fork.parent_pid));
82
+ rb_hash_aset(extra_data, rb_intern("parent_thread_group_id"), INT2FIX(event->event_data.fork.parent_tgid));
83
+ rb_hash_aset(extra_data, rb_intern("child_pid"), INT2FIX(event->event_data.fork.child_pid));
84
+ rb_hash_aset(extra_data, rb_intern("child_thread_group_id"), INT2FIX(event->event_data.fork.child_tgid));
72
85
 
73
- rb_funcall(cEventHandler, m_call, 2, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork));
86
+ rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork), extra_data);
74
87
  return INT2FIX(1);
75
88
 
76
89
  case PROC_EVENT_NONE: