god 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,26 @@
1
+ == 0.6.0 /
2
+
3
+ * Minor Enhancements
4
+ * Move Syslog calls into God::Logger and clean up all calling code
5
+ * Remove god's pid file on user requested termination
6
+ * Better handling and cleanup of DRb server's unix domain socket
7
+ * Allow shorthand for requesting a god log
8
+ * Add `god check` to make it easier to diagnose event problems
9
+ * Refactor god binary into class/method structure
10
+ * Implement `god remove` to remove a Task altogether
11
+ * New Conditions
12
+ * DiskUsage < PollCondition - trigger if disk usage is above limit on mount [Rudy Desjardins]
13
+
14
+ == 0.5.2 / 2007-10-10
15
+
16
+ * Minor Enhancement
17
+ * Allow extra args to pass through to config file
18
+
19
+ == 0.5.1 / 2007-10-08
20
+
21
+ * Bug Fixes
22
+ * Rescue connection refused in http response code condition
23
+
1
24
  == 0.5.0 / 2007-10-05
2
25
 
3
26
  * Major Enhancements
@@ -5,6 +5,7 @@ Rakefile
5
5
  bin/god
6
6
  examples/events.god
7
7
  examples/gravatar.god
8
+ examples/single.god
8
9
  ext/god/extconf.rb
9
10
  ext/god/kqueue_handler.c
10
11
  ext/god/netlink_handler.c
@@ -13,10 +14,15 @@ lib/god.rb
13
14
  lib/god/behavior.rb
14
15
  lib/god/behaviors/clean_pid_file.rb
15
16
  lib/god/behaviors/notify_when_flapping.rb
17
+ lib/god/cli/command.rb
18
+ lib/god/cli/run.rb
19
+ lib/god/cli/version.rb
16
20
  lib/god/condition.rb
17
21
  lib/god/conditions/always.rb
22
+ lib/god/conditions/complex.rb
18
23
  lib/god/conditions/cpu_usage.rb
19
24
  lib/god/conditions/degrading_lambda.rb
25
+ lib/god/conditions/disk_usage.rb
20
26
  lib/god/conditions/flapping.rb
21
27
  lib/god/conditions/http_response_code.rb
22
28
  lib/god/conditions/lambda.rb
@@ -50,6 +56,8 @@ test/configs/child_events/child_events.god
50
56
  test/configs/child_events/simple_server.rb
51
57
  test/configs/child_polls/child_polls.god
52
58
  test/configs/child_polls/simple_server.rb
59
+ test/configs/complex/complex.god
60
+ test/configs/complex/simple_server.rb
53
61
  test/configs/contact/contact.god
54
62
  test/configs/contact/simple_server.rb
55
63
  test/configs/daemon_events/daemon_events.god
@@ -70,6 +78,7 @@ test/helper.rb
70
78
  test/suite.rb
71
79
  test/test_behavior.rb
72
80
  test/test_condition.rb
81
+ test/test_conditions_disk_usage.rb
73
82
  test/test_conditions_http_response_code.rb
74
83
  test/test_conditions_process_running.rb
75
84
  test/test_conditions_tries.rb
data/README.txt CHANGED
@@ -6,14 +6,21 @@ god
6
6
 
7
7
  == DESCRIPTION:
8
8
 
9
- God is an easy to configure, easy to extend monitoring framework written in Ruby.
9
+ God is an easy to configure, easy to extend monitoring framework written
10
+ in Ruby.
10
11
 
11
- Keeping your server processes and tasks running should be a simple part of your deployment process. God aims to be the simplest, most powerful monitoring application available.
12
+ Keeping your server processes and tasks running should be a simple part of
13
+ your deployment process. God aims to be the simplest, most powerful
14
+ monitoring application available.
12
15
 
13
16
  == DOCUMENTATION:
14
17
 
15
18
  See online documentation at http://god.rubyforge.org
16
19
 
20
+ == COMMUNITY:
21
+
22
+ Sign up for the god mailing list at http://groups.google.com/group/god-rb
23
+
17
24
  == INSTALL:
18
25
 
19
26
  $ sudo gem install god
data/Rakefile CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'hoe'
3
3
 
4
- Hoe.new('god', '0.5.0') do |p|
4
+ Hoe.new('god', '0.6.0') do |p|
5
5
  p.rubyforge_name = 'god'
6
6
  p.author = 'Tom Preston-Werner'
7
7
  p.email = 'tom@rubyisawesome.com'
@@ -25,4 +25,11 @@ end
25
25
  desc "Upload site to Rubyforge"
26
26
  task :site_edge do
27
27
  sh "scp -r site/* mojombo@god.rubyforge.org:/var/www/gforge-projects/god/edge"
28
+ end
29
+
30
+ desc "Run rcov"
31
+ task :coverage do
32
+ `rm -fr coverage`
33
+ `rcov test/test_*.rb`
34
+ `open coverage/index.html`
28
35
  end
data/bin/god CHANGED
@@ -15,7 +15,7 @@ begin
15
15
  opts.banner = <<-EOF
16
16
  Usage:
17
17
  Starting:
18
- god -c <config file> [-p <port> | -b] [-P <file>] [-l <file>] [-D]
18
+ god [-c <config file>] [-p <port> | -b] [-P <file>] [-l <file>] [-D]
19
19
 
20
20
  Querying:
21
21
  god <command> <argument> [-p <port>]
@@ -29,11 +29,13 @@ begin
29
29
  stop <task or group name> stop task or group
30
30
  monitor <task or group name> monitor task or group
31
31
  unmonitor <task or group name> unmonitor task or group
32
+ remove <task or group name> remove task or group from god
32
33
  load <file> load a config into a running god
33
34
  log <task name> show realtime log for given task
34
35
  status show status of each task
35
36
  quit stop god
36
37
  terminate stop god and all tasks
38
+ check run self diagnostic
37
39
 
38
40
  Options:
39
41
  EOF
@@ -73,223 +75,18 @@ begin
73
75
 
74
76
  opts.parse!
75
77
 
76
- if options[:version]
78
+ if !options[:config] && options[:version]
77
79
  require 'god'
78
-
79
- # print version
80
- puts "Version #{God::VERSION}"
81
- exit
82
- elsif options[:info]
80
+ God::CLI::Version.version
81
+ elsif !options[:config] && options[:info]
83
82
  require 'god'
84
-
85
- puts "Version: #{God::VERSION}"
86
- puts "Polls: enabled"
87
- puts "Events: " + God::EventHandler.event_system
88
-
89
- exit
90
- elsif command = ARGV[0]
83
+ God::CLI::Version.version_extended
84
+ elsif !options[:config] && command = ARGV[0]
91
85
  require 'god'
92
-
93
- # a command was specified
94
-
95
- # connect to drb unix socket
96
- DRb.start_service
97
- server = DRbObject.new(nil, God::Socket.socket(options[:port]))
98
-
99
- begin
100
- server.ping
101
- rescue DRb::DRbConnError
102
- puts "The server is not available (or you do not have permissions to access it)"
103
- abort
104
- end
105
-
106
- if command == 'load'
107
- file = ARGV[1]
108
-
109
- puts "Sending '#{command}' command"
110
- puts
111
-
112
- unless File.exist?(file)
113
- abort "File not found: #{file}"
114
- end
115
-
116
- names, errors = *server.running_load(File.read(file), File.expand_path(file))
117
-
118
- # output response
119
- unless names.empty?
120
- puts 'The following tasks were affected:'
121
- names.each do |w|
122
- puts ' ' + w
123
- end
124
- end
125
-
126
- unless errors.empty?
127
- puts errors
128
- exit(1)
129
- end
130
- elsif command == 'status'
131
- watches = server.status
132
- watches.keys.sort.each do |name|
133
- state = watches[name][:state]
134
- puts "#{name}: #{state}"
135
- end
136
- elsif command == 'log'
137
- begin
138
- Signal.trap('INT') { exit }
139
- name = ARGV[1]
140
- t = Time.at(0)
141
- loop do
142
- print server.running_log(name, t)
143
- t = Time.now
144
- sleep 1
145
- end
146
- rescue God::NoSuchWatchError
147
- puts "No such watch"
148
- rescue DRb::DRbConnError
149
- puts "The server went away"
150
- end
151
- elsif command == 'quit'
152
- begin
153
- server.terminate
154
- abort 'Could not stop god'
155
- rescue DRb::DRbConnError
156
- puts 'Stopped god'
157
- end
158
- elsif command == 'terminate'
159
- t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
160
- if server.stop_all
161
- t.kill; STDOUT.puts
162
- puts 'Stopped all watches'
163
- else
164
- t.kill; STDOUT.puts
165
- puts 'Could not stop all watches within 10 seconds'
166
- end
167
-
168
- begin
169
- server.terminate
170
- abort 'Could not stop god'
171
- rescue DRb::DRbConnError
172
- puts 'Stopped god'
173
- end
174
- else
175
- # get the name of the watch/group
176
- name = ARGV[1]
177
-
178
- begin
179
- puts "Sending '#{command}' command"
180
-
181
- t = Thread.new { loop { sleep(1); STDOUT.print('.'); STDOUT.flush; sleep(1) } }
182
-
183
- # send command
184
- watches = server.control(name, command)
185
-
186
- # output response
187
- t.kill; STDOUT.puts
188
- unless watches.empty?
189
- puts 'The following watches were affected:'
190
- watches.each do |w|
191
- puts ' ' + w
192
- end
193
- else
194
- puts 'No matching task or group'
195
- end
196
- rescue God::InvalidCommandError
197
- t.kill rescue nil; STDOUT.puts
198
- abort "Command '#{command}' is not valid. Run 'god --help' for usage"
199
- end
200
- end
86
+ God::CLI::Command.new(command, options, ARGV)
201
87
  else
202
- # start god
203
- $run = true
204
-
205
- if !options[:daemonize]
206
- require 'god'
207
-
208
- if options[:port]
209
- God.port = options[:port]
210
- end
211
-
212
- if options[:config]
213
- unless File.exist?(options[:config])
214
- abort "File not found: #{options[:config]}"
215
- end
216
-
217
- begin
218
- load File.expand_path(options[:config])
219
- rescue Exception => e
220
- if e.instance_of?(SystemExit)
221
- raise
222
- else
223
- puts e.message
224
- puts e.backtrace.join("\n")
225
- abort "There was an error in your configuration file (see above)"
226
- end
227
- end
228
- end
229
- else
230
- # trap and ignore SIGHUP
231
- Signal.trap('HUP') {}
232
-
233
- pid = fork do
234
- begin
235
- require 'god'
236
-
237
- log_file = options[:log] || "/dev/null"
238
-
239
- unless God::EventHandler.loaded?
240
- puts
241
- puts "***********************************************************************"
242
- puts "*"
243
- puts "* Event conditions are not available for your installation of god."
244
- puts "* You may still use and write custom conditions using the poll system"
245
- puts "*"
246
- puts "***********************************************************************"
247
- puts
248
- end
249
-
250
- # set port if requested
251
- if options[:port]
252
- God.port = options[:port]
253
- end
254
-
255
- # load config
256
- if options[:config]
257
- unless File.exist?(options[:config])
258
- abort "File not found: #{options[:config]}"
259
- end
260
-
261
- begin
262
- load File.expand_path(options[:config])
263
- rescue Exception => e
264
- if e.instance_of?(SystemExit)
265
- raise
266
- else
267
- puts e.message
268
- puts e.backtrace.join("\n")
269
- abort "There was an error in your configuration file (see above)"
270
- end
271
- end
272
- end
273
-
274
- # reset file descriptors
275
- STDIN.reopen "/dev/null"
276
- STDOUT.reopen(log_file, "a")
277
- STDERR.reopen STDOUT
278
- rescue => e
279
- puts e.message
280
- puts e.backtrace.join("\n")
281
- abort "There was a fatal system error while starting god (see above)"
282
- end
283
- end
284
-
285
- if options[:pid]
286
- File.open(options[:pid], 'w') { |f| f.write pid }
287
- end
288
-
289
- ::Process.detach pid
290
-
291
- exit
292
- end
88
+ require 'god/cli/run'
89
+ God::CLI::Run.new(options)
293
90
  end
294
91
  rescue Exception => e
295
92
  if e.instance_of?(SystemExit)
@@ -0,0 +1,66 @@
1
+ RAILS_ROOT = "/Users/tom/dev/gravatar2"
2
+
3
+ God.watch do |w|
4
+ w.name = "local-3000"
5
+ w.interval = 5.seconds # default
6
+ w.start = "mongrel_rails start -c #{RAILS_ROOT} -P #{RAILS_ROOT}/log/mongrel.pid -p 3000 -d"
7
+ w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.pid"
8
+ w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.pid"
9
+ w.pid_file = File.join(RAILS_ROOT, "log/mongrel.pid")
10
+
11
+ # clean pid files before start if necessary
12
+ w.behavior(:clean_pid_file)
13
+
14
+ # determine the state on startup
15
+ w.transition(:init, { true => :up, false => :start }) do |on|
16
+ on.condition(:process_running) do |c|
17
+ c.running = true
18
+ end
19
+ end
20
+
21
+ # determine when process has finished starting
22
+ w.transition([:start, :restart], :up) do |on|
23
+ on.condition(:process_running) do |c|
24
+ c.running = true
25
+ end
26
+
27
+ # failsafe
28
+ on.condition(:tries) do |c|
29
+ c.times = 5
30
+ c.transition = :start
31
+ end
32
+ end
33
+
34
+ # start if process is not running
35
+ w.transition(:up, :start) do |on|
36
+ on.condition(:process_exits)
37
+ end
38
+
39
+ # restart if memory or cpu is too high
40
+ w.transition(:up, :restart) do |on|
41
+ on.condition(:memory_usage) do |c|
42
+ c.interval = 20
43
+ c.above = 50.megabytes
44
+ c.times = [3, 5]
45
+ end
46
+
47
+ on.condition(:cpu_usage) do |c|
48
+ c.interval = 10
49
+ c.above = 10.percent
50
+ c.times = [3, 5]
51
+ end
52
+ end
53
+
54
+ # lifecycle
55
+ w.lifecycle do |on|
56
+ on.condition(:flapping) do |c|
57
+ c.to_state = [:start, :restart]
58
+ c.times = 5
59
+ c.within = 5.minute
60
+ c.transition = :unmonitored
61
+ c.retry_in = 10.minutes
62
+ c.retry_times = 5
63
+ c.retry_within = 2.hours
64
+ end
65
+ end
66
+ end
@@ -28,6 +28,8 @@ nlh_handle_events()
28
28
  struct nlmsghdr *hdr;
29
29
  struct proc_event *event;
30
30
 
31
+ VALUE extra_data;
32
+
31
33
  fd_set fds;
32
34
 
33
35
  FD_ZERO(&fds);
@@ -61,16 +63,27 @@ nlh_handle_events()
61
63
  return INT2FIX(0);
62
64
  }
63
65
 
64
- rb_funcall(cEventHandler, m_call, 2, INT2FIX(event->event_data.exit.process_pid), ID2SYM(proc_exit));
66
+ extra_data = rb_hash_new();
67
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("pid")), INT2FIX(event->event_data.exit.process_pid));
68
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_code")), INT2FIX(event->event_data.exit.exit_code));
69
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_signal")), INT2FIX(event->event_data.exit.exit_signal));
70
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("thread_group_id")), INT2FIX(event->event_data.exit.process_tgid));
71
+
72
+ rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.exit.process_pid), ID2SYM(proc_exit), extra_data);
65
73
  return INT2FIX(1);
66
74
 
67
- /* TODO: On fork, call and pass pid of child */
68
75
  case PROC_EVENT_FORK:
69
76
  if (Qnil == rb_funcall(cEventHandler, m_watching_pid, 1, INT2FIX(event->event_data.fork.parent_pid))) {
70
77
  return INT2FIX(0);
71
78
  }
79
+
80
+ extra_data = rb_hash_new(); /* fork details handed to the Ruby handler; keys are Symbols, as in the exit branch */
81
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("parent_pid")), INT2FIX(event->event_data.fork.parent_pid)); /* rb_intern yields an ID, not a VALUE; wrap it in ID2SYM before using it as a hash key */
82
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("parent_thread_group_id")), INT2FIX(event->event_data.fork.parent_tgid));
83
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("child_pid")), INT2FIX(event->event_data.fork.child_pid));
84
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("child_thread_group_id")), INT2FIX(event->event_data.fork.child_tgid));
72
85
 
73
- rb_funcall(cEventHandler, m_call, 2, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork));
86
+ rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork), extra_data);
74
87
  return INT2FIX(1);
75
88
 
76
89
  case PROC_EVENT_NONE: