god 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. data/History.txt +26 -0
  2. data/Manifest.txt +15 -1
  3. data/Rakefile +2 -7
  4. data/bin/god +104 -16
  5. data/lib/god.rb +169 -37
  6. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  7. data/lib/god/condition.rb +1 -0
  8. data/lib/god/conditions/degrading_lambda.rb +47 -0
  9. data/lib/god/conditions/process_exits.rb +6 -2
  10. data/lib/god/conditions/tries.rb +33 -0
  11. data/lib/god/dependency_graph.rb +41 -0
  12. data/lib/god/errors.rb +6 -0
  13. data/lib/god/hub.rb +43 -20
  14. data/lib/god/logger.rb +44 -0
  15. data/lib/god/process.rb +91 -19
  16. data/lib/god/registry.rb +4 -0
  17. data/lib/god/server.rb +12 -2
  18. data/lib/god/timeline.rb +36 -0
  19. data/lib/god/watch.rb +27 -8
  20. data/test/configs/child_events/child_events.god +7 -2
  21. data/test/configs/child_polls/child_polls.god +3 -1
  22. data/test/configs/child_polls/simple_server.rb +1 -1
  23. data/test/configs/daemon_events/daemon_events.god +7 -3
  24. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  25. data/test/configs/daemon_polls/simple_server.rb +6 -0
  26. data/test/configs/degrading_lambda/degrading_lambda.god +33 -0
  27. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  28. data/test/configs/real.rb +1 -1
  29. data/test/configs/running_load/running_load.god +16 -0
  30. data/test/configs/stress/simple_server.rb +3 -0
  31. data/test/configs/stress/stress.god +15 -0
  32. data/test/configs/test.rb +14 -2
  33. data/test/helper.rb +12 -2
  34. data/test/test_conditions_tries.rb +46 -0
  35. data/test/test_dependency_graph.rb +62 -0
  36. data/test/test_god.rb +289 -33
  37. data/test/test_handlers_kqueue_handler.rb +11 -7
  38. data/test/test_hub.rb +18 -0
  39. data/test/test_logger.rb +55 -0
  40. data/test/test_process.rb +135 -17
  41. data/test/test_registry.rb +2 -1
  42. data/test/test_server.rb +35 -4
  43. data/test/test_timeline.rb +14 -2
  44. data/test/test_watch.rb +7 -0
  45. metadata +21 -4
  46. data/lib/god/conditions/timeline.rb +0 -17
data/History.txt CHANGED
@@ -1,3 +1,29 @@
1
+ == 0.4.0
2
+
3
+ * Major Enhancements
4
+ * Add the ability for conditions to override transition state (for exceptional cases)
5
+ * Implement dynamic load of config files while god is running (god load <filename>)
6
+ * Add ability to save auto-daemonized process output to a log file
7
+ * Add robust default stop lambda command for auto-daemonized processes (inspired by _eric)
8
+ * Add status command for god binary (shows status of each watch)
9
+ * Create proper logger with timestamps
10
+ * Add log command to god binary to get real time logs for a specific watch from a running god instance
11
+ * Add terminate command for god binary (stop god and all watches)
12
+ * Minor Enhancements
13
+ * Enforce validity of Watches
14
+ * Enforce that God.init is not called after a Watch
15
+ * Move pid_file_directory creation and validation to God.start
16
+ * Remove check for at least one Watch during startup (now that dynamic loading exists)
17
+ * New Conditions
18
+ * Tries < PollCondition - triggers after the specified number of tries
19
+ * Add :notify_when_flapping behavior to check for oscillation [kevinclark]
20
+ * Add :degrading_lambda condition. [kevinclark]
21
+ It uses a decaying interval (1/2 rate) for 3 cycles before failing.
22
+ * Bug Fixes
23
+ * Use exit!(0) instead of exit! in god binary to exit with code 0 (instead of default -1)
24
+ * Command line group control fixed
25
+ * Fix cross-thread return problem (use exit instead)
26
+
1
27
  == 0.3.0 / 2007-08-17
2
28
 
3
29
  * Fix netlink header problem on Ubuntu Edgy [Dan Sully]
data/Manifest.txt CHANGED
@@ -11,20 +11,24 @@ ext/god/netlink_handler.c
11
11
  lib/god.rb
12
12
  lib/god/behavior.rb
13
13
  lib/god/behaviors/clean_pid_file.rb
14
+ lib/god/behaviors/notify_when_flapping.rb
14
15
  lib/god/condition.rb
15
16
  lib/god/conditions/always.rb
16
17
  lib/god/conditions/cpu_usage.rb
18
+ lib/god/conditions/degrading_lambda.rb
17
19
  lib/god/conditions/lambda.rb
18
20
  lib/god/conditions/memory_usage.rb
19
21
  lib/god/conditions/process_exits.rb
20
22
  lib/god/conditions/process_running.rb
21
- lib/god/conditions/timeline.rb
23
+ lib/god/conditions/tries.rb
24
+ lib/god/dependency_graph.rb
22
25
  lib/god/errors.rb
23
26
  lib/god/event_handler.rb
24
27
  lib/god/event_handlers/dummy_handler.rb
25
28
  lib/god/event_handlers/kqueue_handler.rb
26
29
  lib/god/event_handlers/netlink_handler.rb
27
30
  lib/god/hub.rb
31
+ lib/god/logger.rb
28
32
  lib/god/metric.rb
29
33
  lib/god/process.rb
30
34
  lib/god/registry.rb
@@ -32,6 +36,7 @@ lib/god/reporter.rb
32
36
  lib/god/server.rb
33
37
  lib/god/sugar.rb
34
38
  lib/god/system/process.rb
39
+ lib/god/timeline.rb
35
40
  lib/god/timer.rb
36
41
  lib/god/watch.rb
37
42
  test/configs/child_events/child_events.god
@@ -40,17 +45,26 @@ test/configs/child_polls/child_polls.god
40
45
  test/configs/child_polls/simple_server.rb
41
46
  test/configs/daemon_events/daemon_events.god
42
47
  test/configs/daemon_events/simple_server.rb
48
+ test/configs/daemon_polls/daemon_polls.god
49
+ test/configs/daemon_polls/simple_server.rb
50
+ test/configs/degrading_lambda/degrading_lambda.god
51
+ test/configs/degrading_lambda/tcp_server.rb
43
52
  test/configs/real.rb
53
+ test/configs/running_load/running_load.god
54
+ test/configs/stress/simple_server.rb
55
+ test/configs/stress/stress.god
44
56
  test/configs/test.rb
45
57
  test/helper.rb
46
58
  test/suite.rb
47
59
  test/test_behavior.rb
48
60
  test/test_condition.rb
49
61
  test/test_conditions_process_running.rb
62
+ test/test_dependency_graph.rb
50
63
  test/test_event_handler.rb
51
64
  test/test_god.rb
52
65
  test/test_handlers_kqueue_handler.rb
53
66
  test/test_hub.rb
67
+ test/test_logger.rb
54
68
  test/test_metric.rb
55
69
  test/test_process.rb
56
70
  test/test_registry.rb
data/Rakefile CHANGED
@@ -1,9 +1,7 @@
1
- # -*- ruby -*-
2
-
3
1
  require 'rubygems'
4
2
  require 'hoe'
5
3
 
6
- Hoe.new('god', '0.3.0') do |p|
4
+ Hoe.new('god', '0.4.0') do |p|
7
5
  p.rubyforge_name = 'god'
8
6
  p.author = 'Tom Preston-Werner'
9
7
  p.email = 'tom@rubyisawesome.com'
@@ -11,7 +9,6 @@ Hoe.new('god', '0.3.0') do |p|
11
9
  p.summary = 'Like monit, only awesome'
12
10
  p.description = "God is an easy to configure, easy to extend monitoring framework written in Ruby."
13
11
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
14
- # p.extra_deps << ['daemons', '>=1.0.7']
15
12
  p.spec_extras = {:extensions => ['ext/god/extconf.rb']}
16
13
  end
17
14
 
@@ -28,6 +25,4 @@ end
28
25
  desc "Upload site to Rubyforge"
29
26
  task :site_edge do
30
27
  sh "scp -r site/* mojombo@god.rubyforge.org:/var/www/gforge-projects/god/edge"
31
- end
32
-
33
- # vim: syntax=Ruby
28
+ end
data/bin/god CHANGED
@@ -14,9 +14,14 @@ Usage: god [command] [options]
14
14
 
15
15
  Commands:
16
16
  start <watch or group name>
17
+ restart <watch or group name>
17
18
  stop <watch or group name>
18
19
  monitor <watch or group name>
19
20
  unmonitor <watch or group name>
21
+ load <file>
22
+ log <watch name>
23
+ status
24
+ terminate
20
25
 
21
26
  Options:
22
27
  EOF
@@ -55,7 +60,7 @@ if options[:version]
55
60
 
56
61
  # print version
57
62
  puts "Version #{God::VERSION}"
58
- exit!
63
+ exit!(0)
59
64
  elsif options[:info]
60
65
  require 'god'
61
66
 
@@ -63,42 +68,119 @@ elsif options[:info]
63
68
  puts "Polls: enabled"
64
69
  puts "Events: " + God::EventHandler.event_system
65
70
 
66
- exit!
71
+ exit!(0)
67
72
  elsif command = ARGV[0]
68
73
  require 'god'
69
74
 
70
75
  # a command was specified
71
76
 
72
- # disable at_exit
73
- # module God; def self.at_exit; end; end
74
-
75
- # get the name of the watch/group
76
- name = ARGV[1]
77
-
78
77
  # connect to remote drb
79
78
  DRb.start_service
80
- server = DRbObject.new nil, "druby://localhost:#{options[:port]}"
79
+ server = DRbObject.new nil, "druby://127.0.0.1:#{options[:port]}"
81
80
 
82
81
  begin
82
+ server.ping
83
+ rescue DRb::DRbConnError
84
+ puts "The server is not available (or you do not have permissions to access it)"
85
+ exit!
86
+ rescue => e
87
+ puts e.message
88
+ puts e.backtrace.join("\n")
89
+ exit!
90
+ end
91
+
92
+ if command == 'load'
93
+ file = ARGV[1]
94
+
83
95
  puts "Sending '#{command}' command"
84
96
 
85
- # send command
86
- watches = server.control(name, command)
97
+ code = File.read(file)
98
+
99
+ watches = server.running_load(code)
87
100
 
88
101
  # output response
89
102
  puts 'The following watches were affected:'
90
103
  watches.each do |w|
91
104
  puts ' ' + w.name
92
105
  end
93
- rescue God::InvalidCommandError
94
- abort "Command '#{command}' is not valid. Run 'god --help' for usage"
106
+
107
+ puts "Done"
108
+ elsif command == 'status'
109
+ watches = server.status
110
+ watches.keys.sort.each do |name|
111
+ state = watches[name][:state]
112
+ puts "#{name}: #{state}"
113
+ end
114
+ elsif command == 'log'
115
+ begin
116
+ Signal.trap('INT') { exit!(0) }
117
+ name = ARGV[1]
118
+ t = Time.at(0)
119
+ loop do
120
+ print server.running_log(name, t)
121
+ t = Time.now
122
+ sleep 1
123
+ end
124
+ rescue God::NoSuchWatchError
125
+ puts "No such watch"
126
+ rescue DRb::DRbConnError
127
+ puts "The server went away"
128
+ rescue => e
129
+ puts e.message
130
+ puts e.backtrace.join("\n")
131
+ ensure
132
+ exit!(0)
133
+ end
134
+ elsif command == 'terminate'
135
+ t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
136
+ if server.stop_all
137
+ t.kill; STDOUT.puts
138
+ puts 'Stopped all watches'
139
+ else
140
+ t.kill; STDOUT.puts
141
+ puts 'Could not stop all watches within 10 seconds'
142
+ end
143
+
144
+ begin
145
+ server.terminate
146
+ abort 'Could not stop god'
147
+ rescue DRb::DRbConnError
148
+ puts 'Stopped god'
149
+ exit!(0)
150
+ end
151
+ else
152
+ # get the name of the watch/group
153
+ name = ARGV[1]
154
+
155
+ begin
156
+ puts "Sending '#{command}' command"
157
+
158
+ t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
159
+
160
+ # send command
161
+ watches = server.control(name, command)
162
+
163
+ # output response
164
+ t.kill; STDOUT.puts
165
+ puts 'The following watches were affected:'
166
+ watches.each do |w|
167
+ puts ' ' + w.name
168
+ end
169
+ rescue God::InvalidCommandError
170
+ abort "Command '#{command}' is not valid. Run 'god --help' for usage"
171
+ end
95
172
  end
96
173
 
97
- exit!
174
+ exit!(0)
98
175
  else
99
176
  # start god
100
177
  if !options[:daemonize]
101
178
  require 'god'
179
+
180
+ if options[:port]
181
+ God.port = options[:port]
182
+ end
183
+
102
184
  load File.expand_path(options[:config])
103
185
  else
104
186
  pid = fork do
@@ -127,11 +209,17 @@ else
127
209
  puts "Resetting file descriptors"
128
210
 
129
211
  puts "Loading config"
212
+
213
+ if options[:port]
214
+ God.port = options[:port]
215
+ end
130
216
 
131
217
  load File.expand_path(options[:config])
218
+
219
+ Signal.trap('HUP') {}
132
220
  rescue => e
133
221
  File.open('god.log', 'a') { |f| f.puts e.message + "\n" + e.backtrace }
134
- abort "!!! ERROR !!!"
222
+ abort "!!! ERROR - See god.log !!!"
135
223
  end
136
224
  end
137
225
 
@@ -141,6 +229,6 @@ else
141
229
 
142
230
  ::Process.detach pid
143
231
 
144
- exit!
232
+ exit!(0)
145
233
  end
146
234
  end
data/lib/god.rb CHANGED
@@ -1,22 +1,31 @@
1
1
  $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
2
2
 
3
+ # core
4
+ require 'logger'
5
+
6
+ # stdlib
3
7
  require 'syslog'
4
8
 
5
9
  # internal requires
6
10
  require 'god/errors'
7
-
11
+ require 'god/logger'
8
12
  require 'god/system/process'
13
+ require 'god/dependency_graph'
14
+ require 'god/timeline'
9
15
 
10
16
  require 'god/behavior'
11
17
  require 'god/behaviors/clean_pid_file'
18
+ require 'god/behaviors/notify_when_flapping'
12
19
 
13
20
  require 'god/condition'
14
- require 'god/conditions/timeline'
15
21
  require 'god/conditions/process_running'
16
22
  require 'god/conditions/process_exits'
23
+ require 'god/conditions/tries'
17
24
  require 'god/conditions/memory_usage'
18
25
  require 'god/conditions/cpu_usage'
19
26
  require 'god/conditions/always'
27
+ require 'god/conditions/lambda'
28
+ require 'god/conditions/degrading_lambda'
20
29
 
21
30
  require 'god/reporter'
22
31
  require 'god/server'
@@ -43,62 +52,95 @@ end
43
52
  God::EventHandler.load
44
53
 
45
54
  module God
46
- VERSION = '0.3.0'
55
+ VERSION = '0.4.0'
47
56
 
48
- class << self
49
- attr_accessor :inited, :host, :port
57
+ LOG = Logger.new
50
58
 
51
- # drb
52
- attr_accessor :server
59
+ LOG_BUFFER_SIZE_DEFAULT = 100
60
+ PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
61
+ DRB_PORT_DEFAULT = 17165
62
+ DRB_ALLOW_DEFAULT = ['127.0.0.1']
63
+
64
+ class << self
65
+ # user configurable
66
+ attr_accessor :host,
67
+ :port,
68
+ :allow,
69
+ :log_buffer_size,
70
+ :pid_file_directory
53
71
 
54
- # api
55
- attr_accessor :watches, :groups
72
+ # internal
73
+ attr_accessor :inited,
74
+ :running,
75
+ :pending_watches,
76
+ :server,
77
+ :watches,
78
+ :groups
56
79
  end
57
80
 
58
81
  def self.init
82
+ if self.inited
83
+ abort "God.init must be called before any Watches"
84
+ end
85
+
86
+ self.internal_init
87
+ end
88
+
89
+ def self.internal_init
59
90
  # only do this once
60
91
  return if self.inited
61
92
 
62
93
  # variable init
63
94
  self.watches = {}
64
95
  self.groups = {}
96
+ self.pending_watches = []
97
+
98
+ # set defaults
99
+ self.log_buffer_size = LOG_BUFFER_SIZE_DEFAULT
100
+ self.pid_file_directory = PID_FILE_DIRECTORY_DEFAULT
101
+ self.port = DRB_PORT_DEFAULT
102
+ self.allow = DRB_ALLOW_DEFAULT
65
103
 
66
104
  # yield to the config file
67
105
  yield self if block_given?
68
106
 
69
- # instantiate server
70
- self.server = Server.new(self.host, self.port)
71
-
72
107
  # init has been executed
73
108
  self.inited = true
74
- end
75
109
 
76
- # Where pid files created by god will go by default
77
- def self.pid_file_directory
78
- @pid_file_directory ||= '/var/run/god'
110
+ # not yet running
111
+ self.running = false
79
112
  end
80
-
81
- def self.pid_file_directory=(value)
82
- @pid_file_directory = value
83
- end
84
-
113
+
85
114
  # Instantiate a new, empty Watch object and pass it to the mandatory
86
115
  # block. The attributes of the watch will be set by the configuration
87
116
  # file.
88
117
  def self.watch
89
- self.init
118
+ self.internal_init
90
119
 
91
120
  w = Watch.new
92
121
  yield(w)
93
122
 
123
+ # if running, completely remove the watch (if necessary) to
124
+ # prepare for the reload
125
+ existing_watch = self.watches[w.name]
126
+ if self.running && existing_watch
127
+ self.unwatch(existing_watch)
128
+ end
129
+
94
130
  # ensure the new watch has a unique name
95
131
  if self.watches[w.name] || self.groups[w.name]
96
132
  abort "Watch name '#{w.name}' already used for a Watch or Group"
97
133
  end
98
134
 
135
+ # ensure watch is internally valid
136
+ w.valid? || abort("Watch '#{w.name}' is not valid (see above)")
137
+
99
138
  # add to list of watches
100
139
  self.watches[w.name] = w
101
140
 
141
+ # add to pending watches
142
+ self.pending_watches << w
143
+
102
144
  # add to group if specified
103
145
  if w.group
104
146
  # ensure group name hasn't been used for a watch already
@@ -107,39 +149,129 @@ module God
107
149
  end
108
150
 
109
151
  self.groups[w.group] ||= []
110
- self.groups[w.group] << w.name
152
+ self.groups[w.group] << w
111
153
  end
112
154
 
113
155
  # register watch
114
156
  w.register!
115
157
  end
116
158
 
159
+ def self.unwatch(watch)
160
+ # unmonitor
161
+ watch.unmonitor
162
+
163
+ # unregister
164
+ watch.unregister!
165
+
166
+ # remove from watches
167
+ self.watches.delete(watch.name)
168
+
169
+ # remove from groups
170
+ if watch.group
171
+ self.groups[watch.group].delete(watch)
172
+ end
173
+ end
174
+
117
175
  def self.control(name, command)
118
176
  # get the list of watches
119
177
  watches = Array(self.watches[name] || self.groups[name])
120
178
 
179
+ jobs = []
180
+
121
181
  # do the command
122
182
  case command
123
183
  when "start", "monitor"
124
- watches.each { |w| w.monitor }
184
+ watches.each { |w| jobs << Thread.new { w.monitor } }
125
185
  when "restart"
126
- watches.each { |w| w.move(:restart) }
186
+ watches.each { |w| jobs << Thread.new { w.move(:restart) } }
127
187
  when "stop"
128
- watches.each { |w| w.unmonitor.action(:stop) }
188
+ watches.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) } }
129
189
  when "unmonitor"
130
- watches.each { |w| w.unmonitor }
190
+ watches.each { |w| jobs << Thread.new { w.unmonitor } }
131
191
  else
132
192
  raise InvalidCommandError.new
133
193
  end
134
194
 
195
+ jobs.each { |j| j.join }
196
+
135
197
  watches
136
198
  end
199
+
200
+ def self.stop_all
201
+ self.watches.sort.each do |name, w|
202
+ Thread.new do
203
+ w.unmonitor if w.state
204
+ w.action(:stop) if w.alive?
205
+ end
206
+ end
137
207
 
138
- def self.start
139
- # make sure there's something to do
140
- if self.watches.nil? || self.watches.empty?
141
- abort "You must specify at least one watch!"
208
+ 10.times do
209
+ return true unless self.watches.map { |name, w| w.alive? }.any?
210
+ sleep 1
211
+ end
212
+
213
+ return false
214
+ end
215
+
216
+ def self.terminate
217
+ exit!(0)
218
+ end
219
+
220
+ def self.status
221
+ info = {}
222
+ self.watches.map do |name, w|
223
+ status = w.state || :unmonitored
224
+ info[name] = {:state => status}
225
+ end
226
+ info
227
+ end
228
+
229
+ def self.running_log(watch_name, since)
230
+ unless self.watches[watch_name]
231
+ raise NoSuchWatchError.new
232
+ end
233
+
234
+ LOG.watch_log_since(watch_name, since)
235
+ end
236
+
237
+ def self.running_load(code)
238
+ eval(code)
239
+ self.pending_watches.each { |w| w.monitor if w.autostart? }
240
+ watches = self.pending_watches.dup
241
+ self.pending_watches.clear
242
+ watches
243
+ end
244
+
245
+ def self.load(glob)
246
+ Dir[glob].each do |f|
247
+ Kernel.load f
248
+ end
249
+ end
250
+
251
+ def self.setup
252
+ # Make pid directory
253
+ unless test(?d, self.pid_file_directory)
254
+ begin
255
+ FileUtils.mkdir_p(self.pid_file_directory)
256
+ rescue Errno::EACCES => e
257
+ abort "Failed to create pid file directory: #{e.message}"
258
+ end
259
+ end
260
+ end
261
+
262
+ def self.validater
263
+ unless test(?w, self.pid_file_directory)
264
+ abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
142
265
  end
266
+ end
267
+
268
+ def self.start
269
+ self.internal_init
270
+ self.setup
271
+ self.validater
272
+
273
+ # instantiate server
274
+ self.server = Server.new(self.host, self.port, self.allow)
143
275
 
144
276
  # start event handler system
145
277
  EventHandler.start if EventHandler.loaded?
@@ -150,6 +282,12 @@ module God
150
282
  # start monitoring any watches set to autostart
151
283
  self.watches.values.each { |w| w.monitor if w.autostart? }
152
284
 
285
+ # clear pending watches
286
+ self.pending_watches.clear
287
+
288
+ # mark as running
289
+ self.running = true
290
+
153
291
  # join the timer thread so we don't exit
154
292
  Timer.get.join
155
293
  end
@@ -157,12 +295,6 @@ module God
157
295
  def self.at_exit
158
296
  self.start
159
297
  end
160
-
161
- def self.load(glob)
162
- Dir[glob].each do |f|
163
- Kernel.load f
164
- end
165
- end
166
298
  end
167
299
 
168
300
  at_exit do