god 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/History.txt +26 -0
  2. data/Manifest.txt +15 -1
  3. data/Rakefile +2 -7
  4. data/bin/god +104 -16
  5. data/lib/god.rb +169 -37
  6. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  7. data/lib/god/condition.rb +1 -0
  8. data/lib/god/conditions/degrading_lambda.rb +47 -0
  9. data/lib/god/conditions/process_exits.rb +6 -2
  10. data/lib/god/conditions/tries.rb +33 -0
  11. data/lib/god/dependency_graph.rb +41 -0
  12. data/lib/god/errors.rb +6 -0
  13. data/lib/god/hub.rb +43 -20
  14. data/lib/god/logger.rb +44 -0
  15. data/lib/god/process.rb +91 -19
  16. data/lib/god/registry.rb +4 -0
  17. data/lib/god/server.rb +12 -2
  18. data/lib/god/timeline.rb +36 -0
  19. data/lib/god/watch.rb +27 -8
  20. data/test/configs/child_events/child_events.god +7 -2
  21. data/test/configs/child_polls/child_polls.god +3 -1
  22. data/test/configs/child_polls/simple_server.rb +1 -1
  23. data/test/configs/daemon_events/daemon_events.god +7 -3
  24. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  25. data/test/configs/daemon_polls/simple_server.rb +6 -0
  26. data/test/configs/degrading_lambda/degrading_lambda.god +33 -0
  27. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  28. data/test/configs/real.rb +1 -1
  29. data/test/configs/running_load/running_load.god +16 -0
  30. data/test/configs/stress/simple_server.rb +3 -0
  31. data/test/configs/stress/stress.god +15 -0
  32. data/test/configs/test.rb +14 -2
  33. data/test/helper.rb +12 -2
  34. data/test/test_conditions_tries.rb +46 -0
  35. data/test/test_dependency_graph.rb +62 -0
  36. data/test/test_god.rb +289 -33
  37. data/test/test_handlers_kqueue_handler.rb +11 -7
  38. data/test/test_hub.rb +18 -0
  39. data/test/test_logger.rb +55 -0
  40. data/test/test_process.rb +135 -17
  41. data/test/test_registry.rb +2 -1
  42. data/test/test_server.rb +35 -4
  43. data/test/test_timeline.rb +14 -2
  44. data/test/test_watch.rb +7 -0
  45. metadata +21 -4
  46. data/lib/god/conditions/timeline.rb +0 -17
data/History.txt CHANGED
@@ -1,3 +1,29 @@
1
+ == 0.4.0
2
+
3
+ * Major Enhancements
4
+ * Add the ability for conditions to override transition state (for exceptional cases)
5
+ * Implement dynamic load of config files while god is running (god load <filename>)
6
+ * Add ability to save auto-daemonized process output to a log file
7
+ * Add robust default stop lambda command for auto-daemonized processes (inspired by _eric)
8
+ * Add status command for god binary (shows status of each watch)
9
+ * Create proper logger with timestamps
10
+ * Add log command to god binary to get real time logs for a specific watch from a running god instance
11
+ * Add terminate command for god binary (stop god and all watches)
12
+ * Minor Enhancements
13
+ * Enforce validity of Watches
14
+ * Enforce that God.init is not called after a Watch
15
+ * Move pid_file_directory creation and validation to God.start
16
+ * Remove check for at least one Watch during startup (now that dynamic loading exists)
17
+ * New Conditions
18
+ * Tries < PollCondition - triggers after the specified number of tries
19
+ * Add :notify_when_flapping behavior to check for oscillation [kevinclark]
20
+ * Add :degrading_lambda condition. [kevinclark]
21
+ It uses a decaying interval (1/2 rate) for 3 cycles before failing.
22
+ * Bug Fixes
23
+ * Use exit!(0) instead of exit! in god binary to exit with code 0 (instead of default -1)
24
+ * Command line group control fixed
25
+ * Fix cross-thread return problem (use exit instead)
26
+
1
27
  == 0.3.0 / 2007-08-17
2
28
 
3
29
  * Fix netlink header problem on Ubuntu Edgy [Dan Sully]
data/Manifest.txt CHANGED
@@ -11,20 +11,24 @@ ext/god/netlink_handler.c
11
11
  lib/god.rb
12
12
  lib/god/behavior.rb
13
13
  lib/god/behaviors/clean_pid_file.rb
14
+ lib/god/behaviors/notify_when_flapping.rb
14
15
  lib/god/condition.rb
15
16
  lib/god/conditions/always.rb
16
17
  lib/god/conditions/cpu_usage.rb
18
+ lib/god/conditions/degrading_lambda.rb
17
19
  lib/god/conditions/lambda.rb
18
20
  lib/god/conditions/memory_usage.rb
19
21
  lib/god/conditions/process_exits.rb
20
22
  lib/god/conditions/process_running.rb
21
- lib/god/conditions/timeline.rb
23
+ lib/god/conditions/tries.rb
24
+ lib/god/dependency_graph.rb
22
25
  lib/god/errors.rb
23
26
  lib/god/event_handler.rb
24
27
  lib/god/event_handlers/dummy_handler.rb
25
28
  lib/god/event_handlers/kqueue_handler.rb
26
29
  lib/god/event_handlers/netlink_handler.rb
27
30
  lib/god/hub.rb
31
+ lib/god/logger.rb
28
32
  lib/god/metric.rb
29
33
  lib/god/process.rb
30
34
  lib/god/registry.rb
@@ -32,6 +36,7 @@ lib/god/reporter.rb
32
36
  lib/god/server.rb
33
37
  lib/god/sugar.rb
34
38
  lib/god/system/process.rb
39
+ lib/god/timeline.rb
35
40
  lib/god/timer.rb
36
41
  lib/god/watch.rb
37
42
  test/configs/child_events/child_events.god
@@ -40,17 +45,26 @@ test/configs/child_polls/child_polls.god
40
45
  test/configs/child_polls/simple_server.rb
41
46
  test/configs/daemon_events/daemon_events.god
42
47
  test/configs/daemon_events/simple_server.rb
48
+ test/configs/daemon_polls/daemon_polls.god
49
+ test/configs/daemon_polls/simple_server.rb
50
+ test/configs/degrading_lambda/degrading_lambda.god
51
+ test/configs/degrading_lambda/tcp_server.rb
43
52
  test/configs/real.rb
53
+ test/configs/running_load/running_load.god
54
+ test/configs/stress/simple_server.rb
55
+ test/configs/stress/stress.god
44
56
  test/configs/test.rb
45
57
  test/helper.rb
46
58
  test/suite.rb
47
59
  test/test_behavior.rb
48
60
  test/test_condition.rb
49
61
  test/test_conditions_process_running.rb
62
+ test/test_dependency_graph.rb
50
63
  test/test_event_handler.rb
51
64
  test/test_god.rb
52
65
  test/test_handlers_kqueue_handler.rb
53
66
  test/test_hub.rb
67
+ test/test_logger.rb
54
68
  test/test_metric.rb
55
69
  test/test_process.rb
56
70
  test/test_registry.rb
data/Rakefile CHANGED
@@ -1,9 +1,7 @@
1
- # -*- ruby -*-
2
-
3
1
  require 'rubygems'
4
2
  require 'hoe'
5
3
 
6
- Hoe.new('god', '0.3.0') do |p|
4
+ Hoe.new('god', '0.4.0') do |p|
7
5
  p.rubyforge_name = 'god'
8
6
  p.author = 'Tom Preston-Werner'
9
7
  p.email = 'tom@rubyisawesome.com'
@@ -11,7 +9,6 @@ Hoe.new('god', '0.3.0') do |p|
11
9
  p.summary = 'Like monit, only awesome'
12
10
  p.description = "God is an easy to configure, easy to extend monitoring framework written in Ruby."
13
11
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
14
- # p.extra_deps << ['daemons', '>=1.0.7']
15
12
  p.spec_extras = {:extensions => ['ext/god/extconf.rb']}
16
13
  end
17
14
 
@@ -28,6 +25,4 @@ end
28
25
  desc "Upload site to Rubyforge"
29
26
  task :site_edge do
30
27
  sh "scp -r site/* mojombo@god.rubyforge.org:/var/www/gforge-projects/god/edge"
31
- end
32
-
33
- # vim: syntax=Ruby
28
+ end
data/bin/god CHANGED
@@ -14,9 +14,14 @@ Usage: god [command] [options]
14
14
 
15
15
  Commands:
16
16
  start <watch or group name>
17
+ restart <watch or group name>
17
18
  stop <watch or group name>
18
19
  monitor <watch or group name>
19
20
  unmonitor <watch or group name>
21
+ load <file>
22
+ log <watch name>
23
+ status
24
+ terminate
20
25
 
21
26
  Options:
22
27
  EOF
@@ -55,7 +60,7 @@ if options[:version]
55
60
 
56
61
  # print version
57
62
  puts "Version #{God::VERSION}"
58
- exit!
63
+ exit!(0)
59
64
  elsif options[:info]
60
65
  require 'god'
61
66
 
@@ -63,42 +68,119 @@ elsif options[:info]
63
68
  puts "Polls: enabled"
64
69
  puts "Events: " + God::EventHandler.event_system
65
70
 
66
- exit!
71
+ exit!(0)
67
72
  elsif command = ARGV[0]
68
73
  require 'god'
69
74
 
70
75
  # a command was specified
71
76
 
72
- # disable at_exit
73
- # module God; def self.at_exit; end; end
74
-
75
- # get the name of the watch/group
76
- name = ARGV[1]
77
-
78
77
  # connect to remote drb
79
78
  DRb.start_service
80
- server = DRbObject.new nil, "druby://localhost:#{options[:port]}"
79
+ server = DRbObject.new nil, "druby://127.0.0.1:#{options[:port]}"
81
80
 
82
81
  begin
82
+ server.ping
83
+ rescue DRb::DRbConnError
84
+ puts "The server is not available (or you do not have permissions to access it)"
85
+ exit!
86
+ rescue => e
87
+ puts e.message
88
+ puts e.backtrace.join("\n")
89
+ exit!
90
+ end
91
+
92
+ if command == 'load'
93
+ file = ARGV[1]
94
+
83
95
  puts "Sending '#{command}' command"
84
96
 
85
- # send command
86
- watches = server.control(name, command)
97
+ code = File.read(file)
98
+
99
+ watches = server.running_load(code)
87
100
 
88
101
  # output response
89
102
  puts 'The following watches were affected:'
90
103
  watches.each do |w|
91
104
  puts ' ' + w.name
92
105
  end
93
- rescue God::InvalidCommandError
94
- abort "Command '#{command}' is not valid. Run 'god --help' for usage"
106
+
107
+ puts "Done"
108
+ elsif command == 'status'
109
+ watches = server.status
110
+ watches.keys.sort.each do |name|
111
+ state = watches[name][:state]
112
+ puts "#{name}: #{state}"
113
+ end
114
+ elsif command == 'log'
115
+ begin
116
+ Signal.trap('INT') { exit!(0) }
117
+ name = ARGV[1]
118
+ t = Time.at(0)
119
+ loop do
120
+ print server.running_log(name, t)
121
+ t = Time.now
122
+ sleep 1
123
+ end
124
+ rescue God::NoSuchWatchError
125
+ puts "No such watch"
126
+ rescue DRb::DRbConnError
127
+ puts "The server went away"
128
+ rescue => e
129
+ puts e.message
130
+ puts e.backtrace.join("\n")
131
+ ensure
132
+ exit!(0)
133
+ end
134
+ elsif command == 'terminate'
135
+ t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
136
+ if server.stop_all
137
+ t.kill; STDOUT.puts
138
+ puts 'Stopped all watches'
139
+ else
140
+ t.kill; STDOUT.puts
141
+ puts 'Could not stop all watches within 10 seconds'
142
+ end
143
+
144
+ begin
145
+ server.terminate
146
+ abort 'Could not stop god'
147
+ rescue DRb::DRbConnError
148
+ puts 'Stopped god'
149
+ exit!(0)
150
+ end
151
+ else
152
+ # get the name of the watch/group
153
+ name = ARGV[1]
154
+
155
+ begin
156
+ puts "Sending '#{command}' command"
157
+
158
+ t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
159
+
160
+ # send command
161
+ watches = server.control(name, command)
162
+
163
+ # output response
164
+ t.kill; STDOUT.puts
165
+ puts 'The following watches were affected:'
166
+ watches.each do |w|
167
+ puts ' ' + w.name
168
+ end
169
+ rescue God::InvalidCommandError
170
+ abort "Command '#{command}' is not valid. Run 'god --help' for usage"
171
+ end
95
172
  end
96
173
 
97
- exit!
174
+ exit!(0)
98
175
  else
99
176
  # start god
100
177
  if !options[:daemonize]
101
178
  require 'god'
179
+
180
+ if options[:port]
181
+ God.port = options[:port]
182
+ end
183
+
102
184
  load File.expand_path(options[:config])
103
185
  else
104
186
  pid = fork do
@@ -127,11 +209,17 @@ else
127
209
  puts "Resetting file descriptors"
128
210
 
129
211
  puts "Loading config"
212
+
213
+ if options[:port]
214
+ God.port = options[:port]
215
+ end
130
216
 
131
217
  load File.expand_path(options[:config])
218
+
219
+ Signal.trap('HUP') {}
132
220
  rescue => e
133
221
  File.open('god.log', 'a') { |f| f.puts e.message + "\n" + e.backtrace }
134
- abort "!!! ERROR !!!"
222
+ abort "!!! ERROR - See god.log !!!"
135
223
  end
136
224
  end
137
225
 
@@ -141,6 +229,6 @@ else
141
229
 
142
230
  ::Process.detach pid
143
231
 
144
- exit!
232
+ exit!(0)
145
233
  end
146
234
  end
data/lib/god.rb CHANGED
@@ -1,22 +1,31 @@
1
1
  $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
2
2
 
3
+ # core
4
+ require 'logger'
5
+
6
+ # stdlib
3
7
  require 'syslog'
4
8
 
5
9
  # internal requires
6
10
  require 'god/errors'
7
-
11
+ require 'god/logger'
8
12
  require 'god/system/process'
13
+ require 'god/dependency_graph'
14
+ require 'god/timeline'
9
15
 
10
16
  require 'god/behavior'
11
17
  require 'god/behaviors/clean_pid_file'
18
+ require 'god/behaviors/notify_when_flapping'
12
19
 
13
20
  require 'god/condition'
14
- require 'god/conditions/timeline'
15
21
  require 'god/conditions/process_running'
16
22
  require 'god/conditions/process_exits'
23
+ require 'god/conditions/tries'
17
24
  require 'god/conditions/memory_usage'
18
25
  require 'god/conditions/cpu_usage'
19
26
  require 'god/conditions/always'
27
+ require 'god/conditions/lambda'
28
+ require 'god/conditions/degrading_lambda'
20
29
 
21
30
  require 'god/reporter'
22
31
  require 'god/server'
@@ -43,62 +52,95 @@ end
43
52
  God::EventHandler.load
44
53
 
45
54
  module God
46
- VERSION = '0.3.0'
55
+ VERSION = '0.4.0'
47
56
 
48
- class << self
49
- attr_accessor :inited, :host, :port
57
+ LOG = Logger.new
50
58
 
51
- # drb
52
- attr_accessor :server
59
+ LOG_BUFFER_SIZE_DEFAULT = 100
60
+ PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
61
+ DRB_PORT_DEFAULT = 17165
62
+ DRB_ALLOW_DEFAULT = ['127.0.0.1']
63
+
64
+ class << self
65
+ # user configurable
66
+ attr_accessor :host,
67
+ :port,
68
+ :allow,
69
+ :log_buffer_size,
70
+ :pid_file_directory
53
71
 
54
- # api
55
- attr_accessor :watches, :groups
72
+ # internal
73
+ attr_accessor :inited,
74
+ :running,
75
+ :pending_watches,
76
+ :server,
77
+ :watches,
78
+ :groups
56
79
  end
57
80
 
58
81
  def self.init
82
+ if self.inited
83
+ abort "God.init must be called before any Watches"
84
+ end
85
+
86
+ self.internal_init
87
+ end
88
+
89
+ def self.internal_init
59
90
  # only do this once
60
91
  return if self.inited
61
92
 
62
93
  # variable init
63
94
  self.watches = {}
64
95
  self.groups = {}
96
+ self.pending_watches = []
97
+
98
+ # set defaults
99
+ self.log_buffer_size = LOG_BUFFER_SIZE_DEFAULT
100
+ self.pid_file_directory = PID_FILE_DIRECTORY_DEFAULT
101
+ self.port = DRB_PORT_DEFAULT
102
+ self.allow = DRB_ALLOW_DEFAULT
65
103
 
66
104
  # yield to the config file
67
105
  yield self if block_given?
68
106
 
69
- # instantiate server
70
- self.server = Server.new(self.host, self.port)
71
-
72
107
  # init has been executed
73
108
  self.inited = true
74
- end
75
109
 
76
- # Where pid files created by god will go by default
77
- def self.pid_file_directory
78
- @pid_file_directory ||= '/var/run/god'
110
+ # not yet running
111
+ self.running = false
79
112
  end
80
-
81
- def self.pid_file_directory=(value)
82
- @pid_file_directory = value
83
- end
84
-
113
+
85
114
  # Instantiate a new, empty Watch object and pass it to the mandatory
86
115
  # block. The attributes of the watch will be set by the configuration
87
116
  # file.
88
117
  def self.watch
89
- self.init
118
+ self.internal_init
90
119
 
91
120
  w = Watch.new
92
121
  yield(w)
93
122
 
123
+ # if running, completely remove the watch (if necessary) to
124
+ # prepare for the reload
125
+ existing_watch = self.watches[w.name]
126
+ if self.running && existing_watch
127
+ self.unwatch(existing_watch)
128
+ end
129
+
94
130
  # ensure the new watch has a unique name
95
131
  if self.watches[w.name] || self.groups[w.name]
96
132
  abort "Watch name '#{w.name}' already used for a Watch or Group"
97
133
  end
98
134
 
135
+ # ensure watch is internally valid
136
+ w.valid? || abort("Watch '#{w.name}' is not valid (see above)")
137
+
99
138
  # add to list of watches
100
139
  self.watches[w.name] = w
101
140
 
141
+ # add to pending watches
142
+ self.pending_watches << w
143
+
102
144
  # add to group if specified
103
145
  if w.group
104
146
  # ensure group name hasn't been used for a watch already
@@ -107,39 +149,129 @@ module God
107
149
  end
108
150
 
109
151
  self.groups[w.group] ||= []
110
- self.groups[w.group] << w.name
152
+ self.groups[w.group] << w
111
153
  end
112
154
 
113
155
  # register watch
114
156
  w.register!
115
157
  end
116
158
 
159
+ def self.unwatch(watch)
160
+ # unmonitor
161
+ watch.unmonitor
162
+
163
+ # unregister
164
+ watch.unregister!
165
+
166
+ # remove from watches
167
+ self.watches.delete(watch.name)
168
+
169
+ # remove from groups
170
+ if watch.group
171
+ self.groups[watch.group].delete(watch)
172
+ end
173
+ end
174
+
117
175
  def self.control(name, command)
118
176
  # get the list of watches
119
177
  watches = Array(self.watches[name] || self.groups[name])
120
178
 
179
+ jobs = []
180
+
121
181
  # do the command
122
182
  case command
123
183
  when "start", "monitor"
124
- watches.each { |w| w.monitor }
184
+ watches.each { |w| jobs << Thread.new { w.monitor } }
125
185
  when "restart"
126
- watches.each { |w| w.move(:restart) }
186
+ watches.each { |w| jobs << Thread.new { w.move(:restart) } }
127
187
  when "stop"
128
- watches.each { |w| w.unmonitor.action(:stop) }
188
+ watches.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) } }
129
189
  when "unmonitor"
130
- watches.each { |w| w.unmonitor }
190
+ watches.each { |w| jobs << Thread.new { w.unmonitor } }
131
191
  else
132
192
  raise InvalidCommandError.new
133
193
  end
134
194
 
195
+ jobs.each { |j| j.join }
196
+
135
197
  watches
136
198
  end
199
+
200
+ def self.stop_all
201
+ self.watches.sort.each do |name, w|
202
+ Thread.new do
203
+ w.unmonitor if w.state
204
+ w.action(:stop) if w.alive?
205
+ end
206
+ end
137
207
 
138
- def self.start
139
- # make sure there's something to do
140
- if self.watches.nil? || self.watches.empty?
141
- abort "You must specify at least one watch!"
208
+ 10.times do
209
+ return true unless self.watches.map { |name, w| w.alive? }.any?
210
+ sleep 1
211
+ end
212
+
213
+ return false
214
+ end
215
+
216
+ def self.terminate
217
+ exit!(0)
218
+ end
219
+
220
+ def self.status
221
+ info = {}
222
+ self.watches.map do |name, w|
223
+ status = w.state || :unmonitored
224
+ info[name] = {:state => status}
225
+ end
226
+ info
227
+ end
228
+
229
+ def self.running_log(watch_name, since)
230
+ unless self.watches[watch_name]
231
+ raise NoSuchWatchError.new
232
+ end
233
+
234
+ LOG.watch_log_since(watch_name, since)
235
+ end
236
+
237
+ def self.running_load(code)
238
+ eval(code)
239
+ self.pending_watches.each { |w| w.monitor if w.autostart? }
240
+ watches = self.pending_watches.dup
241
+ self.pending_watches.clear
242
+ watches
243
+ end
244
+
245
+ def self.load(glob)
246
+ Dir[glob].each do |f|
247
+ Kernel.load f
248
+ end
249
+ end
250
+
251
+ def self.setup
252
+ # Make pid directory
253
+ unless test(?d, self.pid_file_directory)
254
+ begin
255
+ FileUtils.mkdir_p(self.pid_file_directory)
256
+ rescue Errno::EACCES => e
257
+ abort "Failed to create pid file directory: #{e.message}"
258
+ end
259
+ end
260
+ end
261
+
262
+ def self.validater
263
+ unless test(?w, self.pid_file_directory)
264
+ abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
142
265
  end
266
+ end
267
+
268
+ def self.start
269
+ self.internal_init
270
+ self.setup
271
+ self.validater
272
+
273
+ # instantiate server
274
+ self.server = Server.new(self.host, self.port, self.allow)
143
275
 
144
276
  # start event handler system
145
277
  EventHandler.start if EventHandler.loaded?
@@ -150,6 +282,12 @@ module God
150
282
  # start monitoring any watches set to autostart
151
283
  self.watches.values.each { |w| w.monitor if w.autostart? }
152
284
 
285
+ # clear pending watches
286
+ self.pending_watches.clear
287
+
288
+ # mark as running
289
+ self.running = true
290
+
153
291
  # join the timer thread so we don't exit
154
292
  Timer.get.join
155
293
  end
@@ -157,12 +295,6 @@ module God
157
295
  def self.at_exit
158
296
  self.start
159
297
  end
160
-
161
- def self.load(glob)
162
- Dir[glob].each do |f|
163
- Kernel.load f
164
- end
165
- end
166
298
  end
167
299
 
168
300
  at_exit do