god 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/god.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
2
2
 
3
+ # rubygems
4
+ require 'rubygems'
5
+
3
6
  # core
4
7
  require 'stringio'
5
8
  require 'logger'
@@ -32,6 +35,8 @@ require 'god/conditions/lambda'
32
35
  require 'god/conditions/degrading_lambda'
33
36
  require 'god/conditions/flapping'
34
37
  require 'god/conditions/http_response_code'
38
+ require 'god/conditions/disk_usage'
39
+ require 'god/conditions/complex'
35
40
 
36
41
  require 'god/contact'
37
42
  require 'god/contacts/email'
@@ -50,11 +55,19 @@ require 'god/process'
50
55
 
51
56
  require 'god/sugar'
52
57
 
58
+ require 'god/cli/version'
59
+ require 'god/cli/command'
60
+
53
61
  $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
54
62
 
63
+ # App wide logging system
55
64
  LOG = God::Logger.new
56
65
  LOG.datetime_format = "%Y-%m-%d %H:%M:%S "
57
66
 
67
+ def applog(watch, level, text)
68
+ LOG.log(watch, level, text)
69
+ end
70
+
58
71
  # The $run global determines whether god should be started when the
59
72
  # program would normally end. This should be set to true when god
60
73
  # should be started (e.g. `god -c <config file>`) and false otherwise
@@ -63,6 +76,7 @@ $run ||= nil
63
76
 
64
77
  GOD_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
65
78
 
79
+ # Ensure that Syslog is open
66
80
  begin
67
81
  Syslog.open('god')
68
82
  rescue RuntimeError
@@ -74,6 +88,7 @@ def root_binding
74
88
  binding
75
89
  end
76
90
 
91
+ # Load the event handler system
77
92
  God::EventHandler.load
78
93
 
79
94
  module Kernel
@@ -81,8 +96,8 @@ module Kernel
81
96
 
82
97
  def abort(text = nil)
83
98
  $run = false
84
- LOG.log(nil, :error, text) if text
85
- text ? abort_orig(text) : exit(1)
99
+ applog(nil, :error, text) if text
100
+ exit(1)
86
101
  end
87
102
 
88
103
  alias_method :exit_orig, :exit
@@ -102,7 +117,7 @@ class Module
102
117
  end
103
118
 
104
119
  if self.running && self.inited
105
- LOG.log(nil, :warn, "God.#{arg} can't be set while god is running")
120
+ applog(nil, :warn, "God.#{arg} can't be set while god is running")
106
121
  return
107
122
  end
108
123
 
@@ -117,7 +132,7 @@ class Module
117
132
  end
118
133
 
119
134
  module God
120
- VERSION = '0.5.0'
135
+ VERSION = '0.6.0'
121
136
 
122
137
  LOG_BUFFER_SIZE_DEFAULT = 1000
123
138
  PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
@@ -126,7 +141,8 @@ module God
126
141
 
127
142
  class << self
128
143
  # user configurable
129
- safe_attr_accessor :host,
144
+ safe_attr_accessor :pid,
145
+ :host,
130
146
  :port,
131
147
  :allow,
132
148
  :log_buffer_size,
@@ -145,12 +161,16 @@ module God
145
161
  end
146
162
 
147
163
  # initialize class instance variables
164
+ self.pid = nil
148
165
  self.host = nil
149
166
  self.port = nil
150
167
  self.allow = nil
151
168
  self.log_buffer_size = nil
152
169
  self.pid_file_directory = nil
153
170
 
171
+ # Initialize internal data.
172
+ #
173
+ # Returns nothing
154
174
  def self.internal_init
155
175
  # only do this once
156
176
  return if self.inited
@@ -180,13 +200,25 @@ module God
180
200
  # Instantiate a new, empty Watch object and pass it to the mandatory
181
201
  # block. The attributes of the watch will be set by the configuration
182
202
  # file.
203
+ #
204
+ # Aborts on duplicate watch name
205
+ # invalid watch
206
+ # conflicting group name
207
+ #
208
+ # Returns nothing
183
209
  def self.watch(&block)
184
210
  self.task(Watch, &block)
185
211
  end
186
212
 
187
- # Instantiate a new, empty Task object and pass it to the mandatory
213
+ # Instantiate a new, empty Task object and yield it to the mandatory
188
214
  # block. The attributes of the task will be set by the configuration
189
215
  # file.
216
+ #
217
+ # Aborts on duplicate task name
218
+ # invalid task
219
+ # conflicting group name
220
+ #
221
+ # Returns nothing
190
222
  def self.task(klass = Task)
191
223
  self.internal_init
192
224
 
@@ -224,22 +256,26 @@ module God
224
256
  if self.watches[t.group]
225
257
  abort "Group name '#{t.group}' already used for a Task"
226
258
  end
227
-
259
+
228
260
  self.groups[t.group] ||= []
229
261
  self.groups[t.group] << t
230
262
  end
231
-
263
+
232
264
  # register watch
233
265
  t.register!
234
266
 
235
267
  # log
236
268
  if self.running && existing_watch
237
- LOG.log(t, :info, "#{t.name} Reloaded config")
269
+ applog(t, :info, "#{t.name} Reloaded config")
238
270
  elsif self.running
239
- LOG.log(t, :info, "#{t.name} Loaded config")
271
+ applog(t, :info, "#{t.name} Loaded config")
240
272
  end
241
273
  end
242
274
 
275
+ # Unmonitor and remove the given watch from god.
276
+ # +watch+ is the Watch to remove
277
+ #
278
+ # Returns nothing
243
279
  def self.unwatch(watch)
244
280
  # unmonitor
245
281
  watch.unmonitor unless watch.state == :unmonitored
@@ -256,10 +292,20 @@ module God
256
292
  end
257
293
  end
258
294
 
295
+ # Instantiate a new Contact of the given kind and send it to the block.
296
+ # Then prepare, validate, and record the Contact.
297
+ # +kind+ is the contact class specifier
298
+ #
299
+ # Aborts on invalid kind
300
+ # duplicate contact name
301
+ # invalid contact
302
+ # conflicting group name
303
+ #
304
+ # Returns nothing
259
305
  def self.contact(kind)
260
306
  self.internal_init
261
307
 
262
- # create the condition
308
+ # create the contact
263
309
  begin
264
310
  c = Contact.generate(kind)
265
311
  rescue NoSuchContactError => e
@@ -298,23 +344,38 @@ module God
298
344
  if self.contacts[c.group]
299
345
  abort "Contact Group name '#{c.group}' already used for a Contact"
300
346
  end
301
-
347
+
302
348
  self.contact_groups[c.group] ||= []
303
349
  self.contact_groups[c.group] << c
304
350
  end
305
351
  end
306
352
 
353
+ # Remove the given contact from god.
354
+ # +contact+ is the Contact to remove
355
+ #
356
+ # Returns nothing
307
357
  def self.uncontact(contact)
308
358
  self.contacts.delete(contact.name)
309
359
  if contact.group
310
360
  self.contact_groups[contact.group].delete(contact)
311
361
  end
312
362
  end
313
-
363
+
364
+ # Control the lifecycle of the given task(s).
365
+ # +name+ is the name of a task/group (String)
366
+ # +command+ is the command to run (String)
367
+ # one of: "start"
368
+ # "monitor"
369
+ # "restart"
370
+ # "stop"
371
+ # "unmonitor"
372
+ # "remove"
373
+ #
374
+ # Returns String[]:task_names
314
375
  def self.control(name, command)
315
376
  # get the list of watches
316
377
  watches = Array(self.watches[name] || self.groups[name])
317
-
378
+
318
379
  jobs = []
319
380
 
320
381
  # do the command
@@ -327,6 +388,8 @@ module God
327
388
  watches.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) if w.state != :unmonitored } }
328
389
  when "unmonitor"
329
390
  watches.each { |w| jobs << Thread.new { w.unmonitor if w.state != :unmonitored } }
391
+ when "remove"
392
+ watches.each { |w| jobs << Thread.new { self.unwatch(w) } }
330
393
  else
331
394
  raise InvalidCommandError.new
332
395
  end
@@ -336,6 +399,10 @@ module God
336
399
  watches.map { |x| x.name }
337
400
  end
338
401
 
402
+ # Unmonitor and stop all tasks.
403
+ #
404
+ # Returns true on success
405
+ # false if not all tasks could be stopped within 10 seconds
339
406
  def self.stop_all
340
407
  self.watches.sort.each do |name, w|
341
408
  Thread.new do
@@ -352,10 +419,25 @@ module God
352
419
  return false
353
420
  end
354
421
 
422
+ # Force the termination of god.
423
+ # * Clean up pid file if one exists
424
+ # * Stop DRb service
425
+ # * Hard exit using exit!
426
+ #
427
+ # Never returns because the process will no longer exist!
355
428
  def self.terminate
429
+ FileUtils.rm_f(self.pid) if self.pid
430
+ self.server.stop if self.server
356
431
  exit!(0)
357
432
  end
358
433
 
434
+ # Gather the status of each task.
435
+ #
436
+ # Examples
437
+ # God.status
438
+ # # => { 'mongrel' => :up, 'nginx' => :up }
439
+ #
440
+ # Returns { String:task_name => Symbol:status, ... }
359
441
  def self.status
360
442
  info = {}
361
443
  self.watches.map do |name, w|
@@ -364,14 +446,29 @@ module God
364
446
  info
365
447
  end
366
448
 
449
+ # Log lines for the given task since the specified time.
450
+ # +watch_name+ is the name of the task (may be abbreviated)
451
+ # +since+ is the Time since which to report log lines
452
+ #
453
+ # Raises God::NoSuchWatchError if no tasks matched
454
+ #
455
+ # Returns String:joined_log_lines
367
456
  def self.running_log(watch_name, since)
368
- unless self.watches[watch_name]
457
+ matches = pattern_match(watch_name, self.watches.keys)
458
+
459
+ unless matches.first
369
460
  raise NoSuchWatchError.new
370
461
  end
371
462
 
372
- LOG.watch_log_since(watch_name, since)
463
+ LOG.watch_log_since(matches.first, since)
373
464
  end
374
465
 
466
+ # Load a config file into a running god instance. Rescues any exceptions
467
+ # that the config may raise and reports these back to the caller.
468
+ # +code+ is a String containing the config file
469
+ # +filename+ is the filename of the config file
470
+ #
471
+ # Returns [String[]:task_names, String:errors]
375
472
  def self.running_load(code, filename)
376
473
  errors = ""
377
474
  watches = []
@@ -379,6 +476,7 @@ module God
379
476
  begin
380
477
  LOG.start_capture
381
478
 
479
+ Gem.clear_paths
382
480
  eval(code, root_binding, filename)
383
481
  self.pending_watches.each do |w|
384
482
  if previous_state = self.pending_watch_states[w.name]
@@ -404,6 +502,10 @@ module God
404
502
  [names, errors]
405
503
  end
406
504
 
505
+ # Load the given file(s) according to the given glob.
506
+ # +glob+ is the glob-enabled path to load
507
+ #
508
+ # Returns nothing
407
509
  def self.load(glob)
408
510
  Dir[glob].each do |f|
409
511
  Kernel.load f
@@ -420,13 +522,16 @@ module God
420
522
  end
421
523
  end
422
524
  end
423
-
525
+
424
526
  def self.validater
425
527
  unless test(?w, self.pid_file_directory)
426
528
  abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
427
529
  end
428
530
  end
429
531
 
532
+ # Initialize and startup the machinery that makes god work.
533
+ #
534
+ # Returns nothing
430
535
  def self.start
431
536
  self.internal_init
432
537
  self.setup
@@ -454,11 +559,42 @@ module God
454
559
  Timer.get.join
455
560
  end
456
561
 
562
+ # To be called on program exit to start god
563
+ #
564
+ # Returns nothing
457
565
  def self.at_exit
458
566
  self.start
459
567
  end
568
+
569
+ # private
570
+
571
+ # Match a shortened pattern against a list of String candidates.
572
+ # The pattern is expanded into a regular expression by
573
+ # inserting .* between each character.
574
+ # +pattern+ is the String containing the abbreviation
575
+ # +list+ is the Array of Strings to match against
576
+ #
577
+ # Examples
578
+ #
579
+ # list = %w{ foo bar bars }
580
+ # pattern = 'br'
581
+ # God.pattern_match(pattern, list)
582
+ # # => ['bar', 'bars']
583
+ #
584
+ # Returns String[]:matched_elements
585
+ def self.pattern_match(pattern, list)
586
+ regex = pattern.split('').join('.*')
587
+
588
+ list.select do |item|
589
+ item =~ Regexp.new(regex)
590
+ end
591
+ end
460
592
  end
461
593
 
594
+ # Runs immediately before the program exits. If $run is true,
595
+ # start god; if $run is false, exit normally.
596
+ #
597
+ # Returns nothing
462
598
  at_exit do
463
599
  God.at_exit if $run
464
600
  end
@@ -0,0 +1,189 @@
1
+ module God
2
+ module CLI
3
+
4
+ class Command
5
+ def initialize(command, options, args)
6
+ @command = command
7
+ @options = options
8
+ @args = args
9
+
10
+ dispatch
11
+ end
12
+
13
+ def setup
14
+ # connect to drb unix socket
15
+ DRb.start_service
16
+ @server = DRbObject.new(nil, God::Socket.socket(@options[:port]))
17
+
18
+ # ping server to ensure that it is responsive
19
+ begin
20
+ @server.ping
21
+ rescue DRb::DRbConnError
22
+ puts "The server is not available (or you do not have permissions to access it)"
23
+ abort
24
+ end
25
+ end
26
+
27
+ def dispatch
28
+ if %w{load status log quit terminate}.include?(@command)
29
+ setup
30
+ send("#{@command}_command")
31
+ elsif %w{start stop restart monitor unmonitor remove}.include?(@command)
32
+ setup
33
+ lifecycle_command
34
+ elsif @command == 'check'
35
+ check_command
36
+ else
37
+ puts "Command '#{@command}' is not valid. Run 'god --help' for usage"
38
+ abort
39
+ end
40
+ end
41
+
42
+ def load_command
43
+ file = @args[1]
44
+
45
+ puts "Sending '#{@command}' command"
46
+ puts
47
+
48
+ unless File.exist?(file)
49
+ abort "File not found: #{file}"
50
+ end
51
+
52
+ names, errors = *@server.running_load(File.read(file), File.expand_path(file))
53
+
54
+ # output response
55
+ unless names.empty?
56
+ puts 'The following tasks were affected:'
57
+ names.each do |w|
58
+ puts ' ' + w
59
+ end
60
+ end
61
+
62
+ unless errors.empty?
63
+ puts errors
64
+ exit(1)
65
+ end
66
+ end
67
+
68
+ def status_command
69
+ watches = @server.status
70
+ watches.keys.sort.each do |name|
71
+ state = watches[name][:state]
72
+ puts "#{name}: #{state}"
73
+ end
74
+ end
75
+
76
+ def log_command
77
+ begin
78
+ Signal.trap('INT') { exit }
79
+ name = @args[1]
80
+ t = Time.at(0)
81
+ loop do
82
+ print @server.running_log(name, t)
83
+ t = Time.now
84
+ sleep 1
85
+ end
86
+ rescue God::NoSuchWatchError
87
+ puts "No such watch"
88
+ rescue DRb::DRbConnError
89
+ puts "The server went away"
90
+ end
91
+ end
92
+
93
+ def quit_command
94
+ begin
95
+ @server.terminate
96
+ abort 'Could not stop god'
97
+ rescue DRb::DRbConnError
98
+ puts 'Stopped god'
99
+ end
100
+ end
101
+
102
+ def terminate_command
103
+ t = Thread.new { loop { STDOUT.print('.'); STDOUT.flush; sleep(1) } }
104
+ if @server.stop_all
105
+ t.kill; STDOUT.puts
106
+ puts 'Stopped all watches'
107
+ else
108
+ t.kill; STDOUT.puts
109
+ puts 'Could not stop all watches within 10 seconds'
110
+ end
111
+
112
+ begin
113
+ @server.terminate
114
+ abort 'Could not stop god'
115
+ rescue DRb::DRbConnError
116
+ puts 'Stopped god'
117
+ end
118
+ end
119
+
120
+ def check_command
121
+ Thread.new do
122
+ begin
123
+ event_system = God::EventHandler.event_system
124
+ puts "using event system: #{event_system}"
125
+
126
+ if God::EventHandler.loaded?
127
+ puts "starting event handler"
128
+ God::EventHandler.start
129
+ else
130
+ puts "[fail] event system did not load"
131
+ exit(1)
132
+ end
133
+
134
+ puts 'forking off new process'
135
+
136
+ pid = fork do
137
+ loop { sleep(1) }
138
+ end
139
+
140
+ puts "forked process with pid = #{pid}"
141
+
142
+ God::EventHandler.register(pid, :proc_exit) do
143
+ puts "[ok] process exit event received"
144
+ exit(0)
145
+ end
146
+
147
+ sleep(1)
148
+
149
+ puts "killing process"
150
+
151
+ ::Process.kill('KILL', pid)
152
+ rescue => e
153
+ puts e.message
154
+ puts e.backtrace.join("\n")
155
+ end
156
+ end
157
+
158
+ sleep(2)
159
+
160
+ puts "[fail] never received process exit event"
161
+ exit(1)
162
+ end
163
+
164
+ def lifecycle_command
165
+ # get the name of the watch/group
166
+ name = @args[1]
167
+
168
+ puts "Sending '#{@command}' command"
169
+
170
+ t = Thread.new { loop { sleep(1); STDOUT.print('.'); STDOUT.flush; sleep(1) } }
171
+
172
+ # send @command
173
+ watches = @server.control(name, @command)
174
+
175
+ # output response
176
+ t.kill; STDOUT.puts
177
+ unless watches.empty?
178
+ puts 'The following watches were affected:'
179
+ watches.each do |w|
180
+ puts ' ' + w
181
+ end
182
+ else
183
+ puts 'No matching task or group'
184
+ end
185
+ end
186
+ end # Command
187
+
188
+ end
189
+ end