god 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/god.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
2
2
 
3
+ # rubygems
4
+ require 'rubygems'
5
+
3
6
  # core
4
7
  require 'stringio'
5
8
  require 'logger'
@@ -32,6 +35,8 @@ require 'god/conditions/lambda'
32
35
  require 'god/conditions/degrading_lambda'
33
36
  require 'god/conditions/flapping'
34
37
  require 'god/conditions/http_response_code'
38
+ require 'god/conditions/disk_usage'
39
+ require 'god/conditions/complex'
35
40
 
36
41
  require 'god/contact'
37
42
  require 'god/contacts/email'
@@ -50,11 +55,19 @@ require 'god/process'
50
55
 
51
56
  require 'god/sugar'
52
57
 
58
+ require 'god/cli/version'
59
+ require 'god/cli/command'
60
+
53
61
  $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
54
62
 
63
+ # App wide logging system
55
64
  LOG = God::Logger.new
56
65
  LOG.datetime_format = "%Y-%m-%d %H:%M:%S "
57
66
 
67
# Write a message to the app-wide log.
#   +watch+ is handed to LOG.log as the subject of the message
#           (callers in this file pass a task or nil)
#   +level+ is the severity handed to LOG.log
#           (callers in this file pass :info, :warn or :error)
#   +text+ is the message handed to LOG.log
#
# Returns whatever LOG.log returns
def applog(watch, level, text)
  LOG.log(watch, level, text)
end
70
+
58
71
  # The $run global determines whether god should be started when the
59
72
  # program would normally end. This should be set to true when god
60
73
  # should be started (e.g. `god -c <config file>`) and false otherwise
@@ -63,6 +76,7 @@ $run ||= nil
63
76
 
64
77
  GOD_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
65
78
 
79
+ # Ensure that Syslog is open
66
80
  begin
67
81
  Syslog.open('god')
68
82
  rescue RuntimeError
@@ -74,6 +88,7 @@ def root_binding
74
88
  binding
75
89
  end
76
90
 
91
+ # Load the event handler system
77
92
  God::EventHandler.load
78
93
 
79
94
  module Kernel
@@ -81,8 +96,8 @@ module Kernel
81
96
 
82
97
  def abort(text = nil)
83
98
  $run = false
84
- LOG.log(nil, :error, text) if text
85
- text ? abort_orig(text) : exit(1)
99
+ applog(nil, :error, text) if text
100
+ exit(1)
86
101
  end
87
102
 
88
103
  alias_method :exit_orig, :exit
@@ -102,7 +117,7 @@ class Module
102
117
  end
103
118
 
104
119
  if self.running && self.inited
105
- LOG.log(nil, :warn, "God.#{arg} can't be set while god is running")
120
+ applog(nil, :warn, "God.#{arg} can't be set while god is running")
106
121
  return
107
122
  end
108
123
 
@@ -117,7 +132,7 @@ class Module
117
132
  end
118
133
 
119
134
  module God
120
- VERSION = '0.5.0'
135
+ VERSION = '0.6.0'
121
136
 
122
137
  LOG_BUFFER_SIZE_DEFAULT = 1000
123
138
  PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
@@ -126,7 +141,8 @@ module God
126
141
 
127
142
  class << self
128
143
  # user configurable
129
- safe_attr_accessor :host,
144
+ safe_attr_accessor :pid,
145
+ :host,
130
146
  :port,
131
147
  :allow,
132
148
  :log_buffer_size,
@@ -145,12 +161,16 @@ module God
145
161
  end
146
162
 
147
163
  # initialize class instance variables
164
+ self.pid = nil
148
165
  self.host = nil
149
166
  self.port = nil
150
167
  self.allow = nil
151
168
  self.log_buffer_size = nil
152
169
  self.pid_file_directory = nil
153
170
 
171
+ # Initialize internal data.
172
+ #
173
+ # Returns nothing
154
174
  def self.internal_init
155
175
  # only do this once
156
176
  return if self.inited
@@ -180,13 +200,25 @@ module God
180
200
  # Instantiate a new, empty Watch object and pass it to the mandatory
181
201
  # block. The attributes of the watch will be set by the configuration
182
202
  # file.
203
+ #
204
+ # Aborts on duplicate watch name
205
+ # invalid watch
206
+ # conflicting group name
207
+ #
208
+ # Returns nothing
183
209
  def self.watch(&block)
184
210
  self.task(Watch, &block)
185
211
  end
186
212
 
187
- # Instantiate a new, empty Task object and pass it to the mandatory
213
+ # Instantiate a new, empty Task object and yield it to the mandatory
188
214
  # block. The attributes of the task will be set by the configuration
189
215
  # file.
216
+ #
217
+ # Aborts on duplicate task name
218
+ # invalid task
219
+ # conflicting group name
220
+ #
221
+ # Returns nothing
190
222
  def self.task(klass = Task)
191
223
  self.internal_init
192
224
 
@@ -224,22 +256,26 @@ module God
224
256
  if self.watches[t.group]
225
257
  abort "Group name '#{t.group}' already used for a Task"
226
258
  end
227
-
259
+
228
260
  self.groups[t.group] ||= []
229
261
  self.groups[t.group] << t
230
262
  end
231
-
263
+
232
264
  # register watch
233
265
  t.register!
234
266
 
235
267
  # log
236
268
  if self.running && existing_watch
237
- LOG.log(t, :info, "#{t.name} Reloaded config")
269
+ applog(t, :info, "#{t.name} Reloaded config")
238
270
  elsif self.running
239
- LOG.log(t, :info, "#{t.name} Loaded config")
271
+ applog(t, :info, "#{t.name} Loaded config")
240
272
  end
241
273
  end
242
274
 
275
+ # Unmonitor and remove the given watch from god.
276
+ # +watch+ is the Watch to remove
277
+ #
278
+ # Returns nothing
243
279
  def self.unwatch(watch)
244
280
  # unmonitor
245
281
  watch.unmonitor unless watch.state == :unmonitored
@@ -256,10 +292,20 @@ module God
256
292
  end
257
293
  end
258
294
 
295
+ # Instantiate a new Contact of the given kind and send it to the block.
296
+ # Then prepare, validate, and record the Contact.
297
+ # +kind+ is the contact class specifier
298
+ #
299
+ # Aborts on invalid kind
300
+ # duplicate contact name
301
+ # invalid contact
302
+ # conflicting group name
303
+ #
304
+ # Returns nothing
259
305
  def self.contact(kind)
260
306
  self.internal_init
261
307
 
262
- # create the condition
308
+ # create the contact
263
309
  begin
264
310
  c = Contact.generate(kind)
265
311
  rescue NoSuchContactError => e
@@ -298,23 +344,38 @@ module God
298
344
  if self.contacts[c.group]
299
345
  abort "Contact Group name '#{c.group}' already used for a Contact"
300
346
  end
301
-
347
+
302
348
  self.contact_groups[c.group] ||= []
303
349
  self.contact_groups[c.group] << c
304
350
  end
305
351
  end
306
352
 
353
+ # Remove the given contact from god.
354
+ # +contact+ is the Contact to remove
355
+ #
356
+ # Returns nothing
307
357
  def self.uncontact(contact)
308
358
  self.contacts.delete(contact.name)
309
359
  if contact.group
310
360
  self.contact_groups[contact.group].delete(contact)
311
361
  end
312
362
  end
313
-
363
+
364
+ # Control the lifecycle of the given task(s).
365
+ # +name+ is the name of a task/group (String)
366
+ # +command+ is the command to run (String)
367
+ # one of: "start"
368
+ # "monitor"
369
+ # "restart"
370
+ # "stop"
371
+ # "unmonitor"
372
+ # "remove"
373
+ #
374
+ # Returns String[]:task_names
314
375
  def self.control(name, command)
315
376
  # get the list of watches
316
377
  watches = Array(self.watches[name] || self.groups[name])
317
-
378
+
318
379
  jobs = []
319
380
 
320
381
  # do the command
@@ -327,6 +388,8 @@ module God
327
388
  watches.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) if w.state != :unmonitored } }
328
389
  when "unmonitor"
329
390
  watches.each { |w| jobs << Thread.new { w.unmonitor if w.state != :unmonitored } }
391
+ when "remove"
392
+ watches.each { |w| jobs << Thread.new { self.unwatch(w) } }
330
393
  else
331
394
  raise InvalidCommandError.new
332
395
  end
@@ -336,6 +399,10 @@ module God
336
399
  watches.map { |x| x.name }
337
400
  end
338
401
 
402
+ # Unmonitor and stop all tasks.
403
+ #
404
+ # Returns true on success
405
+ # false if not all tasks could be stopped within 10 seconds
339
406
  def self.stop_all
340
407
  self.watches.sort.each do |name, w|
341
408
  Thread.new do
@@ -352,10 +419,25 @@ module God
352
419
  return false
353
420
  end
354
421
 
422
+ # Force the termination of god.
423
+ # * Clean up pid file if one exists
424
+ # * Stop DRb service
425
+ # * Hard exit using exit!
426
+ #
427
+ # Never returns because the process will no longer exist!
355
428
  def self.terminate
429
+ FileUtils.rm_f(self.pid) if self.pid
430
+ self.server.stop if self.server
356
431
  exit!(0)
357
432
  end
358
433
 
434
+ # Gather the status of each task.
435
+ #
436
+ # Examples
437
+ # God.status
438
+ # # => { 'mongrel' => { :state => :up }, 'nginx' => { :state => :up } }
439
+ #
440
+ # Returns { String:task_name => { :state => Symbol:status }, ... }
359
441
  def self.status
360
442
  info = {}
361
443
  self.watches.map do |name, w|
@@ -364,14 +446,29 @@ module God
364
446
  info
365
447
  end
366
448
 
449
+ # Log lines for the given task since the specified time.
450
+ # +watch_name+ is the name of the task (may be abbreviated)
451
+ # +since+ is the Time since which to report log lines
452
+ #
453
+ # Raises God::NoSuchWatchError if no tasks matched
454
+ #
455
+ # Returns String:joined_log_lines
367
456
  def self.running_log(watch_name, since)
368
- unless self.watches[watch_name]
457
+ matches = pattern_match(watch_name, self.watches.keys)
458
+
459
+ unless matches.first
369
460
  raise NoSuchWatchError.new
370
461
  end
371
462
 
372
- LOG.watch_log_since(watch_name, since)
463
+ LOG.watch_log_since(matches.first, since)
373
464
  end
374
465
 
466
+ # Load a config file into a running god instance. Rescues any exceptions
467
+ # that the config may raise and reports these back to the caller.
468
+ # +code+ is a String containing the config file
469
+ # +filename+ is the filename of the config file
470
+ #
471
+ # Returns [String[]:task_names, String:errors]
375
472
  def self.running_load(code, filename)
376
473
  errors = ""
377
474
  watches = []
@@ -379,6 +476,7 @@ module God
379
476
  begin
380
477
  LOG.start_capture
381
478
 
479
+ Gem.clear_paths
382
480
  eval(code, root_binding, filename)
383
481
  self.pending_watches.each do |w|
384
482
  if previous_state = self.pending_watch_states[w.name]
@@ -404,6 +502,10 @@ module God
404
502
  [names, errors]
405
503
  end
406
504
 
505
+ # Load the given file(s) according to the given glob.
506
+ # +glob+ is the glob-enabled path to load
507
+ #
508
+ # Returns nothing
407
509
  def self.load(glob)
408
510
  Dir[glob].each do |f|
409
511
  Kernel.load f
@@ -420,13 +522,16 @@ module God
420
522
  end
421
523
  end
422
524
  end
423
-
525
+
424
526
  def self.validater
425
527
  unless test(?w, self.pid_file_directory)
426
528
  abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
427
529
  end
428
530
  end
429
531
 
532
+ # Initialize and startup the machinery that makes god work.
533
+ #
534
+ # Returns nothing
430
535
  def self.start
431
536
  self.internal_init
432
537
  self.setup
@@ -454,11 +559,42 @@ module God
454
559
  Timer.get.join
455
560
  end
456
561
 
562
+ # To be called on program exit to start god
563
+ #
564
+ # Returns nothing
457
565
  def self.at_exit
458
566
  self.start
459
567
  end
568
+
569
+ # private
570
+
571
# Match a shortened pattern against a list of String candidates.
# The pattern is expanded into a regular expression by inserting .*
# between each character. Every character of the pattern is matched
# literally, so names containing regexp metacharacters (".", "+",
# "(", ...) neither act as wildcards nor raise RegexpError.
# An empty pattern matches every candidate.
#   +pattern+ is the String containing the abbreviation
#   +list+ is the Array of Strings to match against
#
# Examples
#
#   list = %w{ foo bar bars }
#   pattern = 'br'
#   God.pattern_match(pattern, list)
#   # => ['bar', 'bars']
#
# Returns String[]:matched_elements
def self.pattern_match(pattern, list)
  # escape each character individually so only the inserted .* is special
  escaped_chars = pattern.split('').map { |char| Regexp.escape(char) }

  # compile once instead of once per candidate
  regex = Regexp.new(escaped_chars.join('.*'))

  list.select do |item|
    item =~ regex
  end
end
460
592
  end
461
593
 
594
+ # Runs immediately before the program exits. If $run is true,
595
+ # start god; if $run is false, exit normally.
596
+ #
597
+ # Returns nothing
462
598
  at_exit do
463
599
  God.at_exit if $run
464
600
  end
@@ -0,0 +1,189 @@
1
module God
  module CLI

    # Runs a single god command-line command. Constructing a Command
    # dispatches it immediately, so all of the work happens inside +new+.
    class Command
      # Commands that need a server connection and have their own
      # <name>_command method.
      SERVER_COMMANDS = %w{load status log quit terminate}.freeze

      # Commands that need a server connection and are forwarded to the
      # server's control method by lifecycle_command.
      LIFECYCLE_COMMANDS = %w{start stop restart monitor unmonitor remove}.freeze

      # Store the invocation details and run the command.
      #   +command+ is the String name of the command
      #   +options+ is the options Hash (only :port is read here)
      #   +args+ is the Array of arguments; index 1 holds the file or
      #          task/group name for commands that take one
      def initialize(command, options, args)
        @command = command
        @options = options
        @args = args

        dispatch
      end

      # Connect to the god server over DRb and make sure it answers a ping.
      #
      # Aborts if the server cannot be reached
      #
      # Returns nothing
      def setup
        # connect to drb unix socket
        DRb.start_service
        @server = DRbObject.new(nil, God::Socket.socket(@options[:port]))

        # ping server to ensure that it is responsive
        begin
          @server.ping
        rescue DRb::DRbConnError
          puts "The server is not available (or you do not have permissions to access it)"
          abort
        end
      end

      # Route @command to the method that implements it. Only names on
      # the fixed lists above ever reach +send+.
      #
      # Aborts on unknown command
      #
      # Returns nothing
      def dispatch
        if SERVER_COMMANDS.include?(@command)
          setup
          send("#{@command}_command")
        elsif LIFECYCLE_COMMANDS.include?(@command)
          setup
          lifecycle_command
        elsif @command == 'check'
          # check runs locally and needs no server connection
          check_command
        else
          puts "Command '#{@command}' is not valid. Run 'god --help' for usage"
          abort
        end
      end

      # Send the config file named by @args[1] to the running server and
      # print the affected tasks and any errors it reports.
      #
      # Aborts on missing file argument
      #           file not found
      # Exits with status 1 if the server reports errors
      #
      # Returns nothing
      def load_command
        file = @args[1]

        puts "Sending '#{@command}' command"
        puts

        # File.exist?(nil) would raise TypeError, so check the argument first
        abort "No config file specified" unless file

        unless File.exist?(file)
          abort "File not found: #{file}"
        end

        names, errors = *@server.running_load(File.read(file), File.expand_path(file))

        # output response
        unless names.empty?
          puts 'The following tasks were affected:'
          names.each do |w|
            puts ' ' + w
          end
        end

        unless errors.empty?
          puts errors
          exit(1)
        end
      end

      # Print one "name: state" line per task, sorted by task name.
      #
      # Returns nothing
      def status_command
        watches = @server.status
        watches.keys.sort.each do |name|
          state = watches[name][:state]
          puts "#{name}: #{state}"
        end
      end

      # Follow the log of the task named by @args[1], polling the server
      # once a second until interrupted.
      #
      # Aborts on missing task name argument
      #
      # Returns nothing
      def log_command
        name = @args[1]

        # a nil name would only fail later, on the server side
        abort "No task name specified" unless name

        begin
          Signal.trap('INT') { exit }
          since = Time.at(0)
          loop do
            print @server.running_log(name, since)
            since = Time.now
            sleep 1
          end
        rescue God::NoSuchWatchError
          puts "No such watch"
        rescue DRb::DRbConnError
          puts "The server went away"
        end
      end

      # Ask the server to terminate without stopping its tasks first.
      #
      # Aborts if the server is still reachable afterwards
      #
      # Returns nothing
      def quit_command
        terminate_server
      end

      # Stop all tasks (printing a dot each second while waiting), report
      # the outcome, then ask the server to terminate.
      #
      # Aborts if the server is still reachable afterwards
      #
      # Returns nothing
      def terminate_command
        all_stopped = with_progress_dots(0, 1) { @server.stop_all }

        if all_stopped
          puts 'Stopped all watches'
        else
          puts 'Could not stop all watches within 10 seconds'
        end

        terminate_server
      end

      # Check that the event system delivers process exit events: start
      # the event handler, fork a child, register for its exit, kill it,
      # and wait about two seconds for the event to arrive.
      #
      # Exits with status 0 when the event is received
      #                   1 when the event system did not load or the
      #                     event never arrives
      def check_command
        Thread.new do
          begin
            event_system = God::EventHandler.event_system
            puts "using event system: #{event_system}"

            if God::EventHandler.loaded?
              puts "starting event handler"
              God::EventHandler.start
            else
              puts "[fail] event system did not load"
              exit(1)
            end

            puts 'forking off new process'

            # the child just idles until it is killed below
            pid = fork do
              loop { sleep(1) }
            end

            puts "forked process with pid = #{pid}"

            God::EventHandler.register(pid, :proc_exit) do
              puts "[ok] process exit event received"
              exit(0)
            end

            sleep(1)

            puts "killing process"

            ::Process.kill('KILL', pid)
          rescue => e
            puts e.message
            puts e.backtrace.join("\n")
          end
        end

        # give the thread above time to report success before failing
        sleep(2)

        puts "[fail] never received process exit event"
        exit(1)
      end

      # Forward @command for the task/group named by @args[1] to the
      # server and print the names of the tasks it affected.
      #
      # Returns nothing
      def lifecycle_command
        # get the name of the watch/group
        name = @args[1]

        puts "Sending '#{@command}' command"

        # send @command
        watches = with_progress_dots(1, 1) { @server.control(name, @command) }

        # output response
        if watches.empty?
          puts 'No matching task or group'
        else
          puts 'The following watches were affected:'
          watches.each do |w|
            puts ' ' + w
          end
        end
      end

      private

      # Run the block while a background thread prints progress dots to
      # STDOUT. The thread is killed and the line of dots is ended even
      # when the block raises.
      #   +delay_before+ is the number of seconds to sleep before each dot
      #   +delay_after+ is the number of seconds to sleep after each dot
      #
      # Returns the value of the block
      def with_progress_dots(delay_before, delay_after)
        ticker = Thread.new do
          loop do
            sleep(delay_before) if delay_before > 0
            STDOUT.print('.')
            STDOUT.flush
            sleep(delay_after)
          end
        end

        yield
      ensure
        ticker.kill if ticker
        STDOUT.puts
      end

      # Ask the server to terminate. A connection error here is the
      # success signal: the server went away before it could answer.
      #
      # Aborts if the call returns normally
      #
      # Returns nothing
      def terminate_server
        begin
          @server.terminate
          abort 'Could not stop god'
        rescue DRb::DRbConnError
          puts 'Stopped god'
        end
      end
    end # Command

  end
end