god 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/History.txt +26 -0
  2. data/Manifest.txt +15 -1
  3. data/Rakefile +2 -7
  4. data/bin/god +104 -16
  5. data/lib/god.rb +169 -37
  6. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  7. data/lib/god/condition.rb +1 -0
  8. data/lib/god/conditions/degrading_lambda.rb +47 -0
  9. data/lib/god/conditions/process_exits.rb +6 -2
  10. data/lib/god/conditions/tries.rb +33 -0
  11. data/lib/god/dependency_graph.rb +41 -0
  12. data/lib/god/errors.rb +6 -0
  13. data/lib/god/hub.rb +43 -20
  14. data/lib/god/logger.rb +44 -0
  15. data/lib/god/process.rb +91 -19
  16. data/lib/god/registry.rb +4 -0
  17. data/lib/god/server.rb +12 -2
  18. data/lib/god/timeline.rb +36 -0
  19. data/lib/god/watch.rb +27 -8
  20. data/test/configs/child_events/child_events.god +7 -2
  21. data/test/configs/child_polls/child_polls.god +3 -1
  22. data/test/configs/child_polls/simple_server.rb +1 -1
  23. data/test/configs/daemon_events/daemon_events.god +7 -3
  24. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  25. data/test/configs/daemon_polls/simple_server.rb +6 -0
  26. data/test/configs/degrading_lambda/degrading_lambda.god +33 -0
  27. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  28. data/test/configs/real.rb +1 -1
  29. data/test/configs/running_load/running_load.god +16 -0
  30. data/test/configs/stress/simple_server.rb +3 -0
  31. data/test/configs/stress/stress.god +15 -0
  32. data/test/configs/test.rb +14 -2
  33. data/test/helper.rb +12 -2
  34. data/test/test_conditions_tries.rb +46 -0
  35. data/test/test_dependency_graph.rb +62 -0
  36. data/test/test_god.rb +289 -33
  37. data/test/test_handlers_kqueue_handler.rb +11 -7
  38. data/test/test_hub.rb +18 -0
  39. data/test/test_logger.rb +55 -0
  40. data/test/test_process.rb +135 -17
  41. data/test/test_registry.rb +2 -1
  42. data/test/test_server.rb +35 -4
  43. data/test/test_timeline.rb +14 -2
  44. data/test/test_watch.rb +7 -0
  45. metadata +21 -4
  46. data/lib/god/conditions/timeline.rb +0 -17
@@ -0,0 +1,51 @@
module God
  module Behaviors

    # Records the time of every start/restart and tells the configured
    # notifier when at least +failures+ of them fall inside the last
    # +seconds+ seconds.
    class NotifyWhenFlapping < Behavior
      attr_accessor :failures # number of failures
      attr_accessor :seconds # number of seconds
      attr_accessor :notifier # class to notify with

      # Arities of a +notify+ method that can be called with exactly one
      # argument: one required (1), all optional/splat (-1), or one
      # required followed by optional/splat parameters (-2).
      ONE_ARG_ARITIES = [1, -1, -2].freeze

      def initialize
        super
        @startup_times = []
      end

      # Checks that all three attributes are set and that the notifier can
      # be called as notifier.notify(message).
      #
      # Returns true only when every check passes; each failed check is
      # reported through +complain+ (inherited; presumably returns a falsy
      # value - confirm against Behavior).
      def valid?
        valid = true
        valid &= complain("You must specify the 'failures' attribute for :notify_when_flapping") unless self.failures
        valid &= complain("You must specify the 'seconds' attribute for :notify_when_flapping") unless self.seconds
        valid &= complain("You must specify the 'notifier' attribute for :notify_when_flapping") unless self.notifier

        # Must take one arg or variable args
        unless notifier_accepts_one_arg?
          valid &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args")
        end

        valid
      end

      # Hook run before the watch is started.
      def before_start
        record_startup
      end

      # Hook run before the watch is restarted.
      def before_restart
        record_startup
      end

      private

      # True when the notifier responds to +notify+ and that method can be
      # invoked with a single argument.
      def notifier_accepts_one_arg?
        return false unless self.notifier.respond_to?(:notify)

        ONE_ARG_ARITIES.include?(self.notifier.method(:notify).arity)
      end

      # Shared body of the start/restart hooks: remember the current time
      # (whole seconds) and evaluate the flapping rule.
      def record_startup
        now = Time.now.to_i
        @startup_times << now
        check_for_flapping(now)
      end

      # Drops recorded times older than the window, then notifies if the
      # number of remaining start/restarts has reached the threshold.
      def check_for_flapping(now)
        @startup_times.select! { |time| time >= now - self.seconds }
        if @startup_times.length >= self.failures
          self.notifier.notify("#{self.watch.name} has called start/restart #{@startup_times.length} times in #{self.seconds} seconds")
        end
      end
    end

  end
end
data/lib/god/condition.rb CHANGED
@@ -22,6 +22,7 @@ module God
22
22
  class PollCondition < Condition
23
23
  # all poll conditions can specify a poll interval
24
24
  attr_accessor :interval
25
+ attr_accessor :transition
25
26
 
26
27
  # Override this method in your Conditions (optional)
27
28
  def before
@@ -0,0 +1,47 @@
module God
  module Conditions

    # Poll condition driven by a user-supplied callable.
    #
    # Every failed check halves the poll interval; the third consecutive
    # failure makes #test return true. A passing check resets both the
    # failure count and the interval.
    class DegradingLambda < PollCondition
      attr_accessor :lambda

      def initialize
        super
        @tries = 0
      end

      # The 'lambda' attribute is mandatory.
      def valid?
        return true unless self.lambda.nil?

        true & complain("You must specify the 'lambda' attribute for :degrading_lambda")
      end

      # Runs one check. Returns true once the callable has failed three
      # times in a row, false otherwise.
      def test
        puts "Calling test. Interval at #{self.interval}"
        @original_interval ||= self.interval

        if pass?
          # healthy again: forget earlier failures, restore the interval
          @tries = 0
          self.interval = @original_interval
          return false
        end

        # two earlier failures already recorded, so this is the third
        return true if @tries == 2

        self.interval = self.interval / 2.0
        @tries += 1
        false
      end

      private

      # Result of calling the lambda, or false when the call takes longer
      # than the current interval.
      def pass?
        Timeout::timeout(@interval) { self.lambda.call() }
      rescue Timeout::Error
        false
      end
    end

  end
end
@@ -11,8 +11,12 @@ module God
11
11
  def register
12
12
  pid = File.read(self.watch.pid_file).strip.to_i
13
13
 
14
- EventHandler.register(pid, :proc_exit) do
15
- Hub.trigger(self)
14
+ begin
15
+ EventHandler.register(pid, :proc_exit) do
16
+ Hub.trigger(self)
17
+ end
18
+ rescue StandardError
19
+ raise EventRegistrationFailedError.new
16
20
  end
17
21
  end
18
22
 
@@ -0,0 +1,33 @@
module God
  module Conditions

    # Poll condition that becomes true once it has been tested +times+
    # times, optionally requiring those tests to span less than +within+
    # seconds.
    class Tries < PollCondition
      attr_accessor :times, :within

      # Builds the timeline that holds the timestamps of recent tests,
      # sized by +times+.
      def prepare
        @timeline = Timeline.new(self.times)
      end

      # The 'times' attribute is mandatory.
      def valid?
        return true unless self.times.nil?

        true & complain("You must specify the 'times' attribute for :tries")
      end

      # Records the current time and reports whether the condition is met.
      def test
        @timeline << Time.now

        enough_tries = (@timeline.size == self.times)
        inside_window = within.nil? || (@timeline.last - @timeline.first) < self.within

        return false unless enough_tries && inside_window

        # with no time window, start counting afresh after firing
        @timeline.clear if within.nil?
        true
      end
    end

  end
end
@@ -0,0 +1,41 @@
module God
  # Registry of named nodes joined by directed "depends on" edges.
  class DependencyGraph
    # Hash mapping every name passed to #add to its Node.
    attr_accessor :nodes

    def initialize
      self.nodes = {}
    end

    # Records that +a+ depends on +b+, creating and registering a Node for
    # either name on first mention.
    #
    # The registered node is always reused, so add(x, x) yields a genuine
    # self-edge rather than an edge to an unregistered duplicate.
    #
    # Returns the Node registered for +b+.
    def add(a, b)
      node_a = (self.nodes[a] ||= Node.new(a))
      node_b = (self.nodes[b] ||= Node.new(b))

      node_a.add(node_b)
      node_b
    end
  end
end

module God
  class DependencyGraph
    # A named vertex together with the nodes it directly depends on.
    class Node
      attr_accessor :name
      attr_accessor :dependencies

      def initialize(name)
        self.name = name
        self.dependencies = []
      end

      # Adds +node+ as a direct dependency unless it is already listed.
      def add(node)
        self.dependencies << node unless self.dependencies.include?(node)
      end

      # Returns true when +node+ is this node or is reachable from it by
      # following dependencies, false otherwise.
      #
      # The walk is iterative and remembers visited nodes, so it also
      # terminates on graphs that contain cycles.
      def has_node?(node)
        visited = {}
        pending = [self]

        until pending.empty?
          current = pending.pop
          next if visited[current.object_id]

          visited[current.object_id] = true
          return true if current == node

          pending.concat(current.dependencies)
        end

        false
      end
    end
  end
end
data/lib/god/errors.rb CHANGED
@@ -3,6 +3,9 @@ module God
3
3
  class AbstractMethodNotOverriddenError < StandardError
4
4
  end
5
5
 
6
+ class NoSuchWatchError < StandardError
7
+ end
8
+
6
9
  class NoSuchConditionError < StandardError
7
10
  end
8
11
 
@@ -12,4 +15,7 @@ module God
12
15
  class InvalidCommandError < StandardError
13
16
  end
14
17
 
18
+ class EventRegistrationFailedError < StandardError
19
+ end
20
+
15
21
  end
data/lib/god/hub.rb CHANGED
@@ -51,25 +51,48 @@ module God
51
51
 
52
52
  # it's possible that the timer will trigger an event before it can be cleared
53
53
  # by an exiting metric, in which case it should be ignored
54
- return if metric.nil?
55
-
56
- watch = metric.watch
57
-
58
- watch.mutex.synchronize do
59
- result = condition.test
60
-
61
- msg = watch.name + ' ' + condition.class.name + " [#{result}] " + metric.destination.inspect
62
- Syslog.debug(msg)
63
- puts msg
64
-
65
- condition.after
66
-
67
- dest = metric.destination[result]
68
- if dest
69
- watch.move(dest)
70
- else
71
- # reschedule
72
- Timer.get.schedule(condition)
54
+ unless metric.nil?
55
+ watch = metric.watch
56
+
57
+ watch.mutex.synchronize do
58
+ # run the test
59
+ result = condition.test
60
+
61
+ # log
62
+ msg = watch.name + ' ' + condition.class.name + " [#{result}] " + metric.destination.inspect
63
+ Syslog.debug(msg)
64
+ LOG.log(watch, :info, msg)
65
+
66
+ # after-condition
67
+ condition.after
68
+
69
+ # get the destination
70
+ dest =
71
+ if result && condition.transition
72
+ # condition override
73
+ condition.transition
74
+ else
75
+ # regular
76
+ metric.destination[result]
77
+ end
78
+
79
+ # transition or reschedule
80
+ if dest
81
+ # transition
82
+ begin
83
+ watch.move(dest)
84
+ rescue EventRegistrationFailedError
85
+ msg = watch.name + ' Event registration failed, moving back to previous state'
86
+ Syslog.debug(msg)
87
+ LOG.log(watch, :info, msg)
88
+
89
+ dest = watch.state
90
+ retry
91
+ end
92
+ else
93
+ # reschedule
94
+ Timer.get.schedule(condition)
95
+ end
73
96
  end
74
97
  end
75
98
  rescue => e
@@ -89,7 +112,7 @@ module God
89
112
  watch.mutex.synchronize do
90
113
  msg = watch.name + ' ' + condition.class.name + " [true] " + metric.destination.inspect
91
114
  Syslog.debug(msg)
92
- puts msg
115
+ LOG.log(watch, :info, msg)
93
116
 
94
117
  dest = metric.destination[true]
95
118
  watch.move(dest)
data/lib/god/logger.rb ADDED
@@ -0,0 +1,44 @@
module God

  # Logger that writes to STDOUT and additionally keeps, per watch name, a
  # timeline of recently formatted log lines so they can be fetched later.
  class Logger < ::Logger
    # Hash of watch name => Timeline of [Time, formatted line] pairs.
    attr_accessor :logs

    def initialize
      super(STDOUT)
      self.logs = {}
      @mutex = Mutex.new
    end

    # Logs +text+ at +level+ (a severity method name such as :info) on
    # behalf of +watch+.
    #
    # The line is stored in the timeline for watch.name and then emitted
    # through the regular logger. Returns whatever the severity method
    # returns.
    def log(watch, level, text)
      # format the line with a throwaway logger writing into a string
      buf = StringIO.new
      templog = ::Logger.new(buf)
      templog.send(level, text)
      line = buf.string
      templog.close

      # create-if-missing and append happen under one lock so concurrent
      # callers cannot replace each other's timeline
      @mutex.synchronize do
        timeline_for(watch.name) << [Time.now, line]
      end

      # send to regular logger
      self.send(level, text)
    end

    # Returns, as one String, the stored lines for +watch_name+ whose
    # timestamp is later than +since+ (empty String when there are none).
    def watch_log_since(watch_name, since)
      @mutex.synchronize do
        timeline_for(watch_name).select do |entry|
          entry.first > since
        end.map do |entry|
          entry[1]
        end.join
      end
    end

    private

    # Timeline for +name+, created on first use. Callers hold @mutex.
    def timeline_for(name)
      self.logs[name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
    end
  end

end
data/lib/god/process.rb CHANGED
@@ -4,14 +4,69 @@ module God
4
4
  class Process
5
5
  WRITES_PID = [:start, :restart]
6
6
 
7
- attr_accessor :name, :uid, :gid, :start, :stop, :restart
7
+ attr_accessor :name, :uid, :gid, :log, :start, :stop, :restart
8
8
 
9
- def initialize(options={})
10
- options.each do |k,v|
11
- send("#{k}=", v)
9
+ def initialize
10
+ @pid_file = nil
11
+ @tracking_pid = false
12
+ end
13
+
14
+ def alive?
15
+ pid = File.read(self.pid_file).strip.to_i
16
+ System::Process.new(pid).exists?
17
+ end
18
+
19
+ def valid?
20
+ # determine if we're tracking pid or not
21
+ self.pid_file
22
+
23
+ valid = true
24
+
25
+ # a name must be specified
26
+ if self.name.nil?
27
+ valid = false
28
+ LOG.log(self, :error, "No name was specified")
12
29
  end
13
30
 
14
- @tracking_pid = false
31
+ # a start command must be specified
32
+ if self.start.nil?
33
+ valid = false
34
+ LOG.log(self, :error, "No start command was specified")
35
+ end
36
+
37
+ # self-daemonizing processes must specify a stop command
38
+ if !@tracking_pid && self.stop.nil?
39
+ valid = false
40
+ LOG.log(self, :error, "No stop command was specified")
41
+ end
42
+
43
+ # self-daemonizing processes cannot specify log
44
+ if !@tracking_pid && self.log
45
+ valid = false
46
+ LOG.log(self, :error, "Self-daemonizing processes cannot specify a log file")
47
+ end
48
+
49
+ # uid must exist if specified
50
+ if self.uid
51
+ begin
52
+ Etc.getpwnam(self.uid)
53
+ rescue ArgumentError
54
+ valid = false
55
+ LOG.log(self, :error, "UID for '#{self.uid}' does not exist")
56
+ end
57
+ end
58
+
59
+ # gid must exist if specified
60
+ if self.gid
61
+ begin
62
+ Etc.getgrnam(self.gid)
63
+ rescue ArgumentError
64
+ valid = false
65
+ LOG.log(self, :error, "GID for '#{self.gid}' does not exist")
66
+ end
67
+ end
68
+
69
+ valid
15
70
  end
16
71
 
17
72
  # DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
@@ -43,20 +98,32 @@ module God
43
98
 
44
99
  def call_action(action)
45
100
  command = send(action)
46
- if command.kind_of?(String)
47
- # Make pid directory
48
- unless test(?d, God.pid_file_directory)
49
- begin
50
- FileUtils.mkdir_p(God.pid_file_directory)
51
- rescue Errno::EACCES => e
52
- abort"Failed to create pid file directory: #{e.message}"
101
+
102
+ if action == :stop && command.nil?
103
+ pid = File.read(self.pid_file).strip.to_i
104
+ name = self.name
105
+ command = lambda do
106
+ LOG.log(self, :info, "#{self.name} stop: default lambda killer")
107
+
108
+ ::Process.kill('HUP', pid) rescue nil
109
+
110
+ # Poll to see if it's dead
111
+ 5.times do
112
+ begin
113
+ ::Process.kill(0, pid)
114
+ rescue Errno::ESRCH
115
+ # It died. Good.
116
+ return
117
+ end
118
+
119
+ sleep 1
53
120
  end
121
+
122
+ ::Process.kill('KILL', pid) rescue nil
54
123
  end
55
-
56
- unless test(?w, God.pid_file_directory)
57
- abort "The pid file directory (#{God.pid_file_directory}) is not writable by #{Etc.getlogin}"
58
- end
59
-
124
+ end
125
+
126
+ if command.kind_of?(String)
60
127
  # string command
61
128
  # fork/exec to setuid/gid
62
129
  r, w = IO.pipe
@@ -70,9 +137,14 @@ module God
70
137
  Dir.chdir "/"
71
138
  $0 = command
72
139
  STDIN.reopen "/dev/null"
73
- STDOUT.reopen "/dev/null", "a"
140
+ if self.log
141
+ STDOUT.reopen self.log, "a"
142
+ else
143
+ STDOUT.reopen "/dev/null", "a"
144
+ end
74
145
  STDERR.reopen STDOUT
75
- exec command
146
+
147
+ exec command unless command.empty?
76
148
  end
77
149
  puts pid.to_s
78
150
  end