god 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/History.txt +26 -0
  2. data/Manifest.txt +15 -1
  3. data/Rakefile +2 -7
  4. data/bin/god +104 -16
  5. data/lib/god.rb +169 -37
  6. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  7. data/lib/god/condition.rb +1 -0
  8. data/lib/god/conditions/degrading_lambda.rb +47 -0
  9. data/lib/god/conditions/process_exits.rb +6 -2
  10. data/lib/god/conditions/tries.rb +33 -0
  11. data/lib/god/dependency_graph.rb +41 -0
  12. data/lib/god/errors.rb +6 -0
  13. data/lib/god/hub.rb +43 -20
  14. data/lib/god/logger.rb +44 -0
  15. data/lib/god/process.rb +91 -19
  16. data/lib/god/registry.rb +4 -0
  17. data/lib/god/server.rb +12 -2
  18. data/lib/god/timeline.rb +36 -0
  19. data/lib/god/watch.rb +27 -8
  20. data/test/configs/child_events/child_events.god +7 -2
  21. data/test/configs/child_polls/child_polls.god +3 -1
  22. data/test/configs/child_polls/simple_server.rb +1 -1
  23. data/test/configs/daemon_events/daemon_events.god +7 -3
  24. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  25. data/test/configs/daemon_polls/simple_server.rb +6 -0
  26. data/test/configs/degrading_lambda/degrading_lambda.god +33 -0
  27. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  28. data/test/configs/real.rb +1 -1
  29. data/test/configs/running_load/running_load.god +16 -0
  30. data/test/configs/stress/simple_server.rb +3 -0
  31. data/test/configs/stress/stress.god +15 -0
  32. data/test/configs/test.rb +14 -2
  33. data/test/helper.rb +12 -2
  34. data/test/test_conditions_tries.rb +46 -0
  35. data/test/test_dependency_graph.rb +62 -0
  36. data/test/test_god.rb +289 -33
  37. data/test/test_handlers_kqueue_handler.rb +11 -7
  38. data/test/test_hub.rb +18 -0
  39. data/test/test_logger.rb +55 -0
  40. data/test/test_process.rb +135 -17
  41. data/test/test_registry.rb +2 -1
  42. data/test/test_server.rb +35 -4
  43. data/test/test_timeline.rb +14 -2
  44. data/test/test_watch.rb +7 -0
  45. metadata +21 -4
  46. data/lib/god/conditions/timeline.rb +0 -17
@@ -0,0 +1,51 @@
module God
  module Behaviors

    # Behavior that records the time of every start/restart of its watch and
    # calls the configured notifier once at least `failures` of them fall
    # within the last `seconds` seconds.
    class NotifyWhenFlapping < Behavior
      attr_accessor :failures # number of failures
      attr_accessor :seconds  # number of seconds
      attr_accessor :notifier # class to notify with

      def initialize
        super
        # Unix timestamps (integers) of the recent start/restart calls
        @startup_times = []
      end

      # Checks that all three attributes are set and that the notifier has a
      # usable `notify` method. Each problem is reported through `complain`
      # (defined elsewhere; presumably returns false -- confirm).
      def valid?
        valid = true

        [:failures, :seconds, :notifier].each do |attribute|
          next if send(attribute)
          valid &= complain("You must specify the '#{attribute}' attribute for :notify_when_flapping")
        end

        # Must take one arg or variable args
        usable_notifier = self.notifier.respond_to?(:notify) &&
                          [1, -1].include?(self.notifier.method(:notify).arity)
        unless usable_notifier
          valid &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args")
        end

        valid
      end

      # Hook run before the watch is started.
      def before_start
        record_startup
      end

      # Hook run before the watch is restarted.
      def before_restart
        record_startup
      end

      private

      # Remember the current time as a startup and check for flapping.
      def record_startup
        now = Time.now.to_i
        @startup_times << now
        check_for_flapping(now)
      end

      # Drop startups older than `seconds` before +now+, then notify if the
      # number remaining has reached `failures`.
      def check_for_flapping(now)
        cutoff = now - self.seconds
        @startup_times.select! { |time| time >= cutoff }

        count = @startup_times.length
        return unless count >= self.failures

        self.notifier.notify("#{self.watch.name} has called start/restart #{count} times in #{self.seconds} seconds")
      end
    end

  end
end
data/lib/god/condition.rb CHANGED
@@ -22,6 +22,7 @@ module God
22
22
  class PollCondition < Condition
23
23
  # all poll conditions can specify a poll interval
24
24
  attr_accessor :interval
25
+ attr_accessor :transition
25
26
 
26
27
  # Override this method in your Conditions (optional)
27
28
  def before
@@ -0,0 +1,47 @@
module God
  module Conditions

    # Poll condition driven by a user-supplied lambda.
    #
    # Every time the lambda fails (returns a falsy value or does not finish
    # within the current interval) the poll interval is halved. The third
    # failure in a row makes #test return true. A passing call resets the
    # failure count and restores the original interval.
    class DegradingLambda < PollCondition
      attr_accessor :lambda

      def initialize
        super
        # number of consecutive failures seen so far
        @tries = 0
      end

      # The 'lambda' attribute is mandatory; a missing one is reported
      # through `complain` (defined elsewhere).
      def valid?
        return true unless self.lambda.nil?

        true & complain("You must specify the 'lambda' attribute for :degrading_lambda")
      end

      # Runs the lambda once. Returns true only on the third consecutive
      # failure, false otherwise.
      def test
        puts "Calling test. Interval at #{self.interval}"
        # remember the configured interval the first time through
        @original_interval ||= self.interval

        if pass?
          @tries = 0
          self.interval = @original_interval
          return false
        end

        return true if @tries == 2

        # failed, but not yet three times in a row: poll twice as often
        self.interval = self.interval / 2.0
        @tries += 1
        false
      end

      private

      # Result of calling the lambda, or false if it runs longer than the
      # current interval.
      def pass?
        Timeout::timeout(@interval) { self.lambda.call() }
      rescue Timeout::Error
        false
      end
    end

  end
end
@@ -11,8 +11,12 @@ module God
11
11
  def register
12
12
  pid = File.read(self.watch.pid_file).strip.to_i
13
13
 
14
- EventHandler.register(pid, :proc_exit) do
15
- Hub.trigger(self)
14
+ begin
15
+ EventHandler.register(pid, :proc_exit) do
16
+ Hub.trigger(self)
17
+ end
18
+ rescue StandardError
19
+ raise EventRegistrationFailedError.new
16
20
  end
17
21
  end
18
22
 
@@ -0,0 +1,33 @@
module God
  module Conditions

    # Poll condition that becomes true once its timeline holds `times`
    # entries and, when `within` is set, the span between the first and last
    # entry is shorter than `within` (Time subtraction, i.e. seconds).
    class Tries < PollCondition
      attr_accessor :times, :within

      # Builds the timeline that collects one timestamp per #test call.
      # NOTE(review): Timeline is defined elsewhere; presumably it keeps at
      # most `times` entries -- confirm.
      def prepare
        @timeline = Timeline.new(self.times)
      end

      # The 'times' attribute is mandatory; a missing one is reported through
      # `complain` (defined elsewhere).
      def valid?
        self.times.nil? ? (true & complain("You must specify the 'times' attribute for :tries")) : true
      end

      # Records the current time and reports whether the condition is met.
      def test
        @timeline << Time.now

        enough_tries = (@timeline.size == self.times)
        in_window = within.nil? || (@timeline.last - @timeline.first) < self.within

        return false unless enough_tries && in_window

        # without a time window, start counting from scratch after a hit
        @timeline.clear if within.nil?
        true
      end
    end

  end
end
@@ -0,0 +1,41 @@
module God
  # Directed graph of named nodes. `nodes` maps each name to its Node, and
  # each Node lists the nodes it directly depends on.
  class DependencyGraph
    attr_accessor :nodes

    def initialize
      self.nodes = {}
    end

    # Records that +a+ depends on +b+, creating and registering a Node for
    # either name that is not in the graph yet.
    def add(a, b)
      node_a = self.nodes[a] || Node.new(a)
      node_b = self.nodes[b] || Node.new(b)

      node_a.add(node_b)

      self.nodes[a] ||= node_a
      self.nodes[b] ||= node_b
    end

    # A single named vertex together with its direct dependencies.
    class Node
      attr_accessor :name
      attr_accessor :dependencies

      def initialize(name)
        self.name = name
        self.dependencies = []
      end

      # Adds +node+ as a direct dependency unless it is already listed.
      def add(node)
        self.dependencies << node unless self.dependencies.include?(node)
      end

      # True if +node+ is this node or is reachable through its dependencies.
      # The search recurses without tracking visited nodes, so it will not
      # terminate when +node+ is absent from a graph that contains a cycle.
      def has_node?(node)
        # Array has no `any` method; the predicate is `any?`
        (self == node) || self.dependencies.any? { |x| x.has_node?(node) }
      end
    end
  end
end
data/lib/god/errors.rb CHANGED
@@ -3,6 +3,9 @@ module God
3
3
  class AbstractMethodNotOverriddenError < StandardError
4
4
  end
5
5
 
6
+ class NoSuchWatchError < StandardError
7
+ end
8
+
6
9
  class NoSuchConditionError < StandardError
7
10
  end
8
11
 
@@ -12,4 +15,7 @@ module God
12
15
  class InvalidCommandError < StandardError
13
16
  end
14
17
 
18
+ class EventRegistrationFailedError < StandardError
19
+ end
20
+
15
21
  end
data/lib/god/hub.rb CHANGED
@@ -51,25 +51,48 @@ module God
51
51
 
52
52
  # it's possible that the timer will trigger an event before it can be cleared
53
53
  # by an exiting metric, in which case it should be ignored
54
- return if metric.nil?
55
-
56
- watch = metric.watch
57
-
58
- watch.mutex.synchronize do
59
- result = condition.test
60
-
61
- msg = watch.name + ' ' + condition.class.name + " [#{result}] " + metric.destination.inspect
62
- Syslog.debug(msg)
63
- puts msg
64
-
65
- condition.after
66
-
67
- dest = metric.destination[result]
68
- if dest
69
- watch.move(dest)
70
- else
71
- # reschedule
72
- Timer.get.schedule(condition)
54
+ unless metric.nil?
55
+ watch = metric.watch
56
+
57
+ watch.mutex.synchronize do
58
+ # run the test
59
+ result = condition.test
60
+
61
+ # log
62
+ msg = watch.name + ' ' + condition.class.name + " [#{result}] " + metric.destination.inspect
63
+ Syslog.debug(msg)
64
+ LOG.log(watch, :info, msg)
65
+
66
+ # after-condition
67
+ condition.after
68
+
69
+ # get the destination
70
+ dest =
71
+ if result && condition.transition
72
+ # condition override
73
+ condition.transition
74
+ else
75
+ # regular
76
+ metric.destination[result]
77
+ end
78
+
79
+ # transition or reschedule
80
+ if dest
81
+ # transition
82
+ begin
83
+ watch.move(dest)
84
+ rescue EventRegistrationFailedError
85
+ msg = watch.name + ' Event registration failed, moving back to previous state'
86
+ Syslog.debug(msg)
87
+ LOG.log(watch, :info, msg)
88
+
89
+ dest = watch.state
90
+ retry
91
+ end
92
+ else
93
+ # reschedule
94
+ Timer.get.schedule(condition)
95
+ end
73
96
  end
74
97
  end
75
98
  rescue => e
@@ -89,7 +112,7 @@ module God
89
112
  watch.mutex.synchronize do
90
113
  msg = watch.name + ' ' + condition.class.name + " [true] " + metric.destination.inspect
91
114
  Syslog.debug(msg)
92
- puts msg
115
+ LOG.log(watch, :info, msg)
93
116
 
94
117
  dest = metric.destination[true]
95
118
  watch.move(dest)
data/lib/god/logger.rb ADDED
@@ -0,0 +1,44 @@
module God

  # Logger that writes to STDOUT and also keeps, per watch name, a Timeline
  # of [time, formatted line] pairs that can be queried later.
  class Logger < ::Logger
    attr_accessor :logs

    def initialize
      super(STDOUT)
      self.logs = {}
      # guards every read and write of the per-watch timelines in `logs`
      @mutex = Mutex.new
    end

    # Logs +text+ at +level+ (a ::Logger severity method name such as :info)
    # both to the buffer of +watch+ (anything responding to `name`) and to
    # the regular STDOUT logger.
    def log(watch, level, text)
      # format the line with a temporary logger writing into a string buffer
      buf = StringIO.new
      templog = ::Logger.new(buf)
      templog.send(level, text)

      @mutex.synchronize do
        # initialize watch log if necessary; done under the lock so two
        # threads cannot each create a timeline and overwrite one another
        timeline = (self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT))

        # push onto timeline for the given watch
        timeline << [Time.now, buf.string]
      end
      templog.close

      # send to regular logger
      self.send(level, text)
    end

    # Returns the buffered lines for +watch_name+ logged after +since+,
    # joined into one string ("" when there are none).
    def watch_log_since(watch_name, since)
      @mutex.synchronize do
        # initialize watch log if necessary (under the lock, as in #log)
        timeline = (self.logs[watch_name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT))

        # get and join lines since given time
        timeline.select { |entry| entry.first > since }.map { |entry| entry[1] }.join
      end
    end
  end

end
data/lib/god/process.rb CHANGED
@@ -4,14 +4,69 @@ module God
4
4
  class Process
5
5
  WRITES_PID = [:start, :restart]
6
6
 
7
- attr_accessor :name, :uid, :gid, :start, :stop, :restart
7
+ attr_accessor :name, :uid, :gid, :log, :start, :stop, :restart
8
8
 
9
- def initialize(options={})
10
- options.each do |k,v|
11
- send("#{k}=", v)
9
+ def initialize
10
+ @pid_file = nil
11
+ @tracking_pid = false
12
+ end
13
+
14
+ def alive?
15
+ pid = File.read(self.pid_file).strip.to_i
16
+ System::Process.new(pid).exists?
17
+ end
18
+
19
+ def valid?
20
+ # determine if we're tracking pid or not
21
+ self.pid_file
22
+
23
+ valid = true
24
+
25
+ # a name must be specified
26
+ if self.name.nil?
27
+ valid = false
28
+ LOG.log(self, :error, "No name was specified")
12
29
  end
13
30
 
14
- @tracking_pid = false
31
+ # a start command must be specified
32
+ if self.start.nil?
33
+ valid = false
34
+ LOG.log(self, :error, "No start command was specified")
35
+ end
36
+
37
+ # self-daemonizing processes must specify a stop command
38
+ if !@tracking_pid && self.stop.nil?
39
+ valid = false
40
+ LOG.log(self, :error, "No stop command was specified")
41
+ end
42
+
43
+ # self-daemonizing processes cannot specify log
44
+ if !@tracking_pid && self.log
45
+ valid = false
46
+ LOG.log(self, :error, "Self-daemonizing processes cannot specify a log file")
47
+ end
48
+
49
+ # uid must exist if specified
50
+ if self.uid
51
+ begin
52
+ Etc.getpwnam(self.uid)
53
+ rescue ArgumentError
54
+ valid = false
55
+ LOG.log(self, :error, "UID for '#{self.uid}' does not exist")
56
+ end
57
+ end
58
+
59
+ # gid must exist if specified
60
+ if self.gid
61
+ begin
62
+ Etc.getgrnam(self.gid)
63
+ rescue ArgumentError
64
+ valid = false
65
+ LOG.log(self, :error, "GID for '#{self.gid}' does not exist")
66
+ end
67
+ end
68
+
69
+ valid
15
70
  end
16
71
 
17
72
  # DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
@@ -43,20 +98,32 @@ module God
43
98
 
44
99
  def call_action(action)
45
100
  command = send(action)
46
- if command.kind_of?(String)
47
- # Make pid directory
48
- unless test(?d, God.pid_file_directory)
49
- begin
50
- FileUtils.mkdir_p(God.pid_file_directory)
51
- rescue Errno::EACCES => e
52
- abort"Failed to create pid file directory: #{e.message}"
101
+
102
+ if action == :stop && command.nil?
103
+ pid = File.read(self.pid_file).strip.to_i
104
+ name = self.name
105
+ command = lambda do
106
+ LOG.log(self, :info, "#{self.name} stop: default lambda killer")
107
+
108
+ ::Process.kill('HUP', pid) rescue nil
109
+
110
+ # Poll to see if it's dead
111
+ 5.times do
112
+ begin
113
+ ::Process.kill(0, pid)
114
+ rescue Errno::ESRCH
115
+ # It died. Good.
116
+ return
117
+ end
118
+
119
+ sleep 1
53
120
  end
121
+
122
+ ::Process.kill('KILL', pid) rescue nil
54
123
  end
55
-
56
- unless test(?w, God.pid_file_directory)
57
- abort "The pid file directory (#{God.pid_file_directory}) is not writable by #{Etc.getlogin}"
58
- end
59
-
124
+ end
125
+
126
+ if command.kind_of?(String)
60
127
  # string command
61
128
  # fork/exec to setuid/gid
62
129
  r, w = IO.pipe
@@ -70,9 +137,14 @@ module God
70
137
  Dir.chdir "/"
71
138
  $0 = command
72
139
  STDIN.reopen "/dev/null"
73
- STDOUT.reopen "/dev/null", "a"
140
+ if self.log
141
+ STDOUT.reopen self.log, "a"
142
+ else
143
+ STDOUT.reopen "/dev/null", "a"
144
+ end
74
145
  STDERR.reopen STDOUT
75
- exec command
146
+
147
+ exec command unless command.empty?
76
148
  end
77
149
  puts pid.to_s
78
150
  end