mcproc 2016.2.20

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. checksums.yaml +7 -0
  2. data/Announce.txt +135 -0
  3. data/Gemfile +9 -0
  4. data/History.txt +469 -0
  5. data/LICENSE +22 -0
  6. data/README.md +37 -0
  7. data/Rakefile +185 -0
  8. data/TODO.md +37 -0
  9. data/bin/mcproc +134 -0
  10. data/doc/intro.asciidoc +20 -0
  11. data/doc/mcproc.asciidoc +1592 -0
  12. data/ext/god/.gitignore +5 -0
  13. data/ext/god/extconf.rb +56 -0
  14. data/ext/god/kqueue_handler.c +133 -0
  15. data/ext/god/netlink_handler.c +182 -0
  16. data/lib/god.rb +780 -0
  17. data/lib/god/behavior.rb +52 -0
  18. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  19. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  20. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  21. data/lib/god/cli/command.rb +268 -0
  22. data/lib/god/cli/run.rb +170 -0
  23. data/lib/god/cli/version.rb +23 -0
  24. data/lib/god/compat19.rb +33 -0
  25. data/lib/god/condition.rb +96 -0
  26. data/lib/god/conditions/always.rb +36 -0
  27. data/lib/god/conditions/complex.rb +86 -0
  28. data/lib/god/conditions/cpu_usage.rb +80 -0
  29. data/lib/god/conditions/degrading_lambda.rb +52 -0
  30. data/lib/god/conditions/disk_usage.rb +32 -0
  31. data/lib/god/conditions/file_mtime.rb +28 -0
  32. data/lib/god/conditions/file_touched.rb +44 -0
  33. data/lib/god/conditions/flapping.rb +128 -0
  34. data/lib/god/conditions/http_response_code.rb +184 -0
  35. data/lib/god/conditions/lambda.rb +25 -0
  36. data/lib/god/conditions/memory_usage.rb +82 -0
  37. data/lib/god/conditions/process_exits.rb +66 -0
  38. data/lib/god/conditions/process_running.rb +63 -0
  39. data/lib/god/conditions/socket_responding.rb +142 -0
  40. data/lib/god/conditions/tries.rb +44 -0
  41. data/lib/god/configurable.rb +57 -0
  42. data/lib/god/contact.rb +114 -0
  43. data/lib/god/contacts/airbrake.rb +44 -0
  44. data/lib/god/contacts/campfire.rb +121 -0
  45. data/lib/god/contacts/email.rb +130 -0
  46. data/lib/god/contacts/hipchat.rb +117 -0
  47. data/lib/god/contacts/jabber.rb +75 -0
  48. data/lib/god/contacts/prowl.rb +57 -0
  49. data/lib/god/contacts/scout.rb +55 -0
  50. data/lib/god/contacts/sensu.rb +59 -0
  51. data/lib/god/contacts/slack.rb +98 -0
  52. data/lib/god/contacts/statsd.rb +46 -0
  53. data/lib/god/contacts/twitter.rb +51 -0
  54. data/lib/god/contacts/webhook.rb +74 -0
  55. data/lib/god/driver.rb +238 -0
  56. data/lib/god/errors.rb +24 -0
  57. data/lib/god/event_handler.rb +112 -0
  58. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  59. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  60. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  61. data/lib/god/logger.rb +109 -0
  62. data/lib/god/metric.rb +87 -0
  63. data/lib/god/process.rb +381 -0
  64. data/lib/god/registry.rb +32 -0
  65. data/lib/god/simple_logger.rb +59 -0
  66. data/lib/god/socket.rb +113 -0
  67. data/lib/god/sugar.rb +62 -0
  68. data/lib/god/sys_logger.rb +45 -0
  69. data/lib/god/system/portable_poller.rb +42 -0
  70. data/lib/god/system/process.rb +50 -0
  71. data/lib/god/system/slash_proc_poller.rb +92 -0
  72. data/lib/god/task.rb +552 -0
  73. data/lib/god/timeline.rb +25 -0
  74. data/lib/god/trigger.rb +43 -0
  75. data/lib/god/watch.rb +340 -0
  76. data/mcproc.gemspec +192 -0
  77. data/test/configs/child_events/child_events.god +44 -0
  78. data/test/configs/child_events/simple_server.rb +3 -0
  79. data/test/configs/child_polls/child_polls.god +37 -0
  80. data/test/configs/child_polls/simple_server.rb +12 -0
  81. data/test/configs/complex/complex.god +59 -0
  82. data/test/configs/complex/simple_server.rb +3 -0
  83. data/test/configs/contact/contact.god +118 -0
  84. data/test/configs/contact/simple_server.rb +3 -0
  85. data/test/configs/daemon_events/daemon_events.god +37 -0
  86. data/test/configs/daemon_events/simple_server.rb +8 -0
  87. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  88. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  89. data/test/configs/daemon_polls/simple_server.rb +6 -0
  90. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  91. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  92. data/test/configs/keepalive/keepalive.god +9 -0
  93. data/test/configs/keepalive/keepalive.rb +12 -0
  94. data/test/configs/lifecycle/lifecycle.god +25 -0
  95. data/test/configs/matias/matias.god +50 -0
  96. data/test/configs/real.rb +59 -0
  97. data/test/configs/running_load/running_load.god +16 -0
  98. data/test/configs/stop_options/simple_server.rb +12 -0
  99. data/test/configs/stop_options/stop_options.god +39 -0
  100. data/test/configs/stress/simple_server.rb +3 -0
  101. data/test/configs/stress/stress.god +15 -0
  102. data/test/configs/task/logs/.placeholder +0 -0
  103. data/test/configs/task/task.god +26 -0
  104. data/test/configs/test.rb +61 -0
  105. data/test/configs/usr1_trapper.rb +10 -0
  106. data/test/helper.rb +172 -0
  107. data/test/suite.rb +6 -0
  108. data/test/test_airbrake.rb +14 -0
  109. data/test/test_behavior.rb +18 -0
  110. data/test/test_campfire.rb +22 -0
  111. data/test/test_condition.rb +52 -0
  112. data/test/test_conditions_disk_usage.rb +50 -0
  113. data/test/test_conditions_http_response_code.rb +109 -0
  114. data/test/test_conditions_process_running.rb +40 -0
  115. data/test/test_conditions_socket_responding.rb +176 -0
  116. data/test/test_conditions_tries.rb +67 -0
  117. data/test/test_contact.rb +109 -0
  118. data/test/test_driver.rb +26 -0
  119. data/test/test_email.rb +34 -0
  120. data/test/test_event_handler.rb +82 -0
  121. data/test/test_god.rb +710 -0
  122. data/test/test_god_system.rb +201 -0
  123. data/test/test_handlers_kqueue_handler.rb +16 -0
  124. data/test/test_hipchat.rb +23 -0
  125. data/test/test_jabber.rb +29 -0
  126. data/test/test_logger.rb +55 -0
  127. data/test/test_metric.rb +74 -0
  128. data/test/test_process.rb +263 -0
  129. data/test/test_prowl.rb +15 -0
  130. data/test/test_registry.rb +15 -0
  131. data/test/test_sensu.rb +11 -0
  132. data/test/test_slack.rb +57 -0
  133. data/test/test_socket.rb +34 -0
  134. data/test/test_statsd.rb +22 -0
  135. data/test/test_sugar.rb +42 -0
  136. data/test/test_system_portable_poller.rb +17 -0
  137. data/test/test_system_process.rb +30 -0
  138. data/test/test_task.rb +246 -0
  139. data/test/test_timeline.rb +37 -0
  140. data/test/test_trigger.rb +63 -0
  141. data/test/test_watch.rb +286 -0
  142. data/test/test_webhook.rb +22 -0
  143. metadata +475 -0
@@ -0,0 +1,66 @@
1
module God
  module Conditions
    # Trigger when a process exits.
    #
    # +pid_file+ is the pid file of the process in question. Automatically
    #            populated for Watches.
    #
    # Examples
    #
    #   # Trigger if process exits (from a Watch).
    #   on.condition(:process_exits)
    #
    #   # Trigger if process exits (non-Watch).
    #   on.condition(:process_exits) do |c|
    #     c.pid_file = "/var/run/mongrel.3000.pid"
    #   end
    class ProcessExits < EventCondition
      # The String PID file location of the process in question. Automatically
      # populated for Watches.
      attr_accessor :pid_file

      def initialize
        self.info = "process exited"
      end

      # This condition has no required attributes, so it is always valid.
      def valid?
        true
      end

      # The pid of the process in question.
      #
      # Returns the Integer read from +pid_file+ when one is configured,
      #   otherwise whatever the watch reports as its pid.
      # Raises SystemCallError (e.g. Errno::ENOENT) if +pid_file+ is configured
      #   but cannot be read.
      def pid
        self.pid_file ? File.read(self.pid_file).strip.to_i : self.watch.pid
      end

      # Register a :proc_exit handler for the current pid. When the event
      # fires, +info+ is updated with the pid (and any extra data supplied by
      # the event handler) and the watch is triggered with this condition.
      #
      # Raises EventRegistrationFailedError if registration raises any
      #   StandardError.
      def register
        pid = self.pid

        begin
          EventHandler.register(pid, :proc_exit) do |extra|
            formatted_extra = extra.size > 0 ? " #{extra.inspect}" : ""
            self.info = "process #{pid} exited#{formatted_extra}"
            self.watch.trigger(self)
          end

          msg = "#{self.watch.name} registered 'proc_exit' event for pid #{pid}"
          applog(self.watch, :info, msg)
        rescue StandardError
          raise EventRegistrationFailedError.new
        end
      end

      # Remove the :proc_exit handler for the current pid.
      #
      # If no pid can be determined (the watch has none cached, or the pid
      # file can no longer be read) an error is logged instead of raising.
      def deregister
        pid = begin
          self.pid
        rescue SystemCallError
          # The pid file is missing or unreadable. Treat this like "no pid" so
          # the error branch below is reached instead of an exception escaping.
          nil
        end

        if pid
          EventHandler.deregister(pid, :proc_exit)

          msg = "#{self.watch.name} deregistered 'proc_exit' event for pid #{pid}"
          applog(self.watch, :info, msg)
        else
          pid_file_location = self.pid_file || self.watch.pid_file
          applog(self.watch, :error, "#{self.watch.name} could not deregister: no cached PID or PID file #{pid_file_location} (#{self.base_name})")
        end
      end
    end

  end
end
@@ -0,0 +1,63 @@
1
module God
  module Conditions
    # Trigger when a process is running or not running depending on attributes.
    #
    # Examples
    #
    #   # Trigger if process IS NOT running.
    #   on.condition(:process_running) do |c|
    #     c.running = false
    #   end
    #
    #   # Trigger if process IS running.
    #   on.condition(:process_running) do |c|
    #     c.running = true
    #   end
    #
    #   # Non-Watch Tasks must specify a PID file.
    #   on.condition(:process_running) do |c|
    #     c.running = false
    #     c.pid_file = "/var/run/mongrel.3000.pid"
    #   end
    class ProcessRunning < PollCondition
      # Public: The Boolean specifying whether you want to trigger if the
      # process is running (true) or if it is not running (false).
      attr_accessor :running

      # Public: The String PID file location of the process in question.
      # Automatically populated for Watches.
      attr_accessor :pid_file

      # Public: The pid of the process in question.
      #
      # Returns the positive Integer read from +pid_file+ when one is
      #   configured, nil when that file cannot be read or does not contain a
      #   positive integer, and otherwise whatever the watch reports as its pid.
      def pid
        return self.watch.pid unless self.pid_file

        begin
          value = File.read(self.pid_file).strip.to_i
        rescue SystemCallError
          # A missing or unreadable pid file means there is no pid to check.
          return nil
        end

        # String#to_i yields 0 for an empty or non-numeric file; 0 is not the
        # id of any single process, so report "no pid" instead.
        value > 0 ? value : nil
      end

      # Check that a pid file is available (on this condition or its watch)
      # and that +running+ has been set. Each problem is reported through
      # +complain+.
      #
      # Returns true if the configuration is usable, false otherwise.
      def valid?
        valid = true
        valid &= complain("Attribute 'pid_file' must be specified", self) if self.pid_file.nil? && self.watch.pid_file.nil?
        valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
        valid
      end

      # Poll the process and compare its state with +running+. +info+ is set
      # to a one-element Array describing the observed state.
      #
      # Returns true when the observed state matches +running+, false
      #   otherwise.
      def test
        pid = self.pid
        active = pid && System::Process.new(pid).exists?

        self.info = [active ? "process is running" : "process is not running"]

        self.running ? !!active : !active
      end
    end
  end
end
@@ -0,0 +1,142 @@
1
+ require 'socket'
2
+ include Socket::Constants
3
+
4
module God
  module Conditions
    # Condition Symbol :socket_responding
    # Type: Poll
    #
    # Trigger when a TCP or UNIX socket is responding or not
    #
    # Parameters
    #   Required
    #     +family+ is the family of socket: either 'tcp' or 'unix'
    #     --one of port or path--
    #     +port+ is the port (required if +family+ is 'tcp')
    #     +path+ is the path (required if +family+ is 'unix')
    #
    #   Optional
    #     +addr+ is the address to connect to for 'tcp' (default '127.0.0.1')
    #     +responding+ is the boolean specifying whether you want to trigger if the socket is responding (true)
    #                  or if it is not responding (false) (default false)
    #     +times+ is either an Integer n (trigger on n matching polls out of the last n)
    #             or a two-element Array [n, m] (n matching polls out of the last m) (default [1, 1])
    #
    # Examples
    #
    # Trigger if the TCP socket on port 80 is not responding or the connection is refused
    #
    #   on.condition(:socket_responding) do |c|
    #     c.family = 'tcp'
    #     c.port = '80'
    #   end
    #
    # Trigger if the socket is not responding or the connection is refused (use alternate compact +socket+ interface)
    #
    #   on.condition(:socket_responding) do |c|
    #     c.socket = 'tcp:80'
    #   end
    #
    # Trigger if the socket is responding
    #
    #   on.condition(:socket_responding) do |c|
    #     c.socket = 'tcp:80'
    #     c.responding = true
    #   end
    #
    # Trigger if the socket is not responding or the connection is refused 5 times in a row
    #
    #   on.condition(:socket_responding) do |c|
    #     c.socket = 'tcp:80'
    #     c.times = 5
    #   end
    #
    # Trigger if the Unix socket on path '/tmp/sock' is not responding or non-existent
    #
    #   on.condition(:socket_responding) do |c|
    #     c.family = 'unix'
    #     c.path = '/tmp/sock'
    #   end
    #
    class SocketResponding < PollCondition
      attr_accessor :family, :addr, :port, :path, :times, :responding

      def initialize
        super
        # default to tcp on the localhost
        self.family = 'tcp'
        self.addr = '127.0.0.1'
        # Set these to nil/0 values
        self.port = 0
        self.path = nil
        self.responding = false

        self.times = [1, 1]
      end

      # Expand an Integer +times+ into the [n, m] pair form and build the
      # timelines, each sized by the second element of +times+.
      def prepare
        if self.times.kind_of?(Integer)
          self.times = [self.times, self.times]
        end

        @timeline = Timeline.new(self.times[1])
        @history = Timeline.new(self.times[1])
      end

      # Forget all recorded poll results.
      def reset
        @timeline.clear
        @history.clear
      end

      # Compact way of setting the socket attributes from one String.
      #
      # +s+ is one of:
      #   'family:addr:port' - sets family, addr and port
      #   'tcp:port'         - sets family and port
      #   'unix:path'        - sets family and path
      #
      # Any other form leaves the attributes untouched.
      def socket=(s)
        components = s.split(':')
        if components.size == 3
          @family, @addr, @port = components
          @port = @port.to_i
        elsif components[0] == 'tcp'
          @family = components[0]
          @port = components[1].to_i
        elsif components[0] == 'unix'
          @family = components[0]
          @path = components[1]
        end
      end

      # Check that the attributes required by the chosen family are present.
      # Each problem is reported through +complain+.
      #
      # Returns true if the configuration is usable, false otherwise.
      def valid?
        valid = true
        if self.family == 'tcp' && (self.port.nil? || self.port == 0)
          valid &= complain("Attribute 'port' must be specified for tcp sockets", self)
        end
        if self.family == 'unix' && self.path.nil?
          valid &= complain("Attribute 'path' must be specified for unix sockets", self)
        end
        unless %w{tcp unix}.member?(self.family)
          valid &= complain("Attribute 'family' must be 'tcp' or 'unix'", self)
        end
        valid
      end

      # Try to connect to the socket, record whether the outcome matches
      # +responding+, and decide whether enough recorded polls match.
      #
      # Returns true (and sets +info+) when at least +times.first+ of the
      #   recorded polls matched, false otherwise.
      def test
        self.info = []

        status = if %w{tcp unix}.member?(self.family)
          self.responding == connectable?
        else
          false
        end

        @timeline.push(status)
        history = "[" + @timeline.map { |t| t ? '*' : '' }.join(',') + "]"

        if @timeline.select { |x| x }.size >= self.times.first
          self.info = "socket out of bounds #{history}"
          true
        else
          false
        end
      end

      private

      # Attempt a single connection to the configured socket and close it
      # again straight away so that polling does not leak file descriptors.
      #
      # Returns true if the connection could be opened, false if the system
      #   refused it (SystemCallError) or the address could not be resolved
      #   (SocketError).
      def connectable?
        socket = nil
        socket = if self.family == 'tcp'
          TCPSocket.new(self.addr, self.port)
        else
          UNIXSocket.new(self.path)
        end
        true
      rescue SystemCallError, SocketError
        false
      ensure
        socket.close if socket
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module God
  module Conditions

    # Trigger once the condition has been polled +times+ times, optionally
    # only when the recorded polls fall inside a time window.
    #
    # +times+ is the number of recorded polls required (required). It is also
    #         the size handed to the Timeline that records poll times.
    # +within+ is the optional limit on the time between the oldest and newest
    #          recorded poll; when nil, no time limit applies.
    class Tries < PollCondition
      attr_accessor :times, :within

      # Build the timeline that records the time of each poll.
      def prepare
        @timeline = Timeline.new(self.times)
      end

      # Forget every recorded poll.
      def reset
        @timeline.clear
      end

      # +times+ is the only required attribute.
      #
      # Returns true if the configuration is usable, false otherwise.
      def valid?
        ok = true
        if self.times.nil?
          ok &= complain("Attribute 'times' must be specified", self)
        end
        ok
      end

      # Record this poll and decide whether the try limit has been reached.
      # +info+ is set to a message that includes the current count and, when
      # +within+ is set, the elapsed whole seconds.
      #
      # Returns true when +times+ polls are recorded and (if +within+ is set)
      #   they span less than +within+, false otherwise.
      def test
        @timeline << Time.now

        enough_polls = (@timeline.size == self.times)
        inside_window = self.within.nil? || span < self.within

        history = if within
          "[#{@timeline.size}/#{self.times} within #{span.to_i}s]"
        else
          "[#{@timeline.size}/#{self.times}]"
        end

        unless enough_polls && inside_window
          self.info = "tries within bounds #{history}"
          return false
        end

        self.info = "tries exceeded #{history}"
        true
      end

      private

      # Time between the oldest and the newest recorded poll.
      def span
        @timeline.last - @timeline.first
      end
    end

  end
end
@@ -0,0 +1,57 @@
1
module God

  # Mixin that supplies default lifecycle hooks (+prepare+, +reset+,
  # +valid?+), naming helpers, and the +complain+ error-reporting helper.
  module Configurable
    # Override this method in your Configurable (optional)
    #
    # Called once after the Configurable has been sent to the block and attributes have been
    # set. Do any post-processing on attributes here
    def prepare

    end

    # Override this method in your Configurable (optional)
    #
    # Does nothing by default.
    def reset

    end

    # Override this method in your Configurable (optional)
    #
    # Called once during evaluation of the config file. Return true if valid, false otherwise
    #
    # A convenience method 'complain' is available that will print out a message and return false,
    # making it easy to report multiple validation errors:
    #
    #   def valid?
    #     valid = true
    #     valid &= complain("You must specify the 'pid_file' attribute for :memory_usage") if self.pid_file.nil?
    #     valid &= complain("You must specify the 'above' attribute for :memory_usage") if self.above.nil?
    #     valid
    #   end
    def valid?
      true
    end

    # The unqualified class name of this object (the part after the last
    # '::'), memoized after the first call.
    def base_name
      x = 1 # fix for MRI's local scope optimization bug DO NOT REMOVE!
      @base_name ||= self.class.name.split('::').last
    end

    # Human-readable name used in log messages. Defaults to +base_name+.
    def friendly_name
      base_name
    end

    # Log a validation error.
    #
    # +text+ is the error message
    # +c+ is the optional object the message is about; when it exposes a
    #     +watch+, the watch name is prefixed, and its +friendly_name+ is
    #     appended
    #
    # Returns false, so the result can be folded into a validity flag.
    def self.complain(text, c = nil)
      # Not every Configurable has a watch, so ask instead of rescuing every
      # error.
      watch = c.respond_to?(:watch) ? c.watch : nil

      prefix = watch ? "#{watch.name}: " : ""
      suffix = c ? " for #{c.friendly_name}" : ""

      applog(watch, :error, "#{prefix}#{text}#{suffix}")
      false
    end

    # Instance-level shortcut for Configurable.complain.
    def complain(text, c = nil)
      Configurable.complain(text, c)
    end
  end

end
@@ -0,0 +1,114 @@
1
module God

  # Base class for notification contacts. Subclasses live under God::Contacts
  # and must override +notify+.
  class Contact
    include Configurable

    attr_accessor :name, :group, :info

    # Instantiate the contact class matching +kind+.
    #
    # +kind+ is the contact kind as a Symbol or String; it is capitalized and
    #        underscores are folded into camel case to form the class name
    #
    # Returns a new instance of the God::Contacts class for +kind+
    # Raises NoSuchContactError if no such class exists
    # Aborts the process if the class does not subclass God::Contact
    def self.generate(kind)
      sym = kind.to_s.capitalize.gsub(/_(.)/) { $1.upcase }.intern

      # Only the constant lookup is guarded, so a NameError raised inside a
      # contact's own constructor is not misreported as a missing contact.
      begin
        klass = God::Contacts.const_get(sym)
      rescue NameError
        raise NoSuchContactError.new("No Contact found with the class name God::Contacts::#{sym}")
      end

      c = klass.new

      unless c.kind_of?(Contact)
        abort "Contact '#{c.class.name}' must subclass God::Contact"
      end

      c
    end

    # Check that +contact+ has a name, reporting the problem through
    # Configurable.complain if not.
    #
    # Returns true if the contact is valid, false otherwise.
    def self.valid?(contact)
      valid = true
      valid &= Configurable.complain("Attribute 'name' must be specified", contact) if contact.name.nil?
      valid
    end

    # Yield the class itself so that class-level defaults can be set in a
    # block.
    def self.defaults
      yield self
    end

    # Look up the attribute +name+ on this instance, falling back to the
    # value stored on the class when the instance value is nil or false.
    def arg(name)
      self.instance_variable_get("@#{name}") || self.class.instance_variable_get("@#{name}")
    end

    # Normalize the given notify specification into canonical form.
    #   +spec+ is the notify spec as a String, Array of Strings, or Hash
    #
    # Canonical form looks like:
    # {:contacts => ['fred', 'john'], :priority => '1', :category => 'awesome'}
    # Where :contacts will be present and point to an Array of Strings. Both
    # :priority and :category may not be present but if they are, they will each
    # contain a single String.
    #
    # A Hash +spec+ is not modified; the normalized form is a new Hash.
    #
    # Returns normalized notify spec
    # Raises ArgumentError on invalid spec (message contains details)
    def self.normalize(spec)
      case spec
      when String
        {:contacts => [spec]}
      when Array
        unless spec.all? { |x| x.instance_of?(String) }
          raise ArgumentError.new("contains non-String elements")
        end
        {:contacts => spec}
      when Hash
        # check :contacts
        contacts = spec.fetch(:contacts, nil)
        case contacts
        when nil, false
          raise ArgumentError.new("must have a :contacts key")
        when String
          # valid
        when Array
          unless contacts.all? { |x| x.instance_of?(String) }
            raise ArgumentError.new("has a :contacts key containing non-String elements")
          end
          # valid
        else
          raise ArgumentError.new("must have a :contacts key pointing to a String or Array of Strings")
        end

        # check for invalid keys
        extra = spec.reject { |key, _| [:contacts, :priority, :category].include?(key) }
        unless extra.empty?
          raise ArgumentError.new("contains extra elements: #{extra.inspect}")
        end

        # normalize a copy so the caller's Hash is left as it was given
        normalized = spec.dup
        normalized[:contacts] = Array(contacts)
        [:priority, :category].each do |key|
          normalized[key] = normalized[key].to_s if normalized[key]
        end

        normalized
      else
        raise ArgumentError.new("must be a String (contact name), Array (of contact names), or Hash (contact specification)")
      end
    end

    # Abstract
    # Send the message to the external source
    #   +message+ is the message body returned from the condition
    #   +time+ is the Time at which the notification was made
    #   +priority+ is the arbitrary priority String
    #   +category+ is the arbitrary category String
    #   +host+ is the hostname of the server
    def notify(message, time, priority, category, host)
      raise AbstractMethodNotOverriddenError.new("Contact#notify must be overridden in subclasses")
    end

    # Construct the friendly name of this Contact: the inherited friendly
    # name followed by the contact's own name, which looks like:
    #
    #   FooBar Contact 'fred'
    def friendly_name
      super + " Contact '#{self.name}'"
    end
  end

end