mcproc 2016.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. checksums.yaml +7 -0
  2. data/Announce.txt +135 -0
  3. data/Gemfile +9 -0
  4. data/History.txt +469 -0
  5. data/LICENSE +22 -0
  6. data/README.md +37 -0
  7. data/Rakefile +185 -0
  8. data/TODO.md +37 -0
  9. data/bin/mcproc +134 -0
  10. data/doc/intro.asciidoc +20 -0
  11. data/doc/mcproc.asciidoc +1592 -0
  12. data/ext/god/.gitignore +5 -0
  13. data/ext/god/extconf.rb +56 -0
  14. data/ext/god/kqueue_handler.c +133 -0
  15. data/ext/god/netlink_handler.c +182 -0
  16. data/lib/god.rb +780 -0
  17. data/lib/god/behavior.rb +52 -0
  18. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  19. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  20. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  21. data/lib/god/cli/command.rb +268 -0
  22. data/lib/god/cli/run.rb +170 -0
  23. data/lib/god/cli/version.rb +23 -0
  24. data/lib/god/compat19.rb +33 -0
  25. data/lib/god/condition.rb +96 -0
  26. data/lib/god/conditions/always.rb +36 -0
  27. data/lib/god/conditions/complex.rb +86 -0
  28. data/lib/god/conditions/cpu_usage.rb +80 -0
  29. data/lib/god/conditions/degrading_lambda.rb +52 -0
  30. data/lib/god/conditions/disk_usage.rb +32 -0
  31. data/lib/god/conditions/file_mtime.rb +28 -0
  32. data/lib/god/conditions/file_touched.rb +44 -0
  33. data/lib/god/conditions/flapping.rb +128 -0
  34. data/lib/god/conditions/http_response_code.rb +184 -0
  35. data/lib/god/conditions/lambda.rb +25 -0
  36. data/lib/god/conditions/memory_usage.rb +82 -0
  37. data/lib/god/conditions/process_exits.rb +66 -0
  38. data/lib/god/conditions/process_running.rb +63 -0
  39. data/lib/god/conditions/socket_responding.rb +142 -0
  40. data/lib/god/conditions/tries.rb +44 -0
  41. data/lib/god/configurable.rb +57 -0
  42. data/lib/god/contact.rb +114 -0
  43. data/lib/god/contacts/airbrake.rb +44 -0
  44. data/lib/god/contacts/campfire.rb +121 -0
  45. data/lib/god/contacts/email.rb +130 -0
  46. data/lib/god/contacts/hipchat.rb +117 -0
  47. data/lib/god/contacts/jabber.rb +75 -0
  48. data/lib/god/contacts/prowl.rb +57 -0
  49. data/lib/god/contacts/scout.rb +55 -0
  50. data/lib/god/contacts/sensu.rb +59 -0
  51. data/lib/god/contacts/slack.rb +98 -0
  52. data/lib/god/contacts/statsd.rb +46 -0
  53. data/lib/god/contacts/twitter.rb +51 -0
  54. data/lib/god/contacts/webhook.rb +74 -0
  55. data/lib/god/driver.rb +238 -0
  56. data/lib/god/errors.rb +24 -0
  57. data/lib/god/event_handler.rb +112 -0
  58. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  59. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  60. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  61. data/lib/god/logger.rb +109 -0
  62. data/lib/god/metric.rb +87 -0
  63. data/lib/god/process.rb +381 -0
  64. data/lib/god/registry.rb +32 -0
  65. data/lib/god/simple_logger.rb +59 -0
  66. data/lib/god/socket.rb +113 -0
  67. data/lib/god/sugar.rb +62 -0
  68. data/lib/god/sys_logger.rb +45 -0
  69. data/lib/god/system/portable_poller.rb +42 -0
  70. data/lib/god/system/process.rb +50 -0
  71. data/lib/god/system/slash_proc_poller.rb +92 -0
  72. data/lib/god/task.rb +552 -0
  73. data/lib/god/timeline.rb +25 -0
  74. data/lib/god/trigger.rb +43 -0
  75. data/lib/god/watch.rb +340 -0
  76. data/mcproc.gemspec +192 -0
  77. data/test/configs/child_events/child_events.god +44 -0
  78. data/test/configs/child_events/simple_server.rb +3 -0
  79. data/test/configs/child_polls/child_polls.god +37 -0
  80. data/test/configs/child_polls/simple_server.rb +12 -0
  81. data/test/configs/complex/complex.god +59 -0
  82. data/test/configs/complex/simple_server.rb +3 -0
  83. data/test/configs/contact/contact.god +118 -0
  84. data/test/configs/contact/simple_server.rb +3 -0
  85. data/test/configs/daemon_events/daemon_events.god +37 -0
  86. data/test/configs/daemon_events/simple_server.rb +8 -0
  87. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  88. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  89. data/test/configs/daemon_polls/simple_server.rb +6 -0
  90. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  91. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  92. data/test/configs/keepalive/keepalive.god +9 -0
  93. data/test/configs/keepalive/keepalive.rb +12 -0
  94. data/test/configs/lifecycle/lifecycle.god +25 -0
  95. data/test/configs/matias/matias.god +50 -0
  96. data/test/configs/real.rb +59 -0
  97. data/test/configs/running_load/running_load.god +16 -0
  98. data/test/configs/stop_options/simple_server.rb +12 -0
  99. data/test/configs/stop_options/stop_options.god +39 -0
  100. data/test/configs/stress/simple_server.rb +3 -0
  101. data/test/configs/stress/stress.god +15 -0
  102. data/test/configs/task/logs/.placeholder +0 -0
  103. data/test/configs/task/task.god +26 -0
  104. data/test/configs/test.rb +61 -0
  105. data/test/configs/usr1_trapper.rb +10 -0
  106. data/test/helper.rb +172 -0
  107. data/test/suite.rb +6 -0
  108. data/test/test_airbrake.rb +14 -0
  109. data/test/test_behavior.rb +18 -0
  110. data/test/test_campfire.rb +22 -0
  111. data/test/test_condition.rb +52 -0
  112. data/test/test_conditions_disk_usage.rb +50 -0
  113. data/test/test_conditions_http_response_code.rb +109 -0
  114. data/test/test_conditions_process_running.rb +40 -0
  115. data/test/test_conditions_socket_responding.rb +176 -0
  116. data/test/test_conditions_tries.rb +67 -0
  117. data/test/test_contact.rb +109 -0
  118. data/test/test_driver.rb +26 -0
  119. data/test/test_email.rb +34 -0
  120. data/test/test_event_handler.rb +82 -0
  121. data/test/test_god.rb +710 -0
  122. data/test/test_god_system.rb +201 -0
  123. data/test/test_handlers_kqueue_handler.rb +16 -0
  124. data/test/test_hipchat.rb +23 -0
  125. data/test/test_jabber.rb +29 -0
  126. data/test/test_logger.rb +55 -0
  127. data/test/test_metric.rb +74 -0
  128. data/test/test_process.rb +263 -0
  129. data/test/test_prowl.rb +15 -0
  130. data/test/test_registry.rb +15 -0
  131. data/test/test_sensu.rb +11 -0
  132. data/test/test_slack.rb +57 -0
  133. data/test/test_socket.rb +34 -0
  134. data/test/test_statsd.rb +22 -0
  135. data/test/test_sugar.rb +42 -0
  136. data/test/test_system_portable_poller.rb +17 -0
  137. data/test/test_system_process.rb +30 -0
  138. data/test/test_task.rb +246 -0
  139. data/test/test_timeline.rb +37 -0
  140. data/test/test_trigger.rb +63 -0
  141. data/test/test_watch.rb +286 -0
  142. data/test/test_webhook.rb +22 -0
  143. metadata +475 -0
@@ -0,0 +1,66 @@
1
module God
  module Conditions
    # Trigger when a process exits.
    #
    # +pid_file+ is the pid file of the process in question. Automatically
    # populated for Watches.
    #
    # Examples
    #
    #   # Trigger if process exits (from a Watch).
    #   on.condition(:process_exits)
    #
    #   # Trigger if process exits (non-Watch).
    #   on.condition(:process_exits) do |c|
    #     c.pid_file = "/var/run/mongrel.3000.pid"
    #   end
    class ProcessExits < EventCondition
      # The String PID file location of the process in question. Automatically
      # populated for Watches.
      attr_accessor :pid_file

      # Seed +info+ with a generic message; it is replaced with a pid-specific
      # message by the callback installed in #register.
      def initialize
        self.info = "process exited"
      end

      # This condition has no mandatory attributes.
      #
      # Returns true.
      def valid?
        true
      end

      # Resolve the PID to monitor.
      #
      # Returns the Integer parsed from +pid_file+ when one is configured,
      # otherwise whatever the owning watch reports as its pid.
      # Raises SystemCallError (e.g. Errno::ENOENT) if +pid_file+ is set but
      # cannot be read.
      def pid
        self.pid_file ? File.read(self.pid_file).strip.to_i : self.watch.pid
      end

      # Register a :proc_exit handler for the current PID. When the event
      # fires, +info+ is updated and the watch is triggered with this condition.
      #
      # Raises EventRegistrationFailedError if registering (or logging the
      # registration) raises a StandardError. Errors from resolving the PID
      # itself happen before the guarded section and propagate unchanged.
      def register
        pid = self.pid

        begin
          EventHandler.register(pid, :proc_exit) do |extra|
            formatted_extra = extra.size > 0 ? " #{extra.inspect}" : ""
            self.info = "process #{pid} exited#{formatted_extra}"
            self.watch.trigger(self)
          end

          msg = "#{self.watch.name} registered 'proc_exit' event for pid #{pid}"
          applog(self.watch, :info, msg)
        rescue StandardError
          raise EventRegistrationFailedError.new
        end
      end

      # Remove the :proc_exit handler for the current PID.
      #
      # If no PID can be determined (the watch has none cached, or the PID
      # file cannot be read) an error is logged instead of raising, so a PID
      # file that has already disappeared does not abort deregistration.
      def deregister
        pid = pid_or_nil
        if pid
          EventHandler.deregister(pid, :proc_exit)

          msg = "#{self.watch.name} deregistered 'proc_exit' event for pid #{pid}"
          applog(self.watch, :info, msg)
        else
          pid_file_location = self.pid_file || self.watch.pid_file
          applog(self.watch, :error, "#{self.watch.name} could not deregister: no cached PID or PID file #{pid_file_location} (#{self.base_name})")
        end
      end

      private

      # Like #pid, but an unreadable PID file yields nil instead of raising.
      def pid_or_nil
        self.pid
      rescue SystemCallError
        nil
      end
    end

  end
end
@@ -0,0 +1,63 @@
1
module God
  module Conditions
    # Trigger when a process is running or not running depending on attributes.
    #
    # Examples
    #
    #   # Trigger if process IS NOT running.
    #   on.condition(:process_running) do |c|
    #     c.running = false
    #   end
    #
    #   # Trigger if process IS running.
    #   on.condition(:process_running) do |c|
    #     c.running = true
    #   end
    #
    #   # Non-Watch Tasks must specify a PID file.
    #   on.condition(:process_running) do |c|
    #     c.running = false
    #     c.pid_file = "/var/run/mongrel.3000.pid"
    #   end
    class ProcessRunning < PollCondition
      # Public: The Boolean specifying whether you want to trigger if the
      # process is running (true) or if it is not running (false).
      attr_accessor :running

      # Public: The String PID file location of the process in question.
      # Automatically populated for Watches.
      attr_accessor :pid_file

      # Resolve the PID to inspect.
      #
      # Returns the Integer parsed from +pid_file+ when one is configured,
      # otherwise whatever the owning watch reports as its pid.
      # Raises SystemCallError (e.g. Errno::ENOENT) if +pid_file+ is set but
      # cannot be read.
      def pid
        self.pid_file ? File.read(self.pid_file).strip.to_i : self.watch.pid
      end

      # Check that a PID file is available (here or on the watch) and that
      # +running+ has been set, complaining about each missing attribute.
      #
      # Returns true if the configuration is usable, false otherwise.
      def valid?
        valid = true
        valid &= complain("Attribute 'pid_file' must be specified", self) if self.pid_file.nil? && self.watch.pid_file.nil?
        valid &= complain("Attribute 'running' must be specified", self) if self.running.nil?
        valid
      end

      # Poll the process state and record it in +info+.
      #
      # A missing PID, or a PID file that cannot be read, counts as
      # "not running" rather than raising out of the poll.
      #
      # Returns true when the observed state matches +running+, false otherwise.
      def test
        pid = readable_pid
        active = pid && System::Process.new(pid).exists?

        # Always report the observed state, including when the condition
        # does not trigger.
        self.info = [active ? "process is running" : "process is not running"]

        # Trigger exactly when the desired and observed states agree.
        !!self.running == !!active
      end

      private

      # Like #pid, but an unreadable PID file yields nil instead of raising.
      def readable_pid
        self.pid
      rescue SystemCallError
        nil
      end
    end
  end
end
@@ -0,0 +1,142 @@
1
+ require 'socket'
2
+ include Socket::Constants
3
+
4
module God
  module Conditions
    # Condition Symbol :socket_responding
    # Type: Poll
    #
    # Trigger when a TCP or UNIX socket is responding or not
    #
    # Parameters
    #   Required
    #     +family+ is the family of socket: either 'tcp' or 'unix'
    #     --one of port or path--
    #     +port+ is the port (required if +family+ is 'tcp')
    #     +path+ is the path (required if +family+ is 'unix')
    #
    #   Optional
    #     +addr+ is the address to connect to for tcp sockets (default '127.0.0.1')
    #     +responding+ is the boolean specifying whether you want to trigger if the socket is responding (true)
    #                  or if it is not responding (false) (default false)
    #     +times+ is an Integer n (treated as [n, n]) or a pair [hits, window]:
    #             trigger once at least +hits+ of the retained results matched
    #             (default [1, 1])
    #
    # Examples
    #
    # Trigger if the TCP socket on port 80 is not responding or the connection is refused
    #
    # on.condition(:socket_responding) do |c|
    #   c.family = 'tcp'
    #   c.port = '80'
    # end
    #
    # Trigger if the socket is not responding or the connection is refused (use alternate compact +socket+ interface)
    #
    # on.condition(:socket_responding) do |c|
    #   c.socket = 'tcp:80'
    # end
    #
    # Trigger if the socket is responding
    #
    # on.condition(:socket_responding) do |c|
    #   c.socket = 'tcp:80'
    #   c.responding = true
    # end
    #
    # Trigger if the socket is not responding or the connection is refused 5 times in a row
    #
    # on.condition(:socket_responding) do |c|
    #   c.socket = 'tcp:80'
    #   c.times = 5
    # end
    #
    # Trigger if the Unix socket on path '/tmp/sock' is not responding or non-existent
    #
    # on.condition(:socket_responding) do |c|
    #   c.family = 'unix'
    #   c.path = '/tmp/sock'
    # end
    #
    class SocketResponding < PollCondition
      attr_accessor :family, :addr, :port, :path, :times, :responding

      def initialize
        super
        # default to tcp on the localhost
        self.family = 'tcp'
        self.addr = '127.0.0.1'
        # Set these to nil/0 values
        self.port = 0
        self.path = nil
        self.responding = false

        self.times = [1, 1]
      end

      # Expand an Integer +times+ into a [hits, window] pair and build the
      # timelines sized by the window.
      def prepare
        if self.times.kind_of?(Integer)
          self.times = [self.times, self.times]
        end

        @timeline = Timeline.new(self.times[1])
        @history = Timeline.new(self.times[1])
      end

      # Forget all recorded probe results.
      def reset
        @timeline.clear
        @history.clear
      end

      # Compact setter. Accepted forms:
      #   'family:addr:port'  (three components)
      #   'tcp:port'
      #   'unix:path'
      # Any other shape leaves the current settings untouched.
      def socket=(s)
        components = s.split(':')
        if components.size == 3
          @family,@addr,@port = components
          @port = @port.to_i
        elsif components[0] =~ /^tcp$/
          @family = components[0]
          @port = components[1].to_i
        elsif components[0] =~ /^unix$/
          @family = components[0]
          @path = components[1]
        end
      end

      # Returns true when the family is 'tcp' with a port set, or 'unix' with
      # a path set; false otherwise (complaining about a missing port/path).
      def valid?
        valid = true
        if self.family == 'tcp' && @port == 0
          valid &= complain("Attribute 'port' must be specified for tcp sockets", self)
        end
        if self.family == 'unix' && self.path.nil?
          valid &= complain("Attribute 'path' must be specified for unix sockets", self)
        end
        valid = false unless %w{tcp unix}.member?(self.family)
        valid
      end

      # Probe the socket once, record whether the outcome matched
      # +responding+, and decide whether enough recorded probes matched.
      #
      # Returns true (and sets +info+) when at least +times.first+ of the
      # retained results matched, false otherwise.
      def test
        self.info = []

        status =
          case self.family
          when 'tcp'
            self.responding == connects? { TCPSocket.new(self.addr, self.port) }
          when 'unix'
            self.responding == connects? { UNIXSocket.new(self.path) }
          else
            false
          end

        @timeline.push(status)
        history = "[" + @timeline.map { |t| t ? '*' : '' }.join(',') + "]"
        if @timeline.select { |x| x }.size >= self.times.first
          self.info = "socket out of bounds #{history}"
          true
        else
          false
        end
      end

      private

      # Yield to open a socket and report whether the connection succeeded.
      # The probe socket is always closed so that repeated polling does not
      # leak file descriptors.
      #
      # Returns true if the block returned a socket, false if it raised a
      # SystemCallError (e.g. connection refused, missing socket file).
      def connects?
        socket = yield
        true
      rescue SystemCallError
        false
      ensure
        socket.close if socket && !socket.closed?
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module God
  module Conditions

    # Poll condition that triggers once it has been tested +times+ times,
    # optionally requiring those tests to fall inside +within+ seconds.
    class Tries < PollCondition
      # times  - number of recorded attempts required to trigger
      # within - optional time span (seconds) the recorded attempts must fit in
      attr_accessor :times, :within

      # Build the timeline that stores the timestamp of each attempt,
      # sized by +times+.
      def prepare
        @timeline = Timeline.new(self.times)
      end

      # Drop every recorded attempt.
      def reset
        @timeline.clear
      end

      # Returns true when +times+ has been set, false (after complaining)
      # otherwise.
      def valid?
        ok = true
        if self.times.nil?
          ok &= complain("Attribute 'times' must be specified", self)
        end
        ok
      end

      # Record the current attempt and decide whether the limit is reached.
      #
      # Returns true when the timeline holds exactly +times+ entries and,
      # if +within+ is set, the oldest and newest entries are less than
      # +within+ seconds apart. +info+ is updated in both outcomes.
      def test
        @timeline << Time.now

        attempts = @timeline.size
        enough_attempts = (attempts == self.times)
        inside_window = self.within.nil? || (@timeline.last - @timeline.first) < self.within

        history =
          if within
            "[#{attempts}/#{self.times} within #{(@timeline.last - @timeline.first).to_i}s]"
          else
            "[#{attempts}/#{self.times}]"
          end

        unless enough_attempts && inside_window
          self.info = "tries within bounds #{history}"
          return false
        end

        self.info = "tries exceeded #{history}"
        true
      end
    end

  end
end
@@ -0,0 +1,57 @@
1
module God

  # Mixin providing the configuration lifecycle hooks (prepare / reset /
  # valid?) and naming/complaint helpers shared by configurable objects.
  module Configurable
    # Hook: override in your Configurable (optional).
    #
    # Called once after the Configurable has been sent to the block and
    # attributes have been set. Do any post-processing on attributes here.
    def prepare
    end

    # Hook: override in your Configurable (optional). No-op by default.
    def reset
    end

    # Hook: override in your Configurable (optional).
    #
    # Called once during evaluation of the config file. Return true if valid,
    # false otherwise.
    #
    # The convenience method 'complain' prints a message and returns false,
    # which makes reporting several validation errors straightforward:
    #
    #   def valid?
    #     valid = true
    #     valid &= complain("You must specify the 'pid_file' attribute for :memory_usage") if self.pid_file.nil?
    #     valid &= complain("You must specify the 'above' attribute for :memory_usage") if self.above.nil?
    #     valid
    #   end
    def valid?
      true
    end

    # Returns the last segment of this object's class name (memoized).
    def base_name
      x = 1 # fix for MRI's local scope optimization bug DO NOT REMOVE!
      return @base_name if @base_name
      @base_name = self.class.name.split('::').last
    end

    # Returns the human-readable name; defaults to #base_name.
    def friendly_name
      base_name
    end

    # Log a validation error via applog.
    #
    # text - the String message
    # c    - optional object being validated; when it exposes a watch, the
    #        watch name prefixes the message, and its friendly_name is appended
    #
    # Returns false, so the result can be folded into a validity flag.
    def self.complain(text, c = nil)
      # Not every +c+ responds to #watch (and +c+ may be nil); treat any
      # failure to fetch it as "no watch".
      watch =
        begin
          c.watch
        rescue StandardError
          nil
        end

      prefix = watch ? "#{watch.name}: " : ""
      message = prefix + text
      message += " for #{c.friendly_name}" if c

      applog(watch, :error, message)
      false
    end

    # Instance-level shortcut for Configurable.complain.
    def complain(text, c = nil)
      Configurable.complain(text, c)
    end
  end

end
@@ -0,0 +1,114 @@
1
module God

  # Base class for notification contacts. Concrete contacts live under
  # God::Contacts and must override #notify.
  class Contact
    include Configurable

    attr_accessor :name, :group, :info

    # Instantiate the contact class matching +kind+.
    #
    # kind - Symbol or String in snake_case (e.g. :email); it is camel-cased
    #        and looked up under God::Contacts.
    #
    # Returns the new contact instance.
    # Raises NoSuchContactError if no class with that name can be found.
    # Aborts if the class found does not subclass God::Contact.
    def self.generate(kind)
      sym = kind.to_s.capitalize.gsub(/_(.)/){$1.upcase}.intern

      # Only the constant lookup is guarded: a NameError/NoMethodError raised
      # inside a contact's own constructor is a genuine bug in that contact
      # and must not be reported as "No Contact found".
      klass =
        begin
          God::Contacts.const_get(sym)
        rescue NameError
          nil
        end

      unless klass.is_a?(Class)
        raise NoSuchContactError.new("No Contact found with the class name God::Contacts::#{sym}")
      end

      c = klass.new

      unless c.kind_of?(Contact)
        abort "Contact '#{c.class.name}' must subclass God::Contact"
      end

      c
    end

    # Validate a contact: it must have a name.
    #
    # Returns true if valid, false (after complaining) otherwise.
    def self.valid?(contact)
      valid = true
      valid &= Configurable.complain("Attribute 'name' must be specified", contact) if contact.name.nil?
      valid
    end

    # Yield the class itself to the given block.
    def self.defaults
      yield self
    end

    # Look up the setting +name+ on this instance, falling back to the
    # class-level instance variable of the same name when the instance value
    # is nil or false.
    def arg(name)
      self.instance_variable_get("@#{name}") || self.class.instance_variable_get("@#{name}")
    end

    # Normalize the given notify specification into canonical form.
    #   +spec+ is the notify spec as a String, Array of Strings, or Hash
    #
    # Canonical form looks like:
    # {:contacts => ['fred', 'john'], :priority => '1', :category => 'awesome'}
    # Where :contacts will be present and point to an Array of Strings. Both
    # :priority and :category may not be present but if they are, they will each
    # contain a single String.
    #
    # NOTE: a Hash +spec+ is normalized in place and that same Hash is
    # returned; String and Array specs produce a new Hash.
    #
    # Returns normalized notify spec
    # Raises ArgumentError on invalid spec (message contains details)
    def self.normalize(spec)
      case spec
        when String
          {:contacts => Array(spec)}
        when Array
          unless spec.all? { |x| x.instance_of?(String) }
            raise ArgumentError.new("contains non-String elements")
          end
          {:contacts => spec}
        when Hash
          # Validate against a scratch copy so keys can be removed while checking.
          copy = spec.dup

          # check :contacts
          contacts = copy.delete(:contacts)
          unless contacts
            raise ArgumentError.new("must have a :contacts key")
          end

          case contacts
            when String
              # valid
            when Array
              unless contacts.all? { |x| x.instance_of?(String) }
                raise ArgumentError.new("has a :contacts key containing non-String elements")
              end
            else
              raise ArgumentError.new("must have a :contacts key pointing to a String or Array of Strings")
          end

          # remove priority and category
          copy.delete(:priority)
          copy.delete(:category)

          # anything left over is an unknown key
          unless copy.empty?
            raise ArgumentError.new("contains extra elements: #{copy.inspect}")
          end

          # normalize
          spec[:contacts] &&= Array(spec[:contacts])
          spec[:priority] &&= spec[:priority].to_s
          spec[:category] &&= spec[:category].to_s

          spec
        else
          raise ArgumentError.new("must be a String (contact name), Array (of contact names), or Hash (contact specification)")
      end
    end

    # Abstract
    # Send the message to the external source
    #   +message+ is the message body returned from the condition
    #   +time+ is the Time at which the notification was made
    #   +priority+ is the arbitrary priority String
    #   +category+ is the arbitrary category String
    #   +host+ is the hostname of the server
    #
    # Raises AbstractMethodNotOverriddenError unless overridden.
    def notify(message, time, priority, category, host)
      raise AbstractMethodNotOverriddenError.new("Contact#notify must be overridden in subclasses")
    end

    # Construct the friendly name of this Contact: the inherited friendly
    # name followed by the contact's name, which looks like:
    #
    #   FooBar Contact 'tom'
    def friendly_name
      super + " Contact '#{self.name}'"
    end
  end

end