mcproc 2016.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. checksums.yaml +7 -0
  2. data/Announce.txt +135 -0
  3. data/Gemfile +9 -0
  4. data/History.txt +469 -0
  5. data/LICENSE +22 -0
  6. data/README.md +37 -0
  7. data/Rakefile +185 -0
  8. data/TODO.md +37 -0
  9. data/bin/mcproc +134 -0
  10. data/doc/intro.asciidoc +20 -0
  11. data/doc/mcproc.asciidoc +1592 -0
  12. data/ext/god/.gitignore +5 -0
  13. data/ext/god/extconf.rb +56 -0
  14. data/ext/god/kqueue_handler.c +133 -0
  15. data/ext/god/netlink_handler.c +182 -0
  16. data/lib/god.rb +780 -0
  17. data/lib/god/behavior.rb +52 -0
  18. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  19. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  20. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  21. data/lib/god/cli/command.rb +268 -0
  22. data/lib/god/cli/run.rb +170 -0
  23. data/lib/god/cli/version.rb +23 -0
  24. data/lib/god/compat19.rb +33 -0
  25. data/lib/god/condition.rb +96 -0
  26. data/lib/god/conditions/always.rb +36 -0
  27. data/lib/god/conditions/complex.rb +86 -0
  28. data/lib/god/conditions/cpu_usage.rb +80 -0
  29. data/lib/god/conditions/degrading_lambda.rb +52 -0
  30. data/lib/god/conditions/disk_usage.rb +32 -0
  31. data/lib/god/conditions/file_mtime.rb +28 -0
  32. data/lib/god/conditions/file_touched.rb +44 -0
  33. data/lib/god/conditions/flapping.rb +128 -0
  34. data/lib/god/conditions/http_response_code.rb +184 -0
  35. data/lib/god/conditions/lambda.rb +25 -0
  36. data/lib/god/conditions/memory_usage.rb +82 -0
  37. data/lib/god/conditions/process_exits.rb +66 -0
  38. data/lib/god/conditions/process_running.rb +63 -0
  39. data/lib/god/conditions/socket_responding.rb +142 -0
  40. data/lib/god/conditions/tries.rb +44 -0
  41. data/lib/god/configurable.rb +57 -0
  42. data/lib/god/contact.rb +114 -0
  43. data/lib/god/contacts/airbrake.rb +44 -0
  44. data/lib/god/contacts/campfire.rb +121 -0
  45. data/lib/god/contacts/email.rb +130 -0
  46. data/lib/god/contacts/hipchat.rb +117 -0
  47. data/lib/god/contacts/jabber.rb +75 -0
  48. data/lib/god/contacts/prowl.rb +57 -0
  49. data/lib/god/contacts/scout.rb +55 -0
  50. data/lib/god/contacts/sensu.rb +59 -0
  51. data/lib/god/contacts/slack.rb +98 -0
  52. data/lib/god/contacts/statsd.rb +46 -0
  53. data/lib/god/contacts/twitter.rb +51 -0
  54. data/lib/god/contacts/webhook.rb +74 -0
  55. data/lib/god/driver.rb +238 -0
  56. data/lib/god/errors.rb +24 -0
  57. data/lib/god/event_handler.rb +112 -0
  58. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  59. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  60. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  61. data/lib/god/logger.rb +109 -0
  62. data/lib/god/metric.rb +87 -0
  63. data/lib/god/process.rb +381 -0
  64. data/lib/god/registry.rb +32 -0
  65. data/lib/god/simple_logger.rb +59 -0
  66. data/lib/god/socket.rb +113 -0
  67. data/lib/god/sugar.rb +62 -0
  68. data/lib/god/sys_logger.rb +45 -0
  69. data/lib/god/system/portable_poller.rb +42 -0
  70. data/lib/god/system/process.rb +50 -0
  71. data/lib/god/system/slash_proc_poller.rb +92 -0
  72. data/lib/god/task.rb +552 -0
  73. data/lib/god/timeline.rb +25 -0
  74. data/lib/god/trigger.rb +43 -0
  75. data/lib/god/watch.rb +340 -0
  76. data/mcproc.gemspec +192 -0
  77. data/test/configs/child_events/child_events.god +44 -0
  78. data/test/configs/child_events/simple_server.rb +3 -0
  79. data/test/configs/child_polls/child_polls.god +37 -0
  80. data/test/configs/child_polls/simple_server.rb +12 -0
  81. data/test/configs/complex/complex.god +59 -0
  82. data/test/configs/complex/simple_server.rb +3 -0
  83. data/test/configs/contact/contact.god +118 -0
  84. data/test/configs/contact/simple_server.rb +3 -0
  85. data/test/configs/daemon_events/daemon_events.god +37 -0
  86. data/test/configs/daemon_events/simple_server.rb +8 -0
  87. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  88. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  89. data/test/configs/daemon_polls/simple_server.rb +6 -0
  90. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  91. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  92. data/test/configs/keepalive/keepalive.god +9 -0
  93. data/test/configs/keepalive/keepalive.rb +12 -0
  94. data/test/configs/lifecycle/lifecycle.god +25 -0
  95. data/test/configs/matias/matias.god +50 -0
  96. data/test/configs/real.rb +59 -0
  97. data/test/configs/running_load/running_load.god +16 -0
  98. data/test/configs/stop_options/simple_server.rb +12 -0
  99. data/test/configs/stop_options/stop_options.god +39 -0
  100. data/test/configs/stress/simple_server.rb +3 -0
  101. data/test/configs/stress/stress.god +15 -0
  102. data/test/configs/task/logs/.placeholder +0 -0
  103. data/test/configs/task/task.god +26 -0
  104. data/test/configs/test.rb +61 -0
  105. data/test/configs/usr1_trapper.rb +10 -0
  106. data/test/helper.rb +172 -0
  107. data/test/suite.rb +6 -0
  108. data/test/test_airbrake.rb +14 -0
  109. data/test/test_behavior.rb +18 -0
  110. data/test/test_campfire.rb +22 -0
  111. data/test/test_condition.rb +52 -0
  112. data/test/test_conditions_disk_usage.rb +50 -0
  113. data/test/test_conditions_http_response_code.rb +109 -0
  114. data/test/test_conditions_process_running.rb +40 -0
  115. data/test/test_conditions_socket_responding.rb +176 -0
  116. data/test/test_conditions_tries.rb +67 -0
  117. data/test/test_contact.rb +109 -0
  118. data/test/test_driver.rb +26 -0
  119. data/test/test_email.rb +34 -0
  120. data/test/test_event_handler.rb +82 -0
  121. data/test/test_god.rb +710 -0
  122. data/test/test_god_system.rb +201 -0
  123. data/test/test_handlers_kqueue_handler.rb +16 -0
  124. data/test/test_hipchat.rb +23 -0
  125. data/test/test_jabber.rb +29 -0
  126. data/test/test_logger.rb +55 -0
  127. data/test/test_metric.rb +74 -0
  128. data/test/test_process.rb +263 -0
  129. data/test/test_prowl.rb +15 -0
  130. data/test/test_registry.rb +15 -0
  131. data/test/test_sensu.rb +11 -0
  132. data/test/test_slack.rb +57 -0
  133. data/test/test_socket.rb +34 -0
  134. data/test/test_statsd.rb +22 -0
  135. data/test/test_sugar.rb +42 -0
  136. data/test/test_system_portable_poller.rb +17 -0
  137. data/test/test_system_process.rb +30 -0
  138. data/test/test_task.rb +246 -0
  139. data/test/test_timeline.rb +37 -0
  140. data/test/test_trigger.rb +63 -0
  141. data/test/test_watch.rb +286 -0
  142. data/test/test_webhook.rb +22 -0
  143. metadata +475 -0
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ (The MIT License)
2
+
3
+ Copyright (c) 2007 Tom Preston-Werner
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ 'Software'), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ McProc: The Ruby Framework for Process Management
2
+ =================================================
3
+
4
+ * Authors: Mario Scondo
5
+ * Website: http://www.linux-support.com/mcproc
6
+ * Issue Tracker: http://support.linux-support.com/projects/mcproc
7
+ * Fork of: https://github.com/mojombo/god.git
8
+
9
+ Description
10
+ -----------
11
+
12
+ This fork will stay compatible with the parent project as long as possible.
13
+ At the moment McProc will work with all of your projects configured
14
+ to run with https://github.com/mojombo/god.git
15
+
16
+ This project is an easy to configure, easy to extend monitoring framework written in
17
+ Ruby.
18
+
19
+ Keeping your server processes and tasks running should be a simple part of
20
+ your deployment process. McProc aims to be the simplest, most powerful monitoring
21
+ application available.
22
+
23
+ The software product is being continuously enhanced. It has been heavily tested with
24
+ production systems!
25
+
26
+
27
+ Documentation
28
+ -------------
29
+
30
+ See in-repo documentation at `REPO_ROOT/doc`.
31
+ See online documentation at http://www.linux-support.com/mcproc.
32
+
33
+
34
+ License
35
+ -------
36
+
37
+ See LICENSE file.
data/Rakefile ADDED
@@ -0,0 +1,185 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rdoc/task'
4
+ require 'date'
5
+
6
+ #############################################################################
7
+ #
8
+ # Helper functions
9
+ #
10
+ #############################################################################
11
+
12
+ def name
13
+ @name ||= Dir['*.gemspec'].first.split('.').first
14
+ end
15
+
16
+ def version
17
+ line = File.read("lib/god.rb")[/^\s*VERSION\s*=\s*.*/]
18
+ line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
19
+ end
20
+
21
+ def date
22
+ Date.today.to_s
23
+ end
24
+
25
+ def rubyforge_project
26
+ name
27
+ end
28
+
29
+ def gemspec_file
30
+ "#{name}.gemspec"
31
+ end
32
+
33
+ def gem_file
34
+ "#{name}-#{version}.gem"
35
+ end
36
+
37
+ def replace_header(head, header_name)
38
+ head.sub!(/(\.#{header_name}\s*= ').*'/) { "#{$1}#{send(header_name)}'"}
39
+ end
40
+
41
+ #############################################################################
42
+ #
43
+ # Standard tasks
44
+ #
45
+ #############################################################################
46
+
47
+ task :default => :test
48
+
49
+ require 'rake/testtask'
50
+ Rake::TestTask.new(:test) do |test|
51
+ file_list = FileList['test/**/test_*.rb']
52
+ file_list = file_list.exclude("test/test_god_system.rb")
53
+ test.test_files = file_list
54
+ test.libs << 'lib' << 'test'
55
+ test.verbose = true
56
+ end
57
+
58
+ require 'rake/testtask'
59
+ Rake::TestTask.new(:system_test) do |test|
60
+ test.libs << 'lib' << 'test'
61
+ test.pattern = 'test/**/test_god_system.rb'
62
+ test.verbose = true
63
+ end
64
+
65
+ desc "Generate RCov test coverage and open in your browser"
66
+ task :coverage do
67
+ require 'rcov'
68
+ sh "rm -fr coverage"
69
+ sh "rcov test/test_*.rb"
70
+ sh "open coverage/index.html"
71
+ end
72
+
73
+ require 'rdoc/task'
74
+ Rake::RDocTask.new do |rdoc|
75
+ rdoc.rdoc_dir = 'rdoc'
76
+ rdoc.title = "#{name} #{version}"
77
+ rdoc.rdoc_files.include('README*')
78
+ rdoc.rdoc_files.include('lib/**/*.rb')
79
+ end
80
+
81
+ desc "Open an irb session preloaded with this library"
82
+ task :console do
83
+ sh "irb -rubygems -r ./lib/#{name}.rb"
84
+ end
85
+
86
+ #############################################################################
87
+ #
88
+ # Custom tasks (add your own tasks here)
89
+ #
90
+ #############################################################################
91
+
92
+ desc "Generate and view the site locally"
93
+ task :site do
94
+ # Generate the dynamic parts of the site.
95
+ puts "Generating dynamic..."
96
+ require 'asciidoctor'
97
+ content = File.read('./doc/mcproc.asciidoc')
98
+ template = File.read('./site/index.template.html')
99
+ html = Asciidoctor.convert content, safe: :safe
100
+ index = template.sub("{{ content }}", html)
101
+ File.open('./site/index.html', 'w') do |f|
102
+ f.write(index)
103
+ end
104
+
105
+ # works just with Gnome
106
+ puts "Done. Opening in browser..."
107
+ sh "gnome-open site/index.html"
108
+ end
109
+
110
+ desc "Commit the local site to the gh-pages branch and deploy"
111
+ task :site_release do
112
+ # Ensure the gh-pages dir exists so we can generate into it.
113
+ puts "Checking for gh-pages dir..."
114
+ unless File.exist?("./gh-pages")
115
+ puts "No gh-pages directory found. Run the following commands first:"
116
+ puts " `git clone git@github.com:linux-support/mcproc gh-pages"
117
+ puts " `cd gh-pages"
118
+ puts " `git checkout gh-pages`"
119
+ exit(1)
120
+ end
121
+
122
+ # Copy the rest of the site over.
123
+ puts "Copying static..."
124
+ sh "cp -R site/* gh-pages/"
125
+
126
+ # Commit the changes
127
+ sha = `git log`.match(/[a-z0-9]{40}/)[0]
128
+ sh "cd gh-pages && git add . && git commit -m 'Updating to #{sha}.' && git push"
129
+ puts 'Done.'
130
+ end
131
+
132
+ #############################################################################
133
+ #
134
+ # Packaging tasks
135
+ #
136
+ #############################################################################
137
+
138
+ desc "Create tag v#{version} and build and push #{gem_file} to Rubygems"
139
+ task :release => :build do
140
+ unless `git branch` =~ /^\* master$/
141
+ puts "You must be on the master branch to release!"
142
+ exit!
143
+ end
144
+ sh "git commit --allow-empty -a -m 'Release #{version}'"
145
+ sh "git tag v#{version}"
146
+ sh "git push origin master"
147
+ sh "git push origin v#{version}"
148
+ sh "gem push pkg/#{name}-#{version}.gem"
149
+ end
150
+
151
+ desc "Build #{gem_file} into the pkg directory"
152
+ task :build => :gemspec do
153
+ sh "mkdir -p pkg"
154
+ sh "gem build #{gemspec_file}"
155
+ sh "mv #{gem_file} pkg"
156
+ end
157
+
158
+ desc "Generate #{gemspec_file}"
159
+ task :gemspec do
160
+ # read spec file and split out manifest section
161
+ spec = File.read(gemspec_file)
162
+ head, manifest, tail = spec.split(" # = MANIFEST =\n")
163
+
164
+ # replace name version and date
165
+ replace_header(head, :name)
166
+ replace_header(head, :version)
167
+ replace_header(head, :date)
168
+ #comment this out if your rubyforge_project has a different name
169
+ replace_header(head, :rubyforge_project)
170
+
171
+ # determine file list from git ls-files
172
+ files = `git ls-files`.
173
+ split("\n").
174
+ sort.
175
+ reject { |file| file =~ /^\./ }.
176
+ reject { |file| file =~ /^(rdoc|pkg|examples|ideas|init|site)/ }.
177
+ map { |file| " #{file}" }.
178
+ join("\n")
179
+
180
+ # piece file back together and write
181
+ manifest = " s.files = %w[\n#{files}\n ]\n"
182
+ spec = [head, manifest, tail].join(" # = MANIFEST =\n")
183
+ File.open(gemspec_file, 'w') { |io| io.write(spec) }
184
+ puts "Updated #{gemspec_file}"
185
+ end
data/TODO.md ADDED
@@ -0,0 +1,37 @@
1
+ McProc Todos
2
+ ============
3
+
4
+ This is the list of upcoming releases and features.
5
+
6
+
7
+ Release 2016.2.20:
8
+
9
+ Change #8578: Clean-up the project
10
+ Change #8579: Setup a website (project home)
11
+ Change #8580: Setup a task tracker
12
+
13
+
14
+ Release 2016.5.20:
15
+
16
+ * add monitoring agent features to report the status of filesystems, applications and other details
17
+ * provide clear and concise documentation
18
+ * add aliases for some of the misleading commands
19
+ * new monitoring options
20
+ * extend file_mtime
21
+ * file_content reader (check regex)
22
+ * add a number of extensions to monitor applications
23
+ * add unit tests
24
+
25
+
26
+ Release 2016.10.13:
27
+
28
+ * add HA features to control applications in clouds
29
+ * add monitoring collector (broker) and a web ui
30
+
31
+
32
+ Release 2016.12.20:
33
+
34
+ * add support for limited and 'safe' configuration files
35
+ * add an administration UI
36
+
37
+
data/bin/mcproc ADDED
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ STDOUT.sync = true
4
+
5
+ $:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
6
+
7
+ require 'optparse'
8
+ require 'drb'
9
+ require 'yaml'
10
+
11
+ begin
12
+ # Save ARGV in case someone wants to use it later
13
+ ORIGINAL_ARGV = ARGV.dup
14
+
15
+ options = {:daemonize => true, :port => 17165, :syslog => true, :events => true}
16
+
17
+ opts = OptionParser.new do |opts|
18
+ opts.banner = <<-EOF
19
+ Usage:
20
+ Starting:
21
+ mcproc [-c <config file>] [-p <port> | -b] [-P <file>] [-l <file>] [-D]
22
+
23
+ Querying:
24
+ mcproc <command> <argument> [-p <port>]
25
+ mcproc <command> [-p <port>]
26
+ mcproc -v
27
+ mcproc -V (must be run as root to be accurate on Linux)
28
+
29
+ Commands:
30
+ start <task or group name> start task or group
31
+ restart <task or group name> restart task or group
32
+ stop <task or group name> stop task or group
33
+ monitor <task or group name> monitor task or group
34
+ unmonitor <task or group name> unmonitor task or group
35
+ remove <task or group name> remove task or group from mcproc
36
+ load <file> [action] load a config into a running mcproc
37
+ log <task name> show realtime log for given task
38
+ status [task or group name] show status
39
+ signal <task or group name> <sig> signal all matching tasks
40
+ quit stop mcproc
41
+ terminate stop mcproc and all tasks
42
+ check run self diagnostic
43
+
44
+ Options:
45
+ EOF
46
+
47
+ opts.on("-cCONFIG", "--config-file CONFIG", "Configuration file") do |x|
48
+ options[:config] = x
49
+ end
50
+
51
+ opts.on("-pPORT", "--port PORT", "Communications port (default 17165)") do |x|
52
+ options[:port] = x
53
+ end
54
+
55
+ opts.on("-b", "--auto-bind", "Auto-bind to an unused port number") do
56
+ options[:port] = "0"
57
+ end
58
+
59
+ opts.on("-PFILE", "--pid FILE", "Where to write the PID file") do |x|
60
+ options[:pid] = x
61
+ end
62
+
63
+ opts.on("-lFILE", "--log FILE", "Where to write the log file") do |x|
64
+ options[:log] = x
65
+ end
66
+
67
+ opts.on("-D", "--no-daemonize", "Don't daemonize") do
68
+ options[:daemonize] = false
69
+ end
70
+
71
+ opts.on("-v", "--version", "Print the version number and exit") do
72
+ options[:version] = true
73
+ end
74
+
75
+ opts.on("-V", "Print extended version and build information") do
76
+ options[:info] = true
77
+ end
78
+
79
+ opts.on("--log-level LEVEL", "Log level [debug|info|warn|error|fatal]") do |x|
80
+ options[:log_level] = x.to_sym
81
+ end
82
+
83
+ opts.on("--no-syslog", "Disable output to syslog") do
84
+ options[:syslog] = false
85
+ end
86
+
87
+ opts.on("--attach PID", "Quit mcproc when the attached process dies") do |x|
88
+ options[:attach] = x
89
+ end
90
+
91
+ opts.on("--no-events", "Disable the event system") do
92
+ options[:events] = false
93
+ end
94
+
95
+ opts.on("--bleakhouse", "Enable bleakhouse profiling") do
96
+ options[:bleakhouse] = true
97
+ end
98
+ end
99
+
100
+ opts.parse!
101
+
102
+ # validate
103
+ if options[:log_level] && ![:debug, :info, :warn, :error, :fatal].include?(options[:log_level])
104
+ abort("Invalid log level '#{options[:log_level]}'")
105
+ end
106
+
107
+ # Use this flag to actually load all of the mcproc infrastructure
108
+ $load_god = true
109
+
110
+ # dispatch
111
+ if !options[:config] && options[:version]
112
+ require 'god'
113
+ God::CLI::Version.version
114
+ elsif !options[:config] && options[:info]
115
+ require 'god'
116
+ God::EventHandler.load
117
+ God::CLI::Version.version_extended
118
+ elsif !options[:config] && command = ARGV[0]
119
+ require 'god'
120
+ God::EventHandler.load
121
+ God::CLI::Command.new(command, options, ARGV)
122
+ else
123
+ require 'god/cli/run'
124
+ God::CLI::Run.new(options)
125
+ end
126
+ rescue Exception => e
127
+ if e.instance_of?(SystemExit)
128
+ raise
129
+ else
130
+ puts 'Uncaught exception'
131
+ puts e.message
132
+ puts e.backtrace.join("\n")
133
+ end
134
+ end
data/doc/intro.asciidoc ADDED
@@ -0,0 +1,20 @@
1
+ McProc: The Ruby Framework for Process Management
2
+ =================================================
3
+ Mario Scondo <mcproc@linux-support.com>
4
+
5
+ McProc is an easy to configure, easy to extend monitoring framework written in
6
+ Ruby.
7
+
8
+ Keeping your server processes and tasks running should be a simple part of
9
+ your deployment process. McProc aims to be the simplest, most powerful monitoring
10
+ application available.
11
+
12
+ Features
13
+ --------
14
+
15
+ * Config file is written in Ruby
16
+ * Easily write your own custom conditions in Ruby
17
+ * Supports both poll and event based conditions
18
+ * Different poll conditions can have different intervals
19
+ * Integrated notification system (write your own too!)
20
+ * Easily control non-daemonizing scripts
data/doc/mcproc.asciidoc ADDED
@@ -0,0 +1,1592 @@
1
+ Installation
2
+ ------------
3
+
4
+ The best way to get mcproc is via rubygems:
5
+
6
+ ```terminal
7
+ $ [sudo] gem install mcproc
8
+ ```
9
+
10
+ Requirements
11
+ ------------
12
+
13
+ McProc currently only works on *Linux (kernel 2.6.15+), BSD,* and *Darwin*
14
+ systems. MS Windows is not yet supported. Event-based conditions on Linux
15
+ systems require the `cn` (connector) kernel module loaded or compiled into
16
+ the kernel and mcproc must be run as root.
17
+
18
+ The following systems have been tested. Help us test it on others!
19
+
20
+ * Darwin 10.4.10
21
+ * RedHat Fedora 6-15
22
+ * Ubuntu Dapper (no events)
23
+ * Ubuntu Feisty
24
+ * CentOS 4.5 (no events), 5, 6
25
+
26
+
27
+ Quick Start
28
+ -----------
29
+
30
+ Note: this quick start guide requires mcproc 0.12.0 or above. You can check your
31
+ version by running:
32
+
33
+ ```terminal
34
+ $ mcproc --version
35
+ ```
36
+
37
+ The easiest way to understand how mcproc will make your life better is by trying
38
+ out a simple example. To get you up and running quickly, I'll show you how to
39
+ keep a trivial server running.
40
+
41
+ Open up a new directory and write a simple server. Let's call it
42
+ `simple.rb`:
43
+
44
+ ```ruby
45
+ loop do
46
+ puts 'Hello'
47
+ sleep 1
48
+ end
49
+ ```
50
+
51
+ Now we'll write a mcproc config file that tells mcproc about our process. Place it
52
+ in the same directory and call it `simple.mcproc`:
53
+
54
+ ```ruby
55
+ McProc.watch do |w|
56
+ w.name = "simple"
57
+ w.start = "ruby /full/path/to/simple.rb"
58
+ w.keepalive
59
+ end
60
+ ```
61
+
62
+ This is the simplest possible mcproc configuration. We start by declaring a
63
+ `McProc.watch` block. A watch in mcproc represents a process that we want to watch
64
+ and control. Each watch must have, at minimum, a unique name and a command that
65
+ tells mcproc how to start the process. The `keepalive` declaration tells mcproc to
66
+ keep this process alive. If the process is not running when mcproc starts, it will
67
+ be started. If the process dies, it will be restarted.
68
+
69
+ In this example the `simple` process runs in the foreground, so mcproc will take care of
70
+ daemonizing it and keeping track of the PID for us. When possible, it's best to
71
+ let mcproc daemonize processes for us, that way we don't have to worry about
72
+ specifying and keeping track of PID files. Later on we'll see how to manage
73
+ processes that can't run in the foreground or that require PID files to be specified.
74
+
75
+ To run mcproc, we give it the configuration file we wrote with `-c`. To see what's
76
+ going on, we can ask it to run in the foreground with `-D`:
77
+
78
+ ```terminal
79
+ $ mcproc -c path/to/simple.mcproc -D
80
+ ```
81
+
82
+ There are two ways that mcproc can monitor your process. The first and better way
83
+ is with process events. Not every system supports it, but those that do will
84
+ automatically use it. With events, mcproc will know immediately when a process
85
+ exits. For those systems without process event support, mcproc will use a polling
86
+ mechanism. The output you see throughout this section will show both ways.
87
+
88
+ After starting mcproc, you should see some output like the following:
89
+
90
+ ```terminal
91
+ # Events
92
+
93
+ I [2011-12-10 15:24:34] INFO: Loading simple.mcproc
94
+ I [2011-12-10 15:24:34] INFO: Syslog enabled.
95
+ I [2011-12-10 15:24:34] INFO: Using pid file directory: /Users/tom/.god/pids
96
+ I [2011-12-10 15:24:34] INFO: Started on drbunix:///tmp/god.17165.sock
97
+ I [2011-12-10 15:24:34] INFO: simple move 'unmonitored' to 'init'
98
+ I [2011-12-10 15:24:34] INFO: simple moved 'unmonitored' to 'init'
99
+ I [2011-12-10 15:24:34] INFO: simple [trigger] process is not running (ProcessRunning)
100
+ I [2011-12-10 15:24:34] INFO: simple move 'init' to 'start'
101
+ I [2011-12-10 15:24:34] INFO: simple start: ruby /Users/tom/dev/mojombo/mcproc/simple.rb
102
+ I [2011-12-10 15:24:34] INFO: simple moved 'init' to 'start'
103
+ I [2011-12-10 15:24:34] INFO: simple [trigger] process is running (ProcessRunning)
104
+ I [2011-12-10 15:24:34] INFO: simple move 'start' to 'up'
105
+ I [2011-12-10 15:24:34] INFO: simple registered 'proc_exit' event for pid 23298
106
+ I [2011-12-10 15:24:34] INFO: simple moved 'start' to 'up'
107
+
108
+ # Polls
109
+
110
+ I [2011-12-07 09:40:18] INFO: Loading simple.mcproc
111
+ I [2011-12-07 09:40:18] INFO: Syslog enabled.
112
+ I [2011-12-07 09:40:18] INFO: Using pid file directory: /Users/tom/.god/pids
113
+ I [2011-12-07 09:40:18] INFO: Started on drbunix:///tmp/god.17165.sock
114
+ I [2011-12-07 09:40:18] INFO: simple move 'unmonitored' to 'up'
115
+ I [2011-12-07 09:40:18] INFO: simple moved 'unmonitored' to 'up'
116
+ I [2011-12-07 09:40:18] INFO: simple [trigger] process is not running (ProcessRunning)
117
+ I [2011-12-07 09:40:18] INFO: simple move 'up' to 'start'
118
+ I [2011-12-07 09:40:18] INFO: simple start: ruby /Users/tom/dev/mojombo/mcproc/simple.rb
119
+ I [2011-12-07 09:40:19] INFO: simple moved 'up' to 'up'
120
+ I [2011-12-07 09:40:19] INFO: simple [ok] process is running (ProcessRunning)
121
+ I [2011-12-07 09:40:24] INFO: simple [ok] process is running (ProcessRunning)
122
+ I [2011-12-07 09:40:29] INFO: simple [ok] process is running (ProcessRunning)
123
+ ```
124
+
125
+ Here you can see mcproc starting up, noticing that the `simple` process isn't
126
+ running, starting it, and then checking every five seconds to make sure it's
127
+ up. If you'd like to see mcproc work its magic, go ahead and kill the `simple`
128
+ process. You should then see something like this:
129
+
130
+ ```terminal
131
+ # Events
132
+
133
+ I [2011-12-10 15:33:38] INFO: simple [trigger] process 23416 exited (ProcessExits)
134
+ I [2011-12-10 15:33:38] INFO: simple move 'up' to 'start'
135
+ I [2011-12-10 15:33:38] INFO: simple deregistered 'proc_exit' event for pid 23416
136
+ I [2011-12-10 15:33:38] INFO: simple start: ruby /Users/tom/dev/mojombo/mcproc/simple.rb
137
+ I [2011-12-10 15:33:38] INFO: simple moved 'up' to 'start'
138
+ I [2011-12-10 15:33:38] INFO: simple [trigger] process is running (ProcessRunning)
139
+ I [2011-12-10 15:33:38] INFO: simple move 'start' to 'up'
140
+ I [2011-12-10 15:33:38] INFO: simple registered 'proc_exit' event for pid 23601
141
+ I [2011-12-10 15:33:38] INFO: simple moved 'start' to 'up'
142
+
143
+ # Polls
144
+
145
+ I [2011-12-07 09:54:59] INFO: simple [ok] process is running (ProcessRunning)
146
+ I [2011-12-07 09:55:04] INFO: simple [ok] process is running (ProcessRunning)
147
+ I [2011-12-07 09:55:09] INFO: simple [trigger] process is not running (ProcessRunning)
148
+ I [2011-12-07 09:55:09] INFO: simple move 'up' to 'start'
149
+ I [2011-12-07 09:55:09] INFO: simple start: ruby /Users/tom/dev/mojombo/mcproc/simple.rb
150
+ I [2011-12-07 09:55:09] INFO: simple moved 'up' to 'up'
151
+ I [2011-12-07 09:55:09] INFO: simple [ok] process is running (ProcessRunning)
152
+ I [2011-12-07 09:55:14] INFO: simple [ok] process is running (ProcessRunning)
153
+ ```
154
+
155
+ While keeping a process up is useful, it would be even better if we could make
156
+ sure our process was behaving well and restart it when resource utilization
157
+ exceeds our specifications. With a few additions, we can easily have our
158
+ process restarted when memory usage or CPU goes above certain limits. Edit
159
+ your `simple.mcproc` config file to look like this:
160
+
161
+ ```ruby
162
+ McProc.watch do |w|
163
+ w.name = "simple"
164
+ w.start = "ruby /full/path/to/simple.rb"
165
+ w.keepalive(:memory_max => 150.megabytes,
166
+ :cpu_max => 50.percent)
167
+ end
168
+ ```
169
+
170
+ Here I've specified a `:memory_max` option to the `keepalive` command. Now if
171
+ the process memory usage goes above 150 megabytes, mcproc will restart it.
172
+ Similarly, by setting the `:cpu_max`, mcproc will restart my process if its CPU
173
+ usage goes over 50%. By default these properties will be checked every 30
174
+ seconds and will be acted upon if there is an overage for three out of any
175
+ five checks. This prevents the process from getting restarted for temporary
176
+ resource spikes.
177
+
178
+ To test this out, modify your `simple.rb` server script to introduce a memory
179
+ leak:
180
+
181
+ ```ruby
182
+ data = ''
183
+ loop do
184
+ puts 'Hello'
185
+ 100000.times { data << 'x' }
186
+ end
187
+ ```
188
+
189
+ Ctrl-C out of the foregrounded mcproc instance. Notice that your current `simple`
190
+ server will continue to run. Start mcproc again with the same command as before.
191
+ Now instead of starting the `simple` process, it will notice that one is
192
+ already running and simply switch to the `up` state.
193
+
194
+ ```terminal
195
+ # Events
196
+
197
+ I [2011-12-10 15:36:00] INFO: Loading simple.mcproc
198
+ I [2011-12-10 15:36:00] INFO: Syslog enabled.
199
+ I [2011-12-10 15:36:00] INFO: Using pid file directory: /Users/tom/.god/pids
200
+ I [2011-12-10 15:36:00] INFO: Started on drbunix:///tmp/god.17165.sock
201
+ I [2011-12-10 15:36:00] INFO: simple move 'unmonitored' to 'init'
202
+ I [2011-12-10 15:36:00] INFO: simple moved 'unmonitored' to 'init'
203
+ I [2011-12-10 15:36:00] INFO: simple [trigger] process is running (ProcessRunning)
204
+ I [2011-12-10 15:36:00] INFO: simple move 'init' to 'up'
205
+ I [2011-12-10 15:36:00] INFO: simple registered 'proc_exit' event for pid 23601
206
+ I [2011-12-10 15:36:00] INFO: simple moved 'init' to 'up'
207
+
208
+ # Polls
209
+
210
+ I [2011-12-07 14:50:46] INFO: Loading simple.mcproc
211
+ I [2011-12-07 14:50:46] INFO: Syslog enabled.
212
+ I [2011-12-07 14:50:46] INFO: Using pid file directory: /Users/tom/.god/pids
213
+ I [2011-12-07 14:50:47] INFO: Started on drbunix:///tmp/god.17165.sock
214
+ I [2011-12-07 14:50:47] INFO: simple move 'unmonitored' to 'up'
215
+ I [2011-12-07 14:50:47] INFO: simple moved 'unmonitored' to 'up'
216
+ I [2011-12-07 14:50:47] INFO: simple [ok] process is running (ProcessRunning)
217
+ ```
218
+
219
+ In order to get our new `simple` server running, we can issue a command to mcproc
220
+ to have our process restarted:
221
+
222
+ ```terminal
223
+ $ mcproc restart simple
224
+ ```
225
+
226
+ From the logs you can see mcproc killing and restarting the process:
227
+
228
+ ```terminal
229
+ # Events
230
+
231
+ I [2011-12-10 15:38:13] INFO: simple move 'up' to 'restart'
232
+ I [2011-12-10 15:38:13] INFO: simple deregistered 'proc_exit' event for pid 23601
233
+ I [2011-12-10 15:38:13] INFO: simple stop: default lambda killer
234
+ I [2011-12-10 15:38:13] INFO: simple sent SIGTERM
235
+ I [2011-12-10 15:38:14] INFO: simple process stopped
236
+ I [2011-12-10 15:38:14] INFO: simple start: ruby /Users/tom/dev/mojombo/mcproc/simple.rb
237
+ I [2011-12-10 15:38:14] INFO: simple moved 'up' to 'restart'
238
+ I [2011-12-10 15:38:14] INFO: simple [trigger] process is running (ProcessRunning)
239
+ I [2011-12-10 15:38:14] INFO: simple move 'restart' to 'up'
240
+ I [2011-12-10 15:38:14] INFO: simple registered 'proc_exit' event for pid 23707
241
+ I [2011-12-10 15:38:14] INFO: simple moved 'restart' to 'up'
242
+
243
+ # Polls
244
+
245
+ I [2011-12-07 14:51:13] INFO: simple [ok] process is running (ProcessRunning)
246
+ I [2011-12-07 14:51:13] INFO: simple move 'up' to 'restart'
247
+ I [2011-12-07 14:51:13] INFO: simple stop: default lambda killer
248
+ I [2011-12-07 14:51:13] INFO: simple sent SIGTERM
249
+ I [2011-12-07 14:51:14] INFO: simple process stopped
250
+ I [2011-12-07 14:51:14] INFO: simple start: ruby /Users/tom/dev/mojombo/mcproc/simple.rb
251
+ I [2011-12-07 14:51:14] INFO: simple moved 'up' to 'up'
252
+ I [2011-12-07 14:51:14] INFO: simple [ok] process is running (ProcessRunning)
253
+ ```
254
+
255
+ McProc will now start reporting on memory and CPU utilization of your process:
256
+
257
+ ```terminal
258
+ # Events and Polls
259
+
260
+ I [2011-12-07 14:54:37] INFO: simple [ok] process is running (ProcessRunning)
261
+ I [2011-12-07 14:54:37] INFO: simple [ok] memory within bounds [2032kb] (MemoryUsage)
262
+ I [2011-12-07 14:54:37] INFO: simple [ok] cpu within bounds [0.0%%] (CpuUsage)
263
+ I [2011-12-07 14:54:42] INFO: simple [ok] process is running (ProcessRunning)
264
+ I [2011-12-07 14:54:42] INFO: simple [ok] memory within bounds [2032kb, 13492kb] (MemoryUsage)
265
+ I [2011-12-07 14:54:42] INFO: simple [ok] cpu within bounds [0.0%%, *99.7%%] (CpuUsage)
266
+ I [2011-12-07 14:54:47] INFO: simple [ok] process is running (ProcessRunning)
267
+ I [2011-12-07 14:54:47] INFO: simple [ok] memory within bounds [2032kb, 13492kb, 25568kb] (MemoryUsage)
268
+ I [2011-12-07 14:54:47] INFO: simple [ok] cpu within bounds [0.0%%, *99.7%%, *100.0%%] (CpuUsage)
269
+ I [2011-12-07 14:54:52] INFO: simple [ok] process is running (ProcessRunning)
270
+ I [2011-12-07 14:54:52] INFO: simple [ok] memory within bounds [2032kb, 13492kb, 25568kb, 37556kb] (MemoryUsage)
271
+ I [2011-12-07 14:54:52] INFO: simple [trigger] cpu out of bounds [0.0%%, *99.7%%, *100.0%%, *98.4%%] (CpuUsage)
272
+ I [2011-12-07 14:54:52] INFO: simple move 'up' to 'restart'
273
+ ```
274
+
275
+ On the last line of the above log you can see that CPU usage has gone above
276
+ 50% for three cycles and mcproc will issue a restart operation. McProc will continue
277
+ to monitor the `simple` process for as long as mcproc is running and the process
278
+ is set to be monitored.
279
+
280
+ Now, before you kill the mcproc process, let's kill the `simple` server by asking
281
+ mcproc to stop it for us. In a new terminal, issue the command:
282
+
283
+ ```terminal
284
+ $ mcproc stop simple
285
+ ```
286
+
287
+ You should see the following output:
288
+
289
+ ```terminal
290
+ Sending 'stop' command
291
+
292
+ The following watches were affected:
293
+ simple
294
+ ```
295
+
296
+ And in the foregrounded mcproc terminal window, you'll see the log of what
297
+ happened:
298
+
299
+ ```terminal
300
+ # Events
301
+
302
+ I [2011-12-10 15:41:04] INFO: simple stop: default lambda killer
303
+ I [2011-12-10 15:41:04] INFO: simple sent SIGTERM
304
+ I [2011-12-10 15:41:05] INFO: simple process stopped
305
+ I [2011-12-10 15:41:05] INFO: simple move 'up' to 'unmonitored'
306
+ I [2011-12-10 15:41:05] INFO: simple deregistered 'proc_exit' event for pid 23707
307
+ I [2011-12-10 15:41:05] INFO: simple moved 'up' to 'unmonitored'
308
+
309
+ # Polls
310
+
311
+ I [2011-12-07 09:59:59] INFO: simple [ok] process is running (ProcessRunning)
312
+ I [2011-12-07 10:00:04] INFO: simple [ok] process is running (ProcessRunning)
313
+ I [2011-12-07 10:00:07] INFO: simple stop: default lambda killer
314
+ I [2011-12-07 10:00:07] INFO: simple sent SIGTERM
315
+ I [2011-12-07 10:00:08] INFO: simple process stopped
316
+ I [2011-12-07 10:00:08] INFO: simple move 'up' to 'unmonitored'
317
+ I [2011-12-07 10:00:08] INFO: simple moved 'up' to 'unmonitored'
318
+ ```
319
+
320
+ Now feel free to Ctrl-C out of mcproc. Congratulations! You've just taken mcproc for
321
+ a test ride and seen how easy it is to keep your processes running.
322
+
323
+ This is just the beginning of what mcproc can do, and in reality, the `keepalive`
324
+ command is a convenience method written using more advanced transitional and
325
+ condition constructs that may be used directly. You can configure many
326
+ different kinds of conditions to have your process restarted when memory or
327
+ CPU are too high, when disk usage is above a threshold, when a process returns
328
+ an HTTP error code on a specific URL, and many more. In addition you can write
329
+ your own custom conditions and use them in your configuration files. Many
330
+ different lifecycle controls are available alongside a sophisticated and
331
+ extensible notifications system. Keep reading to find out what makes mcproc
332
+ different from other monitoring systems and how it can help you solve many of
333
+ your process monitoring and control problems.
334
+
335
+
336
+ Config Files are Ruby Code!
337
+ ---------------------------
338
+
339
+ Now that you've seen how to get started quickly, let's see how to use the more
340
+ powerful aspects of mcproc. Once again, the best way to learn will be through an
341
+ example. The following configuration file is what I once used at gravatar.com
342
+ to keep the mongrels running:
343
+
344
+ ```ruby
345
+ RAILS_ROOT = "/Users/tom/dev/gravatar2"
346
+
347
+ %w{8200 8201 8202}.each do |port|
348
+ McProc.watch do |w|
349
+ w.name = "gravatar2-mongrel-#{port}"
350
+
351
+ w.start = "mongrel_rails start -c #{RAILS_ROOT} -p #{port} \
352
+ -P #{RAILS_ROOT}/log/mongrel.#{port}.pid -d"
353
+ w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.#{port}.pid"
354
+ w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.#{port}.pid"
355
+
356
+ w.pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid")
357
+
358
+ w.behavior(:clean_pid_file)
359
+
360
+ w.start_if do |start|
361
+ start.condition(:process_running) do |c|
362
+ c.interval = 5.seconds
363
+ c.running = false
364
+ end
365
+ end
366
+
367
+ w.restart_if do |restart|
368
+ restart.condition(:memory_usage) do |c|
369
+ c.above = 150.megabytes
370
+ c.times = [3, 5] # 3 out of 5 intervals
371
+ end
372
+
373
+ restart.condition(:cpu_usage) do |c|
374
+ c.above = 50.percent
375
+ c.times = 5
376
+ end
377
+ end
378
+
379
+ # lifecycle
380
+ w.lifecycle do |on|
381
+ on.condition(:flapping) do |c|
382
+ c.to_state = [:start, :restart]
383
+ c.times = 5
384
+ c.within = 5.minute
385
+ c.transition = :unmonitored
386
+ c.retry_in = 10.minutes
387
+ c.retry_times = 5
388
+ c.retry_within = 2.hours
389
+ end
390
+ end
391
+ end
392
+ end
393
+ ```
394
+
395
+ That's a lot to take in at once, so I'll break it down by section and explain
396
+ what's going on in each.
397
+
398
+ ```ruby
399
+ RAILS_ROOT = "/Users/tom/dev/gravatar2"
400
+ ```
401
+
402
+ Here I've set a constant that is used throughout the file. Keeping the
403
+ `RAILS_ROOT` value in a constant makes it easy to adapt this script to other
404
+ applications. Because the config file is Ruby code, I can set whatever
405
+ variables or constants I want that make the configuration more concise and
406
+ easier to work with.
407
+
408
+ ```ruby
409
+ %w{8200 8201 8202}.each do |port|
410
+ ...
411
+ end
412
+ ```
413
+
414
+ Because the config file is written in actual Ruby code, we can construct loops
415
+ and do other intelligent things that are impossible in your everyday, run of
416
+ the mill config file. I need to watch three mongrels, so I simply loop over
417
+ their port numbers, eliminating duplication and making my life a whole lot
418
+ easier.
419
+
420
+ ```ruby
421
+ McProc.watch do |w|
422
+ w.name = "gravatar2-mongrel-#{port}"
423
+
424
+ w.start = "mongrel_rails start -c #{RAILS_ROOT} -p #{port} \
425
+ -P #{RAILS_ROOT}/log/mongrel.#{port}.pid -d"
426
+ w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.#{port}.pid"
427
+ w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.#{port}.pid"
428
+
429
+ w.pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid")
430
+
431
+ ...
432
+ end
433
+ ```
434
+
435
+ A `watch` represents a single process that has concrete start, stop, and/or
436
+ restart operations. You can define as many watches as you like. In the example
437
+ above, I've got some Rails instances running in Mongrels that I need to keep
438
+ alive. Every watch must have a unique `name` so that it can be identified
439
+ later on. The `start` and `stop` attributes specify the commands to start
440
+ and stop the process. If no `restart` attribute is set, restart will be
441
+ represented by a call to stop followed by a call to start. The
442
+ optional `grace` attribute sets the amount of time following a
443
+ start/stop/restart command to wait before resuming normal monitoring
444
+ operations. If the process you're watching runs as a daemon (as
445
+ mine does), you'll need to set the `pid_file` attribute.
446
+
447
+ ```ruby
448
+ w.behavior(:clean_pid_file)
449
+ ```
450
+
451
+ Behaviors allow you to execute additional commands around start/stop/restart
452
+ commands. In our case, if the process dies it will leave a PID file behind.
453
+ The next time a start command is issued, it will fail, complaining about the
454
+ leftover PID file. We'd like the PID file cleaned up before a start command is
455
+ issued. The built-in behavior `clean_pid_file` will do just that.
456
+
457
+ ```ruby
458
+ w.start_if do |start|
459
+ start.condition(:process_running) do |c|
460
+ c.interval = 5.seconds
461
+ c.running = false
462
+ end
463
+ end
464
+ ```
465
+
466
+ Watches contain conditions grouped by the action to execute should they return
467
+ `true`. I start with a `start_if` block that contains a single condition.
468
+ Conditions are specified by calling `condition` with an identifier, in this
469
+ case `:process_running`. Each condition can specify a poll interval that will
470
+ override the default watch interval. In this case, I want to check that the
471
+ process is still running every 5 seconds instead of the 30 second interval
472
+ that other conditions will inherit. The ability to set condition specific poll
473
+ intervals makes it possible to run critical tests (such as :process_running)
474
+ more often than less critical tests (such as :memory_usage and :cpu_usage).
475
+
476
+ ```ruby
477
+ w.restart_if do |restart|
478
+ restart.condition(:memory_usage) do |c|
479
+ c.above = 150.megabytes
480
+ c.times = [3, 5] # 3 out of 5 intervals
481
+ end
482
+
483
+ ...
484
+ end
485
+ ```
486
+
487
+ Similar to `start_if` there is a `restart_if` command that groups conditions
488
+ that should trigger a restart. The `memory_usage` condition will fail if the
489
+ specified process is using too much memory. The maximum allowable amount of
490
+ memory is specified with the `above` attribute (you can use the `kilobytes`,
491
+ `megabytes`, or `gigabytes` helpers). The number of times the test needs to
492
+ fail in order to trigger a restart is set with `times`. This can be either an
493
+ integer or an array. An integer means it must fail that many times in a row
494
+ while an array `[x, y]` means it must fail `x` times out of the last `y`
495
+ tests.
496
+
497
+ ```ruby
498
+ w.restart_if do |restart|
499
+ ...
500
+
501
+ restart.condition(:cpu_usage) do |c|
502
+ c.above = 50.percent
503
+ c.times = 5
504
+ end
505
+ end
506
+ ```
507
+
508
+ To keep an eye on CPU usage, I've employed the `cpu_usage` condition. When CPU
509
+ usage for a Mongrel process is over 50% for 5 consecutive intervals, it will
510
+ be restarted.
511
+
512
+ ```ruby
513
+ w.lifecycle do |on|
514
+ on.condition(:flapping) do |c|
515
+ c.to_state = [:start, :restart]
516
+ c.times = 5
517
+ c.within = 5.minute
518
+ c.transition = :unmonitored
519
+ c.retry_in = 10.minutes
520
+ c.retry_times = 5
521
+ c.retry_within = 2.hours
522
+ end
523
+ end
524
+ ```
525
+
526
+ Conditions inside a `lifecycle` section are active as long as the process is being monitored (they live across state changes).
527
+
528
+ The `:flapping` condition guards against the edge case wherein mcproc rapidly
529
+ starts or restarts your application. Things like server configuration changes
530
+ or the unavailability of external services could make it impossible for my
531
+ process to start. In that case, mcproc will try to start my process over and over
532
+ to no avail. The `:flapping` condition provides two levels of giving up on
533
+ flapping processes. If I were to translate the options of the code above, it
534
+ would be something like: If this watch is started or restarted five times
535
+ within 5 minutes, then unmonitor it...then after ten minutes, monitor it
536
+ again to see if it was just a temporary problem; if the process is seen to be
537
+ flapping five times within two hours, then give up completely.
538
+
539
+ That's it!
540
+
541
+ /////////////////////////////////////////////////////////////////////////////
542
+ /////////////////////////////////////////////////////////////////////////////
543
+
544
+ Starting and Controlling McProc
545
+ -------------------------------
546
+
547
+ To start the mcproc monitoring process as a daemon simply run the `mcproc`
548
+ executable passing in the path to the config file (you need to sudo if you're
549
+ using events on Linux or want to use the setuid/setgid functionality):
550
+
551
+ ```terminal
552
+ $ sudo mcproc -c /path/to/config.mcproc
553
+ ```
554
+
555
+ While you're writing your config file, it can be helpful to run mcproc in the
556
+ foreground so you can see the log messages. You can do that with:
557
+
558
+ ```terminal
559
+ $ sudo mcproc -c /path/to/config.mcproc -D
560
+ ```
561
+
562
+ You can start/restart/stop/monitor/unmonitor your Watches with the same
563
+ utility like so:
564
+
565
+ ```terminal
566
+ $ sudo mcproc stop gravatar2-mongrel-8200
567
+ ```
568
+
569
+ /////////////////////////////////////////////////////////////////////////////
570
+ /////////////////////////////////////////////////////////////////////////////
571
+
572
+ Watching Non-Daemon Processes
573
+ -----------------------------
574
+
575
+ Need to watch a script that doesn't have built in daemonization? No problem!
576
+ McProc will daemonize and keep track of your process for you. If you don't
577
+ specify a `pid_file` attribute for a watch, it will be auto-daemonized and a
578
+ PID file will be stored for it in `/var/run/mcproc`.
579
+
580
+
581
+ ```ruby
582
+ McProc.pid_file_directory = '/home/tom/pids'
583
+
584
+ # Watch for a process that daemonizes itself and writes its own pid file (so pid_file is set)
585
+ McProc.watch do |w|
586
+ w.name = 'mongrel'
587
+ w.pid_file = File.join(RAILS_ROOT, "log/mongrel.pid")
588
+
589
+ w.start = "mongrel_rails start -P #{RAILS_ROOT}/log/mongrel.pid -d"
590
+
591
+ # ...
592
+ end
593
+
594
+ # Watch for a process that does not daemonize itself; mcproc daemonizes it and tracks its pid file
595
+ McProc.watch do |w|
596
+ w.name = 'worker'
597
+ # w.pid_file = is not set
598
+
599
+ w.start = "rake resque:worker"
600
+
601
+ # ...
602
+ end
603
+ ```
604
+
605
+
606
+ If you'd rather have the PID file stored in a different location, you can
607
+ set it at the top of your config:
608
+
609
+ ```ruby
610
+ McProc.pid_file_directory = '/home/tom/pids'
611
+ ```
612
+
613
+ The directory you specify must be writable by mcproc.
614
+
615
+
616
+ /////////////////////////////////////////////////////////////////////////////
617
+ /////////////////////////////////////////////////////////////////////////////
618
+
619
+ Grouping Watches
620
+ ----------------
621
+
622
+ Watches can be assigned to groups. These groups can then be controlled
623
+ together from the command line.
624
+
625
+ ```ruby
626
+ McProc.watch do |w|
627
+ ...
628
+
629
+ w.group = 'mongrels'
630
+
631
+ ...
632
+ end
633
+ ```
634
+
635
+ The above configuration now allows you to control the watch (and any others
636
+ that are in the group) with a single command:
637
+
638
+ ```terminal
639
+ $ sudo mcproc stop mongrels
640
+ ```
641
+
642
+ /////////////////////////////////////////////////////////////////////////////
643
+ /////////////////////////////////////////////////////////////////////////////
644
+
645
+ Invoke Commands for all watches
646
+ -------------------------------
647
+
648
+ If you need to invoke a command (e.g. Stop / Start / Restart) on all watches
649
+ you can simply omit the second parameter. For example, to start all watches:
650
+
651
+ ```terminal
652
+ $ sudo mcproc start
653
+ ```
654
+
655
+ /////////////////////////////////////////////////////////////////////////////
656
+ /////////////////////////////////////////////////////////////////////////////
657
+
658
+ Redirecting STDOUT and STDERR of your Process
659
+ ---------------------------------------------
660
+
661
+ By default, the STDOUT stream for your process is redirected to `/dev/null`.
662
+ To get access to this output, you can redirect the stream either to a file or
663
+ to a command.
664
+
665
+ To redirect STDOUT to a file, set the `log` attribute to a file path. The file
666
+ will be written in append mode and created if it does not exist.
667
+
668
+ ```ruby
669
+ McProc.watch do |w|
670
+ ...
671
+
672
+ w.log = '/var/log/myprocess.log'
673
+
674
+ ...
675
+ end
676
+ ```
677
+
678
+ To redirect STDOUT to a command that will be run for you, set the `log_cmd`
679
+ attribute to a command.
680
+
681
+ ```ruby
682
+ McProc.watch do |w|
683
+ ...
684
+
685
+ w.log_cmd = '/usr/bin/logger'
686
+
687
+ ...
688
+ end
689
+ ```
690
+
691
+ By default, STDERR is redirected to STDOUT. You can redirect it to a file or a
692
+ command just like STDOUT by setting the `err_log` or `err_log_cmd` attributes
693
+ respectively.
694
+
695
+ /////////////////////////////////////////////////////////////////////////////
696
+ /////////////////////////////////////////////////////////////////////////////
697
+
698
+ Changing UID/GID for processes
699
+ ------------------------------
700
+
701
+ It is possible to have mcproc run your start/stop/restart commands as a specific
702
+ user/group. This can be done by setting the `uid` and/or `gid` attributes of a
703
+ watch.
704
+
705
+ ```ruby
706
+ McProc.watch do |w|
707
+ ...
708
+
709
+ w.uid = 'tom'
710
+ w.gid = 'devs'
711
+
712
+ ...
713
+ end
714
+ ```
715
+
716
+ This only works for commands specified as a string. Lambda commands are
717
+ unaffected.
718
+
719
+ /////////////////////////////////////////////////////////////////////////////
720
+ /////////////////////////////////////////////////////////////////////////////
721
+
722
+ Setting the Working Directory
723
+ -----------------------------
724
+
725
+ By default, McProc sets the working directory to `/` before running your process.
726
+ You can change this by setting the `dir` attribute on the watch.
727
+
728
+ ```ruby
729
+ McProc.watch do |w|
730
+ ...
731
+
732
+ w.dir = '/var/www/myapp'
733
+
734
+ ...
735
+ end
736
+ ```
737
+
738
+ /////////////////////////////////////////////////////////////////////////////
739
+ /////////////////////////////////////////////////////////////////////////////
740
+
741
+ Setting environment variables
742
+ -----------------------------
743
+
744
+ You can set any number of environment variables you wish via the `env`
745
+ attribute of a watch.
746
+
747
+ ```ruby
748
+ McProc.watch do |w|
749
+ ...
750
+
751
+ w.env = { 'RAILS_ROOT' => "/var/www/myapp",
752
+ 'RAILS_ENV' => "production" }
753
+
754
+ ...
755
+ end
756
+ ```
757
+
758
+ /////////////////////////////////////////////////////////////////////////////
759
+ /////////////////////////////////////////////////////////////////////////////
760
+
761
+ Using chroot to Change the File System Root
762
+ -------------------------------------------
763
+
764
+ If you want your process to run chrooted, simply use the `chroot` attribute on
765
+ the watch. The specified directory must exist and have a `/dev/null`.
766
+
767
+ ```ruby
768
+ McProc.watch do |w|
769
+ ...
770
+
771
+ w.chroot = '/var/myroot'
772
+
773
+ ...
774
+ end
775
+ ```
776
+
777
+ /////////////////////////////////////////////////////////////////////////////
778
+ /////////////////////////////////////////////////////////////////////////////
779
+
780
+ Lambda commands
781
+ ---------------
782
+
783
+ In addition to specifying start/stop/restart commands as strings (to be
784
+ executed via the shell), you can specify a lambda that will be called.
785
+
786
+ ```ruby
787
+ McProc.watch do |w|
788
+ ...
789
+
790
+ w.start = lambda { ENV['APACHE'] ? `apachectl -k graceful` : `lighttpd restart` }
791
+
792
+ ...
793
+ end
794
+ ```
795
+
796
+ /////////////////////////////////////////////////////////////////////////////
797
+ /////////////////////////////////////////////////////////////////////////////
798
+
799
+ Customizing the Default Stop Lambda
800
+ -----------------------------------
801
+
802
+ If you do not provide a stop command, McProc will attempt to stop your process by
803
+ first sending a SIGTERM. It will then wait for ten seconds for the process to
804
+ exit. If after this time it still has not exited, it will be sent a SIGKILL.
805
+ You can customize the stop signal and/or the time to wait for the process to
806
+ exit by setting the `stop_signal` and `stop_timeout` attributes on the watch.
807
+
808
+ ```ruby
809
+ McProc.watch do |w|
810
+ ...
811
+
812
+ w.stop_signal = 'QUIT'
813
+ w.stop_timeout = 20.seconds
814
+
815
+ ...
816
+ end
817
+ ```
818
+
819
+
820
+ /////////////////////////////////////////////////////////////////////////////
821
+ /////////////////////////////////////////////////////////////////////////////
822
+
823
+ Loading Other Config Files
824
+ --------------------------
825
+
826
+ You should feel free to separate your mcproc configs into separate files for
827
+ easier organization. You can load in other configs using Ruby's normal `load`
828
+ method, or use the convenience method `McProc.load` which allows for glob-style
829
+ paths:
830
+
831
+ ```ruby
832
+ # load in all mcproc configs
833
+ McProc.load "/usr/local/conf/*.mcproc"
834
+ ```
835
+
836
+ McProc won't start its monitoring operations until all configurations have been
837
+ loaded.
838
+
839
+ /////////////////////////////////////////////////////////////////////////////
840
+ /////////////////////////////////////////////////////////////////////////////
841
+
842
+ Dynamically Loading Config Files Into an Already Running McProc
843
+ ---------------------------------------------------------------
844
+
845
+ McProc allows you to load or reload configurations into an already running
846
+ instance. There are a few things to consider when doing this:
847
+
848
+ * Existing Watches with the same `name` as the incoming Watches will be
849
+ overridden by the new config.
850
+ * All paths must be either absolute or relative to the path from which mcproc was
851
+ started.
852
+
853
+ To load a config into a running mcproc, issue the following command:
854
+
855
+ ```terminal
856
+ $ sudo mcproc load path/to/config.mcproc
857
+ ```
858
+
859
+ Config files that are loaded dynamically can contain anything that a normal
860
+ config file contains, however, global options such as `McProc.pid_file_directory`
861
+ will be ignored (and produce a warning in the logs).
862
+
863
+ /////////////////////////////////////////////////////////////////////////////
864
+ /////////////////////////////////////////////////////////////////////////////
865
+
866
+ Getting Logs for a Single Watch
867
+ -------------------------------
868
+
869
+ Sifting through the mcproc logs for statements specific to a single Watch can be
870
+ frustrating when you have many of them. You can get the realtime logs for a
871
+ single Watch via the command line:
872
+
873
+ ```terminal
874
+ $ sudo mcproc log local-3000
875
+ ```
876
+
877
+ This will display log output for the 'local-3000' Watch and update every
878
+ second with new log messages.
879
+
880
+ You can also supply a shorthand to the log command that will match one of your
881
+ watches. If it happens to match several, the shortest match will be used:
882
+
883
+ ```terminal
884
+ $ sudo mcproc log l3
885
+ ```
886
+
887
+ /////////////////////////////////////////////////////////////////////////////
888
+ /////////////////////////////////////////////////////////////////////////////
889
+
890
+ Notifications
891
+ -------------
892
+
893
+ McProc has an extensible notification framework built in that makes it easy to
894
+ have notifications sent when conditions are triggered. Each notification type
895
+ has a set of configuration parameters that must be set. These parameters may
896
+ be set globally via Contact Defaults or individually via Contact Instances.
897
+
898
+ *Contact Defaults* - Some parameters are unlikely to change on a per-contact
899
+ basis. You should set those parameters via the defaults mechanism.
900
+
901
+ ```ruby
902
+ McProc::Contacts::Email.defaults do |d|
903
+ d.from_email = 'mcproc@example.com'
904
+ d.from_name = 'McProc'
905
+ d.delivery_method = :sendmail
906
+ end
907
+ ```
908
+
909
+ *Contact Instances* - Each contact must have a unique `name` set. You may
910
+ optionally assign each contact to a `group`.
911
+
912
+ ```ruby
913
+ McProc.contact(:email) do |c|
914
+ c.name = 'tom'
915
+ c.group = 'developers'
916
+ c.to_email = 'tom@example.com'
917
+ end
918
+
919
+ McProc.contact(:email) do |c|
920
+ c.name = 'vanpelt'
921
+ c.group = 'developers'
922
+ c.to_email = 'vanpelt@example.com'
923
+ end
924
+
925
+ McProc.contact(:email) do |c|
926
+ c.name = 'kevin'
927
+ c.group = 'developers'
928
+ c.to_email = 'kevin@example.com'
929
+ end
930
+ ```
931
+
932
+ *Condition Attachment* - To have a specific contact notified when a condition
933
+ is triggered, simply set the condition's `notify` attribute to the name of the
934
+ individual contact.
935
+
936
+ ```ruby
937
+ w.transition(:up, :start) do |on|
938
+ on.condition(:process_exits) do |c|
939
+ c.notify = 'tom'
940
+ end
941
+ end
942
+ ```
943
+
944
+ There are two ways to specify that a notification should be sent. The first,
945
+ easier way is shown above. Every condition can take an optional `notify`
946
+ attribute that specifies which contacts should be notified when the condition
947
+ is triggered. The value can be a contact name or contact group *or* an array
948
+ of contact names and/or contact groups.
949
+
950
+ ```ruby
951
+ w.transition(:up, :start) do |on|
952
+ on.condition(:process_exits) do |c|
953
+ c.notify = {:contacts => ['tom', 'developers'], :priority => 1, :category => 'product'}
954
+ end
955
+ end
956
+ ```
957
+
958
+ The second way allows you to specify the `priority` and `category` in addition
959
+ to the contacts. The extra attributes can be arbitrary integers or strings and
960
+ will be passed as-is to the notification subsystem.
961
+
962
+ The above notification will arrive as an email similar to the following.
963
+
964
+ ```
965
+ From: McProc <mcproc@example.com>
966
+ To: tom <tom@example.com>
967
+ Subject: [mcproc] mongrel-8600 [trigger] process exited (ProcessExits)
968
+
969
+ Message: mongrel-8600 [trigger] process exited (ProcessExits)
970
+ Host: candymountain.example.com
971
+ Priority: 1
972
+ Category: product
973
+ ```
974
+
975
+ Available Notification Types
976
+ ----------------------------
977
+
978
+ Campfire
979
+ ~~~~~~~~
980
+
981
+ Send a notice to a Campfire room (http://campfirenow.com).
982
+
983
+ ```ruby
984
+ McProc::Contacts::Campfire.defaults do |d|
985
+ ...
986
+ end
987
+
988
+ McProc.contact(:campfire) do |c|
989
+ ...
990
+ end
991
+ ```
992
+
993
+ ```
994
+ subdomain - The String subdomain of the Campfire account. If your URL is
995
+ "foo.campfirenow.com" then your subdomain is "foo".
996
+ token - The String token used for authentication.
997
+ room - The String room name to which the message should be sent.
998
+ ssl - A Boolean determining whether or not to use SSL
999
+ (default: false).
1000
+ ```
1001
+
1002
+ Hipchat
1003
+ ~~~~~~~~
1004
+
1005
+ Send a notice to a Hipchat room (http://hipchat.com).
1006
+
1007
+ ```ruby
1008
+ McProc::Contacts::Hipchat.defaults do |d|
1009
+ ...
1010
+ end
1011
+
1012
+ McProc.contact(:hipchat) do |c|
1013
+ ...
1014
+ end
1015
+ ```
1016
+
1017
+ ```
1018
+ token - The String token used for authentication.
1019
+ room - The String room name to which the message should be sent.
1020
+ ssl - A Boolean determining whether or not to use SSL
1021
+ (default: false).
1022
+ from - The String representing who the message should be sent as.
1023
+ ```
1024
+
1025
+ NOTE: in Hipchat you must have a token with 'admin' privileges. 'Notification' privileges will not be enough.
1026
+
1027
+ Email
1028
+ ~~~~~
1029
+
1030
+ Send a notice to an email address.
1031
+
1032
+ ```ruby
1033
+ McProc::Contacts::Email.defaults do |d|
1034
+ ...
1035
+ end
1036
+
1037
+ McProc.contact(:email) do |c|
1038
+ ...
1039
+ end
1040
+ ```
1041
+
1042
+ ```
1043
+ to_email - The String email address to which the email will be sent.
1044
+ to_name - The String name corresponding to the recipient.
1045
+ from_email - The String email address from which the email will be sent.
1046
+ from_name - The String name corresponding to the sender.
1047
+ delivery_method - The Symbol delivery method. [ :smtp | :sendmail ]
1048
+ (default: :smtp).
1049
+
1050
+ === SMTP Options (when delivery_method = :smtp) ===
1051
+ server_host - The String hostname of the SMTP server (default: localhost).
1052
+ server_port - The Integer port of the SMTP server (default: 25).
1053
+ server_auth - A Boolean or Symbol, false if no authentication else a symbol
1054
+ for the type of authentication [false | :plain | :login | :cram_md5]
1055
+ (default: false).
1056
+
1057
+ === SMTP Auth Options (when server_auth = true) ===
1058
+ server_domain - The String domain.
1059
+ server_user - The String username.
1060
+ server_password - The String password.
1061
+
1062
+ === Sendmail Options (when delivery_method = :sendmail) ===
1063
+ sendmail_path - The String path to the sendmail executable
1064
+ (default: "/usr/sbin/sendmail").
1065
+ sendmail_args - The String args to send to sendmail (default: "-i -t").
1066
+ ```
1067
+
1068
+ Jabber
1069
+ ~~~~~~
1070
+
1071
+ Send a notice to a Jabber address (http://jabber.org/).
1072
+
1073
+ Google Mail addresses should work. If you need a non-Gmail address, you can
1074
+ sign up for one at http://register.jabber.org/.
1075
+
1076
+ ```ruby
1077
+ McProc::Contacts::Jabber.defaults do |d|
1078
+ ...
1079
+ end
1080
+
1081
+ McProc.contact(:jabber) do |c|
1082
+ ...
1083
+ end
1084
+ ```
1085
+
1086
+ ```
1087
+ host - The String hostname of the Jabber server.
1088
+ port - The Integer port of the Jabber server.
1089
+ from_jid - The String Jabber ID of the sender.
1090
+ password - The String password of the sender.
1091
+ to_jid - The String Jabber ID of the recipient.
1092
+ subject - The String subject of the message (default: "McProc Notification").
1093
+ ```
1094
+
1095
+ Prowl
1096
+ ~~~~~
1097
+
1098
+ Send a notice to Prowl (http://prowl.weks.net/).
1099
+
1100
+ ```ruby
1101
+ McProc::Contacts::Prowl.defaults do |d|
1102
+ ...
1103
+ end
1104
+
1105
+ McProc.contact(:prowl) do |c|
1106
+ ...
1107
+ end
1108
+ ```
1109
+
1110
+ ```
1111
+ apikey - The String API key.
1112
+ ```
1113
+
1114
+ Scout
1115
+ ~~~~~
1116
+
1117
+ Send a notice to Scout (http://scoutapp.com/).
1118
+
1119
+ ```ruby
1120
+ McProc::Contacts::Scout.defaults do |d|
1121
+ ...
1122
+ end
1123
+
1124
+ McProc.contact(:scout) do |c|
1125
+ ...
1126
+ end
1127
+ ```
1128
+
1129
+ ```
1130
+ client_key - The String client key.
1131
+ plugin_id - The String plugin id.
1132
+
1133
+ ```
1134
+
1135
+ Twitter
1136
+ ~~~~~~~
1137
+
1138
+ Send a notice to a Twitter account (http://twitter.com/).
1139
+
1140
+ In order to use the Twitter notification, you will need to authorize McProc via
1141
+ OAuth and then get the OAuth token and secret for your account. The easiest
1142
+ way to do this is with a Ruby gem called `twurl`. Install it like so:
1143
+
1144
+ ```terminal
1145
+ [sudo] gem install twurl
1146
+ ```
1147
+
1148
+ Then, run the following:
1149
+
1150
+ ```terminal
1151
+ twurl auth --consumer-key gOhjax6s0L3mLeaTtBWPw \
1152
+ --consumer-secret yz4gpAVXJHKxvsGK85tEyzQJ7o2FEy27H1KEWL75jfA
1153
+ ```
1154
+
1155
+ This will return a URL. Copy it to your clipboard. Make sure you are logged
1156
+ into Twitter with the account that will be used for the notifications, and then
1157
+ paste the URL into a new browser window. At the end of the authentication
1158
+ process, you will be given a PIN. Copy this PIN and paste it back to the
1159
+ command line prompt. Once this is complete, you need to find your access token
1160
+ and secret:
1161
+
1162
+ ```terminal
1163
+ cat ~/.twurlrc
1164
+ ```
1165
+
1166
+ This will output the contents of the config file from which you can grab your
1167
+ access token and secret:
1168
+
1169
+ ```
1170
+ ---
1171
+ profiles:
1172
+ mojombo:
1173
+ gOhjax6s0L3mLeaTtBWPw:
1174
+ [red]token: 17376380-KXA91nCrgaQ4HxUXMmZtM38gB56qS3hx1NYbjT6mQ
1175
+ consumer_key: gOhjax6s0L3mLeaTtBWPw
1176
+ username: mojombo
1177
+ consumer_secret: yz4gpAVXJHKxvsGK85tEyzQJ7o2FEy27H1KEWL75jfA
1178
+ [red]secret: EBWFQBCtuMwCDeU4OXlc3LwGyY8OdWAV0Jg5KVB0
1179
+ configuration:
1180
+ default_profile:
1181
+ - mojombo
1182
+ - gOhjax6s0L3mLeaTtBWPw
1183
+
1184
+ ```
1185
+
1186
+ The access token and secret (highlighted in red above) are what you need to
1187
+ use as parameters to the Twitter notification.
1188
+
1189
+ ```ruby
1190
+ McProc::Contacts::Twitter.defaults do |d|
1191
+ ...
1192
+ end
1193
+
1194
+ McProc.contact(:twitter) do |c|
1195
+ ...
1196
+ end
1197
+ ```
1198
+
1199
+ ```
1200
+ consumer_token - The String OAuth consumer token (defaults to McProc's
1201
+ existing consumer token).
1202
+ consumer_secret - The String OAuth consumer secret (defaults to McProc's
1203
+ existing consumer secret).
1204
+ access_token - The String OAuth access token.
1205
+ access_secret - The String OAuth access secret.
1206
+ ```
1207
+
1208
+ Webhook
1209
+ ~~~~~~~
1210
+
1211
+ Send a notice to a webhook (http://www.webhooks.org/).
1212
+
1213
+ ```ruby
1214
+ McProc::Contacts::Webhook.defaults do |d|
1215
+ ...
1216
+ end
1217
+
1218
+ McProc.contact(:webhook) do |c|
1219
+ ...
1220
+ end
1221
+ ```
1222
+
1223
+ ```
1224
+ url - The String webhook URL.
1225
+ format - The Symbol format [ :form | :json ] (default: :form).
1226
+
1227
+ ```
1228
+
1229
+ Airbrake
1230
+ ~~~~~~~~
1231
+
1232
+ Send a notice to Airbrake (http://airbrake.io/).
1233
+
1234
+ ```ruby
1235
+ McProc::Contacts::Airbrake.defaults do |d|
1236
+ ...
1237
+ end
1238
+
1239
+ McProc.contact(:airbrake) do |c|
1240
+ ...
1241
+ end
1242
+ ```
1243
+
1244
+ ```
1245
+ apikey - The String API key.
1246
+ ```
1247
+
1248
+ Slack
1249
+ ~~~~~
1250
+
1251
+ Send a message to a channel in Slack (https://slack.com/).
1252
+
1253
+ First, set up an Incoming Webhook in your Slack account.
1254
+
1255
+ Then, in your McProc configuration, set the defaults:
1256
+
1257
+ ```ruby
1258
+ McProc::Contacts::Slack.defaults do |d|
1259
+ d.account = "foo"
1260
+ d.token = "abc123abc123abc123"
1261
+ d.notify_channel = true
1262
+ d.format = '%{host} alert: %{message}'
1263
+ end
1264
+ ```
1265
+
1266
+ `account` is the name of your Slack account; if you view Slack at
1267
+ "foo.slack.com", then your account is "foo". `token` is from your
1268
+ newly-created webhook, and will be a string of unintelligible
1269
+ characters.
1270
+
1271
+ The `notify_channel` and `format` settings are optional. The first
1272
+ controls whether the message includes `@channel` (sending notifications
1273
+ to everyone in the channel); the second controls how the message is
1274
+ formatted. Acceptable values within the format are `priority`, `host`,
1275
+ `message`, `category`, and `time`.
1276
+
1277
+ Once you've set the defaults, create contacts for the channels that you
1278
+ want to notify. You can create as many as you like, and they'll look
1279
+ something like this:
1280
+
1281
+ ```ruby
1282
+ McProc.contact(:slack) do |c|
1283
+ c.name = '#ops'
1284
+ c.channel = '#ops'
1285
+ end
1286
+ ```
1287
+
1288
+ /////////////////////////////////////////////////////////////////////////////
1289
+ /////////////////////////////////////////////////////////////////////////////
1290
+
1291
+ Advanced Configuration with Transitions and Events
1292
+ --------------------------------------------------
1293
+
1294
+ So far you've been introduced to a simple poll-based config file and seen how
1295
+ to run it. Poll-based monitoring works great for simple things, but falls
1296
+ short for highly critical tasks. McProc has native support for kqueue/netlink
1297
+ events on BSD/Darwin/Linux systems. For instance, instead of using the
1298
+ `process_running` condition to poll for the status of your process, you can
1299
+ use the `process_exits` condition that will be notified *immediately* upon the
1300
+ exit of your process. This means less load on your system and shorter downtime
1301
+ after a crash.
1302
+
1303
+ While the configuration syntax you saw in the previous example is very simple,
1304
+ it lacks the power that we need to deal with event based monitoring. In fact,
1305
+ the `start_if` and `restart_if` methods are really just calling out to a
1306
+ lower-level API. If we use the low-level API directly, we can harness the full
1307
+ power of mcproc's event based lifecycle system. Let's look at another example
1308
+ config file.
1309
+
1310
+ ```ruby
1311
+ RAILS_ROOT = "/Users/tom/dev/gravatar2"
1312
+
1313
+ McProc.watch do |w|
1314
+ w.name = "local-3000"
1315
+
1316
+ w.start = "mongrel_rails start -c #{RAILS_ROOT} -P #{RAILS_ROOT}/log/mongrel.pid -p 3000 -d"
1317
+ w.stop = "mongrel_rails stop -P #{RAILS_ROOT}/log/mongrel.pid"
1318
+ w.restart = "mongrel_rails restart -P #{RAILS_ROOT}/log/mongrel.pid"
1319
+
1320
+ w.pid_file = File.join(RAILS_ROOT, "log/mongrel.pid")
1321
+
1322
+ # clean pid files before start if necessary
1323
+ w.behavior(:clean_pid_file)
1324
+
1325
+ # determine the state on startup
1326
+ w.transition(:init, { true => :up, false => :start }) do |on|
1327
+ on.condition(:process_running) do |c|
1328
+ c.running = true
1329
+ end
1330
+ end
1331
+
1332
+ # determine when process has finished starting
1333
+ w.transition([:start, :restart], :up) do |on|
1334
+ on.condition(:process_running) do |c|
1335
+ c.running = true
1336
+ end
1337
+
1338
+ # failsafe
1339
+ on.condition(:tries) do |c|
1340
+ c.times = 5
1341
+ c.transition = :start
1342
+ end
1343
+ end
1344
+
1345
+ # start if process is not running
1346
+ w.transition(:up, :start) do |on|
1347
+ on.condition(:process_exits)
1348
+ end
1349
+
1350
+ # restart if memory or cpu is too high
1351
+ w.transition(:up, :restart) do |on|
1352
+ on.condition(:memory_usage) do |c|
1353
+ c.interval = 20
1354
+ c.above = 50.megabytes
1355
+ c.times = [3, 5]
1356
+ end
1357
+
1358
+ on.condition(:cpu_usage) do |c|
1359
+ c.interval = 10
1360
+ c.above = 10.percent
1361
+ c.times = [3, 5]
1362
+ end
1363
+ end
1364
+
1365
+ # lifecycle
1366
+ w.lifecycle do |on|
1367
+ on.condition(:flapping) do |c|
1368
+ c.to_state = [:start, :restart]
1369
+ c.times = 5
1370
+ c.within = 5.minute
1371
+ c.transition = :unmonitored
1372
+ c.retry_in = 10.minutes
1373
+ c.retry_times = 5
1374
+ c.retry_within = 2.hours
1375
+ end
1376
+ end
1377
+ end
1378
+
1379
+ ```
1380
+
1381
+ A bit longer, I know, but very straightforward once you understand how the
1382
+ `transition` calls work. The `name`, `interval`, `start`, `stop`, and
1383
+ `pid_file` attributes should be familiar. We also specify the `clean_pid_file`
1384
+ behavior.
1385
+
1386
+ Before jumping into the code, it's important to understand the different
1387
+ states that a Watch can have, and how that state changes over time. At any
1388
+ given time, a Watch will be in one of the `init`, `up`, `start`, or `restart`
1389
+ states. As different conditions are satisfied, the Watch will progress from
1390
+ state to state, enabling and disabling conditions along the way.
1391
+
1392
+ When mcproc first starts, each Watch is placed in the `init` state.
1393
+
1394
+ You'll use the `transition` method to tell mcproc how to transition between
1395
+ states. It takes two arguments. The first argument may be either a symbol or
1396
+ an array of symbols representing the state or states during which the
1397
+ specified conditions should be enabled. The second argument may be either a
1398
+ symbol or a hash. If it is a symbol, then that is the state that will be
1399
+ transitioned to if any of the conditions return `true`. If it is a hash, then
1400
+ that hash must have both `true` and `false` keys, each of which points to a
1401
+ symbol that represents the state to transition to given the corresponding
1402
+ return from the single condition that must be specified.
1403
+
1404
+ ```ruby
1405
+ # determine the state on startup
1406
+ w.transition(:init, { true => :up, false => :start }) do |on|
1407
+ on.condition(:process_running) do |c|
1408
+ c.running = true
1409
+ end
1410
+ end
1411
+ ```
1412
+
1413
+ The first transition block tells mcproc what to do when the Watch is in the
1414
+ `init` state (first argument). This is where I tell mcproc how to determine if my
1415
+ task is already running. Since I'm monitoring a process, I can use the
1416
+ `process_running` condition to determine whether the process is running. If
1417
+ the process is running, it will return true, otherwise it will return false.
1418
+ Since I sent a hash as the second argument to `transition`, the return from
1419
+ `process_running` will determine which of the two states will be transitioned
1420
+ to. If the process is running, the return is true and mcproc will put the Watch
1421
+ into the `up` state. If the process is not running, the return is false and
1422
+ mcproc will put the Watch into the `start` state.
1423
+
1424
+ ```ruby
1425
+ # determine when process has finished starting
1426
+ w.transition([:start, :restart], :up) do |on|
1427
+ on.condition(:process_running) do |c|
1428
+ c.running = true
1429
+ end
1430
+
1431
+ ...
1432
+ end
1433
+ ```
1434
+
1435
+ If mcproc has determined that my process isn't running, the Watch will be put
1436
+ into the `start` state. Upon entering this state, the `start` command that I
1437
+ specified on the Watch will be called. In addition, the above transition
1438
+ specifies a condition that should be enabled when in either the `start` or
1439
+ `restart` states. The condition is another `process_running`, however this
1440
+ time I'm only interested in moving to another state once it returns `true`. A
1441
+ `true` return from this condition means that the process is running and it's
1442
+ ok to transition to the `up` state (second argument to `transition`).
1443
+
1444
+ ```ruby
1445
+ # determine when process has finished starting
1446
+ w.transition([:start, :restart], :up) do |on|
1447
+ ...
1448
+
1449
+ # failsafe
1450
+ on.condition(:tries) do |c|
1451
+ c.times = 5
1452
+ c.transition = :start
1453
+ end
1454
+ end
1455
+ ```
1456
+
1457
+ The other half of this transition uses the `tries` condition to ensure that
1458
+ mcproc doesn't get stuck in this state. It's possible that the process could go
1459
+ down while the transition is being made, in which case mcproc would end up
1460
+ polling forever to see if the process is up. Here I've specified that if this
1461
+ condition is called five times, mcproc should override the normal transition
1462
+ destination and move to the `start` state instead. If you specify a
1463
+ `transition` attribute on any condition, that state will be transferred to
1464
+ instead of the normal transfer destination.
1465
+
1466
+ ```ruby
1467
+ # start if process is not running
1468
+ w.transition(:up, :start) do |on|
1469
+ on.condition(:process_exits)
1470
+ end
1471
+ ```
1472
+
1473
+ This is where the event based system comes into play. Once in the `up` state,
1474
+ I want to be notified when my process exits. The `process_exits` condition
1475
+ registers a callback that will trigger a transition change when it is fired
1476
+ off. Event conditions (like this one) cannot be used in transitions that have
1477
+ a hash for the second argument (as they do not return true or false).
1478
+
1479
+ ```ruby
1480
+ # restart if memory or cpu is too high
1481
+ w.transition(:up, :restart) do |on|
1482
+ on.condition(:memory_usage) do |c|
1483
+ c.interval = 20
1484
+ c.above = 50.megabytes
1485
+ c.times = [3, 5]
1486
+ end
1487
+
1488
+ on.condition(:cpu_usage) do |c|
1489
+ c.interval = 10
1490
+ c.above = 10.percent
1491
+ c.times = [3, 5]
1492
+ end
1493
+ end
1494
+ ```
1495
+
1496
+ Notice that I can have multiple transitions with the same start state. In this
1497
+ case, I want to have the `memory_usage` and `cpu_usage` poll conditions going
1498
+ at the same time that I listen for the process exit event. In the case of
1499
+ runaway CPU or memory usage, however, I want to transition to the `restart`
1500
+ state. When a Watch enters the `restart` state it will either call the
1501
+ `restart` command that you specified, or if none has been set, call the `stop`
1502
+ and then `start` commands.
1503
+
1504
+
1505
+ /////////////////////////////////////////////////////////////////////////////
1506
+ /////////////////////////////////////////////////////////////////////////////
1507
+
1508
+ Extend McProc with your own Conditions
1509
+ --------------------------------------
1510
+
1511
+ McProc was designed from the start to allow you to easily write your own custom
1512
+ conditions, making it simple to add tests that are application specific.
1513
+
1514
+
1515
+ /////////////////////////////////////////////////////////////////////////////
1516
+ /////////////////////////////////////////////////////////////////////////////
1517
+
1518
+ Contribute
1519
+ ----------
1520
+
1521
+ If you'd like to hack on mcproc itself or contribute fixes or new functionality,
1522
+ read this section.
1523
+
1524
+ The codebase can be found at https://github.com/linux-support/mcproc. To get started,
1525
+ fork mcproc on GitHub into your own account and then pull that down to your local
1526
+ machine. This way you can easily submit changes via Pull Requests later on.
1527
+
1528
+ ```terminal
1529
+ $ git clone git@github.com:yourusername/mcproc
1530
+ ```
1531
+
1532
+ We recommend using link:https://github.com/sstephenson/rbenv[rbenv] and
1533
+ link:https://github.com/sstephenson/ruby-build[ruby-build] to manage multiple
1534
+ versions of Ruby and their separate gemsets. Any changes to mcproc must work on
1535
+ both Ruby 1.8.7-p352 and 1.9.3-p0.
1536
+
1537
+ McProc uses link:http://gembundler.com/[bundler] to deal with development
1538
+ dependencies. Once you have the code locally, you can pull in all the
1539
+ dependencies like so:
1540
+
1541
+ ```terminal
1542
+ $ cd mcproc
1543
+ $ bundle install
1544
+ ```
1545
+
1546
+ In order for process events to function during development you'll need to
1547
+ compile the C extensions:
1548
+
1549
+ ```terminal
1550
+ $ cd ext/god
1551
+ $ ruby extconf.rb
1552
+ $ make
1553
+ $ cd ../..
1554
+ ```
1555
+
1556
+ Now you're ready to run the tests and make sure everything is configured
1557
+ properly. On Linux you'll need to run the tests as root in order for the
1558
+ events system to load. On MacOS there is no need to run the tests as root.
1559
+
1560
+ ```terminal
1561
+ $ [sudo] bundle exec rake
1562
+ ```
1563
+
1564
+ To run your development mcproc to make sure config files and such still work
1565
+ properly, just run:
1566
+
1567
+ ```terminal
1568
+ $ [sudo] bundle exec mcproc -c myconfig.mcproc -D
1569
+ ```
1570
+
1571
+ There are a bunch of example config files for various scenarios in
1572
+ `test/configs` that you can try out. For big new features, it's great to add a
1573
+ new test config showing off the usage of the feature.
1574
+
1575
+ If you intend to contribute your changes back to mcproc core, make sure you create
1576
+ a new branch and do your work there. Then, when your changes are ready to be
1577
+ shared with the world, push them to your fork and issue a Pull Request against
1578
+ linux-support/mcproc. Make sure to describe your changes in detail and add relevant
1579
+ tests.
1580
+
1581
+ Any feature additions or changes should be accompanied by corresponding updates
1582
+ to the documentation. It can be found in the `doc` directory. The
1583
+ documentation is done in link:http://github.com/github/gollum[Gollum] format
1584
+ and then converted into the public site at http://www.linux-support.com/mcproc. To see the
1585
+ generated site locally you'll first need to commit your changes to git and then
1586
+ issue the following:
1587
+
1588
+ ```terminal
1589
+ $ bundle exec rake site
1590
+ ```
1591
+
1592
+ This will open the site in your browser so you can check for correctness.