mcproc 2016.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. checksums.yaml +7 -0
  2. data/Announce.txt +135 -0
  3. data/Gemfile +9 -0
  4. data/History.txt +469 -0
  5. data/LICENSE +22 -0
  6. data/README.md +37 -0
  7. data/Rakefile +185 -0
  8. data/TODO.md +37 -0
  9. data/bin/mcproc +134 -0
  10. data/doc/intro.asciidoc +20 -0
  11. data/doc/mcproc.asciidoc +1592 -0
  12. data/ext/god/.gitignore +5 -0
  13. data/ext/god/extconf.rb +56 -0
  14. data/ext/god/kqueue_handler.c +133 -0
  15. data/ext/god/netlink_handler.c +182 -0
  16. data/lib/god.rb +780 -0
  17. data/lib/god/behavior.rb +52 -0
  18. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  19. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  20. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  21. data/lib/god/cli/command.rb +268 -0
  22. data/lib/god/cli/run.rb +170 -0
  23. data/lib/god/cli/version.rb +23 -0
  24. data/lib/god/compat19.rb +33 -0
  25. data/lib/god/condition.rb +96 -0
  26. data/lib/god/conditions/always.rb +36 -0
  27. data/lib/god/conditions/complex.rb +86 -0
  28. data/lib/god/conditions/cpu_usage.rb +80 -0
  29. data/lib/god/conditions/degrading_lambda.rb +52 -0
  30. data/lib/god/conditions/disk_usage.rb +32 -0
  31. data/lib/god/conditions/file_mtime.rb +28 -0
  32. data/lib/god/conditions/file_touched.rb +44 -0
  33. data/lib/god/conditions/flapping.rb +128 -0
  34. data/lib/god/conditions/http_response_code.rb +184 -0
  35. data/lib/god/conditions/lambda.rb +25 -0
  36. data/lib/god/conditions/memory_usage.rb +82 -0
  37. data/lib/god/conditions/process_exits.rb +66 -0
  38. data/lib/god/conditions/process_running.rb +63 -0
  39. data/lib/god/conditions/socket_responding.rb +142 -0
  40. data/lib/god/conditions/tries.rb +44 -0
  41. data/lib/god/configurable.rb +57 -0
  42. data/lib/god/contact.rb +114 -0
  43. data/lib/god/contacts/airbrake.rb +44 -0
  44. data/lib/god/contacts/campfire.rb +121 -0
  45. data/lib/god/contacts/email.rb +130 -0
  46. data/lib/god/contacts/hipchat.rb +117 -0
  47. data/lib/god/contacts/jabber.rb +75 -0
  48. data/lib/god/contacts/prowl.rb +57 -0
  49. data/lib/god/contacts/scout.rb +55 -0
  50. data/lib/god/contacts/sensu.rb +59 -0
  51. data/lib/god/contacts/slack.rb +98 -0
  52. data/lib/god/contacts/statsd.rb +46 -0
  53. data/lib/god/contacts/twitter.rb +51 -0
  54. data/lib/god/contacts/webhook.rb +74 -0
  55. data/lib/god/driver.rb +238 -0
  56. data/lib/god/errors.rb +24 -0
  57. data/lib/god/event_handler.rb +112 -0
  58. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  59. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  60. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  61. data/lib/god/logger.rb +109 -0
  62. data/lib/god/metric.rb +87 -0
  63. data/lib/god/process.rb +381 -0
  64. data/lib/god/registry.rb +32 -0
  65. data/lib/god/simple_logger.rb +59 -0
  66. data/lib/god/socket.rb +113 -0
  67. data/lib/god/sugar.rb +62 -0
  68. data/lib/god/sys_logger.rb +45 -0
  69. data/lib/god/system/portable_poller.rb +42 -0
  70. data/lib/god/system/process.rb +50 -0
  71. data/lib/god/system/slash_proc_poller.rb +92 -0
  72. data/lib/god/task.rb +552 -0
  73. data/lib/god/timeline.rb +25 -0
  74. data/lib/god/trigger.rb +43 -0
  75. data/lib/god/watch.rb +340 -0
  76. data/mcproc.gemspec +192 -0
  77. data/test/configs/child_events/child_events.god +44 -0
  78. data/test/configs/child_events/simple_server.rb +3 -0
  79. data/test/configs/child_polls/child_polls.god +37 -0
  80. data/test/configs/child_polls/simple_server.rb +12 -0
  81. data/test/configs/complex/complex.god +59 -0
  82. data/test/configs/complex/simple_server.rb +3 -0
  83. data/test/configs/contact/contact.god +118 -0
  84. data/test/configs/contact/simple_server.rb +3 -0
  85. data/test/configs/daemon_events/daemon_events.god +37 -0
  86. data/test/configs/daemon_events/simple_server.rb +8 -0
  87. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  88. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  89. data/test/configs/daemon_polls/simple_server.rb +6 -0
  90. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  91. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  92. data/test/configs/keepalive/keepalive.god +9 -0
  93. data/test/configs/keepalive/keepalive.rb +12 -0
  94. data/test/configs/lifecycle/lifecycle.god +25 -0
  95. data/test/configs/matias/matias.god +50 -0
  96. data/test/configs/real.rb +59 -0
  97. data/test/configs/running_load/running_load.god +16 -0
  98. data/test/configs/stop_options/simple_server.rb +12 -0
  99. data/test/configs/stop_options/stop_options.god +39 -0
  100. data/test/configs/stress/simple_server.rb +3 -0
  101. data/test/configs/stress/stress.god +15 -0
  102. data/test/configs/task/logs/.placeholder +0 -0
  103. data/test/configs/task/task.god +26 -0
  104. data/test/configs/test.rb +61 -0
  105. data/test/configs/usr1_trapper.rb +10 -0
  106. data/test/helper.rb +172 -0
  107. data/test/suite.rb +6 -0
  108. data/test/test_airbrake.rb +14 -0
  109. data/test/test_behavior.rb +18 -0
  110. data/test/test_campfire.rb +22 -0
  111. data/test/test_condition.rb +52 -0
  112. data/test/test_conditions_disk_usage.rb +50 -0
  113. data/test/test_conditions_http_response_code.rb +109 -0
  114. data/test/test_conditions_process_running.rb +40 -0
  115. data/test/test_conditions_socket_responding.rb +176 -0
  116. data/test/test_conditions_tries.rb +67 -0
  117. data/test/test_contact.rb +109 -0
  118. data/test/test_driver.rb +26 -0
  119. data/test/test_email.rb +34 -0
  120. data/test/test_event_handler.rb +82 -0
  121. data/test/test_god.rb +710 -0
  122. data/test/test_god_system.rb +201 -0
  123. data/test/test_handlers_kqueue_handler.rb +16 -0
  124. data/test/test_hipchat.rb +23 -0
  125. data/test/test_jabber.rb +29 -0
  126. data/test/test_logger.rb +55 -0
  127. data/test/test_metric.rb +74 -0
  128. data/test/test_process.rb +263 -0
  129. data/test/test_prowl.rb +15 -0
  130. data/test/test_registry.rb +15 -0
  131. data/test/test_sensu.rb +11 -0
  132. data/test/test_slack.rb +57 -0
  133. data/test/test_socket.rb +34 -0
  134. data/test/test_statsd.rb +22 -0
  135. data/test/test_sugar.rb +42 -0
  136. data/test/test_system_portable_poller.rb +17 -0
  137. data/test/test_system_process.rb +30 -0
  138. data/test/test_task.rb +246 -0
  139. data/test/test_timeline.rb +37 -0
  140. data/test/test_trigger.rb +63 -0
  141. data/test/test_watch.rb +286 -0
  142. data/test/test_webhook.rb +22 -0
  143. metadata +475 -0
data/lib/god/metric.rb ADDED
@@ -0,0 +1,87 @@
1
module God
  # A Metric holds the conditions attached to a watch. The start_if,
  # restart_if, stop_if, and transition methods yield an instance of Metric
  # to their blocks.
  class Metric
    # The Watch this Metric belongs to.
    attr_accessor :watch

    # The destination Hash in canonical hash form. Example:
    #   { true => :up, false => :restart}
    attr_accessor :destination

    # The Array of Condition instances added so far.
    attr_accessor :conditions

    # Initialize a new Metric with an empty condition list.
    #
    # watch       - The Watch.
    # destination - The optional destination Hash in canonical hash form.
    def initialize(watch, destination = nil)
      self.watch = watch
      self.destination = destination
      self.conditions = []
    end

    # Public: Build the Condition of the given kind, hand it to the optional
    # block so the config file can set its attributes, then prepare,
    # validate and register it. The process is aborted when the kind is
    # unknown, when the condition is invalid, or when a poll condition has
    # no interval and the Watch has none to inherit.
    #
    # kind - The Symbol name of the condition.
    #
    # Returns the Array of conditions (callers treat this as nothing).
    def condition(kind)
      cond =
        begin
          Condition.generate(kind, watch)
        rescue NoSuchConditionError => e
          abort e.message
        end

      # Let the config block configure the freshly built condition.
      yield cond if block_given?

      cond.prepare

      # Generic validity first, then the condition's own check.
      abort "Exiting on invalid condition" unless Condition.valid?(cond) && cond.valid?

      # A poll condition without its own interval falls back to the Watch's.
      if cond.kind_of?(PollCondition) && !cond.interval
        unless watch.interval
          abort "No interval set for Condition '#{cond.class.name}' in Watch " \
                "'#{watch.name}', and no default Watch interval from " \
                "which to inherit."
        end

        cond.interval = watch.interval
      end

      conditions << cond
    end

    # Enable every condition of this Metric by attaching it to the Watch.
    # Poll conditions will be scheduled and event/trigger conditions will be
    # registered.
    #
    # Returns nothing.
    def enable
      conditions.each { |cond| watch.attach(cond) }
    end

    # Disable every condition of this Metric by detaching it from the Watch.
    # Poll conditions will be halted and event/trigger conditions will be
    # deregistered.
    #
    # Returns nothing.
    def disable
      conditions.each { |cond| watch.detach(cond) }
    end
  end
end
data/lib/god/process.rb ADDED
@@ -0,0 +1,381 @@
1
module God
  # Describes a single operating-system process supervised by god: the
  # start/stop/restart commands, the user, group, chroot and environment it
  # runs under, and where its PID file and logs live.
  class Process
    # Actions whose command launches the process, so the resulting PID has
    # to be recorded when god itself is tracking the PID.
    WRITES_PID = [:start, :restart].freeze

    attr_accessor :name, :uid, :gid, :log, :log_cmd, :err_log, :err_log_cmd,
                  :start, :stop, :restart, :unix_socket, :chroot, :env, :dir,
                  :stop_timeout, :stop_signal, :umask

    # Initialize a new Process. Output goes to /dev/null, no PID file is
    # configured (so god tracks the PID itself), and the stop timeout and
    # signal come from God::STOP_TIMEOUT_DEFAULT / God::STOP_SIGNAL_DEFAULT.
    def initialize
      self.log = '/dev/null'

      @pid_file = nil
      @tracking_pid = true
      @user_log = false
      @pid = nil
      @unix_socket = nil
      @log_cmd = nil
      @stop_timeout = God::STOP_TIMEOUT_DEFAULT
      @stop_signal = God::STOP_SIGNAL_DEFAULT
    end

    # Is the process whose PID we know currently present on the system?
    #
    # Returns false when no PID is known, otherwise the result of
    # System::Process#exists? for that PID.
    def alive?
      current = self.pid
      current ? System::Process.new(current).exists? : false
    end

    # Check whether the given path is writable by the configured uid/gid
    # (inside the configured chroot, if any). The check runs in a forked
    # child so that dropping privileges does not affect god itself.
    #
    # file - The String path to test.
    #
    # Returns true if the child found the path writable, false otherwise
    # (including when the user/group lookup or privilege drop failed).
    def file_writable?(file)
      pid = fork do
        begin
          user = passwd_entry
          group = group_entry
          # An explicit gid wins; otherwise use the user's primary group.
          gid_num = group ? group.gid : (user && user.gid)

          ::Dir.chroot(self.chroot) if self.chroot
          ::Process.groups = [gid_num] if gid_num
          # initgroups needs the login name, even when uid was given as an Integer.
          ::Process.initgroups(user.name, gid_num) if user && gid_num
          ::Process::Sys.setgid(gid_num) if gid_num
          ::Process::Sys.setuid(user.uid) if user
        rescue ArgumentError, Errno::EPERM, Errno::ENOENT
          # exit! (not exit) so the child never runs at_exit handlers
          # inherited from the parent.
          exit!(1)
        end

        File.writable?(file_in_chroot(file)) ? exit!(0) : exit!(1)
      end

      _, status = ::Process.waitpid2(pid)
      status.exitstatus == 0
    end

    # Validate the configuration of this Process. Every problem found is
    # reported through applog at :error level; validation does not stop at
    # the first failure.
    #
    # Returns true if the configuration is usable, false otherwise.
    def valid?
      # determine if we're tracking pid or not
      self.pid_file

      valid = true

      # a start command must be specified
      if self.start.nil?
        valid = false
        applog(self, :error, "No start command was specified")
      end

      # uid must exist if specified (name or numeric id)
      if self.uid
        begin
          passwd_entry
        rescue ArgumentError
          valid = false
          applog(self, :error, "UID for '#{self.uid}' does not exist")
        end
      end

      # gid must exist if specified (name or numeric id)
      if self.gid
        begin
          group_entry
        rescue ArgumentError
          valid = false
          applog(self, :error, "GID for '#{self.gid}' does not exist")
        end
      end

      # dir must exist and be a directory if specified
      if self.dir
        if !File.exist?(self.dir)
          valid = false
          applog(self, :error, "Specified directory '#{self.dir}' does not exist")
        elsif !File.directory?(self.dir)
          valid = false
          applog(self, :error, "Specified directory '#{self.dir}' is not a directory")
        end
      end

      # pid dir must exist if specified
      if !@tracking_pid && !File.exist?(File.dirname(self.pid_file))
        valid = false
        applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' does not exist")
      end

      # pid dir must be writable if specified
      if !@tracking_pid && File.exist?(File.dirname(self.pid_file)) && !file_writable?(File.dirname(self.pid_file))
        valid = false
        applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' is not writable by #{self.uid || Etc.getlogin}")
      end

      # log dir must exist
      if !File.exist?(File.dirname(self.log))
        valid = false
        applog(self, :error, "Log directory '#{File.dirname(self.log)}' does not exist")
      end

      # log file or dir must be writable
      if File.exist?(self.log)
        unless file_writable?(self.log)
          valid = false
          applog(self, :error, "Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
        end
      else
        unless file_writable?(File.dirname(self.log))
          valid = false
          applog(self, :error, "Log directory '#{File.dirname(self.log)}' is not writable by #{self.uid || Etc.getlogin}")
        end
      end

      # chroot directory must exist and have /dev/null in it
      if self.chroot
        if !File.directory?(self.chroot)
          valid = false
          applog(self, :error, "CHROOT directory '#{self.chroot}' does not exist")
        end

        if !File.exist?(File.join(self.chroot, '/dev/null'))
          valid = false
          applog(self, :error, "CHROOT directory '#{self.chroot}' does not contain '/dev/null'")
        end
      end

      valid
    end

    # DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
    # No really, trust me. Use the instance variable.
    #
    # Set the PID file path. A non-nil value means the process writes its
    # own PID file, so god stops tracking the PID itself; nil switches
    # tracking back on.
    def pid_file=(value)
      @tracking_pid = !value
      @pid_file = value
    end

    # The PID file path, falling back to (and memoizing) default_pid_file
    # when none was configured.
    def pid_file
      @pid_file ||= default_pid_file
    end

    # Fetch the PID from pid_file. If the pid_file does not exist (or does
    # not consist solely of digits), then use the PID from the last time it
    # was read. If it has never been read, then return nil.
    #
    # Returns Integer(pid) or nil
    def pid
      contents =
        begin
          File.read(self.pid_file).strip
        rescue StandardError
          # best effort: an unreadable PID file is treated as empty
          ''
        end

      # \A..\z anchors the whole string; ^..$ would accept any all-digit
      # *line* of a multi-line file and to_i would then yield a bogus PID.
      real_pid = contents =~ /\A\d+\z/ ? contents.to_i : nil

      if real_pid
        @pid = real_pid
      else
        @pid
      end
    end

    # Send the given signal to this process. Numeric strings are converted
    # to Integers; names (String or Symbol) are passed through unchanged.
    # Failures to deliver the signal are ignored.
    #
    # Returns nothing
    def signal(sig)
      number = sig.respond_to?(:to_i) ? sig.to_i : 0
      sig = number if number != 0
      target = self.pid
      applog(self, :info, "#{self.name} sending signal '#{sig}' to pid #{target}")
      ::Process.kill(sig, target) rescue nil
    end

    # Run the start command.
    def start!
      call_action(:start)
    end

    # Run the stop command (or the default killer when none is configured).
    def stop!
      call_action(:stop)
    end

    # Run the restart command.
    def restart!
      call_action(:restart)
    end

    # The PID file path used when none is configured:
    # "<God.pid_file_directory>/<name>.pid".
    def default_pid_file
      File.join(God.pid_file_directory, "#{self.name}.pid")
    end

    # Execute the command configured for the given action.
    #
    # A String command is run through spawn: double-forked (and not waited
    # for) when god tracks the PID of a start/restart, otherwise spawned
    # once and waited for. A Proc command is simply called. A missing stop
    # command is replaced by a lambda that sends the stop signal, polls
    # once a second up to stop_timeout times, then sends SIGKILL.
    #
    # action - The Symbol action (:start, :stop or :restart).
    #
    # Raises NotImplementedError if the command is neither String nor Proc.
    def call_action(action)
      command = send(action)

      if action == :stop && command.nil?
        pid = self.pid
        command = lambda do
          applog(self, :info, "#{self.name} stop: default lambda killer")

          ::Process.kill(@stop_signal, pid) rescue nil
          applog(self, :info, "#{self.name} sent SIG#{@stop_signal}")

          # Poll to see if it's dead
          pid_not_found = false
          @stop_timeout.times do
            if pid
              begin
                ::Process.kill(0, pid)
              rescue Errno::ESRCH
                # It died. Good.
                applog(self, :info, "#{self.name} process stopped")
                return
              end
            else
              # warn only once about the missing pid
              applog(self, :warn, "#{self.name} pid not found in #{self.pid_file}") unless pid_not_found
              pid_not_found = true
            end

            sleep 1
          end

          ::Process.kill('KILL', pid) rescue nil
          applog(self, :warn, "#{self.name} still alive after #{@stop_timeout}s; sent SIGKILL")
        end
      end

      if command.kind_of?(String)
        pid = nil

        if WRITES_PID.include?(action) && @tracking_pid
          # double fork god-daemonized processes
          # we don't want to wait for them to finish
          r, w = IO.pipe
          begin
            opid = fork do
              STDOUT.reopen(w)
              r.close
              pid = self.spawn(command)
              puts pid.to_s # send pid back to forker
              exit!(0)
            end

            ::Process.waitpid(opid, 0)
            w.close
            reported = r.gets
            if reported
              pid = reported.chomp
            else
              # the intermediate child died before reporting a pid
              applog(self, :error, "#{self.name} #{action} command did not report a pid")
            end
          ensure
            # make sure the file descriptors get closed no matter what
            r.close rescue nil
            w.close rescue nil
          end
        else
          # single fork self-daemonizing processes
          # we want to wait for them to finish
          pid = self.spawn(command)
          status = ::Process.waitpid2(pid, 0)
          # the exit code lives in the high byte of the wait status
          exit_code = status[1] >> 8

          if exit_code != 0
            applog(self, :warn, "#{self.name} #{action} command exited with non-zero code = #{exit_code}")
          end

          ensure_stop if action == :stop
        end

        if @tracking_pid || (@pid_file.nil? && WRITES_PID.include?(action))
          File.open(default_pid_file, 'w') do |f|
            f.write pid
          end

          @tracking_pid = true
          @pid_file = default_pid_file
        end
      elsif command.kind_of?(Proc)
        # lambda command
        command.call
      else
        raise NotImplementedError
      end
    end

    # Fork/exec the given command, returns immediately.
    #
    # In the child: apply umask, chroot, a new session, the configured
    # group and user, working directory (defaulting to '/'), redirect
    # stdin/stdout/stderr, close descriptors 3..256, export env, then exec.
    #
    # command - The String containing the shell command.
    #
    # Returns the Integer PID of the forked child.
    def spawn(command)
      fork do
        File.umask self.umask if self.umask
        user = passwd_entry
        group = group_entry
        uid_num = user.uid if user
        # An explicit gid wins; otherwise use the user's primary group.
        gid_num = group ? group.gid : (user && user.gid)

        ::Dir.chroot(self.chroot) if self.chroot
        ::Process.setsid
        ::Process.groups = [gid_num] if gid_num
        # initgroups needs the login name, even when uid was given as an Integer.
        ::Process.initgroups(user.name, gid_num) if user && gid_num
        ::Process::Sys.setgid(gid_num) if gid_num
        ::Process::Sys.setuid(uid_num) if user
        self.dir ||= '/'
        Dir.chdir self.dir
        $0 = command
        STDIN.reopen "/dev/null"
        if self.log_cmd
          STDOUT.reopen IO.popen(self.log_cmd, "a")
        else
          STDOUT.reopen file_in_chroot(self.log), "a"
        end
        if err_log_cmd
          STDERR.reopen IO.popen(err_log_cmd, "a")
        elsif err_log && (log_cmd || err_log != log)
          STDERR.reopen file_in_chroot(err_log), "a"
        else
          STDERR.reopen STDOUT
        end

        # close any other file descriptors
        3.upto(256) { |fd| IO::new(fd).close rescue nil }

        if self.env && self.env.is_a?(Hash)
          self.env.each do |(key, value)|
            ENV[key] = value.to_s
          end
        end

        exec command unless command.empty?
      end
    end

    # Ensure that a stop command actually stops the process. Polls once a
    # second up to stop_timeout times and force kills if necessary.
    #
    # Returns nothing
    def ensure_stop
      applog(self, :warn, "#{self.name} ensuring stop...")

      unless self.pid
        applog(self, :warn, "#{self.name} stop called but pid is uknown")
        return
      end

      # Poll to see if it's dead
      @stop_timeout.times do
        begin
          ::Process.kill(0, self.pid)
        rescue Errno::ESRCH
          # It died. Good.
          return
        end

        sleep 1
      end

      # last resort
      ::Process.kill('KILL', self.pid) rescue nil
      applog(self, :warn, "#{self.name} still alive after #{@stop_timeout}s; sent SIGKILL")
    end

    private

    # Translate a host path into the path seen from inside the chroot by
    # stripping the (expanded) chroot prefix from its start. Without a
    # chroot the path is returned unchanged.
    def file_in_chroot(file)
      return file unless self.chroot

      file.sub(/\A#{Regexp.escape(File.expand_path(self.chroot))}/, '')
    end

    # Look up the passwd entry for the configured uid, which may be a login
    # name or an Integer id.
    #
    # Returns the entry, or nil when no uid is configured.
    # Raises ArgumentError if the user does not exist.
    def passwd_entry
      return nil unless self.uid

      entry = self.uid.is_a?(Integer) ? Etc.getpwuid(self.uid) : Etc.getpwnam(self.uid)
      raise ArgumentError, "can't find user for #{self.uid}" unless entry
      entry
    end

    # Look up the group entry for the configured gid, which may be a group
    # name or an Integer id.
    #
    # Returns the entry, or nil when no gid is configured.
    # Raises ArgumentError if the group does not exist.
    def group_entry
      return nil unless self.gid

      entry = self.gid.is_a?(Integer) ? Etc.getgrgid(self.gid) : Etc.getgrnam(self.gid)
      raise ArgumentError, "can't find group for #{self.gid}" unless entry
      entry
    end
  end
end