mcproc 2016.2.20

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. checksums.yaml +7 -0
  2. data/Announce.txt +135 -0
  3. data/Gemfile +9 -0
  4. data/History.txt +469 -0
  5. data/LICENSE +22 -0
  6. data/README.md +37 -0
  7. data/Rakefile +185 -0
  8. data/TODO.md +37 -0
  9. data/bin/mcproc +134 -0
  10. data/doc/intro.asciidoc +20 -0
  11. data/doc/mcproc.asciidoc +1592 -0
  12. data/ext/god/.gitignore +5 -0
  13. data/ext/god/extconf.rb +56 -0
  14. data/ext/god/kqueue_handler.c +133 -0
  15. data/ext/god/netlink_handler.c +182 -0
  16. data/lib/god.rb +780 -0
  17. data/lib/god/behavior.rb +52 -0
  18. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  19. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  20. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  21. data/lib/god/cli/command.rb +268 -0
  22. data/lib/god/cli/run.rb +170 -0
  23. data/lib/god/cli/version.rb +23 -0
  24. data/lib/god/compat19.rb +33 -0
  25. data/lib/god/condition.rb +96 -0
  26. data/lib/god/conditions/always.rb +36 -0
  27. data/lib/god/conditions/complex.rb +86 -0
  28. data/lib/god/conditions/cpu_usage.rb +80 -0
  29. data/lib/god/conditions/degrading_lambda.rb +52 -0
  30. data/lib/god/conditions/disk_usage.rb +32 -0
  31. data/lib/god/conditions/file_mtime.rb +28 -0
  32. data/lib/god/conditions/file_touched.rb +44 -0
  33. data/lib/god/conditions/flapping.rb +128 -0
  34. data/lib/god/conditions/http_response_code.rb +184 -0
  35. data/lib/god/conditions/lambda.rb +25 -0
  36. data/lib/god/conditions/memory_usage.rb +82 -0
  37. data/lib/god/conditions/process_exits.rb +66 -0
  38. data/lib/god/conditions/process_running.rb +63 -0
  39. data/lib/god/conditions/socket_responding.rb +142 -0
  40. data/lib/god/conditions/tries.rb +44 -0
  41. data/lib/god/configurable.rb +57 -0
  42. data/lib/god/contact.rb +114 -0
  43. data/lib/god/contacts/airbrake.rb +44 -0
  44. data/lib/god/contacts/campfire.rb +121 -0
  45. data/lib/god/contacts/email.rb +130 -0
  46. data/lib/god/contacts/hipchat.rb +117 -0
  47. data/lib/god/contacts/jabber.rb +75 -0
  48. data/lib/god/contacts/prowl.rb +57 -0
  49. data/lib/god/contacts/scout.rb +55 -0
  50. data/lib/god/contacts/sensu.rb +59 -0
  51. data/lib/god/contacts/slack.rb +98 -0
  52. data/lib/god/contacts/statsd.rb +46 -0
  53. data/lib/god/contacts/twitter.rb +51 -0
  54. data/lib/god/contacts/webhook.rb +74 -0
  55. data/lib/god/driver.rb +238 -0
  56. data/lib/god/errors.rb +24 -0
  57. data/lib/god/event_handler.rb +112 -0
  58. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  59. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  60. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  61. data/lib/god/logger.rb +109 -0
  62. data/lib/god/metric.rb +87 -0
  63. data/lib/god/process.rb +381 -0
  64. data/lib/god/registry.rb +32 -0
  65. data/lib/god/simple_logger.rb +59 -0
  66. data/lib/god/socket.rb +113 -0
  67. data/lib/god/sugar.rb +62 -0
  68. data/lib/god/sys_logger.rb +45 -0
  69. data/lib/god/system/portable_poller.rb +42 -0
  70. data/lib/god/system/process.rb +50 -0
  71. data/lib/god/system/slash_proc_poller.rb +92 -0
  72. data/lib/god/task.rb +552 -0
  73. data/lib/god/timeline.rb +25 -0
  74. data/lib/god/trigger.rb +43 -0
  75. data/lib/god/watch.rb +340 -0
  76. data/mcproc.gemspec +192 -0
  77. data/test/configs/child_events/child_events.god +44 -0
  78. data/test/configs/child_events/simple_server.rb +3 -0
  79. data/test/configs/child_polls/child_polls.god +37 -0
  80. data/test/configs/child_polls/simple_server.rb +12 -0
  81. data/test/configs/complex/complex.god +59 -0
  82. data/test/configs/complex/simple_server.rb +3 -0
  83. data/test/configs/contact/contact.god +118 -0
  84. data/test/configs/contact/simple_server.rb +3 -0
  85. data/test/configs/daemon_events/daemon_events.god +37 -0
  86. data/test/configs/daemon_events/simple_server.rb +8 -0
  87. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  88. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  89. data/test/configs/daemon_polls/simple_server.rb +6 -0
  90. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  91. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  92. data/test/configs/keepalive/keepalive.god +9 -0
  93. data/test/configs/keepalive/keepalive.rb +12 -0
  94. data/test/configs/lifecycle/lifecycle.god +25 -0
  95. data/test/configs/matias/matias.god +50 -0
  96. data/test/configs/real.rb +59 -0
  97. data/test/configs/running_load/running_load.god +16 -0
  98. data/test/configs/stop_options/simple_server.rb +12 -0
  99. data/test/configs/stop_options/stop_options.god +39 -0
  100. data/test/configs/stress/simple_server.rb +3 -0
  101. data/test/configs/stress/stress.god +15 -0
  102. data/test/configs/task/logs/.placeholder +0 -0
  103. data/test/configs/task/task.god +26 -0
  104. data/test/configs/test.rb +61 -0
  105. data/test/configs/usr1_trapper.rb +10 -0
  106. data/test/helper.rb +172 -0
  107. data/test/suite.rb +6 -0
  108. data/test/test_airbrake.rb +14 -0
  109. data/test/test_behavior.rb +18 -0
  110. data/test/test_campfire.rb +22 -0
  111. data/test/test_condition.rb +52 -0
  112. data/test/test_conditions_disk_usage.rb +50 -0
  113. data/test/test_conditions_http_response_code.rb +109 -0
  114. data/test/test_conditions_process_running.rb +40 -0
  115. data/test/test_conditions_socket_responding.rb +176 -0
  116. data/test/test_conditions_tries.rb +67 -0
  117. data/test/test_contact.rb +109 -0
  118. data/test/test_driver.rb +26 -0
  119. data/test/test_email.rb +34 -0
  120. data/test/test_event_handler.rb +82 -0
  121. data/test/test_god.rb +710 -0
  122. data/test/test_god_system.rb +201 -0
  123. data/test/test_handlers_kqueue_handler.rb +16 -0
  124. data/test/test_hipchat.rb +23 -0
  125. data/test/test_jabber.rb +29 -0
  126. data/test/test_logger.rb +55 -0
  127. data/test/test_metric.rb +74 -0
  128. data/test/test_process.rb +263 -0
  129. data/test/test_prowl.rb +15 -0
  130. data/test/test_registry.rb +15 -0
  131. data/test/test_sensu.rb +11 -0
  132. data/test/test_slack.rb +57 -0
  133. data/test/test_socket.rb +34 -0
  134. data/test/test_statsd.rb +22 -0
  135. data/test/test_sugar.rb +42 -0
  136. data/test/test_system_portable_poller.rb +17 -0
  137. data/test/test_system_process.rb +30 -0
  138. data/test/test_task.rb +246 -0
  139. data/test/test_timeline.rb +37 -0
  140. data/test/test_trigger.rb +63 -0
  141. data/test/test_watch.rb +286 -0
  142. data/test/test_webhook.rb +22 -0
  143. metadata +475 -0
data/lib/god/metric.rb ADDED
@@ -0,0 +1,87 @@
1
module God
  # A Metric groups the conditions declared for one transition of a Watch.
  # Config blocks passed to start_if, restart_if, stop_if and transition
  # receive a Metric instance and declare conditions on it.
  class Metric
    # watch       - The Watch that owns this Metric.
    # destination - The destination Hash in canonical hash form, e.g.
    #               { true => :up, false => :restart }
    # conditions  - The Array of Condition instances collected so far.
    attr_accessor :watch, :destination, :conditions

    # Build a new, empty Metric.
    #
    # watch       - The Watch.
    # destination - The optional destination Hash in canonical hash form.
    def initialize(watch, destination = nil)
      self.watch = watch
      self.destination = destination
      self.conditions = []
    end

    # Public: Create a Condition of the given kind, let the optional block
    # configure it, then prepare and validate it and add it to this Metric.
    # The process is aborted if the kind is unknown, the condition is invalid,
    # or a poll condition ends up without any interval.
    #
    # kind - The Symbol name of the condition.
    #
    # Returns the Array of conditions held by this Metric.
    def condition(kind)
      cond =
        begin
          Condition.generate(kind, watch)
        rescue NoSuchConditionError => err
          abort err.message
        end

      # Hand the fresh condition to the config block, if there is one.
      yield(cond) if block_given?

      cond.prepare

      # Both the generic and the condition-specific checks must pass.
      abort "Exiting on invalid condition" unless Condition.valid?(cond) && cond.valid?

      # A poll condition without its own interval inherits the Watch's.
      if cond.kind_of?(PollCondition) && !cond.interval
        unless watch.interval
          abort "No interval set for Condition '#{cond.class.name}' in Watch " \
                "'#{watch.name}', and no default Watch interval from " \
                "which to inherit."
        end

        cond.interval = watch.interval
      end

      conditions << cond
    end

    # Attach every condition of this Metric to the Watch.
    #
    # Returns the Array of conditions.
    def enable
      conditions.each { |cond| watch.attach(cond) }
    end

    # Detach every condition of this Metric from the Watch.
    #
    # Returns the Array of conditions.
    def disable
      conditions.each { |cond| watch.detach(cond) }
    end
  end
end
data/lib/god/process.rb ADDED
@@ -0,0 +1,381 @@
1
module God
  # Process describes one operating-system process managed by god: the
  # start/stop/restart commands, the identity (uid/gid), chroot, working
  # directory, umask and environment it runs under, and where its output goes.
  #
  # When no pid_file is configured the Process "tracks" the PID itself: start
  # and restart commands are daemonized by god and their PID is written to a
  # default pid file. When a pid_file is configured, the command is expected
  # to daemonize itself and maintain that file.
  class Process
    # Actions whose String command yields a PID that has to be recorded.
    WRITES_PID = [:start, :restart].freeze

    attr_accessor :name, :uid, :gid, :log, :log_cmd, :err_log, :err_log_cmd,
                  :start, :stop, :restart, :unix_socket, :chroot, :env, :dir,
                  :stop_timeout, :stop_signal, :umask

    # Initialize a new Process that logs to /dev/null, tracks its own PID and
    # uses the global default stop timeout and stop signal.
    def initialize
      self.log = '/dev/null'

      @pid_file = nil
      @tracking_pid = true
      @user_log = false
      @pid = nil
      @unix_socket = nil
      @log_cmd = nil
      @stop_timeout = God::STOP_TIMEOUT_DEFAULT
      @stop_signal = God::STOP_SIGNAL_DEFAULT
    end

    # Is the process identified by the current PID running?
    #
    # Returns the result of System::Process#exists?, or false when no PID is
    # known.
    def alive?
      # Read the PID once so that the check and the lookup agree.
      current_pid = self.pid
      current_pid ? System::Process.new(current_pid).exists? : false
    end

    # Check whether the configured uid/gid (inside the configured chroot) can
    # write to the given file. The check runs in a forked child so that the
    # identity change does not affect god itself.
    #
    # file - The String path to test.
    #
    # Returns true if the child found the path writable, false otherwise
    # (including when the identity could not be assumed).
    def file_writable?(file)
      child_pid = fork do
        begin
          # Resolve ids before chroot: the passwd/group databases may not
          # exist inside the jail.
          user_name, uid_num, gid_num = resolve_ids

          ::Dir.chroot(self.chroot) if self.chroot
          switch_identity(user_name, uid_num, gid_num)
        rescue ArgumentError, Errno::EPERM, Errno::ENOENT
          # exit! (not exit) so the parent's at_exit handlers are not run in
          # this throwaway child.
          exit!(1)
        end

        File.writable?(file_in_chroot(file)) ? exit!(0) : exit!(1)
      end

      _, status = ::Process.waitpid2(child_pid)
      status.exitstatus == 0
    end

    # Validate the configuration, logging every problem found via applog.
    #
    # Returns true if the configuration is usable, false otherwise.
    def valid?
      # determine if we're tracking pid or not
      self.pid_file

      valid = true

      # a start command must be specified
      if self.start.nil?
        valid = false
        applog(self, :error, "No start command was specified")
      end

      # uid must exist if specified (user name or numeric id)
      if self.uid
        begin
          lookup_user(self.uid)
        rescue ArgumentError
          valid = false
          applog(self, :error, "UID for '#{self.uid}' does not exist")
        end
      end

      # gid must exist if specified (group name or numeric id)
      if self.gid
        begin
          lookup_group(self.gid)
        rescue ArgumentError
          valid = false
          applog(self, :error, "GID for '#{self.gid}' does not exist")
        end
      end

      # dir must exist and be a directory if specified
      if self.dir
        if !File.exist?(self.dir)
          valid = false
          applog(self, :error, "Specified directory '#{self.dir}' does not exist")
        elsif !File.directory?(self.dir)
          valid = false
          applog(self, :error, "Specified directory '#{self.dir}' is not a directory")
        end
      end

      # pid dir must exist and be writable if a pid file was specified
      unless @tracking_pid
        pid_dir = File.dirname(self.pid_file)

        if !File.exist?(pid_dir)
          valid = false
          applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' does not exist")
        elsif !file_writable?(pid_dir)
          valid = false
          applog(self, :error, "PID file directory '#{File.dirname(self.pid_file)}' is not writable by #{self.uid || Etc.getlogin}")
        end
      end

      # log dir must exist
      if !File.exist?(File.dirname(self.log))
        valid = false
        applog(self, :error, "Log directory '#{File.dirname(self.log)}' does not exist")
      end

      # log file or dir must be writable
      if File.exist?(self.log)
        unless file_writable?(self.log)
          valid = false
          applog(self, :error, "Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
        end
      else
        unless file_writable?(File.dirname(self.log))
          valid = false
          applog(self, :error, "Log directory '#{File.dirname(self.log)}' is not writable by #{self.uid || Etc.getlogin}")
        end
      end

      # chroot directory must exist and have /dev/null in it
      if self.chroot
        if !File.directory?(self.chroot)
          valid = false
          applog(self, :error, "CHROOT directory '#{self.chroot}' does not exist")
        end

        if !File.exist?(File.join(self.chroot, '/dev/null'))
          valid = false
          applog(self, :error, "CHROOT directory '#{self.chroot}' does not contain '/dev/null'")
        end
      end

      valid
    end

    # DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
    # No really, trust me. Use the instance variable.
    #
    # Setting a pid file turns PID tracking off (the command maintains the
    # file); setting nil turns it back on.
    def pid_file=(value)
      @tracking_pid = value ? false : true
      @pid_file = value
    end

    # Returns the configured pid file path, falling back to (and memoizing)
    # the default pid file.
    def pid_file
      @pid_file ||= default_pid_file
    end

    # Fetch the PID from pid_file. If the pid_file does not exist or does not
    # contain just a number, then use the PID from the last time it was read.
    # If it has never been read, then return nil.
    #
    # Returns Integer(pid) or nil
    def pid
      contents = File.read(self.pid_file).strip rescue ''

      # Anchor on the whole string: String#to_i parses from the start, so a
      # per-line match on e.g. "abc\n123" would otherwise produce PID 0.
      @pid = contents.to_i if contents =~ /\A\d+\z/
      @pid
    end

    # Send the given signal to this process. Failures to deliver the signal
    # are deliberately ignored.
    #
    # sig - The signal as an Integer, a numeric String, or a signal name
    #       (String or Symbol).
    #
    # Returns the result of Process.kill, or nil if the signal could not be
    # sent.
    def signal(sig)
      # Numeric strings become Integers; names (including Symbols, which do
      # not respond to to_i) are passed through untouched.
      sig = sig.to_i if sig.respond_to?(:to_i) && sig.to_i != 0
      target_pid = self.pid
      applog(self, :info, "#{self.name} sending signal '#{sig}' to pid #{target_pid}")
      ::Process.kill(sig, target_pid) rescue nil
    end

    # Run the start action.
    def start!
      call_action(:start)
    end

    # Run the stop action.
    def stop!
      call_action(:stop)
    end

    # Run the restart action.
    def restart!
      call_action(:restart)
    end

    # Returns the String path "<God.pid_file_directory>/<name>.pid".
    def default_pid_file
      File.join(God.pid_file_directory, "#{self.name}.pid")
    end

    # Execute the command configured for the given action.
    #
    # action - The Symbol action: :start, :stop or :restart.
    #
    # A String command is run through the shell, a Proc is called, and a
    # missing stop command is replaced by a default signal-based killer.
    #
    # Returns the Proc's result for Proc commands; for String commands the
    # default pid file path when a PID was recorded, nil otherwise.
    # Raises NotImplementedError if the command is neither String nor Proc.
    def call_action(action)
      command = send(action)
      command = default_stop_command if action == :stop && command.nil?

      if command.kind_of?(String)
        run_shell_command(action, command)
      elsif command.kind_of?(Proc)
        # lambda command
        command.call
      else
        raise NotImplementedError
      end
    end

    # Fork/exec the given command under the configured umask, chroot,
    # identity, working directory, log redirection and environment. Returns
    # immediately without waiting for the command.
    #
    # command - The String containing the shell command.
    #
    # Returns the Integer PID of the forked child.
    def spawn(command)
      fork do
        File.umask self.umask if self.umask

        # Resolve ids before chroot: the passwd/group databases may not exist
        # inside the jail.
        user_name, uid_num, gid_num = resolve_ids

        ::Dir.chroot(self.chroot) if self.chroot
        ::Process.setsid
        switch_identity(user_name, uid_num, gid_num)
        self.dir ||= '/'
        ::Dir.chdir self.dir
        $0 = command
        STDIN.reopen "/dev/null"
        if self.log_cmd
          STDOUT.reopen IO.popen(self.log_cmd, "a")
        else
          STDOUT.reopen file_in_chroot(self.log), "a"
        end
        if err_log_cmd
          STDERR.reopen IO.popen(err_log_cmd, "a")
        elsif err_log && (log_cmd || err_log != log)
          STDERR.reopen file_in_chroot(err_log), "a"
        else
          STDERR.reopen STDOUT
        end

        # close any other file descriptors
        3.upto(256) { |fd| IO.new(fd).close rescue nil }

        if self.env && self.env.is_a?(Hash)
          self.env.each do |(key, value)|
            ENV[key] = value.to_s
          end
        end

        exec command unless command.empty?
      end
    end

    # Ensure that a stop command actually stops the process. Force kill
    # if necessary.
    #
    # Returns nothing
    def ensure_stop
      applog(self, :warn, "#{self.name} ensuring stop...")

      unless self.pid
        applog(self, :warn, "#{self.name} stop called but pid is uknown")
        return
      end

      # Poll to see if it's dead
      @stop_timeout.times do
        begin
          ::Process.kill(0, self.pid)
        rescue Errno::ESRCH
          # It died. Good.
          return
        end

        sleep 1
      end

      # last resort
      ::Process.kill('KILL', self.pid) rescue nil
      applog(self, :warn, "#{self.name} still alive after #{@stop_timeout}s; sent SIGKILL")
    end

    private

    # Strip the chroot prefix from a path so it can be used after chroot(2).
    #
    # Returns the path unchanged when no chroot is configured.
    def file_in_chroot(file)
      return file unless self.chroot

      file.sub(/\A#{Regexp.escape(File.expand_path(self.chroot))}/, '')
    end

    # Look up a passwd entry by numeric id or by user name.
    #
    # Raises ArgumentError if the user does not exist.
    def lookup_user(uid)
      uid.is_a?(Integer) ? Etc.getpwuid(uid) : Etc.getpwnam(uid)
    end

    # Look up a group entry by numeric id or by group name.
    #
    # Raises ArgumentError if the group does not exist.
    def lookup_group(gid)
      gid.is_a?(Integer) ? Etc.getgrgid(gid) : Etc.getgrnam(gid)
    end

    # Resolve the configured uid/gid. An explicit gid wins; otherwise the
    # user's primary group is used.
    #
    # Returns [user name, numeric uid, numeric gid]; entries are nil when not
    # configured.
    def resolve_ids
      user = lookup_user(self.uid) if self.uid
      group = lookup_group(self.gid) if self.gid

      gid_num = group ? group.gid : (user && user.gid)
      [user && user.name, user && user.uid, gid_num]
    end

    # Assume the given identity in the current process: groups first, the
    # user id last (after setuid the group calls would no longer be allowed).
    def switch_identity(user_name, uid_num, gid_num)
      ::Process.groups = [gid_num] if gid_num
      # initgroups needs the user *name*, even when uid was given as a number.
      ::Process.initgroups(user_name, gid_num) if user_name && gid_num
      ::Process::Sys.setgid(gid_num) if gid_num
      ::Process::Sys.setuid(uid_num) if uid_num
    end

    # Build the stop command used when none is configured: send the stop
    # signal, poll once per second for up to stop_timeout seconds, then send
    # SIGKILL. The PID is captured when the command is built.
    #
    # Returns a lambda.
    def default_stop_command
      pid = self.pid

      lambda do
        applog(self, :info, "#{self.name} stop: default lambda killer")

        ::Process.kill(@stop_signal, pid) rescue nil
        applog(self, :info, "#{self.name} sent SIG#{@stop_signal}")

        # Poll to see if it's dead
        pid_not_found = false
        @stop_timeout.times do
          if pid
            begin
              ::Process.kill(0, pid)
            rescue Errno::ESRCH
              # It died. Good.
              applog(self, :info, "#{self.name} process stopped")
              return
            end
          else
            applog(self, :warn, "#{self.name} pid not found in #{self.pid_file}") unless pid_not_found
            pid_not_found = true
          end

          sleep 1
        end

        ::Process.kill('KILL', pid) rescue nil
        applog(self, :warn, "#{self.name} still alive after #{@stop_timeout}s; sent SIGKILL")
      end
    end

    # Run a String command for the given action and record the PID when god
    # is responsible for tracking it.
    #
    # Returns the default pid file path when a PID was recorded, else nil.
    def run_shell_command(action, command)
      if WRITES_PID.include?(action) && @tracking_pid
        # double fork god-daemonized processes
        # we don't want to wait for them to finish
        pid = spawn_detached(command)
      else
        # single fork self-daemonizing processes
        # we want to wait for them to finish
        pid = self.spawn(command)
        _, status = ::Process.waitpid2(pid, 0)
        # exitstatus is nil when the command was killed by a signal; treat
        # that as 0, matching the former shift-based computation.
        exit_code = status.exitstatus.to_i

        if exit_code != 0
          applog(self, :warn, "#{self.name} #{action} command exited with non-zero code = #{exit_code}")
        end

        ensure_stop if action == :stop
      end

      if @tracking_pid || (@pid_file.nil? && WRITES_PID.include?(action))
        File.open(default_pid_file, 'w') do |f|
          f.write pid
        end

        @tracking_pid = true
        @pid_file = default_pid_file
      end
    end

    # Spawn the command from an intermediate child so that it is not a direct
    # child of god; the intermediate child reports the PID over a pipe.
    #
    # Returns the PID as a String.
    def spawn_detached(command)
      r, w = IO.pipe
      begin
        opid = fork do
          STDOUT.reopen(w)
          r.close
          pid = self.spawn(command)
          puts pid.to_s # send pid back to forker
          # exit! skips Ruby's normal IO teardown, so flush explicitly.
          STDOUT.flush
          exit!(0)
        end

        ::Process.waitpid(opid, 0)
        w.close
        r.gets.chomp
      ensure
        # make sure the file descriptors get closed no matter what
        r.close rescue nil
        w.close rescue nil
      end
    end
  end
end