samhendley-god 0.7.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/History.txt +293 -0
  2. data/Manifest.txt +114 -0
  3. data/README.txt +60 -0
  4. data/Rakefile +35 -0
  5. data/bin/god +128 -0
  6. data/examples/events.god +84 -0
  7. data/examples/gravatar.god +54 -0
  8. data/examples/single.god +66 -0
  9. data/ext/god/extconf.rb +55 -0
  10. data/ext/god/kqueue_handler.c +123 -0
  11. data/ext/god/netlink_handler.c +167 -0
  12. data/init/god +42 -0
  13. data/lib/god.rb +667 -0
  14. data/lib/god/behavior.rb +52 -0
  15. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  16. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  17. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  18. data/lib/god/cli/command.rb +229 -0
  19. data/lib/god/cli/run.rb +176 -0
  20. data/lib/god/cli/version.rb +23 -0
  21. data/lib/god/condition.rb +96 -0
  22. data/lib/god/conditions/always.rb +23 -0
  23. data/lib/god/conditions/complex.rb +86 -0
  24. data/lib/god/conditions/cpu_usage.rb +80 -0
  25. data/lib/god/conditions/degrading_lambda.rb +52 -0
  26. data/lib/god/conditions/disk_usage.rb +27 -0
  27. data/lib/god/conditions/file_mtime.rb +28 -0
  28. data/lib/god/conditions/flapping.rb +128 -0
  29. data/lib/god/conditions/http_response_code.rb +168 -0
  30. data/lib/god/conditions/lambda.rb +25 -0
  31. data/lib/god/conditions/memory_usage.rb +82 -0
  32. data/lib/god/conditions/process_exits.rb +72 -0
  33. data/lib/god/conditions/process_running.rb +74 -0
  34. data/lib/god/conditions/tries.rb +44 -0
  35. data/lib/god/configurable.rb +57 -0
  36. data/lib/god/contact.rb +106 -0
  37. data/lib/god/contacts/campfire.rb +82 -0
  38. data/lib/god/contacts/email.rb +95 -0
  39. data/lib/god/contacts/jabber.rb +65 -0
  40. data/lib/god/contacts/twitter.rb +39 -0
  41. data/lib/god/contacts/webhook.rb +47 -0
  42. data/lib/god/dependency_graph.rb +41 -0
  43. data/lib/god/diagnostics.rb +37 -0
  44. data/lib/god/driver.rb +206 -0
  45. data/lib/god/errors.rb +24 -0
  46. data/lib/god/event_handler.rb +111 -0
  47. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  48. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  49. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  50. data/lib/god/logger.rb +120 -0
  51. data/lib/god/metric.rb +59 -0
  52. data/lib/god/process.rb +342 -0
  53. data/lib/god/registry.rb +32 -0
  54. data/lib/god/simple_logger.rb +53 -0
  55. data/lib/god/socket.rb +96 -0
  56. data/lib/god/sugar.rb +47 -0
  57. data/lib/god/system/portable_poller.rb +42 -0
  58. data/lib/god/system/process.rb +42 -0
  59. data/lib/god/system/slash_proc_poller.rb +92 -0
  60. data/lib/god/task.rb +491 -0
  61. data/lib/god/timeline.rb +25 -0
  62. data/lib/god/trigger.rb +43 -0
  63. data/lib/god/watch.rb +184 -0
  64. data/test/configs/child_events/child_events.god +44 -0
  65. data/test/configs/child_events/simple_server.rb +3 -0
  66. data/test/configs/child_polls/child_polls.god +37 -0
  67. data/test/configs/child_polls/simple_server.rb +12 -0
  68. data/test/configs/complex/complex.god +59 -0
  69. data/test/configs/complex/simple_server.rb +3 -0
  70. data/test/configs/contact/contact.god +84 -0
  71. data/test/configs/contact/simple_server.rb +3 -0
  72. data/test/configs/daemon_events/daemon_events.god +37 -0
  73. data/test/configs/daemon_events/simple_server.rb +8 -0
  74. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  75. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  76. data/test/configs/daemon_polls/simple_server.rb +6 -0
  77. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  78. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  79. data/test/configs/matias/matias.god +50 -0
  80. data/test/configs/real.rb +59 -0
  81. data/test/configs/running_load/running_load.god +16 -0
  82. data/test/configs/stress/simple_server.rb +3 -0
  83. data/test/configs/stress/stress.god +15 -0
  84. data/test/configs/task/logs/.placeholder +0 -0
  85. data/test/configs/task/task.god +26 -0
  86. data/test/configs/test.rb +61 -0
  87. data/test/helper.rb +151 -0
  88. data/test/suite.rb +6 -0
  89. data/test/test_behavior.rb +21 -0
  90. data/test/test_campfire.rb +41 -0
  91. data/test/test_condition.rb +50 -0
  92. data/test/test_conditions_disk_usage.rb +56 -0
  93. data/test/test_conditions_http_response_code.rb +109 -0
  94. data/test/test_conditions_process_running.rb +44 -0
  95. data/test/test_conditions_tries.rb +67 -0
  96. data/test/test_contact.rb +109 -0
  97. data/test/test_dependency_graph.rb +62 -0
  98. data/test/test_driver.rb +11 -0
  99. data/test/test_email.rb +45 -0
  100. data/test/test_event_handler.rb +80 -0
  101. data/test/test_god.rb +598 -0
  102. data/test/test_handlers_kqueue_handler.rb +16 -0
  103. data/test/test_logger.rb +63 -0
  104. data/test/test_metric.rb +72 -0
  105. data/test/test_process.rb +246 -0
  106. data/test/test_registry.rb +15 -0
  107. data/test/test_socket.rb +42 -0
  108. data/test/test_sugar.rb +42 -0
  109. data/test/test_system_portable_poller.rb +17 -0
  110. data/test/test_system_process.rb +30 -0
  111. data/test/test_task.rb +262 -0
  112. data/test/test_timeline.rb +37 -0
  113. data/test/test_trigger.rb +59 -0
  114. data/test/test_watch.rb +279 -0
  115. metadata +193 -0
@@ -0,0 +1,13 @@
module God
  # Placeholder event handler whose EVENT_SYSTEM is "none". It implements no
  # event mechanism: every operation raises NotImplementedError.
  class DummyHandler
    EVENT_SYSTEM = "none"

    # Accepts (and ignores) any arguments so that a call made with the same
    # (pid, events) signature the other handlers expose fails with the
    # intended NotImplementedError rather than an ArgumentError about arity.
    #
    # Raises NotImplementedError always
    def self.register_process(*_args)
      raise NotImplementedError
    end

    # Raises NotImplementedError always
    def self.handle_events
      raise NotImplementedError
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'kqueue_handler_ext'
2
+
module God
  # Event handler whose EVENT_SYSTEM is "kqueue". The methods monitor_process
  # and event_mask are not defined in this file; presumably they come from the
  # required 'kqueue_handler_ext' extension -- confirm.
  class KQueueHandler
    EVENT_SYSTEM = "kqueue"

    # Start monitoring a process for a set of events.
    # +pid+ is the pid of the process to monitor
    # +events+ is the collection of event identifiers to watch for
    #
    # Returns whatever monitor_process returns
    def self.register_process(pid, events)
      monitor_process(pid, events_mask(events))
    end

    # Combine the individual masks of +events+ into one bitmask.
    # +events+ is an enumerable of event identifiers understood by event_mask
    #
    # Returns the bitwise OR of every event's mask (0 when +events+ is empty)
    def self.events_mask(events)
      # The block's value is the next accumulator, so a plain `|` is enough;
      # assigning to the block parameter had no effect.
      events.inject(0) { |mask, event| mask | event_mask(event) }
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'netlink_handler_ext'
2
+
module God
  # Event handler whose EVENT_SYSTEM is "netlink".
  class NetlinkHandler
    EVENT_SYSTEM = "netlink"

    class << self
      # Intentionally a no-op. Per the original author's note (Kev), netlink
      # needs no per-process registration: it just reads from the event
      # handler's actions to see whether a pid is in the list being watched.
      #
      # Returns nil
      def register_process(pid, events)
      end
    end
  end
end
@@ -0,0 +1,120 @@
module God

  # Logger that writes every message to $stdout (via SimpleLogger), optionally
  # mirrors it to syslog, keeps a per-watch Timeline of formatted lines, and
  # can capture output into a buffer between start_capture and finish_capture.
  class Logger < SimpleLogger
    # Maps this logger's levels to the Syslog method used for them.
    # NOTE(review): :warn and :info are both sent at syslog :debug level;
    # this is kept as found -- confirm whether it is intentional.
    SYSLOG_EQUIVALENTS = {:fatal => :crit,
                          :error => :err,
                          :warn => :debug,
                          :info => :debug,
                          :debug => :debug}

    attr_accessor :logs

    class << self
      attr_accessor :syslog
    end

    # Default to syslog enabled, but only when nothing has been chosen yet.
    # (`||=` would flip an explicit `false` back to true if this file were
    # loaded a second time.)
    self.syslog = true if syslog.nil?

    # Instantiate a new Logger object
    def initialize
      super($stdout)
      self.logs = {}
      @mutex = Mutex.new
      @capture = nil
      # scratch logger used to render one message at a time into a String
      @templogio = StringIO.new
      @templog = SimpleLogger.new(@templogio)
      @templog.level = Logger::INFO
      load_syslog
    end

    # If Logger.syslog is true then attempt to load the syslog bindings. If syslog
    # cannot be loaded, then set Logger.syslog to false and continue.
    #
    # Returns nothing
    def load_syslog
      return unless Logger.syslog

      begin
        require 'syslog'

        # Ensure that Syslog is open
        begin
          Syslog.open('god')
        rescue RuntimeError
          Syslog.reopen('god')
        end
      rescue LoadError, StandardError
        # LoadError is not a StandardError, so it is named explicitly. Signals
        # and SystemExit are deliberately no longer swallowed here.
        Logger.syslog = false
      end
    end

    # Log a message
    # +watch+ is the Watch the message belongs to; only its #name is used
    #         (may be nil if no Watch is applicable)
    # +level+ is the log level [:debug|:info|:warn|:error|:fatal]
    # +text+ is the String message. It is run through String#% with no
    #        arguments before being written, and handed to Syslog unmodified.
    #
    # Returns nothing
    def log(watch, level, text)
      message = text % []

      # The scratch buffer is shared by every caller, so rendering into it and
      # reading it back must happen inside the same critical section as the
      # capture/timeline updates; otherwise concurrent calls can truncate or
      # interleave each other's text.
      @mutex.synchronize do
        @templogio.truncate(0)
        @templogio.rewind
        @templog.send(level, message)
        rendered = @templogio.string.dup

        # push onto capture and timeline for the given watch
        @capture.puts(rendered) if @capture
        if watch
          self.logs[watch.name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT)
          self.logs[watch.name] << [Time.now, rendered]
        end
      end

      # send to regular logger
      self.send(level, message)

      # send to syslog
      Syslog.send(SYSLOG_EQUIVALENTS[level], text) if Logger.syslog
    end

    # Get all log output for a given Watch since a certain Time.
    # +watch_name+ is the String name of the Watch
    # +since+ is the Time since which to fetch log lines
    #
    # Returns String
    def watch_log_since(watch_name, since)
      @mutex.synchronize do
        # initialize watch log if necessary
        timeline = (self.logs[watch_name] ||= Timeline.new(God::LOG_BUFFER_SIZE_DEFAULT))

        # get and join lines since given time
        timeline.select { |entry| entry.first > since }.map { |entry| entry[1] }.join
      end
    end

    # private

    # Enable capturing of log
    #
    # Returns nothing
    def start_capture
      @mutex.synchronize do
        @capture = StringIO.new
      end
    end

    # Disable capturing of log and return what was captured since
    # capturing was enabled with Logger#start_capture
    #
    # Returns String
    def finish_capture
      @mutex.synchronize do
        cap = @capture.string
        @capture = nil
        cap
      end
    end
  end

end
@@ -0,0 +1,59 @@
module God

  # Groups the conditions declared for a watch together with an optional
  # destination, and attaches/detaches them on the watch as a unit.
  class Metric
    attr_accessor :watch, :destination, :conditions

    # +watch+ is the object the conditions are generated for and attached to
    # +destination+ is stored as given (defaults to nil)
    def initialize(watch, destination = nil)
      self.watch = watch
      self.destination = destination
      self.conditions = []
    end

    # Build a Condition of type +kind+, let the optional block configure it,
    # then prepare, validate and remember it. The process is aborted when the
    # kind is unknown, when the condition is invalid, or when a PollCondition
    # ends up with no interval of its own and the watch has none to inherit.
    def condition(kind)
      cond = begin
        Condition.generate(kind, watch)
      rescue NoSuchConditionError => e
        abort e.message
      end

      # hand the new condition to the config block so attributes can be set
      yield(cond) if block_given?

      cond.prepare

      # both the generic and the condition-specific checks must pass
      abort "Exiting on invalid condition" unless Condition.valid?(cond) && cond.valid?

      # poll conditions without their own interval fall back to the watch's
      if cond.kind_of?(PollCondition) && !cond.interval
        unless watch.interval
          abort "No interval set for Condition '#{cond.class.name}' in Watch '#{watch.name}', and no default Watch interval from which to inherit"
        end
        cond.interval = watch.interval
      end

      conditions << cond
    end

    # Attach every remembered condition to the watch.
    def enable
      conditions.each { |cond| watch.attach(cond) }
    end

    # Detach every remembered condition from the watch.
    def disable
      conditions.each { |cond| watch.detach(cond) }
    end
  end

end
@@ -0,0 +1,342 @@
module God
  # Describes one external process: the commands used to start, stop and
  # restart it, the uid/gid/chroot/env/dir it runs under, where its output is
  # logged and where its pid file lives.
  #
  # @tracking_pid is true while no pid file has been assigned. In that mode
  # call_action double-forks string commands and records the resulting pid in
  # the default pid file itself. Assigning a pid file switches to the
  # "self-daemonizing" mode, where commands are waited on instead.
  class Process
    # Actions after which call_action may write the default pid file.
    WRITES_PID = [:start, :restart]

    attr_accessor :name, :uid, :gid, :log, :log_cmd, :start, :stop, :restart, :unix_socket, :chroot, :env, :dir

    def initialize
      self.log = '/dev/null'

      @pid_file = nil
      @tracking_pid = true
      @user_log = false
      @pid = nil
      @unix_socket = nil
      @log_cmd = nil
    end

    # Returns the result of System::Process#exists? for the current pid, or
    # false when no pid is known
    def alive?
      # read the pid once so the check and the lookup agree
      current = self.pid
      current ? System::Process.new(current).exists? : false
    end

    # Check whether +file+ is writable for the configured uid/gid (and inside
    # the configured chroot). The check runs in a forked child so that the
    # identity change does not affect this process.
    #
    # Returns true or false
    def file_writable?(file)
      pid = fork do
        uid_num = Etc.getpwnam(self.uid).uid if self.uid
        gid_num = Etc.getgrnam(self.gid).gid if self.gid

        ::Dir.chroot(self.chroot) if self.chroot
        ::Process.groups = [gid_num] if self.gid
        ::Process::Sys.setgid(gid_num) if self.gid
        ::Process::Sys.setuid(uid_num) if self.uid

        # the child's exit status carries the answer back to the parent
        File.writable?(file_in_chroot(file)) ? exit(0) : exit(1)
      end

      _, status = ::Process.waitpid2(pid)
      status.exitstatus == 0
    end

    # Validate the configuration, logging an :error through applog for every
    # problem found (all checks run; it does not stop at the first failure).
    #
    # Returns true if every check passed, false otherwise
    def valid?
      # determine if we're tracking pid or not
      self.pid_file

      valid = true
      invalidate = lambda do |message|
        valid = false
        applog(self, :error, message)
      end

      # a start command must be specified
      invalidate.call("No start command was specified") if self.start.nil?

      # self-daemonizing processes must specify a stop command
      invalidate.call("No stop command was specified") if !@tracking_pid && self.stop.nil?

      # uid must exist if specified
      if self.uid
        begin
          Etc.getpwnam(self.uid)
        rescue ArgumentError
          invalidate.call("UID for '#{self.uid}' does not exist")
        end
      end

      # gid must exist if specified
      if self.gid
        begin
          Etc.getgrnam(self.gid)
        rescue ArgumentError
          invalidate.call("GID for '#{self.gid}' does not exist")
        end
      end

      # pid dir must exist and be writable if a pid file was specified
      unless @tracking_pid
        pid_dir = File.dirname(self.pid_file)
        if !File.exist?(pid_dir)
          invalidate.call("PID file directory '#{pid_dir}' does not exist")
        elsif !file_writable?(pid_dir)
          invalidate.call("PID file directory '#{pid_dir}' is not writable by #{self.uid || Etc.getlogin}")
        end
      end

      # log dir must exist
      log_dir = File.dirname(self.log)
      invalidate.call("Log directory '#{log_dir}' does not exist") unless File.exist?(log_dir)

      # log file or dir must be writable
      if File.exist?(self.log)
        unless file_writable?(self.log)
          invalidate.call("Log file '#{self.log}' exists but is not writable by #{self.uid || Etc.getlogin}")
        end
      else
        unless file_writable?(log_dir)
          invalidate.call("Log directory '#{log_dir}' is not writable by #{self.uid || Etc.getlogin}")
        end
      end

      # chroot directory must exist and have /dev/null in it
      # (reported through applog like every other check in this method)
      if self.chroot
        unless File.directory?(self.chroot)
          invalidate.call("CHROOT directory '#{self.chroot}' does not exist")
        end

        unless File.exist?(File.join(self.chroot, '/dev/null'))
          invalidate.call("CHROOT directory '#{self.chroot}' does not contain '/dev/null'")
        end
      end

      valid
    end

    # DON'T USE THIS INTERNALLY. Use the instance variable. -- Kev
    # No really, trust me. Use the instance variable.
    #
    # Assigning a pid file turns pid tracking off; assigning nil/false turns
    # it back on.
    def pid_file=(value)
      @tracking_pid = !value
      @pid_file = value
    end

    # Returns the assigned pid file, or default_pid_file (memoized) when none
    # has been assigned
    def pid_file
      @pid_file ||= default_pid_file
    end

    # Fetch the PID from pid_file. If the pid_file does not
    # exist, then use the PID from the last time it was read.
    # If it has never been read, then return nil.
    #
    # Returns Integer(pid) or nil
    def pid
      contents = begin
        File.read(self.pid_file).strip
      rescue StandardError
        ''
      end

      # \A...\z anchor the whole string; ^...$ would accept any single line of
      # multi-line content (e.g. "99\ngarbage") as a pid.
      @pid = contents.to_i if contents =~ /\A\d+\z/
      @pid
    end

    # Send the given signal to this process.
    # +sig+ is a signal number, a numeric String, a signal name or a Symbol
    #
    # Failures from Process.kill are ignored; the return value is not
    # meaningful.
    def signal(sig)
      # Numeric strings such as "9" become Integers; names such as "TERM"
      # (to_i == 0) and Symbols (which have no to_i) are passed through as is.
      sig = sig.to_i if sig.respond_to?(:to_i) && sig.to_i != 0
      applog(self, :info, "#{self.name} sending signal '#{sig}' to pid #{self.pid}")
      ::Process.kill(sig, self.pid) rescue nil
    end

    def start!
      call_action(:start)
    end

    def stop!
      call_action(:stop)
    end

    def restart!
      call_action(:restart)
    end

    # Returns "<name>.pid" inside God.pid_file_directory
    def default_pid_file
      File.join(God.pid_file_directory, "#{self.name}.pid")
    end

    # Run the command configured for +action+ (:start, :stop or :restart).
    # A String command is spawned through #spawn; a Proc is called. When no
    # stop command is configured, a default lambda sends TERM, polls for up to
    # five seconds, then sends KILL.
    #
    # Raises NotImplementedError if the command is neither a String nor a Proc
    def call_action(action)
      command = send(action)

      if action == :stop && command.nil?
        # captured now, so the lambda keeps signalling this pid
        pid = self.pid
        command = lambda do
          applog(self, :info, "#{self.name} stop: default lambda killer")

          ::Process.kill('TERM', pid) rescue nil
          applog(self, :info, "#{self.name} sent SIGTERM")

          # Poll to see if it's dead
          5.times do
            begin
              ::Process.kill(0, pid)
            rescue Errno::ESRCH
              # It died. Good.
              applog(self, :info, "#{self.name} process stopped")
              return
            end

            sleep 1
          end

          ::Process.kill('KILL', pid) rescue nil
          applog(self, :info, "#{self.name} still alive; sent SIGKILL")
        end
      end

      if command.kind_of?(String)
        pid = nil

        if @tracking_pid
          # double fork god-daemonized processes
          # we don't want to wait for them to finish
          r, w = IO.pipe
          begin
            opid = fork do
              STDOUT.reopen(w)
              r.close
              pid = self.spawn(command)
              puts pid.to_s # send pid back to forker
            end

            ::Process.waitpid(opid, 0)
            w.close
            # NOTE(review): r.gets is nil if the intermediate child exited
            # without printing a pid, which makes chomp raise here.
            pid = r.gets.chomp
          ensure
            # make sure the file descriptors get closed no matter what
            r.close rescue nil
            w.close rescue nil
          end
        else
          # single fork self-daemonizing processes
          # we want to wait for them to finish
          pid = self.spawn(command)
          _, status = ::Process.waitpid2(pid, 0)
          # exitstatus is nil when the command did not exit normally; to_i
          # maps that to 0, matching the previous `status >> 8` result without
          # relying on the deprecated Process::Status#>>.
          exit_code = status.exitstatus.to_i

          if exit_code != 0
            applog(self, :warn, "#{self.name} #{action} command exited with non-zero code = #{exit_code}")
          end

          ensure_stop if action == :stop
        end

        if @tracking_pid || (@pid_file.nil? && WRITES_PID.include?(action))
          File.open(default_pid_file, 'w') do |f|
            f.write pid
          end

          @tracking_pid = true
          @pid_file = default_pid_file
        end
      elsif command.kind_of?(Proc)
        # lambda command
        command.call
      else
        raise NotImplementedError
      end
    end

    # Fork/exec the given command, returns immediately
    # +command+ is the String containing the shell command
    #
    # In the child: optionally chroot, start a new session, drop to the
    # configured gid/uid, chdir to +dir+ (default '/'), point STDIN at
    # /dev/null and STDOUT/STDERR at the log (or at log_cmd), close other file
    # descriptors, apply +env+, then exec.
    #
    # Returns the Integer pid of the forked child
    def spawn(command)
      fork do
        uid_num = Etc.getpwnam(self.uid).uid if self.uid
        gid_num = Etc.getgrnam(self.gid).gid if self.gid

        ::Dir.chroot(self.chroot) if self.chroot
        ::Process.setsid
        ::Process.groups = [gid_num] if self.gid
        ::Process::Sys.setgid(gid_num) if self.gid
        ::Process::Sys.setuid(uid_num) if self.uid
        self.dir ||= '/'
        ::Dir.chdir self.dir
        $0 = command
        STDIN.reopen "/dev/null"
        if self.log_cmd
          STDOUT.reopen IO.popen(self.log_cmd, "a")
        else
          STDOUT.reopen file_in_chroot(self.log), "a"
        end
        STDERR.reopen STDOUT

        # close any other file descriptors
        3.upto(256){|fd| IO::new(fd).close rescue nil}

        if self.env && self.env.is_a?(Hash)
          self.env.each do |(key, value)|
            ENV[key] = value
          end
        end

        exec command unless command.empty?
      end
    end

    # Ensure that a stop command actually stops the process. Force kill
    # if necessary.
    #
    # Returns nothing
    def ensure_stop
      unless self.pid
        applog(self, :warn, "#{self.name} stop called but pid is uknown")
        return
      end

      # Poll to see if it's dead
      10.times do
        begin
          ::Process.kill(0, self.pid)
        rescue Errno::ESRCH
          # It died. Good.
          return
        end

        sleep 1
      end

      # last resort
      ::Process.kill('KILL', self.pid) rescue nil
      applog(self, :warn, "#{self.name} process still running 10 seconds after stop command returned. Force killing.")
    end

    private

    # Translate +file+ into the path it has inside the chroot by stripping the
    # expanded chroot directory from its start.
    #
    # Returns +file+ unchanged when no chroot is configured
    def file_in_chroot(file)
      return file unless self.chroot

      # a single prefix strip anchored at the start of the string
      file.sub(/\A#{Regexp.escape(File.expand_path(self.chroot))}/, '')
    end
  end
end