firenxis-god 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. data/Announce.txt +135 -0
  2. data/History.txt +393 -0
  3. data/README.txt +59 -0
  4. data/Rakefile +142 -0
  5. data/bin/god +132 -0
  6. data/ext/god/.gitignore +5 -0
  7. data/ext/god/extconf.rb +55 -0
  8. data/ext/god/kqueue_handler.c +125 -0
  9. data/ext/god/netlink_handler.c +168 -0
  10. data/god.gemspec +164 -0
  11. data/lib/god.rb +701 -0
  12. data/lib/god/behavior.rb +52 -0
  13. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  14. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  15. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  16. data/lib/god/cli/command.rb +256 -0
  17. data/lib/god/cli/run.rb +172 -0
  18. data/lib/god/cli/version.rb +23 -0
  19. data/lib/god/compat19.rb +36 -0
  20. data/lib/god/condition.rb +96 -0
  21. data/lib/god/conditions/always.rb +23 -0
  22. data/lib/god/conditions/complex.rb +86 -0
  23. data/lib/god/conditions/cpu_usage.rb +80 -0
  24. data/lib/god/conditions/degrading_lambda.rb +52 -0
  25. data/lib/god/conditions/disk_usage.rb +32 -0
  26. data/lib/god/conditions/file_mtime.rb +28 -0
  27. data/lib/god/conditions/flapping.rb +128 -0
  28. data/lib/god/conditions/http_response_code.rb +168 -0
  29. data/lib/god/conditions/lambda.rb +25 -0
  30. data/lib/god/conditions/memory_usage.rb +82 -0
  31. data/lib/god/conditions/process_exits.rb +72 -0
  32. data/lib/god/conditions/process_running.rb +74 -0
  33. data/lib/god/conditions/tries.rb +44 -0
  34. data/lib/god/configurable.rb +57 -0
  35. data/lib/god/contact.rb +114 -0
  36. data/lib/god/contacts/campfire.rb +121 -0
  37. data/lib/god/contacts/email.rb +136 -0
  38. data/lib/god/contacts/jabber.rb +75 -0
  39. data/lib/god/contacts/prowl.rb +57 -0
  40. data/lib/god/contacts/scout.rb +55 -0
  41. data/lib/god/contacts/twitter.rb +51 -0
  42. data/lib/god/contacts/webhook.rb +73 -0
  43. data/lib/god/dependency_graph.rb +41 -0
  44. data/lib/god/diagnostics.rb +37 -0
  45. data/lib/god/driver.rb +206 -0
  46. data/lib/god/errors.rb +24 -0
  47. data/lib/god/event_handler.rb +108 -0
  48. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  49. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  50. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  51. data/lib/god/logger.rb +109 -0
  52. data/lib/god/metric.rb +59 -0
  53. data/lib/god/process.rb +363 -0
  54. data/lib/god/registry.rb +32 -0
  55. data/lib/god/simple_logger.rb +59 -0
  56. data/lib/god/socket.rb +107 -0
  57. data/lib/god/sugar.rb +47 -0
  58. data/lib/god/sys_logger.rb +45 -0
  59. data/lib/god/system/portable_poller.rb +42 -0
  60. data/lib/god/system/process.rb +50 -0
  61. data/lib/god/system/slash_proc_poller.rb +92 -0
  62. data/lib/god/task.rb +503 -0
  63. data/lib/god/timeline.rb +25 -0
  64. data/lib/god/trigger.rb +43 -0
  65. data/lib/god/watch.rb +188 -0
  66. data/test/configs/child_events/child_events.god +44 -0
  67. data/test/configs/child_events/simple_server.rb +3 -0
  68. data/test/configs/child_polls/child_polls.god +37 -0
  69. data/test/configs/child_polls/simple_server.rb +12 -0
  70. data/test/configs/complex/complex.god +59 -0
  71. data/test/configs/complex/simple_server.rb +3 -0
  72. data/test/configs/contact/contact.god +108 -0
  73. data/test/configs/contact/simple_server.rb +3 -0
  74. data/test/configs/daemon_events/daemon_events.god +37 -0
  75. data/test/configs/daemon_events/simple_server.rb +8 -0
  76. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  77. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  78. data/test/configs/daemon_polls/simple_server.rb +6 -0
  79. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  80. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  81. data/test/configs/lifecycle/lifecycle.god +25 -0
  82. data/test/configs/matias/matias.god +50 -0
  83. data/test/configs/real.rb +59 -0
  84. data/test/configs/running_load/running_load.god +16 -0
  85. data/test/configs/stop_options/simple_server.rb +12 -0
  86. data/test/configs/stop_options/stop_options.god +39 -0
  87. data/test/configs/stress/simple_server.rb +3 -0
  88. data/test/configs/stress/stress.god +15 -0
  89. data/test/configs/task/logs/.placeholder +0 -0
  90. data/test/configs/task/task.god +26 -0
  91. data/test/configs/test.rb +61 -0
  92. data/test/helper.rb +141 -0
  93. data/test/suite.rb +6 -0
  94. data/test/test_behavior.rb +18 -0
  95. data/test/test_campfire.rb +23 -0
  96. data/test/test_condition.rb +50 -0
  97. data/test/test_conditions_disk_usage.rb +50 -0
  98. data/test/test_conditions_http_response_code.rb +109 -0
  99. data/test/test_conditions_process_running.rb +40 -0
  100. data/test/test_conditions_tries.rb +67 -0
  101. data/test/test_contact.rb +109 -0
  102. data/test/test_dependency_graph.rb +62 -0
  103. data/test/test_driver.rb +11 -0
  104. data/test/test_email.rb +34 -0
  105. data/test/test_event_handler.rb +80 -0
  106. data/test/test_god.rb +570 -0
  107. data/test/test_handlers_kqueue_handler.rb +16 -0
  108. data/test/test_jabber.rb +29 -0
  109. data/test/test_logger.rb +55 -0
  110. data/test/test_metric.rb +72 -0
  111. data/test/test_process.rb +247 -0
  112. data/test/test_prowl.rb +15 -0
  113. data/test/test_registry.rb +15 -0
  114. data/test/test_socket.rb +34 -0
  115. data/test/test_sugar.rb +42 -0
  116. data/test/test_system_portable_poller.rb +17 -0
  117. data/test/test_system_process.rb +30 -0
  118. data/test/test_task.rb +246 -0
  119. data/test/test_timeline.rb +37 -0
  120. data/test/test_trigger.rb +59 -0
  121. data/test/test_watch.rb +279 -0
  122. data/test/test_webhook.rb +15 -0
  123. metadata +362 -0
@@ -0,0 +1,52 @@
module God

  # Base class for behaviors attached to a watch. It provides a factory
  # (Behavior.generate), a default validity check, and a set of no-op
  # lifecycle hooks for subclasses to override.
  class Behavior
    include Configurable

    # The watch this behavior is attached to (set by Behavior.generate).
    attr_accessor :watch

    # Generate a Behavior of the given kind. The proper class is found by camel casing the
    # kind (which is given as an underscored symbol).
    #   +kind+ is the underscored symbol representing the class (e.g. foo_bar for God::Behaviors::FooBar)
    #   +watch+ is the object assigned to the new behavior's +watch+ attribute
    #
    # Returns the new behavior instance.
    # Raises NoSuchBehaviorError if no constant with the camel cased name can
    # be resolved through God::Behaviors.
    def self.generate(kind, watch)
      sym = kind.to_s.capitalize.gsub(/_(.)/) { $1.upcase }.intern

      # Only the constant lookup is guarded. A NameError (or NoMethodError,
      # which is a NameError subclass) raised while constructing the behavior
      # is a genuine bug in that behavior and must not be reported as
      # "no such behavior".
      begin
        behavior_class = God::Behaviors.const_get(sym)
      rescue NameError
        raise NoSuchBehaviorError.new("No Behavior found with the class name God::Behaviors::#{sym}")
      end

      b = behavior_class.new
      b.watch = watch
      b
    end

    # Default validity check; the base behavior has nothing to validate.
    def valid?
      true
    end

    #######

    # Lifecycle hooks. All are no-ops here; subclasses override the ones they
    # need. NOTE(review): the caller is not visible in this file; presumably
    # the watch invokes them around the corresponding start/restart/stop action.

    def before_start
    end

    def after_start
    end

    def before_restart
    end

    def after_restart
    end

    def before_stop
    end

    def after_stop
    end

    # Construct the friendly name of this Behavior, looks like:
    #
    #   Behavior FooBar on Watch 'baz'
    #
    # The middle part comes from +super+ (presumably Configurable#friendly_name).
    def friendly_name
      "Behavior " + super + " on Watch '#{self.watch.name}'"
    end
  end

end
@@ -0,0 +1,21 @@
module God
  module Behaviors

    # Behavior that removes the watch's pid file before the watch is started.
    class CleanPidFile < Behavior
      # Valid only when the watch has a pid_file configured.
      # Presumably +complain+ logs the message and returns false, so that
      # `valid &= ...` flips the result -- its definition is not in this file.
      def valid?
        valid = true
        valid &= complain("Attribute 'pid_file' must be specified", self) if self.watch.pid_file.nil?
        valid
      end

      # Delete the watch's pid file. This is best-effort and never raises a
      # StandardError; it returns a short status message instead.
      #
      # Returns "deleted pid file" on success, "no pid file to delete" when
      # the file does not exist, or a message describing any other failure.
      def before_start
        File.delete(self.watch.pid_file)

        "deleted pid file"
      rescue Errno::ENOENT
        "no pid file to delete"
      rescue => e
        # e.g. permission denied: the file may well exist, so do not claim
        # that there was nothing to delete.
        "could not delete pid file: #{e.message}"
      end
    end

  end
end
@@ -0,0 +1,21 @@
module God
  module Behaviors

    # Behavior that removes the watch's unix socket file before the watch is
    # started.
    class CleanUnixSocket < Behavior
      # Valid only when the watch has a unix_socket configured.
      # Presumably +complain+ logs the message and returns false, so that
      # `valid &= ...` flips the result -- its definition is not in this file.
      def valid?
        valid = true
        valid &= complain("Attribute 'unix_socket' must be specified", self) if self.watch.unix_socket.nil?
        valid
      end

      # Delete the watch's unix socket file. This is best-effort and never
      # raises a StandardError; it returns a short status message instead.
      #
      # Returns "deleted unix socket" on success, "no unix socket to delete"
      # when the file does not exist, or a message describing any other failure.
      def before_start
        File.delete(self.watch.unix_socket)

        "deleted unix socket"
      rescue Errno::ENOENT
        "no unix socket to delete"
      rescue => e
        # e.g. permission denied: the socket may well exist, so do not claim
        # that there was nothing to delete.
        "could not delete unix socket: #{e.message}"
      end
    end

  end
end
@@ -0,0 +1,51 @@
module God
  module Behaviors

    # Behavior that notifies when a watch is started/restarted too often:
    # once at least +failures+ start/restart calls fall within the last
    # +seconds+ seconds, +notifier.notify+ is called with a description.
    class NotifyWhenFlapping < Behavior
      attr_accessor :failures # number of failures
      attr_accessor :seconds  # number of seconds
      attr_accessor :notifier # class to notify with

      def initialize
        super
        # Integer epoch timestamps of recent start/restart calls.
        @startup_times = []
      end

      # Check that all three attributes are set and that the notifier has a
      # usable +notify+ method. Each problem is reported once through
      # +complain+ (presumably logs and returns false -- defined elsewhere).
      def valid?
        valid = true
        valid &= complain("Attribute 'failures' must be specified", self) unless self.failures
        valid &= complain("Attribute 'seconds' must be specified", self) unless self.seconds

        if !self.notifier
          # A missing notifier gets this one complaint only; checking the
          # interface of nil would just add a second, misleading error.
          valid &= complain("Attribute 'notifier' must be specified", self)
        elsif !notifier_usable?
          valid &= complain("The 'notifier' must have a method 'notify' which takes 1 or variable args", self)
        end

        valid
      end

      # Record this start and notify if the watch is flapping.
      def before_start
        record_startup
      end

      # Record this restart and notify if the watch is flapping.
      def before_restart
        record_startup
      end

      private

      # True when the notifier responds to +notify+ and that method takes
      # exactly one arg (arity 1) or variable args (arity -1).
      def notifier_usable?
        self.notifier.respond_to?(:notify) && [1, -1].include?(self.notifier.method(:notify).arity)
      end

      # Remember the current time as a start/restart and run the flapping check.
      def record_startup
        now = Time.now.to_i
        @startup_times << now
        check_for_flapping(now)
      end

      # Drop timestamps older than +seconds+ before +now+, then notify when
      # at least +failures+ remain.
      def check_for_flapping(now)
        @startup_times.select! { |time| time >= now - self.seconds }
        if @startup_times.length >= self.failures
          self.notifier.notify("#{self.watch.name} has called start/restart #{@startup_times.length} times in #{self.seconds} seconds")
        end
      end
    end

  end
end
@@ -0,0 +1,256 @@
module God
  module CLI

    # Client side of the god command line. Constructing a Command immediately
    # dispatches +command+: most commands connect to a running god server over
    # DRb and call a method on it; 'check' runs a local event-system test.
    class Command
      # +command+ is the command name (e.g. 'status'), +options+ is the parsed
      # option hash (only :port is read here) and +args+ is the raw argument
      # list, where args[1] and args[2] are the command's own arguments.
      def initialize(command, options, args)
        @command = command
        @options = options
        @args = args

        dispatch
      end

      # Connect to the god server and make sure it answers. Aborts the
      # process when the server cannot be reached.
      def setup
        # connect to drb unix socket
        DRb.start_service("druby://127.0.0.1:0")
        @server = DRbObject.new(nil, God::Socket.socket(@options[:port]))

        # ping server to ensure that it is responsive
        begin
          @server.ping
        rescue DRb::DRbConnError
          puts "The server is not available (or you do not have permissions to access it)"
          abort
        end
      end

      # Route @command to its handler. Only whitelisted names reach +send+;
      # anything else prints a usage hint and aborts.
      def dispatch
        if %w{load status signal log quit terminate}.include?(@command)
          setup
          send("#{@command}_command")
        elsif %w{start stop restart monitor unmonitor remove}.include?(@command)
          setup
          lifecycle_command
        elsif @command == 'check'
          check_command
        else
          puts "Command '#{@command}' is not valid. Run 'god --help' for usage"
          abort
        end
      end

      # Send the contents of the config file named by args[1] to the server.
      # Aborts when no file was given or it does not exist; exits with status
      # 1 when the server reports errors.
      def load_command
        file = @args[1]

        puts "Sending '#{@command}' command"
        puts

        # File.exist?(nil) raises TypeError, so a missing argument needs its
        # own check before the existence test.
        abort "You must specify a file to load" unless file

        unless File.exist?(file)
          abort "File not found: #{file}"
        end

        names, errors = *@server.running_load(File.read(file), File.expand_path(file))

        # output response
        unless names.empty?
          puts 'The following tasks were affected:'
          names.each do |w|
            puts ' ' + w
          end
        end

        unless errors.empty?
          puts errors
          exit(1)
        end
      end

      # Print task states and exit with a status describing them.
      #
      # With args[1] naming a task: 0 -> up, 1 -> unmonitored, 2 -> other.
      # With args[1] naming a group: the number of tasks that are not up.
      # With an unknown name: 1. Without args[1]: everything is listed and
      # the exit status is 0.
      def status_command
        exitcode = 0
        statuses = @server.status

        # group name ('' for ungrouped) => { task name => status hash }
        groups = {}
        statuses.each do |name, status|
          g = status[:group] || ''
          groups[g] ||= {}
          groups[g][name] = status
        end

        if item = @args[1]
          if single = statuses[item]
            # specified task (0 -> up, 1 -> unmonitored, 2 -> other)
            state = single[:state]
            puts "#{item}: #{state}"
            exitcode =
              case state
              when :up          then 0
              when :unmonitored then 1
              else 2
              end
          elsif groups[item]
            # specified group (0 -> up, N -> other)
            puts "#{item}:"
            groups[item].keys.sort.each do |name|
              state = groups[item][name][:state]
              print " "
              puts "#{name}: #{state}"
              exitcode += 1 unless state == :up
            end
          else
            puts "Task or Group '#{item}' not found."
            exit(1)
          end
        else
          # show all groups and watches
          groups.keys.sort.each do |group|
            puts "#{group}:" unless group.empty?
            groups[group].keys.sort.each do |name|
              state = groups[group][name][:state]
              print " " unless group.empty?
              puts "#{name}: #{state}"
            end
          end
        end

        exit(exitcode)
      end

      # Ask the server to send signal args[2] to the watch/group args[1] and
      # list the affected watches.
      def signal_command
        # get the name of the watch/group
        name = @args[1]
        signal = @args[2]

        puts "Sending signal '#{signal}' to '#{name}'"

        watches = with_progress_dots(1) { @server.signal(name, signal) }

        # output response
        report_affected_watches(watches)
      end

      # Continuously print the server's log for the task/group args[1] until
      # interrupted, polling every quarter of a second.
      def log_command
        begin
          Signal.trap('INT') { exit }
          name = @args[1]

          unless name
            puts "You must specify a Task or Group name"
            exit!
          end

          puts "Please wait..."
          # The first request asks for everything since the epoch; after that
          # only entries newer than the previous poll are requested.
          t = Time.at(0)
          loop do
            print @server.running_log(name, t)
            t = Time.now
            sleep 0.25
          end
        rescue God::NoSuchWatchError
          puts "No such watch"
        rescue DRb::DRbConnError
          puts "The server went away"
        end
      end

      # Terminate the god server without stopping its watches first.
      def quit_command
        shutdown_server
      end

      # Stop all watches (reporting whether that succeeded in time), then
      # terminate the god server.
      def terminate_command
        if with_progress_dots(0) { @server.stop_all }
          puts 'Stopped all watches'
        else
          puts "Could not stop all watches within #{@server.terminate_timeout} seconds"
        end

        shutdown_server
      end

      # Self-test of the event system: fork a child, register for its exit
      # event, kill it, and report whether the event arrived. Always ends the
      # process, with status 0 on success and 1 on failure.
      def check_command
        Thread.new do
          begin
            event_system = God::EventHandler.event_system
            puts "using event system: #{event_system}"

            if God::EventHandler.loaded?
              puts "starting event handler"
              God::EventHandler.start
            else
              puts "[fail] event system did not load"
              exit(1)
            end

            puts 'forking off new process'

            pid = fork do
              loop { sleep(1) }
            end

            puts "forked process with pid = #{pid}"

            God::EventHandler.register(pid, :proc_exit) do
              puts "[ok] process exit event received"
              exit!(0)
            end

            sleep(1)

            puts "killing process"

            ::Process.kill('KILL', pid)
            ::Process.waitpid(pid)
          rescue => e
            puts e.message
            puts e.backtrace.join("\n")
          end
        end

        # Give the thread above two seconds; on success its event callback
        # has already ended the process via exit!(0) before we get here.
        sleep(2)

        puts "[fail] never received process exit event"
        exit(1)
      end

      # Send a lifecycle command (start, stop, restart, monitor, unmonitor,
      # remove) for the watch/group args[1] and list the affected watches.
      def lifecycle_command
        # get the name of the watch/group
        name = @args[1]

        puts "Sending '#{@command}' command"

        # send @command
        watches = with_progress_dots(1) { @server.control(name, @command) }

        # output response
        report_affected_watches(watches)
      end

      private

      # Run the block while a background thread prints progress dots to
      # STDOUT: it waits +lead_in+ seconds (skipped when 0), prints a dot,
      # waits one second, and repeats. The thread is stopped and the dot line
      # is finished with a newline even when the block raises.
      #
      # Returns the block's value.
      def with_progress_dots(lead_in)
        dots = Thread.new do
          loop do
            sleep(lead_in) if lead_in > 0
            STDOUT.print('.')
            STDOUT.flush
            sleep(1)
          end
        end

        yield
      ensure
        dots.kill if dots
        STDOUT.puts
      end

      # Print the watch names returned by the server, or a notice when the
      # list is empty.
      def report_affected_watches(watches)
        if watches.empty?
          puts 'No matching task or group'
        else
          puts 'The following watches were affected:'
          watches.each do |w|
            puts ' ' + w
          end
        end
      end

      # Ask the server to terminate. Success shows up as a dropped connection
      # (DRb::DRbConnError); if the call returns normally the server is still
      # running, so abort.
      def shutdown_server
        @server.terminate
        abort 'Could not stop god'
      rescue DRb::DRbConnError
        puts 'Stopped god'
      end
    end # Command

  end
end
@@ -0,0 +1,172 @@
module God
  module CLI

    # Starts god itself, either in the foreground or as a daemonized child
    # process, according to the parsed command line options. Constructing a
    # Run immediately dispatches.
    class Run
      # +options+ is the parsed option hash. Keys read in this class:
      # :syslog, :daemonize, :attach, :port, :events, :log_level, :config,
      # :bleakhouse, :pid and :log.
      def initialize(options)
        @options = options

        dispatch
      end

      # Choose between daemonized and foreground operation.
      def dispatch
        # have at_exit start god
        # NOTE(review): the at_exit hook that reads $run is not in this file.
        $run = true

        if @options[:syslog]
          require 'god/sys_logger'
        end

        # run
        if @options[:daemonize]
          run_daemonized
        else
          run_in_front
        end
      end

      # Start a background thread that checks every 5 seconds whether the
      # process with pid @options[:attach] still exists and takes this
      # process down (exit!) as soon as it does not.
      def attach
        process = System::Process.new(@options[:attach])
        Thread.new do
          loop do
            unless process.exists?
              applog(nil, :info, "Going down because attached process #{@options[:attach]} exited")
              exit!
            end
            sleep 5
          end
        end
      end

      # Startup steps shared by foreground and daemonized runs: redirect
      # output, apply options to God, and load the config file(s).
      def default_run
        # make sure we have STDIN/STDOUT redirected immediately
        setup_logging

        # start attached pid watcher if necessary
        if @options[:attach]
          self.attach
        end

        if @options[:port]
          God.port = @options[:port]
        end

        if @options[:events]
          God::EventHandler.load
        end

        # set log level; without an explicit option it is :warn when
        # daemonized and :info in the foreground
        if @options[:log_level]
          God.log_level = @options[:log_level]
        else
          God.log_level = @options[:daemonize] ? :warn : :info
        end

        if @options[:config]
          # a glob pattern cannot be checked for existence up front
          if !@options[:config].include?('*') && !File.exist?(@options[:config])
            abort "File not found: #{@options[:config]}"
          end

          # start the event handler
          God::EventHandler.start if God::EventHandler.loaded?

          load_config @options[:config]
        end

        # Run again after the config has been loaded. setup_logging reads
        # God.log_file, which presumably may have been set by the config.
        setup_logging
      end

      # Run god in the current process.
      def run_in_front
        require 'god'

        if @options[:bleakhouse]
          BleakHouseDiagnostic.install
        end

        default_run
      end

      # Fork a child that runs god, optionally record the child's pid in the
      # file @options[:pid], detach from the child and exit the parent.
      def run_daemonized
        # trap and ignore SIGHUP
        Signal.trap('HUP') {}

        pid = fork do
          begin
            require 'god'

            # set pid if requested
            if @options[:pid] # and as daemon
              God.pid = @options[:pid]
            end

            default_run

            unless God::EventHandler.loaded?
              puts
              puts "***********************************************************************"
              puts "*"
              puts "* Event conditions are not available for your installation of god."
              puts "* You may still use and write custom conditions using the poll system"
              puts "*"
              puts "***********************************************************************"
              puts
            end

          rescue => e
            puts e.message
            puts e.backtrace.join("\n")
            abort "There was a fatal system error while starting god (see above)"
          end
        end

        if @options[:pid]
          File.open(@options[:pid], 'w') { |f| f.write pid }
        end

        ::Process.detach pid

        exit
      end

      # Redirect the standard streams when a log file applies. The log file
      # is God.log_file, overridden by @options[:log]; when neither is set
      # and god is daemonized, output goes to /dev/null. STDIN is pointed at
      # /dev/null; STDOUT and STDERR append to the log file.
      def setup_logging
        log_file = God.log_file
        log_file = File.expand_path(@options[:log]) if @options[:log]
        log_file = "/dev/null" if !log_file && @options[:daemonize]
        if log_file
          puts "Sending output to log file: #{log_file}" unless @options[:daemonize]

          # reset file descriptors
          STDIN.reopen "/dev/null"
          STDOUT.reopen(log_file, "a")
          STDERR.reopen STDOUT
          STDOUT.sync = true
        end
      end

      # Load every config file selected by +config+, which is either a
      # directory (all *.god files below it, recursively), a single file, or
      # a glob pattern. Aborts when nothing matches or a file fails to load.
      def load_config(config)
        files =
          if File.directory?(config)
            # Anchor the recursive glob at the given directory. A bare
            # '**/*.god' would search the current working directory instead.
            Dir[File.join(config, '**', '*.god')]
          else
            Dir[config]
          end
        abort "No files could be found" if files.empty?

        files.each do |god_file|
          unless load_god_file(god_file)
            abort "File '#{god_file}' could not be loaded"
          end
        end
      end

      # Load a single config file.
      #
      # Returns true on success. On any exception other than SystemExit the
      # error and its backtrace are printed and false is returned. SystemExit
      # is re-raised so that a config file can still end the process.
      def load_god_file(god_file)
        applog(nil, :info, "Loading #{god_file}")
        load File.expand_path(god_file)
        true
      rescue SystemExit
        raise
      rescue Exception => e
        # Deliberately broad: a broken config file raises SyntaxError or
        # LoadError, which are not StandardErrors.
        puts "There was an error in #{god_file}"
        puts "\t" + e.message
        puts "\t" + e.backtrace.join("\n\t")
        false
      end

    end # Run

  end
end