alert_machine 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +65 -0
- data/alert_machine.gemspec +23 -0
- data/lib/alert_machine.rb +173 -0
- data/lib/process.rb +71 -0
- data/lib/rails_environment.rb +27 -0
- data/lib/run_task.rb +122 -0
- data/test/all.rb +2 -0
- data/test/helper.rb +46 -0
- data/test/process.rb +52 -0
- data/test/watcher.rb +31 -0
- metadata +92 -0
data/README
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
= AlertMachine
|
|
2
|
+
|
|
3
|
+
Get notifications if bad things happen to your server. You can easily make sure
|
|
4
|
+
all processes are running, ports are open. You can also add checks for bad events
|
|
5
|
+
that get run once every few minutes and get error reports to your email.
|
|
6
|
+
|
|
7
|
+
== Usage
|
|
8
|
+
|
|
9
|
+
1. Defining a Watcher class:
|
|
10
|
+
|
|
11
|
+
class MyWatcher < AlertMachine::Watcher
|
|
12
|
+
|
|
13
|
+
# Example 1: Make sure port 80 is running on server1 and server2.
|
|
14
|
+
watch_process(["server1.example.com", "server2.com"], :port => 80)
|
|
15
|
+
|
|
16
|
+
# Example 2: Make sure the two thin servers are running in server1.
|
|
17
|
+
# Check if the two ports are open, and check if the two pid files are present
|
|
18
|
+
# and pointing to valid processes.
|
|
19
|
+
watch_process("server1.example.com", :port => [3000, 3001], :pid_file =>
|
|
20
|
+
["/tmp/thin.3000.pid", "/tmp/thin.3001.pid"])
|
|
21
|
+
|
|
22
|
+
# Example 3: We can also make sure there are no new crashes.
|
|
23
|
+
watch(:retries => 0) do
|
|
24
|
+
new_crashes = Crash.where(unread: false).all
|
|
25
|
+
assert new_crashes.empty?, <<MAIL
|
|
26
|
+
#{new_crashes.length} new crashes found.
|
|
27
|
+
#{ new_crashes.collect {|c| c.print }.join("\n") }
|
|
28
|
+
MAIL
|
|
29
|
+
# The above code asserts that new_crashes is empty. If it's not empty
|
|
30
|
+
# an alert is triggered with the message contents being the string
|
|
31
|
+
# that follows the assert.
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
2. Running the watcher class:
|
|
36
|
+
|
|
37
|
+
File: my_watcher_runner.rb
|
|
38
|
+
require 'alert_machine'
|
|
39
|
+
|
|
40
|
+
# The below line safely loads the rails environment sending out alerts
|
|
41
|
+
# in case things are broken.
|
|
42
|
+
AlertMachine::RailsEnvironment.bootup
|
|
43
|
+
|
|
44
|
+
# Require your alert files.
|
|
45
|
+
require "offline/alerts/my_watcher1.rb"
|
|
46
|
+
require "offline/alerts/my_watcher2.rb"
|
|
47
|
+
|
|
48
|
+
# Run the machine.
|
|
49
|
+
AlertMachine.run
|
|
50
|
+
|
|
51
|
+
== Configuration
|
|
52
|
+
|
|
53
|
+
If you want to change the default settings, you can call:
|
|
54
|
+
|
|
55
|
+
AlertMachine.config("config_file_path")
|
|
56
|
+
|
|
57
|
+
before the `AlertMachine.run`
|
|
58
|
+
|
|
59
|
+
You can also easily pass different config files for development and production, if
|
|
60
|
+
you are using rails.
|
|
61
|
+
|
|
62
|
+
A list of all config options is available at AlertMachine::Watcher#watch and
|
|
63
|
+
AlertMachine::Watcher#watch_process
|
|
64
|
+
|
|
65
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
|
3
|
+
|
|
4
|
+
Gem::Specification.new do |s|
|
|
5
|
+
s.name = "alert_machine"
|
|
6
|
+
s.version = "0.0.1"
|
|
7
|
+
s.authors = ["prasanna"]
|
|
8
|
+
s.email = ["myprasanna@gmail.com"]
|
|
9
|
+
s.homepage = "http://github.com/likealittle/alert_machine"
|
|
10
|
+
s.summary = "Ruby way of alerting server events."
|
|
11
|
+
s.description = "Make sure you get mailed when bad things happen to your server."
|
|
12
|
+
|
|
13
|
+
s.rubyforge_project = "alert_machine"
|
|
14
|
+
|
|
15
|
+
s.files = `git ls-files`.split("\n")
|
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
|
17
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
|
18
|
+
s.require_paths = ["lib"]
|
|
19
|
+
|
|
20
|
+
s.add_runtime_dependency "rye"
|
|
21
|
+
s.add_runtime_dependency "actionmailer"
|
|
22
|
+
s.add_runtime_dependency "eventmachine"
|
|
23
|
+
end
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
require 'eventmachine'
|
|
2
|
+
require 'action_mailer'
|
|
3
|
+
|
|
4
|
+
class AlertMachine
|
|
5
|
+
|
|
6
|
+
# Base class for watchers. Checks are declared at class-definition time with
# the class-level helpers below; each declaration is registered with
# AlertMachine as a task.
class Watcher
  # Registers +block+ as a watch task.
  #
  # == Options:
  # The below options can also be overridden via config/alert_machine.yml
  #
  # * interval:
  #     Seconds between each run, during the steady state. 5 min default.
  #
  # * interval_error:
  #     How soon to check again, in case an error occurred. (interval)/5 default.
  #
  # * from, to:
  #     Comma separated list of emails, to bother when there are alerts.
  #     Defaults to whatever was specified in the config file.
  #
  # * retries:
  #     Number of times to try before alerting on error. Defaults to 1.
  #
  # * dont_check_long_processes:
  #     Don't assert if my watch took too long to run. Defaults to false.
  #
  # +caller+ is the call stack used to report where the watch was declared.
  # It defaults to the stack of whoever called this method. The default is
  # spelled Kernel.caller because a bare `caller = caller` default refers to
  # the parameter itself on Ruby 2.2+ and evaluates to nil.
  def self.watch(opts = {}, caller = Kernel.caller, &block)
    AlertMachine.tasks << RunTask.new(opts, block, caller)
  end

  # Asserts +conditions+ against the task AlertMachine reports as current,
  # passing along +msg+ and the call stack (defaults to the stack of
  # whoever called this method).
  def self.assert(conditions, msg = nil, caller = Kernel.caller)
    AlertMachine.current_task.assert(conditions, msg, caller)
  end

  # Make sure the process keeps running. machines can be one or many.
  #
  # == Options:
  # One or more of the below constraints. Any of the below can either
  # be a single element or an array. (eg. multiple ports)
  #
  # * port:
  #     Ensure the port is open.
  #
  # * pid_file:
  #     Make sure the pid file exists and the process corresponding to it,
  #     is alive.
  #
  # * grep:
  #     Executes `ps aux | grep <string>` to ensure process is running.
  #
  # Other usual options of watcher, mentioned above.
  #
  def self.watch_process(machines, opts = {})
    machines = [machines].flatten
    Process.watch(machines, opts, caller)
  end

  # Run a command on a set of machines, using Rye with the connection
  # options from AlertMachine.ssh_config.
  #
  # Returns an array of [hostname, results] pairs, ordered like +machines+.
  def self.run_command(machines, cmd)
    machines = [machines].flatten
    # Loaded here, on first use, rather than at the top of the file.
    require 'rye'
    set = Rye::Set.new(machines.join(","), :parallel => true)
    machines.each do |m|
      set.add_box(Rye::Box.new(m, AlertMachine.ssh_config.merge(:safe => false)))
    end
    puts "executing on #{machines}: #{cmd}"
    res = set.execute(cmd).
      group_by { |ry| ry.box.hostname }.
      sort_by { |name, _op| machines.index(name) }
    res.each do |machine, op|
      puts "[#{machine}]\n#{op.join("\n")}\n"
    end
  end

  # Class-level counterpart of the private instance #puts below. The class
  # methods above (e.g. run_command) call puts with the class as receiver,
  # which an instance-level override never intercepts.
  def self.puts(*args)
    super unless AlertMachine.test_mode?
  end
  private_class_method :puts

  private
  # To suppress logging in test mode.
  def puts(*args)
    super unless AlertMachine.test_mode?
  end
end
|
|
76
|
+
|
|
77
|
+
# Configure your machine before running it.
|
|
78
|
+
CONFIG_FILE = 'config/alert_machine.yml'
|
|
79
|
+
@@config = nil
|
|
80
|
+
# Returns the configuration hash, loading it from +config_file+ (YAML) on
# first use and memoizing it in @@config afterwards.
#
# Any error while reading or parsing the file yields an empty hash for that
# call, and nothing is cached in that case.
def self.config(config_file = CONFIG_FILE)
  # The block form of File.open closes the handle once the YAML is parsed;
  # `|| {}` covers an empty file, for which YAML::load returns a falsy value.
  @@config ||= File.open(config_file) { |fh| YAML::load(fh) } || {}
rescue
  {}
end
|
|
85
|
+
|
|
86
|
+
# Invoke this whenever you are ready to enter the AlertMachine loop.
|
|
87
|
+
# Enters the EventMachine loop: schedules every registered task and then
# yields to the optional block inside the running reactor.
#
# Does nothing when the machine was already invoked (or disabled via
# AlertMachine.disable).
def self.run
  return if @@em_invoked

  @@em_invoked = true
  EM::run do
    @@tasks.each { |task| task.schedule }
    yield if block_given?
  end
end
|
|
98
|
+
|
|
99
|
+
# Returns the 'ssh' section of the configuration with its keys converted to
# symbols. A configuration without an 'ssh' section gives an empty hash
# instead of raising NoMethodError on nil.
def self.ssh_config
  res = {}
  (config['ssh'] || {}).each_pair do |k, v|
    res[k.to_sym] = v
  end
  res
end
|
|
106
|
+
|
|
107
|
+
# Sets the "already invoked" flag that AlertMachine.run checks. With the
# default (true) a later AlertMachine.run is a no-op; pass false to allow
# it to run again.
def self.disable(disabled = true)
  @@em_invoked = disabled
end
|
|
110
|
+
|
|
111
|
+
# Figures out how to parse the call stack and pretty print it.
class Caller
  # Matches the "<file>:<line>:" prefix of a call stack entry.
  LOCATION = /^(?<fname>[^:]+)\:(?<line>\d+)\:/

  attr_reader :caller, :file, :line

  # caller:: call stack (array of strings, as returned by Kernel#caller).
  #          nil is treated as an empty stack.
  #
  # Only the first entry is parsed; file and line stay nil when it does not
  # look like "<file>:<line>:...".
  def initialize(caller, &block)
    @block = block if block_given?
    @caller = caller || []
    location = LOCATION.match(@caller[0].to_s)
    @file, @line = location[:fname], location[:line] if location
  end

  # "<file>:<line>" of the first stack entry.
  def file_line
    "#{file}:#{line}"
  end

  # The first stack entry followed by the source excerpt, when available.
  def log
    "#{caller[0]}\n" +
      log_source_file.to_s
  end

  # Returns the reported line and the four lines after it, each prefixed
  # with ">> ", followed by a "---" separator. Returns nil when the source
  # file is unknown or does not exist.
  def log_source_file
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    return unless file && File.exist?(file)

    first = line.to_i - 1
    # Slicing past the end of the file gives nil; treat it as no lines.
    excerpt = File.readlines(file)[first..first + 4] || []
    excerpt.collect { |l| ">> #{l}" }.join + "\n---\n"
  end
end
|
|
139
|
+
|
|
140
|
+
# Class-wide state: the registered tasks, whether the machine was already
# invoked, and the task that is executing right now.
@@tasks = []
@@em_invoked = false
@@current_task = nil

# All tasks registered so far.
def self.tasks
  @@tasks
end

# The task currently being executed, or nil.
def self.current_task
  @@current_task
end

# Records +task+ as the one currently being executed.
def self.current_task=(task)
  @@current_task = task
end

# Forgets every registered task.
def self.reset
  @@tasks = []
end

private
# Instance-level puts that stays silent in test mode.
def puts(*args)
  super unless AlertMachine.test_mode?
end

# Always false here. Note that `private` above does not apply to
# `def self.` methods, so this remains publicly callable.
def self.test_mode?
  false
end
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
dname = File.dirname(__FILE__)
|
|
171
|
+
require "#{dname}/process.rb"
|
|
172
|
+
require "#{dname}/run_task.rb"
|
|
173
|
+
require "#{dname}/rails_environment.rb"
|
data/lib/process.rb
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
class AlertMachine
|
|
2
|
+
|
|
3
|
+
# Checks if processes are living, and have their ports open.
|
|
4
|
+
class Process < Watcher
|
|
5
|
+
class << self
|
|
6
|
+
|
|
7
|
+
# Registers a watch that runs the port, pid_file and grep checks named in
# +opts+ against +machines+.
#
# Raises ArgumentError when none of the three options is given, or when a
# block is passed.
def watch(machines, opts, caller)
  unless opts[:port] || opts[:pid_file] || opts[:grep]
    raise ArgumentError, "Must mention atleast one of (port, pid_file, grep)"
  end
  raise ArgumentError, "Must not be passed a block" if block_given?

  super(opts, caller) do
    [:port, :pid_file, :grep].each do |entity|
      check(entity, machines, opts, caller)
    end
  end
end
|
|
18
|
+
|
|
19
|
+
# Checks, via netstat, that something is listening on +port+ on each of
# +machines+.
def check_port(machines, port, caller)
  command = "netstat -na | grep 'LISTEN' | grep '\\(\\:\\|\\.\\)#{port} ' | grep -v grep"
  checking = "Checking if port #{port} is open on %s"
  failed = "Port #{port} seems down on %s"
  check_command(machines, command, checking, failed, caller)
end
|
|
25
|
+
|
|
26
|
+
# Checks, via `ps -p`, that the pid stored in +file+ belongs to a running
# process on each of +machines+.
def check_pid_file(machines, file, caller)
  command = "ps -p `cat #{file}`"
  checking = "Checking if valid pidfile #{file} exists in %s"
  failed = "Pidfile #{file} doesnt seem valid at %s"
  check_command(machines, command, checking, failed, caller)
end
|
|
31
|
+
|
|
32
|
+
# Checks that the process list on each of +machines+ has a line matching
# the +grep+ pattern.
def check_grep(machines, grep, caller)
  command = "ps aux | grep '#{grep}' | grep -v grep"
  checking = "Grepping the process list for '#{grep}' in %s"
  failed = "Grepping the process list for '#{grep}' failed at %s"
  check_command(machines, command, checking, failed, caller)
end
|
|
37
|
+
|
|
38
|
+
# Runs +cmd+ on +machines+ and reports the machines on which it failed.
#
# machines::  array of host names.
# cmd::       shell command; "|| echo BAD" is appended so that a failing
#             command shows up as BAD in that machine's output.
# check_msg:: format string (one %s for the machine list) logged up front.
# error_msg:: format string (one %s for the machine list) for the failure.
# caller::    call stack passed on to check_command_failed.
#
# When the command could not be run at all, every machine is reported.
def check_command(machines, cmd, check_msg, error_msg, caller)
  puts check_msg % machines.join(", ")
  bad_machines = []
  run_command(machines,
    "#{cmd} || echo BAD"
  ).each { |machine, output|
    bad_machines << machine if output.join(" ").match(/BAD/)
  }
rescue StandardError => e
  # Only failures to run the command land here. Rescuing Exception would
  # also catch interrupts and exit requests.
  puts "Exception: #{e.to_s}"
  puts "#{e.backtrace.join("\n")}"
  check_command_failed(machines, error_msg, caller)
else
  # Reported outside the rescued section: whatever check_command_failed
  # raises must propagate as is, instead of being caught above and
  # re-reported against every machine.
  check_command_failed(bad_machines, error_msg, caller) unless
    bad_machines.empty?
end
|
|
53
|
+
|
|
54
|
+
# Fails the current assertion with +error_msg+ formatted with the
# comma-separated list of +machines+.
def check_command_failed(machines, error_msg, caller)
  message = error_msg % machines.join(", ")
  assert false, message, caller
end
|
|
57
|
+
|
|
58
|
+
# Runs check_<entity> once for every value given under opts[entity]
# (a single value or an array). Does nothing when the option is absent.
def check(entity, machines, opts, caller)
  return unless opts[entity]

  checker = "check_#{entity}".to_sym
  [opts[entity]].flatten.each do |val|
    Process.send(checker, machines, val, caller)
  end
end
|
|
63
|
+
|
|
64
|
+
private

# Prints only when AlertMachine is not in test mode.
def puts(*args)
  super unless AlertMachine.test_mode?
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
class AlertMachine
|
|
2
|
+
# Loads the application environment, mailing an alert when it cannot be
# loaded.
class RailsEnvironment

  ENVIRONMENT_PATH = "./config/environment.rb"

  # Requires +path+. If that raises, the exception is printed and a mail
  # describing it is delivered to the addresses in AlertMachine.config.
  def self.bootup(path = ENVIRONMENT_PATH)
    require path
  rescue Exception => e
    # Exception rather than StandardError: a failing require raises
    # LoadError or SyntaxError, which are ScriptErrors.
    puts "Exception: #{e.to_s}"
    puts e.backtrace.join("\n")
    config = AlertMachine.config
    ActionMailer::Base.mail(
      :from => config['from'],
      :to => config['to'],
      :subject => "AlertMachine Failed: Environment could not load."
    ) do |format|
      format.text {
        # The stack trace comes from Exception#backtrace (exceptions have no
        # #caller), joined with a real newline.
        render :text => <<TXT
machine: #{`hostname`}
exception: #{e.to_s}
#{e.backtrace.join("\n")}
TXT
      }
    end.deliver
  end

end
|
|
27
|
+
end
|
data/lib/run_task.rb
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# A single watch and its life cycle.
|
|
2
|
+
class AlertMachine
|
|
3
|
+
class RunTask
|
|
4
|
+
# opts::   options given to the watch declaration.
# block::  the check to run.
# caller:: call stack of the watch declaration.
def initialize(opts, block, caller)
  @opts = opts
  @block = block
  @caller = caller
  @errors = []         # failures collected since the last successful run
  @alert_state = false # whether the alert is currently firing
end
|
|
9
|
+
|
|
10
|
+
# Starts a periodic EventMachine timer that runs the watch block.
#
# A successful run restores the regular interval, clears the collected
# errors and switches the alert off. A failing run switches the timer to
# interval_error, records the failure and fires the alert once more than
# `retries` failures have piled up.
def schedule
  @timer = EM::PeriodicTimer.new(interval) do
    with_task do
      started_at = Time.now
      begin
        # The main call to the user-defined watcher function.
        @block.call(*@opts[:args])

        unless dont_check_long_processes?
          assert(Time.now - started_at < interval / 5.0,
                 "Task ran for too long. Invoked every #{interval}s. Ran for #{Time.now - started_at}s.",
                 @caller)
        end

        # Things finished successfully.
        @timer.interval = interval unless @errors.empty?
        @errors = []

        alert_state(false)

      rescue Exception => failure
        # AssertionFailure derives from Exception, hence the broad rescue.
        # Anything else is wrapped so that it can be logged the same way.
        unless failure.is_a?(AssertionFailure)
          puts "Task Exception: #{failure.to_s}"
          puts "#{failure.backtrace.join("\n")}"
          failure = AssertionFailure.new(failure.to_s, failure.backtrace)
        end

        @timer.interval = interval_error if @errors.empty?
        @errors << failure

        alert_state(true) if @errors.length > retries
      end
    end
  end
end
|
|
44
|
+
|
|
45
|
+
# Marks this task as AlertMachine's current task while the block runs, and
# clears the marker afterwards, even when the block raises.
def with_task
  begin
    AlertMachine.current_task = self
    yield
  ensure
    AlertMachine.current_task = nil
  end
end
|
|
51
|
+
|
|
52
|
+
# Fails with +msg+ (see assert_failed) unless +condition+ is truthy.
def assert(condition, msg, caller)
  assert_failed(msg, caller) unless condition
end
|
|
56
|
+
|
|
57
|
+
# Logs and raises an AssertionFailure built from +msg+ and +caller+.
def assert_failed(msg, caller)
  failure = AssertionFailure.new(msg, caller)
  puts failure.log
  raise failure
end
|
|
62
|
+
|
|
63
|
+
# Records whether the alert is firing. When the state changes a mail is
# sent, except that a "firing" mail is skipped if the last state change
# happened less than ten minutes ago.
def alert_state(firing)
  if firing != @alert_state
    changed_recently = @last_mailed && @last_mailed > Time.now - 60*10
    mail unless changed_recently && firing
    @last_mailed = Time.now
  end
  @alert_state = firing
end
|
|
71
|
+
|
|
72
|
+
# Mails the current state of this watch to the configured addresses.
#
# With collected errors, the subject names the latest failure and the body
# holds the log of every collected failure. Without errors (the watch has
# just recovered and the error list was already cleared) a recovery notice
# is sent instead; reading the latest failure in that case would raise
# NoMethodError on nil.
def mail
  last = @errors[-1]
  if last
    subject = "AlertMachine Failed: #{last.msg || last.parsed_caller.file_line}"
    body = @errors.collect {|e| e.log}.join("\n=============\n")
  else
    # First call stack entry of the watch declaration, when known.
    origin = Array(@caller).first || "watch"
    subject = "AlertMachine Recovered: #{origin}"
    body = "The watch is passing again.\n#{origin}\n"
  end

  ActionMailer::Base.mail(
    :from => opts(:from),
    :to => opts(:to),
    :subject => subject,
    :body => body
  ).deliver
end
|
|
81
|
+
|
|
82
|
+
# Looks up an option: the first truthy value among the watch's own
# options, the config entry of the same name (string key) and +defaults+.
# When none is truthy, the result of the block is returned if one was
# given, otherwise false.
def opts(key, defaults = nil)
  own = @opts[key]
  return own if own

  configured = config[key.to_s]
  return configured if configured

  return defaults if defaults

  block_given? && yield
end
|
|
85
|
+
|
|
86
|
+
# Seconds between runs; five minutes unless configured.
def interval
  opts(:interval, 5 * 60).to_f
end

# Seconds between runs after a failure; a fifth of the interval unless
# configured.
def interval_error
  configured = opts(:interval_error) { interval / 5.0 }
  configured.to_f
end

# Failures tolerated before the alert fires; 1 unless configured.
def retries
  opts(:retries, 1).to_i
end

# Whether the "task ran for too long" assertion is skipped.
def dont_check_long_processes?
  opts(:dont_check_long_processes, false).to_s == "true"
end

# The global AlertMachine configuration.
def config
  AlertMachine.config
end
|
|
95
|
+
|
|
96
|
+
# When an assertion fails, this exception is raised so that we can unwind
# the stack frame. Note that it derives directly from Exception, not from
# StandardError, so a bare `rescue` does not catch it.
class AssertionFailure < Exception
  attr_reader :msg, :caller, :time

  # msg::    description of the failure; may be nil.
  # caller:: call stack of the failed assertion.
  def initialize(msg, caller)
    @msg, @caller, @time = msg, caller, Time.now
    super(@msg)
  end

  # Log entry for this failure: the time it was created, the message (if
  # any) and the parsed call stack.
  def log
    # Interpolation rather than `msg + "\n"` keeps non-String messages working.
    heading = msg ? "#{msg}\n" : ""
    # Stamped with the failure's own time, not the time of logging, so
    # failures that are logged later keep their real timestamps.
    "[#{time}] #{heading}" +
      "#{parsed_caller.log}"
  end

  # The call stack wrapped in a Caller.
  def parsed_caller
    Caller.new(caller)
  end
end
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
private

# Prints only when AlertMachine is not in test mode.
def puts(*args)
  super unless AlertMachine.test_mode?
end
|
|
121
|
+
end
|
|
122
|
+
end
|
data/test/all.rb
ADDED
data/test/helper.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'test/unit'
|
|
3
|
+
require 'mocha'
|
|
4
|
+
require File.dirname(__FILE__) + '/../lib/alert_machine.rb'
|
|
5
|
+
|
|
6
|
+
AlertMachine.disable
|
|
7
|
+
|
|
8
|
+
# Shared base class for the tests: stubs the configuration, builds
# throw-away watcher classes and runs the machine for a short while.
class AlertMachineTestHelper < Test::Unit::TestCase
  # Resets the registered tasks and stubs AlertMachine.config and
  # AlertMachine.test_mode?. With runs_long the stubbed config sets
  # dont_check_long_processes to "true".
  def setup(runs_long = false)
    AlertMachine.reset
    stubbed_config = {
      'dont_check_long_processes' => runs_long ? "true" : "false",
      'ssh' => {
      }
    }
    AlertMachine.expects(:config).returns(stubbed_config).at_least(0)
    AlertMachine.expects(:test_mode?).returns(true).at_least(0)
  end

  # Declares an anonymous watcher whose watch (interval 0.05s) runs the
  # block given to this method.
  def watcher(opts = {})
    Class.new(AlertMachine::Watcher) do
      watch(opts.merge(:interval => 0.05)) { yield }
    end
  end

  # Declares an anonymous watcher that watches processes on localhost.
  def process_watcher(opts = {})
    Class.new(AlertMachine::Watcher) do
      watch_process "localhost", {interval: 0.05}.merge(opts)
    end
  end

  # Runs the machine, stopping the event loop after 0.1s. The optional
  # block is executed inside the event loop.
  def run_machine
    AlertMachine.disable(false)
    AlertMachine.run do
      EM::Timer.new(0.1) { EM::stop_event_loop }
      yield if block_given?
    end
  ensure
    AlertMachine.disable
  end
end
|
data/test/process.rb
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/helper.rb')
|
|
2
|
+
|
|
3
|
+
# Exercises watch_process against localhost for the port, pid_file and
# grep checks.
class ProcessTest < AlertMachineTestHelper

  # Process checks run with the long-running-task assertion switched off.
  def setup
    super(true)
  end

  def test_port_open_failuire
    process_watcher(:port => 3343)
    AlertMachine::RunTask.any_instance.expects(:assert_failed).at_least_once
    run_machine
  end

  def test_port_open_success
    process_watcher(:port => 3343)
    AlertMachine::RunTask.any_instance.expects(:assert_failed).never
    run_machine do
      EM::start_server "localhost", 3343 do
      end
    end
  end

  def test_pid_file_failuire
    # An empty pid file does not point to any process.
    `rm -f /tmp/pid_x; touch /tmp/pid_x`
    process_watcher(:pid_file => "/tmp/pid_x")
    AlertMachine::RunTask.any_instance.expects(:assert_failed).at_least_once
    run_machine
  end

  def test_pid_file_success
    `rm -f /tmp/pid_x; touch /tmp/pid_x`
    process_watcher(:pid_file => "/tmp/pid_x")
    AlertMachine::RunTask.any_instance.expects(:assert_failed).never
    # The pid of this very test process is certainly alive.
    File.open("/tmp/pid_x", "w") do |pid_file|
      pid_file.write "#{Process.pid}"
    end
    run_machine
  end

  def test_grep_failuire
    process_watcher(:grep => "test/stupid.rb")
    AlertMachine::RunTask.any_instance.expects(:assert_failed).at_least_once
    run_machine
  end

  def test_grep_success
    process_watcher(:grep => "test/process.rb\\|test/all.rb")
    AlertMachine::RunTask.any_instance.expects(:assert_failed).never
    File.open("/tmp/pid_x", "w") do |pid_file|
      pid_file.write "#{Process.pid}"
    end
    run_machine
  end

end
|
data/test/watcher.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/helper.rb')
|
|
2
|
+
|
|
3
|
+
# Exercises plain watches: passing runs, long-running runs and the retry
# threshold.
class WatcherTest < AlertMachineTestHelper

  def test_no_alerts_triggerred
    watcher {}
    AlertMachine::RunTask.any_instance.expects(:assert_failed).never
    run_machine
  end

  def test_alerts_for_long_running_processes
    # Sleeps for as long as the whole watch interval.
    watcher { sleep 0.05 }
    AlertMachine::RunTask.any_instance.expects(:assert_failed).at_least_once
    run_machine
  end

  def test_no_alerts_before_retries
    runs = 0
    # Fails on the first run only.
    watcher(:retries => 1) do
      AlertMachine::Watcher.assert false if (runs += 1) <= 1
    end
    AlertMachine::RunTask.any_instance.expects(:mail).never
    run_machine
  end

  def test_alert_fires_after_retries
    runs = 0
    # Fails on the first two runs.
    watcher(:retries => 1) do
      AlertMachine::Watcher.assert false if (runs += 1) <= 2
    end
    AlertMachine::RunTask.any_instance.expects(:mail).at_least_once
    run_machine
  end

end
|
metadata
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: alert_machine
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- prasanna
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2012-01-15 00:00:00.000000000Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: rye
|
|
16
|
+
requirement: &70208510413660 !ruby/object:Gem::Requirement
|
|
17
|
+
none: false
|
|
18
|
+
requirements:
|
|
19
|
+
- - ! '>='
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '0'
|
|
22
|
+
type: :runtime
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: *70208510413660
|
|
25
|
+
- !ruby/object:Gem::Dependency
|
|
26
|
+
name: actionmailer
|
|
27
|
+
requirement: &70208510412880 !ruby/object:Gem::Requirement
|
|
28
|
+
none: false
|
|
29
|
+
requirements:
|
|
30
|
+
- - ! '>='
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: *70208510412880
|
|
36
|
+
- !ruby/object:Gem::Dependency
|
|
37
|
+
name: eventmachine
|
|
38
|
+
requirement: &70208510412160 !ruby/object:Gem::Requirement
|
|
39
|
+
none: false
|
|
40
|
+
requirements:
|
|
41
|
+
- - ! '>='
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
version: '0'
|
|
44
|
+
type: :runtime
|
|
45
|
+
prerelease: false
|
|
46
|
+
version_requirements: *70208510412160
|
|
47
|
+
description: Make sure you get mailed when bad things happen to your server.
|
|
48
|
+
email:
|
|
49
|
+
- myprasanna@gmail.com
|
|
50
|
+
executables: []
|
|
51
|
+
extensions: []
|
|
52
|
+
extra_rdoc_files: []
|
|
53
|
+
files:
|
|
54
|
+
- README
|
|
55
|
+
- alert_machine.gemspec
|
|
56
|
+
- lib/alert_machine.rb
|
|
57
|
+
- lib/process.rb
|
|
58
|
+
- lib/rails_environment.rb
|
|
59
|
+
- lib/run_task.rb
|
|
60
|
+
- test/all.rb
|
|
61
|
+
- test/helper.rb
|
|
62
|
+
- test/process.rb
|
|
63
|
+
- test/watcher.rb
|
|
64
|
+
homepage: http://github.com/likealittle/alert_machine
|
|
65
|
+
licenses: []
|
|
66
|
+
post_install_message:
|
|
67
|
+
rdoc_options: []
|
|
68
|
+
require_paths:
|
|
69
|
+
- lib
|
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
71
|
+
none: false
|
|
72
|
+
requirements:
|
|
73
|
+
- - ! '>='
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0'
|
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
|
+
none: false
|
|
78
|
+
requirements:
|
|
79
|
+
- - ! '>='
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '0'
|
|
82
|
+
requirements: []
|
|
83
|
+
rubyforge_project: alert_machine
|
|
84
|
+
rubygems_version: 1.8.10
|
|
85
|
+
signing_key:
|
|
86
|
+
specification_version: 3
|
|
87
|
+
summary: Ruby way of alerting server events.
|
|
88
|
+
test_files:
|
|
89
|
+
- test/all.rb
|
|
90
|
+
- test/helper.rb
|
|
91
|
+
- test/process.rb
|
|
92
|
+
- test/watcher.rb
|