ey-big-brother 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +3 -0
- data/Rakefile +60 -0
- data/TODO +4 -0
- data/bin/ey-alert-agent +26 -0
- data/bin/ey-drain-queues +43 -0
- data/bin/ey-monitor-agent +26 -0
- data/lib/alert_agent.rb +126 -0
- data/lib/cron_feeder.rb +68 -0
- data/lib/ey_config.rb +42 -0
- data/lib/feeder.rb +18 -0
- data/lib/monitor_agent.rb +137 -0
- data/lib/packet.rb +61 -0
- data/lib/utils.rb +8 -0
- data/spec/integration_spec.rb +185 -0
- data/spec/spec_helper.rb +15 -0
- metadata +71 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 YOUR NAME
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake/gempackagetask'
|
3
|
+
require 'rubygems/specification'
|
4
|
+
require 'date'
|
5
|
+
require 'spec/rake/spectask'
|
6
|
+
|
7
|
+
GEM = "ey-big-brother"
|
8
|
+
GEM_VERSION = "0.0.9"
|
9
|
+
AUTHOR = "Ezra Zygmuntowicz"
|
10
|
+
EMAIL = "ez@engineyard.com"
|
11
|
+
HOMEPAGE = "http://engineyard.com"
|
12
|
+
SUMMARY = "A gem that provides oversight of the ey-cloud"
|
13
|
+
|
14
|
+
spec = Gem::Specification.new do |s|
|
15
|
+
s.name = GEM
|
16
|
+
s.version = GEM_VERSION
|
17
|
+
s.platform = Gem::Platform::RUBY
|
18
|
+
s.has_rdoc = true
|
19
|
+
s.extra_rdoc_files = ["README.rdoc", "LICENSE", 'TODO']
|
20
|
+
s.summary = SUMMARY
|
21
|
+
s.description = s.summary
|
22
|
+
s.author = AUTHOR
|
23
|
+
s.email = EMAIL
|
24
|
+
s.homepage = HOMEPAGE
|
25
|
+
|
26
|
+
# Uncomment this to add a dependency
|
27
|
+
# s.add_dependency "foo"
|
28
|
+
s.bindir = "bin"
|
29
|
+
s.executables = %w(ey-monitor-agent ey-alert-agent ey-drain-queues)
|
30
|
+
|
31
|
+
|
32
|
+
s.require_path = 'lib'
|
33
|
+
s.autorequire = GEM
|
34
|
+
s.files = %w(LICENSE README.rdoc Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
|
35
|
+
end
|
36
|
+
|
37
|
+
task :default => :spec
|
38
|
+
|
39
|
+
desc "Run specs"
|
40
|
+
Spec::Rake::SpecTask.new do |t|
|
41
|
+
t.spec_files = FileList['spec/**/*_spec.rb']
|
42
|
+
t.spec_opts = %w(-fs --color)
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
47
|
+
pkg.gem_spec = spec
|
48
|
+
end
|
49
|
+
|
50
|
+
desc "install the gem locally"
|
51
|
+
task :install => [:package] do
|
52
|
+
sh %{sudo gem install pkg/#{GEM}-#{GEM_VERSION}}
|
53
|
+
end
|
54
|
+
|
55
|
+
desc "create a gemspec file"
|
56
|
+
task :make_spec do
|
57
|
+
File.open("#{GEM}.gemspec", "w") do |file|
|
58
|
+
file.puts spec.to_ruby
|
59
|
+
end
|
60
|
+
end
|
data/TODO
ADDED
data/bin/ey-alert-agent
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# ey-alert-agent: command-line launcher for AlertAgent.
#
# Parses the shared broker options, connects to the broker and hands an
# MQ channel to AlertAgent, which subscribes itself in its constructor.
require 'alert_agent'
require 'optparse'
require 'ey_config'
include EY::Config

# Defaults; each entry can be overridden from the command line.
options = {:host => 'ec2-67-202-29-203.compute-1.amazonaws.com', :user => 'guest', :pass => 'guest', :vhost => '/production'}

parser = OptionParser.new do |o|
  # The banner previously named ey-monitor-agent (copy/paste leftover).
  o.banner = "Usage: ey-alert-agent [-flag] [argument]"
  o.define_head "Ey Alert Agent: watching over your cloud since way back when"
  o.separator '*'*80

  setup_common_options(o, options)
end

parser.parse!

# Shut the broker connection down cleanly before stopping the reactor.
Signal.trap('INT') { AMQP.stop{ EM.stop } }
Signal.trap('TERM'){ AMQP.stop{ EM.stop } }

settings = {:host => options[:host], :user => options[:user],
            :pass => options[:pass], :vhost => options[:vhost]}
# --port used to be parsed and then silently dropped. Only pass it along when
# it was actually given so the client library's default port still applies.
settings[:port] = Integer(options[:port]) if options[:port]

AMQP.start(settings) do
  mq = MQ.new
  mq.prefetch(10)
  AlertAgent.new(mq)
end
|
data/bin/ey-drain-queues
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# ey-drain-queues: consumes and acknowledges every message on the monitor and
# alert queues without processing it, emptying both queues.
require 'optparse'
require 'ey_config'
require 'alert_agent'
require 'monitor_agent'

include EY::Config

# Defaults; each entry can be overridden from the command line.
options = {:host => 'ec2-67-202-29-203.compute-1.amazonaws.com', :user => 'guest', :pass => 'guest', :vhost => '/production'}

parser = OptionParser.new do |o|
  o.banner = "Usage: ey-drain-queues [-flag] [argument]"
  o.define_head "Draining ur q's since way back when"
  o.separator '*'*80

  setup_common_options(o, options)
end

parser.parse!

# Shut the broker connection down cleanly before stopping the reactor.
Signal.trap('INT') { AMQP.stop{ EM.stop } }
Signal.trap('TERM'){ AMQP.stop{ EM.stop } }

settings = {:host => options[:host], :user => options[:user],
            :pass => options[:pass], :vhost => options[:vhost]}
# --port used to be parsed and then silently dropped. Only pass it along when
# it was actually given so the client library's default port still applies.
settings[:port] = Integer(options[:port]) if options[:port]

AMQP.start(settings) do
  mq = MQ.new
  mq.prefetch(100)

  # Queue name paired with the line printed for each message drained from it.
  # An array of pairs keeps the subscription order fixed.
  [
    [MonitorAgent::QUEUE_NAME, "drained message"],
    [AlertAgent::QUEUE_NAME, "drained alert"]
  ].each do |queue_name, notice|
    mq.queue(queue_name).subscribe(:ack => true) do |h, _m|
      if AMQP.closing?
        # Not acked on purpose: the message stays on the queue.
        Utils::LOG.info "(ignoring message, will be redelivered later)"
      else
        puts notice
        h.ack
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# ey-monitor-agent: command-line launcher for MonitorAgent.
#
# Parses the shared broker options, connects to the broker and hands an
# MQ channel to MonitorAgent, which subscribes itself in its constructor.
require 'monitor_agent'
require 'optparse'
require 'ey_config'
include EY::Config

# Defaults; each entry can be overridden from the command line.
options = {:host => 'ec2-67-202-29-203.compute-1.amazonaws.com', :user => 'guest', :pass => 'guest', :vhost => '/production'}

parser = OptionParser.new do |o|
  o.banner = "Usage: ey-monitor-agent [-flag] [argument]"
  o.define_head "Ey Monitor Agent: watching over your cloud since way back when"
  o.separator '*'*80

  setup_common_options(o, options)
end

parser.parse!

# Shut the broker connection down cleanly before stopping the reactor.
Signal.trap('INT') { AMQP.stop{ EM.stop } }
Signal.trap('TERM'){ AMQP.stop{ EM.stop } }

settings = {:host => options[:host], :user => options[:user],
            :pass => options[:pass], :vhost => options[:vhost]}
# --port used to be parsed and then silently dropped. Only pass it along when
# it was actually given so the client library's default port still applies.
settings[:port] = Integer(options[:port]) if options[:port]

AMQP.start(settings) do
  mq = MQ.new
  mq.prefetch(10)
  MonitorAgent.new(mq)
end
|
data/lib/alert_agent.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'amqp'
|
3
|
+
require 'socket'
|
4
|
+
require 'mq'
|
5
|
+
require "#{File.dirname(__FILE__)}/packet"
|
6
|
+
require "#{File.dirname(__FILE__)}/utils"
|
7
|
+
gem 'ezmobius-redis'
|
8
|
+
require 'redis'
|
9
|
+
|
10
|
+
require 'rest_client'
|
11
|
+
|
12
|
+
Thread.abort_on_exception = true
|
13
|
+
|
14
|
+
# Consumes monitor results from the 'failed-cloud-monitors' queue, remembers
# the last known state ('ok' / 'fail') of every host in Redis and reports a
# host only when its state changes.
class AlertAgent
  QUEUE_NAME = 'failed-cloud-monitors'
  FAIL = 'fail'.freeze
  OK = 'ok'.freeze
  # Upper bound on attempts for a single Redis read or write.
  MAX_REDIS_TRIES = 10

  attr_accessor :mq, :redis

  # mq - the channel to subscribe on. Subscribing happens here, so building an
  #      AlertAgent is what starts message consumption.
  def initialize(mq)
    @redis = Redis.new
    @mq = mq
    mq.queue(QUEUE_NAME).subscribe(:ack => true) do |headers, message|
      if AMQP.closing?
        # Not acked on purpose: the message stays on the queue.
        Utils::LOG.info "(ignoring message, will be redelivered later)"
      else
        process_alerts(message, headers)
      end
    end
  end

  # Handles one queue message and acks it once every server was processed.
  #
  # NOTE(review): relies on JSON.parse reviving the payload into an object
  # that responds to #servers, #env_id and #api (via 'json_class') - confirm
  # against the JSON library version in use.
  def process_alerts(message, headers)
    alert = JSON.parse(message)
    alert.servers.each do |server|
      process_server(server.merge('env_id' => alert.env_id, 'api' => alert.api))
    end
    headers.ack
  end

  # server looks like:
  #   {"host" => host, "result" => {service => 'up' | 'down'},
  #    "timestamp" => epoch_seconds, 'token' => token}
  #
  # Stores the new state and raises an alert only when the computed state
  # differs from the stored one (a host never seen before has state nil, so
  # its first result is always reported).
  def process_server(server)
    failing = server['result'].any? { |_service, status| status == 'down' }
    current = failing ? FAIL : OK
    return if get_state(server) == current

    set_state(server, current)
    process_alert(server)
  end

  # Records +state+ for the server's host. Returns nil when Redis stayed
  # unreachable for MAX_REDIS_TRIES attempts.
  def set_state(server, state)
    with_redis_retry { @redis[server['host']] = state }
  end

  # Returns the stored state for the server's host, or nil when there is none
  # or Redis stayed unreachable for MAX_REDIS_TRIES attempts.
  def get_state(server)
    with_redis_retry { @redis[server['host']] }
  end

  # Builds the hash that is POSTed by report_to_awsm.
  # Severity is 'FAILURE' when any service is 'down', otherwise 'OKAY'.
  def format_alert(server)
    results = server['result']
    severity = results.any? { |_service, status| status == 'down' } ? 'FAILURE' : 'OKAY'
    summary = results.sort.map { |service, status| "#{service}: #{status}" }.join(", ")

    {
      'message' => 'alert',
      'data' => {
        'Time' => server['timestamp'].to_s,
        'Severity' => severity,
        'Type' => 'services',
        'Plugin' => 'services',
        'Host' => server['host'],
        'alert_message' => summary
      }
    }
  end

  # Writes a two-line summary to a TCP listener on localhost:5678
  # (presumably an IRC relay - confirm). Best effort: failures are only logged.
  def report_to_irc(server, env)
    s = ::TCPSocket.new('localhost', 5678)
    begin
      s.write "[#{env}] #{server['host']} [#{server['role']}:#{server['env_id']}]\n#{server['result'].inspect}\n"
    ensure
      # Close even when the write fails so the descriptor is not leaked.
      s.close
    end
  rescue => e
    Utils::LOG.error e.message
  end

  # POSTs the formatted alert to <api>/reporting/<token>.
  # Best effort: a failed request is logged, not raised.
  def report_to_awsm(server)
    rest = RestClient::Resource.new(server['api'])
    rest["/reporting/#{server['token']}"].post(format_alert(server), {"Accept" => "application/json"})
    Utils::LOG.info "http report to awsm successful"
  rescue RestClient::RequestFailed => e
    # The old message interpolated undefined locals (msg, err), which turned
    # every failed request into a NameError.
    Utils::LOG.error "http report failed!: #{e.class}: #{e.message}"
  end

  # Reports a state change through both channels and logs the details.
  def process_alert(server)
    env = server['api'] == 'https://cloud.engineyard.com' ? 'production' : 'staging'

    report_to_irc(server, env)
    report_to_awsm(server)
    Utils::LOG.info '*' * 50
    Utils::LOG.info server['timestamp']
    Utils::LOG.info server['host']
    Utils::LOG.info server['result']
    Utils::LOG.info server['token']
  end

  private

  # Runs the block, replacing the Redis client and retrying on RuntimeError.
  # The counter lives outside the begin block: a method-level `retry` restarts
  # the whole method body, which used to reset the counter and loop forever.
  def with_redis_retry
    tries = 0
    begin
      yield
    rescue RuntimeError => e
      @redis = Redis.new
      tries += 1
      retry if tries < MAX_REDIS_TRIES
      Utils::LOG.error "redis unavailable after #{tries} tries: #{e.message}"
      nil
    end
  end
end
|
data/lib/cron_feeder.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'json'
|
3
|
+
gem 'famoseagle-carrot'
|
4
|
+
require 'carrot'
|
5
|
+
|
6
|
+
# Abstract base for messages that serialise themselves to JSON.
# Subclasses must define their own initialize; calling Packet.new raises.
class Packet
  def initialize
    raise NotImplementedError.new("#{self.class.name} is an abstract class.")
  end

  # Serialises the object as
  #   {"json_class": <class name>, "data": {<ivar name without '@'>: value}}
  # Extra arguments are forwarded to Hash#to_json.
  def to_json(*a)
    data = instance_variables.inject({}) do |memo, ivar|
      # instance_variables yields Symbols on Ruby 1.9+, and Symbol has no #sub.
      memo[ivar.to_s.sub(/\A@/, '')] = instance_variable_get(ivar)
      memo
    end
    { 'json_class' => self.class.name, 'data' => data }.to_json(*a)
  end
end
|
17
|
+
|
18
|
+
# A group of servers belonging to one environment that should be checked.
class MonitorGroup < Packet
  # :env_id was set by initialize but had no accessor here.
  # :token is kept for backward compatibility although nothing in this class
  # assigns it.
  attr_accessor :api, :env_id, :token, :servers

  # env_id  - identifier of the environment
  # api     - value stored as @api (callers pass HOSTNAME)
  # servers - array of hashes with :host, :role and :token
  def initialize(env_id, api, servers)
    @env_id = env_id
    @api = api
    @servers = servers
  end

  # Rebuilds an instance from the hash produced by Packet#to_json.
  def self.json_create(o)
    i = o['data']
    new(i['env_id'], i['api'], i['servers'])
  end
end
|
31
|
+
|
32
|
+
# Publishes one MonitorGroup per environment on the given page.
#
# page - page number handed to Environment.paginate
# q    - queue object; must respond to #publish
#
# Environments without any running instance that has a public hostname are
# skipped. Returns whatever the paginated collection's #each returns.
def next_envs(page, q)
  Environment.paginate(:page => page).each do |environment|
    servers = environment.instances(:status => :running).inject([]) do |found, instance|
      hostname = instance.public_hostname
      found << {:host => hostname, :role => instance.role, :token => instance.token} if hostname
      found
    end

    unless servers.empty?
      group = MonitorGroup.new(environment.id, HOSTNAME, servers)

      puts "publishing: #{group}"

      q.publish(group.to_json)
    end
  end
end
|
50
|
+
|
51
|
+
# Script body: publish a MonitorGroup for every environment, page by page,
# then print how long the run took.
start = Time.now

# FIXME: broker credentials are committed to source control. They can now be
# overridden through the environment; the literals remain only as
# backward-compatible defaults and should be removed once deployments set
# the variables.
carrot = Carrot.new(:host => ENV.fetch('EY_AMQP_HOST', 'ec2-67-202-29-203.compute-1.amazonaws.com'),
                    :user => ENV.fetch('EY_AMQP_USER', 'production'),
                    :pass => ENV.fetch('EY_AMQP_PASS', '3kslices!'),
                    :vhost => ENV.fetch('EY_AMQP_VHOST', '/production'))

q = carrot.queue('cloud-monitors')

count = Environment.count

# Page size is read from an empty paginated array.
# NOTE(review): this yields one extra (empty) page when count is an exact
# multiple of the page size; harmless because next_envs publishes nothing for
# an empty page.
pages = (count / Array.new.paginate.per_page) + 1

1.upto(pages) do |page|
  next_envs(page, q)
end

puts "time: #{Time.now - start}"
|
data/lib/ey_config.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
|
2
|
+
module EY
  # Command-line options shared by the ey-* executables.
  module Config
    # Registers the common switches on an OptionParser.
    #
    # opts    - the OptionParser to add switches to
    # options - Hash that receives the parsed values under the keys :user,
    #           :host, :port, :pass, :vhost, :daemonize, :pid_dir, :log_level
    #           and :log_dir. Values are stored as given (Strings), except
    #           :daemonize which is set to true.
    #
    # Note that -h is bound to --host here, not to help.
    def setup_common_options(opts, options)
      opts.on("-u", "--user USER", "Specify the rabbitmq username.") do |user|
        options[:user] = user
      end

      opts.on("-h", "--host HOST", "Specify the rabbitmq hostname.") do |host|
        options[:host] = host
      end

      opts.on("-P", "--port PORT", "Specify the rabbitmq PORT, default 5672.") do |port|
        options[:port] = port
      end

      opts.on("-p", "--pass PASSWORD", "Specify the rabbitmq password") do |pass|
        options[:pass] = pass
      end

      opts.on("-v", "--vhost VHOST", "Specify the rabbitmq vhost") do |vhost|
        options[:vhost] = vhost
      end

      # The description used to interpolate `type` (the long-removed
      # Object#type), which raises NameError on current Ruby. The parser's
      # program name is what the help text should show.
      opts.on("-d", "--daemonize", "Run #{opts.program_name} as a daemon") do
        options[:daemonize] = true
      end

      opts.on("--pid-dir PATH", "Specify the pid path, only used with daemonize") do |dir|
        options[:pid_dir] = dir
      end

      opts.on("-l", "--log-level LEVEL", "Specify the log level (fatal, error, warn, info, debug). Default is info") do |level|
        options[:log_level] = level
      end

      opts.on("--log-dir PATH", "Specify the log path") do |dir|
        options[:log_dir] = dir
      end
    end
  end
end
|
data/lib/feeder.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'amqp'
|
3
|
+
require 'mq'
|
4
|
+
require 'json'
|
5
|
+
require "#{File.dirname(__FILE__)}/packet"
|
6
|
+
|
7
|
+
# Manual fixture: publishes one hand-written MonitorGroup and then shuts the
# connection down.
fixture_servers = [
  {:host=>"75.101.150.230", :role=>:app_master, :token => 'deadbeef'},
  {:host=>"ec2-75-101-183-93.compute-1.amazonaws.com", :role=>:app, :token => 'deadbeefdsfs'},
  {:host=>"ec2-174-129-88-76.compute-1.amazonaws.com", :role=>:app, :token => 'deadbeeffdsf'},
  {:host=>"ec2-174-129-83-10.compute-1.amazonaws.com", :role=>:db_master, :token => 'deadbeefweqwef'}
]

mg1 = MonitorGroup.new('42', 'https://cloud.engineyard.com', fixture_servers)

AMQP.start(:host => '174.129.11.230') do
  # NOTE(review): the queue is 'cloud-monitors1', not the 'cloud-monitors'
  # queue MonitorAgent subscribes to - confirm this is intentional.
  MQ.queue('cloud-monitors1').publish(mg1.to_json)
  AMQP.stop{ EM.stop }
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'amqp'
|
3
|
+
require 'mq'
|
4
|
+
require 'dataflow'
|
5
|
+
require 'extlib'
|
6
|
+
require 'net/ssh'
|
7
|
+
require "#{File.dirname(__FILE__)}/packet"
|
8
|
+
require "#{File.dirname(__FILE__)}/alert_agent"
|
9
|
+
require "#{File.dirname(__FILE__)}/utils"
|
10
|
+
Thread.abort_on_exception = true
|
11
|
+
|
12
|
+
# Consumes MonitorGroup packets from the 'cloud-monitors' queue, checks every
# server in the group and publishes the combined result to the alert queue.
class MonitorAgent
  include Dataflow
  QUEUE_NAME = 'cloud-monitors'
  attr_reader :mq

  # mq - the channel to subscribe on. Subscribing happens here, so building a
  #      MonitorAgent is what starts message consumption.
  def initialize(mq)
    @mq = mq
    mq.queue(QUEUE_NAME).subscribe(:ack => true) do |h,m|
      if AMQP.closing?
        # Not acked on purpose: the message stays on the queue.
        Utils::LOG.info "(ignoring message, will be redelivered later)"
      else
        process_message(m,h)
      end
    end
  end

  # Parses one queue message and dispatches it.
  #
  # Messages that are not valid JSON, or that do not revive into a
  # MonitorGroup, are logged and acked. Previously they were left unacked
  # forever, which counts against the channel's prefetch limit and can stall
  # the consumer.
  #
  # NOTE(review): relies on JSON.parse reviving 'json_class' payloads into
  # MonitorGroup instances - confirm against the JSON library version in use.
  def process_message(msg, header)
    begin
      packet = JSON.parse(msg)
    rescue JSON::ParserError => e
      Utils::LOG.error "discarding unparseable message: #{e.message}"
      header.ack
      return
    end

    if MonitorGroup === packet
      Utils::LOG.info "got MonitorGroup: #{packet}"
      process_monitor(packet, header)
    else
      Utils::LOG.error "discarding unexpected message: #{packet.inspect}"
      header.ack
    end
  end

  # Checks all servers of the group via EM.defer: the first lambda does the
  # checks and returns an array of result hashes, the second receives that
  # array, publishes a MonitorResult to the alert queue and acks the original
  # message.
  def process_monitor(monitor_group, header)
    run_checks = lambda {
      monitor_group.servers.map do |srv|
        check_server(srv)
      end.map{|m| m.to_hash }
    }

    publish_results = lambda {|monitors|
      alert = MonitorResult.new(monitor_group.env_id, monitor_group.api, monitors)
      Utils::LOG.info alert.inspect
      mq.queue(AlertAgent::QUEUE_NAME).publish(alert.to_json)
      header.ack
    }

    EM.defer(run_checks, publish_results)
  end

  # Builds the monitor for the server's role and runs its check inside
  # Dataflow's need_later (presumably evaluated asynchronously - confirm).
  def check_server(srv)
    need_later do
      Utils::LOG.info "checking server: #{srv.inspect}"
      MonitorFactory.build(srv['role'], srv['host'], srv['token']).check
    end
  end
end
|
58
|
+
|
59
|
+
# Maps a role name to its monitor class by naming convention:
# "app_master" -> AppMasterMonitor.
class MonitorFactory
  # role  - String or Symbol in snake_case (Symbols used to fail because
  #         camel_case is only called on the raw value)
  # host  - passed to the monitor's constructor
  # token - passed to the monitor's constructor
  #
  # Raises NameError when no matching <Role>Monitor constant exists.
  def self.build(role, host, token)
    Object.const_get("#{role.to_s.camel_case}Monitor").new(host, token)
  end
end
|
64
|
+
|
65
|
+
# Base class for per-role monitors. A check logs in over SSH as root, runs
# `ey-agent` on the host and keeps the service-status hash it prints.
class AbstractMonitor
  # How many times a host-key mismatch is repaired and the connection retried
  # before the host is reported as unreachable.
  HOST_KEY_RETRIES = 1

  attr_reader :host, :token, :result

  def initialize(host, token)
    @host = host
    @token = token
    @result = {}
  end

  # Role name derived from the class name: AppMasterMonitor -> "app_master".
  def role
    self.class.name.snake_case.chomp("_monitor")
  end

  # Runs the check and stores its outcome in #result. Returns self.
  def check
    @result = check_ssh
    self
  end

  # The hash published for this server.
  def to_hash
    {"host" => host, "result" => result,
     "timestamp" => Utils.epoch_time,
     'token' => token, 'role' => role}
  end

  # Returns a Hash of service name => status:
  #   * the agent's JSON object plus 'ssh' => 'up' on success,
  #   * {'ey-agent' => 'down'} when the agent's output is not a JSON object,
  #   * {'ssh' => 'down'} when the host cannot be reached or authenticated.
  def check_ssh
    status = {}
    Utils::LOG.info "checking ssh: #{host}"
    host_key_retries = 0
    begin
      Net::SSH.start(host, "root", :keys => ["/root/.ssh/awsm"], :paranoid => false, :timeout => 10) do |ssh|
        status = run_agent(ssh)
      end
    rescue Net::SSH::HostKeyMismatch => e
      host_key_retries += 1
      if host_key_retries <= HOST_KEY_RETRIES
        e.remember_host!
        retry
      end
      # The retry used to be unbounded; give up instead of spinning.
      Utils::LOG.error "#{host}: host key still mismatched after #{HOST_KEY_RETRIES} retry"
      status['ssh'] = 'down'
    end
    status
  rescue SocketError, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ETIMEDOUT, Timeout::Error, Net::SSH::AuthenticationFailed
    status['ssh'] = 'down'
    status
  end

  private

  # Runs ey-agent through the given SSH session and parses its last output
  # line as JSON.
  def run_agent(ssh)
    # ignore any STDOUT and only grab the final json from the scripts output;
    # to_s covers a command that produced no output at all
    script_output = ssh.exec!("bash -lc \"ey-agent\"").to_s.split("\n").last
    parsed = JSON.parse(script_output.to_s)
    raise JSON::ParserError, "expected a JSON object" unless parsed.is_a?(Hash)
    parsed['ssh'] = 'up'
    parsed
  rescue JSON::ParserError
    Utils::LOG.error '*' * 80
    Utils::LOG.error "#{host}>>>>>>>>>>>\n#{script_output}"
    {'ey-agent' => 'down'}
  end
end
|
116
|
+
|
117
|
+
# Role-specific monitors. Each one currently inherits AbstractMonitor's
# behaviour unchanged; the classes exist so MonitorFactory can resolve a role
# name to a constant.
class AppMonitor < AbstractMonitor; end

class AppMasterMonitor < AbstractMonitor
  # Disabled override, kept for reference:
  # def check
  #   check_url
  #   super
  # end
end

class DbMasterMonitor < AbstractMonitor; end

class DbSlaveMonitor < AbstractMonitor; end

class SoloMonitor < AbstractMonitor; end

class UtilMonitor < AbstractMonitor; end
|
data/lib/packet.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# this is an abstract packet class that knows how to
|
4
|
+
# marshal itself into and out of JSON format
|
5
|
+
class Packet
  # Packet itself cannot be instantiated; subclasses define initialize.
  def initialize
    raise NotImplementedError.new("#{self.class.name} is an abstract class.")
  end

  # Serialises the object as
  #   {"json_class": <class name>, "data": {<ivar name without '@'>: value}}
  # Extra arguments are forwarded to Hash#to_json.
  def to_json(*a)
    data = instance_variables.inject({}) do |memo, ivar|
      # instance_variables yields Symbols on Ruby 1.9+, and Symbol has no #sub.
      memo[ivar.to_s.sub(/\A@/, '')] = instance_variable_get(ivar)
      memo
    end
    { 'json_class' => self.class.name, 'data' => data }.to_json(*a)
  end
end
|
16
|
+
|
17
|
+
# Request packet: the servers of one environment that should be checked.
class MonitorGroup < Packet
  attr_accessor :api, :env_id, :servers

  def initialize(env_id, api, servers)
    # Assigned in this order so the serialised 'data' keys keep their order.
    @env_id, @api, @servers = env_id, api, servers
  end

  # Rebuilds an instance from the hash produced by Packet#to_json.
  def self.json_create(o)
    new(*o['data'].values_at('env_id', 'api', 'servers'))
  end
end
|
30
|
+
|
31
|
+
# Response packet: the check results for the servers of one environment.
class MonitorResult < Packet
  attr_accessor :api, :env_id, :servers

  def initialize(env_id, api, servers)
    # Assigned in this order so the serialised 'data' keys keep their order.
    @env_id, @api, @servers = env_id, api, servers
  end

  # Rebuilds an instance from the hash produced by Packet#to_json.
  def self.json_create(o)
    new(*o['data'].values_at('env_id', 'api', 'servers'))
  end
end
|
44
|
+
|
45
|
+
# Packet carrying a single time value.
class MonitorGroupTimeStamp < Packet
  attr_accessor :time

  def initialize(time)
    @time = time
  end

  # Rebuilds an instance from the hash produced by Packet#to_json.
  def self.json_create(o)
    new(o['data']['time'])
  end
end
|
55
|
+
|
56
|
+
# Packet carrying a single time value.
class Quit < Packet
  attr_accessor :time

  def initialize(time)
    @time = time
  end

  # Rebuilds an instance from the hash produced by Packet#to_json. Every other
  # packet class defines this; without it a serialised Quit could not be
  # revived into an object.
  def self.json_create(o)
    new(o['data']['time'])
  end
end
|
data/lib/utils.rb
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
class MockAppMasterMonitor < AbstractMonitor
|
4
|
+
def check
|
5
|
+
@result = {'haproxy' => 'up', 'nginx' => 'up', 'ssh' => 'up'}
|
6
|
+
self
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
class MockAppMonitor < AbstractMonitor
|
11
|
+
def check
|
12
|
+
@result = {'haproxy' => 'down', 'nginx' => 'down', 'ssh' => 'up'}
|
13
|
+
self
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
class MockDbMasterMonitor < AbstractMonitor
|
18
|
+
def check
|
19
|
+
@result = {'mysqld' => 'down', 'ssh' => 'up'}
|
20
|
+
self
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
class MockSoloMonitor < AbstractMonitor
|
25
|
+
def check
|
26
|
+
@result = {'mysqld' => 'down', 'nginx' => 'down', 'ssh' => 'up'}
|
27
|
+
self
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "ey-big-brother" do
|
32
|
+
it "should monitor and process alerts on flex clusters and solos" do
|
33
|
+
AMQP.start(:host => 'localhost') do
|
34
|
+
flex = MonitorGroup.new '42',
|
35
|
+
'https://cloud.engineyard.com', [
|
36
|
+
{:host=>"75.101.150.230", :role=>:mock_app_master, :token => 'deadbeef'},
|
37
|
+
{:host=>"ec2-75-101-183-93.compute-1.amazonaws.com", :role=>:mock_app, :token => 'jhghjg'},
|
38
|
+
{:host=>"ec2-174-129-83-10.compute-1.amazonaws.com", :role=>:mock_db_master, :token => 'dfdsf'}
|
39
|
+
]
|
40
|
+
solo = MonitorGroup.new '41',
|
41
|
+
'https://cloud.engineyard.com', [
|
42
|
+
{:host=>"75.101.150.230", :role=>:mock_solo,:token => 'dsdfsdf'},
|
43
|
+
]
|
44
|
+
@monitor_agent = MonitorAgent.new(MQ.new)
|
45
|
+
@alert_agent = AlertAgent.new(MQ.new)
|
46
|
+
|
47
|
+
@alert_agent.should_receive(:process_server).exactly(4).times
|
48
|
+
|
49
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).publish(flex.to_json)
|
50
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).publish(solo.to_json)
|
51
|
+
|
52
|
+
EM.add_timer(0.0001) { AMQP.stop{ EM.stop } }
|
53
|
+
end
|
54
|
+
|
55
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).should have(2).acked_messages
|
56
|
+
alerts = @alert_agent.mq.queue(AlertAgent::QUEUE_NAME).acked_messages
|
57
|
+
alerts.size.should == 2
|
58
|
+
alerts.map{|x| JSON.parse(x).servers }.flatten.each do |server|
|
59
|
+
case server['role']
|
60
|
+
when 'mock_solo'
|
61
|
+
server['result'].should == {'mysqld' => 'down', 'nginx' => 'down', 'ssh' => 'up'}
|
62
|
+
when 'mock_app_master'
|
63
|
+
server['result'].should == {'haproxy' => 'up', 'nginx' => 'up', 'ssh' => 'up'}
|
64
|
+
when 'mock_app'
|
65
|
+
server['result'].should == {'haproxy' => 'down', 'nginx' => 'down', 'ssh' => 'up'}
|
66
|
+
when 'mock_db_master'
|
67
|
+
server['result'].should == {'mysqld' => 'down', 'ssh' => 'up'}
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should POST alerts as collectd'formatted json objects to the awsm reporting api" do
|
73
|
+
Utils.stub!(:epoch_time).and_return(1249595111)
|
74
|
+
AMQP.start(:host => 'localhost') do
|
75
|
+
flex = MonitorGroup.new '42',
|
76
|
+
'https://cloud.engineyard.com', [
|
77
|
+
{:host=>"75.101.150.230", :role=>:mock_app_master, :token => 'deadbeef'},
|
78
|
+
{:host=>"ec2-174-129-83-10.compute-1.amazonaws.com", :role=>:mock_db_master, :token => 'dfdsf'}
|
79
|
+
]
|
80
|
+
@monitor_agent = MonitorAgent.new(MQ.new)
|
81
|
+
@alert_agent = AlertAgent.new(MQ.new)
|
82
|
+
|
83
|
+
@alert_agent.should_receive(:process_alert).exactly(2).times
|
84
|
+
|
85
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).publish(flex.to_json)
|
86
|
+
|
87
|
+
EM.add_timer(0.0001) { AMQP.stop{ EM.stop } }
|
88
|
+
end
|
89
|
+
|
90
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).should have(1).acked_messages
|
91
|
+
alerts = @alert_agent.mq.queue(AlertAgent::QUEUE_NAME).acked_messages
|
92
|
+
alerts.size.should == 1
|
93
|
+
alerts.map{|x| JSON.parse(x).servers }.flatten.each do |server|
|
94
|
+
case server['role']
|
95
|
+
when 'mock_app_master'
|
96
|
+
@alert_agent.format_alert(server).should == {
|
97
|
+
"message" => "alert",
|
98
|
+
"data" => {
|
99
|
+
"Time"=>"1249595111",
|
100
|
+
"alert_message"=>"haproxy: up, nginx: up, ssh: up",
|
101
|
+
"Severity"=>"OKAY",
|
102
|
+
"Type"=>"services",
|
103
|
+
"Plugin"=>"services",
|
104
|
+
"Host"=>"75.101.150.230"
|
105
|
+
}
|
106
|
+
}
|
107
|
+
when 'mock_db_master'
|
108
|
+
@alert_agent.format_alert(server).should == {
|
109
|
+
"message" => "alert",
|
110
|
+
"data" => {
|
111
|
+
"Time"=>"1249595111",
|
112
|
+
"alert_message"=>"mysqld: down, ssh: up",
|
113
|
+
"Severity"=>"FAILURE",
|
114
|
+
"Type"=>"services",
|
115
|
+
"Plugin"=>"services",
|
116
|
+
"Host"=>"ec2-174-129-83-10.compute-1.amazonaws.com"
|
117
|
+
}
|
118
|
+
}
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
it "should send alert if state was nil" do
|
124
|
+
AMQP.start(:host => 'localhost') do
|
125
|
+
solo = MonitorGroup.new '41',
|
126
|
+
'https://cloud.engineyard.com', [
|
127
|
+
{:host=>"75.101.150.230", :role=>:mock_solo,:token => 'dsdfsdf'},
|
128
|
+
]
|
129
|
+
@monitor_agent = MonitorAgent.new(MQ.new)
|
130
|
+
@alert_agent = AlertAgent.new(MQ.new)
|
131
|
+
|
132
|
+
@alert_agent.should_receive(:process_alert).exactly(1).times
|
133
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).publish(solo.to_json)
|
134
|
+
|
135
|
+
EM.add_timer(0.0001) { AMQP.stop{ EM.stop } }
|
136
|
+
end
|
137
|
+
|
138
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).should have(1).acked_messages
|
139
|
+
alerts = @alert_agent.mq.queue(AlertAgent::QUEUE_NAME).acked_messages
|
140
|
+
alerts.size.should == 1
|
141
|
+
end
|
142
|
+
|
143
|
+
it "should send alert if state was changed" do
|
144
|
+
AMQP.start(:host => 'localhost') do
|
145
|
+
solo = MonitorGroup.new '41',
|
146
|
+
'https://cloud.engineyard.com', [
|
147
|
+
{:host=>"75.101.150.230", :role=>:mock_solo,:token => 'dsdfsdf'},
|
148
|
+
]
|
149
|
+
@monitor_agent = MonitorAgent.new(MQ.new)
|
150
|
+
@alert_agent = AlertAgent.new(MQ.new)
|
151
|
+
|
152
|
+
@alert_agent.redis = {"75.101.150.230" => 'ok'}
|
153
|
+
@alert_agent.should_receive(:process_alert).exactly(1).times
|
154
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).publish(solo.to_json)
|
155
|
+
|
156
|
+
EM.add_timer(0.0001) { AMQP.stop{ EM.stop } }
|
157
|
+
end
|
158
|
+
|
159
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).should have(1).acked_messages
|
160
|
+
alerts = @alert_agent.mq.queue(AlertAgent::QUEUE_NAME).acked_messages
|
161
|
+
alerts.size.should == 1
|
162
|
+
end
|
163
|
+
|
164
|
+
it "should not send alert if state has not changed" do
|
165
|
+
AMQP.start(:host => 'localhost') do
|
166
|
+
solo = MonitorGroup.new '41',
|
167
|
+
'https://cloud.engineyard.com', [
|
168
|
+
{:host=>"75.101.150.230", :role=>:mock_solo,:token => 'dsdfsdf'},
|
169
|
+
]
|
170
|
+
@monitor_agent = MonitorAgent.new(MQ.new)
|
171
|
+
@alert_agent = AlertAgent.new(MQ.new)
|
172
|
+
|
173
|
+
@alert_agent.redis = {"75.101.150.230" => 'fail'}
|
174
|
+
@alert_agent.should_not_receive(:process_alert)
|
175
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).publish(solo.to_json)
|
176
|
+
|
177
|
+
|
178
|
+
EM.add_timer(0.0001) { AMQP.stop{ EM.stop } }
|
179
|
+
end
|
180
|
+
|
181
|
+
@monitor_agent.mq.queue(MonitorAgent::QUEUE_NAME).should have(1).acked_messages
|
182
|
+
alerts = @alert_agent.mq.queue(AlertAgent::QUEUE_NAME).acked_messages
|
183
|
+
alerts.size.should == 1
|
184
|
+
end
|
185
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$TESTING=true
|
2
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'monitor_agent')
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'alert_agent')
|
4
|
+
gem "danielsdeleo-moqueue"
|
5
|
+
require "moqueue"
|
6
|
+
|
7
|
+
Spec::Runner.configure do |config|
|
8
|
+
overload_amqp
|
9
|
+
config.include Moqueue
|
10
|
+
Redis = Hash
|
11
|
+
config.before(:each) do
|
12
|
+
reset_broker
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ey-big-brother
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.9
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ezra Zygmuntowicz
|
8
|
+
autorequire: ey-big-brother
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-18 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A gem that provides oversight of the ey-cloud
|
17
|
+
email: ez@engineyard.com
|
18
|
+
executables:
|
19
|
+
- ey-monitor-agent
|
20
|
+
- ey-alert-agent
|
21
|
+
- ey-drain-queues
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files:
|
25
|
+
- README.rdoc
|
26
|
+
- LICENSE
|
27
|
+
- TODO
|
28
|
+
files:
|
29
|
+
- LICENSE
|
30
|
+
- README.rdoc
|
31
|
+
- Rakefile
|
32
|
+
- TODO
|
33
|
+
- lib/alert_agent.rb
|
34
|
+
- lib/cron_feeder.rb
|
35
|
+
- lib/ey_config.rb
|
36
|
+
- lib/feeder.rb
|
37
|
+
- lib/monitor_agent.rb
|
38
|
+
- lib/packet.rb
|
39
|
+
- lib/utils.rb
|
40
|
+
- spec/integration_spec.rb
|
41
|
+
- spec/spec_helper.rb
|
42
|
+
has_rdoc: true
|
43
|
+
homepage: http://engineyard.com
|
44
|
+
licenses: []
|
45
|
+
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options: []
|
48
|
+
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: "0"
|
56
|
+
version:
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: "0"
|
62
|
+
version:
|
63
|
+
requirements: []
|
64
|
+
|
65
|
+
rubyforge_project:
|
66
|
+
rubygems_version: 1.3.5
|
67
|
+
signing_key:
|
68
|
+
specification_version: 3
|
69
|
+
summary: A gem that provides oversight of the ey-cloud
|
70
|
+
test_files: []
|
71
|
+
|