malevich 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/bin/malevich +38 -0
- data/example/cpu.rb +31 -0
- data/example/disk.rb +59 -0
- data/example/disk_stat.rb +28 -0
- data/example/dns_check.rb +7 -0
- data/example/exim.rb +15 -0
- data/example/find_files.rb +21 -0
- data/example/http.rb +25 -0
- data/example/iptables.rb +27 -0
- data/example/la.rb +21 -0
- data/example/mdadm.rb +44 -0
- data/example/megacli.rb +13 -0
- data/example/memory.rb +31 -0
- data/example/net.rb +25 -0
- data/example/net_stat.rb +25 -0
- data/example/nginx.rb +22 -0
- data/example/ntp.rb +15 -0
- data/example/pgsql.rb +71 -0
- data/example/runit.rb +48 -0
- data/example/status_file.rb +17 -0
- data/example/tw_cli.rb +17 -0
- data/lib/malevich.rb +59 -0
- data/lib/malevich/dsl.rb +78 -0
- data/lib/malevich/init.rb +17 -0
- data/lib/malevich/loader.rb +80 -0
- data/lib/malevich/monitor.rb +40 -0
- data/lib/malevich/plugin.rb +70 -0
- data/lib/malevich/plugin/error.rb +12 -0
- data/lib/malevich/plugin/event.rb +68 -0
- data/lib/malevich/plugin/http.rb +25 -0
- data/lib/malevich/plugin/init.rb +5 -0
- data/lib/malevich/plugin/shell_out.rb +28 -0
- data/lib/malevich/plugin/time.rb +9 -0
- data/lib/malevich/responders/error.rb +30 -0
- data/lib/malevich/responders/http.rb +46 -0
- data/lib/malevich/responders/init.rb +4 -0
- data/lib/malevich/responders/riemann.rb +59 -0
- data/lib/malevich/responders/udp.rb +59 -0
- data/lib/malevich/version.rb +3 -0
- data/malevich.gemspec +32 -0
- data/test/plugin_helpers_spec.rb +6 -0
- metadata +219 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
module Malevich
  # Supervises long-running tasks. Every task added with #<< gets its own
  # thread executing `task.run!`; #run! periodically restarts tasks whose
  # thread is no longer alive.
  class Monitor

    # Seconds to sleep between two liveness sweeps in #run!.
    CHECK_ALIVE = 5

    # @return [Array<Array(Thread, Object)>] supervised [thread, task] pairs
    attr_reader :tasks

    def initialize
      @tasks = []
    end

    # Describes the supervised tasks that are Malevich::Plugin instances.
    #
    # @return [Array<Hash>] one `{ name => settings.to_hash }` entry per plugin
    def plugins
      @tasks.select { |t| t[1].is_a?(Malevich::Plugin) }.map { |x| { x[1].name => x[1].settings.to_hash } }
    end

    # Starts `obj.run!` in a new thread and records the [thread, obj] pair.
    #
    # @param obj [#name, #run!] task to supervise
    # @return [false, Array] false when obj lacks #name or #run!, otherwise
    #   the updated task list
    def <<(obj)
      # Both methods are used below, so both are required (the guard used to
      # accept objects that had only one of them).
      return false unless obj.respond_to?(:name) && obj.respond_to?(:run!)
      th = Thread.new { obj.run! }
      log :info, "Add '#{obj.class}(#{obj.name})'"
      @tasks << [th, obj]
    end

    # Endless supervision loop: restarts every dead task, then sleeps for
    # CHECK_ALIVE seconds. Never returns.
    def run!
      loop do
        # Collect the dead entries first: restarting mutates @tasks, and
        # mutating it while iterating made the sweep skip the next task.
        dead = @tasks.reject { |task| task[0].alive? }
        dead.each do |task|
          log :error, "Thread for '#{task[1].class}(#{task[1].name})' is dead, start it"
          @tasks.delete_if { |entry| entry.equal?(task) }
          self << task[1]
        end
        log :debug, "Check alive #{@tasks.count} threads"
        sleep CHECK_ALIVE
      end
    end

  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require_relative 'plugin/init'
|
2
|
+
|
3
|
+
module Malevich
  # One monitoring plugin: a named `collect` block that #run! evaluates on
  # the plugin instance every `interval` seconds.
  class Plugin

    include Malevich::PluginHelpers

    # NOTE: the readers for :settings, :histories and :errors generated here
    # are replaced further down by the make_container definitions.
    attr_accessor :name, :always_start, :interval,
                  :run_if, :collect, :settings, :histories, :errors

    # @param name [String] plugin name, also used as the container key
    def initialize(name)
      @name = name
      @interval = 60
      @run_if = Proc.new { true }
      @always_start = false
    end

    # Checks the given platform list against ohai's :platform and :os.
    #
    # @param name [#include?, nil] accepted platforms; nil or empty means
    #   ['linux']
    # @return [Boolean]
    def suitable_platform?(name)
      platforms = name.nil? || name.empty? ? ['linux'] : name
      platforms.include?(ohai[:platform]) || platforms.include?(ohai[:os])
    end

    # Decides whether the plugin should be started and logs the reason.
    #
    # @return [Boolean] false when there is no collect block, the plugin is
    #   disabled in its settings, or run_if evaluates falsy; true otherwise
    def runnable?
      return log_and_false("'#{name}' not started, because have not 'collect'") if collect.nil?
      return log_and_false("'#{name}' disabled in config") if settings.disable? && settings.disable
      return log_and_false("'#{name}' disabled by run_if statement") unless instance_eval(&run_if)
      log :unknown, "'#{name}' started"
      # Return true explicitly rather than whatever `log` happens to return.
      true
    end

    # Endless collection loop. Each pass evaluates `collect` with a timeout of
    # two thirds of the interval, records any StandardError through #error,
    # and sleeps for the remainder of the interval. Never returns.
    def run!
      loop do
        t_start = Time.now
        begin
          Timeout.timeout(interval.to_f * 2/3) do
            instance_eval(&collect)
          end
        rescue => e
          error(e)
        end
        # Clamp at zero: sleep raises on a negative duration, which would
        # kill the thread if a pass ever overran the interval.
        sleep([interval - (Time.now - t_start).to_i, 0].max)
      end
    end

    private

    def ohai
      malevich.ohai
    end

    # Logs msg at the given level and returns false. This used to be
    # re-defined on the class from inside #runnable? on every call.
    def log_and_false(level = :info, msg)
      log level, msg
      false
    end

    # Defines an instance method `container` that returns this plugin's
    # entry in `malevich.plugins[container]`, creating an empty Hashie::Mash
    # on first use.
    def self.make_container(container)
      define_method(container) do |&_block|
        malevich.plugins[container] ||= {}
        malevich.plugins[container][self.name] ||= Hashie::Mash.new
        malevich.plugins[container][self.name]
      end
    end

    make_container :settings
    # Aliased after make_container so `plugin` points at the container
    # reader, not at the attr_accessor one.
    alias :plugin :settings
    make_container :histories
    make_container :errors

  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Malevich
  module PluginHelpers

    # Records an exception in the plugin's `errors` container and logs it.
    #
    # Sets `errors.last_at`, `errors.msg` and `errors.reported = false`.
    #
    # @param e [Exception] the failure to record
    def error(e)
      # An exception that was never raised has a nil backtrace; Array() keeps
      # this reporting path from raising itself.
      details = "#{e.class}: #{e}\n #{Array(e.backtrace).join("\n")}"
      errors.last_at = Time.now
      errors.msg = details
      errors.reported = false
      log :error, "Plugin '#{name}' has a error: #{details}"
    end

  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Malevich
  module PluginHelpers

    # Normalizes an event, optionally turns its metric into a delta, derives
    # its state, and either logs it (when `malevich.cmd[:test_given]` is
    # set) or hands it to #event_minimizer.
    #
    # @param hash [Hash] event attributes; modified in place
    def event(hash)
      event_normalize(hash)
      hash[:metric] = metric_diff(hash) if hash[:diff]
      hash[:state] = state_check(hash)
      if malevich.cmd[:test_given]
        log :unknown, "Event message from test plugin: #{hash.inspect}"
      else
        event_minimizer(hash)
      end
    end

    # Returns the change of hash[:metric] since the previous call for the
    # same service and remembers the current value for the next call.
    #
    # @param hash [Hash] event attributes; :diff is removed
    # @return [Numeric, nil] nil on the first call or when there is no metric
    def metric_diff(hash)
      # :diff is an instruction for this helper, not event data, so drop it
      # even when there is no metric to diff.
      hash.delete(:diff)
      return unless hash[:metric]
      current_metric = hash[:metric]
      # Keep the baseline under its own key: #event_minimizer stores the last
      # event hash under the bare service name, which used to overwrite the
      # baseline and break the next subtraction.
      baseline_key = "#{hash[:service]}:diff_baseline"
      old_metric = histories[baseline_key]
      histories[baseline_key] = current_metric
      old_metric ? current_metric - old_metric : nil
    end

    # Derives a state from the metric and the plugin's `warning` /
    # `critical` settings.
    #
    # @param hash [Hash] event attributes
    # @return [String, nil] an explicit hash[:state] wins; nil (the unset
    #   state) when the metric is nil or NaN; otherwise 'ok', 'warning' or
    #   'critical'
    def state_check(hash)
      state = hash[:state]
      raw = hash[:metric]
      return state if state || raw.nil? || (raw.kind_of?(Float) && raw.nan?)

      warning = settings.respond_to?(:warning) ? settings.warning : nil
      critical = settings.respond_to?(:critical) ? settings.critical : nil
      # Falsy (nil or false) thresholds both mean "not configured".
      return 'ok' unless warning || critical

      metric = raw.to_f
      if warning && critical
        # Thresholds are inclusive, like the single-threshold branches below.
        # Comparing against the larger one keeps a reversed pair behaving as
        # before (it can only ever report 'critical' or 'ok').
        return 'critical' if metric >= [warning, critical].max
        return metric >= warning ? 'warning' : 'ok'
      end
      return metric >= warning ? 'warning' : 'ok' if warning
      metric >= critical ? 'critical' : 'ok'
    end

    # Fills in defaults and canonical forms: rounds Float metrics to two
    # decimals, maps true/false states to 'ok'/'critical', and defaults
    # :service, :host and :tags.
    #
    # @param hash [Hash] event attributes; modified in place
    def event_normalize(hash)
      hash[:metric] = hash[:metric].round(2) if hash[:metric].kind_of?(Float)
      hash[:state] = 'ok' if hash[:state].kind_of?(TrueClass)
      hash[:state] = 'critical' if hash[:state].kind_of?(FalseClass)
      hash[:service] ||= name
      hash[:host] ||= ohai[:fqdn]
      hash[:tags] ||= malevich.cmd.tags
    end

    # Queues the event on `malevich.events`, except for a metric-less 'ok'
    # event whose previous event for the same service was also 'ok'.
    #
    # @param hash [Hash] normalized event
    def event_minimizer(hash)
      if hash[:state] == 'ok' && hash[:metric].nil?
        previous = histories[hash[:service]]
        return if previous && previous[:state] == 'ok'
      end
      histories[hash[:service]] = hash
      malevich.events << hash
    end

  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'rest_client'
|
2
|
+
require 'uri'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
module Malevich
  module PluginHelpers

    # Performs an HTTP GET through RestClient.
    #
    # @param url [String]
    # @return the value returned by RestClient.get
    def http_get(url)
      RestClient.get(url)
    end
    alias :rest_get :http_get

    # Fetches the content behind an http(s) URL or a local file path.
    #
    # @param url [String] http/https URL or filesystem path
    # @return the #http_get result for http(s), otherwise the file content
    # @raise [RuntimeError] for file:// URLs, which are not supported
    def body_get(url)
      scheme =
        begin
          URI.parse(url).scheme
        rescue URI::InvalidURIError
          # Local paths need not be valid URIs (e.g. they may contain
          # spaces); treat them as plain paths instead of failing.
          nil
        end

      case scheme
      when 'http', 'https'
        http_get(url)
      when 'file'
        raise "body_get('#{url}'): hasn't support yet"
      else
        File.read(url)
      end
    end

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'mixlib/shellout'
|
2
|
+
|
3
|
+
module Malevich
  module PluginHelpers

    # Builds a Mixlib::ShellOut from the given arguments and runs it.
    #
    # @param command_args arguments forwarded to Mixlib::ShellOut.new
    # @return [Mixlib::ShellOut] the command object after run_command
    def shell_out(*command_args)
      # Live streaming to STDOUT is intentionally left disabled:
      #   cmd.live_stream = STDOUT if STDOUT.tty?
      Mixlib::ShellOut.new(*command_args).tap { |command| command.run_command }
    end

    # Same as #shell_out, then calls `error!` on the finished command.
    #
    # @return [Mixlib::ShellOut] the command object
    def shell_out!(*command_args)
      shell_out(*command_args).tap { |command| command.error! }
    end

    # Runs the command and returns its `stdout`.
    def shell(*command_args)
      finished = shell_out(*command_args)
      finished.stdout
    end

    # Runs the command through #shell_out! and returns its `stdout`.
    def shell!(*command_args)
      finished = shell_out!(*command_args)
      finished.stdout
    end

  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Malevich
  module Responder
    # Periodically turns the plugins' recorded errors into one summary event
    # on `malevich.events`.
    class Error

      # Service name shared by the ok and the failure event. They used to
      # differ ('plugins errors' vs 'plugin errors'), so the ok event never
      # addressed the service that had reported the failure.
      SERVICE = 'plugin errors'

      attr_reader :name

      def initialize
        @name = 'plugin error responder'
      end

      # Endless loop: reports once, then sleeps 60 seconds. Never returns.
      def run!
        loop do
          report
          sleep 60
        end
      end

      private

      # Emits one summary event when `malevich.plugins.errors` is set: an
      # 'ok' state event when every error was already reported, otherwise an
      # event whose metric is the number of unreported errors, after which
      # those errors are flagged as reported.
      def report
        plugin_errors = malevich.plugins.errors
        return unless plugin_errors

        unreported = plugin_errors.count { |_, val| !val.reported? }
        if unreported == 0
          malevich.events << {:service => SERVICE, :state => 'ok'}
        else
          malevich.events << {:service => SERVICE, :metric => unreported,
                              :description => "Plugins has errors \n #{plugin_errors}"}
          plugin_errors.each { |_, val| val.reported = true unless val.reported? }
        end
      end

    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Malevich
  module Responder
    # Minimal HTTP endpoint: answers every accepted TCP connection with a
    # JSON status document, without reading the request.
    class Http

      attr_reader :name

      # Reads the "host:port" pair from `malevich.cmd['http-responder']`.
      def initialize
        @host, @port = malevich.cmd['http-responder'].split(':')
        @started_at = Time.now.to_i
        @name = 'http api'
      end

      # Opens the listening socket and serves clients one at a time. Returns
      # only by raising or when its thread is killed.
      def run!
        log :unknown, "Start http server at #{@host}:#{@port}"
        server = TCPServer.new(@host, @port)
        loop { serve(server.accept) }
      ensure
        # Release the port on the way out so that a restarted responder can
        # bind it again.
        server.close if server
      end

      # Builds the status document.
      #
      # @return [String] JSON text followed by a newline
      def info
        plugin_errors =
          begin
            malevich.plugins.errors
          rescue StandardError
            {}
          end

        {
          :cmd => malevich.cmd,
          :config => malevich.config,
          :plugins => malevich.monitor.plugins,
          :monitor => malevich.monitor.tasks.map {|x| { x[1].name => x[0].alive? } },
          :errors => plugin_errors,
          :version => Malevich::VERSION,
          :ruby => "#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}",
          :uptime => Time.now.to_i - @started_at,
          :queue_size => malevich.events.size
        }.to_json + "\n"
      end

      private

      # Writes one 200 response carrying #info to the client and always
      # closes the connection, even when building or writing the response
      # raises.
      def serve(client)
        log :unknown, "Accepted client: #{client.inspect}"
        response = info
        headers = "HTTP/1.1 200 OK\r\n" +
                  "Server: Malevich Ruby\r\n" +
                  "Content-Length: #{response.bytesize}\r\n" +
                  "Content-Type: application/json\r\n\r\n"
        client.print headers
        client.print response
      ensure
        client.close
      end

    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'riemann/client'
|
2
|
+
require 'resolv'
|
3
|
+
|
4
|
+
module Malevich
  module Responder
    # Drains `malevich.events` and forwards every event to each Riemann host
    # listed in `malevich.cmd['riemann-host']`.
    class Riemann

      # Seconds to sleep between two flushes of the event queue.
      INTERVAL_FLUSH = 0.5

      attr_reader :name

      def initialize
        @riemanns = []
        @name = "riemann client"
      end

      # Creates the clients, then flushes the queue forever. Never returns.
      def run!
        make_clients
        loop do
          flush
          sleep INTERVAL_FLUSH
        end
      end

      private

      # Rebuilds the client list from the "host[:port]" entries; the port
      # defaults to 5555. Also refreshes @name to list every client.
      #
      # @return [Array] the clients
      def make_clients
        @riemanns.clear
        malevich.cmd['riemann-host'].each do |endpoint|
          address, port = endpoint.split(':')
          options = {
            :host => Resolv.new.getaddress(address),
            :port => port || 5555,
            :timeout => 10
          }
          client = ::Riemann::Client.new(options)
          # Switch to the TCP transport when requested on the command line.
          client = client.tcp if malevich.cmd['riemann-tcp']
          log :debug, "Add new riemann client: #{client.host}:#{client.port}"
          @riemanns << client
          labels = @riemanns.map { |c| "riemann client [#{c.host}:#{c.port}]" }
          @name = labels.join(" , ")
        end
        @riemanns
      end

      # Pops events until the queue is empty, sending each one to every
      # client with a 10 second timeout per send.
      def flush
        loop do
          break if malevich.events.empty?
          event = malevich.events.pop
          @riemanns.each do |riemann|
            Timeout.timeout(10) do
              log :debug, "Sent message #{event} for #{riemann.host}:#{riemann.port}"
              riemann << event
            end
          end
        end
      end

    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'socket'
|
2
|
+
|
3
|
+
module Malevich
  module Responder
    # UDP endpoint that accepts JSON-encoded events and queues them on
    # `malevich.events`.
    class Udp

      # Only these keys are copied from an incoming message into the event.
      RIEMANN_RESERVED_FIELDS = [
        :time,
        :state,
        :service,
        :host,
        :description,
        :metric,
        :tags,
        :ttl
      ].freeze

      attr_reader :name

      # Reads the "host:port" pair from `malevich.cmd['udp-responder']`.
      def initialize
        @host, @port = malevich.cmd['udp-responder'].split(':')
        @name = 'udp api'
      end

      # Parses one datagram, queues the resulting event and acknowledges it.
      # On any StandardError the failure is logged and reported back to the
      # sender instead.
      #
      # @param data [String] JSON payload
      # @param src [#reply] sender handle used for the acknowledgement
      def process(data, src)
        malevich.events << event_from_json(data)
        src.reply "sended\n\n"
      rescue => e
        # Include the reason in the log; it used to be discarded.
        log :error, "Failed to send message: #{data.inspect} (#{e.class}: #{e.message})"
        src.reply "failed to send: #{data.inspect}\n"
      end

      # Builds an event from the reserved fields of `hash`, accepting symbol
      # or string keys, and defaults :host and :tags.
      #
      # @param hash [Hash, nil] incoming attributes; nil means no attributes
      # @return [Hash] event keyed by the reserved field symbols
      def event_from_hash(hash=nil)
        # The default argument is nil, which cannot be indexed.
        hash ||= {}
        new_hash = {}
        RIEMANN_RESERVED_FIELDS.each do |key|
          new_hash[key] = hash[key] || hash[key.to_s]
        end
        new_hash[:host] ||= malevich.ohai[:fqdn]
        new_hash[:tags] ||= malevich.cmd.tags
        new_hash
      end

      # @param str [String] JSON text
      # @return [Hash] see #event_from_hash
      def event_from_json(str)
        event_from_hash(JSON.parse(str))
      end

      # Serves datagrams through Socket.udp_server_loop, passing each one to
      # #process.
      def run!
        log :unknown, "Start udp server at #{@host}:#{@port}"
        Socket.udp_server_loop(@host, @port) do |data, src|
          log :debug, "Received data: #{data.inspect}, from client: #{src.inspect}"
          process(data, src)
        end
      end

    end
  end
end
|