malevich 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/bin/malevich +38 -0
- data/example/cpu.rb +31 -0
- data/example/disk.rb +59 -0
- data/example/disk_stat.rb +28 -0
- data/example/dns_check.rb +7 -0
- data/example/exim.rb +15 -0
- data/example/find_files.rb +21 -0
- data/example/http.rb +25 -0
- data/example/iptables.rb +27 -0
- data/example/la.rb +21 -0
- data/example/mdadm.rb +44 -0
- data/example/megacli.rb +13 -0
- data/example/memory.rb +31 -0
- data/example/net.rb +25 -0
- data/example/net_stat.rb +25 -0
- data/example/nginx.rb +22 -0
- data/example/ntp.rb +15 -0
- data/example/pgsql.rb +71 -0
- data/example/runit.rb +48 -0
- data/example/status_file.rb +17 -0
- data/example/tw_cli.rb +17 -0
- data/lib/malevich.rb +59 -0
- data/lib/malevich/dsl.rb +78 -0
- data/lib/malevich/init.rb +17 -0
- data/lib/malevich/loader.rb +80 -0
- data/lib/malevich/monitor.rb +40 -0
- data/lib/malevich/plugin.rb +70 -0
- data/lib/malevich/plugin/error.rb +12 -0
- data/lib/malevich/plugin/event.rb +68 -0
- data/lib/malevich/plugin/http.rb +25 -0
- data/lib/malevich/plugin/init.rb +5 -0
- data/lib/malevich/plugin/shell_out.rb +28 -0
- data/lib/malevich/plugin/time.rb +9 -0
- data/lib/malevich/responders/error.rb +30 -0
- data/lib/malevich/responders/http.rb +46 -0
- data/lib/malevich/responders/init.rb +4 -0
- data/lib/malevich/responders/riemann.rb +59 -0
- data/lib/malevich/responders/udp.rb +59 -0
- data/lib/malevich/version.rb +3 -0
- data/malevich.gemspec +32 -0
- data/test/plugin_helpers_spec.rb +6 -0
- metadata +219 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
module Malevich
  # Supervises long-running tasks. Every task added with #<< is executed in
  # its own thread via its +run!+ method; #run! periodically restarts tasks
  # whose thread has died.
  class Monitor

    # Seconds to sleep between two liveness sweeps in #run!.
    CHECK_ALIVE = 5

    # Array of [Thread, task] pairs currently supervised.
    attr_reader :tasks

    def initialize
      @tasks = Array.new
    end

    # Settings of every supervised task that is a Malevich::Plugin.
    #
    # Returns an Array of single-entry Hashes: { plugin name => settings hash }.
    def plugins
      supervised_plugins = @tasks.select { |_, task| task.is_a?(Malevich::Plugin) }
      supervised_plugins.map { |_, plugin| { plugin.name => plugin.settings.to_hash } }
    end

    # Starts +obj+ in a new thread and registers it for supervision.
    #
    # obj - any object responding to BOTH +name+ (used in log messages) and
    #       +run!+ (the thread body).
    #
    # Returns false when +obj+ lacks one of those methods, otherwise the
    # updated task list.
    def <<(obj)
      # Both methods are used below, so both are required. Checking with `||`
      # accepted half-implemented objects that then failed inside the thread
      # or in the log call.
      return false unless obj.respond_to?(:name) && obj.respond_to?(:run!)
      th = Thread.new { obj.run! }
      log :info, "Add '#{obj.class}(#{obj.name})'"
      @tasks << [th, obj]
    end

    # Supervision loop: restarts dead tasks every CHECK_ALIVE seconds.
    # Never returns.
    def run!
      loop do
        restart_dead_tasks
        log :debug, "Check alive #{@tasks.count} threads"
        sleep CHECK_ALIVE
      end
    end

    private

    # Performs one sweep: every task whose thread is no longer alive is
    # removed and started again through #<<.
    def restart_dead_tasks
      # Take a snapshot of the dead entries first: deleting from and appending
      # to @tasks while iterating over it skips neighbours of deleted entries.
      dead = @tasks.reject { |thread, _| thread.alive? }
      dead.each do |entry|
        task = entry[1]
        log :error, "Thread for '#{task.class}(#{task.name})' is dead, start it"
        @tasks.delete_if { |candidate| candidate.equal?(entry) }
        self << task
      end
    end

  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require_relative 'plugin/init'
|
2
|
+
|
3
|
+
module Malevich
  # A single check. +collect+ holds the block evaluated (via instance_eval)
  # every +interval+ seconds by #run!; +run_if+ holds a block deciding whether
  # the plugin should run at all.
  class Plugin

    include Malevich::PluginHelpers

    attr_accessor :name, :always_start, :interval,
      :run_if, :collect, :settings, :histories, :errors

    # Defines an instance reader named +container+ that returns this plugin's
    # own Hashie::Mash stored at malevich.plugins[container][name], creating
    # it on first access. This replaces the plain reader generated by
    # attr_accessor above; the generated writers remain.
    def self.make_container(container)
      define_method(container) do |&block|
        malevich.plugins[container] ||= {}
        malevich.plugins[container][self.name] ||= Hashie::Mash.new
        malevich.plugins[container][self.name]
      end
    end

    make_container :settings
    # Must follow make_container so that +plugin+ points at the container
    # reader rather than at the attr_accessor one.
    alias :plugin :settings
    make_container :histories
    make_container :errors

    # name - identifier used in log messages and as the key of this plugin's
    #        containers.
    def initialize(name)
      @name = name
      @interval = 60
      @run_if = Proc.new { true }
      @always_start = false
    end

    # name - collection of platform/os names; nil or empty means ['linux'].
    #
    # Returns true when ohai[:platform] or ohai[:os] is included in it.
    def suitable_platform?(name)
      platforms = name.nil? || name.empty? ? ['linux'] : name
      platforms.include?(ohai[:platform]) || platforms.include?(ohai[:os])
    end

    # Decides whether the plugin should be started, logging the reason when
    # it should not.
    #
    # Returns true or false.
    def runnable?
      return log_and_false("'#{name}' not started, because have not 'collect'") if collect.nil?
      return log_and_false("'#{name}' disabled in config") if settings.disable? && settings.disable
      return log_and_false("'#{name}' disabled by run_if statement") unless instance_eval(&run_if)
      log :unknown, "'#{name}' started"
      # Explicit result: the predicate must not depend on what +log+ returns.
      true
    end

    # Runs +collect+ forever, once per +interval+ seconds. Each run is limited
    # to two thirds of the interval; any StandardError raised by it is handed
    # to +error+ and the loop continues.
    def run!
      loop do
        t_start = Time.now
        begin
          Timeout.timeout(interval.to_f * 2/3) do
            self.instance_eval(&collect)
          end
        rescue => e
          error(e)
        end
        elapsed = (Time.now - t_start).to_i
        # Never pass a negative duration to sleep (it raises ArgumentError)
        # when a run overshoots the interval.
        sleep([interval - elapsed, 0].max)
      end
    end

    private

    def ohai
      malevich.ohai
    end

    # Logs +msg+ at +level+ and returns false, so guard clauses in #runnable?
    # can log and bail out in one expression.
    def log_and_false(level = :info, msg)
      log level, msg
      false
    end

  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Malevich
  module PluginHelpers

    # Records exception +e+ in the plugin's +errors+ container (last_at, msg,
    # reported = false) and writes it to the log at :error level.
    #
    # e - the exception; its backtrace may be nil (an exception object that
    #     was built but never raised).
    def error(e)
      # Array() turns a nil backtrace into [] instead of failing on nil.join.
      details = "#{e.class}: #{e}\n #{Array(e.backtrace).join("\n")}"
      errors.last_at = Time.now
      errors.msg = details
      errors.reported = false
      log :error, "Plugin '#{name}' has a error: #{details}"
    end

  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Malevich
  module PluginHelpers

    # Normalizes +hash+, derives its metric delta (when hash[:diff] is set)
    # and its state, then either logs it (when malevich.cmd[:test_given] is
    # set) or queues it through #event_minimizer.
    #
    # hash - event Hash with Symbol keys (:service, :metric, :state, :diff, ...);
    #        it is modified in place.
    def event(hash)
      event_normalize(hash)
      hash[:metric] = metric_diff(hash) if hash[:diff]
      hash[:state] = state_check(hash)
      if malevich.cmd[:test_given]
        log :unknown, "Event message from test plugin: #{hash.inspect}"
      else
        event_minimizer(hash)
      end
    end

    # Returns the difference between hash[:metric] and the metric seen on the
    # previous call for the same service, or nil on the first call (or when
    # hash[:metric] is missing). Removes the :diff key from +hash+.
    def metric_diff(hash)
      return unless hash[:metric]
      current_metric = hash[:metric]
      # Raw metrics live under their own key: #event_minimizer stores the whole
      # event under histories[service], and sharing that slot made the second
      # diff subtract a Hash from a number.
      key = metric_history_key(hash[:service])
      old_metric = histories[key]
      histories[key] = current_metric
      hash.delete(:diff)
      old_metric ? current_metric - old_metric : nil
    end

    # Derives the event state from hash[:metric] and the plugin's
    # settings.warning / settings.critical thresholds.
    #
    # Returns hash[:state] unchanged when it is already set, when there is no
    # metric, or when the metric is NaN; otherwise 'ok', 'warning' or
    # 'critical' (a metric equal to a threshold counts as reaching it).
    def state_check(hash)
      return hash[:state] if hash[:state]
      return hash[:state] if hash[:metric].nil?
      return hash[:state] if hash[:metric].kind_of?(Float) && hash[:metric].nan?
      warning = settings.respond_to?(:warning) ? settings.warning : nil
      critical = settings.respond_to?(:critical) ? settings.critical : nil
      return 'ok' unless warning || critical
      metric = hash[:metric].to_f
      # Critical is checked first so that metric == critical is 'critical'
      # whether or not a warning threshold is configured as well.
      return 'critical' if critical && metric >= critical
      return 'warning' if warning && metric >= warning
      'ok'
    end

    # Fills in defaults and canonical forms in place: Float metrics are
    # rounded to two decimals, true/false states become 'ok'/'critical', and
    # missing :service, :host and :tags are taken from the plugin name,
    # ohai[:fqdn] and malevich.cmd.tags.
    def event_normalize(hash)
      hash[:metric] = hash[:metric].round(2) if hash[:metric].kind_of?(Float)
      hash[:state] = 'ok' if hash[:state].kind_of?(TrueClass)
      hash[:state] = 'critical' if hash[:state].kind_of?(FalseClass)
      hash[:service] ||= name
      hash[:host] ||= ohai[:fqdn]
      hash[:tags] ||= malevich.cmd.tags
    end

    # Queues +hash+ on malevich.events and remembers it as the last event of
    # its service, except for a metric-less 'ok' event that directly follows
    # another 'ok' event of the same service (that repeat is dropped).
    def event_minimizer(hash)
      previous = histories[hash[:service]]
      return if hash[:state] == 'ok' && hash[:metric].nil? && previous && previous[:state] == 'ok'
      histories[hash[:service]] = hash
      malevich.events << hash
    end

    private

    # Key under which the last raw metric of +service+ is kept in +histories+.
    def metric_history_key(service)
      "#{service} raw metric"
    end

  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'rest_client'
|
2
|
+
require 'uri'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
module Malevich
  module PluginHelpers

    # Performs a GET request for +url+ through RestClient and returns its
    # result.
    def http_get(url)
      RestClient.get(url)
    end
    alias :rest_get :http_get

    # Fetches the content behind +url+.
    #
    # url - an http(s) URL (fetched with #http_get) or a local file path
    #       (read with File.read).
    #
    # Raises RuntimeError for file:// URLs, which are not supported.
    def body_get(url)
      scheme = begin
        URI.parse(url).scheme
      rescue URI::InvalidURIError
        # A local path need not be a valid URI (e.g. it contains spaces);
        # treat it as a plain path instead of failing.
        nil
      end

      case scheme
      when 'http', 'https'
        http_get(url)
      when 'file'
        raise "body_get('#{url}'): hasn't support yet"
      else
        File.read(url)
      end
    end

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'mixlib/shellout'
|
2
|
+
|
3
|
+
module Malevich
  module PluginHelpers

    # Builds a Mixlib::ShellOut from +command_args+, runs it and returns the
    # command object.
    # (Streaming to STDOUT on a tty is intentionally left disabled:
    #  cmd.live_stream = STDOUT if STDOUT.tty?)
    def shell_out(*command_args)
      Mixlib::ShellOut.new(*command_args).tap { |command| command.run_command }
    end

    # Same as #shell_out, but additionally calls +error!+ on the finished
    # command before returning it.
    def shell_out!(*command_args)
      shell_out(*command_args).tap { |command| command.error! }
    end

    # Runs the command and returns its stdout.
    def shell(*command_args)
      finished = shell_out(*command_args)
      finished.stdout
    end

    # Runs the command through #shell_out! and returns its stdout.
    def shell!(*command_args)
      finished = shell_out!(*command_args)
      finished.stdout
    end

  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Malevich
  module Responder
    # Periodically turns the plugin errors collected in malevich.plugins.errors
    # into events on malevich.events.
    class Error

      # Service name shared by the problem and the recovery event. They must
      # match, otherwise the 'ok' event never clears the problem it belongs to.
      SERVICE = 'plugin errors'

      attr_reader :name

      def initialize
        @name = 'plugin error responder'
      end

      # Reports plugin errors once a minute. Never returns.
      def run!
        loop do
          report_errors
          sleep 60
        end
      end

      private

      # One reporting pass. Does nothing when no errors container exists yet.
      # Otherwise queues an 'ok' event when every recorded error has already
      # been reported, or an event carrying the number of unreported errors,
      # after which those errors are marked as reported.
      def report_errors
        plugin_errors = malevich.plugins.errors
        return unless plugin_errors

        unreported = plugin_errors.count { |_, val| !val.reported? }
        if unreported == 0
          malevich.events << {:service => SERVICE, :state => 'ok'}
        else
          malevich.events << {:service => SERVICE, :metric => unreported,
            :description => "Plugins has errors \n #{plugin_errors}"}
          plugin_errors.each { |_, val| val.reported = true unless val.reported? }
        end
      end

    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Malevich
  module Responder
    # Minimal HTTP status endpoint: every accepted TCP connection receives
    # the JSON document built by #info and is then closed.
    class Http

      attr_reader :name

      # Reads the "host:port" listen address from
      # malevich.cmd['http-responder'].
      def initialize
        @host, @port = malevich.cmd['http-responder'].split(':')
        @started_at = Time.now.to_i
        @name = 'http api'
      end

      # Opens the listening socket and serves clients one at a time.
      # Never returns normally; any error propagates to the caller.
      def run!
        log :unknown, "Start http server at #{@host}:#{@port}"
        server = TCPServer.new(@host, @port)
        begin
          loop do
            client = server.accept
            log :unknown, "Accepted client: #{client.inspect}"
            serve(client)
          end
        ensure
          # Release the port when the loop dies, so that a restarted responder
          # can bind it again.
          server.close
        end
      end

      # Returns the status document as a JSON String terminated by a newline.
      def info
        {
          :cmd => malevich.cmd,
          :config => malevich.config,
          :plugins => malevich.monitor.plugins,
          :monitor => malevich.monitor.tasks.map {|x| { x[1].name => x[0].alive? } },
          :errors => (malevich.plugins.errors rescue {}),
          :version => Malevich::VERSION,
          :ruby => "#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}",
          :uptime => Time.now.to_i - @started_at,
          :queue_size => malevich.events.size
        }.to_json + "\n"
      end

      private

      # Writes the HTTP response to +client+. The client is closed even when
      # building or writing the response fails; the error itself propagates.
      def serve(client)
        response = info
        headers = "HTTP/1.1 200 OK\r\n" +
          "Server: Malevich Ruby\r\n" +
          "Content-Length: #{response.bytesize}\r\n" +
          "Content-Type: application/json\r\n\r\n"
        client.print headers
        client.print response
      ensure
        client.close
      end

    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'riemann/client'
|
2
|
+
require 'resolv'
|
3
|
+
|
4
|
+
module Malevich
  module Responder
    # Forwards queued events from malevich.events to every configured
    # Riemann server.
    class Riemann

      # Seconds to sleep between two flushes of the event queue.
      INTERVAL_FLUSH = 0.5

      attr_reader :name

      def initialize
        @name = "riemann client"
        @riemanns = []
      end

      # Creates the clients, then flushes the queue every INTERVAL_FLUSH
      # seconds. Never returns.
      def run!
        make_clients
        loop do
          flush
          sleep INTERVAL_FLUSH
        end
      end

      private

      # Rebuilds the client list from malevich.cmd['riemann-host'], a list of
      # "host" or "host:port" entries (port defaults to 5555). The TCP
      # transport is selected when malevich.cmd['riemann-tcp'] is set.
      # Updates #name to list the clients created so far.
      #
      # Returns the Array of clients.
      def make_clients
        @riemanns.clear
        malevich.cmd['riemann-host'].each do |address|
          hostname, port = address.split(':')
          options = {
            :host => Resolv.new.getaddress(hostname),
            :port => port || 5555,
            :timeout => 10
          }
          connection = ::Riemann::Client.new(options)
          connection = connection.tcp if malevich.cmd['riemann-tcp']
          log :debug, "Add new riemann client: #{connection.host}:#{connection.port}"
          @riemanns << connection
          labels = @riemanns.map { |known| "riemann client [#{known.host}:#{known.port}]" }
          @name = labels.join(" , ")
        end
        @riemanns
      end

      # Drains malevich.events, handing every popped event to each client.
      # Each delivery (log line plus send) is limited to 10 seconds.
      def flush
        until malevich.events.empty?
          message = malevich.events.pop
          @riemanns.each do |target|
            Timeout::timeout(10) do
              log :debug, "Sent message #{message} for #{target.host}:#{target.port}"
              target << message
            end
          end
        end
      end

    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'socket'
|
2
|
+
|
3
|
+
module Malevich
  module Responder
    # UDP endpoint that accepts JSON-encoded events and queues them on
    # malevich.events.
    class Udp

      # The only fields copied from an incoming message into an event.
      RIEMANN_RESERVED_FIELDS = [
        :time,
        :state,
        :service,
        :host,
        :description,
        :metric,
        :tags,
        :ttl
      ].freeze

      attr_reader :name

      # Reads the "host:port" listen address from
      # malevich.cmd['udp-responder'].
      def initialize
        @host, @port = malevich.cmd['udp-responder'].split(':')
        @name = 'udp api'
      end

      # Parses +data+ as a JSON event, queues it and answers the sender.
      #
      # data - raw datagram payload (String).
      # src  - object whose +reply+ method sends an answer to the sender.
      #
      # Any StandardError raised while parsing or queueing is logged and
      # answered with a failure message instead of being raised.
      def process(data, src)
        malevich.events << event_from_json(data)
        src.reply "sended\n\n"
      rescue => e
        # Include the cause: without it the log cannot tell malformed JSON
        # from any other failure.
        log :error, "Failed to send message: #{data.inspect} (#{e.class}: #{e})"
        src.reply "failed to send: #{data.inspect}\n"
      end

      # Builds an event Hash from +hash+, keeping only
      # RIEMANN_RESERVED_FIELDS (looked up by Symbol, then by String key;
      # absent fields are stored as nil). :host and :tags default to
      # malevich.ohai[:fqdn] and malevich.cmd.tags.
      #
      # hash - Hash with Symbol or String keys; nil is treated as empty.
      def event_from_hash(hash=nil)
        # The declared default is nil, so it must not fail on nil[].
        hash ||= {}
        new_hash = Hash.new
        RIEMANN_RESERVED_FIELDS.each do |key|
          new_hash[key] = hash[key] || hash[key.to_s]
        end
        new_hash[:host] ||= malevich.ohai[:fqdn]
        new_hash[:tags] ||= malevich.cmd.tags
        new_hash
      end

      # Parses the JSON document +str+ and converts it with #event_from_hash.
      #
      # Raises ArgumentError when the document is not a JSON object, and
      # whatever JSON.parse raises for malformed input.
      def event_from_json(str)
        parsed = JSON.parse(str)
        raise ArgumentError, "event must be a JSON object, got #{parsed.class}" unless parsed.is_a?(Hash)
        event_from_hash(parsed)
      end

      # Runs the UDP server loop, passing every datagram to #process.
      def run!
        log :unknown, "Start udp server at #{@host}:#{@port}"
        Socket.udp_server_loop(@host, @port) do |data, src|
          log :debug, "Received data: #{data.inspect}, from client: #{src.inspect}"
          process(data, src)
        end
      end

    end
  end
end
|