riemann-tools-dgvz 0.2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +21 -0
- data/README.markdown +18 -0
- data/bin/riemann-apache-status +98 -0
- data/bin/riemann-aws-billing +79 -0
- data/bin/riemann-aws-status +64 -0
- data/bin/riemann-bench +70 -0
- data/bin/riemann-cloudant +58 -0
- data/bin/riemann-diskstats +86 -0
- data/bin/riemann-elasticsearch +87 -0
- data/bin/riemann-elb-metrics +154 -0
- data/bin/riemann-fd +66 -0
- data/bin/riemann-freeswitch +79 -0
- data/bin/riemann-haproxy +52 -0
- data/bin/riemann-health +272 -0
- data/bin/riemann-kvminstance +22 -0
- data/bin/riemann-memcached +37 -0
- data/bin/riemann-munin +36 -0
- data/bin/riemann-net +101 -0
- data/bin/riemann-nginx-status +84 -0
- data/bin/riemann-proc +48 -0
- data/bin/riemann-rabbitmq +99 -0
- data/bin/riemann-resmon +103 -0
- data/bin/riemann-riak +300 -0
- data/bin/riemann-riak-keys +12 -0
- data/bin/riemann-riak-ring +8 -0
- data/bin/riemann-varnish +36 -0
- data/bin/riemann-zookeeper +41 -0
- data/lib/riemann/tools.rb +107 -0
- metadata +210 -0
data/bin/riemann-munin
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers munin statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
# Relays values fetched from a munin node to Riemann, one event per
# service/field pair.
class Riemann::Tools::Munin
  include Riemann::Tools
  require 'munin-ruby'

  opt :munin_host, "Munin hostname", :default => 'localhost'
  opt :munin_port, "Munin port", :default => 4949
  opt :services, "Munin services to translate (if not specified, all services are relayed)", :type => :strings

  # Opens the connection to the munin node selected by --munin-host and
  # --munin-port. Previously both options were declared but never passed on,
  # so the library's built-in default node was always used.
  def initialize
    @munin = ::Munin::Node.new(opts[:munin_host], opts[:munin_port])
  end

  # Fetches every requested service (or everything the node lists when
  # --services is not given) and reports each field as its own event,
  # named "<service> <field>", tagged 'munin', with state 'ok'.
  def tick
    requested = opts[:services] || @munin.list
    requested.each do |service|
      # fetch is iterated as name => { field => value }; the inner name gets
      # its own variable so it no longer shadows the outer `service`.
      @munin.fetch(service).each do |name, parts|
        parts.each do |part, metric|
          report(
            :service => "#{name} #{part}",
            :metric => metric.to_f,
            :state => 'ok',
            :tags => ['munin']
          )
        end
      end
    end
  end
end
|
35
|
+
|
36
|
+
Riemann::Tools::Munin.run
|
data/bin/riemann-net
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers network interface statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
# Samples /proc/net/dev on every tick and reports, for each interface
# counter, the change since the previous tick divided by the polling
# interval.
class Riemann::Tools::Net
  include Riemann::Tools

  opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
  opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']

  # Counter names, in the column order they are zipped against the numbers
  # of a /proc/net/dev line. They become the suffix of the service name.
  PROC_NET_DEV_FIELDS = [
    'rx bytes',
    'rx packets',
    'rx errs',
    'rx drop',
    'rx fifo',
    'rx frame',
    'rx compressed',
    'rx multicast',
    'tx bytes',
    'tx packets',
    'tx errs',
    'tx drops',
    'tx fifo',
    'tx colls',
    'tx carrier',
    'tx compressed'
  ].freeze

  # "<iface>: <numbers...>". The interface name is everything up to the
  # colon, so names containing '.' or '-' (eth0.100, br-lan) are kept whole
  # instead of being cut down to their last word-character run.
  PROC_NET_DEV_LINE = /\A\s*([^\s:]+):\s*([\s\d]+)\s*/

  # Services whose counter being non-zero is reported as a warning. The
  # optional "s" is required because the tx counter is named 'tx drops'.
  WARN_IF_NONZERO = /(?:drops?|errs)$/

  def initialize
    @old_state = nil
    @interfaces = opts[:interfaces].map(&:dup) if opts[:interfaces]
    @ignore_interfaces = opts[:ignore_interfaces].map(&:dup)
  end

  # Reads /proc/net/dev and returns a hash of "<iface> <counter>" => Integer,
  # restricted to --interfaces (when given) and minus --ignore-interfaces.
  def state
    counters = {}

    File.read('/proc/net/dev').split("\n").each do |line|
      parsed = PROC_NET_DEV_LINE.match(line)
      next unless parsed # header lines carry no "<iface>:" prefix

      iface = parsed[1]
      values = parsed[2].split(/\s+/).map { |str| str.to_i }
      PROC_NET_DEV_FIELDS.zip(values).each do |field, value|
        counters["#{iface} #{field}"] = value
      end
    end

    # Filter interfaces
    if (wanted = @interfaces)
      counters = counters.select do |service, _value|
        wanted.include? service.split(' ').first
      end
    end

    counters.reject do |service, _value|
      @ignore_interfaces.include? service.split(' ').first
    end
  end

  # Reports one event per counter with the per-second rate since the last
  # tick. Nothing is reported on the first tick, which only records a
  # baseline.
  def tick
    current = state

    if @old_state
      current.each do |service, metric|
        previous = @old_state[service]
        # An interface that appeared since the last tick has no baseline yet.
        next if previous.nil?

        delta = metric - previous
        # NOTE(review): the warning is keyed on the cumulative counter, not
        # on delta, so it stays 'warning' once any drop/error was ever seen.
        svc_state = (service =~ WARN_IF_NONZERO && metric > 0) ? 'warning' : 'ok'

        report(
          :service => service.dup,
          :metric => (delta.to_f / opts[:interval]),
          :state => svc_state
        )
      end
    end

    @old_state = current
  end
end
|
100
|
+
|
101
|
+
Riemann::Tools::Net.run
|
data/bin/riemann-nginx-status
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers nginx status stub statistics and submits them to Riemann.
|
4
|
+
# See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
|
5
|
+
|
6
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
7
|
+
|
8
|
+
# Polls an nginx stub-status page and reports its seven counters to Riemann,
# plus an "nginx health" event describing whether the page could be fetched
# and parsed.
class Riemann::Tools::NginxStatus
  include Riemann::Tools
  require 'net/http'
  require 'uri'

  opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
  opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
  opt :active_warning, "Active connections warning threshold", :default => 0
  opt :active_critical, "Active connections critical threshold", :default => 0
  opt :reading_warning, "Reading connections warning threshold", :default => 0
  opt :reading_critical, "Reading connections critical threshold", :default => 0
  opt :writing_warning, "Writing connections warning threshold", :default => 0
  opt :writing_critical, "Writing connections critical threshold", :default => 0
  opt :waiting_warning, "Waiting connections warning threshold", :default => 0
  opt :waiting_critical, "Waiting connections critical threshold", :default => 0

  def initialize
    @uri = URI.parse(opts[:uri])

    # sample response:
    #
    # Active connections: 1
    # server accepts handled requests
    #  39 39 39
    # Reading: 0 Writing: 1 Waiting: 0
    #
    # @keys names the seven capture groups of @re, in order.
    @keys = %w{active accepted handled requests reading writing waiting}
    @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
  end

  # Maps a metric value to 'critical', 'warning' or 'ok' using the
  # "<key>_critical" / "<key>_warning" options. A threshold of 0 (the
  # default) disables that level; keys without threshold options are 'ok'.
  def state(key, value)
    %w{critical warning}.each do |level|
      option = "#{key}_#{level}".to_sym
      next unless opts.has_key? option

      threshold = opts[option]
      return level if threshold > 0 && value >= threshold
    end

    'ok'
  end

  # Fetches the status page, reports "nginx health", and then one
  # "nginx <key>" event for every metric selected by --checks.
  def tick
    response = nil
    begin
      response = Net::HTTP.get(@uri)
    rescue => e
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Connection error: #{e.class} - #{e.message}"
      )
    end

    return if response.nil?

    parsed = @re.match(response)
    if parsed.nil?
      # Something answered, but not with a stub-status page (wrong URI, error
      # page, ...). Previously this reported "ok" and then raised on nil.
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Unexpected nginx status response: #{response[0, 200].inspect}"
      )
      return
    end

    report(
      :service => "nginx health",
      :state => "ok",
      :description => "Nginx status connection ok"
    )

    values = parsed.captures.map { |v| v.to_i }
    # Honour --checks; its default lists every key, so the default output is
    # unchanged.
    wanted = opts[:checks] || @keys

    @keys.zip(values).each do |key, value|
      next unless wanted.include?(key)

      report({
        :service => "nginx #{key}",
        :metric => value,
        :state => state(key, value),
        :tags => ['nginx']
      })
    end
  end
end
|
83
|
+
|
84
|
+
Riemann::Tools::NginxStatus.run
|
data/bin/riemann-proc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Reports running process count to riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
# Counts the running processes whose command line matches a pattern and
# reports the count to Riemann, critical when it falls outside [min, max].
class Riemann::Tools::Proc
  include Riemann::Tools
  require 'shellwords'

  opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
  opt :proc_min_critical, "running process count minimum", :default => 1
  opt :proc_max_critical, "running process count maximum", :default => 1

  # Captures the limits, aborts when no pattern was supplied, and selects the
  # check implementation. Only a Linux check exists; other systems get a
  # warning and use it anyway.
  def initialize
    @limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }

    abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]

    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
    @check = method :linux_proc
  end

  # Sends a single event with the given service name, state, metric (as a
  # Float) and description.
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts matching lines of `ps axo args`, excluding the grep processes and
  # riemann-proc itself, and alerts :critical when the count is outside the
  # configured bounds, :ok otherwise.
  def linux_proc
    process = opts[:proc_regex]
    # Escape the pattern for the shell: interpolating it between single
    # quotes broke (or injected into) the command as soon as it contained a
    # quote character.
    pattern = Shellwords.escape(process)
    running = Integer(`ps axo args | grep #{pattern} | grep -v grep | grep -v riemann-proc | wc -l`)

    bounds = @limits[:critical]
    state = (running > bounds[:max] || running < bounds[:min]) ? :critical : :ok

    alert "proc #{process}", state, running, "process #{process} is running #{running} instances"
  end

  def tick
    @check.call
  end
end
|
47
|
+
|
48
|
+
Riemann::Tools::Proc.run
|
data/bin/riemann-rabbitmq
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
|
4
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
5
|
+
|
6
|
+
# Polls the RabbitMQ management API's /api/overview endpoint and forwards
# its message_stats, queue_totals and object_totals sections to Riemann.
class Riemann::Tools::Rabbitmq
  include Riemann::Tools

  require 'faraday'
  require 'json'
  require 'uri'

  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1

  opt :monitor_user, 'RabbitMQ monitoring user', type: :string
  opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
  opt :monitor_port, 'RabbitMQ monitoring port', default: 15672
  opt :monitor_host, 'RabbitMQ monitoring host', default: "localhost"

  # Overview URL with the monitoring credentials embedded as userinfo.
  # NOTE(review): user and password are interpolated unescaped; characters
  # such as '@' or '/' in them would yield an invalid URL.
  def monitor_url
    "http://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api/overview"
  end

  # Host name attached to every event: the :event_host option when set,
  # otherwise the monitored host.
  def event_host
    options[:event_host] || options[:monitor_host]
  end

  # Performs the GET with the configured timeouts. Any error is reported as
  # a critical "rabbitmq monitoring" event and nil is returned instead of
  # raising.
  def safe_get(uri, event_host)
    # Handle connection timeouts
    response = nil
    begin
      connection = Faraday.new(uri)
      response = connection.get do |req|
        req.options[:timeout] = options[:read_timeout]
        req.options[:open_timeout] = options[:open_timeout]
      end
    rescue => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "HTTP connection error: #{e.class} - #{e.message}"
      )
    end
    response
  end

  # Fetches the overview and reports one "rabbitmq.<section>.<key>" event per
  # entry; for keys ending in "details" the nested 'rate' value is the metric.
  def tick
    uri = URI(monitor_url)
    response = safe_get(uri, event_host)

    return if response.nil?

    if response.status != 200
      # Checked before parsing: an error response (e.g. 401) need not carry a
      # JSON body, and parsing it first raised before this event was sent.
      # NOTE(review): this uses service "rabbitmq" while the ok event below
      # uses "rabbitmq monitoring", so the ok never supersedes it.
      report(:host => event_host,
        :service => "rabbitmq",
        :state => "critical",
        :description => "HTTP connection error: #{response.status} - #{response.body}"
      )
      return
    end

    report(:host => event_host,
      :service => "rabbitmq monitoring",
      :state => "ok",
      :description => "HTTP connection ok"
    )

    json = JSON.parse(response.body)

    %w( message_stats queue_totals object_totals ).each do |stat|
      section = json[stat]
      # NOTE / BUG ?
      # Brand new servers can have blank message stats. Is this ok?
      # I can't decide.
      # A section that is absent altogether is skipped as well, rather than
      # raising on nil.
      next if section.nil? || section.empty?

      section.each_pair do |k, v|
        metric = k =~ /details$/ ? v['rate'] : v

        # TODO: Set state via thresholds which can be configured

        report(:host => event_host,
          :service => "rabbitmq.#{stat}.#{k}",
          :metric => metric,
          :description => "RabbitMQ monitor"
        )
      end
    end
  end
end
|
99
|
+
Riemann::Tools::Rabbitmq.run
|
data/bin/riemann-resmon
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
4
|
+
|
5
|
+
# Polls a list of Resmon endpoints (one URI per line of the host file) and
# forwards every metric found in their XML output to Riemann.
class Riemann::Tools::Resmon
  include Riemann::Tools
  require 'nokogiri'
  require 'faraday'
  require 'uri' # URI() is called in tick; do not rely on faraday loading it

  opt :resmon_hostfile, 'File with hostnames running Resmon (one URI per line)', type: :string
  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
  opt :fqdn, 'Use FQDN for event host'

  # Loads the endpoint list. Aborts with a usage hint when no host file was
  # given (previously a TypeError from File.read(nil)). Blank lines and
  # surrounding whitespace are dropped so they are not treated as hosts.
  def initialize
    hostfile = options[:resmon_hostfile]
    abort "FATAL: specify a resmon hostfile, see --help for usage" unless hostfile

    @hosts = File.read(hostfile).split("\n").map(&:strip).reject(&:empty?)
    super
  end

  # Work out the hostname to submit with the event: the full name with
  # --fqdn, otherwise only the part before the first dot.
  def get_event_host(host)
    options[:fqdn] ? host : host.split('.')[0]
  end

  # Handles HTTP connections and GET requests safely: any error is reported
  # as a critical "resmon" event and nil is returned instead of raising.
  def safe_get(uri, event_host)
    # Handle connection timeouts
    response = nil
    begin
      connection = Faraday.new(uri)
      response = connection.get do |req|
        req.options[:timeout] = options[:read_timeout]
        req.options[:open_timeout] = options[:open_timeout]
      end
    rescue => e
      report(:host => event_host,
        :service => "resmon",
        :state => "critical",
        :description => "HTTP connection error: #{e.class} - #{e.message}"
      )
    end
    response
  end

  # For every endpoint: fetch, report connection health, then report one
  # event per <metric> of every ResmonResult, named
  # "<module>`<service>`<metric name>" and timestamped with last_update.
  def tick
    @hosts.each do |host|
      uri = URI(host)
      # A line without a scheme parses with a nil host; fall back to the raw
      # line so the resulting connection failure can still be reported.
      event_host = get_event_host(uri.host || host)

      response = safe_get(uri, event_host)
      next if response.nil?

      # Handle non-200 responses
      if response.status != 200
        report(:host => event_host,
          :service => "resmon",
          :state => "critical",
          :description => "HTTP connection error: #{response.status} - #{response.body}"
        )
        next
      end

      report(:host => event_host,
        :service => "resmon",
        :state => "ok",
        :description => "Resmon connection ok"
      )

      doc = Nokogiri::XML(response.body)

      doc.xpath('//ResmonResults/ResmonResult').each do |result|
        timestamp = result.xpath('last_update').first.text
        result.xpath('metric').each do |metric|
          event = {
            host: event_host,
            service: "#{result.attributes['module'].value}`#{result.attributes['service'].value}`#{metric.attributes['name'].value}",
            time: timestamp.to_i
          }

          # The metric's type attribute decides how its text is interpreted;
          # types not listed here yield an event without metric/description.
          case metric.attributes['type'].value
          when /[iIlL]/
            event[:metric] = metric.text.to_i
          when 'n'
            event[:metric] = metric.text.to_f
          when 's'
            event[:description] = metric.text
          when '0'
            raise 'dunno what 0 is yet'
          end

          report(event)
        end
      end
    end
  end
end
|
102
|
+
|
103
|
+
Riemann::Tools::Resmon.run
|