riemann-tools.haf 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +21 -0
- data/README.markdown +18 -0
- data/bin/riemann-aws-status +64 -0
- data/bin/riemann-bench +70 -0
- data/bin/riemann-cloudant +58 -0
- data/bin/riemann-diskstats +86 -0
- data/bin/riemann-elasticsearch +86 -0
- data/bin/riemann-elb-metrics +154 -0
- data/bin/riemann-fd +66 -0
- data/bin/riemann-freeswitch +31 -0
- data/bin/riemann-haproxy +52 -0
- data/bin/riemann-health +270 -0
- data/bin/riemann-kvminstance +22 -0
- data/bin/riemann-memcached +37 -0
- data/bin/riemann-munin +36 -0
- data/bin/riemann-net +101 -0
- data/bin/riemann-nginx-status +84 -0
- data/bin/riemann-proc +48 -0
- data/bin/riemann-rabbitmq +99 -0
- data/bin/riemann-redis +71 -0
- data/bin/riemann-redis-slowlog +44 -0
- data/bin/riemann-resmon +103 -0
- data/bin/riemann-riak +237 -0
- data/bin/riemann-riak-keys +12 -0
- data/bin/riemann-riak-ring +8 -0
- data/lib/riemann/tools.rb +114 -0
- metadata +224 -0
data/bin/riemann-munin
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers munin statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
class Riemann::Tools::Munin
|
8
|
+
include Riemann::Tools
|
9
|
+
require 'munin-ruby'
|
10
|
+
|
11
|
+
# Opens the connection to the munin node named by the --munin-host and
# --munin-port options. Both options are declared on this class but were
# previously never handed to the client, so it always used the library's
# built-in defaults regardless of the command line.
def initialize
  @munin = ::Munin::Node.new(opts[:munin_host], opts[:munin_port])
end
|
14
|
+
|
15
|
+
# Relays munin data to Riemann: for each requested service (all services
# listed by the node when --services is not given) every fetched field is
# reported as its own event named "<service> <field>".
def tick
  requested = opts[:services] || @munin.list
  requested.each do |name|
    @munin.fetch(name).each do |service, parts|
      parts.each do |part, metric|
        event = {
          :service => "#{service} #{part}",
          :metric  => metric.to_f,
          :state   => 'ok',
          :tags    => ['munin']
        }
        report(event)
      end
    end
  end
end
|
30
|
+
|
31
|
+
opt :munin_host, "Munin hostname", :default => 'localhost'
|
32
|
+
opt :munin_port, "Munin port", :default => 4949
|
33
|
+
opt :services, "Munin services to translate (if not specified, all services are relayed)", :type => :strings
|
34
|
+
end
|
35
|
+
|
36
|
+
Riemann::Tools::Munin.run
|
data/bin/riemann-net
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers network interface statistics from /proc/net/dev and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
class Riemann::Tools::Net
|
8
|
+
include Riemann::Tools
|
9
|
+
|
10
|
+
opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
|
11
|
+
opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']
|
12
|
+
|
13
|
+
# Captures the interface filters from the command-line options.
# @old_state starts out nil; @interfaces is only assigned when the
# --interfaces option was given.
def initialize
  @old_state = nil

  wanted = opts[:interfaces]
  @interfaces = wanted.map { |name| name.dup } if wanted

  @ignore_interfaces = opts[:ignore_interfaces].map { |name| name.dup }
end
|
18
|
+
|
19
|
+
# Reads /proc/net/dev and returns a Hash mapping "<iface> <counter>" to the
# counter's current integer value, restricted to @interfaces (when set) and
# with every interface in @ignore_interfaces removed.
#
# The interface name is taken as everything before the first colon of the
# line. The previous unanchored `\w+?` pattern truncated names containing
# characters such as '-' or '.' (e.g. "br-lan" was reported as "lan"), which
# also made the interface filters miss them.
def state
  counters = [
    'rx bytes', 'rx packets', 'rx errs', 'rx drop',
    'rx fifo', 'rx frame', 'rx compressed', 'rx multicast',
    'tx bytes', 'tx packets', 'tx errs', 'tx drops',
    'tx fifo', 'tx colls', 'tx carrier', 'tx compressed'
  ]

  samples = {}
  File.read('/proc/net/dev').each_line do |line|
    # Header lines contain no "name:" prefix followed by digits and are skipped.
    next unless line =~ /\A\s*([^\s:]+):\s*([\s\d]+)/

    iface = $1
    values = $2.split(/\s+/).map { |str| str.to_i }
    counters.zip(values).each do |counter, value|
      samples["#{iface} #{counter}"] = value
    end
  end

  # Keep only the explicitly requested interfaces, if any were given.
  if @interfaces
    samples = samples.select do |service, _value|
      @interfaces.include? service.split(' ').first
    end
  end

  samples.reject do |service, _value|
    @ignore_interfaces.include? service.split(' ').first
  end
end
|
65
|
+
|
66
|
+
# Samples the interface counters and, from the second sample onwards,
# reports each counter's change per second (delta divided by the
# configured interval).
#
# Fixes over the previous version:
# * a counter with no value in the previous sample (for example an interface
#   that appeared since the last tick) is skipped instead of raising
#   NoMethodError on `metric - nil`;
# * the transmit drop counter is named "tx drops", which the old `/drop$/`
#   test never matched, so it could never be flagged as a warning.
#
# NOTE(review): the warning state is derived from the counter's absolute
# value rather than its delta, as before -- confirm that this is intended.
def tick
  current = state

  if @old_state
    current.each do |service, metric|
      previous = @old_state[service]
      next if previous.nil?

      delta = metric - previous
      svc_state = if service =~ /(drops?|errs)$/ && metric > 0
                    'warning'
                  else
                    'ok'
                  end

      report(
        :service => service.dup,
        :metric => (delta.to_f / opts[:interval]),
        :state => svc_state
      )
    end
  end

  @old_state = current
end
|
99
|
+
end
|
100
|
+
|
101
|
+
Riemann::Tools::Net.run
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers nginx status stub statistics and submits them to Riemann.
|
4
|
+
# See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
|
5
|
+
|
6
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
7
|
+
|
8
|
+
class Riemann::Tools::NginxStatus
|
9
|
+
include Riemann::Tools
|
10
|
+
require 'net/http'
|
11
|
+
require 'uri'
|
12
|
+
|
13
|
+
opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
|
14
|
+
opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
|
15
|
+
opt :active_warning, "Active connections warning threshold", :default => 0
|
16
|
+
opt :active_critical, "Active connections critical threshold", :default => 0
|
17
|
+
opt :reading_warning, "Reading connections warning threshold", :default => 0
|
18
|
+
opt :reading_critical, "Reading connections critical threshold", :default => 0
|
19
|
+
opt :writing_warning, "Writing connections warning threshold", :default => 0
|
20
|
+
opt :writing_critical, "Writing connections critical threshold", :default => 0
|
21
|
+
opt :waiting_warning, "Waiting connections warning threshold", :default => 0
|
22
|
+
opt :waiting_critical, "Waiting connections critical threshold", :default => 0
|
23
|
+
|
24
|
+
# Prepares the status URI plus the key list and pattern used to read the
# stub status page. The pattern's seven captures line up, in order, with
# the names in @keys. A response looks like:
#
#   Active connections: 1
#   server accepts handled requests
#    39 39 39
#   Reading: 0 Writing: 1 Waiting: 0
def initialize
  @keys = %w{active accepted handled requests reading writing waiting}
  @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
  @uri = URI.parse(opts[:uri])
end
|
36
|
+
|
37
|
+
# Maps a metric value to a Riemann state using the "<key>_critical" and
# "<key>_warning" options. A threshold of 0 (the default) disables that
# level; metrics without threshold options are always 'ok'.
def state(key, value)
  [['critical', :"#{key}_critical"], ['warning', :"#{key}_warning"]].each do |level, option|
    next unless opts.has_key?(option)

    threshold = opts[option]
    return level if threshold > 0 && value >= threshold
  end

  'ok'
end
|
50
|
+
|
51
|
+
# Fetches the stub status page and reports an "nginx health" event plus one
# event per metric named in @keys.
#
# A response that does not match @re (an error page, a different status
# format, ...) is now reported as a critical "nginx health" event. Before,
# the health was reported as "ok" and the method then raised NoMethodError
# because `to_a[1,7]` is nil for a failed match.
def tick
  begin
    response = Net::HTTP.get(@uri)
  rescue => e
    report(
      :service => "nginx health",
      :state => "critical",
      :description => "Connection error: #{e.class} - #{e.message}"
    )
    return
  end

  match = @re.match(response)
  if match.nil?
    report(
      :service => "nginx health",
      :state => "critical",
      :description => "Unexpected response: could not parse nginx stub status output"
    )
    return
  end

  report(
    :service => "nginx health",
    :state => "ok",
    :description => "Nginx status connection ok"
  )

  values = match.captures.map { |v| v.to_i }

  @keys.zip(values).each do |key, value|
    report({
      :service => "nginx #{key}",
      :metric => value,
      :state => state(key, value),
      :tags => ['nginx']
    })
  end
end
|
82
|
+
end
|
83
|
+
|
84
|
+
Riemann::Tools::NginxStatus.run
|
data/bin/riemann-proc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Reports running process count to riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
class Riemann::Tools::Proc
|
8
|
+
include Riemann::Tools
|
9
|
+
|
10
|
+
opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
|
11
|
+
opt :proc_min_critical, "running process count minimum", :default => 1
|
12
|
+
opt :proc_max_critical, "running process count maximum", :default => 1
|
13
|
+
|
14
|
+
# Stores the critical process-count limits, aborts when no process pattern
# was supplied, and selects the check routine. Only a Linux check exists;
# on any other `uname -s` value a warning is printed and the Linux check is
# used anyway.
def initialize
  critical = { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] }
  @limits = { :critical => critical }

  unless opts[:proc_regex]
    abort "FATAL: specify a process regular expression, see --help for usage"
  end

  ostype = `uname -s`.chomp.downcase
  if ostype != "linux"
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux"
  end

  @check = method :linux_proc
end
|
23
|
+
|
24
|
+
# Sends a single event to Riemann, coercing service and state to strings
# and the metric to a float.
def alert(service, state, metric, description)
  event = {
    :service     => service.to_s,
    :state       => state.to_s,
    :metric      => metric.to_f,
    :description => description
  }
  report(event)
end
|
32
|
+
|
33
|
+
# Counts the processes whose command line matches the --proc-regex pattern
# (per grep, excluding grep itself and riemann-proc) and reports the count:
# critical when it lies outside the configured min/max range, ok otherwise.
#
# The pattern is shell-escaped and passed with `-e`, so patterns containing
# spaces, shell metacharacters or a leading '-' reach grep intact instead of
# being word-split or executed by the shell.
def linux_proc
  # Loaded here because Shellwords is only needed by this check.
  require 'shellwords'

  process = opts[:proc_regex]
  pattern = Shellwords.escape(process)
  running = Integer(`ps axo args | grep -e #{pattern} | grep -v grep | grep -v riemann-proc | wc -l`)

  limits = @limits[:critical]
  level = running > limits[:max] || running < limits[:min] ? :critical : :ok

  alert "proc #{process}", level, running, "process #{process} is running #{running} instances"
end
|
42
|
+
|
43
|
+
# Runs the check routine stored in @check.
def tick
  @check.()
end
|
46
|
+
end
|
47
|
+
|
48
|
+
Riemann::Tools::Proc.run
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
|
4
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
5
|
+
|
6
|
+
class Riemann::Tools::Rabbitmq
|
7
|
+
include Riemann::Tools
|
8
|
+
|
9
|
+
require 'faraday'
|
10
|
+
require 'json'
|
11
|
+
require 'uri'
|
12
|
+
|
13
|
+
|
14
|
+
opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
|
15
|
+
opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
|
16
|
+
|
17
|
+
opt :monitor_user, 'RabbitMQ monitoring user', type: :string
|
18
|
+
opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
|
19
|
+
opt :monitor_port, 'RabbitMQ monitoring port', default: 15672
|
20
|
+
opt :monitor_host, 'RabbitMQ monitoring host', default: "localhost"
|
21
|
+
|
22
|
+
# Builds the URL of the /api/overview endpoint, embedding the monitoring
# credentials, host and port taken from the options.
def monitor_url
  credentials = "#{options[:monitor_user]}:#{options[:monitor_pass]}"
  endpoint = "#{options[:monitor_host]}:#{options[:monitor_port]}"
  "http://#{credentials}@#{endpoint}/api/overview"
end
|
25
|
+
|
26
|
+
# Host name attached to reported events: the :event_host option when set,
# otherwise the monitored RabbitMQ host.
def event_host
  options[:event_host] || options[:monitor_host]
end
|
33
|
+
|
34
|
+
# Performs a GET on uri through Faraday using the configured read and open
# timeouts. Returns the response, or nil after reporting a critical
# "rabbitmq monitoring" event when the request raises.
def safe_get(uri, event_host)
  Faraday.new(uri).get do |req|
    req.options[:timeout] = options[:read_timeout]
    req.options[:open_timeout] = options[:open_timeout]
  end
rescue => e
  report(
    :host => event_host,
    :service => "rabbitmq monitoring",
    :state => "critical",
    :description => "HTTP connection error: #{e.class} - #{e.message}"
  )
  nil
end
|
52
|
+
|
53
|
+
# Polls the RabbitMQ overview endpoint and reports one event per entry of
# the message_stats, queue_totals and object_totals sections. Keys ending
# in "details" report their 'rate' value; all others report the raw value.
#
# Fixes over the previous version:
# * the HTTP status is checked before the body is parsed, so a non-200
#   response with a non-JSON body is reported as critical instead of raising
#   JSON::ParserError;
# * a body that is not valid JSON is reported as critical;
# * a section missing from the response is skipped rather than raising
#   NoMethodError on nil.
def tick
  uri = URI(monitor_url)
  response = safe_get(uri, event_host)

  return if response.nil?

  if response.status != 200
    report(:host => event_host,
      :service => "rabbitmq",
      :state => "critical",
      :description => "HTTP connection error: #{response.status} - #{response.body}"
    )
    return
  end

  begin
    json = JSON.parse(response.body)
  rescue JSON::ParserError => e
    report(:host => event_host,
      :service => "rabbitmq monitoring",
      :state => "critical",
      :description => "Invalid JSON response: #{e.message}"
    )
    return
  end

  report(:host => event_host,
    :service => "rabbitmq monitoring",
    :state => "ok",
    :description => "HTTP connection ok"
  )

  %w( message_stats queue_totals object_totals ).each do |stat|
    section = json[stat]
    # Brand new servers can have blank (or absent) message stats.
    next if section.nil? || section.empty?

    section.each_pair do |k, v|
      metric = (k =~ /details$/) ? v['rate'] : v

      # TODO: Set state via thresholds which can be configured

      report(:host => event_host,
        :service => "rabbitmq.#{stat}.#{k}",
        :metric => metric,
        :description => "RabbitMQ monitor"
      )
    end
  end
end
|
98
|
+
end
|
99
|
+
Riemann::Tools::Rabbitmq.run
|
data/bin/riemann-redis
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers redis INFO statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
class Riemann::Tools::Redis
|
8
|
+
include Riemann::Tools
|
9
|
+
require 'redis'
|
10
|
+
|
11
|
+
opt :redis_host, "Redis hostname", :default => 'localhost'
|
12
|
+
opt :redis_port, "Redis port", :default => 6379
|
13
|
+
opt :redis_password, "Redis password", :default => ''
|
14
|
+
opt :redis_url, "Redis URL", :default => ''
|
15
|
+
opt :redis_socket, "Redis socket", :default => ''
|
16
|
+
opt :redis_section, "Redis INFO section", :default => 'default'
|
17
|
+
|
18
|
+
STRING_VALUES = %w{ redis_version redis_git_sha1 redis_mode os
|
19
|
+
multiplexing_api gcc_version run_id used_memory_human
|
20
|
+
used_memory_peak_human mem_allocator
|
21
|
+
rdb_last_bgsave_status aof_last_bgrewrite_status role }
|
22
|
+
|
23
|
+
# Creates the Redis client. Connection settings are chosen in order of
# precedence: --redis-url, then --redis-socket, then host and port. A
# non-empty --redis-password is sent via AUTH straight away.
def initialize
  url = opts[:redis_url]
  socket = opts[:redis_socket]

  connection =
    if url != ''
      { :url => url }
    elsif socket != ''
      { :path => socket }
    else
      { :host => opts[:redis_host], :port => opts[:redis_port] }
    end

  @redis = ::Redis.new(connection)
  @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
  @section = opts[:redis_section]
end
|
35
|
+
|
36
|
+
# Reports one event per property of the configured INFO section. The metric
# is the value as a float and the state is the value as a string. For
# properties in STRING_VALUES or named db<N>, the two *_status properties
# put the raw value in :state and the rest put it in :description. run_id
# always reports metric 0.
#
# On an "ERR operation not permitted" command error the password is sent
# again (when one is configured); other command errors are ignored.
def tick
  @redis.info(@section).each do |property, value|
    event = {
      :host => opts[:redis_host].dup,
      :service => "redis #{property}",
      :metric => value.to_f,
      :state => value.to_s,
      :tags => ['redis']
    }

    if STRING_VALUES.include?(property) || property.match(/^db\d+/)
      status_property = %w{ rdb_last_bgsave_status aof_last_bgrewrite_status }.include?(property)
      field = status_property ? :state : :description
      event[field] = value
    end

    event[:metric] = 0 if property == "run_id"

    report(event)
  end
rescue ::Redis::CommandError => e
  if e.message == "ERR operation not permitted"
    @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
  end
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
Riemann::Tools::Redis.run
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers Redis SLOWLOG statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
class Riemann::Tools::RedisSlowlog
|
8
|
+
include Riemann::Tools
|
9
|
+
require 'redis'
|
10
|
+
|
11
|
+
opt :redis_url, "Redis URL", :default => 'redis://127.0.0.1:6379/'
|
12
|
+
opt :redis_password, "Redis password", :default => ''
|
13
|
+
opt :slowlog_len, "Number of SLOWLOG entries to get", :default => 10
|
14
|
+
opt :slowlog_reset, "Reset SLOWLOG after querying it", :default => false
|
15
|
+
|
16
|
+
# Connects to Redis at --redis-url, authenticates when a non-empty password
# is configured, and remembers how many SLOWLOG entries to fetch and whether
# to reset the log after each query.
def initialize
  @redis = ::Redis.new(url: opts[:redis_url])

  password = opts[:redis_password]
  @slowlog_len = opts[:slowlog_len]
  @slowlog_reset = opts[:slowlog_reset]

  @redis.auth(password) unless password == ''
end
|
24
|
+
|
25
|
+
# Fetches up to @slowlog_len SLOWLOG entries and reports each one as a
# 'warning' event: the entry's third field becomes the metric, its timestamp
# the event time, and the inspected command the description. The slow log is
# reset afterwards when @slowlog_reset is set.
def tick
  entries = @redis.slowlog("GET", @slowlog_len)

  entries.each do |_id, timestamp, us, cmd|
    report(
      :host => @redis.client.host,
      :service => "redis",
      :time => timestamp,
      :metric => us.to_f,
      :state => 'warning',
      :tags => ['redis', 'slowlog'],
      :description => cmd.inspect
    )
  end

  @redis.slowlog("RESET") if @slowlog_reset
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
Riemann::Tools::RedisSlowlog.run
|