riemann-tools.haf 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +21 -0
- data/README.markdown +18 -0
- data/bin/riemann-aws-status +64 -0
- data/bin/riemann-bench +70 -0
- data/bin/riemann-cloudant +58 -0
- data/bin/riemann-diskstats +86 -0
- data/bin/riemann-elasticsearch +86 -0
- data/bin/riemann-elb-metrics +154 -0
- data/bin/riemann-fd +66 -0
- data/bin/riemann-freeswitch +31 -0
- data/bin/riemann-haproxy +52 -0
- data/bin/riemann-health +270 -0
- data/bin/riemann-kvminstance +22 -0
- data/bin/riemann-memcached +37 -0
- data/bin/riemann-munin +36 -0
- data/bin/riemann-net +101 -0
- data/bin/riemann-nginx-status +84 -0
- data/bin/riemann-proc +48 -0
- data/bin/riemann-rabbitmq +99 -0
- data/bin/riemann-redis +71 -0
- data/bin/riemann-redis-slowlog +44 -0
- data/bin/riemann-resmon +103 -0
- data/bin/riemann-riak +237 -0
- data/bin/riemann-riak-keys +12 -0
- data/bin/riemann-riak-ring +8 -0
- data/lib/riemann/tools.rb +114 -0
- metadata +224 -0
data/bin/riemann-munin
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers munin statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
# Relays statistics from a munin node to Riemann.
#
# On every tick each selected munin service is fetched and every value it
# returns is reported as a "<service> <part>" event tagged 'munin'.
class Riemann::Tools::Munin
  include Riemann::Tools
  require 'munin-ruby'

  opt :munin_host, "Munin hostname", :default => 'localhost'
  opt :munin_port, "Munin port", :default => 4949
  opt :services, "Munin services to translate (if not specified, all services are relayed)", :type => :strings

  # Opens the munin node connection using --munin-host / --munin-port.
  # (Previously both options were parsed but never handed to the client.)
  def initialize
    @munin = ::Munin::Node.new(opts[:munin_host], opts[:munin_port])
  end

  # Fetches the configured services (or everything the node lists when
  # --services is not given) and reports each value as a float metric.
  def tick
    requested = opts[:services] || @munin.list
    requested.each do |name|
      @munin.fetch(name).each do |service, parts|
        parts.each do |part, metric|
          report(
            :service => "#{service} #{part}",
            :metric => metric.to_f,
            :state => 'ok',
            :tags => ['munin']
          )
        end
      end
    end
  end
end
|
35
|
+
|
36
|
+
Riemann::Tools::Munin.run
|
data/bin/riemann-net
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers network interface statistics from /proc/net/dev and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
# Reports per-interface network counters read from /proc/net/dev.
#
# Each tick samples the cumulative counters and reports, for every
# "<interface> <counter>" service, the change since the previous sample
# divided by opts[:interval].
class Riemann::Tools::Net
  include Riemann::Tools

  opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
  opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']

  # Counter names, in the order the numeric columns are zipped against them.
  COUNTERS = [
    'rx bytes',
    'rx packets',
    'rx errs',
    'rx drop',
    'rx fifo',
    'rx frame',
    'rx compressed',
    'rx multicast',
    'tx bytes',
    'tx packets',
    'tx errs',
    'tx drops',
    'tx fifo',
    'tx colls',
    'tx carrier',
    'tx compressed'
  ].freeze

  def initialize
    @old_state = nil
    @interfaces = opts[:interfaces].map(&:dup) if opts[:interfaces]
    @ignore_interfaces = opts[:ignore_interfaces].map(&:dup)
  end

  # Returns a hash of "<interface> <counter>" => Integer for the current
  # contents of /proc/net/dev, restricted to --interfaces (when given) and
  # with --ignore-interfaces removed.
  def state
    counters = {}

    File.read('/proc/net/dev').split("\n").each do |line|
      # Anchor at the start of the line and accept any non-space, non-colon
      # name so that interfaces such as "eth0.100" or "br-lan" keep their
      # full name instead of being cut at the first non-word character.
      next unless line =~ /\A\s*([^\s:]+):\s*([\s\d]+)/

      iface = $1
      values = $2.split(/\s+/).map { |str| str.to_i }

      COUNTERS.zip(values).each do |counter, value|
        counters["#{iface} #{counter}"] = value
      end
    end

    # Filter interfaces
    if @interfaces
      counters = counters.select do |service, _value|
        @interfaces.include? service.split(' ').first
      end
    end

    counters.reject do |service, _value|
      @ignore_interfaces.include? service.split(' ').first
    end
  end

  # Reports the per-second rate of every counter since the previous tick.
  # The first tick only records a baseline and reports nothing.
  def tick
    state = self.state

    if @old_state
      state.each do |service, metric|
        previous = @old_state[service]
        # An interface that appeared since the last sample (or a row with
        # missing columns) has nothing to diff against yet; skip it rather
        # than raising on nil arithmetic.
        next if previous.nil? || metric.nil?

        delta = metric - previous

        # 'rx drop' and 'tx drops' are both drop counters; /drop$/ alone
        # never matched the latter. The state is derived from the cumulative
        # counter value, not from the delta.
        svc_state = case service
                    when /drops?$/, /errs$/
                      metric > 0 ? 'warning' : 'ok'
                    else
                      'ok'
                    end

        report(
          :service => service.dup,
          :metric => (delta.to_f / opts[:interval]),
          :state => svc_state
        )
      end
    end

    @old_state = state
  end
end
|
100
|
+
|
101
|
+
Riemann::Tools::Net.run
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers nginx status stub statistics and submits them to Riemann.
|
4
|
+
# See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
|
5
|
+
|
6
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
7
|
+
|
8
|
+
# Polls an nginx stub-status page and reports its counters to Riemann.
#
# Every tick reports an "nginx health" event describing whether the status
# page could be fetched and parsed, followed by one "nginx <key>" event per
# selected metric.
class Riemann::Tools::NginxStatus
  include Riemann::Tools
  require 'net/http'
  require 'uri'

  opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
  opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
  opt :active_warning, "Active connections warning threshold", :default => 0
  opt :active_critical, "Active connections critical threshold", :default => 0
  opt :reading_warning, "Reading connections warning threshold", :default => 0
  opt :reading_critical, "Reading connections critical threshold", :default => 0
  opt :writing_warning, "Writing connections warning threshold", :default => 0
  opt :writing_critical, "Writing connections critical threshold", :default => 0
  opt :waiting_warning, "Waiting connections warning threshold", :default => 0
  opt :waiting_critical, "Waiting connections critical threshold", :default => 0

  def initialize
    @uri = URI.parse(opts[:uri])

    # sample response:
    #
    # Active connections: 1
    # server accepts handled requests
    #  39 39 39
    # Reading: 0 Writing: 1 Waiting: 0
    @keys = %w{active accepted handled requests reading writing waiting}
    @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
  end

  # Maps a metric to 'critical', 'warning' or 'ok' using the
  # --<key>-critical / --<key>-warning options. A threshold of 0 (the
  # default) or a key without threshold options never triggers.
  def state(key, value)
    critical_threshold = opts["#{key}_critical".to_sym]
    return 'critical' if critical_threshold && critical_threshold > 0 && value >= critical_threshold

    warning_threshold = opts["#{key}_warning".to_sym]
    return 'warning' if warning_threshold && warning_threshold > 0 && value >= warning_threshold

    'ok'
  end

  def tick
    response = nil
    begin
      # Leading :: so the lookup cannot land on a Net constant reachable
      # through the included Riemann::Tools module.
      response = ::Net::HTTP.get(@uri)
    rescue => e
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Connection error: #{e.class} - #{e.message}"
      )
    end

    return if response.nil?

    # A body that is not a stub-status page (error page, wrong URI, ...)
    # used to raise NoMethodError after "ok" had already been reported.
    parsed = @re.match(response)
    if parsed.nil?
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Unexpected response from nginx status page"
      )
      return
    end

    report(
      :service => "nginx health",
      :state => "ok",
      :description => "Nginx status connection ok"
    )

    values = parsed.captures.map { |v| v.to_i }
    selected = opts[:checks]

    @keys.zip(values).each do |key, value|
      # Honour --checks; the default lists every key, so nothing is
      # filtered unless the user narrows it.
      next if selected && !selected.include?(key)

      report({
        :service => "nginx #{key}",
        :metric => value,
        :state => state(key, value),
        :tags => ['nginx']
      })
    end
  end
end
|
83
|
+
|
84
|
+
Riemann::Tools::NginxStatus.run
|
data/bin/riemann-proc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Reports running process count to riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
# Reports how many running processes match a pattern.
#
# The count comes from `ps axo args` filtered through grep; it is reported
# as critical when outside [--proc-min-critical, --proc-max-critical] and
# ok otherwise.
class Riemann::Tools::Proc
  include Riemann::Tools
  require 'shellwords'

  opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
  opt :proc_min_critical, "running process count minimum", :default => 1
  opt :proc_max_critical, "running process count maximum", :default => 1

  # Aborts when no --proc-regex is given. Any OS other than Linux gets a
  # warning and uses the Linux check.
  def initialize
    @limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }

    abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]

    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
    @check = method :linux_proc
  end

  # Sends one event with the given service, state, metric and description.
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts matching processes and reports :critical or :ok.
  def linux_proc
    process = opts[:proc_regex]
    # Quote the pattern for the shell so spaces and metacharacters
    # (|, *, parentheses, ;) reach grep verbatim; -e keeps a pattern that
    # starts with '-' from being parsed as a grep option.
    pattern = Shellwords.escape(process)
    running = Integer(`ps axo args | grep -e #{pattern} | grep -v grep | grep -v riemann-proc | wc -l`)

    out_of_bounds = running > @limits[:critical][:max] || running < @limits[:critical][:min]
    alert "proc #{process}", (out_of_bounds ? :critical : :ok), running, "process #{process} is running #{running} instances"
  end

  def tick
    @check.call
  end
end
|
47
|
+
|
48
|
+
Riemann::Tools::Proc.run
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
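# Gathers RabbitMQ statistics from the monitoring HTTP API (/api/overview) and submits them to Riemann.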
|
3
|
+
|
4
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
5
|
+
|
6
|
+
# Polls the RabbitMQ monitoring endpoint (/api/overview) and reports its
# message_stats, queue_totals and object_totals entries to Riemann.
class Riemann::Tools::Rabbitmq
  include Riemann::Tools

  require 'faraday'
  require 'json'
  require 'uri'

  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1

  opt :monitor_user, 'RabbitMQ monitoring user', type: :string
  opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
  opt :monitor_port, 'RabbitMQ monitoring port', default: 15672
  opt :monitor_host, 'RabbitMQ monitoring host', default: "localhost"

  # URL of the overview endpoint, with the credentials embedded.
  def monitor_url
    "http://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api/overview"
  end

  # Host name attached to events: --event-host when set, otherwise the
  # monitored host.
  def event_host
    options[:event_host] || options[:monitor_host]
  end

  # Performs the GET with the configured timeouts. Returns the response, or
  # nil after reporting a critical "rabbitmq monitoring" event when the
  # request raises.
  def safe_get(uri, event_host)
    # Handle connection timeouts
    response = nil
    begin
      connection = Faraday.new(uri)
      response = connection.get do |req|
        req.options[:timeout] = options[:read_timeout]
        req.options[:open_timeout] = options[:open_timeout]
      end
    rescue => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "HTTP connection error: #{e.class} - #{e.message}"
      )
    end
    response
  end

  def tick
    uri = URI(monitor_url)
    response = safe_get(uri, event_host)

    return if response.nil?

    # Check the status before touching the body: an error response is not
    # guaranteed to be JSON, and parsing it first would raise instead of
    # reporting the failure.
    if response.status != 200
      report(:host => event_host,
        :service => "rabbitmq",
        :state => "critical",
        :description => "HTTP connection error: #{response.status} - #{response.body}"
      )
      return
    end

    begin
      json = JSON.parse(response.body)
    rescue JSON::ParserError => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "Invalid JSON in monitoring response: #{e.class} - #{e.message}"
      )
      return
    end

    report(:host => event_host,
      :service => "rabbitmq monitoring",
      :state => "ok",
      :description => "HTTP connection ok"
    )

    %w( message_stats queue_totals object_totals ).each do |stat|
      # NOTE / BUG ?
      # Brand new servers can have blank message stats. Is this ok?
      # I can't decide.
      stats = json[stat]
      # A section may also be missing entirely; treat that like a blank one.
      next if stats.nil? || stats.empty?

      stats.each_pair do |k, v|
        service = "rabbitmq.#{stat}.#{k}"
        # "*details" entries are hashes; their 'rate' field is the metric.
        metric = k =~ /details$/ ? v['rate'] : v

        # TODO: Set state via thresholds which can be configured

        report(:host => event_host,
          :service => service,
          :metric => metric,
          :description => "RabbitMQ monitor"
        )
      end
    end
  end
end
|
99
|
+
Riemann::Tools::Rabbitmq.run
|
data/bin/riemann-redis
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers redis INFO statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
# Submits every property of a Redis INFO section to Riemann as a
# "redis <property>" event tagged 'redis'.
class Riemann::Tools::Redis
  include Riemann::Tools
  require 'redis'

  opt :redis_host, "Redis hostname", :default => 'localhost'
  opt :redis_port, "Redis port", :default => 6379
  opt :redis_password, "Redis password", :default => ''
  opt :redis_url, "Redis URL", :default => ''
  opt :redis_socket, "Redis socket", :default => ''
  opt :redis_section, "Redis INFO section", :default => 'default'

  # INFO properties whose values are text rather than numbers.
  STRING_VALUES = %w{ redis_version redis_git_sha1 redis_mode os
                      multiplexing_api gcc_version run_id used_memory_human
                      used_memory_peak_human mem_allocator
                      rdb_last_bgsave_status aof_last_bgrewrite_status role }

  # Connects by URL when --redis-url is set, else by socket path when
  # --redis-socket is set, else by host and port; then authenticates if a
  # password was supplied.
  def initialize
    url = opts[:redis_url]
    socket = opts[:redis_socket]

    connection =
      if url != ''
        { :url => url }
      elsif socket != ''
        { :path => socket }
      else
        { :host => opts[:redis_host], :port => opts[:redis_port] }
      end

    @redis = ::Redis.new(connection)
    @redis.auth(opts[:redis_password]) if opts[:redis_password] != ''
    @section = opts[:redis_section]
  end

  # Reports one event per INFO property. On an "operation not permitted"
  # command error the connection is re-authenticated; other command errors
  # are ignored.
  def tick
    @redis.info(@section).each do |property, value|
      event = {
        :host => opts[:redis_host].dup,
        :service => "redis #{property}",
        :metric => value.to_f,
        :state => value.to_s,
        :tags => ['redis']
      }

      textual = STRING_VALUES.include?(property) || property.match(/^db\d+/)
      if textual
        # The two *_status properties carry their value as the event state;
        # every other textual value goes into the description.
        status = %w{ rdb_last_bgsave_status aof_last_bgrewrite_status }.include?(property)
        event[status ? :state : :description] = value
      end

      event[:metric] = 0 if property == "run_id"

      report(event)
    end
  rescue ::Redis::CommandError => e
    if e.message == "ERR operation not permitted"
      @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
    end
  end
end
|
70
|
+
|
71
|
+
Riemann::Tools::Redis.run
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers Redis SLOWLOG statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
# Submits Redis SLOWLOG entries to Riemann as 'warning' events tagged
# 'redis' and 'slowlog'.
class Riemann::Tools::RedisSlowlog
  include Riemann::Tools
  require 'redis'

  opt :redis_url, "Redis URL", :default => 'redis://127.0.0.1:6379/'
  opt :redis_password, "Redis password", :default => ''
  opt :slowlog_len, "Number of SLOWLOG entries to get", :default => 10
  opt :slowlog_reset, "Reset SLOWLOG after querying it", :default => false

  # Connects to --redis-url, remembers the slowlog settings and
  # authenticates when a password was supplied.
  def initialize
    @redis = ::Redis.new(:url => opts[:redis_url])

    @slowlog_len = opts[:slowlog_len]
    @slowlog_reset = opts[:slowlog_reset]

    password = opts[:redis_password]
    @redis.auth(password) if password != ''
  end

  # Reports each fetched slowlog entry, using its third field as the metric
  # and the inspected fourth field as the description, then resets the
  # slowlog when --slowlog-reset is set.
  def tick
    entries = @redis.slowlog("GET", @slowlog_len)

    entries.each do |_id, timestamp, us, cmd|
      report(
        :host => @redis.client.host,
        :service => "redis",
        :time => timestamp,
        :metric => us.to_f,
        :state => 'warning',
        :tags => ['redis', 'slowlog'],
        :description => cmd.inspect
      )
    end

    @redis.slowlog("RESET") if @slowlog_reset
  end
end
|
43
|
+
|
44
|
+
Riemann::Tools::RedisSlowlog.run
|