riemann-tools 1.1.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +2 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +8 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +55 -2
- data/Rakefile +17 -3
- data/bin/riemann-apache-status +1 -106
- data/bin/riemann-bench +2 -70
- data/bin/riemann-cloudant +1 -56
- data/bin/riemann-consul +1 -106
- data/bin/riemann-dir-files-count +1 -55
- data/bin/riemann-dir-space +1 -55
- data/bin/riemann-diskstats +1 -92
- data/bin/riemann-fd +2 -81
- data/bin/riemann-freeswitch +2 -119
- data/bin/riemann-haproxy +1 -58
- data/bin/riemann-health +0 -2
- data/bin/riemann-kvminstance +2 -22
- data/bin/riemann-md +8 -0
- data/bin/riemann-memcached +1 -37
- data/bin/riemann-net +0 -2
- data/bin/riemann-nginx-status +1 -85
- data/bin/riemann-ntp +0 -2
- data/bin/riemann-portcheck +1 -44
- data/bin/riemann-proc +1 -108
- data/bin/riemann-varnish +1 -54
- data/bin/riemann-wrapper +113 -0
- data/bin/riemann-zookeeper +1 -39
- data/bin/riemann-zpool +8 -0
- data/lib/riemann/tools/apache_status.rb +107 -0
- data/lib/riemann/tools/bench.rb +72 -0
- data/lib/riemann/tools/cloudant.rb +57 -0
- data/lib/riemann/tools/consul_health.rb +107 -0
- data/lib/riemann/tools/dir_files_count.rb +56 -0
- data/lib/riemann/tools/dir_space.rb +56 -0
- data/lib/riemann/tools/diskstats.rb +94 -0
- data/lib/riemann/tools/fd.rb +81 -0
- data/lib/riemann/tools/freeswitch.rb +119 -0
- data/lib/riemann/tools/haproxy.rb +59 -0
- data/lib/riemann/tools/health.rb +150 -19
- data/lib/riemann/tools/kvm.rb +23 -0
- data/lib/riemann/tools/md.rb +35 -0
- data/lib/riemann/tools/mdstat_parser.tab.rb +340 -0
- data/lib/riemann/tools/memcached.rb +38 -0
- data/lib/riemann/tools/net.rb +2 -1
- data/lib/riemann/tools/nginx_status.rb +86 -0
- data/lib/riemann/tools/ntp.rb +1 -0
- data/lib/riemann/tools/portcheck.rb +45 -0
- data/lib/riemann/tools/proc.rb +109 -0
- data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
- data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
- data/lib/riemann/tools/varnish.rb +55 -0
- data/lib/riemann/tools/version.rb +1 -1
- data/lib/riemann/tools/zookeeper.rb +40 -0
- data/lib/riemann/tools/zpool.rb +29 -0
- data/lib/riemann/tools.rb +2 -20
- data/riemann-tools.gemspec +10 -1
- data/tools/riemann-aws/Rakefile +6 -9
- data/tools/riemann-aws/bin/riemann-aws-billing +2 -87
- data/tools/riemann-aws/bin/riemann-aws-rds-status +2 -62
- data/tools/riemann-aws/bin/riemann-aws-sqs-status +2 -44
- data/tools/riemann-aws/bin/riemann-aws-status +2 -77
- data/tools/riemann-aws/bin/riemann-elb-metrics +2 -162
- data/tools/riemann-aws/bin/riemann-s3-list +2 -81
- data/tools/riemann-aws/bin/riemann-s3-status +2 -96
- data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
- data/tools/riemann-chronos/Rakefile +6 -9
- data/tools/riemann-chronos/bin/riemann-chronos +1 -154
- data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
- data/tools/riemann-docker/Rakefile +5 -8
- data/tools/riemann-docker/bin/riemann-docker +2 -200
- data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
- data/tools/riemann-elasticsearch/Rakefile +6 -9
- data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +1 -167
- data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
- data/tools/riemann-marathon/Rakefile +6 -9
- data/tools/riemann-marathon/bin/riemann-marathon +1 -156
- data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
- data/tools/riemann-mesos/Rakefile +6 -9
- data/tools/riemann-mesos/bin/riemann-mesos +1 -139
- data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
- data/tools/riemann-munin/Rakefile +5 -8
- data/tools/riemann-munin/bin/riemann-munin +1 -36
- data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
- data/tools/riemann-rabbitmq/Rakefile +6 -9
- data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +1 -266
- data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
- data/tools/riemann-riak/Rakefile +5 -8
- data/tools/riemann-riak/bin/riemann-riak +1 -316
- data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
- data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
- data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
- metadata +61 -7
- data/.travis.yml +0 -31
- data/tools/riemann-riak/riak_status/key_count.erl +0 -13
- data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
- data/tools/riemann-riak/riak_status/ringready.erl +0 -9
data/bin/riemann-proc
CHANGED
@@ -3,113 +3,6 @@
|
|
3
3
|
|
4
4
|
Process.setproctitle($PROGRAM_NAME)
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
require File.expand_path('../lib/riemann/tools', __dir__)
|
9
|
-
|
10
|
-
module Riemann
  module Tools
    # Counts the processes whose `ps` line matches a regular expression and
    # reports that count, plus per-process rss, vsize, run state and CPU time,
    # to Riemann.
    class Proc
      include Riemann::Tools

      opt :proc_regex, 'regular expression that matches the process to be monitored', type: :string, default: '.*'
      opt :proc_min_critical, 'running process count minimum', default: 0
      opt :proc_max_critical, 'running process count maximum', default: 65_536

      # Matches one line of `ps axo pid=,rss=,vsize=,state=,cputime=,lstart=,command=`.
      # Captures: pid, rss, vsize, state, cputime, start time (without the
      # leading weekday) and command. The cputime class accepts '-' so that
      # the "DD-hh:mm:ss" form is not silently skipped.
      PS_REGEX = /([0-9]+) +([0-9]+) +([0-9]+) +([A-Z]) +([0-9:.-]+) +[A-Za-z]{3} +([A-Za-z]{3} {1,2}[0-9]+ [0-9:]+ [0-9]+) +(.*)/.freeze

      def initialize
        @limits = { critical: { min: opts[:proc_min_critical], max: opts[:proc_max_critical] } }

        abort 'FATAL: specify a process regular expression, see --help for usage' unless opts[:proc_regex]

        ostype = `uname -s`.chomp.downcase
        puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == 'linux'
        @check = method :linux_proc
      end

      # Sends one event, coercing service/state to strings and metric to a float.
      def alert(service, state, metric, description)
        report(
          service: service.to_s,
          state: state.to_s,
          metric: metric.to_f,
          description: description,
        )
      end

      # Lists matching processes with ps/grep, reports how many were found
      # (critical when outside the configured min/max), then reports four
      # events per process:
      #
      #   proc <pid>-<start-time>/rss
      #   proc <pid>-<start-time>/vsize
      #   proc <pid>-<start-time>/running
      #   proc <pid>-<start-time>/cputime
      #
      # The event description is the command line; the event state is derived
      # from the ps state letter.
      def linux_proc
        process = opts[:proc_regex]
        # NOTE(review): the expression is interpolated into a single-quoted
        # shell argument; a pattern containing a single quote breaks the command.
        found = `ps axo pid=,rss=,vsize=,state=,cputime=,lstart=,command= | grep '#{process}' | grep -v grep | grep -v riemann-proc`
        count = found.count("\n")
        within_limits = count.between?(@limits[:critical][:min], @limits[:critical][:max])
        alert "proc count/#{process}", within_limits ? :ok : :critical, count, "process #{process} is running #{count} instances.\n"

        found.each_line do |line|
          m = PS_REGEX.match(line)
          next if m.nil?

          pid, rss, vsize, state, cputime, start, command = m.captures
          # The start time is only used to build a unique service name.
          start_s = DateTime.parse(start).to_time.to_i
          cputime_seconds = cputime_to_seconds(cputime)

          state_s = case state[0]
                    when 'R', 'S' then 'ok'
                    when 'I' then 'warning'
                    when 'T', 'U', 'Z' then 'critical'
                    else 'unknown'
                    end
          # Only a process in the R state counts as running.
          running = state[0] == 'R' ? 1 : 0

          {
            'rss' => rss.to_f,
            'vsize' => vsize.to_f,
            'running' => running.to_f,
            'cputime' => cputime_seconds,
          }.each do |name, value|
            report(
              service: "proc #{pid}-#{start_s}/#{name}",
              state: state_s.to_s,
              metric: value,
              description: command,
            )
          end
        end
      end

      def tick
        @check.call
      end

      private

      # Converts a ps cputime string to seconds. Handles "[DD-]hh:mm:ss" and
      # colon-separated values with a fractional last field (e.g. "mm:ss.ss").
      # Parsing is done by hand because a date parser rejects hour values of
      # 24 or more.
      def cputime_to_seconds(cputime)
        days, clock = cputime.include?('-') ? cputime.split('-', 2) : ['0', cputime]
        seconds = clock.split(':').reduce(0.0) { |total, field| (total * 60) + field.to_f }
        (days.to_i * 86_400) + seconds
      end
    end
  end
end
|
6
|
+
require 'riemann/tools/proc'
|
114
7
|
|
115
8
|
Riemann::Tools::Proc.run
|
data/bin/riemann-varnish
CHANGED
@@ -3,59 +3,6 @@
|
|
3
3
|
|
4
4
|
Process.setproctitle($PROGRAM_NAME)
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
require 'open3'
|
9
|
-
require File.expand_path('../lib/riemann/tools', __dir__)
|
10
|
-
|
11
|
-
module Riemann
  module Tools
    # Forwards a fixed set of `varnishstat` counters to Riemann.
    class Varnish
      include Riemann::Tools

      opt :varnish_host, 'Varnish hostname', default: `hostname`.chomp

      # Reads the Varnish major version from the stderr output of
      # `varnishstat -V` and selects the counter names used for that version.
      def initialize
        Open3.popen3('varnishstat -V') do |_in, _out, err, _thread|
          banner = err.read
          @ver = /varnishstat \(varnish-(\d+)/.match(banner)[1].to_i
        end

        modern_counters = [
          'MAIN.sess_conn',
          'MAIN.sess_drop ',
          'MAIN.client_req',
          'MAIN.cache_hit',
          'MAIN.cache_miss',
        ]
        legacy_counters = %w[client_conn client_drop client_req cache_hit cache_miss]

        @vstats = @ver >= 4 ? modern_counters : legacy_counters
      end

      # Runs `varnishstat -1` for the selected counters and reports one event
      # per output line: first column is the name, second the value, columns
      # from the fourth onwards the description.
      def tick
        # Version 4+ gets one -f flag per counter; older versions a comma list.
        separator = @ver >= 4 ? ' -f ' : ','
        output = `varnishstat -1 -f #{@vstats.join(separator)}`

        output.each_line do |stat|
          fields = stat.split
          report(
            host: opts[:varnish_host].dup,
            service: "varnish #{fields[0]}",
            metric: fields[1].to_f,
            state: 'ok',
            description: fields[3..].join(' '),
            tags: ['varnish'],
          )
        end
      end
    end
  end
end
|
6
|
+
require 'riemann/tools/varnish'
|
60
7
|
|
61
8
|
Riemann::Tools::Varnish.run
|
data/bin/riemann-wrapper
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
|
+
|
6
|
+
# Turns an underscored, slash-separated path such as
# "riemann/tools/apache_status" into the matching constant name
# "Riemann::Tools::ApacheStatus".
def camelize(subject)
  subject.gsub(%r{(^|[/_])([a-z])}) do
    prefix = Regexp.last_match(1)
    letter = Regexp.last_match(2)
    # '/' separates namespaces; '_' (or the line start) just begins a new word.
    namespace = prefix == '/' ? '::' : ''
    "#{namespace}#{letter.upcase}"
  end
end
|
9
|
+
|
10
|
+
# Turns a constant name such as "Riemann::Tools::ApacheStatus" into the
# matching path "riemann/tools/apache_status".
def underscore(subject)
  # Cut in front of every capital letter, so each piece is one word
  # (namespace pieces keep their trailing "::").
  words = subject.split(/(?=[A-Z])/)
  snake = words.map(&:downcase).join('_')
  # A namespace boundary now reads "::_"; turn it into a path separator.
  snake.gsub('::_', '/')
end
|
13
|
+
|
14
|
+
# Resolves a constant name (e.g. "Riemann::Tools::Fd") to the constant itself.
# Raises NameError when no such constant is defined.
def constantize(subject)
  ::Object.const_get(subject)
end
|
17
|
+
|
18
|
+
# Removes and returns the leading arguments of +argv+ up to the first "--"
# separator. The separator is consumed too but not returned, so +argv+ is
# left holding whatever followed it. Without a separator the whole array is
# consumed.
def read_flags(argv)
  separator_at = argv.index('--')
  return argv.shift(argv.size) if separator_at.nil?

  flags = argv.shift(separator_at)
  argv.shift # discard the "--" itself
  flags
end
|
29
|
+
|
30
|
+
# Prints the command-line help to standard error and exits with status 1.
def usage
  warn <<~USAGE
    usage: riemann-wrapper [common options] -- tool1 [tool1 options] [-- tool2 [tool2 options] ...]
           riemann-wrapper /path/to/configuration/file.yml

    Run multiple Riemann tools in a single process. A single connection to
    riemann is maintained and shared for all tools, the connection flags should
    only be passed as common options.

    Examples:
      1. Run the fd, health and ntp tools with default options:

         riemann-wrapper -- fd -- health -- ntp

      2. Run the fd, health and ntp tools against a remote riemann server using
         TCP and tagging each event with the name of the tool that produced it:

         riemann-wrapper --host riemann.example.com --tcp -- \\
           fd --tag=fd -- \\
           health --tag=health -- \\
           ntp --tag=ntp

      3. Same as above example, but using a configuration file (more verbose but
         easier to handle when running riemann-wrapper manually or managing it
         with a Configuration Management system):

         cat > config.yml << EOT
         ---
         options: --host riemann.example.com --tcp
         tools:
           - name: fd
             options: --tag=fd
           - name: health
             options: --tag=health
           - name: ntp
             options: --tag=ntp
         EOT
         riemann-wrapper config.yml
  USAGE
  exit 1
end
|
71
|
+
|
72
|
+
usage if ARGV.empty?

# A single argument is taken to be the path of a YAML configuration file; its
# content is expanded into the equivalent command line and substituted for ARGV.
if ARGV.size == 1
  unless File.readable?(ARGV[0])
    warn "Cannot open file for reading: #{ARGV[0]}"
    usage
  end

  require 'yaml'
  # An empty file loads as nil; treat it like an empty configuration.
  config = YAML.safe_load(File.read(ARGV[0])) || {}

  # 'options' and 'tools' may both be absent. Work on a copy so the loaded
  # configuration is not mutated (and so the buffer is never a frozen string).
  commandline = config['options'].to_s.dup
  Array(config['tools']).each { |tool| commandline << " -- #{tool['name']} #{tool['options']}" }

  # NOTE(review): splitting on whitespace means option values containing
  # spaces cannot be expressed in the configuration file.
  ARGV.replace(commandline.split)
end

argv = ARGV.dup

# Everything before the first "--" applies to every tool.
common_argv = read_flags(argv)

threads = []

# Terminate the whole process if some thread fails
Thread.abort_on_exception = true

while argv.any?
  tool = argv.shift
  tool_argv = read_flags(argv)

  require "riemann/tools/#{tool}"
  tool_class = constantize(camelize("riemann/tools/#{tool}"))

  ARGV.replace(common_argv + tool_argv)
  instance = tool_class.new
  # Force evaluation of options. This relies on ARGV and needs to be done
  # before we launch multiple threads which compete to read information from
  # there.
  instance.options
  threads << Thread.new(instance, &:run)
end

threads.each(&:join)
|
data/bin/riemann-zookeeper
CHANGED
@@ -3,44 +3,6 @@
|
|
3
3
|
|
4
4
|
Process.setproctitle($PROGRAM_NAME)
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
require File.expand_path('../lib/riemann/tools', __dir__)
|
9
|
-
|
10
|
-
module Riemann
  module Tools
    # Sends the `mntr` command to a ZooKeeper server and reports every
    # "name<TAB>value" line of the reply to Riemann.
    class Zookeeper
      include Riemann::Tools
      require 'socket'

      opt :zookeeper_host, 'Zookeeper hostname', default: 'localhost'
      opt :zookeeper_port, 'Zookeeper port', default: 2181

      # Opens a fresh connection, issues `mntr`, and reports each statistic
      # until the server closes the connection.
      def tick
        # The block form closes the socket even when reporting raises.
        TCPSocket.open(opts[:zookeeper_host], opts[:zookeeper_port]) do |sock|
          sock.sync = true
          sock.print('mntr')
          sock.flush

          while (stats = sock.gets)
            m = stats.match(/^(\w+)\t+(.*)/)
            # Skip lines that are not "name<TAB>value" (e.g. a plain-text
            # error reply) instead of failing on a nil match.
            next if m.nil?

            report(
              host: opts[:zookeeper_host].dup,
              service: "zookeeper #{m[1]}",
              metric: m[2].to_f,
              state: 'ok',
              tags: ['zookeeper'],
            )
          end
        end
      end
    end
  end
end
|
6
|
+
require 'riemann/tools/zookeeper'
|
45
7
|
|
46
8
|
Riemann::Tools::Zookeeper.run
|
data/bin/riemann-zpool
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'riemann/tools'
|
4
|
+
|
5
|
+
# Collects Apache metrics and submits them to Riemann
|
6
|
+
# More information can be found at http://httpd.apache.org/docs/2.4/mod/mod_status.html
|
7
|
+
|
8
|
+
# Removes whitespace from 'Total Accesses' and 'Total kBytes' for output to graphite
|
9
|
+
module Riemann
  module Tools
    # Fetches the machine-readable Apache mod_status page ("?auto") and
    # reports each "Key: value" line, plus a per-state count of the
    # scoreboard slots, to Riemann.
    class ApacheStatus
      include Riemann::Tools
      require 'net/http'
      require 'uri'

      opt :uri, 'Apache Server Status URI', default: 'http://localhost/server-status'

      def initialize
        # Must be a URI object: the single-argument form of Net::HTTP.get
        # does not accept a String.
        @uri = URI.join(opts[:uri], '?auto')
        # Sample Response with ExtendedStatus On
        # Total Accesses: 20643
        # Total kBytes: 36831
        # CPULoad: .0180314
        # Uptime: 43868
        # ReqPerSec: .470571
        # BytesPerSec: 859.737
        # BytesPerReq: 1827.01
        # BusyWorkers: 6
        # IdleWorkers: 94
        # Scoreboard: ___K_____K____________W_

        # Scoreboard character => metric name.
        @scoreboard_map = {
          '_' => 'waiting',
          'S' => 'starting',
          'R' => 'reading',
          'W' => 'sending',
          'K' => 'keepalive',
          'D' => 'dns',
          'C' => 'closing',
          'L' => 'logging',
          'G' => 'graceful',
          'I' => 'idle',
          '.' => 'open',
        }
      end

      # Counts the scoreboard characters of a "Scoreboard: ..." line and
      # returns a hash of state name => count. The "Scoreboard: " prefix is
      # removed from +response+ in place.
      def get_scoreboard_metrics(response)
        results = Hash.new(0)

        response.slice! 'Scoreboard: '
        response.each_char do |char|
          results[char] += 1
        end
        results.transform_keys { |k| @scoreboard_map[k] }
      end

      # Reports every key/value pair of +metrics+ as an "httpd <key>" event.
      def report_metrics(metrics)
        metrics.each do |k, v|
          report(
            service: "httpd #{k}",
            metric: v.to_f,
            state: 'ok',
            tags: ['httpd'],
          )
        end
      end

      # Fetches the status page and reports an "httpd health" event reflecting
      # whether the request succeeded. Returns the response body, or nil when
      # the request raised.
      def connection
        response = nil
        begin
          response = ::Net::HTTP.get(@uri)
        rescue StandardError => e
          report(
            service: 'httpd health',
            state: 'critical',
            description: "Httpd connection error: #{e.class} - #{e.message}",
            tags: ['httpd'],
          )
        else
          report(
            service: 'httpd health',
            state: 'ok',
            description: 'Httpd connection status ok',
            tags: ['httpd'],
          )
        end
        response
      end

      def tick
        return if (response = connection).nil?

        response.each_line do |line|
          line = line.strip
          # Blank lines carry no metric and would otherwise yield a nil key.
          next if line.empty?

          metrics = {}

          if line.include?('Scoreboard')
            metrics = get_scoreboard_metrics(line)
          else
            key, value = line.split(':')
            metrics[key.gsub(/\s/, '')] = value unless key.nil?
          end
          report_metrics(metrics)
        end
      end
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'riemann/client'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
# Connects to a server (first arg) and populates it with a constant stream of
|
8
|
+
# events for testing.
|
9
|
+
module Riemann
  module Tools
    # Load generator: keeps one synthetic event per host/service pair and
    # sends a randomly drifting version of each to a Riemann server.
    class Bench
      attr_accessor :client, :hosts, :services, :states

      # Connects to the server named by the first command-line argument
      # (default: localhost).
      def initialize
        @hosts = %w[a b c d e f g h i j]
        @services = %w[test1 test2 test3 foo bar baz xyzzy attack cat treat]
        @states = {}
        @client = Riemann::Client.new(host: (ARGV.first || 'localhost'))
      end

      # Returns the successor of +state+: the metric moves by a random amount
      # in [-0.05, 0.05), is kept within 0..1, and the state is derived from
      # the new metric (below 0.75 ok, below 0.9 warning, otherwise critical).
      def evolve(state)
        m = (state[:metric] + ((rand - 0.5) * 0.1)).clamp(0, 1)

        s = case m
            when 0...0.75
              'ok'
            when 0.75...0.9
              'warning'
            when 0.9..1.0
              'critical'
            end

        {
          metric: m,
          state: s,
          host: state[:host],
          service: state[:service],
          description: "at #{Time.now}",
        }
      end

      # Evolves every host/service state once and sends it to the client.
      def tick
        hosts.product(services).each do |id|
          client << (states[id] = evolve(states[id]))
        end
      end

      # Seeds the states, then ticks forever with a 50 ms pause before each tick.
      def run
        start
        loop do
          sleep 0.05
          tick
        end
      end

      # Seeds one initial state per host/service pair.
      def start
        hosts.product(services).each do |host, service|
          states[[host, service]] = {
            metric: 0.5,
            state: 'ok',
            description: 'Starting up',
            host: host,
            service: service,
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'riemann/tools'
|
4
|
+
|
5
|
+
# Gathers load balancer statistics from Cloudant.com (shared cluster) and submits them to Riemann.
|
6
|
+
module Riemann
  module Tools
    # Polls the Cloudant load-balancer API and reports the health and the
    # numeric properties of every node to Riemann.
    class Cloudant
      include Riemann::Tools
      require 'net/http'
      require 'json'

      opt :cloudant_username, 'Cloudant username', type: :string, required: true
      opt :cloudant_password, 'Cloudant password', type: :string, required: true

      def tick
        json.each do |node|
          break if node['svname'] == 'BACKEND' # this is just a sum of all nodes.

          ns = "cloudant #{node['pxname']}"
          cluster_name = node['tracked'].split('.')[0] # ie: meritage.cloudant.com

          # Both events below share the node's up/down state.
          node_state = node['status'] == 'UP' ? 'ok' : 'critical'

          # report health of each node.
          report(
            service: ns,
            state: node_state,
            tags: ['cloudant', cluster_name],
          )

          # report property->metric of each node.
          node.each do |property, metric|
            next if %w[pxname svname status tracked].include?(property)

            report(
              host: node['tracked'],
              service: "#{ns} #{property}",
              metric: metric.to_f,
              state: node_state,
              tags: ['cloudant', cluster_name],
            )
          end
        end
      end

      # Fetches /api/load_balancer from cloudant.com over HTTPS with basic
      # authentication and returns the parsed JSON body.
      def json
        http = ::Net::HTTP.new('cloudant.com', 443)
        http.use_ssl = true
        # The block's value (the HTTP response) is what carries the body;
        # the connection object itself has none.
        response = http.start do |h|
          get = ::Net::HTTP::Get.new('/api/load_balancer')
          get.basic_auth opts[:cloudant_username], opts[:cloudant_password]
          h.request get
        end
        JSON.parse(response.body)
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'riemann/tools'
|
4
|
+
require 'socket'
|
5
|
+
require 'net/http'
|
6
|
+
require 'uri'
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
# Reports service and node status to riemann
|
10
|
+
module Riemann
  module Tools
    # Queries the Consul HTTP API and reports, per service instance, whether
    # all of its checks pass; per service, changes in the number of passing
    # instances; and per node, how many service instances it hosts. Only the
    # agent whose address equals the current Consul leader reports.
    class ConsulHealth
      include Riemann::Tools

      opt :consul_host, 'Consul API Host (default to localhost)', default: 'localhost'
      opt :consul_port, 'Consul API Port (default to 8500)', default: '8500'
      opt :prefix, 'prefix to use for all service names when reporting', default: 'consul '
      opt :minimum_services_per_node, 'minimum services per node (default: 0)', default: 0

      def initialize
        @hostname = opts[:consul_host]
        @prefix = opts[:prefix]
        @minimum_services_per_node = opts[:minimum_services_per_node]
        # Resolved once so it can be compared with the leader address in #tick.
        @underlying_ip = IPSocket.getaddress(@hostname)

        api = "http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1"
        @consul_leader_url = URI.parse("#{api}/status/leader")
        @consul_services_url = URI.parse("#{api}/catalog/services")
        @consul_nodes_url = URI.parse("#{api}/catalog/nodes")
        @consul_health_url_prefix = "#{api}/health/service/"

        # service name => number of passing instances seen on the previous tick
        @last_services_read = {}
      end

      # Sends one event for +hostname+; service and state are coerced to strings.
      def alert(hostname, service, state, metric, description)
        # Named `event` so it does not shadow the `opts` option accessor.
        event = {
          host: hostname,
          service: service.to_s,
          state: state.to_s,
          metric: metric,
          description: description,
        }

        report(event)
      end

      # Returns the response body of a GET request to +url+.
      def get(url)
        ::Net::HTTP.get_response(url).body
      end

      def tick
        leader = JSON.parse(get(@consul_leader_url))
        leader_hostname = URI.parse("http://#{leader}").hostname

        # Stay silent unless this agent is the leader.
        return unless leader_hostname == @underlying_ip

        nodes = JSON.parse(get(@consul_nodes_url))
        services = JSON.parse(get(@consul_services_url))

        # Every known node starts at zero so that nodes without any service
        # are still reported.
        services_by_nodes = Hash.new(0)
        nodes.each { |node| services_by_nodes[node['Node']] = 0 }

        # For every service
        services.each do |service|
          service_name = service[0]
          health_url = URI.parse(@consul_health_url_prefix + service_name)
          health_nodes = JSON.parse(get(health_url))

          total_count = 0
          ok_count = 0

          health_nodes.each do |node|
            hostname = node['Node']['Node']
            ok = node['Checks'].all? { |check| check['Status'] == 'passing' }
            alert(hostname, "#{@prefix}#{service_name}", ok ? :ok : :critical, ok ? 1 : 0, JSON.generate(node))
            total_count += 1
            ok_count += 1 if ok

            services_by_nodes[hostname] += 1
          end

          # Only report the aggregate when the passing count changed since
          # the previous tick; a drop is critical.
          last_ok = @last_services_read[service_name]
          if !last_ok.nil? && last_ok != ok_count
            alert(
              'total', "#{@prefix}#{service_name}-count", ok_count >= last_ok ? :ok : :critical, ok_count,
              "Number of passing #{service_name} is: #{ok_count}/#{total_count}, Last time it was: #{last_ok}",
            )
          end

          @last_services_read[service_name] = ok_count
        end

        # For every node
        services_by_nodes.each do |node, count|
          alert(
            node, "#{@prefix}total-services", count >= @minimum_services_per_node ? :ok : :critical, count,
            "#{count} services in the specified node",
          )
        end
      end
    end
  end
end
|