riemann-tools 0.2.13 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.docker/Dockerfile +7 -0
- data/.docker/publish.sh +35 -0
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +42 -0
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.rubocop.yml +32 -0
- data/.travis.yml +31 -0
- data/CHANGELOG.md +422 -0
- data/Gemfile +6 -0
- data/ISSUE_TEMPLATE.md +15 -0
- data/README.markdown +14 -15
- data/Rakefile +23 -0
- data/SECURITY.md +42 -0
- data/bin/riemann-apache-status +92 -77
- data/bin/riemann-bench +54 -48
- data/bin/riemann-cloudant +44 -39
- data/bin/riemann-consul +82 -75
- data/bin/riemann-dir-files-count +53 -46
- data/bin/riemann-dir-space +53 -46
- data/bin/riemann-diskstats +78 -74
- data/bin/riemann-fd +68 -47
- data/bin/riemann-freeswitch +108 -102
- data/bin/riemann-haproxy +46 -39
- data/bin/riemann-health +4 -335
- data/bin/riemann-kvminstance +18 -12
- data/bin/riemann-memcached +35 -28
- data/bin/riemann-net +4 -103
- data/bin/riemann-nginx-status +74 -66
- data/bin/riemann-ntp +4 -32
- data/bin/riemann-portcheck +40 -30
- data/bin/riemann-proc +96 -89
- data/bin/riemann-varnish +51 -44
- data/bin/riemann-zookeeper +38 -33
- data/lib/riemann/tools/health.rb +347 -0
- data/lib/riemann/tools/net.rb +104 -0
- data/lib/riemann/tools/ntp.rb +41 -0
- data/lib/riemann/tools/utils.rb +17 -0
- data/lib/riemann/tools/version.rb +7 -0
- data/lib/riemann/tools.rb +40 -33
- data/riemann-tools.gemspec +42 -0
- data/tools/riemann-aws/LICENSE +21 -0
- data/tools/riemann-aws/README.md +54 -0
- data/tools/riemann-aws/Rakefile +37 -0
- data/tools/riemann-aws/bin/riemann-aws-billing +93 -0
- data/tools/riemann-aws/bin/riemann-aws-rds-status +68 -0
- data/tools/riemann-aws/bin/riemann-aws-sqs-status +50 -0
- data/tools/riemann-aws/bin/riemann-aws-status +83 -0
- data/tools/riemann-aws/bin/riemann-elb-metrics +168 -0
- data/tools/riemann-aws/bin/riemann-s3-list +87 -0
- data/tools/riemann-aws/bin/riemann-s3-status +102 -0
- data/tools/riemann-chronos/LICENSE +21 -0
- data/tools/riemann-chronos/README.md +10 -0
- data/tools/riemann-chronos/Rakefile +37 -0
- data/tools/riemann-chronos/bin/riemann-chronos +161 -0
- data/tools/riemann-docker/LICENSE +21 -0
- data/tools/riemann-docker/README.md +10 -0
- data/tools/riemann-docker/Rakefile +36 -0
- data/tools/riemann-docker/bin/riemann-docker +206 -0
- data/tools/riemann-elasticsearch/LICENSE +21 -0
- data/tools/riemann-elasticsearch/README.md +10 -0
- data/tools/riemann-elasticsearch/Rakefile +37 -0
- data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +174 -0
- data/tools/riemann-marathon/LICENSE +21 -0
- data/tools/riemann-marathon/README.md +10 -0
- data/tools/riemann-marathon/Rakefile +37 -0
- data/tools/riemann-marathon/bin/riemann-marathon +163 -0
- data/tools/riemann-mesos/LICENSE +21 -0
- data/tools/riemann-mesos/README.md +10 -0
- data/tools/riemann-mesos/Rakefile +37 -0
- data/tools/riemann-mesos/bin/riemann-mesos +146 -0
- data/tools/riemann-munin/LICENSE +21 -0
- data/tools/riemann-munin/README.md +10 -0
- data/tools/riemann-munin/Rakefile +36 -0
- data/tools/riemann-munin/bin/riemann-munin +43 -0
- data/tools/riemann-rabbitmq/LICENSE +21 -0
- data/tools/riemann-rabbitmq/README.md +10 -0
- data/tools/riemann-rabbitmq/Rakefile +37 -0
- data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +273 -0
- data/tools/riemann-riak/LICENSE +21 -0
- data/tools/riemann-riak/README.md +10 -0
- data/tools/riemann-riak/Rakefile +36 -0
- data/tools/riemann-riak/bin/riemann-riak +323 -0
- data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
- data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
- data/tools/riemann-riak/riak_status/key_count.erl +13 -0
- data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
- data/tools/riemann-riak/riak_status/ringready.erl +9 -0
- metadata +195 -34
data/bin/riemann-apache-status
CHANGED
@@ -1,98 +1,113 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
2
5
|
|
3
6
|
# Collects Apache metrics and submits them to Riemann
|
4
7
|
# More information can be found at http://httpd.apache.org/docs/2.4/mod/mod_status.html
|
5
8
|
|
6
9
|
# Removes whitespace from 'Total Accesses' and 'Total kBytes' for output to graphite
|
7
10
|
|
8
|
-
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
11
|
+
require File.expand_path('../lib/riemann/tools', __dir__)
|
12
|
+
|
13
|
+
module Riemann
|
14
|
+
module Tools
|
15
|
+
class ApacheStatus
|
16
|
+
include Riemann::Tools
|
17
|
+
require 'net/http'
|
18
|
+
require 'uri'
|
19
|
+
|
20
|
+
opt :uri, 'Apache Server Status URI', default: 'http://localhost/server-status'
|
21
|
+
|
22
|
+
def initialize
|
23
|
+
@uri = "#{URI.parse(opts[:uri])}?auto"
|
24
|
+
# Sample Response with ExtendedStatus On
|
25
|
+
# Total Accesses: 20643
|
26
|
+
# Total kBytes: 36831
|
27
|
+
# CPULoad: .0180314
|
28
|
+
# Uptime: 43868
|
29
|
+
# ReqPerSec: .470571
|
30
|
+
# BytesPerSec: 859.737
|
31
|
+
# BytesPerReq: 1827.01
|
32
|
+
# BusyWorkers: 6
|
33
|
+
# IdleWorkers: 94
|
34
|
+
# Scoreboard: ___K_____K____________W_
|
35
|
+
|
36
|
+
@scoreboard_map = {
|
37
|
+
'_' => 'waiting',
|
38
|
+
'S' => 'starting',
|
39
|
+
'R' => 'reading',
|
40
|
+
'W' => 'sending',
|
41
|
+
'K' => 'keepalive',
|
42
|
+
'D' => 'dns',
|
43
|
+
'C' => 'closing',
|
44
|
+
'L' => 'logging',
|
45
|
+
'G' => 'graceful',
|
46
|
+
'I' => 'idle',
|
47
|
+
'.' => 'open',
|
48
|
+
}
|
49
|
+
end
|
35
50
|
|
51
|
+
def get_scoreboard_metrics(response)
|
52
|
+
results = Hash.new(0)
|
36
53
|
|
37
|
-
|
38
|
-
|
54
|
+
response.slice! 'Scoreboard: '
|
55
|
+
response.each_char do |char|
|
56
|
+
results[char] += 1
|
57
|
+
end
|
58
|
+
results.transform_keys { |k| @scoreboard_map[k] }
|
59
|
+
end
|
39
60
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
61
|
+
def report_metrics(metrics)
|
62
|
+
metrics.each do |k, v|
|
63
|
+
report(
|
64
|
+
service: "httpd #{k}",
|
65
|
+
metric: v.to_f,
|
66
|
+
state: 'ok',
|
67
|
+
tags: ['httpd'],
|
68
|
+
)
|
69
|
+
end
|
70
|
+
end
|
46
71
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
72
|
+
def connection
|
73
|
+
response = nil
|
74
|
+
begin
|
75
|
+
response = Net::HTTP.get(@uri)
|
76
|
+
rescue StandardError => e
|
77
|
+
report(
|
78
|
+
service: 'httpd health',
|
79
|
+
state: 'critical',
|
80
|
+
description: "Httpd connection error: #{e.class} - #{e.message}",
|
81
|
+
tags: ['httpd'],
|
82
|
+
)
|
83
|
+
else
|
84
|
+
report(
|
85
|
+
service: 'httpd health',
|
86
|
+
state: 'ok',
|
87
|
+
description: 'Httpd connection status ok',
|
88
|
+
tags: ['httpd'],
|
89
|
+
)
|
90
|
+
end
|
91
|
+
response
|
92
|
+
end
|
57
93
|
|
58
|
-
|
59
|
-
|
60
|
-
begin
|
61
|
-
response = Net::HTTP.get(@uri)
|
62
|
-
rescue => e
|
63
|
-
report(
|
64
|
-
:service => 'httpd health',
|
65
|
-
:state => 'critical',
|
66
|
-
:description => 'Httpd connection error: #{e.class} - #{e.message}',
|
67
|
-
:tags => ['httpd']
|
68
|
-
)
|
69
|
-
else
|
70
|
-
report(
|
71
|
-
:service => 'httpd health',
|
72
|
-
:state => 'ok',
|
73
|
-
:description => 'Httpd connection status ok',
|
74
|
-
:tags => ['httpd']
|
75
|
-
)
|
76
|
-
end
|
77
|
-
response
|
78
|
-
end
|
94
|
+
def tick
|
95
|
+
return if (response = connection).nil?
|
79
96
|
|
80
|
-
|
81
|
-
|
82
|
-
response.each_line do |line|
|
83
|
-
metrics = Hash.new
|
97
|
+
response.each_line do |line|
|
98
|
+
metrics = {}
|
84
99
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
100
|
+
if line =~ /Scoreboard/
|
101
|
+
metrics = get_scoreboard_metrics(line.strip)
|
102
|
+
else
|
103
|
+
key, value = line.strip.split(':')
|
104
|
+
metrics[key.gsub(/\s/, '')] = value
|
105
|
+
end
|
106
|
+
report_metrics(metrics)
|
90
107
|
end
|
91
|
-
report_metrics(metrics)
|
92
108
|
end
|
93
109
|
end
|
94
110
|
end
|
95
|
-
|
96
111
|
end
|
97
112
|
|
98
113
|
Riemann::Tools::ApacheStatus.run
|
data/bin/riemann-bench
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
2
5
|
|
3
6
|
# Connects to a server (first arg) and populates it with a constant stream of
|
4
7
|
# events for testing.
|
@@ -7,62 +10,65 @@ require 'rubygems'
|
|
7
10
|
require 'riemann/client'
|
8
11
|
require 'pp'
|
9
12
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
module Riemann
|
14
|
+
class Bench
|
15
|
+
attr_accessor :client, :hosts, :services, :states
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@hosts = [nil] + (0...10).map { |i| "host#{i}" }
|
19
|
+
@hosts = %w[a b c d e f g h i j]
|
20
|
+
@services = %w[test1 test2 test3 foo bar baz xyzzy attack cat treat]
|
21
|
+
@states = {}
|
22
|
+
@client = Riemann::Client.new(host: (ARGV.first || 'localhost'))
|
23
|
+
end
|
24
|
+
|
25
|
+
def evolve(state)
|
26
|
+
m = state[:metric] + (rand - 0.5) * 0.1
|
27
|
+
m = [[0, m].max, 1].min
|
19
28
|
|
20
|
-
|
21
|
-
|
22
|
-
|
29
|
+
s = case m
|
30
|
+
when 0...0.75
|
31
|
+
'ok'
|
32
|
+
when 0.75...0.9
|
33
|
+
'warning'
|
34
|
+
when 0.9..1.0
|
35
|
+
'critical'
|
36
|
+
end
|
23
37
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
38
|
+
{
|
39
|
+
metric: m,
|
40
|
+
state: s,
|
41
|
+
host: state[:host],
|
42
|
+
service: state[:service],
|
43
|
+
description: "at #{Time.now}",
|
44
|
+
}
|
31
45
|
end
|
32
46
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
:description => "at #{Time.now}"
|
39
|
-
}
|
40
|
-
end
|
41
|
-
|
42
|
-
def tick
|
43
|
-
# pp @states
|
44
|
-
hosts.product(services).each do |id|
|
45
|
-
client << (states[id] = evolve(states[id]))
|
47
|
+
def tick
|
48
|
+
# pp @states
|
49
|
+
hosts.product(services).each do |id|
|
50
|
+
client << (states[id] = evolve(states[id]))
|
51
|
+
end
|
46
52
|
end
|
47
|
-
end
|
48
53
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
+
def run
|
55
|
+
start
|
56
|
+
loop do
|
57
|
+
sleep 0.05
|
58
|
+
tick
|
59
|
+
end
|
54
60
|
end
|
55
|
-
end
|
56
61
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
62
|
+
def start
|
63
|
+
hosts.product(services).each do |host, service|
|
64
|
+
states[[host, service]] = {
|
65
|
+
metric: 0.5,
|
66
|
+
state: 'ok',
|
67
|
+
description: 'Starting up',
|
68
|
+
host: host,
|
69
|
+
service: service,
|
70
|
+
}
|
71
|
+
end
|
66
72
|
end
|
67
73
|
end
|
68
74
|
end
|
data/bin/riemann-cloudant
CHANGED
@@ -1,58 +1,63 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
4
5
|
|
5
|
-
|
6
|
+
# Gathers load balancer statistics from Cloudant.com (shared cluster) and submits them to Riemann.
|
6
7
|
|
7
|
-
|
8
|
-
include Riemann::Tools
|
9
|
-
require 'net/http'
|
10
|
-
require 'json'
|
8
|
+
require File.expand_path('../lib/riemann/tools', __dir__)
|
11
9
|
|
12
|
-
|
13
|
-
|
10
|
+
module Riemann
|
11
|
+
module Tools
|
12
|
+
class Cloudant
|
13
|
+
include Riemann::Tools
|
14
|
+
require 'net/http'
|
15
|
+
require 'json'
|
14
16
|
|
15
|
-
|
16
|
-
|
17
|
-
json.each do |node|
|
18
|
-
return if node['svname'] == 'BACKEND' # this is just a sum of all nodes.
|
17
|
+
opt :cloudant_username, 'Cloudant username', type: :string, required: true
|
18
|
+
opt :cloudant_password, 'Cloudant pasword', type: :string, required: true
|
19
19
|
|
20
|
-
|
21
|
-
|
20
|
+
def tick
|
21
|
+
json.each do |node|
|
22
|
+
break if node['svname'] == 'BACKEND' # this is just a sum of all nodes.
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
:service => ns,
|
26
|
-
:state => (node['status'] == 'UP' ? 'ok' : 'critical'),
|
27
|
-
:tags => ['cloudant', cluster_name]
|
28
|
-
)
|
24
|
+
ns = "cloudant #{node['pxname']}"
|
25
|
+
cluster_name = node['tracked'].split('.')[0] # ie: meritage.cloudant.com
|
29
26
|
|
30
|
-
|
31
|
-
node.each do |property, metric|
|
32
|
-
unless ['pxname', 'svname', 'status', 'tracked'].include?(property)
|
27
|
+
# report health of each node.
|
33
28
|
report(
|
34
|
-
:host => node['tracked'],
|
35
|
-
:service => "#{ns} #{property}",
|
36
|
-
:metric => metric.to_f,
|
37
|
-
:state => (node['status'] == 'UP' ? 'ok' : 'critical'),
|
38
|
-
:tags => ['cloudant', cluster_name]
|
29
|
+
service: ns,
|
30
|
+
state: (node['status'] == 'UP' ? 'ok' : 'critical'),
|
31
|
+
tags: ['cloudant', cluster_name],
|
39
32
|
)
|
33
|
+
|
34
|
+
# report property->metric of each node.
|
35
|
+
node.each do |property, metric|
|
36
|
+
next if %w[pxname svname status tracked].include?(property)
|
37
|
+
|
38
|
+
report(
|
39
|
+
host: node['tracked'],
|
40
|
+
service: "#{ns} #{property}",
|
41
|
+
metric: metric.to_f,
|
42
|
+
state: (node['status'] == 'UP' ? 'ok' : 'critical'),
|
43
|
+
tags: ['cloudant', cluster_name],
|
44
|
+
)
|
45
|
+
end
|
40
46
|
end
|
41
47
|
end
|
42
48
|
|
49
|
+
def json
|
50
|
+
http = Net::HTTP.new('cloudant.com', 443)
|
51
|
+
http.use_ssl = true
|
52
|
+
http.start do |h|
|
53
|
+
get = Net::HTTP::Get.new('/api/load_balancer')
|
54
|
+
get.basic_auth opts[:cloudant_username], opts[:cloudant_password]
|
55
|
+
h.request get
|
56
|
+
end
|
57
|
+
JSON.parse(http.boby)
|
58
|
+
end
|
43
59
|
end
|
44
60
|
end
|
45
|
-
|
46
|
-
def get_json
|
47
|
-
http = Net::HTTP.new('cloudant.com', 443)
|
48
|
-
http.use_ssl = true
|
49
|
-
http.start do |h|
|
50
|
-
get = Net::HTTP::Get.new('/api/load_balancer')
|
51
|
-
get.basic_auth opts[:cloudant_username], opts[:cloudant_password]
|
52
|
-
h.request get
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
61
|
end
|
57
62
|
|
58
63
|
Riemann::Tools::Cloudant.run
|
data/bin/riemann-consul
CHANGED
@@ -1,105 +1,112 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
2
5
|
|
3
6
|
# Reports service and node status to riemann
|
4
7
|
|
5
|
-
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
8
|
+
require File.expand_path('../lib/riemann/tools', __dir__)
|
6
9
|
require 'socket'
|
7
10
|
require 'net/http'
|
8
11
|
require 'uri'
|
9
12
|
require 'json'
|
10
13
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
14
|
+
module Riemann
|
15
|
+
module Tools
|
16
|
+
class ConsulHealth
|
17
|
+
include Riemann::Tools
|
18
|
+
|
19
|
+
opt :consul_host, 'Consul API Host (default to localhost)', default: 'localhost'
|
20
|
+
opt :consul_port, 'Consul API Host (default to 8500)', default: '8500'
|
21
|
+
opt :prefix, 'prefix to use for all service names when reporting', default: 'consul '
|
22
|
+
opt :minimum_services_per_node, 'minimum services per node (default: 0)', default: 0
|
23
|
+
|
24
|
+
def initialize
|
25
|
+
@hostname = opts[:consul_host]
|
26
|
+
@prefix = opts[:prefix]
|
27
|
+
@minimum_services_per_node = opts[:minimum_services_per_node]
|
28
|
+
@underlying_ip = IPSocket.getaddress(@hostname)
|
29
|
+
@consul_leader_url = URI.parse("http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1/status/leader")
|
30
|
+
@consul_services_url = URI.parse("http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1/catalog/services")
|
31
|
+
@consul_nodes_url = URI.parse("http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1/catalog/nodes")
|
32
|
+
@consul_health_url_prefix = "http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1/health/service/"
|
33
|
+
|
34
|
+
@last_services_read = {}
|
35
|
+
end
|
33
36
|
|
34
|
-
|
37
|
+
def alert(hostname, service, state, metric, description)
|
38
|
+
opts = {
|
39
|
+
host: hostname,
|
40
|
+
service: service.to_s,
|
41
|
+
state: state.to_s,
|
42
|
+
metric: metric,
|
43
|
+
description: description,
|
44
|
+
}
|
35
45
|
|
36
|
-
|
37
|
-
|
38
|
-
:state => state.to_s,
|
39
|
-
:metric => metric,
|
40
|
-
:description => description }
|
46
|
+
report(opts)
|
47
|
+
end
|
41
48
|
|
42
|
-
|
43
|
-
|
49
|
+
def get(url)
|
50
|
+
Net::HTTP.get_response(url).body
|
51
|
+
end
|
44
52
|
|
45
|
-
|
46
|
-
|
47
|
-
|
53
|
+
def tick
|
54
|
+
leader = JSON.parse(get(@consul_leader_url))
|
55
|
+
leader_hostname = URI.parse("http://#{leader}").hostname
|
48
56
|
|
49
|
-
|
57
|
+
return unless leader_hostname == @underlying_ip
|
50
58
|
|
51
|
-
|
52
|
-
|
59
|
+
nodes = JSON.parse(get(@consul_nodes_url))
|
60
|
+
services = JSON.parse(get(@consul_services_url))
|
61
|
+
services_by_nodes = {}
|
53
62
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
63
|
+
nodes.each do |node|
|
64
|
+
node_name = node['Node']
|
65
|
+
services_by_nodes[node_name] = 0
|
66
|
+
end
|
58
67
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
68
|
+
# For every service
|
69
|
+
services.each do |service|
|
70
|
+
service_name = service[0]
|
71
|
+
health_url = URI.parse(@consul_health_url_prefix + service_name)
|
72
|
+
health_nodes = JSON.parse(get(health_url))
|
63
73
|
|
74
|
+
total_count = 0
|
75
|
+
ok_count = 0
|
64
76
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
77
|
+
health_nodes.each do |node|
|
78
|
+
hostname = node['Node']['Node']
|
79
|
+
ok = node['Checks'].all? { |check| check['Status'] == 'passing' }
|
80
|
+
alert(hostname, "#{@prefix}#{service_name}", ok ? :ok : :critical, ok ? 1 : 0, JSON.generate(node))
|
81
|
+
total_count += 1
|
82
|
+
ok_count += ok ? 1 : 0
|
70
83
|
|
71
|
-
|
72
|
-
|
84
|
+
last_services_by_nodes = services_by_nodes[hostname].to_i
|
85
|
+
services_by_nodes[hostname] = last_services_by_nodes + 1
|
86
|
+
end
|
73
87
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
88
|
+
unless @last_services_read[service_name].nil?
|
89
|
+
last_ok = @last_services_read[service_name]
|
90
|
+
if last_ok != ok_count
|
91
|
+
alert(
|
92
|
+
'total', "#{@prefix}#{service_name}-count", ok_count >= last_ok ? :ok : :critical, ok_count,
|
93
|
+
"Number of passing #{service_name} is: #{ok_count}/#{total_count}, Last time it was: #{last_ok}",
|
94
|
+
)
|
95
|
+
end
|
96
|
+
end
|
80
97
|
|
81
|
-
|
82
|
-
services_by_nodes[hostname] = last_services_by_nodes + 1
|
98
|
+
@last_services_read[service_name] = ok_count
|
83
99
|
end
|
84
100
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
101
|
+
# For every node
|
102
|
+
services_by_nodes.each do |node, count|
|
103
|
+
alert(
|
104
|
+
node, "#{@prefix}total-services", count >= @minimum_services_per_node ? :ok : :critical, count,
|
105
|
+
"#{count} services in the specified node",
|
106
|
+
)
|
90
107
|
end
|
91
|
-
|
92
|
-
@last_services_read[service_name] = okCount
|
93
|
-
|
94
|
-
end
|
95
|
-
|
96
|
-
# For every node
|
97
|
-
for node,count in services_by_nodes
|
98
|
-
alert(node, "#{@prefix}total-services", (count >= @minimum_services_per_node) ? :ok : :critical, count, "#{count} services in the specified node")
|
99
108
|
end
|
100
|
-
|
101
109
|
end
|
102
|
-
|
103
110
|
end
|
104
111
|
end
|
105
112
|
|