riemann-tools 0.2.13 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.docker/Dockerfile +7 -0
- data/.docker/publish.sh +35 -0
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +42 -0
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.rubocop.yml +32 -0
- data/.travis.yml +31 -0
- data/CHANGELOG.md +422 -0
- data/Gemfile +6 -0
- data/ISSUE_TEMPLATE.md +15 -0
- data/README.markdown +14 -15
- data/Rakefile +23 -0
- data/SECURITY.md +42 -0
- data/bin/riemann-apache-status +92 -77
- data/bin/riemann-bench +54 -48
- data/bin/riemann-cloudant +44 -39
- data/bin/riemann-consul +82 -75
- data/bin/riemann-dir-files-count +53 -46
- data/bin/riemann-dir-space +53 -46
- data/bin/riemann-diskstats +78 -74
- data/bin/riemann-fd +68 -47
- data/bin/riemann-freeswitch +108 -102
- data/bin/riemann-haproxy +46 -39
- data/bin/riemann-health +4 -335
- data/bin/riemann-kvminstance +18 -12
- data/bin/riemann-memcached +35 -28
- data/bin/riemann-net +4 -103
- data/bin/riemann-nginx-status +74 -66
- data/bin/riemann-ntp +4 -32
- data/bin/riemann-portcheck +40 -30
- data/bin/riemann-proc +96 -89
- data/bin/riemann-varnish +51 -44
- data/bin/riemann-zookeeper +38 -33
- data/lib/riemann/tools/health.rb +347 -0
- data/lib/riemann/tools/net.rb +104 -0
- data/lib/riemann/tools/ntp.rb +41 -0
- data/lib/riemann/tools/utils.rb +17 -0
- data/lib/riemann/tools/version.rb +7 -0
- data/lib/riemann/tools.rb +40 -33
- data/riemann-tools.gemspec +42 -0
- data/tools/riemann-aws/LICENSE +21 -0
- data/tools/riemann-aws/README.md +54 -0
- data/tools/riemann-aws/Rakefile +37 -0
- data/tools/riemann-aws/bin/riemann-aws-billing +93 -0
- data/tools/riemann-aws/bin/riemann-aws-rds-status +68 -0
- data/tools/riemann-aws/bin/riemann-aws-sqs-status +50 -0
- data/tools/riemann-aws/bin/riemann-aws-status +83 -0
- data/tools/riemann-aws/bin/riemann-elb-metrics +168 -0
- data/tools/riemann-aws/bin/riemann-s3-list +87 -0
- data/tools/riemann-aws/bin/riemann-s3-status +102 -0
- data/tools/riemann-chronos/LICENSE +21 -0
- data/tools/riemann-chronos/README.md +10 -0
- data/tools/riemann-chronos/Rakefile +37 -0
- data/tools/riemann-chronos/bin/riemann-chronos +161 -0
- data/tools/riemann-docker/LICENSE +21 -0
- data/tools/riemann-docker/README.md +10 -0
- data/tools/riemann-docker/Rakefile +36 -0
- data/tools/riemann-docker/bin/riemann-docker +206 -0
- data/tools/riemann-elasticsearch/LICENSE +21 -0
- data/tools/riemann-elasticsearch/README.md +10 -0
- data/tools/riemann-elasticsearch/Rakefile +37 -0
- data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +174 -0
- data/tools/riemann-marathon/LICENSE +21 -0
- data/tools/riemann-marathon/README.md +10 -0
- data/tools/riemann-marathon/Rakefile +37 -0
- data/tools/riemann-marathon/bin/riemann-marathon +163 -0
- data/tools/riemann-mesos/LICENSE +21 -0
- data/tools/riemann-mesos/README.md +10 -0
- data/tools/riemann-mesos/Rakefile +37 -0
- data/tools/riemann-mesos/bin/riemann-mesos +146 -0
- data/tools/riemann-munin/LICENSE +21 -0
- data/tools/riemann-munin/README.md +10 -0
- data/tools/riemann-munin/Rakefile +36 -0
- data/tools/riemann-munin/bin/riemann-munin +43 -0
- data/tools/riemann-rabbitmq/LICENSE +21 -0
- data/tools/riemann-rabbitmq/README.md +10 -0
- data/tools/riemann-rabbitmq/Rakefile +37 -0
- data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +273 -0
- data/tools/riemann-riak/LICENSE +21 -0
- data/tools/riemann-riak/README.md +10 -0
- data/tools/riemann-riak/Rakefile +36 -0
- data/tools/riemann-riak/bin/riemann-riak +323 -0
- data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
- data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
- data/tools/riemann-riak/riak_status/key_count.erl +13 -0
- data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
- data/tools/riemann-riak/riak_status/ringready.erl +9 -0
- metadata +195 -34
data/bin/riemann-apache-status
CHANGED
@@ -1,98 +1,113 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
2
5
|
|
3
6
|
# Collects Apache metrics and submits them to Riemann
|
4
7
|
# More information can be found at http://httpd.apache.org/docs/2.4/mod/mod_status.html
|
5
8
|
|
6
9
|
# Removes whitespace from 'Total Accesses' and 'Total kBytes' for output to graphite
|
7
10
|
|
8
|
-
require File.expand_path('
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
11
|
+
require File.expand_path('../lib/riemann/tools', __dir__)
|
12
|
+
|
13
|
+
module Riemann
|
14
|
+
module Tools
|
15
|
+
class ApacheStatus
|
16
|
+
include Riemann::Tools
|
17
|
+
require 'net/http'
|
18
|
+
require 'uri'
|
19
|
+
|
20
|
+
opt :uri, 'Apache Server Status URI', default: 'http://localhost/server-status'
|
21
|
+
|
22
|
+
def initialize
|
23
|
+
@uri = "#{URI.parse(opts[:uri])}?auto"
|
24
|
+
# Sample Response with ExtendedStatus On
|
25
|
+
# Total Accesses: 20643
|
26
|
+
# Total kBytes: 36831
|
27
|
+
# CPULoad: .0180314
|
28
|
+
# Uptime: 43868
|
29
|
+
# ReqPerSec: .470571
|
30
|
+
# BytesPerSec: 859.737
|
31
|
+
# BytesPerReq: 1827.01
|
32
|
+
# BusyWorkers: 6
|
33
|
+
# IdleWorkers: 94
|
34
|
+
# Scoreboard: ___K_____K____________W_
|
35
|
+
|
36
|
+
@scoreboard_map = {
|
37
|
+
'_' => 'waiting',
|
38
|
+
'S' => 'starting',
|
39
|
+
'R' => 'reading',
|
40
|
+
'W' => 'sending',
|
41
|
+
'K' => 'keepalive',
|
42
|
+
'D' => 'dns',
|
43
|
+
'C' => 'closing',
|
44
|
+
'L' => 'logging',
|
45
|
+
'G' => 'graceful',
|
46
|
+
'I' => 'idle',
|
47
|
+
'.' => 'open',
|
48
|
+
}
|
49
|
+
end
|
35
50
|
|
51
|
+
def get_scoreboard_metrics(response)
|
52
|
+
results = Hash.new(0)
|
36
53
|
|
37
|
-
|
38
|
-
|
54
|
+
response.slice! 'Scoreboard: '
|
55
|
+
response.each_char do |char|
|
56
|
+
results[char] += 1
|
57
|
+
end
|
58
|
+
results.transform_keys { |k| @scoreboard_map[k] }
|
59
|
+
end
|
39
60
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
61
|
+
def report_metrics(metrics)
|
62
|
+
metrics.each do |k, v|
|
63
|
+
report(
|
64
|
+
service: "httpd #{k}",
|
65
|
+
metric: v.to_f,
|
66
|
+
state: 'ok',
|
67
|
+
tags: ['httpd'],
|
68
|
+
)
|
69
|
+
end
|
70
|
+
end
|
46
71
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
72
|
+
def connection
|
73
|
+
response = nil
|
74
|
+
begin
|
75
|
+
response = Net::HTTP.get(@uri)
|
76
|
+
rescue StandardError => e
|
77
|
+
report(
|
78
|
+
service: 'httpd health',
|
79
|
+
state: 'critical',
|
80
|
+
description: "Httpd connection error: #{e.class} - #{e.message}",
|
81
|
+
tags: ['httpd'],
|
82
|
+
)
|
83
|
+
else
|
84
|
+
report(
|
85
|
+
service: 'httpd health',
|
86
|
+
state: 'ok',
|
87
|
+
description: 'Httpd connection status ok',
|
88
|
+
tags: ['httpd'],
|
89
|
+
)
|
90
|
+
end
|
91
|
+
response
|
92
|
+
end
|
57
93
|
|
58
|
-
|
59
|
-
|
60
|
-
begin
|
61
|
-
response = Net::HTTP.get(@uri)
|
62
|
-
rescue => e
|
63
|
-
report(
|
64
|
-
:service => 'httpd health',
|
65
|
-
:state => 'critical',
|
66
|
-
:description => 'Httpd connection error: #{e.class} - #{e.message}',
|
67
|
-
:tags => ['httpd']
|
68
|
-
)
|
69
|
-
else
|
70
|
-
report(
|
71
|
-
:service => 'httpd health',
|
72
|
-
:state => 'ok',
|
73
|
-
:description => 'Httpd connection status ok',
|
74
|
-
:tags => ['httpd']
|
75
|
-
)
|
76
|
-
end
|
77
|
-
response
|
78
|
-
end
|
94
|
+
def tick
|
95
|
+
return if (response = connection).nil?
|
79
96
|
|
80
|
-
|
81
|
-
|
82
|
-
response.each_line do |line|
|
83
|
-
metrics = Hash.new
|
97
|
+
response.each_line do |line|
|
98
|
+
metrics = {}
|
84
99
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
100
|
+
if line =~ /Scoreboard/
|
101
|
+
metrics = get_scoreboard_metrics(line.strip)
|
102
|
+
else
|
103
|
+
key, value = line.strip.split(':')
|
104
|
+
metrics[key.gsub(/\s/, '')] = value
|
105
|
+
end
|
106
|
+
report_metrics(metrics)
|
90
107
|
end
|
91
|
-
report_metrics(metrics)
|
92
108
|
end
|
93
109
|
end
|
94
110
|
end
|
95
|
-
|
96
111
|
end
|
97
112
|
|
98
113
|
Riemann::Tools::ApacheStatus.run
|
data/bin/riemann-bench
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
2
5
|
|
3
6
|
# Connects to a server (first arg) and populates it with a constant stream of
|
4
7
|
# events for testing.
|
@@ -7,62 +10,65 @@ require 'rubygems'
|
|
7
10
|
require 'riemann/client'
|
8
11
|
require 'pp'
|
9
12
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
module Riemann
|
14
|
+
class Bench
|
15
|
+
attr_accessor :client, :hosts, :services, :states
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@hosts = [nil] + (0...10).map { |i| "host#{i}" }
|
19
|
+
@hosts = %w[a b c d e f g h i j]
|
20
|
+
@services = %w[test1 test2 test3 foo bar baz xyzzy attack cat treat]
|
21
|
+
@states = {}
|
22
|
+
@client = Riemann::Client.new(host: (ARGV.first || 'localhost'))
|
23
|
+
end
|
24
|
+
|
25
|
+
def evolve(state)
|
26
|
+
m = state[:metric] + (rand - 0.5) * 0.1
|
27
|
+
m = [[0, m].max, 1].min
|
19
28
|
|
20
|
-
|
21
|
-
|
22
|
-
|
29
|
+
s = case m
|
30
|
+
when 0...0.75
|
31
|
+
'ok'
|
32
|
+
when 0.75...0.9
|
33
|
+
'warning'
|
34
|
+
when 0.9..1.0
|
35
|
+
'critical'
|
36
|
+
end
|
23
37
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
38
|
+
{
|
39
|
+
metric: m,
|
40
|
+
state: s,
|
41
|
+
host: state[:host],
|
42
|
+
service: state[:service],
|
43
|
+
description: "at #{Time.now}",
|
44
|
+
}
|
31
45
|
end
|
32
46
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
:description => "at #{Time.now}"
|
39
|
-
}
|
40
|
-
end
|
41
|
-
|
42
|
-
def tick
|
43
|
-
# pp @states
|
44
|
-
hosts.product(services).each do |id|
|
45
|
-
client << (states[id] = evolve(states[id]))
|
47
|
+
def tick
|
48
|
+
# pp @states
|
49
|
+
hosts.product(services).each do |id|
|
50
|
+
client << (states[id] = evolve(states[id]))
|
51
|
+
end
|
46
52
|
end
|
47
|
-
end
|
48
53
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
+
def run
|
55
|
+
start
|
56
|
+
loop do
|
57
|
+
sleep 0.05
|
58
|
+
tick
|
59
|
+
end
|
54
60
|
end
|
55
|
-
end
|
56
61
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
62
|
+
def start
|
63
|
+
hosts.product(services).each do |host, service|
|
64
|
+
states[[host, service]] = {
|
65
|
+
metric: 0.5,
|
66
|
+
state: 'ok',
|
67
|
+
description: 'Starting up',
|
68
|
+
host: host,
|
69
|
+
service: service,
|
70
|
+
}
|
71
|
+
end
|
66
72
|
end
|
67
73
|
end
|
68
74
|
end
|
data/bin/riemann-cloudant
CHANGED
@@ -1,58 +1,63 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
4
5
|
|
5
|
-
|
6
|
+
# Gathers load balancer statistics from Cloudant.com (shared cluster) and submits them to Riemann.
|
6
7
|
|
7
|
-
|
8
|
-
include Riemann::Tools
|
9
|
-
require 'net/http'
|
10
|
-
require 'json'
|
8
|
+
require File.expand_path('../lib/riemann/tools', __dir__)
|
11
9
|
|
12
|
-
|
13
|
-
|
10
|
+
module Riemann
|
11
|
+
module Tools
|
12
|
+
class Cloudant
|
13
|
+
include Riemann::Tools
|
14
|
+
require 'net/http'
|
15
|
+
require 'json'
|
14
16
|
|
15
|
-
|
16
|
-
|
17
|
-
json.each do |node|
|
18
|
-
return if node['svname'] == 'BACKEND' # this is just a sum of all nodes.
|
17
|
+
opt :cloudant_username, 'Cloudant username', type: :string, required: true
|
18
|
+
opt :cloudant_password, 'Cloudant password', type: :string, required: true
|
19
19
|
|
20
|
-
|
21
|
-
|
20
|
+
def tick
|
21
|
+
json.each do |node|
|
22
|
+
break if node['svname'] == 'BACKEND' # this is just a sum of all nodes.
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
:service => ns,
|
26
|
-
:state => (node['status'] == 'UP' ? 'ok' : 'critical'),
|
27
|
-
:tags => ['cloudant', cluster_name]
|
28
|
-
)
|
24
|
+
ns = "cloudant #{node['pxname']}"
|
25
|
+
cluster_name = node['tracked'].split('.')[0] # ie: meritage.cloudant.com
|
29
26
|
|
30
|
-
|
31
|
-
node.each do |property, metric|
|
32
|
-
unless ['pxname', 'svname', 'status', 'tracked'].include?(property)
|
27
|
+
# report health of each node.
|
33
28
|
report(
|
34
|
-
:
|
35
|
-
:
|
36
|
-
:
|
37
|
-
:state => (node['status'] == 'UP' ? 'ok' : 'critical'),
|
38
|
-
:tags => ['cloudant', cluster_name]
|
29
|
+
service: ns,
|
30
|
+
state: (node['status'] == 'UP' ? 'ok' : 'critical'),
|
31
|
+
tags: ['cloudant', cluster_name],
|
39
32
|
)
|
33
|
+
|
34
|
+
# report property->metric of each node.
|
35
|
+
node.each do |property, metric|
|
36
|
+
next if %w[pxname svname status tracked].include?(property)
|
37
|
+
|
38
|
+
report(
|
39
|
+
host: node['tracked'],
|
40
|
+
service: "#{ns} #{property}",
|
41
|
+
metric: metric.to_f,
|
42
|
+
state: (node['status'] == 'UP' ? 'ok' : 'critical'),
|
43
|
+
tags: ['cloudant', cluster_name],
|
44
|
+
)
|
45
|
+
end
|
40
46
|
end
|
41
47
|
end
|
42
48
|
|
49
|
+
def json
|
50
|
+
http = Net::HTTP.new('cloudant.com', 443)
|
51
|
+
http.use_ssl = true
|
52
|
+
response = http.start do |h|
|
53
|
+
get = Net::HTTP::Get.new('/api/load_balancer')
|
54
|
+
get.basic_auth opts[:cloudant_username], opts[:cloudant_password]
|
55
|
+
h.request get
|
56
|
+
end
|
57
|
+
JSON.parse(response.body)
|
58
|
+
end
|
43
59
|
end
|
44
60
|
end
|
45
|
-
|
46
|
-
def get_json
|
47
|
-
http = Net::HTTP.new('cloudant.com', 443)
|
48
|
-
http.use_ssl = true
|
49
|
-
http.start do |h|
|
50
|
-
get = Net::HTTP::Get.new('/api/load_balancer')
|
51
|
-
get.basic_auth opts[:cloudant_username], opts[:cloudant_password]
|
52
|
-
h.request get
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
61
|
end
|
57
62
|
|
58
63
|
Riemann::Tools::Cloudant.run
|
data/bin/riemann-consul
CHANGED
@@ -1,105 +1,112 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
2
5
|
|
3
6
|
# Reports service and node status to riemann
|
4
7
|
|
5
|
-
require File.expand_path('
|
8
|
+
require File.expand_path('../lib/riemann/tools', __dir__)
|
6
9
|
require 'socket'
|
7
10
|
require 'net/http'
|
8
11
|
require 'uri'
|
9
12
|
require 'json'
|
10
13
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
14
|
+
module Riemann
|
15
|
+
module Tools
|
16
|
+
class ConsulHealth
|
17
|
+
include Riemann::Tools
|
18
|
+
|
19
|
+
opt :consul_host, 'Consul API Host (default to localhost)', default: 'localhost'
|
20
|
+
opt :consul_port, 'Consul API Port (default to 8500)', default: '8500'
|
21
|
+
opt :prefix, 'prefix to use for all service names when reporting', default: 'consul '
|
22
|
+
opt :minimum_services_per_node, 'minimum services per node (default: 0)', default: 0
|
23
|
+
|
24
|
+
def initialize
|
25
|
+
@hostname = opts[:consul_host]
|
26
|
+
@prefix = opts[:prefix]
|
27
|
+
@minimum_services_per_node = opts[:minimum_services_per_node]
|
28
|
+
@underlying_ip = IPSocket.getaddress(@hostname)
|
29
|
+
@consul_leader_url = URI.parse("http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1/status/leader")
|
30
|
+
@consul_services_url = URI.parse("http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1/catalog/services")
|
31
|
+
@consul_nodes_url = URI.parse("http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1/catalog/nodes")
|
32
|
+
@consul_health_url_prefix = "http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1/health/service/"
|
33
|
+
|
34
|
+
@last_services_read = {}
|
35
|
+
end
|
33
36
|
|
34
|
-
|
37
|
+
def alert(hostname, service, state, metric, description)
|
38
|
+
opts = {
|
39
|
+
host: hostname,
|
40
|
+
service: service.to_s,
|
41
|
+
state: state.to_s,
|
42
|
+
metric: metric,
|
43
|
+
description: description,
|
44
|
+
}
|
35
45
|
|
36
|
-
|
37
|
-
|
38
|
-
:state => state.to_s,
|
39
|
-
:metric => metric,
|
40
|
-
:description => description }
|
46
|
+
report(opts)
|
47
|
+
end
|
41
48
|
|
42
|
-
|
43
|
-
|
49
|
+
def get(url)
|
50
|
+
Net::HTTP.get_response(url).body
|
51
|
+
end
|
44
52
|
|
45
|
-
|
46
|
-
|
47
|
-
|
53
|
+
def tick
|
54
|
+
leader = JSON.parse(get(@consul_leader_url))
|
55
|
+
leader_hostname = URI.parse("http://#{leader}").hostname
|
48
56
|
|
49
|
-
|
57
|
+
return unless leader_hostname == @underlying_ip
|
50
58
|
|
51
|
-
|
52
|
-
|
59
|
+
nodes = JSON.parse(get(@consul_nodes_url))
|
60
|
+
services = JSON.parse(get(@consul_services_url))
|
61
|
+
services_by_nodes = {}
|
53
62
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
63
|
+
nodes.each do |node|
|
64
|
+
node_name = node['Node']
|
65
|
+
services_by_nodes[node_name] = 0
|
66
|
+
end
|
58
67
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
68
|
+
# For every service
|
69
|
+
services.each do |service|
|
70
|
+
service_name = service[0]
|
71
|
+
health_url = URI.parse(@consul_health_url_prefix + service_name)
|
72
|
+
health_nodes = JSON.parse(get(health_url))
|
63
73
|
|
74
|
+
total_count = 0
|
75
|
+
ok_count = 0
|
64
76
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
77
|
+
health_nodes.each do |node|
|
78
|
+
hostname = node['Node']['Node']
|
79
|
+
ok = node['Checks'].all? { |check| check['Status'] == 'passing' }
|
80
|
+
alert(hostname, "#{@prefix}#{service_name}", ok ? :ok : :critical, ok ? 1 : 0, JSON.generate(node))
|
81
|
+
total_count += 1
|
82
|
+
ok_count += ok ? 1 : 0
|
70
83
|
|
71
|
-
|
72
|
-
|
84
|
+
last_services_by_nodes = services_by_nodes[hostname].to_i
|
85
|
+
services_by_nodes[hostname] = last_services_by_nodes + 1
|
86
|
+
end
|
73
87
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
88
|
+
unless @last_services_read[service_name].nil?
|
89
|
+
last_ok = @last_services_read[service_name]
|
90
|
+
if last_ok != ok_count
|
91
|
+
alert(
|
92
|
+
'total', "#{@prefix}#{service_name}-count", ok_count >= last_ok ? :ok : :critical, ok_count,
|
93
|
+
"Number of passing #{service_name} is: #{ok_count}/#{total_count}, Last time it was: #{last_ok}",
|
94
|
+
)
|
95
|
+
end
|
96
|
+
end
|
80
97
|
|
81
|
-
|
82
|
-
services_by_nodes[hostname] = last_services_by_nodes + 1
|
98
|
+
@last_services_read[service_name] = ok_count
|
83
99
|
end
|
84
100
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
101
|
+
# For every node
|
102
|
+
services_by_nodes.each do |node, count|
|
103
|
+
alert(
|
104
|
+
node, "#{@prefix}total-services", count >= @minimum_services_per_node ? :ok : :critical, count,
|
105
|
+
"#{count} services in the specified node",
|
106
|
+
)
|
90
107
|
end
|
91
|
-
|
92
|
-
@last_services_read[service_name] = okCount
|
93
|
-
|
94
|
-
end
|
95
|
-
|
96
|
-
# For every node
|
97
|
-
for node,count in services_by_nodes
|
98
|
-
alert(node, "#{@prefix}total-services", (count >= @minimum_services_per_node) ? :ok : :critical, count, "#{count} services in the specified node")
|
99
108
|
end
|
100
|
-
|
101
109
|
end
|
102
|
-
|
103
110
|
end
|
104
111
|
end
|
105
112
|
|