riemann-tools 1.0.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +15 -0
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +40 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +62 -2
- data/README.markdown +8 -24
- data/Rakefile +14 -5
- data/SECURITY.md +42 -0
- data/bin/riemann-apache-status +3 -94
- data/bin/riemann-bench +4 -67
- data/bin/riemann-cloudant +3 -54
- data/bin/riemann-consul +3 -102
- data/bin/riemann-dir-files-count +3 -51
- data/bin/riemann-dir-space +3 -51
- data/bin/riemann-diskstats +3 -91
- data/bin/riemann-fd +4 -63
- data/bin/riemann-freeswitch +4 -116
- data/bin/riemann-haproxy +3 -54
- data/bin/riemann-health +3 -344
- data/bin/riemann-kvminstance +4 -19
- data/bin/riemann-memcached +3 -33
- data/bin/riemann-net +3 -105
- data/bin/riemann-nginx-status +3 -80
- data/bin/riemann-ntp +3 -34
- data/bin/riemann-portcheck +3 -37
- data/bin/riemann-proc +3 -104
- data/bin/riemann-varnish +3 -50
- data/bin/riemann-wrapper +75 -0
- data/bin/riemann-zookeeper +3 -37
- data/lib/riemann/tools/apache_status.rb +107 -0
- data/lib/riemann/tools/bench.rb +72 -0
- data/lib/riemann/tools/cloudant.rb +57 -0
- data/lib/riemann/tools/consul_health.rb +107 -0
- data/lib/riemann/tools/dir_files_count.rb +56 -0
- data/lib/riemann/tools/dir_space.rb +56 -0
- data/lib/riemann/tools/diskstats.rb +94 -0
- data/lib/riemann/tools/fd.rb +81 -0
- data/lib/riemann/tools/freeswitch.rb +119 -0
- data/lib/riemann/tools/haproxy.rb +59 -0
- data/lib/riemann/tools/health.rb +478 -0
- data/lib/riemann/tools/kvm.rb +23 -0
- data/lib/riemann/tools/memcached.rb +38 -0
- data/lib/riemann/tools/net.rb +105 -0
- data/lib/riemann/tools/nginx_status.rb +86 -0
- data/lib/riemann/tools/ntp.rb +42 -0
- data/lib/riemann/tools/portcheck.rb +45 -0
- data/lib/riemann/tools/proc.rb +109 -0
- data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
- data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
- data/lib/riemann/tools/varnish.rb +55 -0
- data/lib/riemann/tools/version.rb +1 -1
- data/lib/riemann/tools/zookeeper.rb +40 -0
- data/lib/riemann/tools.rb +31 -52
- data/riemann-tools.gemspec +8 -2
- data/tools/riemann-aws/{Rakefile.rb → Rakefile} +8 -9
- data/tools/riemann-aws/bin/riemann-aws-billing +4 -83
- data/tools/riemann-aws/bin/riemann-aws-rds-status +4 -50
- data/tools/riemann-aws/bin/riemann-aws-sqs-status +4 -40
- data/tools/riemann-aws/bin/riemann-aws-status +4 -67
- data/tools/riemann-aws/bin/riemann-elb-metrics +4 -163
- data/tools/riemann-aws/bin/riemann-s3-list +4 -78
- data/tools/riemann-aws/bin/riemann-s3-status +4 -95
- data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
- data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
- data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +8 -9
- data/tools/riemann-chronos/bin/riemann-chronos +3 -139
- data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
- data/tools/riemann-docker/{Rakefile.rb → Rakefile} +7 -8
- data/tools/riemann-docker/bin/riemann-docker +4 -213
- data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
- data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +8 -9
- data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +3 -161
- data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
- data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +8 -9
- data/tools/riemann-marathon/bin/riemann-marathon +3 -142
- data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
- data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +8 -9
- data/tools/riemann-mesos/bin/riemann-mesos +3 -126
- data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
- data/tools/riemann-munin/{Rakefile.rb → Rakefile} +7 -8
- data/tools/riemann-munin/bin/riemann-munin +3 -32
- data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
- data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +8 -9
- data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +3 -264
- data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
- data/tools/riemann-riak/{Rakefile.rb → Rakefile} +7 -8
- data/tools/riemann-riak/bin/riemann-riak +3 -326
- data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
- data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
- data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
- metadata +112 -16
- data/.travis.yml +0 -31
- data/tools/riemann-riak/riak_status/key_count.erl +0 -13
- data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
- data/tools/riemann-riak/riak_status/ringready.erl +0 -9
data/bin/riemann-consul
CHANGED
@@ -1,107 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
5
|
|
6
|
-
require
|
7
|
-
require 'socket'
|
8
|
-
require 'net/http'
|
9
|
-
require 'uri'
|
10
|
-
require 'json'
|
11
|
-
|
12
|
-
class Riemann::Tools::ConsulHealth
|
13
|
-
include Riemann::Tools
|
14
|
-
|
15
|
-
opt :consul_host, "Consul API Host (default to localhost)", :default => "localhost"
|
16
|
-
opt :consul_port, "Consul API Host (default to 8500)", :default => "8500"
|
17
|
-
opt :prefix, "prefix to use for all service names when reporting", :default => "consul "
|
18
|
-
opt :minimum_services_per_node, "minimum services per node (default: 0)", :default => 0
|
19
|
-
|
20
|
-
def initialize
|
21
|
-
|
22
|
-
@hostname = opts[:consul_host]
|
23
|
-
@prefix = opts[:prefix]
|
24
|
-
@minimum_services_per_node = opts[:minimum_services_per_node]
|
25
|
-
@underlying_ip = IPSocket.getaddress(@hostname)
|
26
|
-
@consul_leader_url = URI.parse("http://" + opts[:consul_host] + ":" + opts[:consul_port] + "/v1/status/leader")
|
27
|
-
@consul_services_url = URI.parse("http://" + opts[:consul_host] + ":" + opts[:consul_port] + "/v1/catalog/services")
|
28
|
-
@consul_nodes_url = URI.parse("http://" + opts[:consul_host] + ":" + opts[:consul_port] + "/v1/catalog/nodes")
|
29
|
-
@consul_health_url_prefix = "http://" + opts[:consul_host] + ":" + opts[:consul_port] + "/v1/health/service/"
|
30
|
-
|
31
|
-
@last_services_read = Hash.new
|
32
|
-
|
33
|
-
end
|
34
|
-
|
35
|
-
def alert(hostname, service, state, metric, description)
|
36
|
-
|
37
|
-
opts = { :host => hostname,
|
38
|
-
:service => service.to_s,
|
39
|
-
:state => state.to_s,
|
40
|
-
:metric => metric,
|
41
|
-
:description => description }
|
42
|
-
|
43
|
-
report(opts)
|
44
|
-
end
|
45
|
-
|
46
|
-
def get(url)
|
47
|
-
Net::HTTP.get_response(url).body
|
48
|
-
end
|
49
|
-
|
50
|
-
def tick
|
51
|
-
|
52
|
-
leader = JSON.parse(get(@consul_leader_url))
|
53
|
-
leader_hostname = URI.parse("http://" + leader).hostname
|
54
|
-
|
55
|
-
if (leader_hostname == @underlying_ip)
|
56
|
-
nodes = JSON.parse(get(@consul_nodes_url))
|
57
|
-
services = JSON.parse(get(@consul_services_url))
|
58
|
-
services_by_nodes = Hash.new
|
59
|
-
|
60
|
-
for node in nodes
|
61
|
-
node_name = node["Node"]
|
62
|
-
services_by_nodes[node_name] = 0
|
63
|
-
end
|
64
|
-
|
65
|
-
|
66
|
-
# For every service
|
67
|
-
for service in services
|
68
|
-
service_name = service[0]
|
69
|
-
health_url = URI.parse(@consul_health_url_prefix + service_name)
|
70
|
-
health_nodes = JSON.parse(get(health_url))
|
71
|
-
|
72
|
-
totalCount = 0
|
73
|
-
okCount = 0
|
74
|
-
|
75
|
-
for node in health_nodes
|
76
|
-
hostname = node["Node"]["Node"]
|
77
|
-
ok = node["Checks"].all? {|check| check["Status"] == "passing"}
|
78
|
-
alert(hostname, "#{@prefix}#{service_name}", ok ? :ok : :critical, ok ? 1 : 0, JSON.generate(node))
|
79
|
-
totalCount += 1
|
80
|
-
okCount += ok ? 1 : 0
|
81
|
-
|
82
|
-
last_services_by_nodes = services_by_nodes[hostname].to_i
|
83
|
-
services_by_nodes[hostname] = last_services_by_nodes + 1
|
84
|
-
end
|
85
|
-
|
86
|
-
if (@last_services_read[service_name] != nil)
|
87
|
-
lastOk = @last_services_read[service_name]
|
88
|
-
if (lastOk != okCount)
|
89
|
-
alert("total", "#{@prefix}#{service_name}-count", okCount >= lastOk ? :ok : :critical, okCount, "Number of passing #{service_name} is: #{okCount}/#{totalCount}, Last time it was: #{lastOk}")
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
@last_services_read[service_name] = okCount
|
94
|
-
|
95
|
-
end
|
96
|
-
|
97
|
-
# For every node
|
98
|
-
for node,count in services_by_nodes
|
99
|
-
alert(node, "#{@prefix}total-services", (count >= @minimum_services_per_node) ? :ok : :critical, count, "#{count} services in the specified node")
|
100
|
-
end
|
101
|
-
|
102
|
-
end
|
103
|
-
|
104
|
-
end
|
105
|
-
end
|
6
|
+
require 'riemann/tools/consul_health'
|
106
7
|
|
107
8
|
Riemann::Tools::ConsulHealth.run
|
data/bin/riemann-dir-files-count
CHANGED
@@ -1,56 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
5
|
|
6
|
-
require
|
7
|
-
|
8
|
-
class Riemann::Tools::DirFilesCount
|
9
|
-
include Riemann::Tools
|
10
|
-
|
11
|
-
opt :directory, "", :default => '/var/log'
|
12
|
-
opt :service_prefix, "The first part of the service name, before the directory path", :default => "dir-files-count"
|
13
|
-
opt :warning, "Dir files number warning threshold", :type => Integer
|
14
|
-
opt :critical, "Dir files number critical threshold", :type => Integer
|
15
|
-
opt :alert_on_missing, "Send a critical metric if the directory is missing?", :default => true
|
16
|
-
|
17
|
-
def initialize
|
18
|
-
@dir = opts.fetch(:directory)
|
19
|
-
@service_prefix = opts.fetch(:service_prefix)
|
20
|
-
@warning = opts.fetch(:warning, nil)
|
21
|
-
@critical = opts.fetch(:critical, nil)
|
22
|
-
@alert_on_missing = opts.fetch(:alert_on_missing)
|
23
|
-
end
|
24
|
-
|
25
|
-
def tick
|
26
|
-
if Dir.exists?(@dir)
|
27
|
-
metric = Dir.entries(@dir).size - 2
|
28
|
-
report(
|
29
|
-
:service => "#{@service_prefix} #{@dir}",
|
30
|
-
:metric => metric,
|
31
|
-
:state => state(metric),
|
32
|
-
:tags => ['dir_files_count']
|
33
|
-
)
|
34
|
-
elsif @alert_on_missing
|
35
|
-
report(
|
36
|
-
:service => "#{@service_prefix} #{@dir} missing",
|
37
|
-
:description => "#{@service_prefix} #{@dir} does not exist",
|
38
|
-
:metric => metric,
|
39
|
-
:state => 'critical',
|
40
|
-
:tags => ['dir_files_count']
|
41
|
-
)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
def state(metric)
|
46
|
-
if @critical && metric > @critical
|
47
|
-
'critical'
|
48
|
-
elsif @warning && metric > @warning
|
49
|
-
'warning'
|
50
|
-
else
|
51
|
-
'ok'
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
6
|
+
require 'riemann/tools/dir_files_count'
|
55
7
|
|
56
8
|
Riemann::Tools::DirFilesCount.run
|
data/bin/riemann-dir-space
CHANGED
@@ -1,56 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
5
|
|
6
|
-
require
|
7
|
-
|
8
|
-
class Riemann::Tools::DirSpace
|
9
|
-
include Riemann::Tools
|
10
|
-
|
11
|
-
opt :directory, "", :default => '/var/log'
|
12
|
-
opt :service_prefix, "The first part of the service name, before the directory path", :default => "dir-space"
|
13
|
-
opt :warning, "Dir space warning threshold (in bytes)", :type => Integer
|
14
|
-
opt :critical, "Dir space critical threshold (in bytes)", :type => Integer
|
15
|
-
opt :alert_on_missing, "Send a critical metric if the directory is missing?", :default => true
|
16
|
-
|
17
|
-
def initialize
|
18
|
-
@dir = opts.fetch(:directory)
|
19
|
-
@service_prefix = opts.fetch(:service_prefix)
|
20
|
-
@warning = opts.fetch(:warning, nil)
|
21
|
-
@critical = opts.fetch(:critical, nil)
|
22
|
-
@alert_on_missing = opts.fetch(:alert_on_missing)
|
23
|
-
end
|
24
|
-
|
25
|
-
def tick
|
26
|
-
if Dir.exists?(@dir)
|
27
|
-
metric = `du '#{@dir}'`.lines.to_a.last.split("\t")[0].to_i
|
28
|
-
report(
|
29
|
-
:service => "#{@service_prefix} #{@dir}",
|
30
|
-
:metric => metric,
|
31
|
-
:state => state(metric),
|
32
|
-
:tags => ['dir_space']
|
33
|
-
)
|
34
|
-
elsif @alert_on_missing
|
35
|
-
report(
|
36
|
-
:service => "#{@service_prefix} #{@dir} missing",
|
37
|
-
:description => "#{@service_prefix} #{@dir} does not exist",
|
38
|
-
:metric => metric,
|
39
|
-
:state => 'critical',
|
40
|
-
:tags => ['dir_space']
|
41
|
-
)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
def state(metric)
|
46
|
-
if @critical && metric > @critical
|
47
|
-
'critical'
|
48
|
-
elsif @warning && metric > @warning
|
49
|
-
'warning'
|
50
|
-
else
|
51
|
-
'ok'
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
6
|
+
require 'riemann/tools/dir_space'
|
55
7
|
|
56
8
|
Riemann::Tools::DirSpace.run
|
data/bin/riemann-diskstats
CHANGED
@@ -1,96 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
#
|
4
|
-
require 'rubygems'
|
5
|
-
require 'riemann/tools'
|
2
|
+
# frozen_string_literal: true
|
6
3
|
|
7
|
-
|
8
|
-
include Riemann::Tools
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
9
5
|
|
10
|
-
|
11
|
-
opt :ignore_devices, "Devices to ignore", :type => :strings, :default =>nil
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@old_state = nil
|
15
|
-
end
|
16
|
-
|
17
|
-
def state
|
18
|
-
f = File.read('/proc/diskstats')
|
19
|
-
state = f.split("\n").reject { |d| d =~ /(ram|loop)/ }.inject({}) do |s, line|
|
20
|
-
if line =~ /^(?:\s+\d+){2}\s+([\w\d\-]+) (.*)$/
|
21
|
-
dev = $1
|
22
|
-
|
23
|
-
['reads reqs',
|
24
|
-
'reads merged',
|
25
|
-
'reads sector',
|
26
|
-
'reads time',
|
27
|
-
'writes reqs',
|
28
|
-
'writes merged',
|
29
|
-
'writes sector',
|
30
|
-
'writes time',
|
31
|
-
'io reqs',
|
32
|
-
'io time',
|
33
|
-
'io weighted'
|
34
|
-
].map do |service|
|
35
|
-
"#{dev} #{service}"
|
36
|
-
end.zip(
|
37
|
-
$2.split(/\s+/).map { |str| str.to_i }
|
38
|
-
).each do |service, value|
|
39
|
-
s[service] = value
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
s
|
44
|
-
end
|
45
|
-
|
46
|
-
# Filter interfaces
|
47
|
-
if is = opts[:devices]
|
48
|
-
state = state.select do |service, value|
|
49
|
-
is.include? service.split(' ').first
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
if ign = opts[:ignore_devices]
|
54
|
-
state = state.reject do |service, value|
|
55
|
-
ign.include? service.split(' ').first
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
state
|
60
|
-
end
|
61
|
-
|
62
|
-
def tick
|
63
|
-
state = self.state
|
64
|
-
|
65
|
-
if @old_state
|
66
|
-
state.each do |service, metric|
|
67
|
-
|
68
|
-
if service =~ /io reqs$/
|
69
|
-
report(
|
70
|
-
:service => "diskstats " + service,
|
71
|
-
:metric => metric,
|
72
|
-
:state => "ok"
|
73
|
-
)
|
74
|
-
else
|
75
|
-
delta = metric - @old_state[service]
|
76
|
-
|
77
|
-
report(
|
78
|
-
:service => "diskstats " + service,
|
79
|
-
:metric => (delta.to_f / opts[:interval]),
|
80
|
-
:state => "ok"
|
81
|
-
)
|
82
|
-
end
|
83
|
-
|
84
|
-
if service =~ /io time$/
|
85
|
-
report(:service => "diskstats " + service.gsub(/time/, 'util'),
|
86
|
-
:metric => (delta.to_f / (opts[:interval]*1000)),
|
87
|
-
:state => "ok")
|
88
|
-
end
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
@old_state = state
|
93
|
-
end
|
94
|
-
end
|
6
|
+
require 'riemann/tools/diskstats'
|
95
7
|
|
96
8
|
Riemann::Tools::Diskstats.run
|
data/bin/riemann-fd
CHANGED
@@ -1,67 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
5
|
-
# By default reports the total system fd usage, can also report usage of individual processes
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
6
5
|
|
7
|
-
require
|
6
|
+
require 'riemann/tools/fd'
|
8
7
|
|
9
|
-
|
10
|
-
include Riemann::Tools
|
11
|
-
|
12
|
-
opt :fd_sys_warning, "open file descriptor threshold for system", :default => 800
|
13
|
-
opt :fd_sys_critical, "open file descriptor critical threshold for system", :default => 900
|
14
|
-
opt :fd_proc_warning, "open file descriptor threshold for process", :default => 800
|
15
|
-
opt :fd_proc_critical, "open file descriptor critical threshold for process", :default => 900
|
16
|
-
opt :processes, "list of processes to measure fd usage in addition to system total", :type => :ints
|
17
|
-
|
18
|
-
def initialize
|
19
|
-
@limits = {
|
20
|
-
:fd => {:critical => opts[:fd_sys_critical], :warning => opts[:fd_sys_warning]},
|
21
|
-
:process => {:critical => opts[:fd_proc_critical], :warning => opts[:fd_proc_warning]},
|
22
|
-
}
|
23
|
-
ostype = `uname -s`.chomp.downcase
|
24
|
-
puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
|
25
|
-
@fd = method :linux_fd
|
26
|
-
end
|
27
|
-
|
28
|
-
def alert(service, state, metric, description)
|
29
|
-
report(
|
30
|
-
:service => service.to_s,
|
31
|
-
:state => state.to_s,
|
32
|
-
:metric => metric.to_f,
|
33
|
-
:description => description
|
34
|
-
)
|
35
|
-
end
|
36
|
-
|
37
|
-
def linux_fd
|
38
|
-
sys_used = Integer(`lsof | wc -l`)
|
39
|
-
if sys_used > @limits[:fd][:critical]
|
40
|
-
alert "fd sys", :critical, sys_used, "system is using #{sys_used} fds"
|
41
|
-
elsif sys_used > @limits[:fd][:warning]
|
42
|
-
alert "fd sys", :warning, sys_used, "system is using #{sys_used} fds"
|
43
|
-
else
|
44
|
-
alert "fd sys", :ok, sys_used, "system is using #{sys_used} fds"
|
45
|
-
end
|
46
|
-
|
47
|
-
unless opts[:processes].nil?
|
48
|
-
opts[:processes].each do |process|
|
49
|
-
used = Integer(`lsof -p #{process} | wc -l`)
|
50
|
-
name, pid = `ps axo comm,pid | grep -w #{process}`.split
|
51
|
-
if used > @limits[:process][:critical]
|
52
|
-
alert "fd #{name} #{process}", :critical, used, "process #{name} #{process} is using #{used} fds"
|
53
|
-
elsif used > @limits[:process][:warning]
|
54
|
-
alert "fd #{name} #{process}", :warning, used, "process #{name} #{process} is using #{used} fds"
|
55
|
-
else
|
56
|
-
alert "fd #{name} #{process}", :ok, used, "process #{name} #{process} is using #{used} fds"
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
def tick
|
63
|
-
@fd.call
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
Riemann::Tools::Health.run
|
8
|
+
Riemann::Tools::Fd.run
|
data/bin/riemann-freeswitch
CHANGED
@@ -1,120 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
5
|
|
6
|
-
|
7
|
-
include Riemann::Tools
|
6
|
+
require 'riemann/tools/freeswitch'
|
8
7
|
|
9
|
-
|
10
|
-
opt :calls_critical, "Calls critical threshold", :default => 300
|
11
|
-
opt :pid_file, "FreeSWITCH daemon pidfile", :type => String, :default => "/var/run/freeswitch/freeswitch.pid"
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@limits = {
|
15
|
-
:calls => {:critical => opts[:calls_critical], :warning => opts[:calls_warning]}
|
16
|
-
}
|
17
|
-
end
|
18
|
-
|
19
|
-
def dead_proc?(pid)
|
20
|
-
begin
|
21
|
-
Process.getpgid(pid)
|
22
|
-
false
|
23
|
-
rescue Errno::ESRCH
|
24
|
-
true
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def alert(service, state, metric, description)
|
29
|
-
report(
|
30
|
-
:service => service.to_s,
|
31
|
-
:state => state.to_s,
|
32
|
-
:metric => metric.to_f,
|
33
|
-
:description => description
|
34
|
-
)
|
35
|
-
end
|
36
|
-
|
37
|
-
def exec_with_timeout(cmd, timeout)
|
38
|
-
pid = Process.spawn(cmd, {[:err,:out] => :close, :pgroup => true})
|
39
|
-
begin
|
40
|
-
Timeout.timeout(timeout) do
|
41
|
-
Process.waitpid(pid, 0)
|
42
|
-
$?.exitstatus == 0
|
43
|
-
end
|
44
|
-
rescue Timeout::Error
|
45
|
-
Process.kill(15, -Process.getpgid(pid))
|
46
|
-
puts "Killed pid: #{pid}"
|
47
|
-
false
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
def tick
|
52
|
-
# Determine how many current calls I have according to FreeSWITCH
|
53
|
-
fs_calls = %x[fs_cli -x "show calls count"| grep -Po '^\\d+'].to_i
|
54
|
-
|
55
|
-
# Determine how many current channels I have according to FreeSWITCH
|
56
|
-
fs_channels = %x[fs_cli -x "show channels count"| grep -Po '^\\d+'].to_i
|
57
|
-
|
58
|
-
# Determine how many conferences I have according to FreeSWITCH
|
59
|
-
fs_conferences = %x[fs_cli -x "conference list"| grep -Pco '^Conference'].to_i
|
60
|
-
|
61
|
-
# Try to read pidfile. If it fails use Devil's dummy PID
|
62
|
-
begin
|
63
|
-
fs_pid = File.read(opts[:pid_file]).to_i
|
64
|
-
rescue
|
65
|
-
puts "Couldn't read pidfile: #{opts[:pid_file]}"
|
66
|
-
fs_pid = -666
|
67
|
-
end
|
68
|
-
|
69
|
-
fs_threads = fs_pid > 0 ? %x[ps huH p #{fs_pid} | wc -l].to_i : 0
|
70
|
-
|
71
|
-
# Submit calls to riemann
|
72
|
-
if fs_calls > @limits[:calls][:critical]
|
73
|
-
alert "FreeSWITCH current calls", :critical, fs_calls, "Number of calls are #{fs_calls}"
|
74
|
-
elsif fs_calls > @limits[:calls][:warning]
|
75
|
-
alert "FreeSWITCH current calls", :warning, fs_calls, "Number of calls are #{fs_calls}"
|
76
|
-
else
|
77
|
-
alert "FreeSWITCH current calls", :ok, fs_calls, "Number of calls are #{fs_calls}"
|
78
|
-
end
|
79
|
-
|
80
|
-
# Submit channels to riemann
|
81
|
-
if fs_channels > @limits[:calls][:critical]
|
82
|
-
alert "FreeSWITCH current channels", :critical, fs_channels, "Number of channels are #{fs_channels}"
|
83
|
-
elsif fs_channels > @limits[:calls][:warning]
|
84
|
-
alert "FreeSWITCH current channels", :warning, fs_channels, "Number of channels are #{fs_channels}"
|
85
|
-
else
|
86
|
-
alert "FreeSWITCH current channels", :ok, fs_channels, "Number of channels are #{fs_channels}"
|
87
|
-
end
|
88
|
-
|
89
|
-
# Submit conferences to riemann
|
90
|
-
if fs_conferences > @limits[:calls][:critical]
|
91
|
-
alert "FreeSWITCH current conferences", :critical, fs_conferences, "Number of conferences are #{fs_conferences}"
|
92
|
-
elsif fs_conferences > @limits[:calls][:warning]
|
93
|
-
alert "FreeSWITCH current conferences", :warning, fs_conferences, "Number of conferences are #{fs_conferences}"
|
94
|
-
else
|
95
|
-
alert "FreeSWITCH current conferences", :ok, fs_conferences, "Number of conferences are #{fs_conferences}"
|
96
|
-
end
|
97
|
-
|
98
|
-
# Submit threads to riemann
|
99
|
-
if fs_threads
|
100
|
-
alert "FreeSWITCH current threads", :ok, fs_threads, "Number of threads are #{fs_threads}"
|
101
|
-
end
|
102
|
-
|
103
|
-
# Submit status to riemann
|
104
|
-
if dead_proc?(fs_pid)
|
105
|
-
alert "FreeSWITCH status", :critical, -1, "FreeSWITCH service status: not running"
|
106
|
-
else
|
107
|
-
alert "FreeSWITCH status", :ok, nil, "FreeSWITCH service status: running"
|
108
|
-
end
|
109
|
-
|
110
|
-
# Submit CLI status to riemann using timeout in case it's unresponsive
|
111
|
-
if exec_with_timeout("fs_cli -x status", 2)
|
112
|
-
alert "FreeSWITCH CLI status", :ok, nil, "FreeSWITCH CLI status: responsive"
|
113
|
-
else
|
114
|
-
alert "FreeSWITCH CLI status", :critical, -1, "FreeSWITCH CLI status: not responding"
|
115
|
-
end
|
116
|
-
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
Riemann::Tools::FreeSWITCH.run
|
8
|
+
Riemann::Tools::Freeswitch.run
|
data/bin/riemann-haproxy
CHANGED
@@ -1,59 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
5
|
|
6
|
-
require
|
7
|
-
|
8
|
-
class Riemann::Tools::Haproxy
|
9
|
-
include Riemann::Tools
|
10
|
-
require 'net/http'
|
11
|
-
require 'csv'
|
12
|
-
|
13
|
-
opt :stats_url, "Full url to haproxy stats (eg: https://user:password@host.com:9999/stats)", :required => true, :type => :string
|
14
|
-
|
15
|
-
def initialize
|
16
|
-
@uri = URI(opts[:stats_url]+';csv')
|
17
|
-
end
|
18
|
-
|
19
|
-
def tick
|
20
|
-
csv = CSV.parse(get_csv.body.split("# ")[1], { :headers => true })
|
21
|
-
csv.each do |row|
|
22
|
-
row = row.to_hash
|
23
|
-
ns = "haproxy #{row['pxname']} #{row['svname']}"
|
24
|
-
row.each do |property, metric|
|
25
|
-
unless (property.nil? || property == 'pxname' || property == 'svname')
|
26
|
-
report(
|
27
|
-
:host => @uri.host,
|
28
|
-
:service => "#{ns} #{property}",
|
29
|
-
:metric => metric.to_f,
|
30
|
-
:tags => ['haproxy']
|
31
|
-
)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
report(
|
36
|
-
:host => @uri.host,
|
37
|
-
:service => "#{ns} state",
|
38
|
-
:state => (['UP', 'OPEN'].include?(row['status']) ? 'ok' : 'critical'),
|
39
|
-
:tags => ['haproxy']
|
40
|
-
)
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def get_csv
|
45
|
-
http = Net::HTTP.new(@uri.host, @uri.port)
|
46
|
-
http.use_ssl = true if @uri.scheme == 'https'
|
47
|
-
http.start do |h|
|
48
|
-
get = Net::HTTP::Get.new(@uri.request_uri)
|
49
|
-
unless @uri.userinfo.nil?
|
50
|
-
userinfo = @uri.userinfo.split(":")
|
51
|
-
get.basic_auth userinfo[0], userinfo[1]
|
52
|
-
end
|
53
|
-
h.request get
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
end
|
6
|
+
require 'riemann/tools/haproxy'
|
58
7
|
|
59
8
|
Riemann::Tools::Haproxy.run
|