riemann-tools 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +13 -0
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.rubocop.yml +32 -0
- data/CHANGELOG.md +31 -2
- data/README.markdown +8 -24
- data/Rakefile +4 -2
- data/SECURITY.md +42 -0
- data/bin/riemann-apache-status +92 -78
- data/bin/riemann-bench +54 -49
- data/bin/riemann-cloudant +44 -40
- data/bin/riemann-consul +82 -76
- data/bin/riemann-dir-files-count +53 -47
- data/bin/riemann-dir-space +53 -47
- data/bin/riemann-diskstats +78 -75
- data/bin/riemann-fd +68 -48
- data/bin/riemann-freeswitch +108 -103
- data/bin/riemann-haproxy +46 -40
- data/bin/riemann-health +4 -343
- data/bin/riemann-kvminstance +18 -13
- data/bin/riemann-memcached +35 -29
- data/bin/riemann-net +4 -104
- data/bin/riemann-nginx-status +74 -67
- data/bin/riemann-ntp +4 -33
- data/bin/riemann-portcheck +40 -31
- data/bin/riemann-proc +96 -90
- data/bin/riemann-varnish +51 -45
- data/bin/riemann-zookeeper +38 -34
- data/lib/riemann/tools/health.rb +347 -0
- data/lib/riemann/tools/net.rb +104 -0
- data/lib/riemann/tools/ntp.rb +41 -0
- data/lib/riemann/tools/version.rb +1 -1
- data/lib/riemann/tools.rb +37 -40
- data/riemann-tools.gemspec +4 -1
- data/tools/riemann-aws/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-aws/bin/riemann-aws-billing +72 -66
- data/tools/riemann-aws/bin/riemann-aws-rds-status +55 -41
- data/tools/riemann-aws/bin/riemann-aws-sqs-status +37 -31
- data/tools/riemann-aws/bin/riemann-aws-status +63 -51
- data/tools/riemann-aws/bin/riemann-elb-metrics +149 -148
- data/tools/riemann-aws/bin/riemann-s3-list +70 -65
- data/tools/riemann-aws/bin/riemann-s3-status +85 -82
- data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-chronos/bin/riemann-chronos +136 -119
- data/tools/riemann-docker/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-docker/bin/riemann-docker +163 -174
- data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +155 -147
- data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-marathon/bin/riemann-marathon +138 -122
- data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-mesos/bin/riemann-mesos +125 -110
- data/tools/riemann-munin/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-munin/bin/riemann-munin +28 -22
- data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +226 -222
- data/tools/riemann-riak/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-riak/bin/riemann-riak +281 -289
- data/tools/riemann-riak/riak_status/riak_status.rb +39 -39
- metadata +65 -16
data/bin/riemann-zookeeper
CHANGED
@@ -1,41 +1,45 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
3
5
|
|
4
6
|
# Gathers zookeeper STATS and submits them to Riemann.
|
5
7
|
|
6
|
-
require File.expand_path('
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
8
|
+
require File.expand_path('../lib/riemann/tools', __dir__)
|
9
|
+
|
10
|
+
module Riemann
|
11
|
+
module Tools
|
12
|
+
class Zookeeper
|
13
|
+
include Riemann::Tools
|
14
|
+
require 'socket'
|
15
|
+
|
16
|
+
opt :zookeeper_host, 'Zookeeper hostname', default: 'localhost'
|
17
|
+
opt :zookeeper_port, 'Zookeeper port', default: 2181
|
18
|
+
|
19
|
+
# Sends the `mntr` command to the configured ZooKeeper host/port and reports
# one event for every "<key>\t<value>" line of the reply.
#
# Reply lines that are not in that form (blank lines, banners, error text) are
# skipped rather than raising NoMethodError on a nil match, and the socket is
# closed in an `ensure` so an exception raised while reading or reporting
# cannot leak the descriptor.
def tick
  sock = TCPSocket.new(opts[:zookeeper_host], opts[:zookeeper_port])
  sock.sync = true
  sock.print('mntr')
  sock.flush

  # `gets` returns nil once the peer has closed the connection.
  while (stats = sock.gets)
    m = stats.match(/^(\w+)\t+(.*)/)
    next unless m

    report(
      host: opts[:zookeeper_host].dup,
      service: "zookeeper #{m[1]}",
      metric: m[2].to_f,
      state: 'ok',
      tags: ['zookeeper'],
    )
  end
ensure
  # `sock` is nil when TCPSocket.new itself raised.
  sock&.close
end
|
42
|
+
end
|
39
43
|
end
|
40
44
|
end
|
41
45
|
|
@@ -0,0 +1,347 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'riemann/tools'
|
4
|
+
require 'riemann/tools/utils'
|
5
|
+
|
6
|
+
module Riemann
|
7
|
+
module Tools
|
8
|
+
class Health
|
9
|
+
include Riemann::Tools
|
10
|
+
include Riemann::Tools::Utils
|
11
|
+
|
12
|
+
opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
|
13
|
+
opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
|
14
|
+
opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
|
15
|
+
opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
|
16
|
+
opt :disk_ignorefs, 'A list of filesystem types to ignore',
|
17
|
+
default: %w[anon_inodefs autofs cd9660 devfs devtmpfs fdescfs iso9660 linprocfs linsysfs nfs procfs tmpfs]
|
18
|
+
opt :load_warning, 'Load warning threshold (load average / core)', default: 3.0
|
19
|
+
opt :load_critical, 'Load critical threshold (load average / core)', default: 8.0
|
20
|
+
opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
|
21
|
+
opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
|
22
|
+
opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu load memory disk]
|
23
|
+
|
24
|
+
# Prepares the health checker:
#   * builds @limits (critical/warning thresholds per check) from the options,
#   * detects the operating system with `uname -s` and binds @cpu, @disk,
#     @load and @memory to the matching probe methods,
#   * sets the @*_enabled flags for every check named in opts[:checks].
def initialize
  @limits = %i[cpu disk load memory].each_with_object({}) do |check, table|
    table[check] = { critical: opts[:"#{check}_critical"], warning: opts[:"#{check}_warning"] }
  end

  @ostype = `uname -s`.chomp.downcase
  @disk = method(:disk) # every platform uses the same disk probe

  case @ostype
  when 'darwin', 'freebsd', 'openbsd'
    @cores = `sysctl -n hw.ncpu`.to_i
    @cpu = method(:"#{@ostype}_cpu")
    @load = method(@ostype == 'darwin' ? :darwin_load : :bsd_load)
    @memory = method(:"#{@ostype}_memory")
    darwin_top if @ostype == 'darwin' # take the first `top` sample up front
  when 'sunos'
    @cores = `mpstat -a 2>/dev/null`.split[33].to_i
    @cpu = method(:sunos_cpu)
    @load = method(:bsd_load)
    @memory = method(:sunos_memory)
  else
    @cores = `nproc`.to_i
    puts "WARNING: OS '#{@ostype}' not explicitly supported. Falling back to Linux" unless @ostype == 'linux'
    @cpu = method(:linux_cpu)
    @load = method(:linux_load)
    @memory = method(:linux_memory)
    # Remember whether this df understands --exclude-type.
    @supports_exclude_type = !`df --help 2>&1 | grep -e "--exclude-type"`.empty?
  end

  requested = opts[:checks]
  @disk_enabled = true if requested.include?('disk')
  @load_enabled = true if requested.include?('load')
  @cpu_enabled = true if requested.include?('cpu')
  @memory_enabled = true if requested.include?('memory')
end
|
80
|
+
|
81
|
+
# Reports a single health event through `report`.
#
# service     - check name, converted with #to_s
# state       - event state, converted with #to_s
# metric      - reading, converted with #to_f (nil therefore becomes 0.0)
# description - text attached to the event, passed through unchanged
def alert(service, state, metric, description)
  event = {
    service: service.to_s,
    state: state.to_s,
    metric: metric.to_f,
    description: description,
  }
  report(**event)
end
|
89
|
+
|
90
|
+
# Grades a fraction against the thresholds stored in @limits[service] and
# raises one alert whose description starts with the value as a percentage.
#
# service  - key into @limits (e.g. :cpu)
# fraction - the measured fraction; nothing is reported when it is nil/false
# report   - text appended after the percentage in the description
def report_pct(service, fraction, report)
  return unless fraction

  thresholds = @limits[service]
  severity =
    if fraction > thresholds[:critical]
      :critical
    elsif fraction > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert service, severity, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
end
|
101
|
+
|
102
|
+
# Samples the first four counters of the `cpu` line in /proc/stat and, once a
# previous sample exists, reports the share of the elapsed jiffies that was
# not idle (first three counters versus the fourth).
#
# Returns false after raising an `unknown` alert when no cpu line is found;
# otherwise returns the sample stored in @old_cpu.
def linux_cpu
  counters = File.read('/proc/stat').match(/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)
  if counters.nil?
    alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
    return false
  end

  current = counters.captures.map(&:to_i)
  user, nice, sys, idle = current

  if @old_cpu
    prev_user, prev_nice, prev_sys, prev_idle = @old_cpu

    busy = (user + nice + sys) - (prev_user + prev_nice + prev_sys)
    elapsed = busy + idle - prev_idle
    share = busy.to_f / elapsed

    report_pct :cpu, share, "user+nice+system\n\n#{reverse_numeric_sort_with_header(`ps -eo pcpu,pid,comm`)}"
  end

  # Keep this sample as the baseline for the next call.
  @old_cpu = current
end
|
123
|
+
|
124
|
+
# Reads the first field of /proc/loadavg, divides it by @cores and raises a
# 'load' alert graded against @limits[:load].
def linux_load
  first_field = File.read('/proc/loadavg').split(/\s+/)[0].to_f
  per_core = first_field / @cores

  thresholds = @limits[:load]
  severity =
    if per_core > thresholds[:critical]
      :critical
    elsif per_core > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert 'load', severity, per_core, "1-minute load average/core is #{per_core}"
end
|
134
|
+
|
135
|
+
# Parses /proc/meminfo into a name => integer table and reports the fraction
# of MemTotal that is not covered by MemFree + Buffers + Cached.
def linux_memory
  table = File.read('/proc/meminfo')
              .split("\n")
              .map { |line| line.split(/:?\s+/) }
              .each_with_object({}) do |(name, amount), info|
    # Values are assumed to be in kB; the unit column is ignored.
    info[name] = amount.to_i
  end

  reclaimable = %w[MemFree Buffers Cached].sum { |name| table[name].to_i }
  installed = table['MemTotal'].to_i
  used_share = 1 - (reclaimable.to_f / installed)

  report_pct :memory, used_share, "used\n\n#{reverse_numeric_sort_with_header(`ps -eo pmem,pid,comm`)}"
end
|
148
|
+
|
149
|
+
# Samples the five whitespace-separated counters printed by
# `sysctl -n kern.cp_time` and, once a previous sample exists, reports the
# share of the elapsed ticks spent in the first four counters (the fifth is
# treated as idle). Returns the sample stored in @old_cpu.
def freebsd_cpu
  user, nice, sys, intr, idle = `sysctl -n kern.cp_time 2>/dev/null`.split.map(&:to_i)

  if @old_cpu
    prev_user, prev_nice, prev_sys, prev_intr, prev_idle = @old_cpu

    busy = (user + nice + sys + intr) - (prev_user + prev_nice + prev_sys + prev_intr)
    elapsed = busy + idle - prev_idle
    share = busy.to_f / elapsed

    report_pct :cpu, share,
               "user+nice+sytem+interrupt\n\n#{reverse_numeric_sort_with_header(`ps -axo pcpu,pid,comm`)}"
  end

  # Baseline for the next call.
  @old_cpu = [user, nice, sys, intr, idle]
end
|
165
|
+
|
166
|
+
# Same calculation as the FreeBSD probe, but the five counters printed by
# `sysctl -n kern.cp_time` are comma-separated here. Reports the share of the
# elapsed ticks spent in the first four counters once a previous sample
# exists, and returns the sample stored in @old_cpu.
def openbsd_cpu
  raw_counters = `sysctl -n kern.cp_time 2>/dev/null`.split(',')
  user, nice, sys, intr, idle = raw_counters.map(&:to_i)

  if @old_cpu
    prev_user, prev_nice, prev_sys, prev_intr, prev_idle = @old_cpu

    busy = (user + nice + sys + intr) - (prev_user + prev_nice + prev_sys + prev_intr)
    elapsed = busy + idle - prev_idle
    share = busy.to_f / elapsed

    report_pct :cpu, share,
               "user+nice+sytem+interrupt\n\n#{reverse_numeric_sort_with_header(`ps -axo pcpu,pid,comm`)}"
  end

  # Baseline for the next call.
  @old_cpu = [user, nice, sys, intr, idle]
end
|
182
|
+
|
183
|
+
# Samples whitespace-separated fields 29..32 of `mpstat -a` output. Once a
# previous sample exists, reports the change in the first three fields as a
# share of the change in all four. Returns the sample stored in @old_cpu.
def sunos_cpu
  fields = `mpstat -a 2>/dev/null`.split
  sample = fields.values_at(29, 30, 31, 32).map(&:to_i)

  if @old_cpu
    busy = sample.take(3).sum - @old_cpu.take(3).sum
    idle_change = sample[3] - @old_cpu[3]
    elapsed = busy + idle_change

    # Two identical samples would make `elapsed` zero; report 0 instead of
    # dividing by it.
    share = idle_change.zero? && busy.zero? ? 0 : busy.to_f / elapsed

    report_pct :cpu, share,
               "user+sytem+interrupt\n\n#{reverse_numeric_sort_with_header(`ps -ao pcpu,pid,comm`)}"
  end

  # Baseline for the next call.
  @old_cpu = sample
end
|
207
|
+
|
208
|
+
# Takes the text after the last ':' in `uptime` output, strips all
# whitespace, reads the first comma-separated figure, divides it by @cores
# and raises a 'load' alert graded against @limits[:load].
def bsd_load
  tail = `uptime`.split(':')[-1].chomp
  averages = tail.gsub(/\s+/, '').split(',')
  per_core = averages[0].to_f / @cores

  thresholds = @limits[:load]
  severity =
    if per_core > thresholds[:critical]
      :critical
    elsif per_core > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert 'load', severity, per_core, "1-minute load average/core is #{per_core}"
end
|
219
|
+
|
220
|
+
# Reads the page, wired and active counts from sysctl (in that order) and
# reports (wired + active) / pages as the used-memory fraction.
def freebsd_memory
  pages, wired, active =
    `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
  used_share = (wired.to_f + active.to_f) / pages.to_f

  report_pct :memory, used_share, "used\n\n#{reverse_numeric_sort_with_header(`ps -axo pmem,pid,comm`)}"
end
|
226
|
+
|
227
|
+
# Reports memory pressure from `vmstat` output as the ratio of
# whitespace-separated field 28 to field 29.
#
# Unlike the other platforms this is a ratio of active to free memory rather
# than a fraction of total RAM.
def openbsd_memory
  meminfo = `vmstat 2>/dev/null`.chomp.split
  # Both operands need #to_f: the fields are Strings, and Float / String
  # raises TypeError.
  fraction = meminfo[28].to_f / meminfo[29].to_f

  report_pct :memory, fraction, "used\n\n#{reverse_numeric_sort_with_header(`ps -axo pmem,pid,comm`)}"
end
|
233
|
+
|
234
|
+
# Reports used memory as (total - free) / total, where `free` is field 32 of
# `vmstat` output and `total` is the third word of the `prtconf` Memory line
# multiplied by 1024.
# NOTE(review): the previous comment said prtconf reports GB and vmstat MB;
# confirm the units of both tools before relying on the scale factor.
def sunos_memory
  vmstat_fields = `vmstat 2>/dev/null`.chomp.split
  total = `prtconf | grep Memory`.split[2].to_f * 1024
  free = vmstat_fields[32].to_f
  used_share = (total - free) / total

  report_pct :memory, used_share, "used\n\n#{reverse_numeric_sort_with_header(`ps -ao pmem,pid,comm`)}"
end
|
241
|
+
|
242
|
+
# Runs `top -l 1` once and caches the parsed figures in @topdata:
#   :stamp  - Time.now.to_i when the sample was taken
#   :load   - first figure of the "Load Avg:" line
#   :cpu    - 1 - idle% / 100 from the "CPU usage:" line
#   :memory - used fraction derived from the "PhysMem:" line
# Keys whose line is missing or unparseable are simply left unset.
#
# Fixes over the previous version:
#   * the load capture was `([0-9.])+`, which keeps only the LAST character
#     of the number, and it targeted the third figure although the consumer
#     labels the value "1-minute"; the first figure is now captured whole;
#   * unit letters are upper-cased before lookup, because the patterns are
#     case-insensitive and a lower-case letter made `index` return nil;
#   * the short PhysMem format tolerates extra items after "wired" inside
#     the parentheses.
def darwin_top
  raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
  @topdata = { stamp: Time.now.to_i }

  # "3", "G" -> 3 * 1024**3; B/K/M/G/T are successive powers of 1024.
  to_bytes = ->(count, unit) { count.to_i * (1024**'BKMGT'.index(unit.upcase)) }

  raw.each_line do |ln|
    if (mdat = ln.match(/Load Avg: ([0-9.]+), [0-9.]+, [0-9.]+/i))
      @topdata[:load] = mdat[1].to_f
    elsif (mdat = ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i))
      @topdata[:cpu] = 1 - (mdat[1].to_f / 100)
    elsif (mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i))
      wired, active, inactive, used, free = mdat.captures.each_slice(2).map { |count, unit| to_bytes.call(count, unit) }
      @topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
    # Shorter format, e.g. "PhysMem: 4662M used (1328M wired), 2782M unused."
    elsif (mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) used \([0-9]+[BKMGT] wired[^)]*\), ([0-9]+)([BKMGT]) unused/i))
      used = to_bytes.call(mdat[1], mdat[2])
      unused = to_bytes.call(mdat[3], mdat[4])
      @topdata[:memory] = used.to_f / (used + unused)
    end
  end
end
|
267
|
+
|
268
|
+
# Reports CPU usage from the cached `top` sample, taking a fresh sample first
# when the cached one is at least opts[:interval] seconds old.
# Raises an `unknown` alert and returns false when no CPU figure is available.
def darwin_cpu
  sample_age = Time.now.to_i - @topdata[:stamp]
  darwin_top if sample_age >= opts[:interval]

  if @topdata[:cpu]
    report_pct :cpu, @topdata[:cpu], "usage\n\n#{reverse_numeric_sort_with_header(`ps -eo pcpu,pid,comm`)}"
  else
    alert 'cpu', :unknown, nil, 'unable to get CPU stats from top'
    false
  end
end
|
276
|
+
|
277
|
+
# Reports the load figure from the cached `top` sample divided by @cores,
# taking a fresh sample first when the cached one is at least
# opts[:interval] seconds old.
# Raises an `unknown` alert and returns false when no load figure is available.
def darwin_load
  sample_age = Time.now.to_i - @topdata[:stamp]
  darwin_top if sample_age >= opts[:interval]

  unless @topdata[:load]
    alert 'load', :unknown, nil, 'unable to get load ave from top'
    return false
  end

  per_core = @topdata[:load] / @cores
  thresholds = @limits[:load]
  severity =
    if per_core > thresholds[:critical]
      :critical
    elsif per_core > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert 'load', severity, per_core, "1-minute load average per core is #{per_core}"
end
|
292
|
+
|
293
|
+
# Reports memory usage from the cached `top` sample, taking a fresh sample
# first when the cached one is at least opts[:interval] seconds old.
# Raises an `unknown` alert and returns false when no memory figure is available.
def darwin_memory
  sample_age = Time.now.to_i - @topdata[:stamp]
  darwin_top if sample_age >= opts[:interval]

  if @topdata[:memory]
    report_pct :memory, @topdata[:memory], "usage\n\n#{reverse_numeric_sort_with_header(`ps -eo pmem,pid,comm`)}"
  else
    alert 'memory', :unknown, nil, 'unable to get memory data from top'
    false
  end
end
|
301
|
+
|
302
|
+
# Runs the platform-appropriate `df -P` command and returns its output.
# Filesystem types listed in opts[:disk_ignorefs] are excluded where the
# local df offers a way to do so.
def df
  if %w[darwin freebsd openbsd].include?(@ostype)
    `df -P -t no#{opts[:disk_ignorefs].join(',')}`
  elsif @ostype == 'sunos'
    `df -P` # Is there a good way to exclude iso9660 here?
  elsif @supports_exclude_type
    exclusions = opts[:disk_ignorefs].map { |fstype| "--exclude-type=#{fstype}" }
    `df -P #{exclusions.join(' ')}`
  else
    `df -P`
  end
end
|
316
|
+
|
317
|
+
# Raises one "disk <mount point>" alert per row of `df` output, graded
# against @limits[:disk].
#
# The metric is used / (used + available), i.e. capacity without the blocks
# reserved by the filesystem; the description carries df's own capacity
# column. Rows whose used and available columns are both zero are skipped:
# they previously produced 0.0 / 0 = NaN, which was sent as the metric of an
# `ok` event.
def disk
  df.split(/\n/).each do |row|
    fields = row.split(/\s+/)
    next if fields[0] == 'Filesystem' # header row

    # Columns: filesystem, blocks, used, available, capacity, mount point
    used = fields[2].to_i
    available = fields[3].to_i
    total_without_reservation = used + available
    next if total_without_reservation.zero?

    usage = used.to_f / total_without_reservation
    severity =
      if usage > @limits[:disk][:critical]
        :critical
      elsif usage > @limits[:disk][:warning]
        :warning
      else
        :ok
      end

    alert "disk #{fields[5]}", severity, usage, "#{fields[4]} used"
  end
end
|
338
|
+
|
339
|
+
# Runs every enabled probe once, in the order cpu, memory, disk, load.
# Returns whatever the load probe returned, or nil when it is disabled.
def tick
  outcome = nil
  [
    [@cpu_enabled, @cpu],
    [@memory_enabled, @memory],
    [@disk_enabled, @disk],
    [@load_enabled, @load],
  ].each do |enabled, probe|
    outcome = enabled ? probe.call : nil
  end
  outcome
end
|
345
|
+
end
|
346
|
+
end
|
347
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'riemann/tools'
|
4
|
+
|
5
|
+
module Riemann
|
6
|
+
module Tools
|
7
|
+
class Net
|
8
|
+
include Riemann::Tools
|
9
|
+
|
10
|
+
opt :interfaces, 'Interfaces to monitor', type: :strings, default: nil
|
11
|
+
opt :ignore_interfaces, 'Interfaces to ignore', type: :strings, default: ['lo']
|
12
|
+
|
13
|
+
# Clears the previous-sample cache and copies the interface filters from the
# options, dropping empty entries. Entries are duplicated so the stored
# patterns are independent of the option strings.
def initialize
  @old_state = nil
  @interfaces = (opts[:interfaces] || []).reject(&:empty?).map(&:dup)
  @ignore_interfaces = opts[:ignore_interfaces].reject(&:empty?).map(&:dup)
end
|
22
|
+
|
23
|
+
# Parses /proc/net/dev into a flat hash of "<interface> <counter>" => Integer.
#
# An interface is included when @interfaces is empty or one of its patterns
# matches the name, and no pattern in @ignore_interfaces matches it.
# Counter names are paired with the numeric columns in order.
def state
  counters = [
    'rx bytes', 'rx packets', 'rx errs', 'rx drop',
    'rx fifo', 'rx frame', 'rx compressed', 'rx multicast',
    'tx bytes', 'tx packets', 'tx errs', 'tx drops',
    'tx fifo', 'tx colls', 'tx carrier', 'tx compressed',
  ]

  File.read('/proc/net/dev').split("\n").each_with_object({}) do |line, readings|
    parsed = line.match(/\A\s*([[:alnum:]-]+?):\s*([\s\d]+)\s*/)
    next unless parsed

    iface = parsed[1]
    wanted = @interfaces.empty? || @interfaces.any? { |pattern| iface.match?(pattern) }
    next unless wanted
    next if @ignore_interfaces.any? { |pattern| iface.match?(pattern) }

    values = parsed[2].split(/\s+/).map(&:to_i)
    counters.zip(values) do |counter, value|
      readings["#{iface} #{counter}"] = value
    end
  end
end
|
60
|
+
|
61
|
+
# Takes a fresh counter sample and, when a previous sample exists:
#   * reports every service that disappeared since then as 'expired';
#   * reports the per-second rate (delta / opts[:interval]) of every service
#     present in both samples.
# A positive delta on an error or drop counter is reported as 'warning',
# everything else as 'ok'. The pattern accepts both "drop" and "drops"
# because the transmit counter is named "tx drops"; the former `/drop$/`
# never matched it, so transmit drops could not raise a warning.
# The fresh sample then becomes the baseline for the next call.
def tick
  state = self.state

  if @old_state
    # Report services from `@old_state` that don't exist in `state` as expired
    @old_state.reject { |service, _metric| state.key?(service) }.each_key do |service|
      report(service: service.dup, state: 'expired')
    end

    # Report delta for services that have values in both `@old_state` and `state`
    state.each do |service, metric|
      next unless @old_state.key?(service)

      delta = metric - @old_state[service]
      troubled = delta.positive? && service.match?(/(?:drops?|errs)$/)

      report(
        service: service.dup,
        metric: (delta.to_f / opts[:interval]),
        state: troubled ? 'warning' : 'ok',
      )
    end
  end

  @old_state = state
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'riemann/tools'
|
4
|
+
|
5
|
+
module Riemann
|
6
|
+
module Tools
|
7
|
+
class Ntp
|
8
|
+
include Riemann::Tools
|
9
|
+
|
10
|
+
# Records the local hostname and the lower-cased `uname -s` value, and aborts
# the process when running on darwin.
def initialize
  @hostname = `hostname`.chomp
  @ostype = `uname -s`.chomp.downcase
  return unless @ostype == 'darwin'

  abort 'WARNING: macOS not explicitly supported. Exiting.'
end
|
15
|
+
|
16
|
+
# Runs `ntpq -p -n` and, for every peer row, sends the delay, offset and
# jitter columns (fields 7, 8 and 9). Rows containing a field that starts
# with "===" or "remote" (the table header) are skipped. The peer name is the
# first field with every '*', '-' and '+' character removed.
def tick
  `ntpq -p -n`.each_line do |row|
    fields = row.split
    next if fields.any? { |field| field.match?(/^===/) } || fields.any? { |field| field.match?(/^remote/) }

    @ntp_host = fields[0].gsub(/[*+-]/, '')
    { 'delay' => 7, 'offset' => 8, 'jitter' => 9 }.each do |kind, column|
      send(kind, fields[column])
    end
  end
end
|
28
|
+
|
29
|
+
# Reports one NTP reading for the peer currently stored in @ntp_host.
#
# type   - reading name, used in the service and description text
# metric - reading value, converted with #to_f
#
# NOTE(review): this definition shadows Object#send for instances of the class.
def send(type, metric)
  label = "ntp peer #{@ntp_host} #{type}"

  report(
    host: @hostname,
    service: label,
    metric: metric.to_f,
    state: 'ok',
    description: label.dup, # keep service and description as separate strings
    tags: ['ntp'],
  )
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|