riemann-tools 1.0.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +15 -0
  4. data/.github/workflows/codeql-analysis.yml +72 -0
  5. data/.gitignore +2 -0
  6. data/.rubocop.yml +40 -0
  7. data/.ruby-version +1 -0
  8. data/CHANGELOG.md +62 -2
  9. data/README.markdown +8 -24
  10. data/Rakefile +14 -5
  11. data/SECURITY.md +42 -0
  12. data/bin/riemann-apache-status +3 -94
  13. data/bin/riemann-bench +4 -67
  14. data/bin/riemann-cloudant +3 -54
  15. data/bin/riemann-consul +3 -102
  16. data/bin/riemann-dir-files-count +3 -51
  17. data/bin/riemann-dir-space +3 -51
  18. data/bin/riemann-diskstats +3 -91
  19. data/bin/riemann-fd +4 -63
  20. data/bin/riemann-freeswitch +4 -116
  21. data/bin/riemann-haproxy +3 -54
  22. data/bin/riemann-health +3 -344
  23. data/bin/riemann-kvminstance +4 -19
  24. data/bin/riemann-memcached +3 -33
  25. data/bin/riemann-net +3 -105
  26. data/bin/riemann-nginx-status +3 -80
  27. data/bin/riemann-ntp +3 -34
  28. data/bin/riemann-portcheck +3 -37
  29. data/bin/riemann-proc +3 -104
  30. data/bin/riemann-varnish +3 -50
  31. data/bin/riemann-wrapper +75 -0
  32. data/bin/riemann-zookeeper +3 -37
  33. data/lib/riemann/tools/apache_status.rb +107 -0
  34. data/lib/riemann/tools/bench.rb +72 -0
  35. data/lib/riemann/tools/cloudant.rb +57 -0
  36. data/lib/riemann/tools/consul_health.rb +107 -0
  37. data/lib/riemann/tools/dir_files_count.rb +56 -0
  38. data/lib/riemann/tools/dir_space.rb +56 -0
  39. data/lib/riemann/tools/diskstats.rb +94 -0
  40. data/lib/riemann/tools/fd.rb +81 -0
  41. data/lib/riemann/tools/freeswitch.rb +119 -0
  42. data/lib/riemann/tools/haproxy.rb +59 -0
  43. data/lib/riemann/tools/health.rb +478 -0
  44. data/lib/riemann/tools/kvm.rb +23 -0
  45. data/lib/riemann/tools/memcached.rb +38 -0
  46. data/lib/riemann/tools/net.rb +105 -0
  47. data/lib/riemann/tools/nginx_status.rb +86 -0
  48. data/lib/riemann/tools/ntp.rb +42 -0
  49. data/lib/riemann/tools/portcheck.rb +45 -0
  50. data/lib/riemann/tools/proc.rb +109 -0
  51. data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
  52. data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
  53. data/lib/riemann/tools/varnish.rb +55 -0
  54. data/lib/riemann/tools/version.rb +1 -1
  55. data/lib/riemann/tools/zookeeper.rb +40 -0
  56. data/lib/riemann/tools.rb +31 -52
  57. data/riemann-tools.gemspec +8 -2
  58. data/tools/riemann-aws/{Rakefile.rb → Rakefile} +8 -9
  59. data/tools/riemann-aws/bin/riemann-aws-billing +4 -83
  60. data/tools/riemann-aws/bin/riemann-aws-rds-status +4 -50
  61. data/tools/riemann-aws/bin/riemann-aws-sqs-status +4 -40
  62. data/tools/riemann-aws/bin/riemann-aws-status +4 -67
  63. data/tools/riemann-aws/bin/riemann-elb-metrics +4 -163
  64. data/tools/riemann-aws/bin/riemann-s3-list +4 -78
  65. data/tools/riemann-aws/bin/riemann-s3-status +4 -95
  66. data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
  67. data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
  68. data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
  69. data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
  70. data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
  71. data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
  72. data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
  73. data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +8 -9
  74. data/tools/riemann-chronos/bin/riemann-chronos +3 -139
  75. data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
  76. data/tools/riemann-docker/{Rakefile.rb → Rakefile} +7 -8
  77. data/tools/riemann-docker/bin/riemann-docker +4 -213
  78. data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
  79. data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +8 -9
  80. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +3 -161
  81. data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
  82. data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +8 -9
  83. data/tools/riemann-marathon/bin/riemann-marathon +3 -142
  84. data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
  85. data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +8 -9
  86. data/tools/riemann-mesos/bin/riemann-mesos +3 -126
  87. data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
  88. data/tools/riemann-munin/{Rakefile.rb → Rakefile} +7 -8
  89. data/tools/riemann-munin/bin/riemann-munin +3 -32
  90. data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
  91. data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +8 -9
  92. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +3 -264
  93. data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
  94. data/tools/riemann-riak/{Rakefile.rb → Rakefile} +7 -8
  95. data/tools/riemann-riak/bin/riemann-riak +3 -326
  96. data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
  97. data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
  98. data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
  99. metadata +112 -16
  100. data/.travis.yml +0 -31
  101. data/tools/riemann-riak/riak_status/key_count.erl +0 -13
  102. data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
  103. data/tools/riemann-riak/riak_status/ringready.erl +0 -9
@@ -0,0 +1,478 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+ require 'riemann/tools/utils'
5
+ require 'riemann/tools/uptime_parser.tab'
6
+
7
+ # Reports current CPU, disk, load average, and memory use to riemann.
8
+ module Riemann
9
+ module Tools
10
+ class Health
11
+ include Riemann::Tools
12
+ include Riemann::Tools::Utils
13
+
14
+ opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
15
+ opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
16
+ opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
17
+ opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
18
+ opt :disk_ignorefs, 'A list of filesystem types to ignore',
19
+ default: %w[anon_inodefs autofs cd9660 devfs devtmpfs fdescfs iso9660 linprocfs linsysfs nfs overlay procfs squashfs tmpfs]
20
+ opt :load_warning, 'Load warning threshold (load average / core)', default: 3.0
21
+ opt :load_critical, 'Load critical threshold (load average / core)', default: 8.0
22
+ opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
23
+ opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
24
+ opt :uptime_warning, 'Uptime warning threshold', default: 86_400
25
+ opt :uptime_critical, 'Uptime critical threshold', default: 3600
26
+ opt :users_warning, 'Users warning threshold', default: 1
27
+ opt :users_critical, 'Users critical threshold', default: 1
28
+ opt :swap_warning, 'Swap warning threshold', default: 0.4
29
+ opt :swap_critical, 'Swap critical threshold', default: 0.5
30
+ opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu load memory disk swap]
31
+
32
# Prepares the health reporter:
#   1. builds the warning/critical thresholds for every check from the options,
#   2. detects the operating system with `uname -s` and binds the matching
#      probe methods (anything unrecognised falls back to the Linux probes),
#   3. flags the checks listed in the :checks option as enabled.
def initialize
  @limits = %i[cpu disk load memory uptime users swap].each_with_object({}) do |check, limits|
    limits[check] = {
      critical: opts[:"#{check}_critical"],
      warning: opts[:"#{check}_warning"],
    }
  end

  @ostype = `uname -s`.chomp.downcase
  linux_fallback = false
  probes =
    case @ostype
    when 'darwin'
      @cores = `sysctl -n hw.ncpu`.to_i
      { cpu: :darwin_cpu, load: :darwin_load, memory: :darwin_memory, uptime: :bsd_uptime, swap: :bsd_swap }
    when 'freebsd'
      @cores = `sysctl -n hw.ncpu`.to_i
      { cpu: :freebsd_cpu, load: :bsd_load, memory: :freebsd_memory, uptime: :bsd_uptime, swap: :bsd_swap }
    when 'openbsd'
      @cores = `sysctl -n hw.ncpu`.to_i
      { cpu: :openbsd_cpu, load: :bsd_load, memory: :openbsd_memory, uptime: :bsd_uptime, swap: :bsd_swap }
    when 'sunos'
      @cores = `mpstat -a 2>/dev/null`.split[33].to_i
      { cpu: :sunos_cpu, load: :bsd_load, memory: :sunos_memory, uptime: :bsd_uptime, swap: :bsd_swap }
    else
      linux_fallback = true
      @cores = `nproc`.to_i
      puts "WARNING: OS '#{@ostype}' not explicitly supported. Falling back to Linux" unless @ostype == 'linux'
      { cpu: :linux_cpu, load: :linux_load, memory: :linux_memory, uptime: :linux_uptime, swap: :linux_swap }
    end

  @cpu = method(probes[:cpu])
  @disk = method(:disk)
  @load = method(probes[:load])
  @memory = method(probes[:memory])
  @uptime = method(probes[:uptime])
  @swap = method(probes[:swap])
  # Only the Linux fallback checks whether df understands --exclude-type.
  @supports_exclude_type = `df --help 2>&1 | grep -e "--exclude-type"` != '' if linux_fallback
  @users = method(:users)

  # Unknown check names are silently ignored.
  opts[:checks].each do |check|
    next unless %w[disk load cpu memory uptime users swap].include?(check)

    instance_variable_set(:"@#{check}_enabled", true)
  end
end
107
+
108
# Sends one event through `report`.
#
# service and state are stringified and metric is coerced with #to_f
# (so a nil metric is sent as 0.0); description is passed through as given.
def alert(service, state, metric, description)
  event = {
    service: service.to_s,
    state: state.to_s,
    metric: metric.to_f,
    description: description,
  }
  report(**event)
end
116
+
117
# Reports a fractional metric (0.0..1.0 style) for +service+.
#
# Nothing is sent when +fraction+ is nil/false. Otherwise the state is
# :critical above the critical limit, :warning above the warning limit and
# :ok in every other case; the description is the percentage with two
# decimals followed by +report+.
def report_pct(service, fraction, report)
  return unless fraction

  thresholds = @limits[service]
  state =
    if fraction > thresholds[:critical]
      :critical
    elsif fraction > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert service, state, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
end
128
+
129
# Reports a counter-style metric for +service+.
#
# Nothing is sent when +value+ is nil/false. The limits are inclusive here:
# a value equal to the critical (or warning) limit already triggers that
# state. The description is "<value> <report>".
def report_int(service, value, report)
  return unless value

  thresholds = @limits[service]
  state =
    if value >= thresholds[:critical]
      :critical
    elsif value >= thresholds[:warning]
      :warning
    else
      :ok
    end

  alert service, state, value, "#{value} #{report}"
end
140
+
141
# Reports the 'uptime' service.
#
# Unlike the other checks, low values are the alarming ones: an uptime below
# the critical limit is :critical, below the warning limit is :warning,
# anything else is :ok. The description is the human-readable uptime.
def report_uptime(uptime)
  description = uptime_to_human(uptime)
  thresholds = @limits[:uptime]

  state =
    if uptime < thresholds[:critical]
      :critical
    elsif uptime < thresholds[:warning]
      :warning
    else
      :ok
    end

  alert 'uptime', state, uptime, description
end
152
+
153
# Reports CPU usage on Linux from the aggregate "cpu" line of /proc/stat.
#
# The first four counters (user, nice, system, idle) are compared with the
# sample remembered from the previous call; the used share of the elapsed
# jiffies is reported through report_pct together with a process listing.
# The first call only records a baseline and reports nothing.
#
# Returns false (after sending an :unknown alert) when /proc/stat has no
# usable "cpu" line; otherwise returns the stored sample.
def linux_cpu
  stat = File.read('/proc/stat').match(/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)
  unless stat
    alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
    return false
  end
  u2, n2, s2, i2 = stat.captures.map(&:to_i)

  if @old_cpu
    u1, n1, s1, i1 = @old_cpu

    used = (u2 + n2 + s2) - (u1 + n1 + s1)
    total = used + i2 - i1
    # When none of the four counters moved between samples the division
    # would be 0.0 / 0 (NaN); report an idle CPU instead, as sunos_cpu does.
    fraction = total.zero? ? 0.0 : used.to_f / total

    report_pct :cpu, fraction, "user+nice+system\n\n#{reverse_numeric_sort_with_header(`ps -eo pcpu,pid,comm`)}"
  end

  @old_cpu = [u2, n2, s2, i2]
end
174
+
175
# Reports the per-core load on Linux.
#
# Takes the first field of /proc/loadavg, divides it by @cores and compares
# the result with the load limits (strictly greater than).
def linux_load
  first_field = File.read('/proc/loadavg').split(/\s+/).first
  per_core = first_field.to_f / @cores
  thresholds = @limits[:load]

  state =
    if per_core > thresholds[:critical]
      :critical
    elsif per_core > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert 'load', state, per_core, "1-minute load average/core is #{per_core}"
end
185
+
186
# Reports memory usage on Linux from /proc/meminfo.
#
# Every line is split into a name and an integer value (the unit column is
# ignored, i.e. kB is assumed). MemFree + Buffers + Cached counts as free;
# the used fraction of MemTotal is reported with a process listing.
def linux_memory
  pairs = File.read('/proc/meminfo').split("\n").map do |line|
    name, amount = line.split(/:?\s+/)
    [name, amount.to_i]
  end
  meminfo = pairs.to_h

  reclaimable = meminfo.values_at('MemFree', 'Buffers', 'Cached').inject(0) { |sum, kb| sum + kb.to_i }
  installed = meminfo['MemTotal'].to_i
  used_fraction = 1 - (reclaimable.to_f / installed)

  report_pct :memory, used_fraction, "used\n\n#{reverse_numeric_sort_with_header(`ps -eo pmem,pid,comm`)}"
end
199
+
200
# Reports CPU usage on FreeBSD from `sysctl -n kern.cp_time`.
#
# The five counters are read as user, nice, system, interrupt and idle, and
# compared with the sample kept from the previous call. The first call only
# records a baseline and reports nothing.
#
# Returns false (after sending an :unknown alert) when sysctl yields fewer
# than five counters; otherwise returns the stored sample.
def freebsd_cpu
  counters = `sysctl -n kern.cp_time 2>/dev/null`.split.map(&:to_i)
  if counters.size < 5
    # Previously nils were stored here and the next call raised NoMethodError.
    alert 'cpu', :unknown, nil, 'unable to get CPU stats from sysctl kern.cp_time'
    return false
  end
  u2, n2, s2, t2, i2 = counters

  if @old_cpu
    u1, n1, s1, t1, i1 = @old_cpu

    used = (u2 + n2 + s2 + t2) - (u1 + n1 + s1 + t1)
    total = used + i2 - i1
    # Unchanged counters would give 0.0 / 0 (NaN); report an idle CPU instead.
    fraction = total.zero? ? 0.0 : used.to_f / total

    report_pct :cpu, fraction,
               "user+nice+system+interrupt\n\n#{reverse_numeric_sort_with_header(`ps -axo pcpu,pid,comm`)}"
  end

  @old_cpu = [u2, n2, s2, t2, i2]
end
216
+
217
# Reports CPU usage on OpenBSD from `sysctl -n kern.cp_time`, whose counters
# are comma-separated.
#
# The first five counters are read as user, nice, system, interrupt and idle
# and compared with the sample kept from the previous call. The first call
# only records a baseline and reports nothing.
#
# Returns false (after sending an :unknown alert) when sysctl yields fewer
# than five counters; otherwise returns the stored sample.
def openbsd_cpu
  counters = `sysctl -n kern.cp_time 2>/dev/null`.split(',').map(&:to_i)
  if counters.size < 5
    # Previously nils were stored here and the next call raised NoMethodError.
    alert 'cpu', :unknown, nil, 'unable to get CPU stats from sysctl kern.cp_time'
    return false
  end
  u2, n2, s2, t2, i2 = counters

  if @old_cpu
    u1, n1, s1, t1, i1 = @old_cpu

    used = (u2 + n2 + s2 + t2) - (u1 + n1 + s1 + t1)
    total = used + i2 - i1
    # Unchanged counters would give 0.0 / 0 (NaN); report an idle CPU instead.
    fraction = total.zero? ? 0.0 : used.to_f / total

    report_pct :cpu, fraction,
               "user+nice+system+interrupt\n\n#{reverse_numeric_sort_with_header(`ps -axo pcpu,pid,comm`)}"
  end

  @old_cpu = [u2, n2, s2, t2, i2]
end
233
+
234
# Reports CPU usage on SunOS from `mpstat -a`.
#
# Whitespace-separated tokens 29..32 of the output are read as the user,
# system, interrupt/wait and idle figures and compared with the sample kept
# from the previous call. The first call only records a baseline.
#
# NOTE(review): the token positions assume a fixed `mpstat -a` layout, and
# the delta arithmetic assumes cumulative counters — confirm both against
# real mpstat output.
def sunos_cpu
  mpstats = `mpstat -a 2>/dev/null`.split
  u2, s2, t2, i2 = mpstats.values_at(29, 30, 31, 32).map(&:to_i)

  if @old_cpu
    u1, s1, t1, i1 = @old_cpu

    used = (u2 + s2 + t2) - (u1 + s1 + t1)
    total = used + i2 - i1
    # If the system is <1% used in both samples then total will be
    # 0 + (99 - 99); avoid a division by zero.
    fraction = if i2 == i1 && used.zero?
                 0
               else
                 used.to_f / total
               end

    report_pct :cpu, fraction,
               "user+system+interrupt\n\n#{reverse_numeric_sort_with_header(`ps -ao pcpu,pid,comm`)}"
  end

  @old_cpu = [u2, s2, t2, i2]
end
258
+
259
# Returns the UptimeParser instance, creating it on first use and reusing it
# afterwards.
def uptime_parser
  return @uptime_parser if @uptime_parser

  @uptime_parser = UptimeParser.new
end
262
+
263
# Returns the parsed output of the `uptime` command.
#
# The result is stored in @cached_data[:uptime], so the command runs at most
# once until the cache is reset.
def uptime
  cached = @cached_data[:uptime]
  return cached if cached

  @cached_data[:uptime] = uptime_parser.parse(`uptime`)
end
266
+
267
# Reports the per-core load from the parsed `uptime` output.
#
# Uses entry 1 of the parsed :load_averages, divides it by @cores and
# compares the result with the load limits (strictly greater than).
def bsd_load
  per_core = uptime[:load_averages][1] / @cores
  thresholds = @limits[:load]

  state =
    if per_core > thresholds[:critical]
      :critical
    elsif per_core > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert 'load', state, per_core, "1-minute load average/core is #{per_core}"
end
277
+
278
# Reports memory usage on FreeBSD.
#
# Reads the total, wired and active page counts with sysctl and reports
# (wired + active) / total together with a process listing.
def freebsd_memory
  page_count, wired, active =
    `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
  in_use = wired.to_f + active.to_f
  used_fraction = in_use / page_count.to_f

  report_pct :memory, used_fraction, "used\n\n#{reverse_numeric_sort_with_header(`ps -axo pmem,pid,comm`)}"
end
284
+
285
# Reports memory usage on OpenBSD from `vmstat`.
#
# Whitespace-separated tokens 28 and 29 of the output are used; per the
# original author's note this is the ratio of active to free memory, unlike
# the other platforms.
def openbsd_memory
  meminfo = `vmstat 2>/dev/null`.chomp.split
  # Both operands must be converted: dividing a Float by the raw String token
  # raises TypeError.
  fraction = meminfo[28].to_f / meminfo[29].to_f

  report_pct :memory, fraction, "used\n\n#{reverse_numeric_sort_with_header(`ps -axo pmem,pid,comm`)}"
end
291
+
292
# Reports memory usage on SunOS.
#
# Token 32 of `vmstat` is taken as the free amount and the third token of the
# `prtconf | grep Memory` line, multiplied by 1024, as the installed amount.
# (Original author's note: prtconf reports in GB but vmstat is in MB —
# TODO confirm the units.)
def sunos_memory
  vmstat_fields = `vmstat 2>/dev/null`.chomp.split
  installed = `prtconf | grep Memory`.split[2].to_f * 1024
  free = vmstat_fields[32].to_f
  used_fraction = (installed - free) / installed

  report_pct :memory, used_fraction, "used\n\n#{reverse_numeric_sort_with_header(`ps -ao pmem,pid,comm`)}"
end
299
+
300
# Collects load, CPU and memory figures on macOS from a single `top -l 1` run.
#
# Returns a hash that may contain:
#   :load   - the first (1-minute) value of the "Load Avg" line
#   :cpu    - 1 minus the idle percentage of the "CPU usage" line
#   :memory - used fraction derived from the "PhysMem" line (two formats)
# Keys whose line could not be parsed are absent. The result is stored in
# @cached_data[:darwin_top] and reused until the cache is reset.
def darwin_top
  return @cached_data[:darwin_top] if @cached_data[:darwin_top]

  # The patterns below are case-insensitive, so normalise the unit letter
  # before looking up its power of 1024.
  to_bytes = ->(count, unit) { count.to_i * (1024**'BKMGT'.index(unit.upcase)) }

  raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
  topdata = {}
  raw.each_line do |ln|
    # Capture the whole first figure: the consumers describe it as the
    # 1-minute average. (The group used to sit on the third figure and kept
    # only its last character.)
    if (mdat = ln.match(/Load Avg: ([0-9.]+), [0-9.]+, [0-9.]+/i))
      topdata[:load] = mdat[1].to_f
    elsif (mdat = ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i))
      topdata[:cpu] = 1 - (mdat[1].to_f / 100)
    elsif (mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i))
      wired, active, inactive, used, free =
        mdat.captures.each_slice(2).map { |count, unit| to_bytes.call(count, unit) }
      topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
    # OS X Mavericks uses a different format for top.
    # Example: PhysMem: 4662M used (1328M wired), 2782M unused.
    elsif (mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) used \([0-9]+[BKMGT] wired\), ([0-9]+)([BKMGT]) unused/i))
      used = to_bytes.call(mdat[1], mdat[2])
      unused = to_bytes.call(mdat[3], mdat[4])
      topdata[:memory] = used.to_f / (used + unused)
    end
  end
  @cached_data[:darwin_top] = topdata
end
328
+
329
# Reports CPU usage on macOS using the :cpu figure from darwin_top.
#
# Sends an :unknown alert and returns false when top provided no CPU figure.
def darwin_cpu
  usage = darwin_top[:cpu]

  if usage
    report_pct :cpu, usage, "usage\n\n#{reverse_numeric_sort_with_header(`ps -eo pcpu,pid,comm`)}"
  else
    alert 'cpu', :unknown, nil, 'unable to get CPU stats from top'
    false
  end
end
337
+
338
# Reports the per-core load on macOS using the :load figure from darwin_top.
#
# Sends an :unknown alert and returns false when top provided no load figure;
# otherwise the figure is divided by @cores and compared with the load limits
# (strictly greater than).
def darwin_load
  load_figure = darwin_top[:load]
  unless load_figure
    alert 'load', :unknown, nil, 'unable to get load ave from top'
    return false
  end

  per_core = load_figure / @cores
  thresholds = @limits[:load]
  state =
    if per_core > thresholds[:critical]
      :critical
    elsif per_core > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert 'load', state, per_core, "1-minute load average per core is #{per_core}"
end
353
+
354
# Reports memory usage on macOS using the :memory figure from darwin_top.
#
# Sends an :unknown alert and returns false when top provided no memory
# figure.
def darwin_memory
  used_fraction = darwin_top[:memory]

  if used_fraction
    report_pct :memory, used_fraction, "usage\n\n#{reverse_numeric_sort_with_header(`ps -eo pmem,pid,comm`)}"
  else
    alert 'memory', :unknown, nil, 'unable to get memory data from top'
    false
  end
end
362
+
363
# Runs `df -P` and returns its raw output, excluding the filesystem types
# listed in the :disk_ignorefs option where the platform allows it:
#   - darwin/freebsd/openbsd: a single `-t no<type,type,...>` argument
#   - sunos: no exclusion is applied
#   - anything else: one `--exclude-type=<type>` flag per type, but only
#     when @supports_exclude_type is set
def df
  if %w[darwin freebsd openbsd].include?(@ostype)
    `df -P -t no#{opts[:disk_ignorefs].join(',')}`
  elsif @ostype == 'sunos' || !@supports_exclude_type
    `df -P`
  else
    exclusions = opts[:disk_ignorefs].map { |fstype| "--exclude-type=#{fstype}" }
    `df -P #{exclusions.join(' ')}`
  end
end
377
+
378
# Reports one "disk <mount point>" event per filesystem listed by df.
#
# The metric is used / (used + available), i.e. capacity without the
# reserved blocks, compared with the disk limits (strictly greater than).
# The description repeats df's own capacity column.
def disk
  df.split(/\n/).each do |row|
    fields = row.split(/\s+/)
    # Skip blank rows and the header row.
    next if fields.empty? || fields[0] == 'Filesystem'

    used = fields[2].to_i
    available = fields[3].to_i
    total_without_reservation = used + available

    # A zero-sized filesystem would otherwise produce 0.0 / 0 (NaN).
    fraction = total_without_reservation.zero? ? 0.0 : used.to_f / total_without_reservation

    # The mount point is the last column and may contain spaces; taking only
    # the sixth field would truncate it and make different mounts collide.
    mount_point = fields.drop(5).join(' ')

    state =
      if fraction > @limits[:disk][:critical]
        :critical
      elsif fraction > @limits[:disk][:warning]
        :warning
      else
        :ok
      end

    alert "disk #{mount_point}", state, fraction, "#{fields[4]} used"
  end
end
399
+
400
# Reports the uptime taken from the :uptime entry of the parsed `uptime`
# output.
def bsd_uptime
  report_uptime(uptime[:uptime])
end
405
+
406
# Reports the uptime on Linux: the first field of /proc/uptime, as a Float.
def linux_uptime
  seconds = File.read('/proc/uptime').split(/\s+/).first.to_f
  report_uptime(seconds)
end
411
+
412
# Reports the user count taken from the :users entry of the parsed `uptime`
# output; the description reads "user" for exactly one and "users" otherwise.
def users
  count = uptime[:users]
  noun = count == 1 ? 'user' : 'users'

  report_int(:users, count, noun)
end
417
+
418
# Reports swap usage from the last line of `swapinfo`.
#
# The second and third columns of that line are read as the total and the
# used amount. Nothing is reported (and nil is returned) when swapinfo
# prints nothing, when the line cannot be parsed as numbers (for example
# when only the header is present), or when the total is zero.
def bsd_swap
  summary = `swapinfo`.lines.last
  return unless summary

  _device, blocks, used, _avail, _capacity = summary.split(/\s+/)

  total = Integer(blocks)
  # A zero total would yield NaN or Infinity rather than a usable fraction.
  return if total.zero?

  report_pct :swap, Float(used) / total, 'used'
rescue ArgumentError, TypeError
  # Unparseable output (ArgumentError) or missing columns (TypeError on nil):
  # there is nothing meaningful to report.
  nil
end
427
+
428
# Reports swap usage on Linux from /proc/swaps.
#
# The header line is skipped; the third and fourth columns of every other
# line are summed as size and used. Nothing is reported when the total size
# is zero.
def linux_swap
  entries = File.read('/proc/swaps').lines.drop(1).map { |line| line.split(/\s+/) }

  total_size = entries.inject(0.0) { |sum, columns| sum + columns[2].to_f }
  total_used = entries.inject(0.0) { |sum, columns| sum + columns[3].to_f }
  return if total_size.zero?

  report_pct :swap, total_used / total_size, 'used'
end
447
+
448
# Formats a number of seconds as "[N day(s) ]H:MM".
#
# The value is truncated to whole seconds; the day part is left out when it
# is zero, and the hour is right-aligned in two columns.
def uptime_to_human(value)
  days, remainder = value.to_i.divmod(86_400)
  hrs, remainder = remainder.divmod(3600)
  mins = remainder / 60

  parts = []
  parts << "#{days} day#{'s' if days > 1}" unless days.zero?
  parts << format('%<hrs>2d:%<mins>02d', hrs: hrs, mins: mins)
  parts.join(' ')
end
460
+
461
# Runs one reporting cycle: resets the cache, then calls each enabled probe
# in the order cpu, memory, disk, load, uptime, users, swap.
def tick
  invalidate_cache

  schedule = [
    [@cpu, @cpu_enabled],
    [@memory, @memory_enabled],
    [@disk, @disk_enabled],
    [@load, @load_enabled],
    [@uptime, @uptime_enabled],
    [@users, @users_enabled],
    [@swap, @swap_enabled],
  ]
  # The value of the final (swap) step is what the method returns.
  schedule.map { |probe, enabled| probe.call if enabled }.last
end
472
+
473
# Drops every cached command result by replacing @cached_data with a fresh,
# empty hash.
def invalidate_cache
  @cached_data = {}
end
476
+ end
477
+ end
478
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ module Riemann
6
+ module Tools
7
+ class Kvm
8
+ include Riemann::Tools
9
+
10
# Reports how many libvirt guests are running.
#
# The count is the number of lines of `virsh list` (run with LANG=C) that
# contain "running"; it is sent as the metric of the 'KVM Running VMs'
# service with state 'info'.
def tick
  running_count = `LANG=C virsh list | grep -c running`.to_i

  report(
    service: 'KVM Running VMs',
    metric: running_count,
    state: 'info',
  )
end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Gathers memcached STATS and submits them to Riemann.
6
+ module Riemann
7
+ module Tools
8
+ class Memcached
9
+ include Riemann::Tools
10
+ require 'socket'
11
+
12
+ opt :memcached_host, 'Memcached hostname', default: 'localhost'
13
+ opt :memcached_port, 'Memcached port', default: 11_211
14
+
15
# Queries memcached with the "stats" command and reports every
# "STAT <name> <value>" line as a "memcached <name>" event (state 'ok',
# tag 'memcached', value converted with #to_f).
#
# Reading stops at the "END" line or when the server closes the connection.
# Lines that are not STAT lines are skipped. The socket is always closed,
# including when reporting raises.
def tick
  sock = TCPSocket.new(opts[:memcached_host], opts[:memcached_port])
  begin
    sock.print("stats\r\n")
    sock.flush

    # Every reply line is processed, the first one included: discarding it
    # would silently drop one statistic. `gets` returns nil at end of stream,
    # which ends the loop.
    while (line = sock.gets)
      break if line.strip == 'END'

      stat = line.match(/STAT (\w+) (\S+)/)
      next unless stat

      report(
        host: opts[:memcached_host].dup,
        service: "memcached #{stat[1]}",
        metric: stat[2].to_f,
        state: 'ok',
        tags: ['memcached'],
      )
    end
  ensure
    sock.close
  end
end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Gathers network interface statistics and submits them to Riemann.
6
+ module Riemann
7
+ module Tools
8
+ class Net
9
+ include Riemann::Tools
10
+
11
+ opt :interfaces, 'Interfaces to monitor', type: :strings, default: nil
12
+ opt :ignore_interfaces, 'Interfaces to ignore', type: :strings, default: ['lo']
13
+
14
# Starts without a previous sample and copies the interface filters from the
# options, dropping empty entries. No :interfaces option means "no include
# filter" (an empty list).
def initialize
  @old_state = nil

  requested = opts[:interfaces] || []
  @interfaces = requested.reject(&:empty?).map(&:dup)
  @ignore_interfaces = opts[:ignore_interfaces].reject(&:empty?).map(&:dup)
end
23
+
24
# Reads /proc/net/dev and returns a hash of "<iface> <counter>" => Integer.
#
# Only lines of the form "<name>: <numbers>" are considered. An interface is
# kept when @interfaces is empty or one of its patterns matches, and it is
# not matched by any @ignore_interfaces pattern. The numbers are paired, in
# order, with the sixteen counter names below.
def state
  counter_names = [
    'rx bytes', 'rx packets', 'rx errs', 'rx drop',
    'rx fifo', 'rx frame', 'rx compressed', 'rx multicast',
    'tx bytes', 'tx packets', 'tx errs', 'tx drop',
    'tx fifo', 'tx colls', 'tx carrier', 'tx compressed',
  ]

  File.read('/proc/net/dev').split("\n").each_with_object({}) do |line, counters|
    parsed = line.match(/\A\s*([[:alnum:]-]+?):\s*([\s\d]+)\s*/)
    next unless parsed

    iface = parsed[1]
    wanted = @interfaces.empty? || @interfaces.any? { |pattern| iface.match?(pattern) }
    next unless wanted
    next if @ignore_interfaces.any? { |pattern| iface.match?(pattern) }

    values = parsed[2].split(/\s+/).map(&:to_i)
    counter_names.each_with_index do |counter, position|
      counters["#{iface} #{counter}"] = values[position]
    end
  end
end
61
+
62
# Reports per-second rates for every interface counter.
#
# The first call only stores a sample. Later calls:
#   - report services that were in the previous sample but are missing from
#     the new one with state 'expired';
#   - for services present in both samples, report the counter delta divided
#     by the :interval option. Counters ending in "drop" or "errs" are in
#     state 'warning' when they increased; everything else is 'ok'.
# The new sample then replaces the stored one.
def tick
  current = state

  if @old_state
    (@old_state.keys - current.keys).each do |service|
      report(service: service.dup, state: 'expired')
    end

    current.each do |service, metric|
      next unless @old_state.key?(service)

      delta = metric - @old_state[service]
      problem_counter = service.end_with?('drop', 'errs')
      svc_state = problem_counter && delta.positive? ? 'warning' : 'ok'

      report(
        service: service.dup,
        metric: (delta.to_f / opts[:interval]),
        state: svc_state,
      )
    end
  end

  @old_state = current
end
103
+ end
104
+ end
105
+ end