riemann-tools 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +2 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +8 -0
  5. data/.ruby-version +1 -0
  6. data/CHANGELOG.md +25 -2
  7. data/Rakefile +10 -3
  8. data/bin/riemann-apache-status +1 -106
  9. data/bin/riemann-bench +2 -70
  10. data/bin/riemann-cloudant +1 -56
  11. data/bin/riemann-consul +1 -106
  12. data/bin/riemann-dir-files-count +1 -55
  13. data/bin/riemann-dir-space +1 -55
  14. data/bin/riemann-diskstats +1 -92
  15. data/bin/riemann-fd +2 -81
  16. data/bin/riemann-freeswitch +2 -119
  17. data/bin/riemann-haproxy +1 -58
  18. data/bin/riemann-health +0 -2
  19. data/bin/riemann-kvminstance +2 -22
  20. data/bin/riemann-memcached +1 -37
  21. data/bin/riemann-net +0 -2
  22. data/bin/riemann-nginx-status +1 -85
  23. data/bin/riemann-ntp +0 -2
  24. data/bin/riemann-portcheck +1 -44
  25. data/bin/riemann-proc +1 -108
  26. data/bin/riemann-varnish +1 -54
  27. data/bin/riemann-wrapper +75 -0
  28. data/bin/riemann-zookeeper +1 -39
  29. data/lib/riemann/tools/apache_status.rb +107 -0
  30. data/lib/riemann/tools/bench.rb +72 -0
  31. data/lib/riemann/tools/cloudant.rb +57 -0
  32. data/lib/riemann/tools/consul_health.rb +107 -0
  33. data/lib/riemann/tools/dir_files_count.rb +56 -0
  34. data/lib/riemann/tools/dir_space.rb +56 -0
  35. data/lib/riemann/tools/diskstats.rb +94 -0
  36. data/lib/riemann/tools/fd.rb +81 -0
  37. data/lib/riemann/tools/freeswitch.rb +119 -0
  38. data/lib/riemann/tools/haproxy.rb +59 -0
  39. data/lib/riemann/tools/health.rb +150 -19
  40. data/lib/riemann/tools/kvm.rb +23 -0
  41. data/lib/riemann/tools/memcached.rb +38 -0
  42. data/lib/riemann/tools/net.rb +2 -1
  43. data/lib/riemann/tools/nginx_status.rb +86 -0
  44. data/lib/riemann/tools/ntp.rb +1 -0
  45. data/lib/riemann/tools/portcheck.rb +45 -0
  46. data/lib/riemann/tools/proc.rb +109 -0
  47. data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
  48. data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
  49. data/lib/riemann/tools/varnish.rb +55 -0
  50. data/lib/riemann/tools/version.rb +1 -1
  51. data/lib/riemann/tools/zookeeper.rb +40 -0
  52. data/lib/riemann/tools.rb +2 -20
  53. data/riemann-tools.gemspec +4 -1
  54. data/tools/riemann-aws/Rakefile +6 -9
  55. data/tools/riemann-aws/bin/riemann-aws-billing +2 -87
  56. data/tools/riemann-aws/bin/riemann-aws-rds-status +2 -62
  57. data/tools/riemann-aws/bin/riemann-aws-sqs-status +2 -44
  58. data/tools/riemann-aws/bin/riemann-aws-status +2 -77
  59. data/tools/riemann-aws/bin/riemann-elb-metrics +2 -162
  60. data/tools/riemann-aws/bin/riemann-s3-list +2 -81
  61. data/tools/riemann-aws/bin/riemann-s3-status +2 -96
  62. data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
  63. data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
  64. data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
  65. data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
  66. data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
  67. data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
  68. data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
  69. data/tools/riemann-chronos/Rakefile +6 -9
  70. data/tools/riemann-chronos/bin/riemann-chronos +1 -154
  71. data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
  72. data/tools/riemann-docker/Rakefile +5 -8
  73. data/tools/riemann-docker/bin/riemann-docker +2 -200
  74. data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
  75. data/tools/riemann-elasticsearch/Rakefile +6 -9
  76. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +1 -167
  77. data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
  78. data/tools/riemann-marathon/Rakefile +6 -9
  79. data/tools/riemann-marathon/bin/riemann-marathon +1 -156
  80. data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
  81. data/tools/riemann-mesos/Rakefile +6 -9
  82. data/tools/riemann-mesos/bin/riemann-mesos +1 -139
  83. data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
  84. data/tools/riemann-munin/Rakefile +5 -8
  85. data/tools/riemann-munin/bin/riemann-munin +1 -36
  86. data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
  87. data/tools/riemann-rabbitmq/Rakefile +6 -9
  88. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +1 -266
  89. data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
  90. data/tools/riemann-riak/Rakefile +5 -8
  91. data/tools/riemann-riak/bin/riemann-riak +1 -316
  92. data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
  93. data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
  94. data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
  95. metadata +57 -10
  96. data/.travis.yml +0 -31
  97. data/tools/riemann-riak/riak_status/key_count.erl +0 -13
  98. data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
  99. data/tools/riemann-riak/riak_status/ringready.erl +0 -9
@@ -2,7 +2,9 @@
2
2
 
3
3
  require 'riemann/tools'
4
4
  require 'riemann/tools/utils'
5
+ require 'riemann/tools/uptime_parser.tab'
5
6
 
7
+ # Reports current CPU, disk, load average, memory, swap, uptime, and logged-in user counts to riemann.
6
8
  module Riemann
7
9
  module Tools
8
10
  class Health
@@ -14,12 +16,18 @@ module Riemann
14
16
  opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
15
17
  opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
16
18
  opt :disk_ignorefs, 'A list of filesystem types to ignore',
17
- default: %w[anon_inodefs autofs cd9660 devfs devtmpfs fdescfs iso9660 linprocfs linsysfs nfs overlay procfs tmpfs]
19
+ default: %w[anon_inodefs autofs cd9660 devfs devtmpfs fdescfs iso9660 linprocfs linsysfs nfs overlay procfs squashfs tmpfs]
18
20
  opt :load_warning, 'Load warning threshold (load average / core)', default: 3.0
19
21
  opt :load_critical, 'Load critical threshold (load average / core)', default: 8.0
20
22
  opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
21
23
  opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
22
- opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu load memory disk]
24
+ opt :uptime_warning, 'Uptime warning threshold', default: 86_400
25
+ opt :uptime_critical, 'Uptime critical threshold', default: 3600
26
+ opt :users_warning, 'Users warning threshold', default: 1
27
+ opt :users_critical, 'Users critical threshold', default: 1
28
+ opt :swap_warning, 'Swap warning threshold', default: 0.4
29
+ opt :swap_critical, 'Swap critical threshold', default: 0.5
30
+ opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu load memory disk swap]
23
31
 
24
32
  def initialize
25
33
  @limits = {
@@ -27,6 +35,9 @@ module Riemann
27
35
  disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
28
36
  load: { critical: opts[:load_critical], warning: opts[:load_warning] },
29
37
  memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
38
+ uptime: { critical: opts[:uptime_critical], warning: opts[:uptime_warning] },
39
+ users: { critical: opts[:users_critical], warning: opts[:users_warning] },
40
+ swap: { critical: opts[:swap_critical], warning: opts[:swap_warning] },
30
41
  }
31
42
  case (@ostype = `uname -s`.chomp.downcase)
32
43
  when 'darwin'
@@ -35,25 +46,32 @@ module Riemann
35
46
  @disk = method :disk
36
47
  @load = method :darwin_load
37
48
  @memory = method :darwin_memory
38
- darwin_top
49
+ @uptime = method :bsd_uptime
50
+ @swap = method :bsd_swap
39
51
  when 'freebsd'
40
52
  @cores = `sysctl -n hw.ncpu`.to_i
41
53
  @cpu = method :freebsd_cpu
42
54
  @disk = method :disk
43
55
  @load = method :bsd_load
44
56
  @memory = method :freebsd_memory
57
+ @uptime = method :bsd_uptime
58
+ @swap = method :bsd_swap
45
59
  when 'openbsd'
46
60
  @cores = `sysctl -n hw.ncpu`.to_i
47
61
  @cpu = method :openbsd_cpu
48
62
  @disk = method :disk
49
63
  @load = method :bsd_load
50
64
  @memory = method :openbsd_memory
65
+ @uptime = method :bsd_uptime
66
+ @swap = method :bsd_swap
51
67
  when 'sunos'
52
68
  @cores = `mpstat -a 2>/dev/null`.split[33].to_i
53
69
  @cpu = method :sunos_cpu
54
70
  @disk = method :disk
55
71
  @load = method :bsd_load
56
72
  @memory = method :sunos_memory
73
+ @uptime = method :bsd_uptime
74
+ @swap = method :bsd_swap
57
75
  else
58
76
  @cores = `nproc`.to_i
59
77
  puts "WARNING: OS '#{@ostype}' not explicitly supported. Falling back to Linux" unless @ostype == 'linux'
@@ -61,8 +79,11 @@ module Riemann
61
79
  @disk = method :disk
62
80
  @load = method :linux_load
63
81
  @memory = method :linux_memory
82
+ @uptime = method :linux_uptime
83
+ @swap = method :linux_swap
64
84
  @supports_exclude_type = `df --help 2>&1 | grep -e "--exclude-type"` != ''
65
85
  end
86
+ @users = method :users
66
87
 
67
88
  opts[:checks].each do |check|
68
89
  case check
@@ -74,6 +95,12 @@ module Riemann
74
95
  @cpu_enabled = true
75
96
  when 'memory'
76
97
  @memory_enabled = true
98
+ when 'uptime'
99
+ @uptime_enabled = true
100
+ when 'users'
101
+ @users_enabled = true
102
+ when 'swap'
103
+ @swap_enabled = true
77
104
  end
78
105
  end
79
106
  end
@@ -99,6 +126,30 @@ module Riemann
99
126
  end
100
127
  end
101
128
 
129
# Grades an integer reading against the :critical and :warning limits stored
# for +service+ in @limits and forwards the result to #alert.
#
# service - key into @limits, also used as the alert service.
# value   - the reading; nothing is reported when it is nil/false.
# report  - text appended to the value to build the alert description.
def report_int(service, value, report)
  return unless value

  thresholds = @limits[service]
  severity =
    if value >= thresholds[:critical]
      :critical
    elsif value >= thresholds[:warning]
      :warning
    else
      :ok
    end

  alert service, severity, value, "#{value} #{report}"
end
140
+
141
# Alerts on the uptime reading. Unlike the other checks the comparison is
# inverted: a value *below* the :critical (then :warning) limit is the bad case.
#
# uptime - the reading compared against @limits[:uptime]; also sent as the metric.
def report_uptime(uptime)
  description = uptime_to_human(uptime)
  limits = @limits[:uptime]

  severity =
    if uptime < limits[:critical]
      :critical
    elsif uptime < limits[:warning]
      :warning
    else
      :ok
    end

  alert 'uptime', severity, uptime, description
end
152
+
102
153
  def linux_cpu
103
154
  new = File.read('/proc/stat')
104
155
  unless new[/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
@@ -205,9 +256,16 @@ module Riemann
205
256
  @old_cpu = [u2, s2, t2, i2]
206
257
  end
207
258
 
259
# Returns the UptimeParser instance for this object, building it on first use.
def uptime_parser
  return @uptime_parser if @uptime_parser

  @uptime_parser = UptimeParser.new
end
262
+
263
# Returns the parsed output of the `uptime` command.
#
# The result is kept in @cached_data[:uptime], so the command runs again only
# after that hash has been replaced.
def uptime
  cached = @cached_data[:uptime]
  return cached if cached

  @cached_data[:uptime] = uptime_parser.parse(`uptime`)
end
266
+
208
267
  def bsd_load
209
- m = `uptime`.split(':')[-1].chomp.gsub(/\s+/, '').split(',')
210
- load = m[0].to_f / @cores
268
+ load = uptime[:load_averages][1] / @cores
211
269
  if load > @limits[:load][:critical]
212
270
  alert 'load', :critical, load, "1-minute load average/core is #{load}"
213
271
  elsif load > @limits[:load][:warning]
@@ -240,47 +298,50 @@ module Riemann
240
298
  end
241
299
 
242
300
  def darwin_top
301
+ return @cached_data[:darwin_top] if @cached_data[:darwin_top]
302
+
243
303
  raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
244
- @topdata = { stamp: Time.now.to_i }
304
+ topdata = {}
245
305
  raw.each_line do |ln|
246
306
  if ln.match(/Load Avg: [0-9.]+, [0-9.]+, ([0-9.])+/i)
247
- @topdata[:load] = Regexp.last_match(1).to_f
307
+ topdata[:load] = Regexp.last_match(1).to_f
248
308
  elsif ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i)
249
- @topdata[:cpu] = 1 - (Regexp.last_match(1).to_f / 100)
309
+ topdata[:cpu] = 1 - (Regexp.last_match(1).to_f / 100)
250
310
  elsif (mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i))
251
311
  wired = mdat[1].to_i * (1024**'BKMGT'.index(mdat[2]))
252
312
  active = mdat[3].to_i * (1024**'BKMGT'.index(mdat[4]))
253
313
  inactive = mdat[5].to_i * (1024**'BKMGT'.index(mdat[6]))
254
314
  used = mdat[7].to_i * (1024**'BKMGT'.index(mdat[8]))
255
315
  free = mdat[9].to_i * (1024**'BKMGT'.index(mdat[10]))
256
- @topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
316
+ topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
257
317
  # This is for OSX Mavericks which
258
318
  # uses a different format for top
259
319
  # Example: PhysMem: 4662M used (1328M wired), 2782M unused.
260
320
  elsif (mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) used \([0-9]+[BKMGT] wired\), ([0-9]+)([BKMGT]) unused/i))
261
321
  used = mdat[1].to_i * (1024**'BKMGT'.index(mdat[2]))
262
322
  unused = mdat[3].to_i * (1024**'BKMGT'.index(mdat[4]))
263
- @topdata[:memory] = used.to_f / (used + unused)
323
+ topdata[:memory] = used.to_f / (used + unused)
264
324
  end
265
325
  end
326
+ @cached_data[:darwin_top] = topdata
266
327
  end
267
328
 
268
329
  def darwin_cpu
269
- darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
270
- unless @topdata[:cpu]
330
+ topdata = darwin_top
331
+ unless topdata[:cpu]
271
332
  alert 'cpu', :unknown, nil, 'unable to get CPU stats from top'
272
333
  return false
273
334
  end
274
- report_pct :cpu, @topdata[:cpu], "usage\n\n#{reverse_numeric_sort_with_header(`ps -eo pcpu,pid,comm`)}"
335
+ report_pct :cpu, topdata[:cpu], "usage\n\n#{reverse_numeric_sort_with_header(`ps -eo pcpu,pid,comm`)}"
275
336
  end
276
337
 
277
338
  def darwin_load
278
- darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
279
- unless @topdata[:load]
339
+ topdata = darwin_top
340
+ unless topdata[:load]
280
341
  alert 'load', :unknown, nil, 'unable to get load ave from top'
281
342
  return false
282
343
  end
283
- metric = @topdata[:load] / @cores
344
+ metric = topdata[:load] / @cores
284
345
  if metric > @limits[:load][:critical]
285
346
  alert 'load', :critical, metric, "1-minute load average per core is #{metric}"
286
347
  elsif metric > @limits[:load][:warning]
@@ -291,12 +352,12 @@ module Riemann
291
352
  end
292
353
 
293
354
  def darwin_memory
294
- darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
295
- unless @topdata[:memory]
355
+ topdata = darwin_top
356
+ unless topdata[:memory]
296
357
  alert 'memory', :unknown, nil, 'unable to get memory data from top'
297
358
  return false
298
359
  end
299
- report_pct :memory, @topdata[:memory], "usage\n\n#{reverse_numeric_sort_with_header(`ps -eo pmem,pid,comm`)}"
360
+ report_pct :memory, topdata[:memory], "usage\n\n#{reverse_numeric_sort_with_header(`ps -eo pmem,pid,comm`)}"
300
361
  end
301
362
 
302
363
  def df
@@ -336,11 +397,81 @@ module Riemann
336
397
  end
337
398
  end
338
399
 
400
# Reports the :uptime field of the parsed `uptime` output.
def bsd_uptime
  report_uptime(uptime[:uptime])
end
405
+
406
# Reports the first whitespace-separated field of /proc/uptime, as a Float.
def linux_uptime
  first_field = File.read('/proc/uptime').split(/\s+/).first

  report_uptime(first_field.to_f)
end
411
+
412
# Reports the :users field of the parsed `uptime` output, labelled "user"
# for exactly one and "users" otherwise.
def users
  count = uptime[:users]
  label = count == 1 ? 'user' : 'users'

  report_int(:users, count, label)
end
417
+
418
# Reports swap usage as used/blocks taken from the last line of `swapinfo`.
#
# Nothing is reported when the command prints nothing, when the last line is
# not numeric (for example only a header row), when it has too few columns,
# or when the block count is zero (the ratio would be NaN or Infinity).
def bsd_swap
  summary = `swapinfo`.lines.last
  return unless summary

  _device, blocks, used, _avail, _capacity = summary.split(/\s+/)

  total = Integer(blocks)
  return if total.zero?

  report_pct :swap, Float(used) / total, 'used'
rescue ArgumentError, TypeError
  # ArgumentError: a column is not a number; TypeError: a column is missing.
  # Either way there is no usable reading, so stay silent as before.
end
427
+
428
# Reports swap usage as the ratio of the summed "used" column to the summed
# "size" column of /proc/swaps (third and fourth fields; first line skipped).
# Nothing is reported when the total size is zero.
def linux_swap
  total_size = 0.0
  total_used = 0.0

  File.read('/proc/swaps').lines.drop(1).each do |entry|
    fields = entry.split(/\s+/)

    total_size += fields[2].to_f
    total_used += fields[3].to_f
  end

  return if total_size.zero?

  report_pct :swap, total_used / total_size, 'used'
end
447
+
448
# Formats a duration in seconds as "H:MM", prefixed with "N day(s) " when it
# spans at least one day. Hours are space-padded to two columns.
#
# value - duration in seconds; truncated with #to_i.
def uptime_to_human(value)
  days, remainder = value.to_i.divmod(86_400)
  hrs, remainder = remainder.divmod(3600)
  mins = remainder / 60

  clock = format('%<hrs>2d:%<mins>02d', hrs: hrs, mins: mins)
  return clock if days.zero?

  "#{days} day#{'s' if days > 1} #{clock}"
end
460
+
339
461
  def tick
462
+ invalidate_cache
463
+
340
464
  @cpu.call if @cpu_enabled
341
465
  @memory.call if @memory_enabled
342
466
  @disk.call if @disk_enabled
343
467
  @load.call if @load_enabled
468
+ @uptime.call if @uptime_enabled
469
+ @users.call if @users_enabled
470
+ @swap.call if @swap_enabled
471
+ end
472
+
473
# Replaces @cached_data with a fresh empty Hash, discarding anything the
# cached readers stored in it.
def invalidate_cache
  @cached_data = {}
end
345
476
  end
346
477
  end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ module Riemann
6
+ module Tools
7
# Reports how many lines of `virsh list` mention "running".
class Kvm
  include Riemann::Tools

  # Counts matching lines via grep and submits the number as the
  # 'KVM Running VMs' metric with state 'info'.
  def tick
    # LANG=C keeps virsh's output untranslated so grep can match "running"
    running_count = `LANG=C virsh list | grep -c running`.to_i

    report(service: 'KVM Running VMs', metric: running_count, state: 'info')
  end
end
22
+ end
23
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Gathers memcached STATS and submits them to Riemann.
6
+ module Riemann
7
+ module Tools
8
# Sends the memcached "stats" command and reports each STAT line as a metric.
class Memcached
  include Riemann::Tools
  require 'socket'

  opt :memcached_host, 'Memcached hostname', default: 'localhost'
  opt :memcached_port, 'Memcached port', default: 11_211

  # Opens a connection, requests the statistics and reports every
  # "STAT <name> <value>" line as service "memcached <name>" until "END".
  #
  # The socket is closed even when reading or reporting raises. Reading stops
  # if the server closes the connection early, and lines that are not STAT
  # lines are skipped instead of raising.
  def tick
    sock = TCPSocket.new(opts[:memcached_host], opts[:memcached_port])
    begin
      sock.print("stats\r\n")
      sock.flush
      # NOTE(review): the first reply line has always been read and dropped
      # here; kept as-is - confirm whether skipping it is intentional.
      sock.gets

      while (line = sock.gets)
        break if line.strip == 'END'

        m = line.match(/STAT (\w+) (\S+)/)
        next unless m

        report(
          host: opts[:memcached_host].dup,
          service: "memcached #{m[1]}",
          metric: m[2].to_f,
          state: 'ok',
          tags: ['memcached'],
        )
      end
    ensure
      sock.close
    end
  end
end
37
+ end
38
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'riemann/tools'
4
4
 
5
+ # Gathers network interface statistics and submits them to Riemann.
5
6
  module Riemann
6
7
  module Tools
7
8
  class Net
@@ -42,7 +43,7 @@ module Riemann
42
43
  'tx bytes',
43
44
  'tx packets',
44
45
  'tx errs',
45
- 'tx drops',
46
+ 'tx drop',
46
47
  'tx fifo',
47
48
  'tx colls',
48
49
  'tx carrier',
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Gathers nginx status stub statistics and submits them to Riemann.
6
+ # See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
7
+ module Riemann
8
+ module Tools
9
# Fetches the nginx stub-status page and reports its counters.
class NginxStatus
  include Riemann::Tools
  require 'net/http'
  require 'uri'

  opt :uri, 'Nginx Stub Status URI', default: 'http://localhost:8080/nginx_status'
  # NOTE(review): :checks is declared but nothing in this class reads it.
  opt :checks, 'Which metrics to report.', type: :strings,
               default: %w[active accepted handled requests reading writing waiting]
  opt :active_warning, 'Active connections warning threshold', default: 0
  opt :active_critical, 'Active connections critical threshold', default: 0
  opt :reading_warning, 'Reading connections warning threshold', default: 0
  opt :reading_critical, 'Reading connections critical threshold', default: 0
  opt :writing_warning, 'Writing connections warning threshold', default: 0
  opt :writing_critical, 'Writing connections critical threshold', default: 0
  opt :waiting_warning, 'Waiting connections warning threshold', default: 0
  opt :waiting_critical, 'Waiting connections critical threshold', default: 0

  def initialize
    @uri = URI.parse(opts[:uri])

    # sample response:
    #
    # Active connections: 1
    # server accepts handled requests
    #  39 39 39
    # Reading: 0 Writing: 1 Waiting: 0
    @keys = %w[active accepted handled requests reading writing waiting]
    @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
  end

  # Grades +value+ for metric +key+. A threshold only applies when an option
  # named "<key>_critical" / "<key>_warning" exists and is positive.
  #
  # Returns 'critical', 'warning' or 'ok'.
  def state(key, value)
    if opts.key? "#{key}_critical".to_sym
      critical_threshold = opts["#{key}_critical".to_sym]
      return 'critical' if critical_threshold.positive? && (value >= critical_threshold)
    end

    if opts.key? "#{key}_warning".to_sym
      warning_threshold = opts["#{key}_warning".to_sym]
      return 'warning' if warning_threshold.positive? && (value >= warning_threshold)
    end

    'ok'
  end

  # Fetches the status page and reports 'nginx health' plus one
  # "nginx <key>" metric per counter.
  #
  # 'nginx health' is critical when the request raises or when the body does
  # not match the expected layout; no counters are reported in either case.
  def tick
    response = nil
    begin
      response = ::Net::HTTP.get(@uri)
    rescue StandardError => e
      report(
        service: 'nginx health',
        state: 'critical',
        description: "Connection error: #{e.class} - #{e.message}",
      )
    end

    return if response.nil?

    # Parse before declaring health: an error page or a different endpoint
    # yields a body without the counters.
    match = @re.match(response)
    if match.nil?
      report(
        service: 'nginx health',
        state: 'critical',
        description: "Unexpected response from #{@uri}: not a stub status page",
      )
      return
    end

    report(
      service: 'nginx health',
      state: 'ok',
      description: 'Nginx status connection ok',
    )

    values = match.captures.map(&:to_i)

    @keys.zip(values).each do |key, value|
      report({
               service: "nginx #{key}",
               metric: value,
               state: state(key, value),
               tags: ['nginx'],
             })
    end
  end
end
85
+ end
86
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'riemann/tools'
4
4
 
5
+ # Reports NTP stats to Riemann.
5
6
  module Riemann
6
7
  module Tools
7
8
  class Ntp
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Checks for open tcp ports.
6
+ # (c) Max Voit 2017
7
+ module Riemann
8
+ module Tools
9
# Reports whether TCP ports on a host accept connections.
class Portcheck
  include Riemann::Tools
  require 'socket'

  opt :hostname, 'Host, defaults to localhost', default: `hostname`.chomp
  opt :ports, "List of ports to check, e.g. '-r 80 443'", type: :ints

  def initialize
    @hostname = opts.fetch(:hostname)
    # :ports has no default, so it can be nil; Array() turns that into an
    # empty list instead of letting every tick fail on nil.each.
    @ports = Array(opts.fetch(:ports))
    warn 'WARNING: no ports given, nothing will be checked' if @ports.empty?
  end

  # Reports "port <n>" as 'ok' when a connection succeeds and 'critical'
  # otherwise, once per configured port.
  def tick
    @ports.each do |thisport|
      report(
        host: @hostname.to_s,
        service: "port #{thisport}",
        state: port_open?(thisport) ? 'ok' : 'critical',
        tags: ['portcheck'],
      )
    end
  end

  private

  # Tries to open a TCP connection with a 5s connect timeout; any
  # StandardError means the port is considered closed.
  def port_open?(port)
    Socket.tcp(@hostname, port, connect_timeout: 5) { true }
  rescue StandardError
    false
  end
end
44
+ end
45
+ end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Reports running process count to riemann.
6
+ module Riemann
7
+ module Tools
8
# Reports how many processes match a pattern, plus per-process metrics.
class Proc
  include Riemann::Tools
  require 'date'
  require 'shellwords'

  opt :proc_regex, 'regular expression that matches the process to be monitored', type: :string, default: '.*'
  opt :proc_min_critical, 'running process count minimum', default: 0
  opt :proc_max_critical, 'running process count maximum', default: 65_536

  def initialize
    @limits = { critical: { min: opts[:proc_min_critical], max: opts[:proc_max_critical] } }

    abort 'FATAL: specify a process regular expression, see --help for usage' unless opts[:proc_regex]

    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == 'linux'
    @check = method :linux_proc
  end

  # Sends one event, coercing service/state to strings and metric to Float.
  def alert(service, state, metric, description)
    report(
      service: service.to_s,
      state: state.to_s,
      metric: metric.to_f,
      description: description,
    )
  end

  # Lists processes with `ps`, filters them with grep and reports:
  #
  # * "proc count/<regex>" - number of matching lines; critical when outside
  #   the configured min/max, ok otherwise
  # * "proc <pid>-<start-time>/rss", ".../vsize", ".../running" and
  #   ".../cputime" for every line the ps pattern can parse
  #
  # The description of the per-process events is the command itself.
  def linux_proc
    process = opts[:proc_regex]
    # Escape the pattern for the shell (a quote in it used to break the
    # pipeline) and pass it with -e so a leading dash is not read as an option.
    pattern = Shellwords.escape(process)
    found = `ps axo pid=,rss=,vsize=,state=,cputime=,lstart=,command= | grep -e #{pattern} | grep -v grep | grep -v riemann-proc`
    count = found.count("\n")
    in_range = count.between?(@limits[:critical][:min], @limits[:critical][:max])
    alert "proc count/#{process}", (in_range ? :ok : :critical), count,
          "process #{process} is running #{count} instances.\n"

    # cputime may carry a "D-" day prefix, hence the dash in its character class.
    ps_regex = /([0-9]+) +([0-9]+) +([0-9]+) +([A-Z]) +([0-9:.-]+) +[A-Za-z]{3} +([A-Za-z]{3} {1,2}[0-9]+ [0-9:]+ [0-9]+) +(.*)/
    found.each_line do |line|
      m = ps_regex.match(line)
      next if m.nil?

      pid, rss, vsize, state, cputime, start, command = m.captures
      start_s = DateTime.parse(start).to_time.to_i
      state_s =
        case state[0]
        when 'R', 'S' then 'ok'
        when 'I' then 'warning'
        when 'T', 'U', 'Z' then 'critical'
        else 'unknown'
        end
      metrics = {
        'rss' => rss.to_f,
        'vsize' => vsize.to_f,
        'running' => (state[0] == 'R' ? 1.0 : 0.0),
        'cputime' => cputime_to_seconds(cputime),
      }
      metrics.each do |name, metric|
        report(
          service: "proc #{pid}-#{start_s}/#{name}",
          state: state_s,
          metric: metric,
          description: command,
        )
      end
    end
  end

  def tick
    @check.call
  end

  private

  # Converts a ps cputime such as "00:01:02" or "1-02:03:04" into whole
  # seconds. Colon-separated fields are read base-60, least significant last.
  def cputime_to_seconds(cputime)
    days, _separator, clock = cputime.rpartition('-')
    seconds = clock.split(':').inject(0) { |total, field| (total * 60) + field.to_f }
    (days.to_i * 86_400) + seconds.to_i
  end
end
108
+ end
109
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+
5
+ require 'riemann/client'
6
+
7
+ module Riemann
8
+ module Tools
9
# Singleton holding one Riemann client; built on the first #configure call.
class RiemannClientWrapper
  include Singleton

  def initialize
    @client = nil
  end

  # Builds the client from +options+ the first time it is called; later
  # calls leave the existing client untouched. Always returns self.
  def configure(options)
    @client ||= build_client(options)
    self
  end

  # Forwards +event+ to the configured client.
  def <<(event)
    @client << event
  end

  private

  # Creates the Riemann::Client and returns its TCP variant when :tcp or
  # :tls is set, the client itself otherwise.
  def build_client(options)
    base = Riemann::Client.new(
      host: options[:host],
      port: options[:port],
      timeout: options[:timeout],
      ssl: options[:tls],
      key_file: options[:tls_key],
      cert_file: options[:tls_cert],
      ca_file: options[:tls_ca_cert],
      ssl_verify: options[:tls_verify],
    )

    (options[:tcp] || options[:tls]) ? base.tcp : base
  end
end
42
+ end
43
+ end