riemann-tools 1.1.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +2 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +8 -0
  5. data/.ruby-version +1 -0
  6. data/CHANGELOG.md +60 -2
  7. data/Rakefile +17 -3
  8. data/bin/riemann-apache-status +1 -106
  9. data/bin/riemann-bench +2 -70
  10. data/bin/riemann-cloudant +1 -56
  11. data/bin/riemann-consul +1 -106
  12. data/bin/riemann-dir-files-count +1 -55
  13. data/bin/riemann-dir-space +1 -55
  14. data/bin/riemann-diskstats +1 -92
  15. data/bin/riemann-fd +2 -81
  16. data/bin/riemann-freeswitch +2 -119
  17. data/bin/riemann-haproxy +1 -58
  18. data/bin/riemann-health +0 -2
  19. data/bin/riemann-kvminstance +2 -22
  20. data/bin/riemann-md +8 -0
  21. data/bin/riemann-memcached +1 -37
  22. data/bin/riemann-net +0 -2
  23. data/bin/riemann-nginx-status +1 -85
  24. data/bin/riemann-ntp +0 -2
  25. data/bin/riemann-portcheck +1 -44
  26. data/bin/riemann-proc +1 -108
  27. data/bin/riemann-varnish +1 -54
  28. data/bin/riemann-wrapper +113 -0
  29. data/bin/riemann-zookeeper +1 -39
  30. data/bin/riemann-zpool +8 -0
  31. data/lib/riemann/tools/apache_status.rb +107 -0
  32. data/lib/riemann/tools/bench.rb +72 -0
  33. data/lib/riemann/tools/cloudant.rb +57 -0
  34. data/lib/riemann/tools/consul_health.rb +107 -0
  35. data/lib/riemann/tools/dir_files_count.rb +56 -0
  36. data/lib/riemann/tools/dir_space.rb +56 -0
  37. data/lib/riemann/tools/diskstats.rb +94 -0
  38. data/lib/riemann/tools/fd.rb +81 -0
  39. data/lib/riemann/tools/freeswitch.rb +119 -0
  40. data/lib/riemann/tools/haproxy.rb +59 -0
  41. data/lib/riemann/tools/health.rb +150 -19
  42. data/lib/riemann/tools/kvm.rb +23 -0
  43. data/lib/riemann/tools/md.rb +35 -0
  44. data/lib/riemann/tools/mdstat_parser.tab.rb +340 -0
  45. data/lib/riemann/tools/memcached.rb +38 -0
  46. data/lib/riemann/tools/net.rb +2 -1
  47. data/lib/riemann/tools/nginx_status.rb +86 -0
  48. data/lib/riemann/tools/ntp.rb +1 -0
  49. data/lib/riemann/tools/portcheck.rb +45 -0
  50. data/lib/riemann/tools/proc.rb +109 -0
  51. data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
  52. data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
  53. data/lib/riemann/tools/varnish.rb +55 -0
  54. data/lib/riemann/tools/version.rb +1 -1
  55. data/lib/riemann/tools/zookeeper.rb +40 -0
  56. data/lib/riemann/tools/zpool.rb +40 -0
  57. data/lib/riemann/tools.rb +2 -20
  58. data/riemann-tools.gemspec +10 -1
  59. data/tools/riemann-aws/Rakefile +6 -9
  60. data/tools/riemann-aws/bin/riemann-aws-billing +2 -87
  61. data/tools/riemann-aws/bin/riemann-aws-rds-status +2 -62
  62. data/tools/riemann-aws/bin/riemann-aws-sqs-status +2 -44
  63. data/tools/riemann-aws/bin/riemann-aws-status +2 -77
  64. data/tools/riemann-aws/bin/riemann-elb-metrics +2 -162
  65. data/tools/riemann-aws/bin/riemann-s3-list +2 -81
  66. data/tools/riemann-aws/bin/riemann-s3-status +2 -96
  67. data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
  68. data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
  69. data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
  70. data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
  71. data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
  72. data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
  73. data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
  74. data/tools/riemann-chronos/Rakefile +6 -9
  75. data/tools/riemann-chronos/bin/riemann-chronos +1 -154
  76. data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
  77. data/tools/riemann-docker/Rakefile +5 -8
  78. data/tools/riemann-docker/bin/riemann-docker +2 -200
  79. data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
  80. data/tools/riemann-elasticsearch/Rakefile +6 -9
  81. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +1 -167
  82. data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
  83. data/tools/riemann-marathon/Rakefile +6 -9
  84. data/tools/riemann-marathon/bin/riemann-marathon +1 -156
  85. data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
  86. data/tools/riemann-mesos/Rakefile +6 -9
  87. data/tools/riemann-mesos/bin/riemann-mesos +1 -139
  88. data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
  89. data/tools/riemann-munin/Rakefile +5 -8
  90. data/tools/riemann-munin/bin/riemann-munin +1 -36
  91. data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
  92. data/tools/riemann-rabbitmq/Rakefile +6 -9
  93. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +1 -266
  94. data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
  95. data/tools/riemann-riak/Rakefile +5 -8
  96. data/tools/riemann-riak/bin/riemann-riak +1 -316
  97. data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
  98. data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
  99. data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
  100. metadata +64 -10
  101. data/.travis.yml +0 -31
  102. data/tools/riemann-riak/riak_status/key_count.erl +0 -13
  103. data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
  104. data/tools/riemann-riak/riak_status/ringready.erl +0 -9
data/bin/riemann-proc CHANGED
@@ -3,113 +3,6 @@
3
3
 
4
4
  Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- # Reports running process count to riemann.
7
-
8
- require File.expand_path('../lib/riemann/tools', __dir__)
9
-
10
- module Riemann
11
- module Tools
12
- class Proc
13
- include Riemann::Tools
14
-
15
- opt :proc_regex, 'regular expression that matches the process to be monitored', type: :string, default: '.*'
16
- opt :proc_min_critical, 'running process count minimum', default: 0
17
- opt :proc_max_critical, 'running process count maximum', default: 65_536
18
-
19
- def initialize
20
- @limits = { critical: { min: opts[:proc_min_critical], max: opts[:proc_max_critical] } }
21
-
22
- abort 'FATAL: specify a process regular expression, see --help for usage' unless opts[:proc_regex]
23
-
24
- ostype = `uname -s`.chomp.downcase
25
- puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == 'linux'
26
- @check = method :linux_proc
27
- end
28
-
29
- def alert(service, state, metric, description)
30
- report(
31
- service: service.to_s,
32
- state: state.to_s,
33
- metric: metric.to_f,
34
- description: description,
35
- )
36
- end
37
-
38
- def linux_proc
39
- process = opts[:proc_regex]
40
- found = `ps axo pid=,rss=,vsize=,state=,cputime=,lstart=,command= | grep '#{process}' | grep -v grep | grep -v riemann-proc`
41
- running = found.count("\n")
42
- if (running > @limits[:critical][:max]) || (running < @limits[:critical][:min])
43
- alert "proc count/#{process}", :critical, running, "process #{process} is running #{running} instances.\n"
44
- else
45
- alert "proc count/#{process}", :ok, running, "process #{process} is running #{running} instances.\n"
46
- end
47
- # Iterate on all the lines and create an entry for the following metrics:
48
- #
49
- # process/<pid>-<start-time>/rss
50
- # process/<pid>-<start-time>/vsize
51
- # process/<pid>-<start-time>/running
52
- # process/<pid>-<start-time>/cputime
53
- #
54
- # description should contain the command itself.
55
- # value should be either process RSS, VSIZE, or 1 if running
56
- # state is always unknown for the moment
57
- #
58
- ps_regex = /([0-9]+) +([0-9]+) +([0-9]+) +([A-Z]) +([0-9:.]+) +[A-Za-z]{3} +([A-Za-z]{3} {1,2}[0-9]+ [0-9:]+ [0-9]+) +(.*)/
59
- found.each_line do |line|
60
- m = ps_regex.match(line)
61
- next if m.nil?
62
-
63
- pid, rss, vsize, state, cputime, start, command = m.captures
64
- start_s = DateTime.parse(start, 'Mmm DD HH:MM:ss YYYY').to_time.to_i
65
- cputime_s = DateTime.parse(cputime, '%H:%M:%S')
66
- cputime_seconds = (cputime_s.hour * 3600) + (cputime_s.minute * 60) + cputime_s.second
67
- running = 0
68
- case state[0]
69
- when 'R'
70
- state_s = 'ok'
71
- running = 1
72
- when 'S'
73
- state_s = 'ok'
74
- when 'I'
75
- state_s = 'warning'
76
- when 'T', 'U', 'Z'
77
- state_s = 'critical'
78
- else
79
- state_s = 'unknown'
80
- end
81
- report(
82
- service: "proc #{pid}-#{start_s}/rss",
83
- state: state_s.to_s,
84
- metric: rss.to_f,
85
- description: command,
86
- )
87
- report(
88
- service: "proc #{pid}-#{start_s}/vsize",
89
- state: state_s.to_s,
90
- metric: vsize.to_f,
91
- description: command,
92
- )
93
- report(
94
- service: "proc #{pid}-#{start_s}/running",
95
- state: state_s.to_s,
96
- metric: running.to_f,
97
- description: command,
98
- )
99
- report(
100
- service: "proc #{pid}-#{start_s}/cputime",
101
- state: state_s.to_s,
102
- metric: cputime_seconds,
103
- description: command,
104
- )
105
- end
106
- end
107
-
108
- def tick
109
- @check.call
110
- end
111
- end
112
- end
113
- end
6
+ require 'riemann/tools/proc'
114
7
 
115
8
  Riemann::Tools::Proc.run
data/bin/riemann-varnish CHANGED
@@ -3,59 +3,6 @@
3
3
 
4
4
  Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- # Reports varnish stats to Riemann.
7
-
8
- require 'open3'
9
- require File.expand_path('../lib/riemann/tools', __dir__)
10
-
11
- module Riemann
12
- module Tools
13
- class Varnish
14
- include Riemann::Tools
15
-
16
- opt :varnish_host, 'Varnish hostname', default: `hostname`.chomp
17
-
18
- def initialize
19
- cmd = 'varnishstat -V'
20
- Open3.popen3(cmd) do |_stdin, _stdout, stderr, _wait_thr|
21
- @ver = /varnishstat \(varnish-(\d+)/.match(stderr.read)[1].to_i
22
- end
23
-
24
- @vstats = if @ver >= 4
25
- ['MAIN.sess_conn',
26
- 'MAIN.sess_drop ',
27
- 'MAIN.client_req',
28
- 'MAIN.cache_hit',
29
- 'MAIN.cache_miss',]
30
- else
31
- %w[client_conn
32
- client_drop
33
- client_req
34
- cache_hit
35
- cache_miss]
36
- end
37
- end
38
-
39
- def tick
40
- stats = if @ver >= 4
41
- `varnishstat -1 -f #{@vstats.join(' -f ')}`
42
- else
43
- `varnishstat -1 -f #{@vstats.join(',')}`
44
- end
45
- stats.each_line do |stat|
46
- m = stat.split
47
- report(
48
- host: opts[:varnish_host].dup,
49
- service: "varnish #{m[0]}",
50
- metric: m[1].to_f,
51
- state: 'ok',
52
- description: m[3..].join(' ').to_s,
53
- tags: ['varnish'],
54
- )
55
- end
56
- end
57
- end
58
- end
59
- end
6
+ require 'riemann/tools/varnish'
60
7
 
61
8
  Riemann::Tools::Varnish.run
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
# Turn a snake_case path into a qualified constant name, e.g.
# "riemann/tools/consul_health" becomes "Riemann::Tools::ConsulHealth".
#
# Every lowercase letter found at the start of a line, after a "/" or after
# a "_" is upcased; a leading "/" becomes "::" and a leading "_" is dropped.
#
# @param subject [String] path-like name
# @return [String] the camelized name
def camelize(subject)
  subject.gsub(%r{(^|[/_])[a-z]}) do |match|
    namespace_separator = match.start_with?('/') ? '::' : ''
    "#{namespace_separator}#{match[-1].upcase}"
  end
end
9
+
10
# Turn a qualified constant name into a snake_case path, e.g.
# "Riemann::Tools::ConsulHealth" becomes "riemann/tools/consul_health".
#
# @param subject [String] constant-like name
# @return [String] the underscored path
def underscore(subject)
  # Cut in front of every capital letter, keeping the letter with its word.
  words = subject.split(/(?=[A-Z])/)
  snake = words.map { |word| word.downcase }.join('_')
  # A namespace boundary shows up as "::_" once the words are joined.
  snake.gsub('::_', '/')
end
13
+
14
# Look up a constant from its (possibly namespaced) name.
#
# @param subject [String] constant name such as "Riemann::Tools::Fd"
# @return [Object] the constant resolved starting from Object
# @raise [NameError] when no such constant is defined
def constantize(subject)
  Object.const_get(subject, true)
end
17
+
18
# Consume arguments from the front of +argv+ up to and including the next
# "--" separator.
#
# +argv+ is modified in place: the returned arguments and the separator are
# removed from it.
#
# @param argv [Array<String>] remaining command line arguments
# @return [Array<String>] the arguments found before the separator
def read_flags(argv)
  flags = []

  loop do
    token = argv.shift
    break if !token || token == '--'

    flags << token
  end

  flags
end
29
+
30
# Print the command line help on standard error and terminate the process
# with exit status 1.
#
# @return [void] never returns; raises SystemExit through +exit+
def usage
  warn <<~USAGE
    usage: riemann-wrapper [common options] -- tool1 [tool1 options] [-- tool2 [tool2 options] ...]
           riemann-wrapper /path/to/configuration/file.yml

    Run multiple Riemann tools in a single process. A single connection to
    riemann is maintained and shared for all tools, the connection flags should
    only be passed as common options.

    Examples:
      1. Run the fd, health and ntp tools with default options:

         riemann-wrapper -- fd -- health -- ntp

      2. Run the fd, health and ntp tools against a remote riemann server using
         TCP and tagging each event with the name of the tool that produced it:

         riemann-wrapper --host riemann.example.com --tcp -- \\
           fd --tag=fd -- \\
           health --tag=health -- \\
           ntp --tag=ntp

      3. Same as above example, but using a configuration file (more verbose but
         easier to handle when running riemann-wrapper manually or managing it
         with a Configuration Management system):

         cat > config.yml << EOT
         ---
         options: --host riemann.example.com --tcp
         tools:
           - name: fd
             options: --tag=fd
           - name: health
             options: --tag=health
           - name: ntp
             options: --tag=ntp
         EOT
         riemann-wrapper config.yml
  USAGE
  exit 1
end
71
+
72
usage if ARGV.empty?

# A single argument is taken as the path to a YAML configuration file, which
# is expanded into the equivalent "common options -- tool options" command
# line.
if ARGV.size == 1
  unless File.readable?(ARGV[0])
    warn "Cannot open file for reading: #{ARGV[0]}"
    usage
  end

  require 'yaml'
  # An empty file does not parse to a Hash: fall back to an empty config.
  config = YAML.safe_load(File.read(ARGV[0])) || {}

  # Build the command line in a fresh string: `options` may be missing (nil)
  # and the parsed configuration should not be modified while appending.
  commandline = config['options'].to_s.dup
  Array(config['tools']).each { |tool| commandline << " -- #{tool['name']} #{tool['options']}" }

  ARGV.replace(commandline.split)
end

argv = ARGV.dup

# Everything before the first "--" applies to all tools.
common_argv = read_flags(argv)

threads = []

# Terminate the whole process if any thread fails
Thread.abort_on_exception = true

while argv.any?
  tool = argv.shift
  tool_argv = read_flags(argv)

  require "riemann/tools/#{tool}"
  tool_class = constantize(camelize("riemann/tools/#{tool}"))

  ARGV.replace(common_argv + tool_argv)
  instance = tool_class.new
  # Force evaluation of options. This relies on ARGV and needs to be done
  # before we launch multiple threads which compete to read information from
  # there.
  instance.options
  threads << Thread.new(instance, &:run)
end

threads.each(&:join)
@@ -3,44 +3,6 @@
3
3
 
4
4
  Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- # Gathers zookeeper STATS and submits them to Riemann.
7
-
8
- require File.expand_path('../lib/riemann/tools', __dir__)
9
-
10
- module Riemann
11
- module Tools
12
- class Zookeeper
13
- include Riemann::Tools
14
- require 'socket'
15
-
16
- opt :zookeeper_host, 'Zookeeper hostname', default: 'localhost'
17
- opt :zookeeper_port, 'Zookeeper port', default: 2181
18
-
19
- def tick
20
- sock = TCPSocket.new(opts[:zookeeper_host], opts[:zookeeper_port])
21
- sock.sync = true
22
- sock.print('mntr')
23
- sock.flush
24
-
25
- loop do
26
- stats = sock.gets
27
-
28
- break if stats.nil?
29
-
30
- m = stats.match(/^(\w+)\t+(.*)/)
31
-
32
- report(
33
- host: opts[:zookeeper_host].dup,
34
- service: "zookeeper #{m[1]}",
35
- metric: m[2].to_f,
36
- state: 'ok',
37
- tags: ['zookeeper'],
38
- )
39
- end
40
- sock.close
41
- end
42
- end
43
- end
44
- end
6
+ require 'riemann/tools/zookeeper'
45
7
 
46
8
  Riemann::Tools::Zookeeper.run
data/bin/riemann-zpool ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ require 'riemann/tools/zpool'
7
+
8
+ Riemann::Tools::Zpool.run
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Collects Apache metrics and submits them to Riemann
6
+ # More information can be found at http://httpd.apache.org/docs/2.4/mod/mod_status.html
7
+
8
+ # Removes whitespace from 'Total Accesses' and 'Total kBytes' for output to graphite
9
module Riemann
  module Tools
    # Fetches the machine-readable Apache mod_status page ("?auto") and
    # reports one event per status field and per scoreboard state.
    #
    # `opt`, `opts` and `report` are not defined here; presumably they come
    # from the Riemann::Tools mixin.
    class ApacheStatus
      include Riemann::Tools
      require 'net/http'
      require 'uri'

      opt :uri, 'Apache Server Status URI', default: 'http://localhost/server-status'

      def initialize
        # Net::HTTP.get called with a single argument needs a URI object, not
        # a String, so parse the final address instead of interpolating a
        # parsed URI back into a String.
        @uri = URI.parse("#{opts[:uri]}?auto")
        # Sample Response with ExtendedStatus On
        # Total Accesses: 20643
        # Total kBytes: 36831
        # CPULoad: .0180314
        # Uptime: 43868
        # ReqPerSec: .470571
        # BytesPerSec: 859.737
        # BytesPerReq: 1827.01
        # BusyWorkers: 6
        # IdleWorkers: 94
        # Scoreboard: ___K_____K____________W_

        # Scoreboard character => name used in the reported service.
        @scoreboard_map = {
          '_' => 'waiting',
          'S' => 'starting',
          'R' => 'reading',
          'W' => 'sending',
          'K' => 'keepalive',
          'D' => 'dns',
          'C' => 'closing',
          'L' => 'logging',
          'G' => 'graceful',
          'I' => 'idle',
          '.' => 'open',
        }
      end

      # Count the workers in each state of a "Scoreboard: ..." line.
      #
      # The argument is left untouched.
      #
      # @param response [String] the scoreboard line
      # @return [Hash] state name => number of workers; characters missing
      #   from the scoreboard map are counted under a nil key
      def get_scoreboard_metrics(response)
        counts = Hash.new(0)

        response.sub('Scoreboard: ', '').each_char { |char| counts[char] += 1 }
        counts.transform_keys { |char| @scoreboard_map[char] }
      end

      # Report every name/value pair as an "httpd <name>" event.
      #
      # @param metrics [Hash] metric name => value (converted with to_f)
      def report_metrics(metrics)
        metrics.each do |name, value|
          report(
            service: "httpd #{name}",
            metric: value.to_f,
            state: 'ok',
            tags: ['httpd'],
          )
        end
      end

      # Fetch the status page and report an "httpd health" event describing
      # whether the request succeeded.
      #
      # @return [String, nil] the response body, or nil when the request
      #   raised
      def connection
        response = nil
        begin
          response = ::Net::HTTP.get(@uri)
        rescue StandardError => e
          report(
            service: 'httpd health',
            state: 'critical',
            description: "Httpd connection error: #{e.class} - #{e.message}",
            tags: ['httpd'],
          )
        else
          report(
            service: 'httpd health',
            state: 'ok',
            description: 'Httpd connection status ok',
            tags: ['httpd'],
          )
        end
        response
      end

      # Poll the server once and report all the metrics found in the answer.
      def tick
        return if (response = connection).nil?

        response.each_line do |raw_line|
          line = raw_line.strip
          # A blank line has no key; it used to crash on nil.gsub.
          next if line.empty?

          metrics =
            if line =~ /Scoreboard/
              get_scoreboard_metrics(line)
            else
              key, value = line.split(':')
              { key.gsub(/\s/, '') => value }
            end
          report_metrics(metrics)
        end
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems'
4
+ require 'riemann/client'
5
+ require 'pp'
6
+
7
+ # Connects to a server (first arg) and populates it with a constant stream of
8
+ # events for testing.
9
module Riemann
  module Tools
    # Sends a constant stream of randomly evolving events, one series per
    # host/service pair, to a Riemann server for testing.
    class Bench
      attr_accessor :client, :hosts, :services, :states

      # Prepare the host and service names and connect to the server named by
      # the first command line argument (default: localhost).
      def initialize
        @hosts = %w[a b c d e f g h i j]
        @services = %w[test1 test2 test3 foo bar baz xyzzy attack cat treat]
        @states = {}
        @client = Riemann::Client.new(host: (ARGV.first || 'localhost'))
      end

      # Compute the next event of a series from its previous one.
      #
      # The metric takes a random step of at most 0.05 in either direction
      # and is kept within 0..1; the state is derived from the new metric.
      #
      # @param state [Hash] previous event (:metric, :host and :service are read)
      # @return [Hash] the new event
      def evolve(state)
        m = (state[:metric] + (rand - 0.5) * 0.1).clamp(0, 1)

        s = case m
            when 0...0.75
              'ok'
            when 0.75...0.9
              'warning'
            when 0.9..1.0
              'critical'
            end

        {
          metric: m,
          state: s,
          host: state[:host],
          service: state[:service],
          description: "at #{Time.now}",
        }
      end

      # Evolve every series by one step and send the new events.
      def tick
        # pp @states
        hosts.product(services).each do |id|
          client << (states[id] = evolve(states[id]))
        end
      end

      # Seed the series, then send updates forever, 50 ms apart.
      def run
        start
        loop do
          sleep 0.05
          tick
        end
      end

      # Create the initial event of every host/service pair.
      def start
        hosts.product(services).each do |host, service|
          states[[host, service]] = {
            metric: 0.5,
            state: 'ok',
            description: 'Starting up',
            host: host,
            service: service,
          }
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Gathers load balancer statistics from Cloudant.com (shared cluster) and submits them to Riemann.
6
module Riemann
  module Tools
    # Reports the health and the statistics of each node returned by the
    # Cloudant load balancer API.
    #
    # `opt`, `opts` and `report` are not defined here; presumably they come
    # from the Riemann::Tools mixin.
    class Cloudant
      include Riemann::Tools
      require 'net/http'
      require 'json'

      opt :cloudant_username, 'Cloudant username', type: :string, required: true
      opt :cloudant_password, 'Cloudant password', type: :string, required: true

      # Report one health event per node plus one event per node property.
      def tick
        json.each do |node|
          # NOTE(review): `break` ends the whole iteration at the first
          # BACKEND row; if rows can follow it, `next` may be what is wanted.
          break if node['svname'] == 'BACKEND' # this is just a sum of all nodes.

          ns = "cloudant #{node['pxname']}"
          cluster_name = node['tracked'].split('.')[0] # ie: meritage.cloudant.com
          node_state = node['status'] == 'UP' ? 'ok' : 'critical'

          # report health of each node.
          report(
            service: ns,
            state: node_state,
            tags: ['cloudant', cluster_name],
          )

          # report property->metric of each node.
          node.each do |property, metric|
            next if %w[pxname svname status tracked].include?(property)

            report(
              host: node['tracked'],
              service: "#{ns} #{property}",
              metric: metric.to_f,
              state: node_state,
              tags: ['cloudant', cluster_name],
            )
          end
        end
      end

      # Fetch and parse the load balancer statistics.
      #
      # @return [Object] the parsed JSON document (iterated as a list of
      #   node hashes by #tick)
      def json
        http = ::Net::HTTP.new('cloudant.com', 443)
        http.use_ssl = true
        # With a block, Net::HTTP#start returns the value of the block, which
        # is the response of the request.
        response = http.start do |h|
          get = ::Net::HTTP::Get.new('/api/load_balancer')
          get.basic_auth opts[:cloudant_username], opts[:cloudant_password]
          h.request get
        end
        JSON.parse(response.body)
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+ require 'socket'
5
+ require 'net/http'
6
+ require 'uri'
7
+ require 'json'
8
+
9
+ # Reports service and node status to riemann
10
module Riemann
  module Tools
    # Reports the health of the services and nodes known to Consul. Work is
    # only done when the configured Consul host is the current leader.
    #
    # `opt`, `opts` and `report` are not defined here; presumably they come
    # from the Riemann::Tools mixin.
    class ConsulHealth
      include Riemann::Tools

      opt :consul_host, 'Consul API Host (default to localhost)', default: 'localhost'
      opt :consul_port, 'Consul API Port (default to 8500)', default: '8500'
      opt :prefix, 'prefix to use for all service names when reporting', default: 'consul '
      opt :minimum_services_per_node, 'minimum services per node (default: 0)', default: 0

      def initialize
        @hostname = opts[:consul_host]
        @prefix = opts[:prefix]
        @minimum_services_per_node = opts[:minimum_services_per_node]
        # Address compared with the leader address returned by the API.
        @underlying_ip = IPSocket.getaddress(@hostname)

        api_base = "http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1"
        @consul_leader_url = URI.parse("#{api_base}/status/leader")
        @consul_services_url = URI.parse("#{api_base}/catalog/services")
        @consul_nodes_url = URI.parse("#{api_base}/catalog/nodes")
        @consul_health_url_prefix = "#{api_base}/health/service/"

        # service name => number of passing instances seen on the last tick
        @last_services_read = {}
      end

      # Report a single event.
      #
      # @param hostname [String] host of the event
      # @param service [#to_s] service of the event
      # @param state [#to_s] state of the event
      # @param metric [Numeric] metric of the event
      # @param description [String] description of the event
      def alert(hostname, service, state, metric, description)
        # Not named `opts`, so the options accessor is not shadowed.
        event = {
          host: hostname,
          service: service.to_s,
          state: state.to_s,
          metric: metric,
          description: description,
        }

        report(event)
      end

      # @param url [URI] address to fetch
      # @return [String] the body of the response
      def get(url)
        ::Net::HTTP.get_response(url).body
      end

      def tick
        leader = JSON.parse(get(@consul_leader_url))
        leader_hostname = URI.parse("http://#{leader}").hostname

        return unless leader_hostname == @underlying_ip

        nodes = JSON.parse(get(@consul_nodes_url))
        services = JSON.parse(get(@consul_services_url))

        # Start every known node at zero so nodes without any service are
        # reported too.
        services_by_nodes = {}
        nodes.each { |node| services_by_nodes[node['Node']] = 0 }

        # For every service
        services.each do |service_name, _details|
          health_url = URI.parse(@consul_health_url_prefix + service_name)
          health_nodes = JSON.parse(get(health_url))

          total_count = 0
          ok_count = 0

          health_nodes.each do |node|
            hostname = node['Node']['Node']
            ok = node['Checks'].all? { |check| check['Status'] == 'passing' }
            alert(hostname, "#{@prefix}#{service_name}", ok ? :ok : :critical, ok ? 1 : 0, JSON.generate(node))
            total_count += 1
            ok_count += 1 if ok

            services_by_nodes[hostname] = services_by_nodes[hostname].to_i + 1
          end

          # Only report the count when it changed since the previous tick.
          last_ok = @last_services_read[service_name]
          if !last_ok.nil? && last_ok != ok_count
            alert(
              'total', "#{@prefix}#{service_name}-count", ok_count >= last_ok ? :ok : :critical, ok_count,
              "Number of passing #{service_name} is: #{ok_count}/#{total_count}, Last time it was: #{last_ok}",
            )
          end

          @last_services_read[service_name] = ok_count
        end

        # For every node
        services_by_nodes.each do |node, count|
          alert(
            node, "#{@prefix}total-services", count >= @minimum_services_per_node ? :ok : :critical, count,
            "#{count} services in the specified node",
          )
        end
      end
    end
  end
end