riemann-tools 1.0.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +15 -0
  4. data/.github/workflows/codeql-analysis.yml +72 -0
  5. data/.gitignore +2 -0
  6. data/.rubocop.yml +40 -0
  7. data/.ruby-version +1 -0
  8. data/CHANGELOG.md +62 -2
  9. data/README.markdown +8 -24
  10. data/Rakefile +14 -5
  11. data/SECURITY.md +42 -0
  12. data/bin/riemann-apache-status +3 -94
  13. data/bin/riemann-bench +4 -67
  14. data/bin/riemann-cloudant +3 -54
  15. data/bin/riemann-consul +3 -102
  16. data/bin/riemann-dir-files-count +3 -51
  17. data/bin/riemann-dir-space +3 -51
  18. data/bin/riemann-diskstats +3 -91
  19. data/bin/riemann-fd +4 -63
  20. data/bin/riemann-freeswitch +4 -116
  21. data/bin/riemann-haproxy +3 -54
  22. data/bin/riemann-health +3 -344
  23. data/bin/riemann-kvminstance +4 -19
  24. data/bin/riemann-memcached +3 -33
  25. data/bin/riemann-net +3 -105
  26. data/bin/riemann-nginx-status +3 -80
  27. data/bin/riemann-ntp +3 -34
  28. data/bin/riemann-portcheck +3 -37
  29. data/bin/riemann-proc +3 -104
  30. data/bin/riemann-varnish +3 -50
  31. data/bin/riemann-wrapper +75 -0
  32. data/bin/riemann-zookeeper +3 -37
  33. data/lib/riemann/tools/apache_status.rb +107 -0
  34. data/lib/riemann/tools/bench.rb +72 -0
  35. data/lib/riemann/tools/cloudant.rb +57 -0
  36. data/lib/riemann/tools/consul_health.rb +107 -0
  37. data/lib/riemann/tools/dir_files_count.rb +56 -0
  38. data/lib/riemann/tools/dir_space.rb +56 -0
  39. data/lib/riemann/tools/diskstats.rb +94 -0
  40. data/lib/riemann/tools/fd.rb +81 -0
  41. data/lib/riemann/tools/freeswitch.rb +119 -0
  42. data/lib/riemann/tools/haproxy.rb +59 -0
  43. data/lib/riemann/tools/health.rb +478 -0
  44. data/lib/riemann/tools/kvm.rb +23 -0
  45. data/lib/riemann/tools/memcached.rb +38 -0
  46. data/lib/riemann/tools/net.rb +105 -0
  47. data/lib/riemann/tools/nginx_status.rb +86 -0
  48. data/lib/riemann/tools/ntp.rb +42 -0
  49. data/lib/riemann/tools/portcheck.rb +45 -0
  50. data/lib/riemann/tools/proc.rb +109 -0
  51. data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
  52. data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
  53. data/lib/riemann/tools/varnish.rb +55 -0
  54. data/lib/riemann/tools/version.rb +1 -1
  55. data/lib/riemann/tools/zookeeper.rb +40 -0
  56. data/lib/riemann/tools.rb +31 -52
  57. data/riemann-tools.gemspec +8 -2
  58. data/tools/riemann-aws/{Rakefile.rb → Rakefile} +8 -9
  59. data/tools/riemann-aws/bin/riemann-aws-billing +4 -83
  60. data/tools/riemann-aws/bin/riemann-aws-rds-status +4 -50
  61. data/tools/riemann-aws/bin/riemann-aws-sqs-status +4 -40
  62. data/tools/riemann-aws/bin/riemann-aws-status +4 -67
  63. data/tools/riemann-aws/bin/riemann-elb-metrics +4 -163
  64. data/tools/riemann-aws/bin/riemann-s3-list +4 -78
  65. data/tools/riemann-aws/bin/riemann-s3-status +4 -95
  66. data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
  67. data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
  68. data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
  69. data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
  70. data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
  71. data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
  72. data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
  73. data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +8 -9
  74. data/tools/riemann-chronos/bin/riemann-chronos +3 -139
  75. data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
  76. data/tools/riemann-docker/{Rakefile.rb → Rakefile} +7 -8
  77. data/tools/riemann-docker/bin/riemann-docker +4 -213
  78. data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
  79. data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +8 -9
  80. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +3 -161
  81. data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
  82. data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +8 -9
  83. data/tools/riemann-marathon/bin/riemann-marathon +3 -142
  84. data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
  85. data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +8 -9
  86. data/tools/riemann-mesos/bin/riemann-mesos +3 -126
  87. data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
  88. data/tools/riemann-munin/{Rakefile.rb → Rakefile} +7 -8
  89. data/tools/riemann-munin/bin/riemann-munin +3 -32
  90. data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
  91. data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +8 -9
  92. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +3 -264
  93. data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
  94. data/tools/riemann-riak/{Rakefile.rb → Rakefile} +7 -8
  95. data/tools/riemann-riak/bin/riemann-riak +3 -326
  96. data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
  97. data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
  98. data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
  99. metadata +112 -16
  100. data/.travis.yml +0 -31
  101. data/tools/riemann-riak/riak_status/key_count.erl +0 -13
  102. data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
  103. data/tools/riemann-riak/riak_status/ringready.erl +0 -9
@@ -1,42 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
- Process.setproctitle($0)
2
+ # frozen_string_literal: true
3
3
 
4
- # Checks for open tcp ports.
5
- # (c) Max Voit 2017
4
+ Process.setproctitle($PROGRAM_NAME)
6
5
 
7
- require File.expand_path('../../lib/riemann/tools', __FILE__)
8
-
9
- class Riemann::Tools::Portcheck
10
- include Riemann::Tools
11
- require 'socket'
12
-
13
- opt :hostname, "Host, defaults to localhost", :default => `hostname`.chomp
14
- opt :ports, "List of ports to check, e.g. '-r 80 443'", :type => :ints
15
-
16
- def initialize
17
- @hostname = opts.fetch(:hostname)
18
- @ports = opts.fetch(:ports)
19
- end
20
-
21
- def tick
22
- for thisport in @ports
23
- # try opening tcp connection with 5s timeout;
24
- # if this fails, the port is considered closed
25
- portopen = Socket.tcp(@hostname, thisport, connect_timeout: 5) { true } rescue false
26
- if portopen
27
- state = "ok"
28
- else
29
- state = "critical"
30
- end
31
- report(
32
- :host => "#{@hostname}",
33
- :service => "port #{thisport}",
34
- :state => "#{state}",
35
- :tags => ["portcheck"]
36
- )
37
- end
38
- end
39
-
40
- end
6
+ require 'riemann/tools/portcheck'
41
7
 
42
8
  Riemann::Tools::Portcheck.run
data/bin/riemann-proc CHANGED
@@ -1,109 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
- Process.setproctitle($0)
2
+ # frozen_string_literal: true
3
3
 
4
- # Reports running process count to riemann.
4
+ Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- require File.expand_path('../../lib/riemann/tools', __FILE__)
7
-
8
- class Riemann::Tools::Proc
9
- include Riemann::Tools
10
-
11
- opt :proc_regex, "regular expression that matches the process to be monitored", type: :string, :default => ".*"
12
- opt :proc_min_critical, "running process count minimum", :default => 0
13
- opt :proc_max_critical, "running process count maximum", :default => 65536
14
-
15
- def initialize
16
- @limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }
17
-
18
- abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]
19
-
20
- ostype = `uname -s`.chomp.downcase
21
- puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
22
- @check = method :linux_proc
23
- end
24
-
25
- def alert(service, state, metric, description)
26
- report(
27
- :service => service.to_s,
28
- :state => state.to_s,
29
- :metric => metric.to_f,
30
- :description => description
31
- )
32
- end
33
-
34
- def linux_proc
35
- process = opts[:proc_regex]
36
- found = `ps axo pid=,rss=,vsize=,state=,cputime=,lstart=,command= | grep '#{process}' | grep -v grep | grep -v riemann-proc`
37
- running = found.count("\n")
38
- if running > @limits[:critical][:max] or running < @limits[:critical][:min]
39
- alert "proc count/#{process}", :critical, running, "process #{process} is running #{running} instances.\n"
40
- else
41
- alert "proc count/#{process}", :ok, running, "process #{process} is running #{running} instances.\n"
42
- end
43
- # Iterate on all the lines and create an entry for the following metrics:
44
- #
45
- # process/<pid>-<start-time>/rss
46
- # process/<pid>-<start-time>/vsize
47
- # process/<pid>-<start-time>/running
48
- # process/<pid>-<start-time>/cputime
49
- #
50
- # description should contain the command itself.
51
- # value should be either process RSS, VSIZE, or 1 if running
52
- # state is always unknown for the moment
53
- #
54
- ps_regex = /([0-9]+)[ ]+([0-9]+)[ ]+([0-9]+)[ ]+([A-Z])[ ]+([0-9:.]+)[ ]+[A-Za-z]{3}[ ]+([A-Za-z]{3}[ ]{1,2}[0-9]+ [0-9:]+ [0-9]+)[ ]+(.*)/
55
- found.each_line do |line|
56
- m = ps_regex.match(line)
57
- if not m.nil?
58
- pid, rss, vsize, state, cputime, start, command = m.captures
59
- start_s = DateTime.parse(start, "Mmm DD HH:MM:ss YYYY").to_time.to_i
60
- cputime_s = DateTime.parse(cputime, "%H:%M:%S")
61
- cputime_seconds = (cputime_s.hour * 3600) + (cputime_s.minute * 60) + cputime_s.second
62
- running = 0
63
- case state[0]
64
- when "R"
65
- state_s = "ok"
66
- running = 1
67
- when "S"
68
- state_s = "ok"
69
- when "I"
70
- state_s = "warning"
71
- when "T", "U", "Z"
72
- state_s = "critical"
73
- else
74
- state_s = "unknown"
75
- end
76
- report(
77
- :service => "proc #{pid}-#{start_s}/rss",
78
- :state => state_s.to_s,
79
- :metric => rss.to_f,
80
- :description => command,
81
- )
82
- report(
83
- :service => "proc #{pid}-#{start_s}/vsize",
84
- :state => state_s.to_s,
85
- :metric => vsize.to_f,
86
- :description => command,
87
- )
88
- report(
89
- :service => "proc #{pid}-#{start_s}/running",
90
- :state => state_s.to_s,
91
- :metric => running.to_f,
92
- :description => command,
93
- )
94
- report(
95
- :service => "proc #{pid}-#{start_s}/cputime",
96
- :state => state_s.to_s,
97
- :metric => cputime_seconds,
98
- :description => command,
99
- )
100
- end
101
- end
102
- end
103
-
104
- def tick
105
- @check.call
106
- end
107
- end
6
+ require 'riemann/tools/proc'
108
7
 
109
8
  Riemann::Tools::Proc.run
data/bin/riemann-varnish CHANGED
@@ -1,55 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
- Process.setproctitle($0)
2
+ # frozen_string_literal: true
3
3
 
4
- # Reports varnish stats to Riemann.
4
+ Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- require 'open3'
7
- require File.expand_path('../../lib/riemann/tools', __FILE__)
8
-
9
- class Riemann::Tools::Varnish
10
- include Riemann::Tools
11
-
12
- opt :varnish_host, "Varnish hostname", :default => `hostname`.chomp
13
-
14
- def initialize
15
- cmd = 'varnishstat -V'
16
- Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thr|
17
- @ver = /varnishstat \(varnish-(\d+)/.match(stderr.read)[1].to_i
18
- end
19
-
20
- if @ver >= 4
21
- @vstats = [ "MAIN.sess_conn",
22
- "MAIN.sess_drop ",
23
- "MAIN.client_req",
24
- "MAIN.cache_hit",
25
- "MAIN.cache_miss" ]
26
- else
27
- @vstats = [ "client_conn",
28
- "client_drop",
29
- "client_req",
30
- "cache_hit",
31
- "cache_miss" ]
32
- end
33
- end
34
-
35
- def tick
36
- if @ver >= 4
37
- stats = `varnishstat -1 -f #{@vstats.join(" -f ")}`
38
- else
39
- stats = `varnishstat -1 -f #{@vstats.join(",")}`
40
- end
41
- stats.each_line do |stat|
42
- m = stat.split()
43
- report(
44
- :host => opts[:varnish_host].dup,
45
- :service => "varnish #{m[0]}",
46
- :metric => m[1].to_f,
47
- :state => "ok",
48
- :description => "#{m[3..-1].join(' ')}",
49
- :tags => ["varnish"]
50
- )
51
- end
52
- end
53
- end
6
+ require 'riemann/tools/varnish'
54
7
 
55
8
  Riemann::Tools::Varnish.run
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
# Converts a require-style path into a constant name, e.g.
# "riemann/tools/apache_status" => "Riemann::Tools::ApacheStatus".
#
# A lowercase letter at the start of a line, or right after "/" or "_", is
# upcased; a leading "/" becomes "::" and a leading "_" is dropped.
def camelize(subject)
  subject.gsub(%r{(^|[/_])([a-z])}) do
    separator = Regexp.last_match(1)
    letter = Regexp.last_match(2)
    namespace = separator == '/' ? '::' : ''
    "#{namespace}#{letter.upcase}"
  end
end
9
+
10
# Converts a constant name into a require-style path, e.g.
# "Riemann::Tools::ApacheStatus" => "riemann/tools/apache_status".
def underscore(subject)
  # Split in front of every capital letter, keeping the letter with its word.
  words = subject.split(/(?=[A-Z])/)
  snake_cased = words.map { |word| word.downcase }.join('_')
  # A namespace separator followed by the joining underscore marks a path step.
  snake_cased.gsub('::_', '/')
end
13
+
14
# Looks up the constant named by +subject+ (e.g. "Riemann::Tools::Fd"),
# starting from Object. Raises NameError when no such constant exists.
def constantize(subject)
  return Object.const_get(subject)
end
17
+
18
# Consumes arguments from the front of +argv+ up to and including the next
# "--" separator and returns the consumed arguments without the separator.
# +argv+ is modified in place, so successive calls walk through the
# "--"-delimited groups of a command line.
def read_flags(argv)
  flags = []

  loop do
    current = argv.shift
    # Stop when argv is exhausted (shift returns nil) or a separator is hit.
    break if !current || current == '--'

    flags.push(current)
  end

  flags
end
29
+
30
# Without any argument there is nothing to run: print the usage and bail out.
if ARGV.empty?
  warn <<~USAGE
    usage: riemann-wrapper [common options] -- tool1 [tool1 options] [-- tool2 [tool2 options] ...]

    Run multiple Riemann tools in a single process. A single connection to
    riemann is maintained and shared for all tools, the connection flags should
    only be passed as common options.

    Examples:
    1. Run the fd, health and ntp tools with default options:

    riemann-wrapper -- fd -- health -- ntp

    2. Run the fd, health and ntp tools against a remote riemann server using
    TCP and tagging each event with the name of the tool that produced it:

    riemann-wrapper --host riemann.example.com --tcp -- \\
    fd --tag=fd -- \\
    health --tag=health -- \\
    ntp --tag=ntp
  USAGE
  exit 1
end

# Work on a copy: ARGV itself is rewritten for every tool below.
argv = ARGV.dup

# Everything before the first "--" applies to all tools.
common_argv = read_flags(argv)

threads = []

while argv.any?
  tool = argv.shift
  tool_argv = read_flags(argv)

  require "riemann/tools/#{tool}"
  tool_class = constantize(camelize("riemann/tools/#{tool}"))

  ARGV.replace(common_argv + tool_argv)
  instance = tool_class.new
  # Force evaluation of options. This relies on ARGV and needs to be done
  # before we launch multiple threads which compete to read information from
  # there.
  instance.options
  # `while` does not open a new scope, so `instance` is one variable shared by
  # every iteration. Hand the object to the thread as an argument instead of
  # closing over the variable; otherwise a thread that starts late could run
  # the instance created by a later iteration.
  threads << Thread.new(instance) { |tool_instance| tool_instance.run }
end

threads.each(&:join)
@@ -1,42 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
- Process.setproctitle($0)
2
+ # frozen_string_literal: true
3
3
 
4
- # Gathers zookeeper STATS and submits them to Riemann.
4
+ Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- require File.expand_path('../../lib/riemann/tools', __FILE__)
7
-
8
- class Riemann::Tools::Zookeeper
9
- include Riemann::Tools
10
- require 'socket'
11
-
12
- opt :zookeeper_host, "Zookeeper hostname", :default => 'localhost'
13
- opt :zookeeper_port, "Zookeeper port", :default => 2181
14
-
15
- def tick
16
- sock = TCPSocket.new(opts[:zookeeper_host], opts[:zookeeper_port])
17
- sock.sync = true
18
- sock.print("mntr")
19
- sock.flush
20
-
21
-
22
- data = {}
23
- while true
24
- stats = sock.gets
25
-
26
- break if stats.nil?
27
-
28
- m = stats.match /^(\w+)\t+(.*)/
29
-
30
- report(
31
- :host => opts[ :zookeeper_host].dup,
32
- :service => "zookeeper #{m[1]}",
33
- :metric => m[2].to_f,
34
- :state => 'ok',
35
- :tags => ['zookeeper']
36
- )
37
- end
38
- sock.close
39
- end
40
- end
6
+ require 'riemann/tools/zookeeper'
41
7
 
42
8
  Riemann::Tools::Zookeeper.run
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Collects Apache metrics and submits them to Riemann
6
+ # More information can be found at http://httpd.apache.org/docs/2.4/mod/mod_status.html
7
+
8
+ # Removes whitespace from 'Total Accesses' and 'Total kBytes' for output to graphite
9
module Riemann
  module Tools
    # Polls the machine-readable (`?auto`) variant of an Apache mod_status page
    # and reports every field as an "httpd <field>" event, plus one
    # "httpd health" event describing whether the page could be fetched.
    class ApacheStatus
      include Riemann::Tools
      require 'net/http'
      require 'uri'

      opt :uri, 'Apache Server Status URI', default: 'http://localhost/server-status'

      def initialize
        # Keep a URI object rather than a String: Net::HTTP.get called with a
        # single argument requires a URI and raises NoMethodError on a String.
        @uri = URI.parse("#{opts[:uri]}?auto")
        # Sample Response with ExtendedStatus On
        # Total Accesses: 20643
        # Total kBytes: 36831
        # CPULoad: .0180314
        # Uptime: 43868
        # ReqPerSec: .470571
        # BytesPerSec: 859.737
        # BytesPerReq: 1827.01
        # BusyWorkers: 6
        # IdleWorkers: 94
        # Scoreboard: ___K_____K____________W_

        # Maps each scoreboard character to the name used in the service.
        @scoreboard_map = {
          '_' => 'waiting',
          'S' => 'starting',
          'R' => 'reading',
          'W' => 'sending',
          'K' => 'keepalive',
          'D' => 'dns',
          'C' => 'closing',
          'L' => 'logging',
          'G' => 'graceful',
          'I' => 'idle',
          '.' => 'open',
        }
      end

      # Counts the worker states found in a "Scoreboard: ..." line.
      #
      # @param response [String] the scoreboard line (left untouched)
      # @return [Hash] state name => number of workers in that state;
      #   characters missing from the scoreboard map end up under a nil key
      def get_scoreboard_metrics(response)
        scoreboard = response.sub('Scoreboard: ', '')
        counts = scoreboard.each_char.with_object(Hash.new(0)) do |char, tally|
          tally[char] += 1
        end
        counts.transform_keys { |k| @scoreboard_map[k] }
      end

      # Sends one "httpd <name>" event per entry of +metrics+; values are
      # converted with to_f.
      def report_metrics(metrics)
        metrics.each do |k, v|
          report(
            service: "httpd #{k}",
            metric: v.to_f,
            state: 'ok',
            tags: ['httpd'],
          )
        end
      end

      # Fetches the status page and reports the outcome as "httpd health".
      #
      # @return [String, nil] the response body, or nil when the request failed
      def connection
        response = nil
        begin
          response = ::Net::HTTP.get(@uri)
        rescue StandardError => e
          report(
            service: 'httpd health',
            state: 'critical',
            description: "Httpd connection error: #{e.class} - #{e.message}",
            tags: ['httpd'],
          )
        else
          report(
            service: 'httpd health',
            state: 'ok',
            description: 'Httpd connection status ok',
            tags: ['httpd'],
          )
        end
        response
      end

      # Fetches the status page and reports each of its lines.
      def tick
        return if (response = connection).nil?

        response.each_line do |raw_line|
          line = raw_line.strip

          if line =~ /Scoreboard/
            report_metrics(get_scoreboard_metrics(line))
            next
          end

          key, value = line.split(':')
          # Blank lines (and a lone ":") yield no key; there is nothing to
          # report for them.
          next if key.nil?

          report_metrics(key.gsub(/\s/, '') => value)
        end
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems'
4
+ require 'riemann/client'
5
+ require 'pp'
6
+
7
+ # Connects to a server (first arg) and populates it with a constant stream of
8
+ # events for testing.
9
module Riemann
  module Tools
    # Load generator: keeps one synthetic event per (host, service) pair and
    # sends a slightly modified version of each of them to Riemann on every
    # tick.
    class Bench
      attr_accessor :client, :hosts, :services, :states

      # The Riemann server host is taken from the first command-line argument
      # and defaults to localhost.
      def initialize
        @hosts = %w[a b c d e f g h i j]
        @services = %w[test1 test2 test3 foo bar baz xyzzy attack cat treat]
        @states = {}
        @client = Riemann::Client.new(host: (ARGV.first || 'localhost'))
      end

      # Builds the successor of +state+: the metric moves by a random step of
      # at most 0.05 in either direction, is kept within 0..1, and the state
      # string is derived from the new metric.
      #
      # @param state [Hash] event with :metric, :host and :service keys
      # @return [Hash] a new event hash; +state+ is not modified
      def evolve(state)
        m = (state[:metric] + (rand - 0.5) * 0.1).clamp(0, 1)

        s = case m
            when 0...0.75
              'ok'
            when 0.75...0.9
              'warning'
            when 0.9..1.0
              'critical'
            end

        {
          metric: m,
          state: s,
          host: state[:host],
          service: state[:service],
          description: "at #{Time.now}",
        }
      end

      # Evolves every known state, remembers it and sends it to the client.
      def tick
        hosts.product(services).each do |id|
          client << (states[id] = evolve(states[id]))
        end
      end

      # Seeds the states, then ticks forever, pausing 0.05s before each tick.
      def run
        start
        loop do
          sleep 0.05
          tick
        end
      end

      # Creates the initial event of every (host, service) pair.
      def start
        hosts.product(services).each do |host, service|
          states[[host, service]] = {
            metric: 0.5,
            state: 'ok',
            description: 'Starting up',
            host: host,
            service: service,
          }
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Gathers load balancer statistics from Cloudant.com (shared cluster) and submits them to Riemann.
6
module Riemann
  module Tools
    # Reports the status and the numeric properties of every load balancer
    # node returned by the Cloudant `/api/load_balancer` endpoint.
    class Cloudant
      include Riemann::Tools
      require 'net/http'
      require 'json'

      opt :cloudant_username, 'Cloudant username', type: :string, required: true
      opt :cloudant_password, 'Cloudant password', type: :string, required: true

      # Sends one health event per node, followed by one event per remaining
      # node property.
      def tick
        json.each do |node|
          break if node['svname'] == 'BACKEND' # this is just a sum of all nodes.

          ns = "cloudant #{node['pxname']}"
          cluster_name = node['tracked'].split('.')[0] # ie: meritage.cloudant.com
          state = node['status'] == 'UP' ? 'ok' : 'critical'

          # report health of each node.
          report(
            service: ns,
            state: state,
            tags: ['cloudant', cluster_name],
          )

          # report property->metric of each node.
          node.each do |property, metric|
            next if %w[pxname svname status tracked].include?(property)

            report(
              host: node['tracked'],
              service: "#{ns} #{property}",
              metric: metric.to_f,
              state: state,
              tags: ['cloudant', cluster_name],
            )
          end
        end
      end

      # Fetches and parses the load balancer statistics.
      #
      # @return [Object] the parsed JSON document (#tick iterates it as a
      #   list of node hashes)
      def json
        http = ::Net::HTTP.new('cloudant.com', 443)
        http.use_ssl = true
        # Net::HTTP#start returns the value of its block, i.e. the response of
        # the request; the body lives on that response, not on the connection.
        response = http.start do |h|
          get = ::Net::HTTP::Get.new('/api/load_balancer')
          get.basic_auth opts[:cloudant_username], opts[:cloudant_password]
          h.request get
        end
        JSON.parse(response.body)
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+ require 'socket'
5
+ require 'net/http'
6
+ require 'uri'
7
+ require 'json'
8
+
9
+ # Reports service and node status to riemann
10
module Riemann
  module Tools
    # Reports the health of every Consul service instance, changes in the
    # number of passing instances per service, and the number of services per
    # node. Nothing is reported unless the configured Consul host is the
    # current cluster leader.
    class ConsulHealth
      include Riemann::Tools

      opt :consul_host, 'Consul API Host (default to localhost)', default: 'localhost'
      opt :consul_port, 'Consul API Port (default to 8500)', default: '8500'
      opt :prefix, 'prefix to use for all service names when reporting', default: 'consul '
      opt :minimum_services_per_node, 'minimum services per node (default: 0)', default: 0

      def initialize
        @hostname = opts[:consul_host]
        @prefix = opts[:prefix]
        @minimum_services_per_node = opts[:minimum_services_per_node]
        # Resolved once so it can be compared with the leader address in #tick.
        @underlying_ip = IPSocket.getaddress(@hostname)

        api_root = "http://#{opts[:consul_host]}:#{opts[:consul_port]}/v1"
        @consul_leader_url = URI.parse("#{api_root}/status/leader")
        @consul_services_url = URI.parse("#{api_root}/catalog/services")
        @consul_nodes_url = URI.parse("#{api_root}/catalog/nodes")
        @consul_health_url_prefix = "#{api_root}/health/service/"

        # service name => number of passing instances seen on the last tick
        @last_services_read = {}
      end

      # Reports a single event; +service+ and +state+ are converted to strings.
      def alert(hostname, service, state, metric, description)
        report(
          host: hostname,
          service: service.to_s,
          state: state.to_s,
          metric: metric,
          description: description,
        )
      end

      # @param url [URI] the URL to fetch
      # @return [String] the response body
      def get(url)
        ::Net::HTTP.get_response(url).body
      end

      def tick
        leader = JSON.parse(get(@consul_leader_url))
        leader_hostname = URI.parse("http://#{leader}").hostname

        # Only report when the leader address matches the configured host.
        return unless leader_hostname == @underlying_ip

        nodes = JSON.parse(get(@consul_nodes_url))
        services = JSON.parse(get(@consul_services_url))

        # Start every known node at zero so nodes without services are
        # reported too.
        services_by_nodes = nodes.each_with_object({}) do |node, counts|
          counts[node['Node']] = 0
        end

        # For every service
        services.each do |service_name, _tags|
          health_url = URI.parse(@consul_health_url_prefix + service_name)
          health_nodes = JSON.parse(get(health_url))

          total_count = 0
          ok_count = 0

          health_nodes.each do |node|
            hostname = node['Node']['Node']
            ok = node['Checks'].all? { |check| check['Status'] == 'passing' }
            alert(hostname, "#{@prefix}#{service_name}", ok ? :ok : :critical, ok ? 1 : 0, JSON.generate(node))
            total_count += 1
            ok_count += 1 if ok

            # to_i covers nodes that were absent from the catalog listing.
            services_by_nodes[hostname] = services_by_nodes[hostname].to_i + 1
          end

          # Report the passing count only when it differs from the last tick.
          last_ok = @last_services_read[service_name]
          if !last_ok.nil? && last_ok != ok_count
            alert(
              'total', "#{@prefix}#{service_name}-count", ok_count >= last_ok ? :ok : :critical, ok_count,
              "Number of passing #{service_name} is: #{ok_count}/#{total_count}, Last time it was: #{last_ok}",
            )
          end

          @last_services_read[service_name] = ok_count
        end

        # For every node
        services_by_nodes.each do |node, count|
          alert(
            node, "#{@prefix}total-services", count >= @minimum_services_per_node ? :ok : :critical, count,
            "#{count} services in the specified node",
          )
        end
      end
    end
  end
end