riemann-tools 1.0.0 → 1.2.0

Files changed (103)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +15 -0
  4. data/.github/workflows/codeql-analysis.yml +72 -0
  5. data/.gitignore +2 -0
  6. data/.rubocop.yml +40 -0
  7. data/.ruby-version +1 -0
  8. data/CHANGELOG.md +62 -2
  9. data/README.markdown +8 -24
  10. data/Rakefile +14 -5
  11. data/SECURITY.md +42 -0
  12. data/bin/riemann-apache-status +3 -94
  13. data/bin/riemann-bench +4 -67
  14. data/bin/riemann-cloudant +3 -54
  15. data/bin/riemann-consul +3 -102
  16. data/bin/riemann-dir-files-count +3 -51
  17. data/bin/riemann-dir-space +3 -51
  18. data/bin/riemann-diskstats +3 -91
  19. data/bin/riemann-fd +4 -63
  20. data/bin/riemann-freeswitch +4 -116
  21. data/bin/riemann-haproxy +3 -54
  22. data/bin/riemann-health +3 -344
  23. data/bin/riemann-kvminstance +4 -19
  24. data/bin/riemann-memcached +3 -33
  25. data/bin/riemann-net +3 -105
  26. data/bin/riemann-nginx-status +3 -80
  27. data/bin/riemann-ntp +3 -34
  28. data/bin/riemann-portcheck +3 -37
  29. data/bin/riemann-proc +3 -104
  30. data/bin/riemann-varnish +3 -50
  31. data/bin/riemann-wrapper +75 -0
  32. data/bin/riemann-zookeeper +3 -37
  33. data/lib/riemann/tools/apache_status.rb +107 -0
  34. data/lib/riemann/tools/bench.rb +72 -0
  35. data/lib/riemann/tools/cloudant.rb +57 -0
  36. data/lib/riemann/tools/consul_health.rb +107 -0
  37. data/lib/riemann/tools/dir_files_count.rb +56 -0
  38. data/lib/riemann/tools/dir_space.rb +56 -0
  39. data/lib/riemann/tools/diskstats.rb +94 -0
  40. data/lib/riemann/tools/fd.rb +81 -0
  41. data/lib/riemann/tools/freeswitch.rb +119 -0
  42. data/lib/riemann/tools/haproxy.rb +59 -0
  43. data/lib/riemann/tools/health.rb +478 -0
  44. data/lib/riemann/tools/kvm.rb +23 -0
  45. data/lib/riemann/tools/memcached.rb +38 -0
  46. data/lib/riemann/tools/net.rb +105 -0
  47. data/lib/riemann/tools/nginx_status.rb +86 -0
  48. data/lib/riemann/tools/ntp.rb +42 -0
  49. data/lib/riemann/tools/portcheck.rb +45 -0
  50. data/lib/riemann/tools/proc.rb +109 -0
  51. data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
  52. data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
  53. data/lib/riemann/tools/varnish.rb +55 -0
  54. data/lib/riemann/tools/version.rb +1 -1
  55. data/lib/riemann/tools/zookeeper.rb +40 -0
  56. data/lib/riemann/tools.rb +31 -52
  57. data/riemann-tools.gemspec +8 -2
  58. data/tools/riemann-aws/{Rakefile.rb → Rakefile} +8 -9
  59. data/tools/riemann-aws/bin/riemann-aws-billing +4 -83
  60. data/tools/riemann-aws/bin/riemann-aws-rds-status +4 -50
  61. data/tools/riemann-aws/bin/riemann-aws-sqs-status +4 -40
  62. data/tools/riemann-aws/bin/riemann-aws-status +4 -67
  63. data/tools/riemann-aws/bin/riemann-elb-metrics +4 -163
  64. data/tools/riemann-aws/bin/riemann-s3-list +4 -78
  65. data/tools/riemann-aws/bin/riemann-s3-status +4 -95
  66. data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
  67. data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
  68. data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
  69. data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
  70. data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
  71. data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
  72. data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
  73. data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +8 -9
  74. data/tools/riemann-chronos/bin/riemann-chronos +3 -139
  75. data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
  76. data/tools/riemann-docker/{Rakefile.rb → Rakefile} +7 -8
  77. data/tools/riemann-docker/bin/riemann-docker +4 -213
  78. data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
  79. data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +8 -9
  80. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +3 -161
  81. data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
  82. data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +8 -9
  83. data/tools/riemann-marathon/bin/riemann-marathon +3 -142
  84. data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
  85. data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +8 -9
  86. data/tools/riemann-mesos/bin/riemann-mesos +3 -126
  87. data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
  88. data/tools/riemann-munin/{Rakefile.rb → Rakefile} +7 -8
  89. data/tools/riemann-munin/bin/riemann-munin +3 -32
  90. data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
  91. data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +8 -9
  92. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +3 -264
  93. data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
  94. data/tools/riemann-riak/{Rakefile.rb → Rakefile} +7 -8
  95. data/tools/riemann-riak/bin/riemann-riak +3 -326
  96. data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
  97. data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
  98. data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
  99. metadata +112 -16
  100. data/.travis.yml +0 -31
  101. data/tools/riemann-riak/riak_status/key_count.erl +0 -13
  102. data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
  103. data/tools/riemann-riak/riak_status/ringready.erl +0 -9
data/tools/riemann-docker/bin/riemann-docker
@@ -1,217 +1,8 @@
 #!/usr/bin/env ruby
-Process.setproctitle($0)
+# frozen_string_literal: true
 
-# Reports current CPU, disk, load average, and memory use to riemann.
+Process.setproctitle($PROGRAM_NAME)
 
-require 'riemann/tools'
-
-class Riemann::Tools::DockerHealth
-  require 'docker'
-  require 'socket'
-  include Riemann::Tools
-  include Docker
-
-  opt :docker_host, "Docker Container Host (see https://github.com/swipely/docker-api#host)", :type => String, :default => nil
-  opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
-  opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
-  opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
-  opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
-  opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
-  opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
-  opt :host_hostname, "Suffix of host", :type => String, :default => nil
-  opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'memory', 'disk', 'basic']
-
-  def get_containers
-    Docker::Container.all
-  end
-
-  def get_container_name(container)
-    container.json['Name'][1..-1]
-  end
-
-  def initialize
-
-    if (opts[:docker_host] != nil)
-      Docker.url = opts[:docker_host]
-    end
-
-    @hostname = opts[:host_hostname]
-    if (@hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?)
-      @hostname = Socket.gethostname
-    end
-
-    @cpu_coefficient = 1000 * 1000 * 1000
-
-    @limits = {
-      :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
-      :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
-      :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
-    }
-
-    @last_cpu_reads = Hash.new
-    @last_uptime_reads = Hash.new
-
-    opts[:checks].each do |check|
-      case check
-      when 'disk'
-        @disk_enabled = true
-      when 'cpu'
-        @cpu_enabled = true
-      when 'memory'
-        @memory_enabled = true
-      when 'basic'
-        @basic_inspection_enabled = true
-      end
-    end
-  end
-
-  def alert(container, service, state, metric, description)
-
-    opts = { :service => service.to_s,
-             :state => state.to_s,
-             :metric => metric.to_f,
-             :description => description }
-
-    if (container != nil)
-      opts[:host] = "#{@hostname}-#{container}"
-    else
-      opts[:host] = @hostname
-    end
-
-    report(opts)
-  end
-
-  def report_pct(container, service, fraction, report = '', name = nil)
-    if fraction
-
-      if (name == nil)
-        name = service
-      end
-
-      if fraction > @limits[service][:critical]
-        alert container, name, :critical, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
-      elsif fraction > @limits[service][:warning]
-        alert container, name, :warning, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
-      else
-        alert container, name, :ok, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
-      end
-    end
-  end
-
-
-  def cpu(id, name, stats)
-
-    current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
-
-    unless current
-      alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
-      return false
-    end
-
-    current_time = Time.parse(stats['read']);
-    if (@last_cpu_reads[id] != nil)
-      last = @last_cpu_reads[id]
-      used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
-
-      report_pct name, :cpu, used
-    end
-
-    @last_cpu_reads[id] = { v: current, t: current_time }
-  end
-
-  def memory(id, name, stats)
-    memory_stats = stats['memory_stats']
-    usage = memory_stats['usage'].to_f
-    total = memory_stats['limit'].to_f
-    fraction = (usage / total)
-
-    report_pct name, :memory, fraction, "#{usage} / #{total}"
-  end
-
-  def disk
-    `df -P`.split(/\n/).each do |r|
-      f = r.split(/\s+/)
-      next if f[0] == 'Filesystem'
-      next unless f[0] =~ /\// # Needs at least one slash in the mount path
-
-      # Calculate capacity
-      x = f[4].to_f/100
-      report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
-    end
-  end
-
-  def basic_inspection(id, name, inspection)
-
-    state = inspection['State']
-    json_state = JSON.generate(state)
-
-    running = state['Running']
-
-    alert(name, "status",
-          running ? "ok" : "critical",
-          running ? 1 : 0,
-          json_state)
-
-    if (running)
-      start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
-      now = DateTime.now.to_time.utc.to_i
-      uptime = now - start_time
-
-      if (@last_uptime_reads[id] != nil)
-        last = @last_uptime_reads[id]
-        restarted = start_time != last
-        alert(name, "uptime",
-              restarted ? "critical" : "ok",
-              uptime,
-              "last 'StartedAt' measure was #{last} (#{Time.at(last).utc.to_s}), " +
-              "now it's #{start_time} (#{Time.at(start_time).utc.to_s})")
-      end
-
-      @last_uptime_reads[id] = start_time
-    end
-  end
-
-  def tick
-
-    # Disk is the same in every container
-    if @disk_enabled
-      disk()
-    end
-
-    # Get CPU, Memory and Load of each container
-    containers = get_containers()
-    threads = []
-
-    containers.each do |ctr|
-      threads << Thread.new(ctr) do |container|
-
-        id = container.id
-        name = get_container_name(container)
-
-        stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", {stream:false}))
-
-        if @basic_inspection_enabled
-          inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
-          basic_inspection(id, name, inspection)
-        end
-        if @cpu_enabled
-          cpu(id, name, stats)
-        end
-        if @memory_enabled
-          memory(id, name, stats)
-        end
-      end
-    end
-
-    threads.each do |thread|
-      begin
-        thread.join
-      rescue => e
-        $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
-      end
-    end
-  end
-end
-
-Riemann::Tools::DockerHealth.run
+require 'riemann/tools/docker'
 
+Riemann::Tools::Docker.run
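Note on the thresholds used above: report_pct (present in both the old executable and the new class) turns a usage fraction into a Riemann state by comparing it against the configured limits. A standalone sketch of that mapping, using the default memory thresholds from the opt declarations (0.85 warning, 0.95 critical); the helper name and sample values are illustrative only, not part of the gem:

# Illustrative only: the state selection performed by report_pct,
# using the default memory thresholds shown in the diff above.
LIMITS = { memory: { warning: 0.85, critical: 0.95 } }.freeze

def state_for(service, fraction)
  if fraction > LIMITS[service][:critical]
    :critical
  elsif fraction > LIMITS[service][:warning]
    :warning
  else
    :ok
  end
end

puts state_for(:memory, 0.50) # => ok
puts state_for(:memory, 0.90) # => warning
puts state_for(:memory, 0.97) # => critical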
data/tools/riemann-docker/lib/riemann/tools/docker.rb
@@ -0,0 +1,200 @@
+# frozen_string_literal: true
+
+require 'riemann/tools'
+
+# Reports current CPU, disk, load average, and memory use to riemann.
+module Riemann
+  module Tools
+    class Docker
+      require 'docker'
+      require 'socket'
+      include Riemann::Tools
+      include ::Docker
+
+      opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
+          default: nil
+      opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
+      opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
+      opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
+      opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
+      opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
+      opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
+      opt :host_hostname, 'Suffix of host', type: String, default: nil
+      opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
+
+      def containers
+        Docker::Container.all
+      end
+
+      def get_container_name(container)
+        container.json['Name'][1..]
+      end
+
+      def initialize
+        Docker.url = opts[:docker_host] unless opts[:docker_host].nil?
+
+        @hostname = opts[:host_hostname]
+        @hostname = Socket.gethostname if @hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?
+
+        @cpu_coefficient = 1000 * 1000 * 1000
+
+        @limits = {
+          cpu: { critical: opts[:cpu_critical], warning: opts[:cpu_warning] },
+          disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
+          memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
+        }
+
+        @last_cpu_reads = {}
+        @last_uptime_reads = {}
+
+        opts[:checks].each do |check|
+          case check
+          when 'disk'
+            @disk_enabled = true
+          when 'cpu'
+            @cpu_enabled = true
+          when 'memory'
+            @memory_enabled = true
+          when 'basic'
+            @basic_inspection_enabled = true
+          end
+        end
+      end
+
+      def alert(container, service, state, metric, description)
+        opts = {
+          service: service.to_s,
+          state: state.to_s,
+          metric: metric.to_f,
+          description: description,
+        }
+
+        opts[:host] = if !container.nil?
+                        "#{@hostname}-#{container}"
+                      else
+                        @hostname
+                      end
+
+        report(opts)
+      end
+
+      def report_pct(container, service, fraction, report = '', name = nil)
+        return unless fraction
+
+        name = service if name.nil?
+
+        if fraction > @limits[service][:critical]
+          alert container, name, :critical, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
+        elsif fraction > @limits[service][:warning]
+          alert container, name, :warning, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
+        else
+          alert container, name, :ok, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
+        end
+      end
+
+      def cpu(id, name, stats)
+        current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
+
+        unless current
+          alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
+          return false
+        end
+
+        current_time = Time.parse(stats['read'])
+        unless @last_cpu_reads[id].nil?
+          last = @last_cpu_reads[id]
+          used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
+
+          report_pct name, :cpu, used
+        end
+
+        @last_cpu_reads[id] = { v: current, t: current_time }
+      end
+
+      def memory(_id, name, stats)
+        memory_stats = stats['memory_stats']
+        usage = memory_stats['usage'].to_f
+        total = memory_stats['limit'].to_f
+        fraction = (usage / total)
+
+        report_pct name, :memory, fraction, "#{usage} / #{total}"
+      end
+
+      def disk
+        `df -P`.split(/\n/).each do |r|
+          f = r.split(/\s+/)
+          next if f[0] == 'Filesystem'
+          next unless f[0] =~ %r{/} # Needs at least one slash in the mount path
+
+          # Calculate capacity
+          x = f[4].to_f / 100
+          report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
+        end
+      end
+
+      def basic_inspection(id, name, inspection)
+        state = inspection['State']
+        json_state = JSON.generate(state)
+
+        running = state['Running']
+
+        alert(
+          name, 'status',
+          running ? 'ok' : 'critical',
+          running ? 1 : 0,
+          json_state,
+        )
+
+        return unless running
+
+        start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
+        now = DateTime.now.to_time.utc.to_i
+        uptime = now - start_time
+
+        unless @last_uptime_reads[id].nil?
+          last = @last_uptime_reads[id]
+          restarted = start_time != last
+          alert(
+            name, 'uptime',
+            restarted ? 'critical' : 'ok',
+            uptime,
+            "last 'StartedAt' measure was #{last} (#{Time.at(last).utc}), " \
+            "now it's #{start_time} (#{Time.at(start_time).utc})",
+          )
+        end
+
+        @last_uptime_reads[id] = start_time
+      end
+
+      def tick
+        # Disk is the same in every container
+        disk if @disk_enabled
+
+        # Get CPU, Memory and Load of each container
+        threads = []
+
+        containers.each do |ctr|
+          threads << Thread.new(ctr) do |container|
+            id = container.id
+            name = get_container_name(container)
+
+            stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
+
+            if @basic_inspection_enabled
+              inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
+              basic_inspection(id, name, inspection)
+            end
+            cpu(id, name, stats) if @cpu_enabled
+            memory(id, name, stats) if @memory_enabled
+          end
+        end
+
+        threads.each do |thread|
+          thread.join
+        rescue StandardError => e
+          warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
+        end
+      end
+    end
+  end
+end
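The cpu method in the new class derives a utilisation fraction from two consecutive stats reads: the per-core average of total_usage (reported by Docker in nanoseconds) is differenced, divided by the elapsed wall-clock seconds, and then by @cpu_coefficient (10^9 ns per second). A minimal standalone sketch of that arithmetic, with made-up sample values (the helper and numbers below are illustrative, not part of the gem):

# Illustrative only: the fraction computed in Riemann::Tools::Docker#cpu,
# fed with two hypothetical reads of the Docker stats API.
NS_PER_SECOND = 1000 * 1000 * 1000 # @cpu_coefficient in the tool

def cpu_fraction(last, current)
  (current[:v] - last[:v]) / (current[:t] - last[:t]) / NS_PER_SECOND
end

last    = { v: 4_000_000_000.0, t: 0.0 }  # avg per-core usage in ns, read time in s
current = { v: 9_000_000_000.0, t: 10.0 }

puts cpu_fraction(last, current) # => 0.5, i.e. roughly 50% of one core over the 10 s window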
data/tools/riemann-elasticsearch/Rakefile (renamed from Rakefile.rb)
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'rubygems'
 require 'rubygems/package_task'
 require 'rdoc/task'
@@ -9,10 +11,8 @@ ENV['COPYFILE_DISABLE'] = 'true'
 
 # Gemspec
 gemspec = Gem::Specification.new do |s|
-  s.rubyforge_project = 'riemann-elasticsearch'
-
   s.name = 'riemann-elasticsearch'
-  s.version = '0.2.4'
+  s.version = '0.2.5'
   s.author = 'Gavin Sandie'
   s.email = 'beach@vicecity.co.uk'
   s.homepage = 'https://github.com/riemann/riemann-tools'
@@ -20,15 +20,14 @@ gemspec = Gem::Specification.new do |s|
   s.summary = 'Submits elasticsearch stats to riemann.'
   s.license = 'MIT'
 
-  s.add_dependency 'riemann-tools', '>= 0.2.13'
-  s.add_dependency 'faraday', '>= 0.8.5'
-  s.add_dependency 'json'
+  s.add_runtime_dependency 'riemann-tools', '~> 1.0', '>= 1.1.1'
+  s.add_runtime_dependency 'faraday', '~> 2.3', '>= 2.3.0'
+  s.add_runtime_dependency 'json', '~> 2.6', '>=2.6.2'
 
-  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
+  s.files = FileList['bin/*', 'lib/**/*.rb', 'LICENSE', 'README.md'].to_a
   s.executables |= Dir.entries('bin/')
-  s.has_rdoc = false
 
-  s.required_ruby_version = '>= 1.8.7'
+  s.required_ruby_version = Gem::Requirement.new('>= 2.6.0')
 end
 
 Gem::PackageTask.new gemspec do |p|
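The gemspec now declares runtime dependencies with pessimistic version constraints instead of open-ended '>=' bounds. One way to see what a pair like '~> 2.3', '>= 2.3.0' admits is Gem::Requirement from RubyGems itself; the version numbers checked below are arbitrary examples, not versions mentioned anywhere in this release:

require 'rubygems'

# The faraday constraint from the gemspec above.
faraday_req = Gem::Requirement.new('~> 2.3', '>= 2.3.0')

puts faraday_req.satisfied_by?(Gem::Version.new('2.3.0')) # true
puts faraday_req.satisfied_by?(Gem::Version.new('2.9.4')) # true  (any 2.x at or above 2.3)
puts faraday_req.satisfied_by?(Gem::Version.new('3.0.0')) # false (next major is excluded)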
data/tools/riemann-elasticsearch/bin/riemann-elasticsearch
@@ -1,166 +1,8 @@
 #!/usr/bin/env ruby
-Process.setproctitle($0)
+# frozen_string_literal: true
 
-require 'riemann/tools'
+Process.setproctitle($PROGRAM_NAME)
 
-class Riemann::Tools::Elasticsearch
-  include Riemann::Tools
-  require 'faraday'
-  require 'json'
-  require 'uri'
+require 'riemann/tools/elasticsearch'
 
-  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
-  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
-  opt :path_prefix, 'Elasticsearch path prefix for proxied installations e.g. "els" for target http://localhost/els/_cluster/health', default: "/"
-  opt :es_host, 'Elasticsearch host', default: "localhost"
-  opt :es_port, 'Elasticsearch port', type: :int, default: 9200
-  opt :es_search_index, 'Elasticsearch index to fetch search statistics for', default: "_all"
-
-
-  # Handles HTTP connections and GET requests safely
-  def safe_get(uri)
-    # Handle connection timeouts
-    response = nil
-    begin
-      connection = Faraday.new(uri)
-      response = connection.get do |req|
-        req.options[:timeout] = options[:read_timeout]
-        req.options[:open_timeout] = options[:open_timeout]
-      end
-    rescue => e
-      report(:host => uri.host,
-             :service => "elasticsearch health",
-             :state => "critical",
-             :description => "HTTP connection error: #{e.class} - #{e.message}"
-      )
-    end
-    response
-  end
-
-  def make_es_url(path)
-    path_prefix = options[:path_prefix]
-    path_prefix[0] = '' if path_prefix[0]=='/'
-    path_prefix[path_prefix.length-1] = '' if path_prefix[path_prefix.length-1]=='/'
-    "http://#{options[:es_host]}:#{options[:es_port]}#{path_prefix.length>0?'/':''}#{path_prefix}/#{path}"
-  end
-
-  def health_url
-    make_es_url("_cluster/health")
-  end
-
-  def indices_url
-    make_es_url("_stats/store")
-  end
-
-  def search_url
-    es_search_index = options[:es_search_index]
-    make_es_url("#{es_search_index}/_stats/search")
-  end
-
-  def is_bad?(response, uri)
-    if response.success?
-      false
-    else
-      report(:host => uri.host,
-             :service => "elasticsearch health",
-             :state => "critical",
-             :description => response.nil? ? "HTTP response is empty!" : "HTTP connection error: #{response.status} - #{response.body}"
-      )
-    end
-  end
-
-  def tick_indices
-    uri = URI(indices_url)
-    response = safe_get(uri)
-
-    return if is_bad?(response, uri)
-
-    # Assuming that a 200 will give json
-    json = JSON.parse(response.body)
-
-    json["indices"].each_pair do |k,v|
-      report(:host => uri.host,
-             :service => "elasticsearch index/#{k}/primaries/size_in_bytes",
-             :metric => v["primaries"]["store"]["size_in_bytes"]
-      )
-      report(:host => uri.host,
-             :service => "elasticsearch index/#{k}/total/size_in_bytes",
-             :metric => v["total"]["store"]["size_in_bytes"]
-      )
-    end
-  end
-
-  def tick_search
-    uri = URI(search_url)
-    response = safe_get(uri)
-
-    return if is_bad?(response, uri)
-
-    es_search_index = options[:es_search_index]
-    # Assuming that a 200 will give json
-    json = JSON.parse(response.body)
-
-    json["_all"].each_pair do |type, data|
-      query = data["search"]["query_time_in_millis"].to_f / data["search"]["query_total"].to_f
-      fetch = data["search"]["fetch_time_in_millis"].to_f / data["search"]["fetch_total"].to_f
-
-      report(:host => uri.host,
-             :service => "elasticsearch search/#{es_search_index}/query",
-             :metric => query
-      )
-      report(:host => uri.host,
-             :service => "elasticsearch search/#{es_search_index}/fetch",
-             :metric => fetch
-      )
-    end
-  end
-
-  def tick
-    begin
-      tick_indices
-      tick_search
-    rescue Exception => e
-      report(:host => options[:es_host],
-             :service => "elasticsearch error",
-             :state => "critical",
-             :description => "Elasticsearch cluster error: #{e.message}")
-    end
-    uri = URI(health_url)
-    response = safe_get(uri)
-
-    return if is_bad?(response, uri)
-
-    # Assuming that a 200 will give json
-    json = JSON.parse(response.body)
-    cluster_name = json.delete("cluster_name")
-    cluster_status = json.delete("status")
-    state = case cluster_status
-            when "green"
-              "ok"
-            when "yellow"
-              "warning"
-            when "red"
-              "critical"
-            end
-
-    report(:host => uri.host,
-           :service => "elasticsearch health",
-           :state => state,
-           :description => "Elasticsearch cluster: #{cluster_name} - #{cluster_status}")
-
-    json.each_pair do |k,v|
-      report(:host => uri.host,
-             :service => "elasticsearch #{k}",
-             :metric => v,
-             :description => "Elasticsearch cluster #{k}"
-      )
-
-    end
-
-  end
-
-
-
-end
 Riemann::Tools::Elasticsearch.run
-