riemann-tools 1.1.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +2 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +8 -0
  5. data/.ruby-version +1 -0
  6. data/CHANGELOG.md +60 -2
  7. data/Rakefile +17 -3
  8. data/bin/riemann-apache-status +1 -106
  9. data/bin/riemann-bench +2 -70
  10. data/bin/riemann-cloudant +1 -56
  11. data/bin/riemann-consul +1 -106
  12. data/bin/riemann-dir-files-count +1 -55
  13. data/bin/riemann-dir-space +1 -55
  14. data/bin/riemann-diskstats +1 -92
  15. data/bin/riemann-fd +2 -81
  16. data/bin/riemann-freeswitch +2 -119
  17. data/bin/riemann-haproxy +1 -58
  18. data/bin/riemann-health +0 -2
  19. data/bin/riemann-kvminstance +2 -22
  20. data/bin/riemann-md +8 -0
  21. data/bin/riemann-memcached +1 -37
  22. data/bin/riemann-net +0 -2
  23. data/bin/riemann-nginx-status +1 -85
  24. data/bin/riemann-ntp +0 -2
  25. data/bin/riemann-portcheck +1 -44
  26. data/bin/riemann-proc +1 -108
  27. data/bin/riemann-varnish +1 -54
  28. data/bin/riemann-wrapper +113 -0
  29. data/bin/riemann-zookeeper +1 -39
  30. data/bin/riemann-zpool +8 -0
  31. data/lib/riemann/tools/apache_status.rb +107 -0
  32. data/lib/riemann/tools/bench.rb +72 -0
  33. data/lib/riemann/tools/cloudant.rb +57 -0
  34. data/lib/riemann/tools/consul_health.rb +107 -0
  35. data/lib/riemann/tools/dir_files_count.rb +56 -0
  36. data/lib/riemann/tools/dir_space.rb +56 -0
  37. data/lib/riemann/tools/diskstats.rb +94 -0
  38. data/lib/riemann/tools/fd.rb +81 -0
  39. data/lib/riemann/tools/freeswitch.rb +119 -0
  40. data/lib/riemann/tools/haproxy.rb +59 -0
  41. data/lib/riemann/tools/health.rb +150 -19
  42. data/lib/riemann/tools/kvm.rb +23 -0
  43. data/lib/riemann/tools/md.rb +35 -0
  44. data/lib/riemann/tools/mdstat_parser.tab.rb +340 -0
  45. data/lib/riemann/tools/memcached.rb +38 -0
  46. data/lib/riemann/tools/net.rb +2 -1
  47. data/lib/riemann/tools/nginx_status.rb +86 -0
  48. data/lib/riemann/tools/ntp.rb +1 -0
  49. data/lib/riemann/tools/portcheck.rb +45 -0
  50. data/lib/riemann/tools/proc.rb +109 -0
  51. data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
  52. data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
  53. data/lib/riemann/tools/varnish.rb +55 -0
  54. data/lib/riemann/tools/version.rb +1 -1
  55. data/lib/riemann/tools/zookeeper.rb +40 -0
  56. data/lib/riemann/tools/zpool.rb +40 -0
  57. data/lib/riemann/tools.rb +2 -20
  58. data/riemann-tools.gemspec +10 -1
  59. data/tools/riemann-aws/Rakefile +6 -9
  60. data/tools/riemann-aws/bin/riemann-aws-billing +2 -87
  61. data/tools/riemann-aws/bin/riemann-aws-rds-status +2 -62
  62. data/tools/riemann-aws/bin/riemann-aws-sqs-status +2 -44
  63. data/tools/riemann-aws/bin/riemann-aws-status +2 -77
  64. data/tools/riemann-aws/bin/riemann-elb-metrics +2 -162
  65. data/tools/riemann-aws/bin/riemann-s3-list +2 -81
  66. data/tools/riemann-aws/bin/riemann-s3-status +2 -96
  67. data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
  68. data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
  69. data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
  70. data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
  71. data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
  72. data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
  73. data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
  74. data/tools/riemann-chronos/Rakefile +6 -9
  75. data/tools/riemann-chronos/bin/riemann-chronos +1 -154
  76. data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
  77. data/tools/riemann-docker/Rakefile +5 -8
  78. data/tools/riemann-docker/bin/riemann-docker +2 -200
  79. data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
  80. data/tools/riemann-elasticsearch/Rakefile +6 -9
  81. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +1 -167
  82. data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
  83. data/tools/riemann-marathon/Rakefile +6 -9
  84. data/tools/riemann-marathon/bin/riemann-marathon +1 -156
  85. data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
  86. data/tools/riemann-mesos/Rakefile +6 -9
  87. data/tools/riemann-mesos/bin/riemann-mesos +1 -139
  88. data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
  89. data/tools/riemann-munin/Rakefile +5 -8
  90. data/tools/riemann-munin/bin/riemann-munin +1 -36
  91. data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
  92. data/tools/riemann-rabbitmq/Rakefile +6 -9
  93. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +1 -266
  94. data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
  95. data/tools/riemann-riak/Rakefile +5 -8
  96. data/tools/riemann-riak/bin/riemann-riak +1 -316
  97. data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
  98. data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
  99. data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
  100. metadata +64 -10
  101. data/.travis.yml +0 -31
  102. data/tools/riemann-riak/riak_status/key_count.erl +0 -13
  103. data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
  104. data/tools/riemann-riak/riak_status/ringready.erl +0 -9
File: data/tools/riemann-docker/bin/riemann-docker (+2 -200)
@@ -3,204 +3,6 @@
3
3
 
4
4
  Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- # Reports current CPU, disk, load average, and memory use to riemann.
6
+ require 'riemann/tools/docker'
7
7
 
8
- require 'riemann/tools'
9
-
10
- module Riemann
11
- module Tools
12
- class DockerHealth
13
- require 'docker'
14
- require 'socket'
15
- include Riemann::Tools
16
- include Docker
17
-
18
- opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
19
- default: nil
20
- opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
21
- opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
22
- opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
23
- opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
24
- opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
25
- opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
26
- opt :host_hostname, 'Suffix of host', type: String, default: nil
27
- opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
28
-
29
- def containers
30
- Docker::Container.all
31
- end
32
-
33
- def get_container_name(container)
34
- container.json['Name'][1..]
35
- end
36
-
37
- def initialize
38
- Docker.url = opts[:docker_host] unless opts[:docker_host].nil?
39
-
40
- @hostname = opts[:host_hostname]
41
- @hostname = Socket.gethostname if @hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?
42
-
43
- @cpu_coefficient = 1000 * 1000 * 1000
44
-
45
- @limits = {
46
- cpu: { critical: opts[:cpu_critical], warning: opts[:cpu_warning] },
47
- disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
48
- memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
49
- }
50
-
51
- @last_cpu_reads = {}
52
- @last_uptime_reads = {}
53
-
54
- opts[:checks].each do |check|
55
- case check
56
- when 'disk'
57
- @disk_enabled = true
58
- when 'cpu'
59
- @cpu_enabled = true
60
- when 'memory'
61
- @memory_enabled = true
62
- when 'basic'
63
- @basic_inspection_enabled = true
64
- end
65
- end
66
- end
67
-
68
- def alert(container, service, state, metric, description)
69
- opts = {
70
- service: service.to_s,
71
- state: state.to_s,
72
- metric: metric.to_f,
73
- description: description,
74
- }
75
-
76
- opts[:host] = if !container.nil?
77
- "#{@hostname}-#{container}"
78
- else
79
- @hostname
80
- end
81
-
82
- report(opts)
83
- end
84
-
85
- def report_pct(container, service, fraction, report = '', name = nil)
86
- return unless fraction
87
-
88
- name = service if name.nil?
89
-
90
- if fraction > @limits[service][:critical]
91
- alert container, name, :critical, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
92
- elsif fraction > @limits[service][:warning]
93
- alert container, name, :warning, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
94
- else
95
- alert container, name, :ok, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
96
- end
97
- end
98
-
99
- def cpu(id, name, stats)
100
- current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
101
-
102
- unless current
103
- alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
104
- return false
105
- end
106
-
107
- current_time = Time.parse(stats['read'])
108
- unless @last_cpu_reads[id].nil?
109
- last = @last_cpu_reads[id]
110
- used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
111
-
112
- report_pct name, :cpu, used
113
- end
114
-
115
- @last_cpu_reads[id] = { v: current, t: current_time }
116
- end
117
-
118
- def memory(_id, name, stats)
119
- memory_stats = stats['memory_stats']
120
- usage = memory_stats['usage'].to_f
121
- total = memory_stats['limit'].to_f
122
- fraction = (usage / total)
123
-
124
- report_pct name, :memory, fraction, "#{usage} / #{total}"
125
- end
126
-
127
- def disk
128
- `df -P`.split(/\n/).each do |r|
129
- f = r.split(/\s+/)
130
- next if f[0] == 'Filesystem'
131
- next unless f[0] =~ %r{/} # Needs at least one slash in the mount path
132
-
133
- # Calculate capacity
134
- x = f[4].to_f / 100
135
- report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
136
- end
137
- end
138
-
139
- def basic_inspection(id, name, inspection)
140
- state = inspection['State']
141
- json_state = JSON.generate(state)
142
-
143
- running = state['Running']
144
-
145
- alert(
146
- name, 'status',
147
- running ? 'ok' : 'critical',
148
- running ? 1 : 0,
149
- json_state,
150
- )
151
-
152
- return unless running
153
-
154
- start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
155
- now = DateTime.now.to_time.utc.to_i
156
- uptime = now - start_time
157
-
158
- unless @last_uptime_reads[id].nil?
159
- last = @last_uptime_reads[id]
160
- restarted = start_time != last
161
- alert(
162
- name, 'uptime',
163
- restarted ? 'critical' : 'ok',
164
- uptime,
165
- "last 'StartedAt' measure was #{last} (#{Time.at(last).utc}), " \
166
- "now it's #{start_time} (#{Time.at(start_time).utc})",
167
- )
168
- end
169
-
170
- @last_uptime_reads[id] = start_time
171
- end
172
-
173
- def tick
174
- # Disk is the same in every container
175
- disk if @disk_enabled
176
-
177
- # Get CPU, Memory and Load of each container
178
- threads = []
179
-
180
- containers.each do |ctr|
181
- threads << Thread.new(ctr) do |container|
182
- id = container.id
183
- name = get_container_name(container)
184
-
185
- stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
186
-
187
- if @basic_inspection_enabled
188
- inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
189
- basic_inspection(id, name, inspection)
190
- end
191
- cpu(id, name, stats) if @cpu_enabled
192
- memory(id, name, stats) if @memory_enabled
193
- end
194
- end
195
-
196
- threads.each do |thread|
197
- thread.join
198
- rescue StandardError => e
199
- warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
200
- end
201
- end
202
- end
203
- end
204
- end
205
-
206
- Riemann::Tools::DockerHealth.run
8
+ Riemann::Tools::Docker.run
File: data/tools/riemann-docker/lib/riemann/tools/docker.rb (+200 -0)
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'riemann/tools'
4
+
5
+ # Reports current CPU, disk, load average, and memory use to riemann.
6
+ module Riemann
7
+ module Tools
8
+ class Docker
9
+ require 'docker'
10
+ require 'socket'
11
+ include Riemann::Tools
12
+ include ::Docker
13
+
14
+ opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
15
+ default: nil
16
+ opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
17
+ opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
18
+ opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
19
+ opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
20
+ opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
21
+ opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
22
+ opt :host_hostname, 'Suffix of host', type: String, default: nil
23
+ opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
24
+
25
+ def containers
26
+ Docker::Container.all
27
+ end
28
+
29
+ def get_container_name(container)
30
+ container.json['Name'][1..]
31
+ end
32
+
33
+ def initialize
34
+ Docker.url = opts[:docker_host] unless opts[:docker_host].nil?
35
+
36
+ @hostname = opts[:host_hostname]
37
+ @hostname = Socket.gethostname if @hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?
38
+
39
+ @cpu_coefficient = 1000 * 1000 * 1000
40
+
41
+ @limits = {
42
+ cpu: { critical: opts[:cpu_critical], warning: opts[:cpu_warning] },
43
+ disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
44
+ memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
45
+ }
46
+
47
+ @last_cpu_reads = {}
48
+ @last_uptime_reads = {}
49
+
50
+ opts[:checks].each do |check|
51
+ case check
52
+ when 'disk'
53
+ @disk_enabled = true
54
+ when 'cpu'
55
+ @cpu_enabled = true
56
+ when 'memory'
57
+ @memory_enabled = true
58
+ when 'basic'
59
+ @basic_inspection_enabled = true
60
+ end
61
+ end
62
+ end
63
+
64
+ def alert(container, service, state, metric, description)
65
+ opts = {
66
+ service: service.to_s,
67
+ state: state.to_s,
68
+ metric: metric.to_f,
69
+ description: description,
70
+ }
71
+
72
+ opts[:host] = if !container.nil?
73
+ "#{@hostname}-#{container}"
74
+ else
75
+ @hostname
76
+ end
77
+
78
+ report(opts)
79
+ end
80
+
81
+ def report_pct(container, service, fraction, report = '', name = nil)
82
+ return unless fraction
83
+
84
+ name = service if name.nil?
85
+
86
+ if fraction > @limits[service][:critical]
87
+ alert container, name, :critical, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
88
+ elsif fraction > @limits[service][:warning]
89
+ alert container, name, :warning, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
90
+ else
91
+ alert container, name, :ok, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
92
+ end
93
+ end
94
+
95
+ def cpu(id, name, stats)
96
+ current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
97
+
98
+ unless current
99
+ alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
100
+ return false
101
+ end
102
+
103
+ current_time = Time.parse(stats['read'])
104
+ unless @last_cpu_reads[id].nil?
105
+ last = @last_cpu_reads[id]
106
+ used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
107
+
108
+ report_pct name, :cpu, used
109
+ end
110
+
111
+ @last_cpu_reads[id] = { v: current, t: current_time }
112
+ end
113
+
114
+ def memory(_id, name, stats)
115
+ memory_stats = stats['memory_stats']
116
+ usage = memory_stats['usage'].to_f
117
+ total = memory_stats['limit'].to_f
118
+ fraction = (usage / total)
119
+
120
+ report_pct name, :memory, fraction, "#{usage} / #{total}"
121
+ end
122
+
123
+ def disk
124
+ `df -P`.split(/\n/).each do |r|
125
+ f = r.split(/\s+/)
126
+ next if f[0] == 'Filesystem'
127
+ next unless f[0] =~ %r{/} # Needs at least one slash in the mount path
128
+
129
+ # Calculate capacity
130
+ x = f[4].to_f / 100
131
+ report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
132
+ end
133
+ end
134
+
135
+ def basic_inspection(id, name, inspection)
136
+ state = inspection['State']
137
+ json_state = JSON.generate(state)
138
+
139
+ running = state['Running']
140
+
141
+ alert(
142
+ name, 'status',
143
+ running ? 'ok' : 'critical',
144
+ running ? 1 : 0,
145
+ json_state,
146
+ )
147
+
148
+ return unless running
149
+
150
+ start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
151
+ now = DateTime.now.to_time.utc.to_i
152
+ uptime = now - start_time
153
+
154
+ unless @last_uptime_reads[id].nil?
155
+ last = @last_uptime_reads[id]
156
+ restarted = start_time != last
157
+ alert(
158
+ name, 'uptime',
159
+ restarted ? 'critical' : 'ok',
160
+ uptime,
161
+ "last 'StartedAt' measure was #{last} (#{Time.at(last).utc}), " \
162
+ "now it's #{start_time} (#{Time.at(start_time).utc})",
163
+ )
164
+ end
165
+
166
+ @last_uptime_reads[id] = start_time
167
+ end
168
+
169
+ def tick
170
+ # Disk is the same in every container
171
+ disk if @disk_enabled
172
+
173
+ # Get CPU, Memory and Load of each container
174
+ threads = []
175
+
176
+ containers.each do |ctr|
177
+ threads << Thread.new(ctr) do |container|
178
+ id = container.id
179
+ name = get_container_name(container)
180
+
181
+ stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
182
+
183
+ if @basic_inspection_enabled
184
+ inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
185
+ basic_inspection(id, name, inspection)
186
+ end
187
+ cpu(id, name, stats) if @cpu_enabled
188
+ memory(id, name, stats) if @memory_enabled
189
+ end
190
+ end
191
+
192
+ threads.each do |thread|
193
+ thread.join
194
+ rescue StandardError => e
195
+ warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
196
+ end
197
+ end
198
+ end
199
+ end
200
+ end
File: data/tools/riemann-elasticsearch/Rakefile (+6 -9)
@@ -11,10 +11,8 @@ ENV['COPYFILE_DISABLE'] = 'true'
11
11
 
12
12
  # Gemspec
13
13
  gemspec = Gem::Specification.new do |s|
14
- s.rubyforge_project = 'riemann-elasticsearch'
15
-
16
14
  s.name = 'riemann-elasticsearch'
17
- s.version = '0.2.4'
15
+ s.version = '0.2.5'
18
16
  s.author = 'Gavin Sandie'
19
17
  s.email = 'beach@vicecity.co.uk'
20
18
  s.homepage = 'https://github.com/riemann/riemann-tools'
@@ -22,15 +20,14 @@ gemspec = Gem::Specification.new do |s|
22
20
  s.summary = 'Submits elasticsearch stats to riemann.'
23
21
  s.license = 'MIT'
24
22
 
25
- s.add_dependency 'riemann-tools', '>= 0.2.13'
26
- s.add_dependency 'faraday', '>= 0.8.5'
27
- s.add_dependency 'json'
23
+ s.add_runtime_dependency 'riemann-tools', '~> 1.0', '>= 1.1.1'
24
+ s.add_runtime_dependency 'faraday', '~> 2.3', '>= 2.3.0'
25
+ s.add_runtime_dependency 'json', '~> 2.6', '>=2.6.2'
28
26
 
29
- s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
27
+ s.files = FileList['bin/*', 'lib/**/*.rb', 'LICENSE', 'README.md'].to_a
30
28
  s.executables |= Dir.entries('bin/')
31
- s.has_rdoc = false
32
29
 
33
- s.required_ruby_version = '>= 1.8.7'
30
+ s.required_ruby_version = Gem::Requirement.new('>= 2.6.0')
34
31
  end
35
32
 
36
33
  Gem::PackageTask.new gemspec do |p|
File: data/tools/riemann-elasticsearch/bin/riemann-elasticsearch (+1 -167)
@@ -3,172 +3,6 @@
3
3
 
4
4
  Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- require 'riemann/tools'
6
+ require 'riemann/tools/elasticsearch'
7
7
 
8
- module Riemann
9
- module Tools
10
- class Elasticsearch
11
- include Riemann::Tools
12
- require 'faraday'
13
- require 'json'
14
- require 'uri'
15
-
16
- opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
17
- opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
18
- opt :path_prefix,
19
- 'Elasticsearch path prefix for proxied installations e.g. "els" for target http://localhost/els/_cluster/health', default: '/'
20
- opt :es_host, 'Elasticsearch host', default: 'localhost'
21
- opt :es_port, 'Elasticsearch port', type: :int, default: 9200
22
- opt :es_search_index, 'Elasticsearch index to fetch search statistics for', default: '_all'
23
-
24
- # Handles HTTP connections and GET requests safely
25
- def safe_get(uri)
26
- # Handle connection timeouts
27
- response = nil
28
- begin
29
- connection = Faraday.new(uri)
30
- response = connection.get do |req|
31
- req.options[:timeout] = options[:read_timeout]
32
- req.options[:open_timeout] = options[:open_timeout]
33
- end
34
- rescue StandardError => e
35
- report(
36
- host: uri.host,
37
- service: 'elasticsearch health',
38
- state: 'critical',
39
- description: "HTTP connection error: #{e.class} - #{e.message}",
40
- )
41
- end
42
- response
43
- end
44
-
45
- def make_es_url(path)
46
- path_prefix = options[:path_prefix]
47
- path_prefix[0] = '' if path_prefix[0] == '/'
48
- path_prefix[path_prefix.length - 1] = '' if path_prefix[path_prefix.length - 1] == '/'
49
- "http://#{options[:es_host]}:#{options[:es_port]}#{path_prefix.length.positive? ? '/' : ''}#{path_prefix}/#{path}"
50
- end
51
-
52
- def health_url
53
- make_es_url('_cluster/health')
54
- end
55
-
56
- def indices_url
57
- make_es_url('_stats/store')
58
- end
59
-
60
- def search_url
61
- es_search_index = options[:es_search_index]
62
- make_es_url("#{es_search_index}/_stats/search")
63
- end
64
-
65
- def bad?(response, uri)
66
- if response.success?
67
- false
68
- else
69
- report(
70
- host: uri.host,
71
- service: 'elasticsearch health',
72
- state: 'critical',
73
- description: response.nil? ? 'HTTP response is empty!' : "HTTP connection error: #{response.status} - #{response.body}",
74
- )
75
- end
76
- end
77
-
78
- def tick_indices
79
- uri = URI(indices_url)
80
- response = safe_get(uri)
81
-
82
- return if bad?(response, uri)
83
-
84
- # Assuming that a 200 will give json
85
- json = JSON.parse(response.body)
86
-
87
- json['indices'].each_pair do |k, v|
88
- report(
89
- host: uri.host,
90
- service: "elasticsearch index/#{k}/primaries/size_in_bytes",
91
- metric: v['primaries']['store']['size_in_bytes'],
92
- )
93
- report(
94
- host: uri.host,
95
- service: "elasticsearch index/#{k}/total/size_in_bytes",
96
- metric: v['total']['store']['size_in_bytes'],
97
- )
98
- end
99
- end
100
-
101
- def tick_search
102
- uri = URI(search_url)
103
- response = safe_get(uri)
104
-
105
- return if bad?(response, uri)
106
-
107
- es_search_index = options[:es_search_index]
108
- # Assuming that a 200 will give json
109
- json = JSON.parse(response.body)
110
-
111
- json['_all'].each_pair do |_type, data|
112
- query = data['search']['query_time_in_millis'].to_f / data['search']['query_total']
113
- fetch = data['search']['fetch_time_in_millis'].to_f / data['search']['fetch_total']
114
-
115
- report(
116
- host: uri.host,
117
- service: "elasticsearch search/#{es_search_index}/query",
118
- metric: query,
119
- )
120
- report(
121
- host: uri.host,
122
- service: "elasticsearch search/#{es_search_index}/fetch",
123
- metric: fetch,
124
- )
125
- end
126
- end
127
-
128
- def tick
129
- begin
130
- tick_indices
131
- tick_search
132
- rescue StandardError => e
133
- report(
134
- host: options[:es_host],
135
- service: 'elasticsearch error',
136
- state: 'critical',
137
- description: "Elasticsearch cluster error: #{e.message}",
138
- )
139
- end
140
- uri = URI(health_url)
141
- response = safe_get(uri)
142
-
143
- return if bad?(response, uri)
144
-
145
- # Assuming that a 200 will give json
146
- json = JSON.parse(response.body)
147
- cluster_name = json.delete('cluster_name')
148
- cluster_status = json.delete('status')
149
- state = {
150
- 'green' => 'ok',
151
- 'yellow' => 'warning',
152
- 'red' => 'critical',
153
- }[cluster_status]
154
-
155
- report(
156
- host: uri.host,
157
- service: 'elasticsearch health',
158
- state: state,
159
- description: "Elasticsearch cluster: #{cluster_name} - #{cluster_status}",
160
- )
161
-
162
- json.each_pair do |k, v|
163
- report(
164
- host: uri.host,
165
- service: "elasticsearch #{k}",
166
- metric: v,
167
- description: "Elasticsearch cluster #{k}",
168
- )
169
- end
170
- end
171
- end
172
- end
173
- end
174
8
  Riemann::Tools::Elasticsearch.run