sensu-plugins-elasticsearch-boutetnico 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,354 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # es-node-graphite
4
+ #
5
+ # DESCRIPTION:
6
+ # This check creates node metrics from the elasticsearch API
7
+ #
8
+ # OUTPUT:
9
+ # metric data
10
+ #
11
+ # PLATFORMS:
12
+ # Linux, Windows, BSD, Solaris, etc
13
+ #
14
+ # DEPENDENCIES:
15
+ # gem: sensu-plugin
16
+ # gem: rest-client
17
+ #
18
+ # USAGE:
19
+ # #YELLOW
20
+ #
21
+ # NOTES:
22
+ # 2014/04
23
+ # Modified by Vincent Janelle @randomfrequency http://github.com/vjanelle
24
+ # Add more metrics, fix es 1.x URLs, translate graphite stats from
25
+ # names directly
26
+ #
27
+ # 2012/12 - Modified by Zach Dunn @SillySophist http://github.com/zadunn
28
+ # To add more metrics, and correct for new versions of ES. Tested on
29
+ # ES Version 0.19.8
30
+ #
31
+ # LICENSE:
32
+ # Copyright 2013 Vincent Janelle <randomfrequency@gmail.com>
33
+ # Copyright 2012 Sonian, Inc <chefs@sonian.net>
34
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
35
+ # for details.
36
+ #
37
+
38
+ require 'sensu-plugin/metric/cli'
39
+ require 'rest-client'
40
+ require 'json'
41
+ require 'base64'
42
+
43
#
# ES Node Graphite Metrics
#
# Reads the local node's statistics from the Elasticsearch HTTP API and
# passes every numeric value it collects to `output`, prefixed with the
# configured scheme.
#
class ESNodeGraphiteMetrics < Sensu::Plugin::Metric::CLI::Graphite
  option :scheme,
         description: 'Metric naming scheme, text to prepend to queue_name.metric',
         short: '-s SCHEME',
         long: '--scheme SCHEME',
         default: "#{Socket.gethostname}.elasticsearch"

  option :server,
         description: 'Elasticsearch server host.',
         short: '-h HOST',
         long: '--host HOST',
         default: 'localhost'

  option :port,
         description: 'Elasticsearch port.',
         short: '-p PORT',
         long: '--port PORT',
         proc: proc(&:to_i),
         default: 9200

  option :timeout,
         description: 'Request timeout to elasticsearch',
         short: '-t TIMEOUT',
         long: '--timeout TIMEOUT',
         proc: proc(&:to_i),
         default: 30

  option :disable_jvm_stats,
         description: 'Disable JVM statistics',
         long: '--disable-jvm-stats',
         boolean: true,
         default: false

  option :disable_os_stats,
         description: 'Disable OS Stats',
         long: '--disable-os-stat',
         boolean: true,
         default: false

  option :disable_process_stats,
         description: 'Disable process statistics',
         long: '--disable-process-stats',
         boolean: true,
         default: false

  option :disable_thread_pool_stats,
         description: 'Disable thread-pool statistics',
         long: '--disable-thread-pool-stats',
         boolean: true,
         default: false

  option :disable_fs_stats,
         description: 'Disable filesystem statistics',
         long: '--disable-fs-stats',
         boolean: true,
         default: false

  option :user,
         description: 'Elasticsearch User',
         short: '-u USER',
         long: '--user USER'

  option :password,
         description: 'Elasticsearch Password',
         short: '-P PASS',
         long: '--password PASS'

  option :https,
         description: 'Enables HTTPS',
         short: '-e',
         long: '--https'

  option :cert_file,
         description: 'Cert file to use',
         long: '--cert-file CERT_FILE'

  # Fetches +resource+ (a path, optionally with a query string) from the
  # configured Elasticsearch server and returns the parsed JSON body.
  #
  # Basic authentication is sent only when both user and password are set.
  # A refused connection or a request timeout is reported through `warning`.
  def get_es_resource(resource)
    headers = {}
    if config[:user] && config[:password]
      auth = 'Basic ' + Base64.strict_encode64("#{config[:user]}:#{config[:password]}")
      headers = { 'Authorization' => auth }
    end

    protocol = config[:https] ? 'https' : 'http'

    request_options = { timeout: config[:timeout], headers: headers }
    request_options[:ssl_ca_file] = config[:cert_file].to_s if config[:cert_file]

    # The host option of this class is :server. The resource is used as
    # given: it may already carry a query string, so nothing is appended.
    url = "#{protocol}://#{config[:server]}:#{config[:port]}#{resource}"
    ::JSON.parse(RestClient::Resource.new(url, request_options).get)
  rescue Errno::ECONNREFUSED
    warning 'Connection refused'
  rescue RestClient::RequestTimeout
    warning 'Connection timed out'
  end

  # Returns the version number string reported by the server's root resource.
  def acquire_es_version
    info = get_es_resource('/')
    info['version']['number']
  end

  # Collects the node statistics, flattens them into dotted metric names and
  # outputs every numeric value with a shared timestamp.
  def run
    # Each stat group is collected unless its --disable-* flag was given.
    os_stat = !config[:disable_os_stats]
    process_stats = !config[:disable_process_stats]
    jvm_stats = !config[:disable_jvm_stats]
    tp_stats = !config[:disable_thread_pool_stats]
    fs_stats = !config[:disable_fs_stats]

    es_version = Gem::Version.new(acquire_es_version)
    stats = get_es_resource(stats_resource(es_version))

    timestamp = Time.now.to_i
    node = stats['nodes'].values.first

    metrics = {}

    if os_stat
      os = node['os']
      if es_version >= Gem::Version.new('2.0.0')
        metrics['os.load_average'] = os['load_average']
      else
        load_average = os['load_average']
        metrics['os.load_average'] = load_average[0]
        metrics['os.load_average.1'] = load_average[0]
        metrics['os.load_average.5'] = load_average[1]
        metrics['os.load_average.15'] = load_average[2]
        %w[sys user idle usage stolen].each do |field|
          metrics["os.cpu.#{field}"] = os['cpu'][field]
        end
        metrics['os.uptime'] = os['uptime_in_millis']
      end
      metrics['os.mem.free_in_bytes'] = os['mem']['free_in_bytes']
    end

    if process_stats
      process = node['process']
      metrics['process.cpu.percent'] = process['cpu']['percent']
      resident = process['mem']['resident_in_bytes']
      metrics['process.mem.resident_in_bytes'] = resident if resident
    end

    if jvm_stats
      jvm = node['jvm']
      metrics['jvm.mem.heap_used_in_bytes'] = jvm['mem']['heap_used_in_bytes']
      metrics['jvm.mem.non_heap_used_in_bytes'] = jvm['mem']['non_heap_used_in_bytes']
      metrics['jvm.mem.max_heap_size_in_bytes'] = 0

      # One metric per memory pool, plus the sum of all pool maxima.
      jvm['mem']['pools'].each do |k, v|
        metrics["jvm.mem.#{k.tr(' ', '_')}.max_in_bytes"] = v['max_in_bytes']
        metrics['jvm.mem.max_heap_size_in_bytes'] += v['max_in_bytes']
      end

      jvm['gc']['collectors'].each do |gc, gc_value|
        gc_value.each do |k, v|
          # 'collection_time' holds formatted durations like '28ms' and '2s';
          # the millisecond counter next to it is kept instead.
          metrics["jvm.gc.collectors.#{gc}.#{k}"] = v unless k.end_with? 'collection_time'
        end
      end

      metrics['jvm.threads.count'] = jvm['threads']['count']
      metrics['jvm.threads.peak_count'] = jvm['threads']['peak_count']
      metrics['jvm.uptime'] = jvm['uptime_in_millis']
    end

    node['indices'].each do |type, index|
      index.each do |k, v|
        if k.end_with? 'is_throttled'
          metrics["indices.#{type}.#{k}"] = true?(v) ? 1 : 0
        elsif !(k =~ /(_time$)/ || (v.is_a?(String) && v =~ /\d+/))
          # Skips '*_time' keys and string values containing digits. The
          # String guard matters because Ruby 3.2+ has no Object#=~, so
          # matching an Integer against a regexp would raise.
          metrics["indices.#{type}.#{k}"] = v
        end
      end
    end

    node['transport'].each do |k, v|
      metrics["transport.#{k}"] = v unless k =~ /(_size$)/
    end

    metrics['http.current_open'] = node['http']['current_open']
    metrics['http.total_opened'] = node['http']['total_opened']

    # The 'network' section is only read when the response contains it.
    if node['network']
      tcp = node['network']['tcp']
      %w[
        active_opens passive_opens in_segs out_segs retrans_segs
        attempt_fails in_errs out_rsts curr_estab estab_resets
      ].each do |field|
        metrics["network.tcp.#{field}"] = tcp[field]
      end
    end

    if tp_stats
      node['thread_pool'].each do |pool, stat|
        stat.each do |k, v|
          metrics["thread_pool.#{pool}.#{k}"] = v
        end
      end
    end

    if fs_stats
      node['fs'].each do |fs, fs_value|
        next if fs =~ /(timestamp|data)/

        hash_to_dotted_path(fs_value, "#{fs}.").each do |k, v|
          metrics["fs.#{k}"] = v
        end
      end
    end

    # Only numeric values are emitted; nil, strings and booleans are dropped.
    metrics.each do |k, v|
      output([config[:scheme], k].join('.'), v, timestamp) if v.is_a? Numeric
    end
    ok
  end

  private

  # Builds the node-stats resource (path plus query) for +es_version+,
  # requesting the always-on groups and every optional group that has not
  # been disabled on the command line.
  def stats_resource(es_version)
    optional = {
      'jvm' => !config[:disable_jvm_stats],
      'os' => !config[:disable_os_stats],
      'process' => !config[:disable_process_stats],
      'thread_pool' => !config[:disable_thread_pool_stats],
      'fs' => !config[:disable_fs_stats]
    }
    enabled = optional.select { |_group, on| on }.keys

    if es_version >= Gem::Version.new('3.0.0')
      "/_nodes/_local/stats/#{(%w[indices http transport] + enabled).join(',')}"
    elsif es_version >= Gem::Version.new('1.0.0')
      # Uses the same group names as the branch above ('thread_pool', 'fs').
      "/_nodes/_local/stats?#{(%w[indices http network transport] + enabled).join(',')}"
    else
      query = %w[clear=true indices=true http=true network=true transport=true]
      query += optional.map { |group, on| "#{group}=#{on}" }
      "/_cluster/nodes/_local/stats?#{query.join('&')}"
    end
  end
end
327
+
328
# Flattens a nested hash into a single-level hash whose keys are the nested
# keys joined with dots, each prefixed with +path+.
#
# - Hash values are flattened recursively.
# - Array values contribute only their Hash elements that have a
#   'device_name' entry; each is flattened under "<key>.<device_name>.".
#   Any other array element (scalars, nil, hashes without that entry) is
#   skipped.
# - Every other value is stored as-is under its dotted key.
#
# @param hash [Hash] the structure to flatten
# @param path [String] prefix prepended to every generated key
# @return [Hash] dotted key => leaf value
def hash_to_dotted_path(hash, path = '')
  hash.each_with_object({}) do |(k, v), ret|
    key = path + k.to_s
    case v
    when Hash
      ret.merge! hash_to_dotted_path(v, "#{key}.")
    when Array
      v.each do |element|
        # Guard on Hash first: indexing an Integer or nil with a String raises.
        next unless element.is_a?(Hash) && element['device_name']

        ret.merge! hash_to_dotted_path(element, "#{key}.#{element['device_name']}.")
      end
    else
      ret[key] = v
    end
  end
end
345
+
346
# Maps the string form of +obj+ to a boolean.
#
# Returns true when obj.to_s is 'true' and false when it is 'false'. For any
# other value the return is a message String rather than a boolean; note
# that a String is truthy in a conditional.
def true?(obj)
  case obj.to_s
  when 'true' then true
  when 'false' then false
  else
    "#{obj} is not a truthy value, please open an issue with this output so we can fix it"
  end
end
@@ -0,0 +1,143 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # es-node-metrics
4
+ #
5
+ # DESCRIPTION:
6
+ # This plugin uses the ES API to collect metrics, producing a JSON
7
+ # document which is outputted to STDOUT. An exit status of 0 indicates
8
+ # the plugin has successfully collected and produced.
9
+ #
10
+ # OUTPUT:
11
+ # metric data
12
+ #
13
+ # PLATFORMS:
14
+ # Linux
15
+ #
16
+ # DEPENDENCIES:
17
+ # gem: sensu-plugin
18
+ # gem: rest-client
19
+ #
20
+ # USAGE:
21
+ # #YELLOW
22
+ #
23
+ # NOTES:
24
+ #
25
+ # LICENSE:
26
+ # Copyright 2011 Sonian, Inc <chefs@sonian.net>
27
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
28
+ # for details.
29
+ #
30
+
31
+ require 'sensu-plugin/metric/cli'
32
+ require 'rest-client'
33
+ require 'json'
34
+ require 'base64'
35
+
36
#
# ES Node Metrics
#
# Reads a small set of OS, process and JVM statistics for the local node
# from the Elasticsearch HTTP API and passes them to `output`, prefixed
# with the configured scheme.
#
class ESMetrics < Sensu::Plugin::Metric::CLI::Graphite
  option :scheme,
         description: 'Metric naming scheme, text to prepend to queue_name.metric',
         short: '-s SCHEME',
         long: '--scheme SCHEME',
         default: "#{Socket.gethostname}.elasticsearch"

  option :host,
         description: 'Elasticsearch server host.',
         short: '-h HOST',
         long: '--host HOST',
         default: 'localhost'

  option :port,
         description: 'Elasticsearch port',
         short: '-p PORT',
         long: '--port PORT',
         proc: proc(&:to_i),
         default: 9200

  # get_es_resource reads config[:timeout]; without this option that value
  # is always nil.
  option :timeout,
         description: 'Request timeout to elasticsearch',
         short: '-t TIMEOUT',
         long: '--timeout TIMEOUT',
         proc: proc(&:to_i),
         default: 30

  option :user,
         description: 'Elasticsearch User',
         short: '-u USER',
         long: '--user USER'

  option :password,
         description: 'Elasticsearch Password',
         short: '-P PASS',
         long: '--password PASS'

  option :https,
         description: 'Enables HTTPS',
         short: '-e',
         long: '--https'

  option :cert_file,
         description: 'Cert file to use',
         long: '--cert CERT_FILE'

  # Returns the version number string reported by the server's root resource.
  def acquire_es_version
    info = get_es_resource('/')
    info['version']['number']
  end

  # Fetches +resource+ from the configured Elasticsearch host and returns the
  # parsed JSON body.
  #
  # Basic authentication is sent only when both user and password are set.
  # A refused connection or a request timeout is reported through `warning`.
  def get_es_resource(resource)
    headers = {}
    if config[:user] && config[:password]
      auth = 'Basic ' + Base64.strict_encode64("#{config[:user]}:#{config[:password]}")
      headers = { 'Authorization' => auth }
    end

    protocol = config[:https] ? 'https' : 'http'

    request_options = { timeout: config[:timeout], headers: headers }
    request_options[:ssl_ca_file] = config[:cert_file].to_s if config[:cert_file]

    url = "#{protocol}://#{config[:host]}:#{config[:port]}#{resource}"
    ::JSON.parse(RestClient::Resource.new(url, request_options).get)
  rescue Errno::ECONNREFUSED
    warning 'Connection refused'
  rescue RestClient::RequestTimeout
    warning 'Connection timed out'
  end

  # Collects the node metrics and outputs each numeric one with a shared
  # timestamp.
  def run
    es_version = Gem::Version.new(acquire_es_version)

    # The node info/stats resources live under a different path before 1.0.0.
    if es_version >= Gem::Version.new('1.0.0')
      ln = get_es_resource('/_nodes/_local')
      stats = get_es_resource('/_nodes/_local/stats')
    else
      ln = get_es_resource('/_cluster/nodes/_local')
      stats = get_es_resource('/_cluster/nodes/_local/stats')
    end

    timestamp = Time.now.to_i
    node = stats['nodes'].values.first
    # Copies the max heap size from the node info document into the stats
    # document.
    node['jvm']['mem']['heap_max_in_bytes'] = ln['nodes'].values.first['jvm']['mem']['heap_max_in_bytes']

    metrics = {}
    metrics['os.load_average'] = if es_version >= Gem::Version.new('2.0.0')
                                   node['os']['load_average']
                                 else
                                   node['os']['load_average'][0]
                                 end
    metrics['os.mem.free_in_bytes'] = node['os']['mem']['free_in_bytes']
    metrics['process.mem.resident_in_bytes'] = node['process']['mem']['resident_in_bytes']
    metrics['jvm.mem.heap_used_in_bytes'] = node['jvm']['mem']['heap_used_in_bytes']
    metrics['jvm.mem.non_heap_used_in_bytes'] = node['jvm']['mem']['non_heap_used_in_bytes']
    metrics['jvm.gc.collection_time_in_millis'] = node['jvm']['gc']['collection_time_in_millis']

    # Values missing from the response are nil; only numeric values are emitted.
    metrics.each do |k, v|
      output([config[:scheme], k].join('.'), v, timestamp) if v.is_a? Numeric
    end
    ok
  end
end