sensu-plugins-graphite 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/CHANGELOG.md +12 -0
- data/LICENSE +22 -0
- data/README.md +53 -0
- data/bin/check-graphite-data.rb +257 -0
- data/bin/check-graphite-replication.rb +227 -0
- data/bin/check-graphite-stats.rb +145 -0
- data/bin/check-graphite.rb +530 -0
- data/bin/handler-graphite-event.rb +78 -0
- data/bin/handler-graphite-notify.rb +29 -0
- data/bin/handler-graphite-occurrences.rb +39 -0
- data/bin/mutator-graphite.rb +37 -0
- data/lib/sensu-plugins-graphite.rb +14 -0
- data/lib/sensu-plugins-graphite/version.rb +28 -0
- metadata +308 -0
- metadata.gz.sig +1 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-stats
|
4
|
+
#
|
5
|
+
# DESCRIPTION:
|
6
|
+
# Checks metrics in graphite, averaged over a period of time.
|
7
|
+
#
|
8
|
+
# The fired sensu event will only be critical if a stat is
|
9
|
+
# above the critical threshold. Otherwise, the event will be warning,
|
10
|
+
# if a stat is above the warning threshold.
|
11
|
+
#
|
12
|
+
# Multiple stats will be checked if * are used
|
13
|
+
# in the "target" query.
|
14
|
+
#
|
15
|
+
# OUTPUT:
|
16
|
+
# plain text
|
17
|
+
#
|
18
|
+
# PLATFORMS:
|
19
|
+
# Linux
|
20
|
+
#
|
21
|
+
# DEPENDENCIES:
|
22
|
+
# gem: sensu-plugin
|
23
|
+
# gem: <?>
|
24
|
+
#
|
25
|
+
# USAGE:
|
26
|
+
# example commands
|
27
|
+
#
|
28
|
+
# NOTES:
|
29
|
+
#
|
30
|
+
# LICENSE:
|
31
|
+
# Alan Smith (alan@asmith.me)
|
32
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
33
|
+
# for details.
|
34
|
+
#
|
35
|
+
|
36
|
+
require 'json'
|
37
|
+
require 'net/http'
|
38
|
+
require 'sensu-plugin/check/cli'
|
39
|
+
|
40
|
+
class CheckGraphiteStat < Sensu::Plugin::Check::CLI
|
41
|
+
# Command-line options for the check (declared through the `option` DSL
# inherited from the Sensu check base class).

# Graphite host the render API is queried on.
option :host,
       short: '-h HOST',
       long: '--host HOST',
       description: 'graphite hostname',
       proc: proc(&:to_s),
       default: 'graphite'

# Passed verbatim as the `from` parameter of the render request.
option :period,
       short: '-p PERIOD',
       long: '--period PERIOD',
       description: 'The period back in time to extract from Graphite. Use -24hours, -2days, -15mins, etc, same format as in Graphite',
       proc: proc(&:to_s),
       required: true

# Passed verbatim as the `target` parameter of the render request.
option :target,
       short: '-t TARGET',
       long: '--target TARGET',
       description: 'The graphite metric name. Can include * to query multiple metrics',
       proc: proc(&:to_s),
       required: true

# Average above this value yields WARNING (optional).
option :warn,
       short: '-w WARN',
       long: '--warn WARN',
       description: 'Warning level',
       proc: proc(&:to_f),
       required: false

# Average above this value yields CRITICAL (optional).
# The argument placeholder is upper-cased to match the other options.
option :crit,
       short: '-c CRIT',
       long: '--crit CRIT',
       description: 'Critical level',
       proc: proc(&:to_f),
       required: false

# When set, series without datapoints do not produce an UNKNOWN result.
option :unknown_ignore,
       short: '-u',
       long: '--unknown-ignore',
       description: "Do nothing for UNKNOWN status (when you wildcard-match a ton of metrics at once and you don't care for a few missing data)",
       boolean: true,
       default: false
|
82
|
+
|
83
|
+
# Arithmetic mean of a collection.
#
# Every element is coerced with +to_f+ before being summed; the sum is then
# divided by +a.length+.
def average(a)
  sum = a.to_a.inject(0) { |acc, item| acc + item.to_f }
  sum / a.length
end
|
89
|
+
|
90
|
+
# Classify one series of the render response.
#
# metric - Hash with a 'target' name and a 'datapoints' list whose entries
#          carry the value in first position.
#
# Returns a two-element Array [status, message]:
#   [2, msg] when the average of the non-nil values exceeds config[:crit]
#   [1, msg] when it exceeds config[:warn]
#   [3, msg] when there are no non-nil values and :unknown_ignore is not set
#   [0, nil] otherwise
def danger(metric)
  samples = metric['datapoints'].map(&:first).compact

  if samples.empty?
    return [0, nil] if config[:unknown_ignore]
    return [3, "#{metric['target']} has no datapoints"]
  end

  mean = average(samples)
  crit_level = config[:crit]
  warn_level = config[:warn]

  # Critical is evaluated first so it always wins over warning.
  return [2, "#{metric['target']} is #{mean}"] if !crit_level.nil? && mean > crit_level
  return [1, "#{metric['target']} is #{mean}"] if !warn_level.nil? && mean > warn_level

  [0, nil]
end
|
107
|
+
|
108
|
+
# Entry point of the check.
#
# Fetches the configured target from the Graphite render API as JSON,
# classifies every returned series with #danger and reports the highest
# status seen, with the messages of all non-OK series concatenated.
#
# Reports WARNING when the HTTP request raises, and UNKNOWN when the response
# is empty or cannot be parsed.
def run
  body =
    begin
      # Target and period are user input; form-encode them so characters such
      # as spaces, braces or '&' cannot break the URI or the query string.
      query = URI.encode_www_form(format: 'json', target: config[:target], from: config[:period])
      res = Net::HTTP.get_response(URI("http://#{config[:host]}/render?#{query}"))
      res.body
    rescue => e
      warning "Failed to query graphite: #{e.inspect}"
    end

  status = 0
  message = ''
  data =
    begin
      JSON.parse(body)
    rescue
      # Best effort: an unparsable (or missing) body is treated as "no data".
      []
    end

  unknown 'No data from graphite' if data.empty?

  data.each do |metric|
    s, msg = danger(metric)

    message += "#{msg} " unless s == 0
    # Keep the numerically highest status seen so far.
    status = s unless s < status
  end

  case status
  when 2 then critical message
  when 1 then warning message
  when 3 then unknown message
  end
  ok
end
|
145
|
+
end
|
@@ -0,0 +1,530 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# <script name>
|
4
|
+
#
|
5
|
+
# DESCRIPTION:
|
6
|
+
# Get time series values from Graphite and create events based on values
|
7
|
+
#
|
8
|
+
# OUTPUT:
|
9
|
+
# plain text
|
10
|
+
#
|
11
|
+
# PLATFORMS:
|
12
|
+
# Linux
|
13
|
+
#
|
14
|
+
# DEPENDENCIES:
|
15
|
+
# gem: sensu-plugin
|
16
|
+
# gem: json
|
17
|
+
# gem: socket
|
18
|
+
# gem: array_stats
|
19
|
+
# gem: net/http
|
20
|
+
#
|
21
|
+
# USAGE:
|
22
|
+
# #YELLOW
|
23
|
+
#
|
24
|
+
# NOTES:
|
25
|
+
#
|
26
|
+
# LICENSE:
|
27
|
+
# Copyright 2012 Ulf Mansson @ Recorded Future
|
28
|
+
# Modifications by Chris Jansen to support wildcard targets
|
29
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
30
|
+
# for details.
|
31
|
+
#
|
32
|
+
|
33
|
+
require 'sensu-plugin/check/cli'
|
34
|
+
require 'json'
|
35
|
+
require 'net/http'
|
36
|
+
require 'socket'
|
37
|
+
require 'array_stats'
|
38
|
+
|
39
|
+
class Graphite < Sensu::Plugin::Check::CLI
|
40
|
+
# Command-line options for the check (declared through the `option` DSL
# inherited from the Sensu check base class).

option :host,
       short: '-h HOST',
       long: '--host HOST',
       description: 'Graphite host to connect to, include port',
       required: true

option :target,
       description: 'The graphite metric name. Could be a comma separated list of metric names.',
       short: '-t TARGET',
       long: '--target TARGET',
       required: true

option :period,
       description: 'The period back in time to extract from Graphite and compare with. Use 24hours,2days etc, same format as in Graphite',
       short: '-p PERIOD',
       long: '--period PERIOD',
       default: '2hours'

option :updated_since,
       description: 'The graphite value should have been updated within UPDATED_SINCE seconds, default to 600 seconds',
       short: '-u UPDATED_SINCE',
       long: '--updated_since UPDATED_SINCE',
       default: 600

# The short flag used to be '-d', which is also declared by :data_points
# below; two options cannot share one short switch, so this one uses '-D'.
# The long flag is unchanged.
option :acceptable_diff_percentage,
       description: 'The acceptable diff from max values in percentage, used in check_function_increasing',
       short: '-D ACCEPTABLE_DIFF_PERCENTAGE',
       long: '--acceptable_diff_percentage ACCEPTABLE_DIFF_PERCENTAGE',
       default: 0

# NOTE(review): the long flag says "decreasing" while the option name and
# description say "increasing". Left untouched so existing check definitions
# keep working -- confirm before renaming.
option :check_function_increasing,
       description: 'Check that value is increasing or equal over time (use acceptable_diff_percentage if it should allow to be lower)',
       short: '-i',
       long: '--check_function_decreasing',
       default: false,
       boolean: true

option :greater_than,
       description: 'Change whether value is greater than or less than check',
       short: '-g',
       long: '--greater_than',
       default: false

# Comma separated thresholds, read as warning,error,fatal by #get_levels.
option :check_last,
       description: 'Check that the last value in GRAPHITE is greater/less than VALUE',
       short: '-l VALUE',
       long: '--last VALUE',
       default: nil

option :ignore_nulls,
       description: 'Do not error on null values, used in check_function_increasing',
       short: '-n',
       long: '--ignore_nulls',
       default: false,
       boolean: true

option :concat_output,
       description: 'Include warning messages in output even if overall status is critical',
       short: '-c',
       long: '--concat_output',
       default: false,
       boolean: true

option :short_output,
       description: 'Report only the highest status per series in output',
       short: '-s',
       long: '--short_output',
       default: false,
       boolean: true

# Comma separated thresholds, read as warning,error,fatal by #get_levels.
option :check_average,
       description: 'MAX_VALUE should be greater than the average of Graphite values from PERIOD',
       short: '-a MAX_VALUE',
       long: '--average_value MAX_VALUE'

option :data_points,
       description: 'Number of data points to include in average check (smooths out spikes)',
       short: '-d VALUE',
       long: '--data_points VALUE',
       default: 1

# Comma separated thresholds, read as warning,error,fatal by #get_levels.
option :check_average_percent,
       description: 'MAX_VALUE% should be greater than the average of Graphite values from PERIOD',
       short: '-b MAX_VALUE',
       long: '--average_percent_value MAX_VALUE'

option :percentile,
       description: 'Percentile value, should be used in conjunction with percentile_value, defaults to 90',
       long: '--percentile PERCENTILE',
       default: 90

# Comma separated thresholds, read as warning,error,fatal by #get_levels.
option :check_percentile,
       description: 'Values should not be greater than the VALUE of Graphite values from PERIOD',
       long: '--percentile_value VALUE'

option :http_user,
       description: 'Basic HTTP authentication user',
       short: '-U USER',
       long: '--http-user USER',
       default: nil

# The argument placeholder previously read USER; it is the password.
option :http_password,
       description: 'Basic HTTP authentication password',
       short: '-P PASSWORD',
       long: '--http-password PASSWORD',
       default: nil
|
146
|
+
|
147
|
+
# Runs the parent initializer, then starts with an empty response cache.
# The cache maps a target string to the list of series fetched for it.
def initialize
  super()
  @graphite_cache = {}
end
|
151
|
+
|
152
|
+
# Look up cached series for a target.
#
# Only entries recorded for the current @period are considered.
#
# Returns the matching entries, or nil when the target is not cached or no
# entry matches the period.
def graphite_cache(target = nil)
  return nil unless @graphite_cache.key?(target)

  matching = @graphite_cache[target].select { |entry| entry[:period] == @period }
  matching.empty? ? nil : matching
end
|
159
|
+
|
160
|
+
# Create a graphite url from params
|
161
|
+
#
|
162
|
+
#
|
163
|
+
# Build the URI of the Graphite render endpoint.
#
# target - optional metric expression appended as the `target` query
#          parameter (appended as given, without escaping).
#
# config[:host] may be a bare "host[:port]" or a full "http(s)://host[:port]"
# prefix. The scheme is only considered present when the value really starts
# with "http://" or "https://"; a prefix test on "http" alone would mistake
# host names such as "httpd-graphite" for URLs.
#
# Returns a URI.
def graphite_url(target = nil)
  url = "#{config[:host]}/render/"
  url = "http://#{url}" unless url =~ %r{\Ahttps?://}i
  url += "?target=#{target}" if target
  URI.parse(url)
end
|
170
|
+
|
171
|
+
# Split a comma separated threshold string into named levels.
#
# The first value becomes 'warning', the second 'error', the third 'fatal';
# missing positions are left out and values beyond the third are ignored.
#
# Returns a Hash of level name => value (values stay Strings).
def get_levels(config_param)
  thresholds = config_param.split(',')
  %w(warning error fatal).zip(thresholds).each_with_object({}) do |(type, threshold), levels|
    levels[type] = threshold if threshold
  end
end
|
181
|
+
|
182
|
+
# Fetch the series for a target from Graphite, using the cache when possible.
#
# On a cache miss the render endpoint is POSTed with target, from ("-" plus
# @period) and format=json; the parsed series are stored in @graphite_cache
# under the target together with the current period.
#
# Basic authentication is sent only when both config[:http_user] and
# config[:http_password] are set.
#
# Returns the cached entries for the target (Array of Hashes with :target,
# :period and :datapoints), or nil when Graphite returned no series.
def get_graphite_values(target)
  cached = graphite_cache target
  return cached if cached

  params = {
    target: target,
    from: "-#{@period}",
    format: 'json'
  }

  # Build the endpoint once instead of re-parsing it for every attribute.
  endpoint = graphite_url
  req = Net::HTTP::Post.new(endpoint.path)

  # If the basic http authentication credentials have been provided, then use them
  if !config[:http_user].nil? && !config[:http_password].nil?
    req.basic_auth(config[:http_user], config[:http_password])
  end

  req.set_form_data(params)
  # Enable TLS when the configured host uses https://; without it the request
  # would be sent as plain HTTP to the HTTPS port.
  resp = Net::HTTP.start(endpoint.host, endpoint.port, use_ssl: endpoint.scheme == 'https') do |http|
    http.request(req)
  end
  data = JSON.parse(resp.body)

  # An empty list is cached even when nothing came back; #graphite_cache
  # reports such an entry as a miss.
  @graphite_cache[target] = []
  return nil unless data.size > 0

  data.each { |d| @graphite_cache[target] << { target: d['target'], period: @period, datapoints: d['datapoints'] } }
  graphite_cache target
end
|
209
|
+
|
210
|
+
# Will give max values for [0..-2]
|
211
|
+
# Maximum value per series for a target, as computed by #get_max_value.
#
# Returns a Hash of series name => maximum; empty when no series were found.
def max_graphite_value(target)
  series_list = get_graphite_values target
  return {} unless series_list

  series_list.each_with_object({}) do |series, maxima|
    maxima[series[:target]] = get_max_value(series[:datapoints])
  end
end
|
222
|
+
|
223
|
+
# Largest value among all datapoints except the last one.
#
# values - list of datapoints with the value in first position; a missing
#          (nil) value counts as 0.
#
# Returns the maximum, nil when fewer than two datapoints are given, or nil
# when values itself is nil.
def get_max_value(values)
  return nil unless values

  readings = values.map { |point| point[0] || 0 }
  # The most recent datapoint is excluded from the maximum.
  readings[0..-2].max
end
|
230
|
+
|
231
|
+
# Most recent datapoints per series for a target, as selected by
# #get_last_metric.
#
# count - how many datapoints to request per series (default 1).
#
# Returns a Hash of series name => result of #get_last_metric; empty when no
# series were found.
def last_graphite_metric(target, count = 1)
  series_list = get_graphite_values target
  return {} unless series_list

  series_list.each_with_object({}) do |series, latest|
    latest[series[:target]] = get_last_metric(series[:datapoints], count)
  end
end
|
242
|
+
|
243
|
+
# Pick the most recent datapoints that carry a value.
#
# values - list of datapoints with the value in first position, oldest first.
# count  - maximum number of datapoints to return (default 1).
#
# Walks the list from the newest datapoint backwards, skipping datapoints
# whose value is nil, and stops at the start of the list, at a nil entry, or
# once count datapoints were collected. The walk is bounded by the list
# itself: a manual index would run below zero and, because negative indexes
# wrap around, hand back the same datapoints twice when fewer non-null
# values exist than requested.
#
# Returns the selected datapoints, newest first (possibly empty), or nil when
# values is nil.
def get_last_metric(values, count = 1)
  return nil unless values

  found = []
  values.reverse_each do |point|
    break if point.nil? || found.size >= count
    found << point if point[0]
  end
  found
end
|
259
|
+
|
260
|
+
# Mean of the most recent values per series for a target.
#
# count - how many recent datapoints to average per series (default 1).
#
# Relies on Array#mean (from the array_stats gem required by this file).
#
# Returns a Hash of series name => mean of the selected values.
def last_graphite_value(target, count = 1)
  recent = last_graphite_metric(target, count)
  return {} unless recent

  recent.each_with_object({}) do |(target_name, points), averages|
    averages[target_name] = points.map { |point| point[0] }.mean
  end
end
|
270
|
+
|
271
|
+
# Check that every series of a target has a recent datapoint.
#
# target        - metric expression to look up.
# time          - cut-off; the newest datapoint must be strictly newer
#                 (compared via time.to_i against the datapoint timestamp).
# updated_since - number of seconds, used only in the warning text.
#
# #last_graphite_metric yields a LIST of [value, timestamp] datapoints per
# series, so the timestamp is read from the first datapoint of that list
# (indexing the list itself with [1] returns nil for a one-element list and
# the comparison then raises). A series without any non-null datapoint is
# reported as not updated.
#
# Returns an Array of warning messages (empty when everything is fresh).
def been_updated_since(target, time, updated_since)
  latest = last_graphite_metric target
  return [] unless latest

  cutoff = time.to_i
  latest.each_with_object([]) do |(target_name, datapoints), warnings|
    newest = Array(datapoints).first
    timestamp = newest && newest[1]
    next if timestamp && timestamp > cutoff
    warnings << "The metric #{target_name} has not been updated in #{updated_since} seconds"
  end
end
|
282
|
+
|
283
|
+
# Word used in alert messages for the comparison direction:
# 'greater' when config[:greater_than] is set, 'less' otherwise.
def greater_less
  config[:greater_than] ? 'greater' : 'less'
end
|
287
|
+
|
288
|
+
# Check that the latest value of every series is not below its earlier
# maximum (within config[:acceptable_diff_percentage] percent).
#
# A critical error is recorded for each series whose maximum exceeds
# last * (1 + acceptable_diff_percentage / 100); the message names the
# individual series, so wildcard targets show which one regressed.
#
# A warning is recorded when no values at all were found for the target.
# The helpers always return a Hash, so the emptiness test is what makes that
# branch reachable.
#
# Unless config[:ignore_nulls] is set, the warnings of #been_updated_since
# are appended as well.
#
# Returns [warnings, critical_errors, []].
def check_increasing(target)
  updated_since = config[:updated_since].to_i
  time_to_be_updated_since = Time.now - updated_since
  critical_errors = []
  warnings = []
  max_gv = max_graphite_value target
  last_gv = last_graphite_value target

  if last_gv.is_a?(Hash) && max_gv.is_a?(Hash) && !last_gv.empty?
    tolerance = 1 + config[:acceptable_diff_percentage].to_f / 100
    last_gv.each do |target_name, last|
      max = max_gv[target_name]
      next unless last && max
      next unless max > last * tolerance
      critical_errors << "The metric #{target_name} with last value #{last} is less than max value #{max} during #{config[:period]} period"
    end
  else
    warnings << "Could not found any value in Graphite for metric #{target}, see #{graphite_url(target)}"
  end

  unless config[:ignore_nulls]
    warnings.concat(been_updated_since(target, time_to_be_updated_since, updated_since))
  end
  [warnings, critical_errors, []]
end
|
315
|
+
|
316
|
+
# Compare the ratio "latest value / period average" of every series against
# the configured levels.
#
# target      - metric expression to look up.
# max_values  - Hash of level name ('warning', 'error', 'fatal') => threshold.
# data_points - how many recent datapoints form the latest value (default 1).
#
# Levels are evaluated in the order fatal, error, warning. With
# config[:greater_than] the ratio must exceed the threshold to alert,
# otherwise the threshold must exceed the ratio. Series without any non-null
# value only alert when config[:ignore_nulls] is not set. With
# config[:short_output] only the first alerting level per series is kept.
#
# Returns [warnings, criticals, fatal].
def check_average_percent(target, max_values, data_points = 1)
  series_list = get_graphite_values target
  last_values = last_graphite_value(target, data_points)
  return [[], [], []] unless series_list

  warnings = []
  criticals = []
  fatal = []
  sinks = { 'warning' => warnings, 'error' => criticals, 'fatal' => fatal }

  series_list.each do |series|
    series_name = series[:target]
    samples = series[:datapoints].select(&:first).map(&:first)
    # nil.to_f is 0.0, so an empty sample list yields 0.0 / 0 here.
    avg_value = samples.inject(:+).to_f / samples.size
    last_value = last_values[series_name]
    percent = last_value.nil? || avg_value.nil? ? nil : last_value / avg_value

    %w(fatal error warning).each do |type|
      next unless max_values.key?(type)
      max_value = max_values[type]
      threshold = max_value.to_f
      left, right = config[:greater_than] ? [percent, threshold] : [threshold, percent]
      next if percent.nil?
      next unless left > right
      next unless samples.size > 0 || !config[:ignore_nulls]

      sinks[type] << "The last value of metric #{series_name} is #{percent}% #{greater_less} than allowed #{max_value}% of the average value #{avg_value}"
      break if config[:short_output]
    end
  end
  [warnings, criticals, fatal]
end
|
355
|
+
|
356
|
+
# Compare the period average of every series against the configured levels.
#
# target     - metric expression to look up.
# max_values - Hash of level name ('warning', 'error', 'fatal') => threshold.
#
# Levels are evaluated in the order fatal, error, warning. With
# config[:greater_than] the average must exceed the threshold to alert,
# otherwise the threshold must exceed the average. Series without any
# non-null value only alert when config[:ignore_nulls] is not set. With
# config[:short_output] only the first alerting level per series is kept.
#
# Returns [warnings, criticals, fatal].
def check_average(target, max_values)
  series_list = get_graphite_values target
  return [[], [], []] unless series_list

  warnings = []
  criticals = []
  fatal = []
  sinks = { 'warning' => warnings, 'error' => criticals, 'fatal' => fatal }

  series_list.each do |series|
    series_name = series[:target]
    samples = series[:datapoints].select(&:first).map(&:first)
    # nil.to_f is 0.0, so an empty sample list yields 0.0 / 0 here.
    avg_value = samples.inject(:+).to_f / samples.size

    %w(fatal error warning).each do |type|
      next unless max_values.key?(type)
      max_value = max_values[type]
      threshold = max_value.to_f
      breached = config[:greater_than] ? avg_value > threshold : threshold > avg_value
      next unless breached && (samples.size > 0 || !config[:ignore_nulls])

      sinks[type] << "The average value of metric #{series_name} is #{avg_value} that is #{greater_less} than allowed average of #{max_value}"
      break if config[:short_output]
    end
  end
  [warnings, criticals, fatal]
end
|
392
|
+
|
393
|
+
# Compare the ratio "latest value / percentile value" of every series
# against the configured levels.
#
# target      - metric expression to look up.
# max_values  - Hash of level name ('warning', 'error', 'fatal') => threshold.
# percentile  - percentile passed to Array#percentile (array_stats gem).
# data_points - how many recent datapoints form the latest value (default 1).
#
# Levels are evaluated in the order fatal, error, warning. With
# config[:greater_than] the ratio must exceed the threshold to alert,
# otherwise the threshold must exceed the ratio. With config[:short_output]
# only the first alerting level per series is kept.
#
# The comparison is skipped whenever the ratio could not be computed, the
# same guard #check_average_percent uses; testing only the percentile value
# would still compare against nil when the latest value is missing. The
# alert text is a single line: a literal split across source lines embeds the
# line break and the indentation into the message.
#
# Returns [warnings, criticals, fatal].
def check_percentile(target, max_values, percentile, data_points = 1)
  values = get_graphite_values target
  last_values = last_graphite_value(target, data_points)
  return [[], [], []] unless values

  warnings = []
  criticals = []
  fatal = []
  sinks = { 'warning' => warnings, 'error' => criticals, 'fatal' => fatal }

  values.each do |data|
    series_name = data[:target]
    values_array = data[:datapoints].select(&:first).map(&:first)
    percentile_value = values_array.percentile(percentile)
    last_value = last_values[series_name]
    percent = last_value / percentile_value unless last_value.nil? || percentile_value.nil?
    next if percent.nil?

    %w(fatal error warning).each do |type|
      next unless max_values.key?(type)
      max_value = max_values[type]
      threshold = max_value.to_f
      breached = config[:greater_than] ? percent > threshold : threshold > percent
      next unless breached

      sinks[type] << "The percentile value of metric #{series_name} (#{last_value}) is #{greater_less} than the " \
                     "#{percentile}th percentile (#{percentile_value}) by more than #{max_value}%"
      break if config[:short_output]
    end
  end
  [warnings, criticals, fatal]
end
|
432
|
+
|
433
|
+
# Compare the most recent value of every series against the configured
# levels.
#
# target     - metric expression to look up.
# max_values - Hash of level name ('warning', 'error', 'fatal') => threshold.
#
# #last_graphite_metric yields a LIST of [value, timestamp] datapoints per
# series; the value compared here is the first element of the newest
# datapoint (comparing the datapoint pair itself with a Float raises).
# Series without any non-null datapoint are skipped.
#
# Levels are evaluated in the order fatal, error, warning. With
# config[:greater_than] the value must exceed the threshold to alert,
# otherwise the threshold must exceed the value. With config[:short_output]
# only the first alerting level per series is kept.
#
# Returns [warnings, criticals, fatal].
def check_last(target, max_values)
  last_targets = last_graphite_metric target
  return [[], [], []] unless last_targets

  warnings = []
  criticals = []
  fatal = []
  sinks = { 'warning' => warnings, 'error' => criticals, 'fatal' => fatal }

  last_targets.each do |target_name, datapoints|
    newest = Array(datapoints).first
    last_value = newest && newest.first
    next if last_value.nil?

    %w(fatal error warning).each do |type|
      next unless max_values.key?(type)
      max_value = max_values[type]
      threshold = max_value.to_f
      breached = config[:greater_than] ? last_value > threshold : threshold > last_value
      next unless breached

      sinks[type] << "The metric #{target_name} is #{last_value} that is #{greater_less} than max allowed #{max_value}"
      break if config[:short_output]
    end
  end
  [warnings, criticals, fatal]
end
|
468
|
+
|
469
|
+
# Entry point of the check.
#
# Splits config[:target] on commas and, for every target, runs each check
# that is enabled in the configuration (increasing, last, average, average
# percent, percentile), collecting warnings, critical errors and fatals.
#
# Fatals and critical errors are both reported through `critical`, warnings
# through `warning`; `ok` is called last. With config[:concat_output] the
# lower-severity messages are appended to the higher-severity output.
def run # rubocop:disable all
  targets = config[:target].split(',')
  @period = config[:period]
  critical_errors = []
  warnings = []
  fatals = []

  # Merge one [warnings, criticals, fatals] result into the running totals.
  absorb = lambda do |result|
    found_warnings, found_criticals, found_fatals = result
    warnings += found_warnings
    critical_errors += found_criticals
    fatals += found_fatals
  end

  targets.each do |target|
    absorb.call(check_increasing(target)) if config[:check_function_increasing]

    if config[:check_last]
      max_values = get_levels config[:check_last]
      absorb.call(check_last(target, max_values))
    end

    if config[:check_average]
      max_values = get_levels config[:check_average]
      absorb.call(check_average(target, max_values))
    end

    if config[:check_average_percent]
      max_values = get_levels config[:check_average_percent]
      absorb.call(check_average_percent(target, max_values, config[:data_points].to_i))
    end

    if config[:check_percentile]
      max_values = get_levels config[:check_percentile]
      absorb.call(check_percentile(target, max_values, config[:percentile].to_i, config[:data_points].to_i))
    end
  end

  # Joining an empty list already gives ''.
  fatals_string = fatals.join("\n")
  criticals_string = critical_errors.join("\n")
  warnings_string = warnings.join("\n")

  if config[:concat_output]
    fatals_string += "\n" + criticals_string unless critical_errors.empty?
    unless warnings.empty?
      warning_suffix = "\nGraphite WARNING: " + warnings_string
      fatals_string += warning_suffix
      criticals_string += warning_suffix
    end
  end

  critical fatals_string unless fatals.empty?
  critical criticals_string unless critical_errors.empty?
  warning warnings_string unless warnings.empty?
  ok
end
|
530
|
+
end
|