sensu-plugins-graphite 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/CHANGELOG.md +12 -0
- data/LICENSE +22 -0
- data/README.md +53 -0
- data/bin/check-graphite-data.rb +257 -0
- data/bin/check-graphite-replication.rb +227 -0
- data/bin/check-graphite-stats.rb +145 -0
- data/bin/check-graphite.rb +530 -0
- data/bin/handler-graphite-event.rb +78 -0
- data/bin/handler-graphite-notify.rb +29 -0
- data/bin/handler-graphite-occurrences.rb +39 -0
- data/bin/mutator-graphite.rb +37 -0
- data/lib/sensu-plugins-graphite.rb +14 -0
- data/lib/sensu-plugins-graphite/version.rb +28 -0
- metadata +308 -0
- metadata.gz.sig +1 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b2a16c3bc084157cd55b7141a470cbf2ca12f711
|
4
|
+
data.tar.gz: f0742cb176d3eae355232ce7377a30d59639e837
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f98e424b272955d83e69d0478c80dddbeb380c15db4d154da0f0b82e7eb28ccc9295d9833cc0d3558732d995e953be1dad90e4d76f9765442caf60e3861030c4
|
7
|
+
data.tar.gz: aa3554ccc1943cd78bac35ce23d416ca47c0c412a0f451e0018ff6eea8554e95bbaa7967997bee93a8b465afd54c500dba34a207fb822fa6913dd95303446430
|
checksums.yaml.gz.sig
ADDED
Binary file
|
data.tar.gz.sig
ADDED
Binary file
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Change Log
|
2
|
+
This project adheres to [Semantic Versioning](http://semver.org/).
|
3
|
+
|
4
|
+
This CHANGELOG follows the format listed at [Keep A Changelog](http://keepachangelog.com/)
|
5
|
+
|
6
|
+
## [Unreleased][unreleased]
|
7
|
+
|
8
|
+
## 0.0.1 - 2015-04-30
|
9
|
+
|
10
|
+
### Added
|
11
|
+
- initial release
|
12
|
+
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Sensu-Plugins
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
## Sensu-Plugins-graphite
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/sensu-plugins/sensu-plugins-graphite.svg?branch=master)](https://travis-ci.org/sensu-plugins/sensu-plugins-graphite)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/sensu-plugins-graphite.svg)](http://badge.fury.io/rb/sensu-plugins-graphite)
|
5
|
+
[![Code Climate](https://codeclimate.com/github/sensu-plugins/sensu-plugins-graphite/badges/gpa.svg)](https://codeclimate.com/github/sensu-plugins/sensu-plugins-graphite)
|
6
|
+
[![Test Coverage](https://codeclimate.com/github/sensu-plugins/sensu-plugins-graphite/badges/coverage.svg)](https://codeclimate.com/github/sensu-plugins/sensu-plugins-graphite)
|
7
|
+
[![Dependency Status](https://gemnasium.com/sensu-plugins/sensu-plugins-graphite.svg)](https://gemnasium.com/sensu-plugins/sensu-plugins-graphite)
|
8
|
+
[![Codeship Status for sensu-plugins/sensu-plugins-graphite](https://codeship.com/projects/c6f4f5a0-db95-0132-445b-5ad94843e341/status?branch=master)](https://codeship.com/projects/79664)
|
9
|
+
|
10
|
+
## Functionality
|
11
|
+
|
12
|
+
## Files
|
13
|
+
* bin/check-graphite-data
|
14
|
+
* bin/check-graphite-replication
|
15
|
+
* bin/check-graphite-stats
|
16
|
+
* bin/check-graphite
|
17
|
+
* bin/extension-graphite
|
18
|
+
* bin/handler-graphite-event
|
19
|
+
* bin/handler-graphite-notify
|
20
|
+
* bin/handler-graphite-occurrences
|
21
|
+
* bin/mutator-graphite
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
**handler-graphite-event**
|
26
|
+
```
|
27
|
+
{
|
28
|
+
"graphite_event": {
|
29
|
+
"server_uri": "https://graphite.example.com:443/events/",
|
30
|
+
"tags": [
|
31
|
+
"custom_tag_a",
|
32
|
+
"custom_tag_b"
|
33
|
+
]
|
34
|
+
}
|
35
|
+
}
|
36
|
+
```
|
37
|
+
|
38
|
+
**handler-graphite-occurrences**
|
39
|
+
```
|
40
|
+
{
|
41
|
+
"graphite": {
|
42
|
+
"server":"graphite.example.com",
|
43
|
+
"port":"2003"
|
44
|
+
}
|
45
|
+
}
|
46
|
+
```
|
47
|
+
|
48
|
+
## Installation
|
49
|
+
|
50
|
+
[Installation and Setup](https://github.com/sensu-plugins/documentation/blob/master/user_docs/installation_instructions.md)
|
51
|
+
|
52
|
+
## Notes
|
53
|
+
|
@@ -0,0 +1,257 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-data
|
4
|
+
#
|
5
|
+
# DESCRIPTION:
|
6
|
+
# This plugin checks values within graphite
|
7
|
+
#
|
8
|
+
# OUTPUT:
|
9
|
+
# plain text
|
10
|
+
#
|
11
|
+
# PLATFORMS:
|
12
|
+
# Linux
|
13
|
+
#
|
14
|
+
# DEPENDENCIES:
|
15
|
+
# gem: sensu-plugin
|
16
|
+
# gem: json
|
17
|
+
# gem: open-uri
|
18
|
+
# gem: openssl
|
19
|
+
#
|
20
|
+
# USAGE:
|
21
|
+
# #YELLOW
|
22
|
+
#
|
23
|
+
# NOTES:
|
24
|
+
#
|
25
|
+
# LICENSE:
|
26
|
+
# Copyright 2014 Sonian, Inc. and contributors. <support@sensuapp.org>
|
27
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
28
|
+
# for details.
|
29
|
+
#
|
30
|
+
|
31
|
+
require 'sensu-plugin/check/cli'
|
32
|
+
require 'json'
|
33
|
+
require 'open-uri'
|
34
|
+
require 'openssl'
|
35
|
+
|
36
|
+
class CheckGraphiteData < Sensu::Plugin::Check::CLI
|
37
|
+
option :target,
|
38
|
+
description: 'Graphite data target',
|
39
|
+
short: '-t TARGET',
|
40
|
+
long: '--target TARGET',
|
41
|
+
required: true
|
42
|
+
|
43
|
+
option :server,
|
44
|
+
description: 'Server host and port',
|
45
|
+
short: '-s SERVER:PORT',
|
46
|
+
long: '--server SERVER:PORT',
|
47
|
+
required: true
|
48
|
+
|
49
|
+
option :username,
|
50
|
+
description: 'username for basic http authentication',
|
51
|
+
short: '-u USERNAME',
|
52
|
+
long: '--user USERNAME',
|
53
|
+
required: false
|
54
|
+
|
55
|
+
option :password,
|
56
|
+
description: 'user password for basic http authentication',
|
57
|
+
short: '-p PASSWORD',
|
58
|
+
long: '--pass PASSWORD',
|
59
|
+
required: false
|
60
|
+
|
61
|
+
option :passfile,
|
62
|
+
description: 'password file path for basic http authentication',
|
63
|
+
short: '-P PASSWORDFILE',
|
64
|
+
long: '--passfile PASSWORDFILE',
|
65
|
+
required: false
|
66
|
+
|
67
|
+
option :warning,
|
68
|
+
description: 'Generate warning if given value is above received value',
|
69
|
+
short: '-w VALUE',
|
70
|
+
long: '--warn VALUE',
|
71
|
+
proc: proc(&:to_f)
|
72
|
+
|
73
|
+
option :critical,
|
74
|
+
description: 'Generate critical if given value is above received value',
|
75
|
+
short: '-c VALUE',
|
76
|
+
long: '--critical VALUE',
|
77
|
+
proc: proc(&:to_f)
|
78
|
+
|
79
|
+
option :reset_on_decrease,
|
80
|
+
description: 'Send OK if value has decreased on any values within END-INTERVAL to END',
|
81
|
+
short: '-r INTERVAL',
|
82
|
+
long: '--reset INTERVAL',
|
83
|
+
proc: proc(&:to_i)
|
84
|
+
|
85
|
+
option :name,
|
86
|
+
description: 'Name used in responses',
|
87
|
+
short: '-n NAME',
|
88
|
+
long: '--name NAME',
|
89
|
+
default: 'graphite check'
|
90
|
+
|
91
|
+
option :allowed_graphite_age,
|
92
|
+
description: 'Allowed number of seconds since last data update (default: 60 seconds)',
|
93
|
+
short: '-a SECONDS',
|
94
|
+
long: '--age SECONDS',
|
95
|
+
default: 60,
|
96
|
+
proc: proc(&:to_i)
|
97
|
+
|
98
|
+
# NOTE(review): the short flag '-s' below is also declared by the :server
# option earlier in this class ('-s SERVER:PORT'); the two declarations
# collide, so only the long forms are unambiguous. Confirm which option the
# parser actually binds '-s' to before relying on it.
option :hostname_sub,
|
99
|
+
description: 'Character used to replace periods (.) in hostname (default: _)',
|
100
|
+
short: '-s CHARACTER',
|
101
|
+
long: '--host-sub CHARACTER'
|
102
|
+
|
103
|
+
option :from,
|
104
|
+
description: 'Get samples starting from FROM (default: -10mins)',
|
105
|
+
short: '-f FROM',
|
106
|
+
long: '--from FROM',
|
107
|
+
default: '-10mins'
|
108
|
+
|
109
|
+
option :below,
|
110
|
+
description: 'warnings/critical if values below specified thresholds',
|
111
|
+
short: '-b',
|
112
|
+
long: '--below'
|
113
|
+
|
114
|
+
option :no_ssl_verify,
|
115
|
+
description: 'Do not verify SSL certs',
|
116
|
+
short: '-v',
|
117
|
+
long: '--nosslverify'
|
118
|
+
|
119
|
+
option :help,
|
120
|
+
description: 'Show this message',
|
121
|
+
short: '-h',
|
122
|
+
long: '--help'
|
123
|
+
|
124
|
+
# Run checks
|
125
|
+
# Entry point: fetch the configured series from Graphite and evaluate each one.
#
# For every series the age check runs first, then the critical threshold, then
# the warning threshold; the `||` chain stops at the first helper that returns
# a truthy value. Once every series has been evaluated an OK status is
# reported.
def run
  # -h/--help: print the option summary and stop before any request is made.
  if config[:help]
    puts opt_parser
    exit
  end

  retrieve_data.each_value do |series|
    # check_age and check read the series under evaluation from these ivars.
    @value = series
    @data = series['data']
    check_age || check(:critical) || check(:warning)
  end

  ok("#{name} value okay")
end
|
139
|
+
|
140
|
+
# name used in responses
|
141
|
+
# Label used in status messages: the configured --name, followed by the
# substituted hostname in parentheses once formatted_target has stored one in
# @formatted.
def name
  label = config[:name]
  return label unless @formatted

  "#{label} (#{@formatted})"
end
|
145
|
+
|
146
|
+
# Check the age of the data being processed
|
147
|
+
# Report UNKNOWN when the current series is stale, i.e. when the timestamp
# stored under @value['end'] lies more than --age seconds in the past.
# Returns nil when the data is fresh enough.
def check_age
  max_age = config[:allowed_graphite_age]
  age = Time.now.to_i - @value['end']
  return unless age > max_age

  unknown "Graphite data age is past allowed threshold (#{max_age} seconds)"
end
|
153
|
+
|
154
|
+
# grab data from graphite
|
155
|
+
# Query Graphite's render endpoint for the configured target and summarise
# every series that contains at least one non-null datapoint.
#
# Returns a Hash keyed by series target. Each value is a Hash with:
#   'target' - the series name
#   'data'   - the non-null values, oldest first
#   'start'  - timestamp of the first non-null datapoint
#   'end'    - timestamp of the last non-null datapoint
#   'step'   - (end - start) divided by the number of datapoints, rounded up
#
# Returns nil when a response was already fetched by an earlier call. Every
# failure path is reported through `unknown`.
def retrieve_data
  return if @raw_data

  unless config[:server].start_with?('https://', 'http://')
    config[:server].prepend('http://')
  end

  url = "#{config[:server]}/render?format=json&target=#{formatted_target}&from=#{config[:from]}"

  url_opts = {}
  url_opts[:ssl_verify_mode] = OpenSSL::SSL::VERIFY_NONE if config[:no_ssl_verify]

  # Credentials are only sent when a username AND a password source are given;
  # otherwise the request goes out without authentication.
  if config[:username] && (config[:password] || config[:passfile])
    pass = if config[:passfile]
             # Block form closes the password file once its first line is read.
             File.open(config[:passfile], &:readline)
           else
             config[:password]
           end
    url_opts[:http_basic_authentication] = [config[:username], pass.chomp]
  end

  # URI#open comes from open-uri and works on every Ruby version, whereas
  # Kernel#open stopped accepting URLs in Ruby 3.0. The block form reads the
  # complete body (not just its first line) and closes the handle afterwards.
  @raw_data = URI.parse(url).open(url_opts, &:read)

  # strip: a trailing newline must not hide an empty result set.
  if @raw_data.strip == '[]'
    unknown 'Empty data received from Graphite - metric probably doesn\'t exists'
  else
    @json_data = JSON.parse(@raw_data)
    @json_data.each_with_object({}) do |raw, output|
      # Graphite pads gaps with null values; drop them before summarising.
      raw['datapoints'].delete_if { |point| point.first.nil? }
      next if raw['datapoints'].empty?

      target = raw['target']
      start = raw['datapoints'].first.last
      dend = raw['datapoints'].last.last
      output[target] = {
        'target' => target,
        'data' => raw['datapoints'].map(&:first),
        'start' => start,
        'end' => dend,
        'step' => ((dend - start) / raw['datapoints'].size.to_f).ceil
      }
    end
  end
rescue OpenURI::HTTPError
  unknown 'Failed to connect to graphite server'
rescue NoMethodError
  unknown 'No data for time period and/or target'
rescue Errno::ECONNREFUSED
  unknown 'Connection refused when connecting to graphite server'
rescue Errno::ECONNRESET
  unknown 'Connection reset by peer when connecting to graphite server'
rescue EOFError
  unknown 'End of file error when reading from graphite server'
rescue => e
  unknown "An unknown error occured: #{e.inspect}"
end
|
216
|
+
|
217
|
+
# type:: :warning or :critical
|
218
|
+
# Return alert if required
|
219
|
+
# Raise an alert of the given severity when its threshold is configured and
# the latest value of the current series has crossed it.
#
# type:: :warning or :critical - names both the config key holding the
#        threshold and the method invoked to emit the alert.
# Returns nil when no threshold is set or it has not been crossed.
def check(type)
  return unless config[type]
  return unless below?(type) || above?(type)

  send(type, "#{@value['target']} has passed #{type} threshold (#{@data.last})")
end
|
225
|
+
|
226
|
+
# Check if value is below defined threshold
|
227
|
+
# In --below mode, tell whether the latest value of the current series is
# lower than the threshold configured for +type+. Outside --below mode the
# falsy value of config[:below] is returned unchanged.
def below?(type)
  below_mode = config[:below]
  return below_mode unless below_mode

  @data.last < config[type]
end
|
230
|
+
|
231
|
+
# Check if value is above defined threshold
|
232
|
+
# Outside --below mode, tell whether the latest value of the current series
# exceeds the threshold configured for +type+ and no decrease was detected by
# decreased?. Always returns true or false.
def above?(type)
  return false if config[:below]
  return false unless @data.last > config[type]

  !decreased?
end
|
235
|
+
|
236
|
+
# Check if values have decreased within interval if given
|
237
|
+
# Tell whether the series dropped at any point within its most recent
# datapoints. The window size is --reset, counted in datapoints.
#
# Returns false when --reset is not configured, otherwise true when any value
# in the window is lower than the value immediately before it.
def decreased?
  span = config[:reset_on_decrease]
  return false unless span

  # Array#last caps the window at the available data, so an interval larger
  # than the series neither shifts the window nor yields nil.
  window = @data.last(span)

  # Compare real neighbours only: there is no implicit leading 0 that would
  # make a negative first value look like a decrease.
  window.each_cons(2).any? { |earlier, later| earlier > later }
end
|
246
|
+
|
247
|
+
# Returns formatted target with hostname replacing any $ characters
|
248
|
+
# Build the target string placed into the render URL.
#
# When the configured target contains '$', every '$' is replaced by this
# machine's hostname with its periods swapped for --host-sub (default '_');
# the substituted hostname is kept in @formatted for use in status messages.
# That result is returned as-is. A target without '$' is percent-escaped.
def formatted_target
  target = config[:target]

  if target.include?('$')
    require 'socket' # loaded lazily: only needed for hostname substitution
    @formatted = Socket.gethostbyname(Socket.gethostname).first.gsub('.', config[:hostname_sub] || '_')
    target.gsub('$', @formatted)
  else
    # URI.escape was removed in Ruby 3.0; the parser-level escape produces the
    # same output and exists on old and new Rubies alike.
    URI::DEFAULT_PARSER.escape(target)
  end
end
|
257
|
+
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-replication
|
4
|
+
#
|
5
|
+
# DESCRIPTION:
|
6
|
+
# Check to ensure data gets posted and is retrievable by graphite.
|
7
|
+
# We post to each server in config[:relays] then sleep config[:sleep]
|
8
|
+
# seconds then check each of config[:graphites] to see if the data made it
|
9
|
+
# to each one. OK if all servers have the data we expected, WARN if
|
10
|
+
# config[:warning] or fewer have it. CRITICAL if config[:critical]
|
11
|
+
# or fewer have it. config[:check_id] allows you to have many of these
|
12
|
+
# checks running in different places without any conflicts. Customize it
|
13
|
+
# if you are going to run this check from multiple servers. Otherwise
|
14
|
+
# it defaults to default. (can be a descriptive string, used as a graphite key)
|
15
|
+
#
|
16
|
+
# This check is most useful when you have a cluster of carbon-relays configured
|
17
|
+
# with REPLICATION_FACTOR > 1 and more than one graphite server those
|
18
|
+
# carbon-relays are configured to post to. This check ensures that replication
|
19
|
+
# is actually happening in a timely manner.
|
20
|
+
|
21
|
+
# How it works: We generate a large random number for each of these servers
|
22
|
+
# Then we post that number to each server via a key in the form of:
|
23
|
+
# checks.graphite.check_id.replication.your_graphite_server.ip It's safe
|
24
|
+
# to throw this data away quickly. A day retention ought to be more
|
25
|
+
# than enough for anybody.
|
26
|
+
#
|
27
|
+
# OUTPUT:
|
28
|
+
# plain text
|
29
|
+
#
|
30
|
+
# PLATFORMS:
|
31
|
+
# Linux
|
32
|
+
#
|
33
|
+
# DEPENDENCIES:
|
34
|
+
# gem: sensu-plugin
|
35
|
+
# gem: rest-client
|
36
|
+
# gem: json
|
37
|
+
# gem: ipaddress
|
38
|
+
# gem: resolv
|
39
|
+
#
|
40
|
+
# USAGE:
|
41
|
+
# #YELLOW
|
42
|
+
#
|
43
|
+
# NOTES:
|
44
|
+
#
|
45
|
+
# LICENSE:
|
46
|
+
# AJ Bourg <aj@ajbourg.com>
|
47
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
48
|
+
# for details.
|
49
|
+
#
|
50
|
+
|
51
|
+
require 'sensu-plugin/check/cli'
|
52
|
+
require 'timeout'
|
53
|
+
require 'socket'
|
54
|
+
require 'rest-client'
|
55
|
+
require 'json'
|
56
|
+
require 'resolv'
|
57
|
+
require 'ipaddress'
|
58
|
+
|
59
|
+
class CheckGraphiteReplication < Sensu::Plugin::Check::CLI
|
60
|
+
option :relays,
|
61
|
+
short: '-r RELAYS',
|
62
|
+
long: '--relays RELAYS',
|
63
|
+
description: 'Comma separated list of carbon relay servers to post to.',
|
64
|
+
required: true
|
65
|
+
option :servers,
|
66
|
+
short: '-g SERVERS',
|
67
|
+
long: '--graphite SERVERS',
|
68
|
+
description: 'Comma separated list of all graphite servers to check.',
|
69
|
+
required: true
|
70
|
+
option :sleep,
|
71
|
+
short: '-s SECONDS',
|
72
|
+
long: '--sleep SECONDS',
|
73
|
+
description: 'Time to sleep between submitting and checking for value.',
|
74
|
+
default: 30,
|
75
|
+
proc: proc(&:to_i)
|
76
|
+
option :timeout,
|
77
|
+
short: '-t TIMEOUT',
|
78
|
+
long: '--timeout TIMEOUT',
|
79
|
+
description: 'Timeout limit for posting to the relay.',
|
80
|
+
default: 5,
|
81
|
+
proc: proc(&:to_i)
|
82
|
+
option :port,
|
83
|
+
short: '-p PORT',
|
84
|
+
long: '--port PORT',
|
85
|
+
description: 'Port to post to carbon-relay on.',
|
86
|
+
default: 2003,
|
87
|
+
proc: proc(&:to_i)
|
88
|
+
option :critical,
|
89
|
+
short: '-c COUNT',
|
90
|
+
long: '--critical COUNT',
|
91
|
+
description: 'Number of servers missing our test data to be critical.',
|
92
|
+
default: 2,
|
93
|
+
proc: proc(&:to_i)
|
94
|
+
option :warning,
|
95
|
+
short: '-w COUNT',
|
96
|
+
long: '--warning COUNT',
|
97
|
+
description: 'Number of servers missing our test data to be warning.',
|
98
|
+
default: 1,
|
99
|
+
proc: proc(&:to_i)
|
100
|
+
option :check_id,
|
101
|
+
short: '-i ID',
|
102
|
+
long: '--check-id ID',
|
103
|
+
description: 'Check ID to identify this check.',
|
104
|
+
default: 'default'
|
105
|
+
option :verbose,
|
106
|
+
short: '-v',
|
107
|
+
long: '--verbose',
|
108
|
+
description: 'Verbose.',
|
109
|
+
default: false,
|
110
|
+
boolean: true
|
111
|
+
|
112
|
+
# Entry point: post one probe value to every relay address, wait --sleep
# seconds, then ask every graphite server for every probe and count the
# lookups that did not find it.
#
# A failure count at or above --critical / --warning is reported through
# `critical` / `warning`; the final `ok` line carries the totals.
def run
  servers = config[:servers].split(',')
  relay_ips = find_relay_ips(config[:relays].split(','))
  check_id = graphite_key(config[:check_id])

  # One probe per resolved relay address.
  messages = relay_ips.flat_map do |server_name, ips|
    ips.map { |ip| post_message(server_name, ip, check_id) }
  end

  puts "Sleeping for #{config[:sleep]}." if config[:verbose]
  sleep(config[:sleep])

  # on every server, check to see if all our data replicated
  fail_count = 0
  servers.each do |server|
    messages.each do |sent|
      next if check_for_message(server, sent['key'], sent['value'])

      puts "#{sent['relay']} (#{sent['ip']}) didn't post to #{server}"
      fail_count += 1
    end
  end

  if fail_count >= config[:critical]
    critical "Missing data points. #{fail_count} lookups failed."
  elsif fail_count >= config[:warning]
    warning "Missing data points. #{fail_count} lookups failed."
  end

  success_count = (messages.length * servers.length) - fail_count
  ok "#{fail_count} failed checks. #{success_count} successful checks."
end
|
148
|
+
|
149
|
+
# Map every relay to the list of addresses probes are posted to.
#
# Relays may be given as IP addresses, DNS hostnames or a mix: an entry that
# IPAddress accepts is used as-is, anything else is resolved to all of its
# addresses. The whole lookup runs under the --timeout limit.
#
# relays:: Array of relay names/addresses.
# Returns a Hash of relay => Array of address strings.
def find_relay_ips(relays)
  resolved = {}

  time_out('resolving dns') do
    relays.each do |relay|
      resolved[relay] = IPAddress.valid?(relay) ? [relay] : Resolv.getaddresses(relay)
    end
  end

  resolved
end
|
166
|
+
|
167
|
+
# Send one probe datapoint to a relay address over a plaintext TCP connection
# on --port and describe what was sent.
#
# server_name:: relay name as given on the command line (used in the key).
# ip::          address the connection is opened to.
# check_id::    already-sanitised check id segment of the metric key.
#
# Returns a Hash with 'relay', 'ip', 'key' (the metric path) and 'value' (the
# random number that was posted). The connection attempt and write run under
# the --timeout limit.
def post_message(server_name, ip, check_id)
  server_key = graphite_key(server_name)
  ip_key = graphite_key(ip)
  key = "checks.graphite.#{check_id}.replication.#{server_key}.#{ip_key}"

  number = rand(10_000)
  time = Time.now.to_i

  time_out("posting data to #{ip}") do
    # Block form closes the socket even when the write raises.
    TCPSocket.open(ip, config[:port]) do |socket|
      socket.puts("#{key} #{number} #{time}")
    end
  end

  if config[:verbose]
    puts "Posted #{key} to #{server_name} with #{number} on IP #{ip}."
  end

  { 'relay' => server_name, 'ip' => ip, 'key' => key, 'value' => number }
end
|
188
|
+
|
189
|
+
# checks to see if a value landed on a graphite server
|
190
|
+
# Check whether a probe value landed on a graphite server.
#
# server:: host (and optional port) of the graphite web API.
# key::    metric path the probe was posted under.
# value::  number that was posted.
#
# Fetches the last 10 minutes of the metric and returns true when any
# datapoint equals +value+. Returns false when the value is absent, including
# when the server returns no series at all for the key (the metric never
# arrived). HTTP and JSON errors are reported through `critical`.
def check_for_message(server, key, value)
  url = "http://#{server}/render?format=json&target=#{key}&from=-10minutes"

  puts "Checking URL #{url}" if config[:verbose]
  graphite_data = nil

  begin
    time_out("querying graphite api on #{server}") do
      graphite_data = RestClient.get url
      graphite_data = JSON.parse(graphite_data)
    end
  rescue RestClient::Exception, JSON::ParserError => e
    critical "Unexpected error getting data from #{server}: #{e}"
  end

  # An unknown metric comes back as an empty list; treat that as "not
  # replicated" rather than dereferencing a missing first series.
  series = graphite_data.is_a?(Array) ? graphite_data.first : nil
  return false unless series

  # we get all the data points for the last 10 minutes, so see if our value
  # appeared in any of them
  Array(series['datapoints']).any? { |point| point[0] == value }
end
|
215
|
+
|
216
|
+
# Make a string usable as a single graphite path segment by turning every
# comma, space, period and hyphen into an underscore.
def graphite_key(key)
  # The trailing '-' in the character list is a literal hyphen, not a range.
  key.tr(', .-', '_')
end
|
219
|
+
|
220
|
+
# Run the given block under the --timeout limit.
#
# activity:: short description of what the block does, used in the alert text.
#
# Returns the block's value. When the limit is exceeded (or the block itself
# raises Timeout::Error) the failure is reported through `critical`.
def time_out(activity)
  Timeout.timeout(config[:timeout]) do
    # Plain yield: the block takes no arguments, so it is not handed itself.
    yield
  end
rescue Timeout::Error
  critical "Timed out while #{activity}"
end
|
227
|
+
end
|