riemann-tools 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/riemann-health +11 -11
- data/bin/riemann-net +1 -1
- data/bin/riemann-redis +1 -1
- data/bin/riemann-riak +114 -62
- metadata +2 -4
- data/bin/.riemann-health.swp +0 -0
data/bin/riemann-health
CHANGED
@@ -121,13 +121,13 @@ class Riemann::Tools::Health
|
|
121
121
|
end
|
122
122
|
|
123
123
|
def linux_load
|
124
|
-
load = File.read('/proc/loadavg').split(/\s+/)[
|
124
|
+
load = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores
|
125
125
|
if load > @limits[:load][:critical]
|
126
|
-
alert "load", :critical, load, "
|
126
|
+
alert "load", :critical, load, "1-minute load average/core is #{load}"
|
127
127
|
elsif load > @limits[:load][:warning]
|
128
|
-
alert "load", :warning, load, "
|
128
|
+
alert "load", :warning, load, "1-minute load average/core is #{load}"
|
129
129
|
else
|
130
|
-
alert "load", :ok, load, "
|
130
|
+
alert "load", :ok, load, "1-minute load average/core is #{load}"
|
131
131
|
end
|
132
132
|
end
|
133
133
|
|
@@ -163,14 +163,14 @@ class Riemann::Tools::Health
|
|
163
163
|
end
|
164
164
|
|
165
165
|
def freebsd_load
|
166
|
-
m = `uptime`.split[
|
166
|
+
m = `uptime`.split[0].match(/^[0-9]*\.[0-9]*$/)
|
167
167
|
load = m[0].to_f / @cores
|
168
168
|
if load > @limits[:load][:critical]
|
169
|
-
alert "load", :critical, load, "
|
169
|
+
alert "load", :critical, load, "1-minute load average/core is #{load}"
|
170
170
|
elsif load > @limits[:load][:warning]
|
171
|
-
alert "load", :warning, load, "
|
171
|
+
alert "load", :warning, load, "1-minute load average/core is #{load}"
|
172
172
|
else
|
173
|
-
alert "load", :ok, load, "
|
173
|
+
alert "load", :ok, load, "1-minute load average/core is #{load}"
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
@@ -217,11 +217,11 @@ class Riemann::Tools::Health
|
|
217
217
|
end
|
218
218
|
metric = @topdata[:load] / @cores
|
219
219
|
if metric > @limits[:load][:critical]
|
220
|
-
alert "load", :critical, metric, "
|
220
|
+
alert "load", :critical, metric, "1-minute load average per core is #{metric}"
|
221
221
|
elsif metric > @limits[:load][:warning]
|
222
|
-
alert "load", :warning, metric, "
|
222
|
+
alert "load", :warning, metric, "1-minute load average per core is #{metric}"
|
223
223
|
else
|
224
|
-
alert "load", :ok, metric, "
|
224
|
+
alert "load", :ok, metric, "1-minute load average per core is #{metric}"
|
225
225
|
end
|
226
226
|
end
|
227
227
|
|
data/bin/riemann-net
CHANGED
data/bin/riemann-redis
CHANGED
data/bin/riemann-riak
CHANGED
@@ -26,18 +26,10 @@ class Riemann::Tools::Riak
|
|
26
26
|
opt :put_99_warning, "FSM 99% put time warning threshold (ms)", :default => 10000
|
27
27
|
|
28
28
|
def initialize
|
29
|
-
|
30
|
-
|
31
|
-
@httpstatus = true
|
32
|
-
|
33
|
-
if `which escript` =~ /^\s*$/
|
34
|
-
@escript = false
|
35
|
-
end
|
36
|
-
|
37
|
-
if `which riak-admin` =~ /^\s*$/
|
38
|
-
@riakadmin = false
|
39
|
-
end
|
40
|
-
|
29
|
+
detect_features
|
30
|
+
|
31
|
+
@httpstatus = true
|
32
|
+
# What's going on here? --aphyr
|
41
33
|
if
|
42
34
|
begin
|
43
35
|
uri = URI.parse(opts[:riak_host])
|
@@ -56,11 +48,25 @@ class Riemann::Tools::Riak
|
|
56
48
|
@httpstatus = false
|
57
49
|
end
|
58
50
|
end
|
51
|
+
|
59
52
|
# we're going to override the emulator setting to allow users to
|
60
53
|
# dynamically input the cookie
|
61
54
|
# this is done only once - hopefully it doesn't get overridden.
|
62
55
|
ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
|
56
|
+
end
|
63
57
|
|
58
|
+
# Identifies whether escript and riak-admin are installed
|
59
|
+
def detect_features
|
60
|
+
@escript = true # Whether escript is present on this machine
|
61
|
+
@riakadmin = true # Whether riak-admin is present
|
62
|
+
|
63
|
+
if `which escript` =~ /^\s*$/
|
64
|
+
@escript = false
|
65
|
+
end
|
66
|
+
|
67
|
+
if `which riak-admin` =~ /^\s*$/
|
68
|
+
@riakadmin = false
|
69
|
+
end
|
64
70
|
end
|
65
71
|
|
66
72
|
def check_ring
|
@@ -136,54 +142,106 @@ class Riemann::Tools::Riak
|
|
136
142
|
end
|
137
143
|
end
|
138
144
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
end
|
146
|
-
http = Net::HTTP.new(uri.host, opts[:stats_port])
|
147
|
-
http.use_ssl = uri.scheme == 'https'
|
148
|
-
if http.use_ssl?
|
149
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
150
|
-
end
|
151
|
-
res = http.start do |http|
|
152
|
-
http.get opts[:stats_path]
|
145
|
+
# Get current stats via HTTP
|
146
|
+
def stats_http
|
147
|
+
begin
|
148
|
+
uri = URI.parse(opts[:riak_host])
|
149
|
+
if uri.host == nil
|
150
|
+
uri.host = opts[:riak_host]
|
153
151
|
end
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
:state => 'critical',
|
159
|
-
:description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
|
160
|
-
)
|
161
|
-
return
|
152
|
+
http = Net::HTTP.new(uri.host, opts[:stats_port])
|
153
|
+
http.use_ssl = uri.scheme == 'https'
|
154
|
+
if http.use_ssl?
|
155
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
162
156
|
end
|
163
|
-
|
164
|
-
|
165
|
-
stats = JSON.parse(res.body)
|
166
|
-
else
|
167
|
-
report(
|
168
|
-
:host => opts[:riak_host],
|
169
|
-
:service => 'riak',
|
170
|
-
:state => 'critical',
|
171
|
-
:description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
|
172
|
-
)
|
173
|
-
return
|
157
|
+
res = http.start do |http|
|
158
|
+
http.get opts[:stats_path]
|
174
159
|
end
|
160
|
+
rescue => e
|
161
|
+
report(
|
162
|
+
:host => opts[:riak_host],
|
163
|
+
:service => 'riak',
|
164
|
+
:state => 'critical',
|
165
|
+
:description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
|
166
|
+
)
|
167
|
+
raise
|
168
|
+
end
|
169
|
+
|
170
|
+
if res.code.to_i == 200
|
171
|
+
return JSON.parse(res.body)
|
172
|
+
else
|
173
|
+
report(
|
174
|
+
:host => opts[:riak_host],
|
175
|
+
:service => 'riak',
|
176
|
+
:state => 'critical',
|
177
|
+
:description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
|
178
|
+
)
|
179
|
+
raise "Can't fetch stats via HTTP: #{res.core}:\n\n#{res.body}"
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
# Get current stats via riak-admin
|
184
|
+
def stats_riak_admin
|
185
|
+
str = `riak-admin status`
|
186
|
+
raise "riak-admin failed" unless $? == 0
|
187
|
+
Hash[str.split(/\n/).map{|i| i.split(/ : /)}]
|
188
|
+
end
|
189
|
+
|
190
|
+
# Get current stats as a hash
|
191
|
+
def stats
|
192
|
+
if @httpstatus
|
193
|
+
stats_http
|
175
194
|
elsif @riakadmin
|
176
|
-
|
195
|
+
stats_riak_admin
|
177
196
|
else
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
197
|
+
report(
|
198
|
+
:host => opts[:riak_host],
|
199
|
+
:service => 'riak',
|
200
|
+
:state => 'critical',
|
201
|
+
:description => "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
|
202
|
+
)
|
203
|
+
raise "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def core_services
|
208
|
+
['vnode_gets',
|
209
|
+
'vnode_puts',
|
210
|
+
'node_gets',
|
211
|
+
'node_puts',
|
212
|
+
'read_repairs']
|
213
|
+
end
|
214
|
+
|
215
|
+
def fsm_types
|
216
|
+
['get', 'put']
|
217
|
+
end
|
218
|
+
|
219
|
+
def fsm_percentiles
|
220
|
+
[50, 95, 99]
|
221
|
+
end
|
222
|
+
|
223
|
+
# Reports current stats to Riemann
|
224
|
+
def check_stats
|
225
|
+
begin
|
226
|
+
stats = self.stats
|
227
|
+
rescue => e
|
228
|
+
event = {:state => 'critical',
|
229
|
+
:description => e.message,
|
230
|
+
:host => opts[:riak_host]}
|
231
|
+
# Report errors
|
232
|
+
report(event.merge(:service => 'riak'))
|
233
|
+
core_services.each do |s|
|
234
|
+
report(event.merge(:service => "riak #{s}"))
|
235
|
+
end
|
236
|
+
fsm_types.each do |type|
|
237
|
+
fsm_percentiles.each do |percentile|
|
238
|
+
report(event.merge(:service => "riak #{type} #{percentile}"))
|
239
|
+
end
|
240
|
+
end
|
241
|
+
return
|
185
242
|
end
|
186
243
|
|
244
|
+
# Riak itself
|
187
245
|
report(
|
188
246
|
:host => opts[:riak_host],
|
189
247
|
:service => 'riak',
|
@@ -191,13 +249,7 @@ class Riemann::Tools::Riak
|
|
191
249
|
)
|
192
250
|
|
193
251
|
# Gets/puts/rr
|
194
|
-
|
195
|
-
'vnode_gets',
|
196
|
-
'vnode_puts',
|
197
|
-
'node_gets',
|
198
|
-
'node_puts',
|
199
|
-
'read_repairs'
|
200
|
-
].each do |s|
|
252
|
+
core_services.each do |s|
|
201
253
|
report(
|
202
254
|
:host => opts[:riak_host],
|
203
255
|
:service => "riak #{s}",
|
@@ -208,8 +260,8 @@ class Riemann::Tools::Riak
|
|
208
260
|
end
|
209
261
|
|
210
262
|
# FSMs
|
211
|
-
|
212
|
-
|
263
|
+
fsm_types.each do |type|
|
264
|
+
fsm_percentiles.each do |percentile|
|
213
265
|
val = stats[fsm_stat(type, percentile)].to_i || 0
|
214
266
|
val = 0 if val == 'undefined'
|
215
267
|
val /= 1000.0 # Convert us to ms
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: riemann-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-12-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: riemann-client
|
@@ -142,7 +142,6 @@ dependencies:
|
|
142
142
|
description: Utilities which submit events to Riemann.
|
143
143
|
email: aphyr@aphyr.com
|
144
144
|
executables:
|
145
|
-
- .riemann-health.swp
|
146
145
|
- riemann-riak-ring
|
147
146
|
- riemann-riak-keys
|
148
147
|
- riemann-memcached
|
@@ -195,7 +194,6 @@ files:
|
|
195
194
|
- bin/riemann-health
|
196
195
|
- LICENSE
|
197
196
|
- README.markdown
|
198
|
-
- bin/.riemann-health.swp
|
199
197
|
homepage: https://github.com/aphyr/riemann-tools
|
200
198
|
licenses:
|
201
199
|
- MIT
|
data/bin/.riemann-health.swp
DELETED
Binary file
|