riemann-tools 0.1.8 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
data/bin/riemann-health CHANGED
@@ -121,13 +121,13 @@ class Riemann::Tools::Health
121
121
  end
122
122
 
123
123
  def linux_load
124
- load = File.read('/proc/loadavg').split(/\s+/)[2].to_f / @cores
124
+ load = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores
125
125
  if load > @limits[:load][:critical]
126
- alert "load", :critical, load, "15-minute load average/core is #{load}"
126
+ alert "load", :critical, load, "1-minute load average/core is #{load}"
127
127
  elsif load > @limits[:load][:warning]
128
- alert "load", :warning, load, "15-minute load average/core is #{load}"
128
+ alert "load", :warning, load, "1-minute load average/core is #{load}"
129
129
  else
130
- alert "load", :ok, load, "15-minute load average/core is #{load}"
130
+ alert "load", :ok, load, "1-minute load average/core is #{load}"
131
131
  end
132
132
  end
133
133
 
@@ -163,14 +163,14 @@ class Riemann::Tools::Health
163
163
  end
164
164
 
165
165
  def freebsd_load
166
- m = `uptime`.split[-1].match(/^[0-9]*\.[0-9]*$/)
166
+ m = `uptime`.split[0].match(/^[0-9]*\.[0-9]*$/)
167
167
  load = m[0].to_f / @cores
168
168
  if load > @limits[:load][:critical]
169
- alert "load", :critical, load, "15-minute load average/core is #{load}"
169
+ alert "load", :critical, load, "1-minute load average/core is #{load}"
170
170
  elsif load > @limits[:load][:warning]
171
- alert "load", :warning, load, "15-minute load average/core is #{load}"
171
+ alert "load", :warning, load, "1-minute load average/core is #{load}"
172
172
  else
173
- alert "load", :ok, load, "15-minute load average/core is #{load}"
173
+ alert "load", :ok, load, "1-minute load average/core is #{load}"
174
174
  end
175
175
  end
176
176
 
@@ -217,11 +217,11 @@ class Riemann::Tools::Health
217
217
  end
218
218
  metric = @topdata[:load] / @cores
219
219
  if metric > @limits[:load][:critical]
220
- alert "load", :critical, metric, "15-minute load average per core is #{metric}"
220
+ alert "load", :critical, metric, "1-minute load average per core is #{metric}"
221
221
  elsif metric > @limits[:load][:warning]
222
- alert "load", :warning, metric, "15-minute load average per core is #{metric}"
222
+ alert "load", :warning, metric, "1-minute load average per core is #{metric}"
223
223
  else
224
- alert "load", :ok, metric, "15-minute load average per core is #{metric}"
224
+ alert "load", :ok, metric, "1-minute load average per core is #{metric}"
225
225
  end
226
226
  end
227
227
 
data/bin/riemann-net CHANGED
@@ -87,7 +87,7 @@ class Riemann::Tools::Net
87
87
  end
88
88
 
89
89
  report(
90
- :service => service,
90
+ :service => service.dup,
91
91
  :metric => (delta.to_f / opts[:interval]),
92
92
  :state => svc_state
93
93
  )
data/bin/riemann-redis CHANGED
@@ -37,7 +37,7 @@ class Riemann::Tools::Redis
37
37
  begin
38
38
  @redis.info(@section).each do |property, value|
39
39
  data = {
40
- :host => opts[:redis_host],
40
+ :host => opts[:redis_host].dup,
41
41
  :service => "redis #{property}",
42
42
  :metric => value.to_f,
43
43
  :state => value.to_s,
data/bin/riemann-riak CHANGED
@@ -26,18 +26,10 @@ class Riemann::Tools::Riak
26
26
  opt :put_99_warning, "FSM 99% put time warning threshold (ms)", :default => 10000
27
27
 
28
28
  def initialize
29
- @escript = true
30
- @riakadmin = true
31
- @httpstatus = true
32
-
33
- if `which escript` =~ /^\s*$/
34
- @escript = false
35
- end
36
-
37
- if `which riak-admin` =~ /^\s*$/
38
- @riakadmin = false
39
- end
40
-
29
+ detect_features
30
+
31
+ @httpstatus = true
32
+ # What's going on here? --aphyr
41
33
  if
42
34
  begin
43
35
  uri = URI.parse(opts[:riak_host])
@@ -56,11 +48,25 @@ class Riemann::Tools::Riak
56
48
  @httpstatus = false
57
49
  end
58
50
  end
51
+
59
52
  # we're going to override the emulator setting to allow users to
60
53
  # dynamically input the cookie
61
54
  # this is done only once - hopefully it doesn't get overridden.
62
55
  ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
56
+ end
63
57
 
58
+ # Identifies whether escript and riak-admin are installed
59
+ def detect_features
60
+ @escript = true # Whether escript is present on this machine
61
+ @riakadmin = true # Whether riak-admin is present
62
+
63
+ if `which escript` =~ /^\s*$/
64
+ @escript = false
65
+ end
66
+
67
+ if `which riak-admin` =~ /^\s*$/
68
+ @riakadmin = false
69
+ end
64
70
  end
65
71
 
66
72
  def check_ring
@@ -136,54 +142,106 @@ class Riemann::Tools::Riak
136
142
  end
137
143
  end
138
144
 
139
- def check_stats
140
- if @httpstatus
141
- begin
142
- uri = URI.parse(opts[:riak_host])
143
- if uri.host == nil
144
- uri.host = opts[:riak_host]
145
- end
146
- http = Net::HTTP.new(uri.host, opts[:stats_port])
147
- http.use_ssl = uri.scheme == 'https'
148
- if http.use_ssl?
149
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
150
- end
151
- res = http.start do |http|
152
- http.get opts[:stats_path]
145
+ # Get current stats via HTTP
146
+ def stats_http
147
+ begin
148
+ uri = URI.parse(opts[:riak_host])
149
+ if uri.host == nil
150
+ uri.host = opts[:riak_host]
153
151
  end
154
- rescue => e
155
- report(
156
- :host => opts[:riak_host],
157
- :service => 'riak',
158
- :state => 'critical',
159
- :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
160
- )
161
- return
152
+ http = Net::HTTP.new(uri.host, opts[:stats_port])
153
+ http.use_ssl = uri.scheme == 'https'
154
+ if http.use_ssl?
155
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
162
156
  end
163
-
164
- if res.code.to_i == 200
165
- stats = JSON.parse(res.body)
166
- else
167
- report(
168
- :host => opts[:riak_host],
169
- :service => 'riak',
170
- :state => 'critical',
171
- :description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
172
- )
173
- return
157
+ res = http.start do |http|
158
+ http.get opts[:stats_path]
174
159
  end
160
+ rescue => e
161
+ report(
162
+ :host => opts[:riak_host],
163
+ :service => 'riak',
164
+ :state => 'critical',
165
+ :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
166
+ )
167
+ raise
168
+ end
169
+
170
+ if res.code.to_i == 200
171
+ return JSON.parse(res.body)
172
+ else
173
+ report(
174
+ :host => opts[:riak_host],
175
+ :service => 'riak',
176
+ :state => 'critical',
177
+ :description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
178
+ )
179
+ raise "Can't fetch stats via HTTP: #{res.core}:\n\n#{res.body}"
180
+ end
181
+ end
182
+
183
+ # Get current stats via riak-admin
184
+ def stats_riak_admin
185
+ str = `riak-admin status`
186
+ raise "riak-admin failed" unless $? == 0
187
+ Hash[str.split(/\n/).map{|i| i.split(/ : /)}]
188
+ end
189
+
190
+ # Get current stats as a hash
191
+ def stats
192
+ if @httpstatus
193
+ stats_http
175
194
  elsif @riakadmin
176
- stats = Hash[`riak-admin status`.split(/\n/).map{|i| i.split(/ : /)}]
195
+ stats_riak_admin
177
196
  else
178
- report(
179
- :host => opts[:riak_host],
180
- :service => 'riak',
181
- :state => 'critical',
182
- :description => "error fetching Riak stats"
183
- )
184
- return
197
+ report(
198
+ :host => opts[:riak_host],
199
+ :service => 'riak',
200
+ :state => 'critical',
201
+ :description => "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
202
+ )
203
+ raise "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
204
+ end
205
+ end
206
+
207
+ def core_services
208
+ ['vnode_gets',
209
+ 'vnode_puts',
210
+ 'node_gets',
211
+ 'node_puts',
212
+ 'read_repairs']
213
+ end
214
+
215
+ def fsm_types
216
+ ['get', 'put']
217
+ end
218
+
219
+ def fsm_percentiles
220
+ [50, 95, 99]
221
+ end
222
+
223
+ # Reports current stats to Riemann
224
+ def check_stats
225
+ begin
226
+ stats = self.stats
227
+ rescue => e
228
+ event = {:state => 'critical',
229
+ :description => e.message,
230
+ :host => opts[:riak_host]}
231
+ # Report errors
232
+ report(event.merge(:service => 'riak'))
233
+ core_services.each do |s|
234
+ report(event.merge(:service => "riak #{s}"))
235
+ end
236
+ fsm_types.each do |type|
237
+ fsm_percentiles.each do |percentile|
238
+ report(event.merge(:service => "riak #{type} #{percentile}"))
239
+ end
240
+ end
241
+ return
185
242
  end
186
243
 
244
+ # Riak itself
187
245
  report(
188
246
  :host => opts[:riak_host],
189
247
  :service => 'riak',
@@ -191,13 +249,7 @@ class Riemann::Tools::Riak
191
249
  )
192
250
 
193
251
  # Gets/puts/rr
194
- [
195
- 'vnode_gets',
196
- 'vnode_puts',
197
- 'node_gets',
198
- 'node_puts',
199
- 'read_repairs'
200
- ].each do |s|
252
+ core_services.each do |s|
201
253
  report(
202
254
  :host => opts[:riak_host],
203
255
  :service => "riak #{s}",
@@ -208,8 +260,8 @@ class Riemann::Tools::Riak
208
260
  end
209
261
 
210
262
  # FSMs
211
- ['get', 'put'].each do |type|
212
- [50, 95, 99].each do |percentile|
263
+ fsm_types.each do |type|
264
+ fsm_percentiles.each do |percentile|
213
265
  val = stats[fsm_stat(type, percentile)].to_i || 0
214
266
  val = 0 if val == 'undefined'
215
267
  val /= 1000.0 # Convert us to ms
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: riemann-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-11-11 00:00:00.000000000 Z
12
+ date: 2013-12-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: riemann-client
@@ -142,7 +142,6 @@ dependencies:
142
142
  description: Utilities which submit events to Riemann.
143
143
  email: aphyr@aphyr.com
144
144
  executables:
145
- - .riemann-health.swp
146
145
  - riemann-riak-ring
147
146
  - riemann-riak-keys
148
147
  - riemann-memcached
@@ -195,7 +194,6 @@ files:
195
194
  - bin/riemann-health
196
195
  - LICENSE
197
196
  - README.markdown
198
- - bin/.riemann-health.swp
199
197
  homepage: https://github.com/aphyr/riemann-tools
200
198
  licenses:
201
199
  - MIT
Binary file