riemann-riak 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7358bc4125363c3f2774a6579ea6648b9d5efb1b
4
+ data.tar.gz: ed6e0d58221a1b03e8bd00ff1d2e5649b35b2cd4
5
+ SHA512:
6
+ metadata.gz: 061832fbfbefa06354c931caea31604d41557157c07deb75b06cdaafbf3e8d9c5e82d223179dfe21e6b0e61821fb65e7a81c8e46c147216fbcf1dad3241a58fd
7
+ data.tar.gz: 6a0d5f3ded5dd8fc45132d2a8c4f351e0e78adb006737a98ae84644a7b35da47fa30509b53841555821f0203d2911289af36c577cc3e994790b37defb831a05d
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,10 @@
1
+ # Riemann Riak
2
+
3
+ Gathers Riak statistics and submits them to Riemann.
4
+
5
+ ## Getting started
6
+
7
+ ```
8
+ gem install riemann-riak
9
+ riemann-riak --help
10
+ ```
data/bin/riemann-riak ADDED
@@ -0,0 +1,330 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Forwards information on a Riak node to Riemann.
4
+
5
+ require 'riemann/tools'
6
+
7
+ class Riemann::Tools::Riak
8
+ include Riemann::Tools
9
+ require 'net/http'
10
+ require 'net/https'
11
+ require 'yajl/json_gem'
12
+
13
+ opt :riak_host, "Riak host for stats <IP> or SSL http(s)://<IP>", :default => Socket.gethostname
14
+ opt :data_dir, "Riak data directory", :default => '/var/lib/riak'
15
+ opt :stats_port, "Riak HTTP port for stats", :default => 8098
16
+ opt :stats_path, "Riak HTTP stats path", :default => '/stats'
17
+ opt :node_name, "Riak erlang node name", :default => "riak@#{Socket.gethostname}"
18
+ opt :cookie, "Riak cookie to use", :default => "riak"
19
+
20
+ opt :get_50_warning, "FSM 50% get time warning threshold (ms)", :default => 1000
21
+ opt :put_50_warning, "FSM 50% put time warning threshold (ms)", :default => 1000
22
+ opt :get_95_warning, "FSM 95% get time warning threshold (ms)", :default => 2000
23
+ opt :put_95_warning, "FSM 95% put time warning threshold (ms)", :default => 2000
24
+ opt :get_99_warning, "FSM 99% get time warning threshold (ms)", :default => 10000
25
+ opt :put_99_warning, "FSM 99% put time warning threshold (ms)", :default => 10000
26
+
27
# Detects the locally available tooling, probes the Riak HTTP stats endpoint
# once to decide whether stats can be fetched over HTTP, and exports the
# configured Erlang cookie through ERL_AFLAGS.
def initialize
  detect_features

  # Optimistic default; flipped to false if the probe below raises.
  @httpstatus = true

  begin
    target = URI.parse(opts[:riak_host])
    # A bare hostname or IP parses without a host component; use it verbatim.
    target.host = opts[:riak_host] if target.host.nil?

    client = Net::HTTP.new(target.host, opts[:stats_port])
    client.use_ssl = (target.scheme == 'https')
    # Certificate verification is switched off for HTTPS connections.
    client.verify_mode = OpenSSL::SSL::VERIFY_NONE if client.use_ssl?

    client.start { |conn| conn.get opts[:stats_path] }
  rescue
    @httpstatus = false
  end

  # Override the emulator flags so the user-supplied cookie is picked up.
  # This is set only once, here at start-up.
  ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
end
54
+
55
# Records whether the `escript` and `riak-admin` executables can be found
# with `which`, storing the answers in @escript and @riakadmin.
def detect_features
  @escript = @riakadmin = true

  # `which` printing nothing but whitespace means the command was not found.
  blank = /^\s*$/
  @escript = false if `which escript` =~ blank
  @riakadmin = false if `which riak-admin` =~ blank
end
68
+
69
# Reports the ring status as the 'riak ring' service. The status text comes
# from the bundled riemann-riak-ring helper when escript is available,
# otherwise from `riak-admin ringready`. Nothing is reported when neither
# tool is present.
def check_ring
  output =
    if @escript
      helper = "#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-ring"
      `#{helper} #{opts[:node_name]}`.chomp
    elsif @riakadmin
      `riak-admin ringready`
    end

  return if output.nil?

  # Output beginning with TRUE is 'ok'; anything else is a 'warning'.
  report(
    :host        => opts[:riak_host],
    :service     => 'riak ring',
    :state       => (output =~ /^TRUE/ ? 'ok' : 'warning'),
    :description => output
  )
end
96
+
97
# Runs the bundled riemann-riak-keys helper against the configured node and
# reports its output as the 'riak keys' service: a purely numeric answer
# becomes an 'ok' event carrying the count as metric, anything else is
# reported as 'unknown' with the raw output as description.
def check_keys
  helper = "#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-keys"
  answer = `#{helper} #{opts[:node_name]}`.chomp

  event = {
    :host        => opts[:riak_host],
    :service     => 'riak keys',
    :description => answer
  }

  if answer =~ /^\d+$/
    event[:state]  = 'ok'
    event[:metric] = answer.to_i
  else
    event[:state] = 'unknown'
  end

  report(event)
end
116
+
117
# Reports pending partition handoffs for this node as the 'riak transfers'
# service, based on the output of `riak-admin transfers`. Does nothing when
# riak-admin was not detected (@riakadmin is false).
#
# A line of the form "'<node_name>' waiting to handoff N partitions" yields
# a 'critical' event with N as metric; otherwise an 'ok' event with metric 0
# is sent.
def check_transfers
  return unless @riakadmin

  output = `riak-admin transfers`

  # Escape the node name so that characters such as "." are matched
  # literally instead of being interpreted as regex metacharacters.
  node = Regexp.escape(opts[:node_name])
  pending = output[/'#{node}' waiting to handoff (\d+) partitions/, 1]

  if pending
    report(
      :host        => opts[:riak_host],
      :service     => 'riak transfers',
      :state       => 'critical',
      :metric      => pending.to_i,
      :description => "waiting to handoff #{pending} partitions"
    )
  else
    report(
      :host        => opts[:riak_host],
      :service     => 'riak transfers',
      :state       => 'ok',
      :metric      => 0,
      :description => "No pending transfers"
    )
  end
end
144
+
145
# Reports the size of the Riak data directory, in GB, as the 'riak disk'
# service. The size is taken from `du` (following symlinks, summary only).
def check_disk
  # -k forces 1024-byte units; without it POSIX/BSD du reports 512-byte
  # blocks, which would make the GB figure below wrong.
  kilobytes = `du -Lsk #{opts[:data_dir]}`.split(/\s+/).first.to_i
  gb = kilobytes / (1024.0**2)

  report(
    :host        => opts[:riak_host],
    :service     => 'riak disk',
    :state       => 'ok',
    :metric      => gb,
    :description => "#{gb} GB in #{opts[:data_dir]}"
  )
end
155
+
156
# Builds the name of the Riak stat for an FSM type ('get'/'put'), a property
# and a percentile. The 50th percentile is spelled 'median' in the stat name.
def fsm_stat(type, property, percentile)
  suffix = percentile == 50 ? 'median' : percentile
  format('node_%s_fsm_%s_%s', type, property, suffix)
end
160
+
161
# Maps an FSM measurement to an alert state using the configured
# "<type>_<percentile>_warning" threshold: values from 0 up to the threshold
# are 'ok', values up to twice the threshold are 'warning', and everything
# else is 'critical'.
def fsm_state(type, percentile, val)
  limit = opts["#{type}_#{percentile}_warning".to_sym]

  return 'ok' if (0..limit).include?(val)
  return 'warning' if (limit..limit * 2).include?(val)

  'critical'
end
173
+
174
# Fetches the current stats from Riak's HTTP stats endpoint.
#
# Returns the parsed JSON body when the endpoint answers with HTTP 200.
# On a transport error a 'critical' event is reported for the 'riak' service
# and the original exception is re-raised. On any other HTTP status a
# 'critical' event is reported and a RuntimeError describing the response is
# raised.
def stats_http
  begin
    uri = URI.parse(opts[:riak_host])
    # A bare hostname or IP parses without a host component; use it verbatim.
    uri.host = opts[:riak_host] if uri.host.nil?

    http = Net::HTTP.new(uri.host, opts[:stats_port])
    http.use_ssl = uri.scheme == 'https'
    # Certificate verification is switched off for HTTPS connections.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?

    res = http.start do |h|
      h.get opts[:stats_path]
    end
  rescue => e
    report(
      :host        => opts[:riak_host],
      :service     => 'riak',
      :state       => 'critical',
      :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
    )
    raise
  end

  return JSON.parse(res.body) if res.code.to_i == 200

  report(
    :host        => opts[:riak_host],
    :service     => 'riak',
    :state       => 'critical',
    :description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
  )
  # Uses res.code (the original called the non-existent res.core here).
  raise "Can't fetch stats via HTTP: #{res.code}:\n\n#{res.body}"
end
211
+
212
# Fetches the current stats by running `riak-admin status` and parsing its
# "name : value" lines into a Hash of strings.
#
# Lines that do not contain the " : " separator (for example header or
# separator lines) are skipped, and only the first " : " on a line splits
# name from value, so values may themselves contain " : ".
#
# Raises RuntimeError when riak-admin exits unsuccessfully.
def stats_riak_admin
  str = `riak-admin status`
  raise "riak-admin failed" unless $?.success?

  pairs = str.split(/\n/).map { |line| line.split(/ : /, 2) }
  # Hash[] needs exact [key, value] pairs; drop anything else.
  Hash[pairs.select { |pair| pair.size == 2 }]
end
218
+
219
# Returns the current stats as a hash, preferring the HTTP endpoint and
# falling back to riak-admin. When neither mechanism is available a
# 'critical' event is reported and a RuntimeError is raised.
def stats
  return stats_http if @httpstatus
  return stats_riak_admin if @riakadmin

  report(
    :host        => opts[:riak_host],
    :service     => 'riak',
    :state       => 'critical',
    :description => "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
  )
  raise "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
end
235
+
236
# Names of the Riak counters that check_stats reports as per-second rates.
def core_services
  %w[
    vnode_gets
    vnode_puts
    node_gets
    node_puts
    node_gets_set
    node_puts_set
    read_repairs
  ]
end

# FSM type => property pairs that are reported for every percentile.
def fsm_types
  [%w[get time], %w[put time], %w[get set_objsize]].map do |type, prop|
    { type => prop }
  end
end

# Percentiles reported for each FSM type/property pair.
def fsm_percentiles
  [50, 95, 99]
end
254
+
255
# Fetches the current stats and reports them to Riemann.
#
# When fetching fails, a 'critical' event carrying the error message is sent
# for the 'riak' service, for every core service and for every FSM
# type/property/percentile combination, and the method returns.
#
# Otherwise it reports:
# * 'riak' as ok,
# * each core service as a per-second rate (the stat value divided by 60),
# * each FSM stat; 'time' stats are divided by 1000 (us to ms) and graded
#   with fsm_state, all other properties are always 'ok'.
def check_stats
  begin
    current = stats
  rescue => e
    failure = {
      :state       => 'critical',
      :description => e.message,
      :host        => opts[:riak_host]
    }
    # Flag every service we would normally report on.
    report(failure.merge(:service => 'riak'))
    core_services.each do |s|
      report(failure.merge(:service => "riak #{s}"))
    end
    fsm_types.each do |typespec|
      typespec.each do |type, prop|
        fsm_percentiles.each do |percentile|
          report(failure.merge(:service => "riak #{type} #{prop} #{percentile}"))
        end
      end
    end
    return
  end

  # Riak itself
  report(
    :host    => opts[:riak_host],
    :service => 'riak',
    :state   => 'ok'
  )

  # Gets/puts/rr
  core_services.each do |s|
    rate = current[s].to_i / 60.0
    report(
      :host        => opts[:riak_host],
      :service     => "riak #{s}",
      :state       => 'ok',
      :metric      => rate,
      :description => "#{rate}/sec"
    )
  end

  # FSMs
  fsm_types.each do |typespec|
    typespec.each do |type, prop|
      timed = (prop == 'time')
      fsm_percentiles.each do |percentile|
        # to_i already maps a missing stat (nil) and the string 'undefined'
        # to 0, so no further fallback is needed.
        val = current[fsm_stat(type, prop, percentile)].to_i
        val /= 1000.0 if timed # Convert us to ms

        report(
          :host        => opts[:riak_host],
          :service     => "riak #{type} #{prop} #{percentile}",
          :state       => (timed ? fsm_state(type, percentile, val) : 'ok'),
          :metric      => val,
          # Only time stats are in milliseconds; do not label others as ms.
          :description => (timed ? "#{val} ms" : val.to_s)
        )
      end
    end
  end
end
319
+
320
# Runs one polling cycle: stats, ring, disk and transfers, in that order.
#
# check_keys is deliberately not called here: as the original note puts it,
# it can utterly destroy a cluster.
def tick
  check_stats
  check_ring
  check_disk
  check_transfers
end
328
+ end
329
+
330
+ Riemann::Tools::Riak.run
@@ -0,0 +1,12 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck@127.0.0.1 -hidden

%% Prints the sum of the per-vnode key counts returned by
%% riak_kv_bitcask_backend:key_counts/0 on the given node.
%% With no argument the node defaults to riak@127.0.0.1.
%%
%% If the RPC itself fails ({badrpc, Reason}) the reason is printed and the
%% script exits with status 1 instead of crashing inside lists:foldl/3.
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    case rpc:call(list_to_atom(Node), riak_kv_bitcask_backend, key_counts, []) of
        {badrpc, Reason} ->
            io:format("badrpc: ~p\n", [Reason]),
            halt(1);
        Counts when is_list(Counts) ->
            Total = lists:foldl(
                fun({_VNode, Count}, Sum) -> Sum + Count end,
                0,
                Counts
            ),
            io:format("~w\n", [Total])
    end.
@@ -0,0 +1,8 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck@127.0.0.1 -hidden

%% Calls riak_kv_console:ringready([]) on the given node over RPC and
%% pretty-prints whatever the call returns.
%% With no argument the node defaults to riak@127.0.0.1.
main([]) ->
    main(["riak@127.0.0.1"]);
main([Node]) ->
    Target = list_to_atom(Node),
    Reply = rpc:call(Target, riak_kv_console, ringready, [[]]),
    io:format("~p\n", [Reply]).
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: riemann-riak
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kyle Kingsbury
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-01-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: riemann-tools
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.7
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: yajl-ruby
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.1.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.1.0
41
+ description:
42
+ email: aphyr@aphyr.com
43
+ executables:
44
+ - riemann-riak
45
+ - riemann-riak-keys
46
+ - riemann-riak-ring
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - LICENSE
51
+ - README.md
52
+ - bin/riemann-riak
53
+ - bin/riemann-riak-keys
54
+ - bin/riemann-riak-ring
55
+ homepage: https://github.com/riemann/riemann-riak
56
+ licenses: []
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 1.8.7
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project: riemann-riak
74
+ rubygems_version: 2.4.5
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: Submits riak stats to riemann.
78
+ test_files: []