riemann-tools 0.2.14 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.docker/Dockerfile +7 -0
  3. data/.docker/publish.sh +35 -0
  4. data/.github/dependabot.yml +11 -0
  5. data/.github/workflows/ci.yml +42 -0
  6. data/.github/workflows/codeql-analysis.yml +72 -0
  7. data/.gitignore +6 -0
  8. data/.rspec +2 -0
  9. data/.rubocop.yml +32 -0
  10. data/.travis.yml +31 -0
  11. data/CHANGELOG.md +430 -0
  12. data/Gemfile +6 -0
  13. data/ISSUE_TEMPLATE.md +15 -0
  14. data/README.markdown +13 -16
  15. data/Rakefile +23 -0
  16. data/SECURITY.md +42 -0
  17. data/bin/riemann-apache-status +92 -77
  18. data/bin/riemann-bench +54 -48
  19. data/bin/riemann-cloudant +44 -39
  20. data/bin/riemann-consul +82 -75
  21. data/bin/riemann-dir-files-count +53 -46
  22. data/bin/riemann-dir-space +53 -46
  23. data/bin/riemann-diskstats +78 -74
  24. data/bin/riemann-fd +68 -47
  25. data/bin/riemann-freeswitch +108 -102
  26. data/bin/riemann-haproxy +46 -39
  27. data/bin/riemann-health +4 -335
  28. data/bin/riemann-kvminstance +18 -12
  29. data/bin/riemann-memcached +35 -28
  30. data/bin/riemann-net +4 -103
  31. data/bin/riemann-nginx-status +74 -66
  32. data/bin/riemann-ntp +4 -32
  33. data/bin/riemann-portcheck +40 -30
  34. data/bin/riemann-proc +96 -89
  35. data/bin/riemann-varnish +51 -44
  36. data/bin/riemann-zookeeper +38 -33
  37. data/lib/riemann/tools/health.rb +347 -0
  38. data/lib/riemann/tools/net.rb +104 -0
  39. data/lib/riemann/tools/ntp.rb +41 -0
  40. data/lib/riemann/tools/utils.rb +17 -0
  41. data/lib/riemann/tools/version.rb +7 -0
  42. data/lib/riemann/tools.rb +38 -31
  43. data/riemann-tools.gemspec +42 -0
  44. data/tools/riemann-aws/LICENSE +21 -0
  45. data/tools/riemann-aws/README.md +54 -0
  46. data/tools/riemann-aws/Rakefile +37 -0
  47. data/tools/riemann-aws/bin/riemann-aws-billing +93 -0
  48. data/tools/riemann-aws/bin/riemann-aws-rds-status +68 -0
  49. data/tools/riemann-aws/bin/riemann-aws-sqs-status +50 -0
  50. data/tools/riemann-aws/bin/riemann-aws-status +83 -0
  51. data/tools/riemann-aws/bin/riemann-elb-metrics +168 -0
  52. data/tools/riemann-aws/bin/riemann-s3-list +87 -0
  53. data/tools/riemann-aws/bin/riemann-s3-status +102 -0
  54. data/tools/riemann-chronos/LICENSE +21 -0
  55. data/tools/riemann-chronos/README.md +10 -0
  56. data/tools/riemann-chronos/Rakefile +37 -0
  57. data/tools/riemann-chronos/bin/riemann-chronos +161 -0
  58. data/tools/riemann-docker/LICENSE +21 -0
  59. data/tools/riemann-docker/README.md +10 -0
  60. data/tools/riemann-docker/Rakefile +36 -0
  61. data/tools/riemann-docker/bin/riemann-docker +206 -0
  62. data/tools/riemann-elasticsearch/LICENSE +21 -0
  63. data/tools/riemann-elasticsearch/README.md +10 -0
  64. data/tools/riemann-elasticsearch/Rakefile +37 -0
  65. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +174 -0
  66. data/tools/riemann-marathon/LICENSE +21 -0
  67. data/tools/riemann-marathon/README.md +10 -0
  68. data/tools/riemann-marathon/Rakefile +37 -0
  69. data/tools/riemann-marathon/bin/riemann-marathon +163 -0
  70. data/tools/riemann-mesos/LICENSE +21 -0
  71. data/tools/riemann-mesos/README.md +10 -0
  72. data/tools/riemann-mesos/Rakefile +37 -0
  73. data/tools/riemann-mesos/bin/riemann-mesos +146 -0
  74. data/tools/riemann-munin/LICENSE +21 -0
  75. data/tools/riemann-munin/README.md +10 -0
  76. data/tools/riemann-munin/Rakefile +36 -0
  77. data/tools/riemann-munin/bin/riemann-munin +43 -0
  78. data/tools/riemann-rabbitmq/LICENSE +21 -0
  79. data/tools/riemann-rabbitmq/README.md +10 -0
  80. data/tools/riemann-rabbitmq/Rakefile +37 -0
  81. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +273 -0
  82. data/tools/riemann-riak/LICENSE +21 -0
  83. data/tools/riemann-riak/README.md +10 -0
  84. data/tools/riemann-riak/Rakefile +36 -0
  85. data/tools/riemann-riak/bin/riemann-riak +323 -0
  86. data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
  87. data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
  88. data/tools/riemann-riak/riak_status/key_count.erl +13 -0
  89. data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
  90. data/tools/riemann-riak/riak_status/ringready.erl +9 -0
  91. metadata +186 -37
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Riemann Riak
2
+
3
+ Gathers Riak statistics and submits them to Riemann.
4
+
5
+ ## Getting started
6
+
7
+ ```
8
+ gem install riemann-riak
9
+ riemann-riak --help
10
+ ```
@@ -0,0 +1,36 @@
1
# frozen_string_literal: true

# Packaging tasks for the riemann-riak gem.

require 'rubygems'
require 'rubygems/package_task'
require 'rdoc/task'
require 'find'

# Don't include resource forks in tarballs on Mac OS X.
ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
ENV['COPYFILE_DISABLE'] = 'true'

# Gemspec
gemspec = Gem::Specification.new do |s|
  s.rubyforge_project = 'riemann-riak'

  s.name = 'riemann-riak'
  s.version = '0.1.2'
  s.author = 'Kyle Kingsbury'
  s.email = 'aphyr@aphyr.com'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits riak stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'yajl-ruby', '>= 1.1.0'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Register only regular files found under bin/. Dir.entries would also
  # return the '.' and '..' directory entries, which are not executables.
  s.executables |= Dir.glob('bin/*').select { |path| File.file?(path) }.map { |path| File.basename(path) }.sort
  s.has_rdoc = false

  s.required_ruby_version = '>= 1.8.7'
end

# Defines the packaging tasks for the gemspec above.
Gem::PackageTask.new gemspec do |p|
end
@@ -0,0 +1,323 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'English'
5
+ Process.setproctitle($PROGRAM_NAME)
6
+
7
+ # Forwards information on a Riak node to Riemann.
8
+
9
+ require 'riemann/tools'
10
+
11
+ module Riemann
12
+ module Tools
13
+ class Riak
14
+ include Riemann::Tools
15
+ require 'net/http'
16
+ require 'net/https'
17
+ require 'yajl/json_gem'
18
+
19
+ opt :riak_host, 'Riak host for stats <IP> or SSL http(s)://<IP>', default: Socket.gethostname
20
+ opt :data_dir, 'Riak data directory', default: '/var/lib/riak'
21
+ opt :stats_port, 'Riak HTTP port for stats', default: 8098
22
+ opt :stats_path, 'Riak HTTP stats path', default: '/stats'
23
+ opt :node_name, 'Riak erlang node name', default: "riak@#{Socket.gethostname}"
24
+ opt :cookie, 'Riak cookie to use', default: 'riak'
25
+
26
+ opt :get_50_warning, 'FSM 50% get time warning threshold (ms)', default: 1000
27
+ opt :put_50_warning, 'FSM 50% put time warning threshold (ms)', default: 1000
28
+ opt :get_95_warning, 'FSM 95% get time warning threshold (ms)', default: 2000
29
+ opt :put_95_warning, 'FSM 95% put time warning threshold (ms)', default: 2000
30
+ opt :get_99_warning, 'FSM 99% get time warning threshold (ms)', default: 10_000
31
+ opt :put_99_warning, 'FSM 99% put time warning threshold (ms)', default: 10_000
32
+
33
+ def initialize
34
+ detect_features
35
+
36
+ @httpstatus = true
37
+
38
+ begin
39
+ uri = URI.parse(opts[:riak_host])
40
+ uri.host = opts[:riak_host] if uri.host.nil?
41
+ http = Net::HTTP.new(uri.host, opts[:stats_port])
42
+ http.use_ssl = uri.scheme == 'https'
43
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
44
+ http.start do |h|
45
+ h.get opts[:stats_path]
46
+ end
47
+ rescue StandardError => _e
48
+ @httpstatus = false
49
+ end
50
+
51
+ # we're going to override the emulator setting to allow users to
52
+ # dynamically input the cookie
53
+ # this is done only once - hopefully it doesn't get overridden.
54
+ ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
55
+ end
56
+
57
# Records whether the `escript` and `riak-admin` executables can be located.
#
# A tool counts as present when `which <tool>` prints no blank line; the
# results are stored in @escript and @riakadmin.
def detect_features
  @escript = (`which escript` =~ /^\s*$/).nil?
  @riakadmin = (`which riak-admin` =~ /^\s*$/).nil?
end
66
+
67
# Reports ring readiness as the 'riak ring' service.
#
# Uses the bundled riemann-riak-ring escript when escript is available and
# falls back to `riak-admin ringready`; does nothing when neither exists.
# Output starting a line with TRUE is 'ok', anything else is 'warning'.
def check_ring
  output =
    if @escript
      `#{__dir__}/riemann-riak-ring #{opts[:node_name]}`.chomp
    elsif @riakadmin
      `riak-admin ringready`
    end
  return if output.nil?

  ring_state = (output =~ /^TRUE/) ? 'ok' : 'warning'
  report(
    host: opts[:riak_host],
    service: 'riak ring',
    state: ring_state,
    description: output,
  )
end
92
+
93
# Reports the key count printed by the bundled riemann-riak-keys escript as
# the 'riak keys' service.
#
# Purely numeric output is reported as 'ok' with the count as metric; any
# other output is reported as 'unknown' with the raw text as description.
def check_keys
  output = `#{__dir__}/riemann-riak-keys #{opts[:node_name]}`.chomp
  event = { host: opts[:riak_host], service: 'riak keys' }

  if output =~ /^\d+$/
    event[:state] = 'ok'
    event[:metric] = output.to_i
  else
    event[:state] = 'unknown'
  end

  event[:description] = output
  report(event)
end
112
+
113
# Reports pending partition handoffs for this node as 'riak transfers'.
#
# Parses `riak-admin transfers`; does nothing when riak-admin is not
# available. A "waiting to handoff N partitions" line for the configured node
# is 'critical' with N as metric, otherwise 'ok' with metric 0.
def check_transfers
  return unless @riakadmin

  output = `riak-admin transfers`
  # Node names contain regexp metacharacters (the dots of a host name), so
  # the name must be escaped to be matched literally.
  node = Regexp.escape(opts[:node_name])
  pending = output[/'#{node}' waiting to handoff (\d+) partitions/, 1]

  if pending
    report(
      host: opts[:riak_host],
      service: 'riak transfers',
      state: 'critical',
      metric: pending.to_i,
      description: "waiting to handoff #{pending} partitions",
    )
  else
    report(
      host: opts[:riak_host],
      service: 'riak transfers',
      state: 'ok',
      metric: 0,
      description: 'No pending transfers',
    )
  end
end
136
+
137
# Reports the size of the Riak data directory, in GB, as 'riak disk'.
#
# The size is taken from the first field of the `du` output.
def check_disk
  # -k forces 1024-byte units: the division below assumes kilobytes, but the
  # default unit of du differs between platforms.
  kilobytes = `du -Lsk #{opts[:data_dir]}`.split(/\s+/).first.to_i
  gb = kilobytes / (1024.0**2)
  report(
    host: opts[:riak_host],
    service: 'riak disk',
    state: 'ok',
    metric: gb,
    description: "#{gb} GB in #{opts[:data_dir]}",
  )
end
147
+
148
# Builds the name of the Riak stat for an FSM type, property and percentile.
# The 50th percentile is named 'median' in the stat key.
def fsm_stat(type, property, percentile)
  suffix = percentile == 50 ? 'median' : percentile
  "node_#{type}_fsm_#{property}_#{suffix}"
end
152
+
153
# Maps an FSM value to an alert state using the "<type>_<percentile>_warning"
# option as threshold: up to the threshold is 'ok', up to twice the threshold
# is 'warning', and everything else is 'critical'.
def fsm_state(type, percentile, val)
  limit = opts[:"#{type}_#{percentile}_warning"]
  return 'ok' if (0..limit).cover?(val)
  return 'warning' if (limit..limit * 2).cover?(val)

  'critical'
end
165
+
166
# Fetches the current stats over HTTP and returns them as a parsed JSON hash.
#
# A 'critical' event for the 'riak' service is reported before any error is
# propagated.
#
# Raises the original error when the request itself fails, and a RuntimeError
# describing the response when the status is not 200.
def stats_http
  begin
    uri = URI.parse(opts[:riak_host])
    # A bare host/IP parses without a host component; use the option as-is.
    uri.host = opts[:riak_host] if uri.host.nil?
    http = Net::HTTP.new(uri.host, opts[:stats_port])
    http.use_ssl = uri.scheme == 'https'
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
    res = http.start { |h| h.get opts[:stats_path] }
  rescue StandardError => e
    report(
      host: opts[:riak_host],
      service: 'riak',
      state: 'critical',
      description: "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}",
    )
    raise
  end

  return JSON.parse(res.body) if res.code.to_i == 200

  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'critical',
    description: "stats returned HTTP #{res.code}:\n\n#{res.body}",
  )
  raise "Can't fetch stats via HTTP: #{res.code}:\n\n#{res.body}"
end
199
+
200
# Fetches the current stats via `riak-admin status` and returns them as a hash
# of stat name => raw string value.
#
# Each output line is split on the first " : " only, so a value that itself
# contains " : " stays intact. Lines without a separator map to nil and blank
# lines are skipped.
#
# Raises RuntimeError when riak-admin exits unsuccessfully.
def stats_riak_admin
  output = `riak-admin status`
  raise 'riak-admin failed' unless $CHILD_STATUS.success?

  output.split(/\n/).each_with_object({}) do |line, parsed|
    key, value = line.split(/ : /, 2)
    parsed[key] = value unless key.nil?
  end
end
207
+
208
# Returns the current stats as a hash, preferring HTTP and falling back to
# riak-admin. When neither is available a 'critical' event is reported for
# the 'riak' service and a RuntimeError is raised.
def stats
  return stats_http if @httpstatus
  return stats_riak_admin if @riakadmin

  failure = 'No mechanism for fetching Riak stats: neither HTTP nor riak-admin available.'
  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'critical',
    description: failure,
  )
  raise failure
end
224
+
225
# Names of the Riak counters that are reported as individual services.
def core_services
  %w[vnode_gets vnode_puts node_gets node_puts node_gets_set node_puts_set read_repairs]
end
234
+
235
# FSM stats to report, as single-pair hashes of FSM type => property.
def fsm_types
  [%w[get time], %w[put time], %w[get set_objsize]].map { |type, prop| { type => prop } }
end
239
+
240
# Percentiles reported for every FSM stat.
def fsm_percentiles
  [50, 95, 99]
end
243
+
244
# Reports current stats to Riemann.
#
# When the stats cannot be fetched, a 'critical' event carrying the error
# message is reported for every service this check covers. Otherwise it
# reports 'riak' as ok, each core counter divided by 60 as a "/sec" rate, and
# each FSM stat (time stats are converted from microseconds to milliseconds
# and graded by fsm_state; other stats are always 'ok').
def check_stats
  begin
    snapshot = stats
  rescue StandardError => e
    report_stats_failure(e)
    return
  end

  # Riak itself
  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'ok',
  )

  # Gets/puts/rr
  core_services.each do |s|
    per_second = snapshot[s].to_i / 60.0
    report(
      host: opts[:riak_host],
      service: "riak #{s}",
      state: 'ok',
      metric: per_second,
      description: "#{per_second}/sec",
    )
  end

  # FSMs
  each_fsm_service do |type, prop, percentile|
    # to_i maps a missing stat (nil) and the string 'undefined' to 0.
    val = snapshot[fsm_stat(type, prop, percentile)].to_i
    timed = prop == 'time'
    val /= 1000.0 if timed # Convert us to ms
    report(
      host: opts[:riak_host],
      service: "riak #{type} #{prop} #{percentile}",
      state: timed ? fsm_state(type, percentile, val) : 'ok',
      metric: val,
      # Only time stats are milliseconds; other stats are shown without a unit.
      description: timed ? "#{val} ms" : val.to_s,
    )
  end
end

# Yields type, property and percentile for every FSM stat combination.
private def each_fsm_service
  fsm_types.each do |typespec|
    typespec.each do |type, prop|
      fsm_percentiles.each { |percentile| yield type, prop, percentile }
    end
  end
end

# Reports the given error as 'critical' for every service check_stats covers.
private def report_stats_failure(error)
  event = {
    state: 'critical',
    description: error.message,
    host: opts[:riak_host],
  }
  report(event.merge(service: 'riak'))
  core_services.each { |s| report(event.merge(service: "riak #{s}")) }
  each_fsm_service do |type, prop, percentile|
    report(event.merge(service: "riak #{type} #{prop} #{percentile}"))
  end
end
310
+
311
# Runs one round of checks, in order: stats, ring, disk, transfers.
def tick
  # check_keys is deliberately left out: it can utterly destroy a cluster.
  %i[check_stats check_ring check_disk check_transfers].each { |check| send(check) }
end
319
+ end
320
+ end
321
+ end
322
+
323
+ Riemann::Tools::Riak.run
@@ -0,0 +1,13 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck@127.0.0.1 -hidden

%% Prints the sum of the per-vnode key counts returned by
%% riak_kv_bitcask_backend:key_counts/0, called via rpc on the given node.
%% Usage: riemann-riak-keys [Node]   (Node defaults to riak@127.0.0.1)
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    io:format("~w\n", [
        lists:foldl(
            fun({_VNode, Count}, Sum) -> Sum + Count end,
            0,
            rpc:call(list_to_atom(Node), riak_kv_bitcask_backend, key_counts, [])
        )
    ]).
@@ -0,0 +1,9 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck@127.0.0.1 -hidden

%% Prints the result of riak_kv_console:ringready([]), called via rpc on the
%% given node.
%% Usage: riemann-riak-ring [Node]   (Node defaults to riak@127.0.0.1)
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    io:format("~p\n", [
        rpc:call(list_to_atom(Node), riak_kv_console, ringready, [[]])
    ]).
@@ -0,0 +1,13 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck -setcookie riak -hidden

%% Prints the sum of the per-vnode key counts returned by
%% riak_kv_bitcask_backend:key_counts/0, called via rpc on the given node.
%% Usage: key_count.erl [Node]   (Node defaults to riak@127.0.0.1)
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    io:format("~w\n", [
        lists:foldl(
            fun({_VNode, Count}, Sum) -> Sum + Count end,
            0,
            rpc:call(list_to_atom(Node), riak_kv_bitcask_backend, key_counts, [])
        )
    ]).
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ $LOAD_PATH.unshift File.expand_path("#{File.dirname(__FILE__)}/../vodpod-common/lib")
7
+ require 'rubygems'
8
+ require 'vodpod-common'
9
+ require 'vodpod/alerts'
10
+ require 'vodpod/starling'
11
+ require 'net/http'
12
+ require 'yajl/json_gem'
13
+
14
# Polls a Riak node (HTTP stats, ring readiness, disk usage) and forwards the
# results as alerts through Vodpod.alert.
class RiakStatus
  PORT = 8098
  PATH = '/stats'
  INTERVAL = 10 # argument to sleep between polling rounds

  # Thresholds per FSM type and percentile, compared against values in ms:
  # up to the threshold is :ok, up to twice the threshold is :warning.
  FSM_LIMITS = {
    get: {
      50 => 1000,
      95 => 2000,
      99 => 10_000,
    },
    put: {
      50 => 1000,
      95 => 2000,
      99 => 10_000,
    },
  }.freeze

  # Names of the counters reported as per-second rates.
  CORE_SERVICES = %w[
    vnode_gets
    vnode_puts
    node_gets
    node_puts
    read_repairs
  ].freeze

  # opts may override :host (default: output of `hostname`), :port and :path.
  def initialize(opts = {})
    @host = opts[:host] || `hostname`.chomp
    @port = opts[:port] || PORT
    @path = opts[:path] || PATH
  end

  # Sends an alert for the "riak <subservice>" service.
  def alert(subservice, state, metric, description)
    Vodpod.alert(
      service: "riak #{subservice}",
      state: state,
      metric: metric,
      description: description,
    )
  end

  # Alerts :ok when ringready.erl prints a line starting with TRUE, :warning
  # otherwise.
  def check_ring
    str = `#{__dir__}/ringready.erl #{local_node}`.chomp
    alert 'ring', (str =~ /^TRUE/ ? :ok : :warning), nil, str
  end

  # Alerts the key count printed by key_count.erl, or :error with the raw
  # output when it is not a number.
  def check_keys
    keys = `#{__dir__}/key_count.erl #{local_node}`.chomp
    if keys =~ /^\d+$/
      alert 'keys', :ok, keys.to_i, keys
    else
      alert 'keys', :error, nil, keys
    end
  end

  # Alerts the size of the bitcask directory in GB (du output / 1024**2).
  def check_disk
    gb = `du -s /var/lib/riak/bitcask/`.split(/\s+/).first.to_i / (1024.0**2)
    alert 'disk', :ok, gb, "#{gb} GB in bitcask"
  end

  # Returns the riak stat for the given fsm type and percentile.
  def fsm_stat(type, percentile)
    "node_#{type}_fsm_time_#{percentile == 50 ? 'median' : percentile}"
  end

  # Returns the alerts state for the given fsm.
  def fsm_state(type, percentile, val)
    limit = FSM_LIMITS[type][percentile]
    case val
    when 0..limit
      :ok
    when limit..limit * 2
      :warning
    else
      :critical
    end
  end

  # Fetches the stats document over HTTP and alerts on it: 'riak' itself, the
  # core counters as per-second rates, and the get/put FSM percentiles.
  # A failed request or a non-200 response produces a single :critical alert.
  def check_stats
    begin
      # Request the configured path rather than a hard-coded one, so the
      # :path option passed to the constructor takes effect.
      res = Net::HTTP.start(@host, @port) { |http| http.get(@path) }
    rescue StandardError => e
      Vodpod.alert(
        service: 'riak',
        state: :critical,
        description: "error fetching #{@path}: #{e.class}, #{e.message}",
      )
      return
    end

    unless res.code.to_i == 200
      Vodpod.alert(
        service: 'riak',
        state: :critical,
        description: "stats returned HTTP #{res.code}:\n\n#{res.body}",
      )
      return
    end

    stats = JSON.parse(res.body)

    Vodpod.alert(
      service: 'riak',
      state: :ok,
    )

    # Gets/puts/rr
    CORE_SERVICES.each do |s|
      # to_f keeps a missing counter from raising; it is reported as 0.0/sec.
      per_second = stats[s].to_f / 60.0
      alert s, :ok, per_second, "#{per_second}/sec"
    end

    # FSMs
    %i[get put].each do |type|
      [50, 95, 99].each do |percentile|
        val = stats[fsm_stat(type, percentile)] || 0
        val = 0 if val == 'undefined'
        val /= 1000.0 # Convert us to ms
        alert "#{type} #{percentile}", fsm_state(type, percentile, val), val, "#{val} ms"
      end
    end
  end

  # Runs the checks forever, sleeping INTERVAL between rounds.
  def run
    loop do
      # check_keys
      check_stats
      check_ring
      check_disk
      sleep INTERVAL
    end
  end

  private

  # Erlang node name passed to the helper scripts. The hostname output is
  # chomped so its trailing newline does not end up inside the command line.
  def local_node
    "riak@#{`hostname`.chomp}"
  end
end
151
+
152
+ RiakStatus.new.run if $PROGRAM_NAME == __FILE__
@@ -0,0 +1,9 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck -setcookie riak -hidden

%% Prints the result of riak_kv_console:ringready([]), called via rpc on the
%% given node.
%% Usage: ringready.erl [Node]   (Node defaults to riak@127.0.0.1)
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    io:format("~p\n", [
        rpc:call(list_to_atom(Node), riak_kv_console, ringready, [[]])
    ]).