riemann-tools 0.2.13 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.docker/Dockerfile +7 -0
  3. data/.docker/publish.sh +35 -0
  4. data/.github/dependabot.yml +11 -0
  5. data/.github/workflows/ci.yml +42 -0
  6. data/.github/workflows/codeql-analysis.yml +72 -0
  7. data/.gitignore +6 -0
  8. data/.rspec +2 -0
  9. data/.rubocop.yml +32 -0
  10. data/.travis.yml +31 -0
  11. data/CHANGELOG.md +422 -0
  12. data/Gemfile +6 -0
  13. data/ISSUE_TEMPLATE.md +15 -0
  14. data/README.markdown +14 -15
  15. data/Rakefile +23 -0
  16. data/SECURITY.md +42 -0
  17. data/bin/riemann-apache-status +92 -77
  18. data/bin/riemann-bench +54 -48
  19. data/bin/riemann-cloudant +44 -39
  20. data/bin/riemann-consul +82 -75
  21. data/bin/riemann-dir-files-count +53 -46
  22. data/bin/riemann-dir-space +53 -46
  23. data/bin/riemann-diskstats +78 -74
  24. data/bin/riemann-fd +68 -47
  25. data/bin/riemann-freeswitch +108 -102
  26. data/bin/riemann-haproxy +46 -39
  27. data/bin/riemann-health +4 -335
  28. data/bin/riemann-kvminstance +18 -12
  29. data/bin/riemann-memcached +35 -28
  30. data/bin/riemann-net +4 -103
  31. data/bin/riemann-nginx-status +74 -66
  32. data/bin/riemann-ntp +4 -32
  33. data/bin/riemann-portcheck +40 -30
  34. data/bin/riemann-proc +96 -89
  35. data/bin/riemann-varnish +51 -44
  36. data/bin/riemann-zookeeper +38 -33
  37. data/lib/riemann/tools/health.rb +347 -0
  38. data/lib/riemann/tools/net.rb +104 -0
  39. data/lib/riemann/tools/ntp.rb +41 -0
  40. data/lib/riemann/tools/utils.rb +17 -0
  41. data/lib/riemann/tools/version.rb +7 -0
  42. data/lib/riemann/tools.rb +40 -33
  43. data/riemann-tools.gemspec +42 -0
  44. data/tools/riemann-aws/LICENSE +21 -0
  45. data/tools/riemann-aws/README.md +54 -0
  46. data/tools/riemann-aws/Rakefile +37 -0
  47. data/tools/riemann-aws/bin/riemann-aws-billing +93 -0
  48. data/tools/riemann-aws/bin/riemann-aws-rds-status +68 -0
  49. data/tools/riemann-aws/bin/riemann-aws-sqs-status +50 -0
  50. data/tools/riemann-aws/bin/riemann-aws-status +83 -0
  51. data/tools/riemann-aws/bin/riemann-elb-metrics +168 -0
  52. data/tools/riemann-aws/bin/riemann-s3-list +87 -0
  53. data/tools/riemann-aws/bin/riemann-s3-status +102 -0
  54. data/tools/riemann-chronos/LICENSE +21 -0
  55. data/tools/riemann-chronos/README.md +10 -0
  56. data/tools/riemann-chronos/Rakefile +37 -0
  57. data/tools/riemann-chronos/bin/riemann-chronos +161 -0
  58. data/tools/riemann-docker/LICENSE +21 -0
  59. data/tools/riemann-docker/README.md +10 -0
  60. data/tools/riemann-docker/Rakefile +36 -0
  61. data/tools/riemann-docker/bin/riemann-docker +206 -0
  62. data/tools/riemann-elasticsearch/LICENSE +21 -0
  63. data/tools/riemann-elasticsearch/README.md +10 -0
  64. data/tools/riemann-elasticsearch/Rakefile +37 -0
  65. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +174 -0
  66. data/tools/riemann-marathon/LICENSE +21 -0
  67. data/tools/riemann-marathon/README.md +10 -0
  68. data/tools/riemann-marathon/Rakefile +37 -0
  69. data/tools/riemann-marathon/bin/riemann-marathon +163 -0
  70. data/tools/riemann-mesos/LICENSE +21 -0
  71. data/tools/riemann-mesos/README.md +10 -0
  72. data/tools/riemann-mesos/Rakefile +37 -0
  73. data/tools/riemann-mesos/bin/riemann-mesos +146 -0
  74. data/tools/riemann-munin/LICENSE +21 -0
  75. data/tools/riemann-munin/README.md +10 -0
  76. data/tools/riemann-munin/Rakefile +36 -0
  77. data/tools/riemann-munin/bin/riemann-munin +43 -0
  78. data/tools/riemann-rabbitmq/LICENSE +21 -0
  79. data/tools/riemann-rabbitmq/README.md +10 -0
  80. data/tools/riemann-rabbitmq/Rakefile +37 -0
  81. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +273 -0
  82. data/tools/riemann-riak/LICENSE +21 -0
  83. data/tools/riemann-riak/README.md +10 -0
  84. data/tools/riemann-riak/Rakefile +36 -0
  85. data/tools/riemann-riak/bin/riemann-riak +323 -0
  86. data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
  87. data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
  88. data/tools/riemann-riak/riak_status/key_count.erl +13 -0
  89. data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
  90. data/tools/riemann-riak/riak_status/ringready.erl +9 -0
  91. metadata +195 -34
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Riemann Riak
2
+
3
+ Gathers Riak statistics and submits them to Riemann.
4
+
5
+ ## Getting started
6
+
7
+ ```
8
+ gem install riemann-riak
9
+ riemann-riak --help
10
+ ```
@@ -0,0 +1,36 @@
1
# frozen_string_literal: true

# Build script for the riemann-riak gem: declares the gem specification and
# hands it to Gem::PackageTask.

require 'rubygems'
require 'rubygems/package_task'
require 'rdoc/task'
require 'find'

# Don't include resource forks in tarballs on Mac OS X.
ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
ENV['COPYFILE_DISABLE'] = 'true'

# Gemspec
gemspec = Gem::Specification.new do |s|
  s.rubyforge_project = 'riemann-riak'

  s.name = 'riemann-riak'
  s.version = '0.1.2'
  s.author = 'Kyle Kingsbury'
  s.email = 'aphyr@aphyr.com'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits riak stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'yajl-ruby', '>= 1.1.0'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Dir.entries also yields the '.' and '..' directory links; they are not
  # executables, so drop them before registering the contents of bin/.
  s.executables |= Dir.entries('bin/') - %w[. ..]
  s.has_rdoc = false

  s.required_ruby_version = '>= 1.8.7'
end

Gem::PackageTask.new gemspec do |p|
end
@@ -0,0 +1,323 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'English'
5
+ Process.setproctitle($PROGRAM_NAME)
6
+
7
+ # Forwards information on a Riak node to Riemann.
8
+
9
+ require 'riemann/tools'
10
+
11
+ module Riemann
12
+ module Tools
13
+ class Riak
14
+ include Riemann::Tools
15
+ require 'net/http'
16
+ require 'net/https'
17
+ require 'yajl/json_gem'
18
+
19
+ opt :riak_host, 'Riak host for stats <IP> or SSL http(s)://<IP>', default: Socket.gethostname
20
+ opt :data_dir, 'Riak data directory', default: '/var/lib/riak'
21
+ opt :stats_port, 'Riak HTTP port for stats', default: 8098
22
+ opt :stats_path, 'Riak HTTP stats path', default: '/stats'
23
+ opt :node_name, 'Riak erlang node name', default: "riak@#{Socket.gethostname}"
24
+ opt :cookie, 'Riak cookie to use', default: 'riak'
25
+
26
+ opt :get_50_warning, 'FSM 50% get time warning threshold (ms)', default: 1000
27
+ opt :put_50_warning, 'FSM 50% put time warning threshold (ms)', default: 1000
28
+ opt :get_95_warning, 'FSM 95% get time warning threshold (ms)', default: 2000
29
+ opt :put_95_warning, 'FSM 95% put time warning threshold (ms)', default: 2000
30
+ opt :get_99_warning, 'FSM 99% get time warning threshold (ms)', default: 10_000
31
+ opt :put_99_warning, 'FSM 99% put time warning threshold (ms)', default: 10_000
32
+
33
# Detects the locally available tooling, probes the HTTP stats endpoint once
# to decide whether HTTP can be used for stats (@httpstatus), and exports the
# Erlang cookie through ERL_AFLAGS.
def initialize
  detect_features

  # Any StandardError during the probe (bad URI, refused connection, TLS
  # failure, ...) simply marks HTTP as unavailable.
  @httpstatus =
    begin
      endpoint = URI.parse(opts[:riak_host])
      endpoint.host = opts[:riak_host] if endpoint.host.nil?
      client = Net::HTTP.new(endpoint.host, opts[:stats_port])
      client.use_ssl = (endpoint.scheme == 'https')
      client.verify_mode = OpenSSL::SSL::VERIFY_NONE if client.use_ssl?
      client.start { |conn| conn.get opts[:stats_path] }
      true
    rescue StandardError
      false
    end

  # we're going to override the emulator setting to allow users to
  # dynamically input the cookie
  # this is done only once - hopefully it doesn't get overridden.
  ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
end
56
+
57
+ # Identifies whether escript and riak-admin are installed
58
# Sets @escript and @riakadmin to true when `which` prints a path for the
# respective command, and to false when its output is blank.
def detect_features
  blank_output = /^\s*$/

  @escript = `which escript` !~ blank_output     # Whether escript is present on this machine
  @riakadmin = `which riak-admin` !~ blank_output # Whether riak-admin is present
end
66
+
67
# Reports the 'riak ring' service: 'ok' when the ring-ready output starts a
# line with TRUE, 'warning' otherwise. The output comes from the bundled
# riemann-riak-ring escript when escript is available, else from
# `riak-admin ringready`; when neither tool is present nothing is reported.
def check_ring
  output =
    if @escript
      `#{__dir__}/riemann-riak-ring #{opts[:node_name]}`.chomp
    elsif @riakadmin
      `riak-admin ringready`
    end
  return if output.nil?

  ring_state = output =~ /^TRUE/ ? 'ok' : 'warning'
  report(
    host: opts[:riak_host],
    service: 'riak ring',
    state: ring_state,
    description: output,
  )
end
92
+
93
# Runs the bundled riemann-riak-keys script against opts[:node_name] and
# reports the 'riak keys' service: 'ok' with the count as metric when the
# output is purely digits, 'unknown' (without a metric) otherwise.
def check_keys
  output = `#{__dir__}/riemann-riak-keys #{opts[:node_name]}`.chomp
  numeric = output =~ /^\d+$/

  event = {
    host: opts[:riak_host],
    service: 'riak keys',
    state: numeric ? 'ok' : 'unknown',
  }
  event[:metric] = output.to_i if numeric
  event[:description] = output
  report(event)
end
112
+
113
# Reports the 'riak transfers' service from `riak-admin transfers` output:
# 'critical' with the partition count as metric when this node is listed as
# waiting to hand off partitions, 'ok' with metric 0 otherwise. Nothing is
# reported when riak-admin is not available.
def check_transfers
  return unless @riakadmin

  output = `riak-admin transfers`

  # The node name is matched literally: without escaping, the dots of a
  # host name such as riak@10.0.0.1 would match any character.
  node = Regexp.escape(opts[:node_name].to_s)
  pending = output.match(/'#{node}' waiting to handoff (\d+) partitions/)

  if pending
    report(
      host: opts[:riak_host],
      service: 'riak transfers',
      state: 'critical',
      metric: pending[1].to_i,
      description: "waiting to handoff #{pending[1]} partitions",
    )
  else
    report(
      host: opts[:riak_host],
      service: 'riak transfers',
      state: 'ok',
      metric: 0,
      description: 'No pending transfers',
    )
  end
end
136
+
137
# Reports the 'riak disk' service with the size of opts[:data_dir], in GB,
# as the metric. The state is always 'ok'.
def check_disk
  # -k pins the unit to 1 KiB blocks (the division below assumes it), and the
  # array form runs du without a shell so a path containing spaces or shell
  # metacharacters is passed through as a single argument.
  output = IO.popen(['du', '-Lsk', '--', opts[:data_dir].to_s], &:read)
  gb = output.split(/\s+/).first.to_i / (1024.0**2)
  report(
    host: opts[:riak_host],
    service: 'riak disk',
    state: 'ok',
    metric: gb,
    description: "#{gb} GB in #{opts[:data_dir]}",
  )
end
147
+
148
+ # Returns the riak stat for the given fsm type and percentile.
149
# The 50th percentile is exposed under the name "median"; every other
# percentile is used verbatim as the stat-name suffix.
def fsm_stat(type, property, percentile)
  suffix =
    if percentile == 50
      'median'
    else
      percentile
    end
  "node_#{type}_fsm_#{property}_#{suffix}"
end
152
+
153
+ # Returns the alerts state for the given fsm.
154
# The warning threshold is read from the "<type>_<percentile>_warning"
# option. Values from 0 up to the threshold are 'ok', values up to twice the
# threshold are 'warning', and anything else (including negatives) is
# 'critical'.
def fsm_state(type, percentile, val)
  limit = opts[:"#{type}_#{percentile}_warning"]

  return 'ok' if (0..limit).cover?(val)
  return 'warning' if (limit..limit * 2).cover?(val)

  'critical'
end
165
+
166
+ # Get current stats via HTTP
167
# Returns the JSON document served at opts[:stats_path], parsed into a Hash.
# On a transport error a 'critical' event for the 'riak' service is reported
# and the original exception is re-raised; on a non-200 response a
# 'critical' event is reported and a RuntimeError carrying the status code
# and body is raised.
def stats_http
  begin
    uri = URI.parse(opts[:riak_host])
    # A bare host name or IP parses without a host component; fall back to
    # the raw option value in that case.
    uri.host = opts[:riak_host] if uri.host.nil?
    http = Net::HTTP.new(uri.host, opts[:stats_port])
    http.use_ssl = uri.scheme == 'https'
    # NOTE(review): certificate verification is switched off for HTTPS.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
    res = http.start do |h|
      h.get opts[:stats_path]
    end
  rescue StandardError => e
    report(
      host: opts[:riak_host],
      service: 'riak',
      state: 'critical',
      description: "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}",
    )
    raise
  end

  return JSON.parse(res.body) if res.code.to_i == 200

  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'critical',
    description: "stats returned HTTP #{res.code}:\n\n#{res.body}",
  )
  # The response object exposes #code (not #core); the old typo turned this
  # into a NoMethodError that hid the HTTP status.
  raise "Can't fetch stats via HTTP: #{res.code}:\n\n#{res.body}"
end
199
+
200
+ # Get current stats via riak-admin
201
# Parses the "name : value" lines printed by `riak-admin status` into a Hash
# of String => String. Lines without a " : " separator (headers, rules,
# blank lines) are skipped, and only the first separator on a line splits it,
# so values that themselves contain " : " are kept intact.
# Raises RuntimeError when the command did not exit successfully.
def stats_riak_admin
  output = `riak-admin status`
  raise 'riak-admin failed' unless $CHILD_STATUS&.success?

  output.each_line.with_object({}) do |line, parsed|
    name, value = line.chomp.split(' : ', 2)
    next if value.nil?

    parsed[name] = value
  end
end
207
+
208
+ # Get current stats as a hash
209
# Prefers the HTTP endpoint, falls back to riak-admin, and otherwise reports
# a 'critical' event for the 'riak' service and raises a RuntimeError.
def stats
  return stats_http if @httpstatus
  return stats_riak_admin if @riakadmin

  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'critical',
    description: 'No mechanism for fetching Riak stats: neither HTTP nor riak-admin available.',
  )
  raise 'No mechanism for fetching Riak stats: neither HTTP nor riak-admin available.'
end
224
+
225
# Names of the Riak stats that check_stats reports as per-second rates.
def core_services
  [
    'vnode_gets',
    'vnode_puts',
    'node_gets',
    'node_puts',
    'node_gets_set',
    'node_puts_set',
    'read_repairs',
  ]
end
234
+
235
# FSM stat families to report, each as a single-pair Hash of
# FSM type => measured property.
def fsm_types
  [
    %w[get time],
    %w[put time],
    %w[get set_objsize],
  ].map { |type, property| { type => property } }
end
239
+
240
# Percentiles reported for every FSM stat family; 50 is the median.
def fsm_percentiles
  median = 50
  [median, 95, 99]
end
243
+
244
+ # Reports current stats to Riemann
245
# When the stats cannot be fetched, a 'critical' event carrying the error
# message is reported for every service this method normally covers, so none
# of them keeps a stale state. Otherwise it reports:
#   * 'riak' as 'ok',
#   * each core service as a per-second rate (the raw value divided by 60),
#   * each FSM type/property/percentile; 'time' values are converted from
#     microseconds to milliseconds and graded by fsm_state, every other
#     property is always 'ok'.
def check_stats
  begin
    snapshot = stats
  rescue StandardError => e
    failure = {
      state: 'critical',
      description: e.message,
      host: opts[:riak_host],
    }
    # Report errors
    report(failure.merge(service: 'riak'))
    core_services.each do |s|
      report(failure.merge(service: "riak #{s}"))
    end
    fsm_types.each do |typespec|
      typespec.each do |type, prop|
        fsm_percentiles.each do |percentile|
          report(failure.merge(service: "riak #{type} #{prop} #{percentile}"))
        end
      end
    end
    return
  end

  # Riak itself
  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'ok',
  )

  # Gets/puts/rr
  core_services.each do |s|
    rate = snapshot[s].to_i / 60.0
    report(
      host: opts[:riak_host],
      service: "riak #{s}",
      state: 'ok',
      metric: rate,
      description: "#{rate}/sec",
    )
  end

  # FSMs
  fsm_types.each do |typespec|
    typespec.each do |type, prop|
      timed = prop == 'time'
      fsm_percentiles.each do |percentile|
        # to_i maps both a missing stat (nil) and the string 'undefined' to 0.
        val = snapshot[fsm_stat(type, prop, percentile)].to_i
        val /= 1000.0 if timed # Convert us to ms
        report(
          host: opts[:riak_host],
          service: "riak #{type} #{prop} #{percentile}",
          state: timed ? fsm_state(type, percentile, val) : 'ok',
          metric: val,
          # Only time-based stats are in milliseconds; other properties are
          # reported as the bare value instead of being mislabelled "ms".
          description: timed ? "#{val} ms" : val.to_s,
        )
      end
    end
  end
end
310
+
311
# Runs one polling round: stats, ring, disk and transfers, in that order.
def tick
  # This can utterly destroy a cluster, so we disable
  # check_keys
  %i[check_stats check_ring check_disk check_transfers].each do |check|
    send(check)
  end
end
319
+ end
320
+ end
321
+ end
322
+
323
+ Riemann::Tools::Riak.run
@@ -0,0 +1,13 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck@127.0.0.1 -hidden

%% Prints the sum of the Count element of every {VNode, Count} pair returned
%% by riak_kv_bitcask_backend:key_counts/0 on the target node.
%% Usage: riemann-riak-keys [Node]   (Node defaults to riak@127.0.0.1)
%%
%% The emulator-flags line (%%!) must directly follow the interpreter line;
%% no non-Erlang statement may sit between them.
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    io:format("~w\n", [
        lists:foldl(
            fun({_VNode, Count}, Sum) -> Sum + Count end,
            0,
            rpc:call(list_to_atom(Node), riak_kv_bitcask_backend, key_counts, [])
        )
    ]).
@@ -0,0 +1,9 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck@127.0.0.1 -hidden

%% Calls riak_kv_console:ringready/1 on the target node and prints the
%% term it returns.
%% Usage: riemann-riak-ring [Node]   (Node defaults to riak@127.0.0.1)
%%
%% The emulator-flags line (%%!) must directly follow the interpreter line;
%% no non-Erlang statement may sit between them.
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    io:format("~p\n", [
        rpc:call(list_to_atom(Node), riak_kv_console, ringready, [[]])
    ]).
@@ -0,0 +1,13 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck -setcookie riak -hidden

%% Prints the sum of the Count element of every {VNode, Count} pair returned
%% by riak_kv_bitcask_backend:key_counts/0 on the target node.
%% Usage: key_count.erl [Node]   (Node defaults to riak@127.0.0.1)
%%
%% The emulator-flags line (%%!) must directly follow the interpreter line;
%% no non-Erlang statement may sit between them.
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    io:format("~w\n", [
        lists:foldl(
            fun({_VNode, Count}, Sum) -> Sum + Count end,
            0,
            rpc:call(list_to_atom(Node), riak_kv_bitcask_backend, key_counts, [])
        )
    ]).
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ $LOAD_PATH.unshift File.expand_path("#{File.dirname(__FILE__)}/../vodpod-common/lib")
7
+ require 'rubygems'
8
+ require 'vodpod-common'
9
+ require 'vodpod/alerts'
10
+ require 'vodpod/starling'
11
+ require 'net/http'
12
+ require 'yajl/json_gem'
13
+
14
# Polls a Riak node (HTTP stats endpoint plus the helper scripts that sit
# next to this file) and forwards the results through Vodpod.alert.
class RiakStatus
  PORT = 8098
  PATH = '/stats'
  INTERVAL = 10 # Passed to sleep between polling rounds in #run.

  # Warning thresholds per FSM type and percentile, compared against the
  # FSM times after they have been converted to milliseconds.
  FSM_LIMITS = {
    get: {
      50 => 1000,
      95 => 2000,
      99 => 10_000,
    },
    put: {
      50 => 1000,
      95 => 2000,
      99 => 10_000,
    },
  }.freeze

  # opts may carry :host, :port and :path; each falls back to the local
  # hostname, PORT and PATH respectively.
  def initialize(opts = {})
    @host = opts[:host] || `hostname`.chomp
    @port = opts[:port] || PORT
    @path = opts[:path] || PATH
  end

  # Sends one alert for the "riak <subservice>" service.
  def alert(subservice, state, metric, description)
    Vodpod.alert(
      service: "riak #{subservice}",
      state: state,
      metric: metric,
      description: description,
    )
  end

  # Alerts :ok when ringready.erl prints a line starting with TRUE,
  # :warning otherwise.
  def check_ring
    str = %x(#{__dir__}/ringready.erl riak@#{`hostname`}).chomp
    if str =~ /^TRUE/
      alert 'ring', :ok, nil, str
    else
      alert 'ring', :warning, nil, str
    end
  end

  # Alerts :ok with the key count when key_count.erl prints only digits,
  # :error with the raw output otherwise.
  def check_keys
    keys = %x(#{__dir__}/key_count.erl riak@#{`hostname`}).chomp
    if keys =~ /^\d+$/
      alert 'keys', :ok, keys.to_i, keys
    else
      alert 'keys', :error, nil, keys
    end
  end

  # Alerts the size of the bitcask directory as reported by du, in GB.
  def check_disk
    gb = `du -s /var/lib/riak/bitcask/`.split(/\s+/).first.to_i / (1024.0**2)
    alert 'disk', :ok, gb, "#{gb} GB in bitcask"
  end

  # Returns the riak stat for the given fsm type and percentile.
  def fsm_stat(type, percentile)
    "node_#{type}_fsm_time_#{percentile == 50 ? 'median' : percentile}"
  end

  # Returns the alerts state for the given fsm.
  def fsm_state(type, percentile, val)
    limit = FSM_LIMITS[type][percentile]
    case val
    when 0..limit
      :ok
    when limit..limit * 2
      :warning
    else
      :critical
    end
  end

  # Fetches the stats document and alerts on the node itself, the core
  # per-second rates and the get/put FSM times. Alerts :critical for 'riak'
  # and returns early when the fetch fails or does not answer 200.
  def check_stats
    begin
      res = Net::HTTP.start(@host, @port) do |http|
        # Honour the path given to #initialize instead of a fixed '/stats'.
        http.get(@path)
      end
    rescue StandardError => e
      Vodpod.alert(
        service: 'riak',
        state: :critical,
        description: "error fetching #{@path}: #{e.class}, #{e.message}",
      )
      return
    end

    if res.code.to_i == 200
      stats = JSON.parse(res.body)
    else
      Vodpod.alert(
        service: 'riak',
        state: :critical,
        description: "stats returned HTTP #{res.code}:\n\n#{res.body}",
      )
      return
    end

    Vodpod.alert(
      service: 'riak',
      state: :ok,
    )

    # Gets/puts/rr
    %w[
      vnode_gets
      vnode_puts
      node_gets
      node_puts
      read_repairs
    ].each do |s|
      # A stat absent from the document counts as 0 rather than raising on nil.
      rate = (stats[s] || 0) / 60.0
      alert s, :ok, rate, "#{rate}/sec"
    end

    # FSMs
    %i[get put].each do |type|
      [50, 95, 99].each do |percentile|
        val = stats[fsm_stat(type, percentile)] || 0
        val = 0 if val == 'undefined'
        val /= 1000.0 # Convert us to ms
        state = fsm_state(type, percentile, val)
        alert "#{type} #{percentile}", state, val, "#{val} ms"
      end
    end
  end

  # Polls forever, sleeping INTERVAL between rounds.
  def run
    loop do
      # check_keys
      check_stats
      check_ring
      check_disk
      sleep INTERVAL
    end
  end
end
151
+
152
+ RiakStatus.new.run if $PROGRAM_NAME == __FILE__
@@ -0,0 +1,9 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck -setcookie riak -hidden

%% Calls riak_kv_console:ringready/1 on the target node and prints the
%% term it returns.
%% Usage: ringready.erl [Node]   (Node defaults to riak@127.0.0.1)
%%
%% The emulator-flags line (%%!) must directly follow the interpreter line;
%% no non-Erlang statement may sit between them.
main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
    io:format("~p\n", [
        rpc:call(list_to_atom(Node), riak_kv_console, ringready, [[]])
    ]).