riemann-tools 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +2 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +8 -0
  5. data/.ruby-version +1 -0
  6. data/CHANGELOG.md +25 -2
  7. data/Rakefile +10 -3
  8. data/bin/riemann-apache-status +1 -106
  9. data/bin/riemann-bench +2 -70
  10. data/bin/riemann-cloudant +1 -56
  11. data/bin/riemann-consul +1 -106
  12. data/bin/riemann-dir-files-count +1 -55
  13. data/bin/riemann-dir-space +1 -55
  14. data/bin/riemann-diskstats +1 -92
  15. data/bin/riemann-fd +2 -81
  16. data/bin/riemann-freeswitch +2 -119
  17. data/bin/riemann-haproxy +1 -58
  18. data/bin/riemann-health +0 -2
  19. data/bin/riemann-kvminstance +2 -22
  20. data/bin/riemann-memcached +1 -37
  21. data/bin/riemann-net +0 -2
  22. data/bin/riemann-nginx-status +1 -85
  23. data/bin/riemann-ntp +0 -2
  24. data/bin/riemann-portcheck +1 -44
  25. data/bin/riemann-proc +1 -108
  26. data/bin/riemann-varnish +1 -54
  27. data/bin/riemann-wrapper +75 -0
  28. data/bin/riemann-zookeeper +1 -39
  29. data/lib/riemann/tools/apache_status.rb +107 -0
  30. data/lib/riemann/tools/bench.rb +72 -0
  31. data/lib/riemann/tools/cloudant.rb +57 -0
  32. data/lib/riemann/tools/consul_health.rb +107 -0
  33. data/lib/riemann/tools/dir_files_count.rb +56 -0
  34. data/lib/riemann/tools/dir_space.rb +56 -0
  35. data/lib/riemann/tools/diskstats.rb +94 -0
  36. data/lib/riemann/tools/fd.rb +81 -0
  37. data/lib/riemann/tools/freeswitch.rb +119 -0
  38. data/lib/riemann/tools/haproxy.rb +59 -0
  39. data/lib/riemann/tools/health.rb +150 -19
  40. data/lib/riemann/tools/kvm.rb +23 -0
  41. data/lib/riemann/tools/memcached.rb +38 -0
  42. data/lib/riemann/tools/net.rb +2 -1
  43. data/lib/riemann/tools/nginx_status.rb +86 -0
  44. data/lib/riemann/tools/ntp.rb +1 -0
  45. data/lib/riemann/tools/portcheck.rb +45 -0
  46. data/lib/riemann/tools/proc.rb +109 -0
  47. data/lib/riemann/tools/riemann_client_wrapper.rb +43 -0
  48. data/lib/riemann/tools/uptime_parser.tab.rb +323 -0
  49. data/lib/riemann/tools/varnish.rb +55 -0
  50. data/lib/riemann/tools/version.rb +1 -1
  51. data/lib/riemann/tools/zookeeper.rb +40 -0
  52. data/lib/riemann/tools.rb +2 -20
  53. data/riemann-tools.gemspec +4 -1
  54. data/tools/riemann-aws/Rakefile +6 -9
  55. data/tools/riemann-aws/bin/riemann-aws-billing +2 -87
  56. data/tools/riemann-aws/bin/riemann-aws-rds-status +2 -62
  57. data/tools/riemann-aws/bin/riemann-aws-sqs-status +2 -44
  58. data/tools/riemann-aws/bin/riemann-aws-status +2 -77
  59. data/tools/riemann-aws/bin/riemann-elb-metrics +2 -162
  60. data/tools/riemann-aws/bin/riemann-s3-list +2 -81
  61. data/tools/riemann-aws/bin/riemann-s3-status +2 -96
  62. data/tools/riemann-aws/lib/riemann/tools/aws/billing.rb +87 -0
  63. data/tools/riemann-aws/lib/riemann/tools/aws/elb_metrics.rb +163 -0
  64. data/tools/riemann-aws/lib/riemann/tools/aws/rds_status.rb +63 -0
  65. data/tools/riemann-aws/lib/riemann/tools/aws/s3_list.rb +82 -0
  66. data/tools/riemann-aws/lib/riemann/tools/aws/s3_status.rb +97 -0
  67. data/tools/riemann-aws/lib/riemann/tools/aws/sqs_status.rb +45 -0
  68. data/tools/riemann-aws/lib/riemann/tools/aws/status.rb +74 -0
  69. data/tools/riemann-chronos/Rakefile +6 -9
  70. data/tools/riemann-chronos/bin/riemann-chronos +1 -154
  71. data/tools/riemann-chronos/lib/riemann/tools/chronos.rb +157 -0
  72. data/tools/riemann-docker/Rakefile +5 -8
  73. data/tools/riemann-docker/bin/riemann-docker +2 -200
  74. data/tools/riemann-docker/lib/riemann/tools/docker.rb +200 -0
  75. data/tools/riemann-elasticsearch/Rakefile +6 -9
  76. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +1 -167
  77. data/tools/riemann-elasticsearch/lib/riemann/tools/elasticsearch.rb +170 -0
  78. data/tools/riemann-marathon/Rakefile +6 -9
  79. data/tools/riemann-marathon/bin/riemann-marathon +1 -156
  80. data/tools/riemann-marathon/lib/riemann/tools/marathon.rb +159 -0
  81. data/tools/riemann-mesos/Rakefile +6 -9
  82. data/tools/riemann-mesos/bin/riemann-mesos +1 -139
  83. data/tools/riemann-mesos/lib/riemann/tools/mesos.rb +142 -0
  84. data/tools/riemann-munin/Rakefile +5 -8
  85. data/tools/riemann-munin/bin/riemann-munin +1 -36
  86. data/tools/riemann-munin/lib/riemann/tools/munin.rb +37 -0
  87. data/tools/riemann-rabbitmq/Rakefile +6 -9
  88. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +1 -266
  89. data/tools/riemann-rabbitmq/lib/riemann/tools/rabbitmq.rb +269 -0
  90. data/tools/riemann-riak/Rakefile +5 -8
  91. data/tools/riemann-riak/bin/riemann-riak +1 -316
  92. data/tools/riemann-riak/bin/riemann-riak-keys +0 -1
  93. data/tools/riemann-riak/bin/riemann-riak-ring +0 -1
  94. data/tools/riemann-riak/lib/riemann/tools/riak.rb +317 -0
  95. metadata +57 -10
  96. data/.travis.yml +0 -31
  97. data/tools/riemann-riak/riak_status/key_count.erl +0 -13
  98. data/tools/riemann-riak/riak_status/riak_status.rb +0 -152
  99. data/tools/riemann-riak/riak_status/ringready.erl +0 -9
@@ -1,323 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- require 'English'
5
4
  Process.setproctitle($PROGRAM_NAME)
6
5
 
7
- # Forwards information on a Riak node to Riemann.
8
-
9
- require 'riemann/tools'
10
-
11
- module Riemann
12
- module Tools
13
- class Riak
14
- include Riemann::Tools
15
- require 'net/http'
16
- require 'net/https'
17
- require 'yajl/json_gem'
18
-
19
- opt :riak_host, 'Riak host for stats <IP> or SSL http(s)://<IP>', default: Socket.gethostname
20
- opt :data_dir, 'Riak data directory', default: '/var/lib/riak'
21
- opt :stats_port, 'Riak HTTP port for stats', default: 8098
22
- opt :stats_path, 'Riak HTTP stats path', default: '/stats'
23
- opt :node_name, 'Riak erlang node name', default: "riak@#{Socket.gethostname}"
24
- opt :cookie, 'Riak cookie to use', default: 'riak'
25
-
26
- opt :get_50_warning, 'FSM 50% get time warning threshold (ms)', default: 1000
27
- opt :put_50_warning, 'FSM 50% put time warning threshold (ms)', default: 1000
28
- opt :get_95_warning, 'FSM 95% get time warning threshold (ms)', default: 2000
29
- opt :put_95_warning, 'FSM 95% put time warning threshold (ms)', default: 2000
30
- opt :get_99_warning, 'FSM 99% get time warning threshold (ms)', default: 10_000
31
- opt :put_99_warning, 'FSM 99% put time warning threshold (ms)', default: 10_000
32
-
33
- def initialize
34
- detect_features
35
-
36
- @httpstatus = true
37
-
38
- begin
39
- uri = URI.parse(opts[:riak_host])
40
- uri.host = opts[:riak_host] if uri.host.nil?
41
- http = Net::HTTP.new(uri.host, opts[:stats_port])
42
- http.use_ssl = uri.scheme == 'https'
43
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
44
- http.start do |h|
45
- h.get opts[:stats_path]
46
- end
47
- rescue StandardError => _e
48
- @httpstatus = false
49
- end
50
-
51
- # we're going to override the emulator setting to allow users to
52
- # dynamically input the cookie
53
- # this is done only once - hopefully it doesn't get overridden.
54
- ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
55
- end
56
-
57
- # Identifies whether escript and riak-admin are installed
58
- def detect_features
59
- @escript = true # Whether escript is present on this machine
60
- @riakadmin = true # Whether riak-admin is present
61
-
62
- @escript = false if `which escript` =~ /^\s*$/
63
-
64
- @riakadmin = false if `which riak-admin` =~ /^\s*$/
65
- end
66
-
67
- def check_ring
68
- str = if @escript
69
- `#{__dir__}/riemann-riak-ring #{opts[:node_name]}`.chomp
70
- elsif @riakadmin
71
- `riak-admin ringready`
72
- end
73
-
74
- return if str.nil?
75
-
76
- if str =~ /^TRUE/
77
- report(
78
- host: opts[:riak_host],
79
- service: 'riak ring',
80
- state: 'ok',
81
- description: str,
82
- )
83
- else
84
- report(
85
- host: opts[:riak_host],
86
- service: 'riak ring',
87
- state: 'warning',
88
- description: str,
89
- )
90
- end
91
- end
92
-
93
- def check_keys
94
- keys = `#{__dir__}/riemann-riak-keys #{opts[:node_name]}`.chomp
95
- if keys =~ /^\d+$/
96
- report(
97
- host: opts[:riak_host],
98
- service: 'riak keys',
99
- state: 'ok',
100
- metric: keys.to_i,
101
- description: keys,
102
- )
103
- else
104
- report(
105
- host: opts[:riak_host],
106
- service: 'riak keys',
107
- state: 'unknown',
108
- description: keys,
109
- )
110
- end
111
- end
112
-
113
- def check_transfers
114
- str = (`riak-admin transfers` if @riakadmin)
115
-
116
- return if str.nil?
117
-
118
- if str =~ /'#{opts[:node_name]}' waiting to handoff (\d+) partitions/
119
- report(
120
- host: opts[:riak_host],
121
- service: 'riak transfers',
122
- state: 'critical',
123
- metric: Regexp.last_match(1).to_i,
124
- description: "waiting to handoff #{Regexp.last_match(1)} partitions",
125
- )
126
- else
127
- report(
128
- host: opts[:riak_host],
129
- service: 'riak transfers',
130
- state: 'ok',
131
- metric: 0,
132
- description: 'No pending transfers',
133
- )
134
- end
135
- end
136
-
137
- def check_disk
138
- gb = `du -Ls #{opts[:data_dir]}`.split(/\s+/).first.to_i / (1024.0**2)
139
- report(
140
- host: opts[:riak_host],
141
- service: 'riak disk',
142
- state: 'ok',
143
- metric: gb,
144
- description: "#{gb} GB in #{opts[:data_dir]}",
145
- )
146
- end
147
-
148
- # Returns the riak stat for the given fsm type and percentile.
149
- def fsm_stat(type, property, percentile)
150
- "node_#{type}_fsm_#{property}_#{percentile == 50 ? 'median' : percentile}"
151
- end
152
-
153
- # Returns the alerts state for the given fsm.
154
- def fsm_state(type, percentile, val)
155
- limit = opts["#{type}_#{percentile}_warning".to_sym]
156
- case val
157
- when 0..limit
158
- 'ok'
159
- when limit..limit * 2
160
- 'warning'
161
- else
162
- 'critical'
163
- end
164
- end
165
-
166
- # Get current stats via HTTP
167
- def stats_http
168
- begin
169
- uri = URI.parse(opts[:riak_host])
170
- uri.host = opts[:riak_host] if uri.host.nil?
171
- http = Net::HTTP.new(uri.host, opts[:stats_port])
172
- http.use_ssl = uri.scheme == 'https'
173
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
174
- res = http.start do |h|
175
- h.get opts[:stats_path]
176
- end
177
- rescue StandardError => e
178
- report(
179
- host: opts[:riak_host],
180
- service: 'riak',
181
- state: 'critical',
182
- description: "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}",
183
- )
184
- raise
185
- end
186
-
187
- if res.code.to_i == 200
188
- JSON.parse(res.body)
189
- else
190
- report(
191
- host: opts[:riak_host],
192
- service: 'riak',
193
- state: 'critical',
194
- description: "stats returned HTTP #{res.code}:\n\n#{res.body}",
195
- )
196
- raise "Can't fetch stats via HTTP: #{res.core}:\n\n#{res.body}"
197
- end
198
- end
199
-
200
- # Get current stats via riak-admin
201
- def stats_riak_admin
202
- str = `riak-admin status`
203
- raise 'riak-admin failed' unless $CHILD_STATUS == 0
204
-
205
- Hash[str.split(/\n/).map { |i| i.split(/ : /) }]
206
- end
207
-
208
- # Get current stats as a hash
209
- def stats
210
- if @httpstatus
211
- stats_http
212
- elsif @riakadmin
213
- stats_riak_admin
214
- else
215
- report(
216
- host: opts[:riak_host],
217
- service: 'riak',
218
- state: 'critical',
219
- description: 'No mechanism for fetching Riak stats: neither HTTP nor riak-admin available.',
220
- )
221
- raise 'No mechanism for fetching Riak stats: neither HTTP nor riak-admin available.'
222
- end
223
- end
224
-
225
- def core_services
226
- %w[vnode_gets
227
- vnode_puts
228
- node_gets
229
- node_puts
230
- node_gets_set
231
- node_puts_set
232
- read_repairs]
233
- end
234
-
235
- def fsm_types
236
- [{ 'get' => 'time' }, { 'put' => 'time' },
237
- { 'get' => 'set_objsize' },]
238
- end
239
-
240
- def fsm_percentiles
241
- [50, 95, 99]
242
- end
243
-
244
- # Reports current stats to Riemann
245
- def check_stats
246
- begin
247
- stats = self.stats
248
- rescue StandardError => e
249
- event = {
250
- state: 'critical',
251
- description: e.message,
252
- host: opts[:riak_host],
253
- }
254
- # Report errors
255
- report(event.merge(service: 'riak'))
256
- core_services.each do |s|
257
- report(event.merge(service: "riak #{s}"))
258
- end
259
- fsm_types.each do |typespec|
260
- typespec.each do |type, prop|
261
- fsm_percentiles.each do |percentile|
262
- report(event.merge(service: "riak #{type} #{prop} #{percentile}"))
263
- end
264
- end
265
- end
266
- return
267
- end
268
-
269
- # Riak itself
270
- report(
271
- host: opts[:riak_host],
272
- service: 'riak',
273
- state: 'ok',
274
- )
275
-
276
- # Gets/puts/rr
277
- core_services.each do |s|
278
- report(
279
- host: opts[:riak_host],
280
- service: "riak #{s}",
281
- state: 'ok',
282
- metric: stats[s].to_i / 60.0,
283
- description: "#{stats[s].to_i / 60.0}/sec",
284
- )
285
- end
286
-
287
- # FSMs
288
- fsm_types.each do |typespec|
289
- typespec.each do |type, prop|
290
- fsm_percentiles.each do |percentile|
291
- val = stats[fsm_stat(type, prop, percentile)].to_i || 0
292
- val = 0 if val == 'undefined'
293
- val /= 1000.0 if prop == 'time' # Convert us to ms
294
- state = if prop == 'time'
295
- fsm_state(type, percentile, val)
296
- else
297
- 'ok'
298
- end
299
- report(
300
- host: opts[:riak_host],
301
- service: "riak #{type} #{prop} #{percentile}",
302
- state: state,
303
- metric: val,
304
- description: "#{val} ms",
305
- )
306
- end
307
- end
308
- end
309
- end
310
-
311
- def tick
312
- # This can utterly destroy a cluster, so we disable
313
- # check_keys
314
- check_stats
315
- check_ring
316
- check_disk
317
- check_transfers
318
- end
319
- end
320
- end
321
- end
6
+ require 'riemann/tools/riak'
322
7
 
323
8
  Riemann::Tools::Riak.run
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env escript
2
- Process.setproctitle($0)
3
2
  %%! -name riakstatuscheck@127.0.0.1 -hidden
4
3
 
5
4
  main([]) -> main(["riak@127.0.0.1"]);
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env escript
2
- Process.setproctitle($0)
3
2
  %%! -name riakstatuscheck@127.0.0.1 -hidden
4
3
 
5
4
  main([]) -> main(["riak@127.0.0.1"]);
@@ -0,0 +1,317 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'English'
4
+ require 'riemann/tools'
5
+
6
+ # Forwards information on a Riak node to Riemann.
7
+ module Riemann
8
+ module Tools
9
+ class Riak
10
+ include Riemann::Tools
11
+ require 'net/http'
12
+ require 'net/https'
13
+ require 'yajl/json_gem'
14
+
15
+ opt :riak_host, 'Riak host for stats <IP> or SSL http(s)://<IP>', default: Socket.gethostname
16
+ opt :data_dir, 'Riak data directory', default: '/var/lib/riak'
17
+ opt :stats_port, 'Riak HTTP port for stats', default: 8098
18
+ opt :stats_path, 'Riak HTTP stats path', default: '/stats'
19
+ opt :node_name, 'Riak erlang node name', default: "riak@#{Socket.gethostname}"
20
+ opt :cookie, 'Riak cookie to use', default: 'riak'
21
+
22
+ opt :get_50_warning, 'FSM 50% get time warning threshold (ms)', default: 1000
23
+ opt :put_50_warning, 'FSM 50% put time warning threshold (ms)', default: 1000
24
+ opt :get_95_warning, 'FSM 95% get time warning threshold (ms)', default: 2000
25
+ opt :put_95_warning, 'FSM 95% put time warning threshold (ms)', default: 2000
26
+ opt :get_99_warning, 'FSM 99% get time warning threshold (ms)', default: 10_000
27
+ opt :put_99_warning, 'FSM 99% put time warning threshold (ms)', default: 10_000
28
+
29
# Probes the environment once at start-up.
#
# Records which helper tools are installed (see detect_features), checks
# whether the HTTP stats endpoint can be fetched, and exports the Erlang
# cookie through ERL_AFLAGS.
def initialize
  detect_features

  # Any StandardError raised while probing (bad URI, refused connection,
  # TLS failure, ...) just marks the HTTP endpoint as unavailable.
  @httpstatus =
    begin
      endpoint = URI.parse(opts[:riak_host])
      endpoint.host = opts[:riak_host] if endpoint.host.nil?
      client = ::Net::HTTP.new(endpoint.host, opts[:stats_port])
      client.use_ssl = endpoint.scheme == 'https'
      client.verify_mode = OpenSSL::SSL::VERIFY_NONE if client.use_ssl?
      client.start { |conn| conn.get opts[:stats_path] }
      true
    rescue StandardError
      false
    end

  # Override the emulator flags so users can supply the cookie dynamically.
  # This is done only once - hopefully nothing overrides it afterwards.
  ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
end
52
+
53
# Records which external helpers can be located with `which`.
#
# Sets @escript (escript is present on this machine) and @riakadmin
# (riak-admin is present). A tool counts as missing when the `which` output
# matches /^\s*$/, i.e. is blank.
def detect_features
  blank = /^\s*$/
  @escript = !`which escript`.match?(blank)
  @riakadmin = !`which riak-admin`.match?(blank)
end
62
+
63
# Reports the ring status as the 'riak ring' service.
#
# Uses the riemann-riak-ring helper when escript is available, otherwise
# `riak-admin ringready`; reports nothing when neither tool is present.
# Output starting a line with TRUE is reported as 'ok', anything else as
# 'warning'; the raw output becomes the event description.
def check_ring
  output =
    if @escript
      `riemann-riak-ring #{opts[:node_name]}`.chomp
    elsif @riakadmin
      `riak-admin ringready`
    end
  return if output.nil?

  report(
    host: opts[:riak_host],
    service: 'riak ring',
    state: output.match?(/^TRUE/) ? 'ok' : 'warning',
    description: output,
  )
end
88
+
89
# Reports the key count printed by the riemann-riak-keys helper as the
# 'riak keys' service.
#
# Purely numeric output is reported as 'ok' with the count as the metric;
# anything else is reported as 'unknown' with no metric. The helper output
# is used as the description in both cases.
def check_keys
  count = `riemann-riak-keys #{opts[:node_name]}`.chomp
  event = { host: opts[:riak_host], service: 'riak keys' }
  if count.match?(/^\d+$/)
    event[:state] = 'ok'
    event[:metric] = count.to_i
  else
    event[:state] = 'unknown'
  end
  event[:description] = count
  report(**event)
end
108
+
109
# Reports pending partition handoffs for this node as the 'riak transfers'
# service, based on the output of `riak-admin transfers`.
#
# Does nothing when riak-admin is not available. When the output says this
# node is waiting to hand off N partitions, a 'critical' event with metric N
# is reported; otherwise an 'ok' event with metric 0.
def check_transfers
  return unless @riakadmin

  output = `riak-admin transfers`
  # The node name is matched literally: without escaping, the dots in a
  # host name would match any character and could pick up another node.
  pattern = /'#{Regexp.escape(opts[:node_name])}' waiting to handoff (\d+) partitions/
  pending = output[pattern, 1]

  if pending
    report(
      host: opts[:riak_host],
      service: 'riak transfers',
      state: 'critical',
      metric: pending.to_i,
      description: "waiting to handoff #{pending} partitions",
    )
  else
    report(
      host: opts[:riak_host],
      service: 'riak transfers',
      state: 'ok',
      metric: 0,
      description: 'No pending transfers',
    )
  end
end
132
+
133
# Reports the size of the Riak data directory as the 'riak disk' service.
#
# The size is measured with `du` (following symlinks) and reported in GB as
# the metric of an 'ok' event. When `du` prints no size (e.g. the directory
# does not exist) an 'unknown' event is reported instead of a misleading
# "0 GB".
def check_disk
  dir = opts[:data_dir]
  # argv form: the path never goes through a shell, so spaces and shell
  # metacharacters are safe. -k forces 1 KiB units, which the conversion
  # below relies on (du defaults to 512-byte blocks on some platforms).
  output = IO.popen(['du', '-Lsk', '--', dir], &:read)
  kilobytes = output[/\A\s*(\d+)/, 1]

  if kilobytes.nil?
    report(
      host: opts[:riak_host],
      service: 'riak disk',
      state: 'unknown',
      description: "could not measure disk usage of #{dir}",
    )
    return
  end

  gb = kilobytes.to_i / (1024.0**2)
  report(
    host: opts[:riak_host],
    service: 'riak disk',
    state: 'ok',
    metric: gb,
    description: "#{gb} GB in #{dir}",
  )
end
143
+
144
# Builds the name of the Riak stat for an FSM type, property and percentile.
# The 50th percentile is named 'median'; other percentiles use their number.
def fsm_stat(type, property, percentile)
  suffix =
    if percentile == 50
      'median'
    else
      percentile
    end
  "node_#{type}_fsm_#{property}_#{suffix}"
end
148
+
149
# Maps an FSM value to an alert state using the configured
# "<type>_<percentile>_warning" threshold:
# 0..limit is 'ok', limit..2*limit is 'warning', everything else 'critical'.
def fsm_state(type, percentile, val)
  limit = opts[:"#{type}_#{percentile}_warning"]
  return 'ok' if (0..limit).cover?(val)
  return 'warning' if (limit..(limit * 2)).cover?(val)

  'critical'
end
161
+
162
# Fetches the current stats from the Riak HTTP stats endpoint.
#
# Returns the parsed JSON body when the endpoint answers with HTTP 200.
# Raises the original error when the request itself fails, or a
# RuntimeError when the endpoint answers with any other status. In both
# failure cases a 'critical' event for the 'riak' service is reported
# before raising.
def stats_http
  begin
    uri = URI.parse(opts[:riak_host])
    # The option may be a bare host/IP, which parses without a host part;
    # fall back to the raw option value in that case.
    uri.host = opts[:riak_host] if uri.host.nil?
    http = ::Net::HTTP.new(uri.host, opts[:stats_port])
    http.use_ssl = uri.scheme == 'https'
    # NOTE(review): certificate verification is switched off for https
    # endpoints; kept as-is because existing deployments may rely on it.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
    res = http.start { |conn| conn.get opts[:stats_path] }
  rescue StandardError => e
    report(
      host: opts[:riak_host],
      service: 'riak',
      state: 'critical',
      description: "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}",
    )
    raise
  end

  return JSON.parse(res.body) if res.code.to_i == 200

  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'critical',
    description: "stats returned HTTP #{res.code}:\n\n#{res.body}",
  )
  # The message uses res.code; the previous `res.core` raised NoMethodError
  # and hid the real HTTP status.
  raise "Can't fetch stats via HTTP: #{res.code}:\n\n#{res.body}"
end
195
+
196
# Fetches the current stats by parsing the output of `riak-admin status`.
#
# Returns a Hash of stat name => value, both as the strings printed by
# riak-admin. Raises a RuntimeError when riak-admin exits unsuccessfully.
def stats_riak_admin
  output = `riak-admin status`
  raise 'riak-admin failed' unless $CHILD_STATUS.success?

  # Only "name : value" lines are stats. Splitting at the first separator
  # keeps values that themselves contain " : " intact, and lines without a
  # separator (headers, rulers, blanks) are skipped instead of making the
  # Hash construction raise.
  output.split("\n").each_with_object({}) do |line, stats|
    name, value = line.split(' : ', 2)
    stats[name] = value unless value.nil?
  end
end
203
+
204
# Returns the current stats, preferring the HTTP endpoint and falling back
# to riak-admin. When neither is available a 'critical' event for the
# 'riak' service is reported and a RuntimeError is raised.
def stats
  return stats_http if @httpstatus
  return stats_riak_admin if @riakadmin

  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'critical',
    description: 'No mechanism for fetching Riak stats: neither HTTP nor riak-admin available.',
  )
  raise 'No mechanism for fetching Riak stats: neither HTTP nor riak-admin available.'
end
220
+
221
# Names of the Riak stats that are reported as per-second rates
# (gets, puts and read repairs).
def core_services
  vnode_stats = %w[vnode_gets vnode_puts]
  node_stats = %w[node_gets node_puts node_gets_set node_puts_set]
  vnode_stats + node_stats + %w[read_repairs]
end
230
+
231
# FSM type/property pairs to report, each as a single-entry Hash of
# type => property.
def fsm_types
  pairs = [%w[get time], %w[put time], %w[get set_objsize]]
  pairs.map { |type, property| { type => property } }
end
235
+
236
# Percentiles reported for every FSM stat; 50 maps to the 'median' stat.
def fsm_percentiles
  median = 50
  tail = [95, 99]
  [median, *tail]
end
239
+
240
# Reports the current Riak stats to Riemann.
#
# When the stats cannot be fetched, a 'critical' event carrying the error
# message is reported for 'riak' and for every service this method would
# normally report, and nothing else is done. Otherwise it reports:
# * 'riak' as 'ok';
# * each core service as a per-second rate (stat value / 60);
# * each FSM stat per percentile; time stats are converted from
#   microseconds to milliseconds and graded by fsm_state, other stats are
#   always 'ok'.
def check_stats
  begin
    stats = self.stats
  rescue StandardError => e
    event = {
      state: 'critical',
      description: e.message,
      host: opts[:riak_host],
    }
    # Flag every service we would have reported, in the usual order.
    services = ['riak'] + core_services.map { |s| "riak #{s}" }
    fsm_types.each do |typespec|
      typespec.each do |type, prop|
        fsm_percentiles.each do |percentile|
          services << "riak #{type} #{prop} #{percentile}"
        end
      end
    end
    services.each { |service| report(event.merge(service: service)) }
    return
  end

  # Riak itself
  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'ok',
  )

  # Gets/puts/rr
  core_services.each do |s|
    rate = stats[s].to_i / 60.0
    report(
      host: opts[:riak_host],
      service: "riak #{s}",
      state: 'ok',
      metric: rate,
      description: "#{rate}/sec",
    )
  end

  # FSMs
  fsm_types.each do |typespec|
    typespec.each do |type, prop|
      timing = prop == 'time'
      fsm_percentiles.each do |percentile|
        # to_i already turns a missing stat (nil) or the string
        # 'undefined' into 0, so no further fallback is needed.
        val = stats[fsm_stat(type, prop, percentile)].to_i
        val /= 1000.0 if timing # Convert us to ms
        report(
          host: opts[:riak_host],
          service: "riak #{type} #{prop} #{percentile}",
          state: timing ? fsm_state(type, percentile, val) : 'ok',
          metric: val,
          # Only time stats are in ms; object sizes are labelled in bytes.
          description: timing ? "#{val} ms" : "#{val} bytes",
        )
      end
    end
  end
end
306
+
307
# Runs one round of checks, in order: stats, ring, disk, transfers.
#
# check_keys is deliberately left out: it can utterly destroy a cluster.
def tick
  checks = %i[check_stats check_ring check_disk check_transfers]
  checks.map { |check| send(check) }.last
end
315
+ end
316
+ end
317
+ end