riemann-tools 0.2.14 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +5 -5
  2. data/.docker/Dockerfile +7 -0
  3. data/.docker/publish.sh +35 -0
  4. data/.github/workflows/ci.yml +29 -0
  5. data/.gitignore +6 -0
  6. data/.rspec +2 -0
  7. data/.travis.yml +31 -0
  8. data/CHANGELOG.md +393 -0
  9. data/Gemfile +6 -0
  10. data/ISSUE_TEMPLATE.md +15 -0
  11. data/README.markdown +14 -1
  12. data/Rakefile +21 -0
  13. data/bin/riemann-apache-status +1 -0
  14. data/bin/riemann-bench +1 -0
  15. data/bin/riemann-cloudant +1 -0
  16. data/bin/riemann-consul +1 -0
  17. data/bin/riemann-dir-files-count +1 -0
  18. data/bin/riemann-dir-space +1 -0
  19. data/bin/riemann-diskstats +1 -0
  20. data/bin/riemann-fd +1 -0
  21. data/bin/riemann-freeswitch +1 -0
  22. data/bin/riemann-haproxy +1 -0
  23. data/bin/riemann-health +19 -11
  24. data/bin/riemann-kvminstance +1 -0
  25. data/bin/riemann-memcached +1 -0
  26. data/bin/riemann-net +1 -0
  27. data/bin/riemann-nginx-status +1 -0
  28. data/bin/riemann-ntp +1 -0
  29. data/bin/riemann-portcheck +1 -0
  30. data/bin/riemann-proc +1 -0
  31. data/bin/riemann-varnish +1 -0
  32. data/bin/riemann-zookeeper +1 -0
  33. data/lib/riemann/tools/utils.rb +17 -0
  34. data/lib/riemann/tools/version.rb +7 -0
  35. data/lib/riemann/tools.rb +12 -2
  36. data/riemann-tools.gemspec +39 -0
  37. data/tools/riemann-aws/LICENSE +21 -0
  38. data/tools/riemann-aws/README.md +54 -0
  39. data/tools/riemann-aws/Rakefile.rb +35 -0
  40. data/tools/riemann-aws/bin/riemann-aws-billing +87 -0
  41. data/tools/riemann-aws/bin/riemann-aws-rds-status +54 -0
  42. data/tools/riemann-aws/bin/riemann-aws-sqs-status +44 -0
  43. data/tools/riemann-aws/bin/riemann-aws-status +71 -0
  44. data/tools/riemann-aws/bin/riemann-elb-metrics +167 -0
  45. data/tools/riemann-aws/bin/riemann-s3-list +82 -0
  46. data/tools/riemann-aws/bin/riemann-s3-status +99 -0
  47. data/tools/riemann-chronos/LICENSE +21 -0
  48. data/tools/riemann-chronos/README.md +10 -0
  49. data/tools/riemann-chronos/Rakefile.rb +35 -0
  50. data/tools/riemann-chronos/bin/riemann-chronos +144 -0
  51. data/tools/riemann-docker/LICENSE +21 -0
  52. data/tools/riemann-docker/README.md +10 -0
  53. data/tools/riemann-docker/Rakefile.rb +34 -0
  54. data/tools/riemann-docker/bin/riemann-docker +217 -0
  55. data/tools/riemann-elasticsearch/LICENSE +21 -0
  56. data/tools/riemann-elasticsearch/README.md +10 -0
  57. data/tools/riemann-elasticsearch/Rakefile.rb +35 -0
  58. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +166 -0
  59. data/tools/riemann-marathon/LICENSE +21 -0
  60. data/tools/riemann-marathon/README.md +10 -0
  61. data/tools/riemann-marathon/Rakefile.rb +35 -0
  62. data/tools/riemann-marathon/bin/riemann-marathon +147 -0
  63. data/tools/riemann-mesos/LICENSE +21 -0
  64. data/tools/riemann-mesos/README.md +10 -0
  65. data/tools/riemann-mesos/Rakefile.rb +35 -0
  66. data/tools/riemann-mesos/bin/riemann-mesos +131 -0
  67. data/tools/riemann-munin/LICENSE +21 -0
  68. data/tools/riemann-munin/README.md +10 -0
  69. data/tools/riemann-munin/Rakefile.rb +34 -0
  70. data/tools/riemann-munin/bin/riemann-munin +37 -0
  71. data/tools/riemann-rabbitmq/LICENSE +21 -0
  72. data/tools/riemann-rabbitmq/README.md +10 -0
  73. data/tools/riemann-rabbitmq/Rakefile.rb +35 -0
  74. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +269 -0
  75. data/tools/riemann-riak/LICENSE +21 -0
  76. data/tools/riemann-riak/README.md +10 -0
  77. data/tools/riemann-riak/Rakefile.rb +34 -0
  78. data/tools/riemann-riak/bin/riemann-riak +331 -0
  79. data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
  80. data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
  81. data/tools/riemann-riak/riak_status/key_count.erl +13 -0
  82. data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
  83. data/tools/riemann-riak/riak_status/ringready.erl +9 -0
  84. metadata +134 -34
@@ -0,0 +1,269 @@
1
+ #!/usr/bin/env ruby
2
+ Process.setproctitle($0)
3
+
4
+ require 'riemann/tools'
5
+
6
+ class Riemann::Tools::Rabbitmq
7
+ include Riemann::Tools
8
+
9
+ require 'faraday'
10
+ require 'json'
11
+ require 'uri'
12
+
13
+
14
+ opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
15
+ opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
16
+
17
+ opt :monitor_user, 'RabbitMQ monitoring user', type: :string
18
+ opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
19
+ opt :monitor_port, 'RabbitMQ monitoring port', type: :int, default: 15672
20
+ opt :monitor_host, 'RabbitMQ monitoring host', type: :string, default: "localhost"
21
+ opt :monitor_use_tls, 'RabbitMQ use tls', type: :bool, default: false
22
+
23
+ opt :max_queue_size, "max number of items in a queue that is acceptable", type: :int, default: 1_000_000
24
+ opt :ignore_max_size_queues, "A regular expression to match queues that shouldn't be size-checked", type: :string
25
+
26
+ opt :node, "Specify a node to monitor", type: :strings
27
+
28
# Root URL of the monitoring API (always ends in "/api"), assembled from the
# monitor_* options with the monitoring user and password embedded as
# userinfo. "https" is used only when :monitor_use_tls is exactly `true`.
def base_url
  scheme = options[:monitor_use_tls] == true ? "https" : "http"
  credentials = "#{options[:monitor_user]}:#{options[:monitor_pass]}"
  endpoint = "#{options[:monitor_host]}:#{options[:monitor_port]}"
  "#{scheme}://#{credentials}@#{endpoint}/api"
end
35
+
36
# URL of the "overview" resource directly under base_url.
def overview_url
  base_url + "/overview"
end
39
+
40
# URL of the resource describing node +n+ ("<base_url>/nodes/<n>").
# +n+ is inserted as-is (via to_s), without any escaping.
def node_url(n)
  base_url + "/nodes/" + n.to_s
end
43
+
44
# URL of the "queues" resource directly under base_url.
def queues_url
  base_url + "/queues"
end
47
+
48
# Host name attached to every reported event: the :event_host option when it
# is set, otherwise the host being monitored (:monitor_host).
def event_host
  options[:event_host] || options[:monitor_host]
end
55
+
56
# Issues a GET for +uri+ through Faraday, applying the :read_timeout and
# :open_timeout options to the request, and reports the outcome as a
# "rabbitmq monitoring" event for +event_host+ ("ok" when the request
# completed, "critical" with the exception class and message otherwise).
#
# Returns the Faraday response, or nil when the request itself raised;
# exceptions are reported rather than propagated.
def safe_get(uri, event_host)
  health = { :host => event_host, :service => "rabbitmq monitoring" }
  reply = nil

  begin
    reply = Faraday.new(uri).get do |request|
      request.options[:timeout] = options[:read_timeout]
      request.options[:open_timeout] = options[:open_timeout]
    end
    report(health.merge(:state => 'ok',
                        :description => "Monitoring operational"))
  rescue => e
    report(health.merge(:state => "critical",
                        :description => "HTTP connection error: #{e.class} - #{e.message}"))
  end

  reply
end
79
+
80
# Polls the queues endpoint and reports on every queue it lists.
#
# Events emitted for event_host:
#   rabbitmq.queue
#       "critical" on a non-200 reply (with status and body), "ok" otherwise.
#   rabbitmq.queue.<vhost>.<name>
#       "critical" when the queue has ready messages but no consumers, or
#       when its ready messages exceed :max_queue_size (queues whose name
#       matches the :ignore_max_size_queues regexp skip the size check);
#       "ok" otherwise.
#   rabbitmq.queue.<vhost>.<name>.<stat>
#       one metric per entry of the queue's message_stats plus the message,
#       consumer and memory counters; "*_details" entries report their rate.
#
# Nothing beyond safe_get's own event is reported when the request failed.
def check_queues
  response = safe_get(queues_url, event_host)
  size_exempt = options[:ignore_max_size_queues] &&
                Regexp.new(options[:ignore_max_size_queues])

  return if response.nil?

  if response.status != 200
    # The body is parsed only on success: an error reply is not guaranteed
    # to be JSON, and a parse failure here would hide this critical event.
    report(:host => event_host,
           :service => "rabbitmq.queue",
           :state => "critical",
           :description => "HTTP connection error to /api/queues: #{response.status} - #{response.body}")
    return
  end

  report(:host => event_host,
         :service => "rabbitmq.queue",
         :state => "ok",
         :description => "HTTP connection ok")

  JSON.parse(response.body).each do |queue|
    svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
    ready = queue['messages_ready']
    errs = []

    unless ready.nil?
      errs << "Queue has jobs but no consumers" if ready > 0 && queue['consumers'] == 0

      size_checked = !size_exempt || queue['name'] !~ size_exempt
      errs << "Queue has #{ready} jobs" if size_checked && ready > options[:max_queue_size]
    end

    report(:host => event_host,
           :service => svc,
           :state => errs.empty? ? "ok" : "critical",
           :description => errs.empty? ? "Queue is looking good" : errs.join("; "))

    stats = (queue['message_stats'] || {}).merge(
      'messages' => queue['messages'],
      'messages_details' => queue['messages_details'],
      'messages_ready' => ready,
      'messages_ready_details' => queue['messages_ready_details'],
      'messages_unacknowledged' => queue['messages_unacknowledged'],
      'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
      'consumers' => queue['consumers'],
      'memory' => queue['memory']
    )

    stats.each_pair do |k, v|
      # "*_details" entries are hashes; their 'rate' is the metric.
      metric = (k =~ /details$/ && !v.nil?) ? v['rate'] : v

      # TODO: Set state via thresholds which can be configured

      report(:host => event_host,
             :service => "#{svc}.#{k}",
             :metric => metric,
             :description => "RabbitMQ monitor")
    end
  end
end
163
+
164
# Polls the overview endpoint and reports cluster-wide metrics.
#
# On a non-200 reply a "critical" event is reported for service "rabbitmq"
# with the status and body. On success an "ok" event is reported for
# "rabbitmq monitoring", followed by one "rabbitmq.<section>.<key>" metric
# for every entry of the message_stats, queue_totals and object_totals
# sections; "*_details" entries report their rate.
#
# Nothing beyond safe_get's own event is reported when the request failed.
def check_overview
  response = safe_get(URI(overview_url), event_host)

  return if response.nil?

  if response.status != 200
    # The body is parsed only on success: an error reply is not guaranteed
    # to be JSON, and a parse failure here would hide this critical event.
    report(:host => event_host,
           :service => "rabbitmq",
           :state => "critical",
           :description => "HTTP connection error: #{response.status} - #{response.body}")
    return
  end

  report(:host => event_host,
         :service => "rabbitmq monitoring",
         :state => "ok",
         :description => "HTTP connection ok")

  json = JSON.parse(response.body)

  %w( message_stats queue_totals object_totals ).each do |stat|
    section = json[stat]
    # Brand new servers can have blank (or absent) stats sections; there is
    # nothing to report for those.
    next if section.nil? || section.empty?

    section.each_pair do |k, v|
      # "*_details" entries are hashes; their 'rate' is the metric.
      metric = (k =~ /details$/ && !v.nil?) ? v['rate'] : v

      # TODO: Set state via thresholds which can be configured

      report(:host => event_host,
             :service => "rabbitmq.#{stat}.#{k}",
             :metric => metric,
             :description => "RabbitMQ monitor")
    end
  end
end
209
+
210
# Checks every node named in the :node option and reports one
# "rabbitmq.node.<name>" event per node:
#   critical - the node is unknown to the cluster (404), the request got any
#              other non-200 status, or the node reports a memory or
#              disk-free alarm (the memory alarm takes precedence)
#   ok       - otherwise
#
# A node whose request failed outright gets no node event here; safe_get has
# already reported the connection error.
def check_node
  opts[:node].each do |n|
    response = safe_get(URI(node_url(n)), event_host)

    # `next`, not `return`: a failed, missing or alarmed node must not stop
    # the remaining nodes from being checked.
    next if response.nil?

    state, description =
      if response.status == 404
        ["critical", "Node was not found in the cluster"]
      elsif response.status != 200
        ["critical", "HTTP error: #{response.status} - #{response.body}"]
      else
        node = JSON.parse(response.body)
        if node['mem_alarm']
          ["critical", "Memory alarm has triggered; job submission throttled"]
        elsif node['disk_free_alarm']
          ["critical", "Disk free alarm has triggered; job submission throttled"]
        else
          ["ok", "Node looks OK to me"]
        end
      end

    report(:host => event_host,
           :service => "rabbitmq.node.#{n}",
           :state => state,
           :description => description)
  end
end
261
+
262
# One polling cycle: the cluster overview first, then the per-node checks
# (only when the :node option is set), then the queues.
def tick
  check_overview
  opts[:node] && check_node
  check_queues
end
267
+ end
268
+ Riemann::Tools::Rabbitmq.run
269
+
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Riemann Riak
2
+
3
+ Gathers Riak statistics and submits them to Riemann.
4
+
5
+ ## Getting started
6
+
7
+ ```
8
+ gem install riemann-riak
9
+ riemann-riak --help
10
+ ```
@@ -0,0 +1,34 @@
1
+ require 'rubygems'
2
+ require 'rubygems/package_task'
3
+ require 'rdoc/task'
4
+ require 'find'
5
+
6
+ # Don't include resource forks in tarballs on Mac OS X.
7
+ ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
8
+ ENV['COPYFILE_DISABLE'] = 'true'
9
+
10
+ # Gemspec
11
# Specification of the riemann-riak gem; packaged by the task defined below.
gemspec = Gem::Specification.new do |s|
  s.rubyforge_project = 'riemann-riak'

  s.name = 'riemann-riak'
  s.version = '0.1.2'
  s.author = 'Kyle Kingsbury'
  s.email = 'aphyr@aphyr.com'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits riak stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'yajl-ruby', '>= 1.1.0'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Dir.entries also returns the "." and ".." pseudo-entries, which are not
  # executables; drop them before registering the scripts in bin/.
  s.executables |= Dir.entries('bin/') - %w[. ..]
  s.has_rdoc = false

  s.required_ruby_version = '>= 1.8.7'
end

# Defines the packaging tasks for the gemspec above with default settings.
Gem::PackageTask.new gemspec do |p|
end
@@ -0,0 +1,331 @@
1
+ #!/usr/bin/env ruby
2
+ Process.setproctitle($0)
3
+
4
+ # Forwards information on a Riak node to Riemann.
5
+
6
+ require 'riemann/tools'
7
+
8
+ class Riemann::Tools::Riak
9
+ include Riemann::Tools
10
+ require 'net/http'
11
+ require 'net/https'
12
+ require 'yajl/json_gem'
13
+
14
+ opt :riak_host, "Riak host for stats <IP> or SSL http(s)://<IP>", :default => Socket.gethostname
15
+ opt :data_dir, "Riak data directory", :default => '/var/lib/riak'
16
+ opt :stats_port, "Riak HTTP port for stats", :default => 8098
17
+ opt :stats_path, "Riak HTTP stats path", :default => '/stats'
18
+ opt :node_name, "Riak erlang node name", :default => "riak@#{Socket.gethostname}"
19
+ opt :cookie, "Riak cookie to use", :default => "riak"
20
+
21
+ opt :get_50_warning, "FSM 50% get time warning threshold (ms)", :default => 1000
22
+ opt :put_50_warning, "FSM 50% put time warning threshold (ms)", :default => 1000
23
+ opt :get_95_warning, "FSM 95% get time warning threshold (ms)", :default => 2000
24
+ opt :put_95_warning, "FSM 95% put time warning threshold (ms)", :default => 2000
25
+ opt :get_99_warning, "FSM 99% get time warning threshold (ms)", :default => 10000
26
+ opt :put_99_warning, "FSM 99% put time warning threshold (ms)", :default => 10000
27
+
28
# Probes the environment once at start-up:
#   * detect_features records whether escript and riak-admin are available;
#   * @httpstatus is true unless a GET of :stats_path on :riak_host raised;
#   * ERL_AFLAGS is overwritten so the configured cookie is passed on.
#
# :riak_host may be a bare host/IP or an http(s):// URL; TLS is used only
# for the https scheme, and the peer certificate is NOT verified.
def initialize
  detect_features

  @httpstatus =
    begin
      target = URI.parse(opts[:riak_host])
      # A bare host name parses without a host component; use it verbatim.
      target.host = opts[:riak_host] if target.host.nil?

      client = Net::HTTP.new(target.host, opts[:stats_port])
      client.use_ssl = target.scheme == 'https'
      client.verify_mode = OpenSSL::SSL::VERIFY_NONE if client.use_ssl?
      client.start { |session| session.get opts[:stats_path] }
      true
    rescue => _e
      false
    end

  # we're going to override the emulator setting to allow users to
  # dynamically input the cookie
  # this is done only once - hopefully it doesn't get overridden.
  ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
end
55
+
56
+ # Identifies whether escript and riak-admin are installed
57
# Sets @escript and @riakadmin to true when `which` prints a path for the
# respective command, and to false when its output is blank.
def detect_features
  blank = /^\s*$/
  @escript = !(`which escript` =~ blank)       # Whether escript is present on this machine
  @riakadmin = !(`which riak-admin` =~ blank)  # Whether riak-admin is present
end
69
+
70
# Reports the "riak ring" service: "ok" when the ring-ready output starts a
# line with TRUE, "warning" otherwise; the output itself is the description.
#
# The output comes from the riemann-riak-ring script next to this file when
# escript is available, else from `riak-admin ringready`. When neither tool
# is available nothing is reported.
def check_ring
  output =
    if @escript
      `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-ring #{opts[:node_name]}`.chomp
    elsif @riakadmin
      `riak-admin ringready`
    end

  return if output.nil?

  report(
    :host => opts[:riak_host],
    :service => 'riak ring',
    :state => output =~ /^TRUE/ ? 'ok' : 'warning',
    :description => output
  )
end
97
+
98
# Runs the riemann-riak-keys script next to this file for :node_name and
# reports the "riak keys" service: "ok" with the count as metric when the
# script printed a plain number, "unknown" with the raw output otherwise.
def check_keys
  output = `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-keys #{opts[:node_name]}`.chomp
  event = { :host => opts[:riak_host], :service => 'riak keys' }

  if output =~ /^\d+$/
    report(event.merge(:state => 'ok', :metric => output.to_i, :description => output))
  else
    report(event.merge(:state => 'unknown', :description => output))
  end
end
117
+
118
# Reports the "riak transfers" service from `riak-admin transfers`:
# "critical" with the partition count as metric when the output says this
# node (:node_name) is waiting to hand off partitions, "ok" with metric 0
# otherwise. Nothing is reported when riak-admin is not available.
def check_transfers
  return unless @riakadmin

  output = `riak-admin transfers`

  # The node name is matched literally: names such as "riak@10.0.0.1"
  # contain regexp metacharacters that would otherwise match other nodes.
  node = Regexp.escape(opts[:node_name])
  pending = output[/'#{node}' waiting to handoff (\d+) partitions/, 1]

  if pending
    report(
      :host => opts[:riak_host],
      :service => 'riak transfers',
      :state => 'critical',
      :metric => pending.to_i,
      :description => "waiting to handoff #{pending} partitions"
    )
  else
    report(
      :host => opts[:riak_host],
      :service => 'riak transfers',
      :state => 'ok',
      :metric => 0,
      :description => "No pending transfers"
    )
  end
end
145
+
146
# Reports the size of :data_dir as the "riak disk" metric (always "ok").
# The first field of `du -Ls` is divided by 1024**2.
# NOTE(review): that yields GB only if du counts 1 KiB blocks - confirm on
# the target platform.
def check_disk
  blocks = `du -Ls #{opts[:data_dir]}`.split(/\s+/).first.to_i
  gb = blocks / (1024.0**2)

  report(
    :host => opts[:riak_host],
    :service => 'riak disk',
    :state => 'ok',
    :metric => gb,
    :description => "#{gb} GB in #{opts[:data_dir]}"
  )
end
156
+
157
+ # Returns the riak stat for the given fsm type and percentile.
158
# Example: fsm_stat('get', 'time', 50) => "node_get_fsm_time_median";
# every other percentile is appended as a number.
def fsm_stat(type, property, percentile)
  suffix =
    if percentile == 50
      'median'
    else
      percentile
    end
  "node_#{type}_fsm_#{property}_#{suffix}"
end
161
+
162
+ # Returns the alerts state for the given fsm.
163
# The threshold is the "<type>_<percentile>_warning" option: values from 0
# up to it are 'ok', values up to twice it are 'warning', and anything else
# (including negative values) is 'critical'.
def fsm_state(type, percentile, val)
  limit = opts[:"#{type}_#{percentile}_warning"]

  return 'ok' if (0..limit).cover?(val)
  return 'warning' if (limit..limit * 2).cover?(val)

  'critical'
end
174
+
175
+ # Get current stats via HTTP
176
# Fetches :stats_path from :riak_host on :stats_port and returns the parsed
# JSON body.
#
# :riak_host may be a bare host/IP or an http(s):// URL; TLS is used only
# for the https scheme, and the peer certificate is NOT verified.
#
# Raises the original exception when the request fails, and a RuntimeError
# when the reply status is not 200. In both cases a "critical" event for
# service "riak" is reported first.
def stats_http
  begin
    uri = URI.parse(opts[:riak_host])
    # A bare host name parses without a host component; use it verbatim.
    uri.host = opts[:riak_host] if uri.host.nil?

    http = Net::HTTP.new(uri.host, opts[:stats_port])
    http.use_ssl = uri.scheme == 'https'
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?

    res = http.start { |h| h.get opts[:stats_path] }
  rescue => e
    report(
      :host => opts[:riak_host],
      :service => 'riak',
      :state => 'critical',
      :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
    )
    raise
  end

  return JSON.parse(res.body) if res.code.to_i == 200

  report(
    :host => opts[:riak_host],
    :service => 'riak',
    :state => 'critical',
    :description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
  )
  # Was `res.core`, which raised NoMethodError instead of this message.
  raise "Can't fetch stats via HTTP: #{res.code}:\n\n#{res.body}"
end
212
+
213
+ # Get current stats via riak-admin
214
# Parses the "name : value" lines printed by `riak-admin status` into a
# Hash of strings. Lines without a " : " separator become keys with a nil
# value; blank lines are ignored.
#
# Raises RuntimeError when riak-admin exits unsuccessfully.
def stats_riak_admin
  str = `riak-admin status`
  raise "riak-admin failed" unless $?.success?

  # Split on the first separator only, so a value that itself contains
  # " : " stays intact instead of making Hash[] raise.
  pairs = str.split(/\n/).map { |line| line.split(/ : /, 2) }
  Hash[pairs.reject(&:empty?)]
end
219
+
220
+ # Get current stats as a hash
221
# Prefers HTTP (when the start-up probe succeeded), then riak-admin. When
# neither is available a "critical" event for service "riak" is reported
# and a RuntimeError with the same text is raised.
def stats
  return stats_http if @httpstatus
  return stats_riak_admin if @riakadmin

  report(
    :host => opts[:riak_host],
    :service => 'riak',
    :state => 'critical',
    :description => "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
  )
  raise "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
end
236
+
237
# Names of the Riak counters that check_stats reports as "riak <name>"
# services.
def core_services
  %w[
    vnode_gets
    vnode_puts
    node_gets
    node_puts
    node_gets_set
    node_puts_set
    read_repairs
  ]
end
246
+
247
# The FSM statistics to report, as single-entry { type => property } hashes:
# get time, put time and get set_objsize.
def fsm_types
  [%w[get time], %w[put time], %w[get set_objsize]].map do |type, property|
    { type => property }
  end
end
251
+
252
# Percentiles reported for every FSM statistic; fsm_stat maps 50 to the
# "median" stat name.
def fsm_percentiles
  median = 50
  [median, 95, 99]
end
255
+
256
+ # Reports current stats to Riemann
257
# Fetches the current stats and reports:
#   riak                           - 'ok'
#   riak <counter>                 - each core_services counter divided by 60,
#                                    described as a per-second rate
#   riak <type> <prop> <pct>       - each FSM statistic; 'time' values are
#                                    divided by 1000 (us -> ms) and graded by
#                                    fsm_state, other properties are 'ok'
#
# If fetching the stats raises, every one of those services is reported as
# 'critical' with the exception message instead.
#
# NOTE(review): the FSM description always says "ms", also for the
# set_objsize statistic - confirm whether that label is intended.
def check_stats
  begin
    current = stats
  rescue => e
    failure = { :state => 'critical',
                :description => e.message,
                :host => opts[:riak_host] }

    # Report errors
    report(failure.merge(:service => 'riak'))
    core_services.each do |counter|
      report(failure.merge(:service => "riak #{counter}"))
    end
    fsm_types.each do |typespec|
      typespec.each do |type, prop|
        fsm_percentiles.each do |percentile|
          report(failure.merge(:service => "riak #{type} #{prop} #{percentile}"))
        end
      end
    end
    return
  end

  # Riak itself
  report(
    :host => opts[:riak_host],
    :service => 'riak',
    :state => 'ok'
  )

  # Gets/puts/rr
  core_services.each do |counter|
    per_second = current[counter].to_i / 60.0
    report(
      :host => opts[:riak_host],
      :service => "riak #{counter}",
      :state => 'ok',
      :metric => per_second,
      :description => "#{per_second}/sec"
    )
  end

  # FSMs
  fsm_types.each do |typespec|
    typespec.each do |type, prop|
      timed = prop == 'time'

      fsm_percentiles.each do |percentile|
        # to_i already maps nil and non-numeric strings such as
        # "undefined" to 0.
        val = current[fsm_stat(type, prop, percentile)].to_i
        val /= 1000.0 if timed # Convert us to ms

        report(
          :host => opts[:riak_host],
          :service => "riak #{type} #{prop} #{percentile}",
          :state => timed ? fsm_state(type, percentile, val) : "ok",
          :metric => val,
          :description => "#{val} ms"
        )
      end
    end
  end
end
320
+
321
# One polling cycle: stats, ring readiness, disk usage, then transfers.
def tick
  check_stats
  check_ring
  check_disk
  check_transfers
  # check_keys is deliberately not part of the cycle: it can utterly
  # destroy a cluster.
end
329
+ end
330
+
331
+ Riemann::Tools::Riak.run
@@ -0,0 +1,13 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck@127.0.0.1 -hidden

%% Prints the total of the per-vnode key counts that
%% riak_kv_bitcask_backend:key_counts/0 returns on the target node.
%% Usage: riemann-riak-keys [Node]   (Node defaults to riak@127.0.0.1)
%%
%% The stray Ruby statement that used to sit on line 2 was removed: it is
%% not valid Erlang and pushed the emulator-flags line out of position.

main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
  io:format("~w\n", [
    lists:foldl(
      fun({_VNode, Count}, Sum) -> Sum + Count end,
      0,
      rpc:call(list_to_atom(Node), riak_kv_bitcask_backend, key_counts, [])
    )
  ]).
@@ -0,0 +1,9 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck@127.0.0.1 -hidden

%% Prints the result of riak_kv_console:ringready/1 called on the target
%% node.
%% Usage: riemann-riak-ring [Node]   (Node defaults to riak@127.0.0.1)
%%
%% The stray Ruby statement that used to sit on line 2 was removed: it is
%% not valid Erlang and pushed the emulator-flags line out of position.

main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
  io:format("~p\n", [
    rpc:call(list_to_atom(Node), riak_kv_console, ringready, [[]])
  ]).
@@ -0,0 +1,13 @@
1
#!/usr/bin/env escript
%%! -name riakstatuscheck -setcookie riak -hidden

%% Prints the total of the per-vnode key counts that
%% riak_kv_bitcask_backend:key_counts/0 returns on the target node.
%% The single optional argument is the node name (default riak@127.0.0.1).
%%
%% The stray Ruby statement that used to sit on line 2 was removed: it is
%% not valid Erlang and pushed the emulator-flags line out of position.

main([]) -> main(["riak@127.0.0.1"]);
main([Node]) ->
  io:format("~w\n", [
    lists:foldl(
      fun({_VNode, Count}, Sum) -> Sum + Count end,
      0,
      rpc:call(list_to_atom(Node), riak_kv_bitcask_backend, key_counts, [])
    )
  ]).