riemann-tools-dgvz 0.2.2.1 → 0.2.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/bin/riemann-rabbitmq +154 -7
  2. data/bin/riemann-riak +32 -3
  3. metadata +1 -1
data/bin/riemann-rabbitmq CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
3
 
4
- require File.expand_path('../../lib/riemann/tools', __FILE__)
4
+ require 'riemann/tools'
5
5
 
6
6
  class Riemann::Tools::Rabbitmq
7
7
  include Riemann::Tools
@@ -16,11 +16,28 @@ class Riemann::Tools::Rabbitmq
16
16
 
17
17
  opt :monitor_user, 'RabbitMQ monitoring user', type: :string
18
18
  opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
19
- opt :monitor_port, 'RabbitMQ monitoring port', default: 15672
20
- opt :monitor_host, 'RabbitMQ monitoring host', default: "localhost"
19
+ opt :monitor_port, 'RabbitMQ monitoring port', type: :int, default: 15672
20
+ opt :monitor_host, 'RabbitMQ monitoring host', type: :string, default: "localhost"
21
21
 
22
- def monitor_url
23
- "http://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api/overview"
22
+ opt :max_queue_size, "max number of items in a queue that is acceptable", type: :int, default: 1_000_000
23
+ opt :ignore_max_size_queues, "A regular expression to match queues that shouldn't be size-checked", type: :string
24
+
25
+ opt :node, "Specify a node to monitor", type: :strings
26
+
27
+ def base_url
28
+ "http://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api"
29
+ end
30
+
31
+ def overview_url
32
+ "#{base_url}/overview"
33
+ end
34
+
35
+ def node_url(n)
36
+ "#{base_url}/nodes/#{n}"
37
+ end
38
+
39
+ def queues_url
40
+ "#{base_url}/queues"
24
41
  end
25
42
 
26
43
  def event_host
@@ -50,8 +67,86 @@ class Riemann::Tools::Rabbitmq
50
67
  response
51
68
  end
52
69
 
53
- def tick
54
- uri = URI(monitor_url)
70
+ def check_queues
71
+ response = safe_get(queues_url, event_host)
72
+ max_size_check_filter = if options[:ignore_max_size_queues]
73
+ Regexp.new(options[:ignore_max_size_queues])
74
+ else
75
+ nil
76
+ end
77
+
78
+ return if response.nil?
79
+
80
+ json = JSON.parse(response.body)
81
+
82
+ if response.status != 200
83
+ report(:host => event_host,
84
+ :service => "rabbitmq.queue",
85
+ :state => "critical",
86
+ :description => "HTTP connection error to /api/queues: #{response.status} - #{response.body}"
87
+ )
88
+ else
89
+ report(:host => event_host,
90
+ :service => "rabbitmq.queue",
91
+ :state => "ok",
92
+ :description => "HTTP connection ok"
93
+ )
94
+
95
+ json = JSON.parse(response.body)
96
+
97
+ json.each do |queue|
98
+ svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
99
+ errs = []
100
+
101
+ if queue['messages_ready'] > 0 and queue['consumers'] == 0
102
+ errs << "Queue has jobs but no consumers"
103
+ end
104
+
105
+ if (max_size_check_filter.nil? or queue['name'] !~ max_size_check_filter) and queue['messages_ready'] > options[:max_queue_size]
106
+ errs << "Queue has #{queue['messages_ready']} jobs"
107
+ end
108
+
109
+ unless errs.empty?
110
+ report(:host => event_host,
111
+ :service => svc,
112
+ :state => "critical",
113
+ :description => errs.join("; ")
114
+ )
115
+ end
116
+
117
+ stats = (queue['message_stats'] || {}).merge(
118
+ 'messages' => queue['messages'],
119
+ 'messages_details' => queue['messages_details'],
120
+ 'messages_ready' => queue['messages_ready'],
121
+ 'messages_ready_details' => queue['messages_ready_details'],
122
+ 'messages_unacknowledged' => queue['messages_unacknowledged'],
123
+ 'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
124
+ 'consumers' => queue['consumers'],
125
+ 'memory' => queue['memory'],
126
+ )
127
+
128
+ stats.each_pair do |k,v|
129
+ service = "#{svc}.#{k}"
130
+ if k =~ /details$/
131
+ metric = v['rate']
132
+ else
133
+ metric = v
134
+ end
135
+
136
+ # TODO: Set state via thresholds which can be configured
137
+
138
+ report(:host => event_host,
139
+ :service => service,
140
+ :metric => metric,
141
+ :description => "RabbitMQ monitor"
142
+ )
143
+ end
144
+ end
145
+ end
146
+ end
147
+
148
+ def check_overview
149
+ uri = URI(overview_url)
55
150
  response = safe_get(uri, event_host)
56
151
 
57
152
  return if response.nil?
@@ -95,5 +190,57 @@ class Riemann::Tools::Rabbitmq
95
190
  end
96
191
  end
97
192
  end
193
+
194
+ def check_node
195
+ opts[:node].each do |n|
196
+ uri = URI(node_url(n))
197
+ response = safe_get(uri, event_host)
198
+
199
+ return if response.nil?
200
+
201
+ if response.status != 200
202
+ if response.status == 404
203
+ report(:host => event_host,
204
+ :service => "rabbitmq.node.#{n}",
205
+ :state => "critical",
206
+ :description => "Node was not found in the cluster"
207
+ )
208
+ else
209
+ report(:host => event_host,
210
+ :service => "rabbitmq.node.#{n}",
211
+ :state => "critical",
212
+ :description => "HTTP error: #{response.status} - #{response.body}"
213
+ )
214
+ end
215
+ return
216
+ end
217
+
218
+ json = JSON.parse(response.body)
219
+
220
+ if json['mem_alarm']
221
+ report(:host => event_host,
222
+ :service => "rabbitmq.node.#{n}",
223
+ :state => "critical",
224
+ :description => "Memory alarm has triggered; job submission throttled"
225
+ )
226
+ return
227
+ end
228
+
229
+ if json['disk_free_alarm']
230
+ report(:host => event_host,
231
+ :service => "rabbitmq.node.#{n}",
232
+ :state => "critical",
233
+ :description => "Disk free alarm has triggered; job submission throttled"
234
+ )
235
+ return
236
+ end
237
+ end
238
+ end
239
+
240
+ def tick
241
+ check_overview
242
+ check_node if opts[:node]
243
+ check_queues
244
+ end
98
245
  end
99
246
  Riemann::Tools::Rabbitmq.run
data/bin/riemann-riak CHANGED
@@ -27,8 +27,8 @@ class Riemann::Tools::Riak
27
27
 
28
28
  def initialize
29
29
  detect_features
30
-
31
- @httpstatus = true
30
+
31
+ @httpstatus = true
32
32
  # What's going on here? --aphyr
33
33
  if
34
34
  begin
@@ -59,7 +59,7 @@ class Riemann::Tools::Riak
59
59
  def detect_features
60
60
  @escript = true # Whether escript is present on this machine
61
61
  @riakadmin = true # Whether riak-admin is present
62
-
62
+
63
63
  if `which escript` =~ /^\s*$/
64
64
  @escript = false
65
65
  end
@@ -113,6 +113,34 @@ class Riemann::Tools::Riak
113
113
  end
114
114
  end
115
115
 
116
+ def check_transfers
117
+ str = if @riakadmin
118
+ `riak-admin transfers`
119
+ else
120
+ nil
121
+ end
122
+
123
+ return if str.nil?
124
+
125
+ if str =~ /'#{opts[:node_name]}' waiting to handoff (\d+) partitions/
126
+ report(
127
+ :host => opts[:riak_host],
128
+ :service => 'riak transfers',
129
+ :state => 'critical',
130
+ :metric => $1.to_i,
131
+ :description => "waiting to handoff #{$1} partitions"
132
+ )
133
+ else
134
+ report(
135
+ :host => opts[:riak_host],
136
+ :service => 'riak transfers',
137
+ :state => 'ok',
138
+ :metric => 0,
139
+ :description => "No pending transfers"
140
+ )
141
+ end
142
+ end
143
+
116
144
  def check_disk
117
145
  gb = `du -Ls #{opts[:data_dir]}`.split(/\s+/).first.to_i / (1024.0**2)
118
146
  report(
@@ -294,6 +322,7 @@ class Riemann::Tools::Riak
294
322
  check_stats
295
323
  check_ring
296
324
  check_disk
325
+ check_transfers
297
326
  end
298
327
  end
299
328
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: riemann-tools-dgvz
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2.1
4
+ version: 0.2.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: