riemann-tools 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +13 -0
  4. data/.github/workflows/codeql-analysis.yml +72 -0
  5. data/.rubocop.yml +32 -0
  6. data/CHANGELOG.md +31 -2
  7. data/README.markdown +8 -24
  8. data/Rakefile +4 -2
  9. data/SECURITY.md +42 -0
  10. data/bin/riemann-apache-status +92 -78
  11. data/bin/riemann-bench +54 -49
  12. data/bin/riemann-cloudant +44 -40
  13. data/bin/riemann-consul +82 -76
  14. data/bin/riemann-dir-files-count +53 -47
  15. data/bin/riemann-dir-space +53 -47
  16. data/bin/riemann-diskstats +78 -75
  17. data/bin/riemann-fd +68 -48
  18. data/bin/riemann-freeswitch +108 -103
  19. data/bin/riemann-haproxy +46 -40
  20. data/bin/riemann-health +4 -343
  21. data/bin/riemann-kvminstance +18 -13
  22. data/bin/riemann-memcached +35 -29
  23. data/bin/riemann-net +4 -104
  24. data/bin/riemann-nginx-status +74 -67
  25. data/bin/riemann-ntp +4 -33
  26. data/bin/riemann-portcheck +40 -31
  27. data/bin/riemann-proc +96 -90
  28. data/bin/riemann-varnish +51 -45
  29. data/bin/riemann-zookeeper +38 -34
  30. data/lib/riemann/tools/health.rb +347 -0
  31. data/lib/riemann/tools/net.rb +104 -0
  32. data/lib/riemann/tools/ntp.rb +41 -0
  33. data/lib/riemann/tools/version.rb +1 -1
  34. data/lib/riemann/tools.rb +37 -40
  35. data/riemann-tools.gemspec +4 -1
  36. data/tools/riemann-aws/{Rakefile.rb → Rakefile} +2 -0
  37. data/tools/riemann-aws/bin/riemann-aws-billing +72 -66
  38. data/tools/riemann-aws/bin/riemann-aws-rds-status +55 -41
  39. data/tools/riemann-aws/bin/riemann-aws-sqs-status +37 -31
  40. data/tools/riemann-aws/bin/riemann-aws-status +63 -51
  41. data/tools/riemann-aws/bin/riemann-elb-metrics +149 -148
  42. data/tools/riemann-aws/bin/riemann-s3-list +70 -65
  43. data/tools/riemann-aws/bin/riemann-s3-status +85 -82
  44. data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +2 -0
  45. data/tools/riemann-chronos/bin/riemann-chronos +136 -119
  46. data/tools/riemann-docker/{Rakefile.rb → Rakefile} +2 -0
  47. data/tools/riemann-docker/bin/riemann-docker +163 -174
  48. data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +2 -0
  49. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +155 -147
  50. data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +2 -0
  51. data/tools/riemann-marathon/bin/riemann-marathon +138 -122
  52. data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +2 -0
  53. data/tools/riemann-mesos/bin/riemann-mesos +125 -110
  54. data/tools/riemann-munin/{Rakefile.rb → Rakefile} +2 -0
  55. data/tools/riemann-munin/bin/riemann-munin +28 -22
  56. data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +2 -0
  57. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +226 -222
  58. data/tools/riemann-riak/{Rakefile.rb → Rakefile} +2 -0
  59. data/tools/riemann-riak/bin/riemann-riak +281 -289
  60. data/tools/riemann-riak/riak_status/riak_status.rb +39 -39
  61. metadata +65 -16
@@ -1,269 +1,273 @@
1
1
  #!/usr/bin/env ruby
2
- Process.setproctitle($0)
2
+ # frozen_string_literal: true
3
3
 
4
- require 'riemann/tools'
4
+ Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- class Riemann::Tools::Rabbitmq
7
- include Riemann::Tools
6
+ require 'riemann/tools'
8
7
 
9
- require 'faraday'
10
- require 'json'
11
- require 'uri'
8
+ module Riemann
9
+ module Tools
10
+ class Rabbitmq
11
+ include Riemann::Tools
12
12
 
13
+ require 'faraday'
14
+ require 'json'
15
+ require 'uri'
13
16
 
14
- opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
15
- opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
17
+ opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
18
+ opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
16
19
 
17
- opt :monitor_user, 'RabbitMQ monitoring user', type: :string
18
- opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
19
- opt :monitor_port, 'RabbitMQ monitoring port', type: :int, default: 15672
20
- opt :monitor_host, 'RabbitMQ monitoring host', type: :string, default: "localhost"
21
- opt :monitor_use_tls, 'RabbitMQ use tls', type: :bool, default: false
20
+ opt :monitor_user, 'RabbitMQ monitoring user', type: :string
21
+ opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
22
+ opt :monitor_port, 'RabbitMQ monitoring port', type: :int, default: 15_672
23
+ opt :monitor_host, 'RabbitMQ monitoring host', type: :string, default: 'localhost'
24
+ opt :monitor_use_tls, 'RabbitMQ use tls', type: :bool, default: false
22
25
 
23
- opt :max_queue_size, "max number of items in a queue that is acceptable", type: :int, default: 1_000_000
24
- opt :ignore_max_size_queues, "A regular expression to match queues that shouldn't be size-checked", type: :string
26
+ opt :max_queue_size, 'max number of items in a queue that is acceptable', type: :int, default: 1_000_000
27
+ opt :ignore_max_size_queues, "A regular expression to match queues that shouldn't be size-checked", type: :string
25
28
 
26
- opt :node, "Specify a node to monitor", type: :strings
29
+ opt :node, 'Specify a node to monitor', type: :strings
27
30
 
28
- def base_url
29
- protocol = "http"
30
- if (options[:monitor_use_tls]) && (options[:monitor_use_tls]==true)
31
- protocol = "https"
32
- end
33
- "#{protocol}://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api"
34
- end
31
+ def base_url
32
+ protocol = 'http'
33
+ protocol = 'https' if options[:monitor_use_tls] && (options[:monitor_use_tls] == true)
34
+ "#{protocol}://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api"
35
+ end
35
36
 
36
- def overview_url
37
- "#{base_url}/overview"
38
- end
37
+ def overview_url
38
+ "#{base_url}/overview"
39
+ end
39
40
 
40
- def node_url(n)
41
- "#{base_url}/nodes/#{n}"
42
- end
41
+ def node_url(node)
42
+ "#{base_url}/nodes/#{node}"
43
+ end
43
44
 
44
- def queues_url
45
- "#{base_url}/queues"
46
- end
45
+ def queues_url
46
+ "#{base_url}/queues"
47
+ end
47
48
 
48
- def event_host
49
- if options[:event_host]
50
- return options[:event_host]
51
- else
52
- return options[:monitor_host]
53
- end
54
- end
49
+ def event_host
50
+ options[:event_host] || :monitor_host
51
+ end
55
52
 
56
- def safe_get(uri, event_host)
57
- # Handle connection timeouts
58
- response = nil
59
- begin
60
- connection = Faraday.new(uri)
61
- response = connection.get do |req|
62
- req.options[:timeout] = options[:read_timeout]
63
- req.options[:open_timeout] = options[:open_timeout]
53
+ def safe_get(uri, event_host)
54
+ # Handle connection timeouts
55
+ response = nil
56
+ begin
57
+ connection = Faraday.new(uri)
58
+ response = connection.get do |req|
59
+ req.options[:timeout] = options[:read_timeout]
60
+ req.options[:open_timeout] = options[:open_timeout]
61
+ end
62
+ report(
63
+ host: event_host,
64
+ service: 'rabbitmq monitoring',
65
+ state: 'ok',
66
+ description: 'Monitoring operational',
67
+ )
68
+ rescue StandardError => e
69
+ report(
70
+ host: event_host,
71
+ service: 'rabbitmq monitoring',
72
+ state: 'critical',
73
+ description: "HTTP connection error: #{e.class} - #{e.message}",
74
+ )
64
75
  end
65
- report(:host => event_host,
66
- :service => "rabbitmq monitoring",
67
- :state => 'ok',
68
- :description => "Monitoring operational"
69
- )
70
- rescue => e
71
- report(:host => event_host,
72
- :service => "rabbitmq monitoring",
73
- :state => "critical",
74
- :description => "HTTP connection error: #{e.class} - #{e.message}"
75
- )
76
+ response
76
77
  end
77
- response
78
- end
79
-
80
- def check_queues
81
- response = safe_get(queues_url, event_host)
82
- max_size_check_filter = if options[:ignore_max_size_queues]
83
- Regexp.new(options[:ignore_max_size_queues])
84
- else
85
- nil
86
- end
87
78
 
88
- return if response.nil?
79
+ def check_queues
80
+ response = safe_get(queues_url, event_host)
81
+ max_size_check_filter = (Regexp.new(options[:ignore_max_size_queues]) if options[:ignore_max_size_queues])
89
82
 
90
- json = JSON.parse(response.body)
83
+ return if response.nil?
91
84
 
92
- if response.status != 200
93
- report(:host => event_host,
94
- :service => "rabbitmq.queue",
95
- :state => "critical",
96
- :description => "HTTP connection error to /api/queues: #{response.status} - #{response.body}"
97
- )
98
- else
99
- report(:host => event_host,
100
- :service => "rabbitmq.queue",
101
- :state => "ok",
102
- :description => "HTTP connection ok"
103
- )
85
+ if response.status != 200
86
+ report(
87
+ host: event_host,
88
+ service: 'rabbitmq.queue',
89
+ state: 'critical',
90
+ description: "HTTP connection error to /api/queues: #{response.status} - #{response.body}",
91
+ )
92
+ else
93
+ report(
94
+ host: event_host,
95
+ service: 'rabbitmq.queue',
96
+ state: 'ok',
97
+ description: 'HTTP connection ok',
98
+ )
104
99
 
105
- json = JSON.parse(response.body)
100
+ json = JSON.parse(response.body)
101
+
102
+ json.each do |queue|
103
+ svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
104
+ errs = []
105
+
106
+ errs << 'Queue has jobs but no consumers' if !queue['messages_ready'].nil? && (queue['messages_ready']).positive? && (queue['consumers']).zero?
107
+
108
+ errs << "Queue has #{queue['messages_ready']} jobs" if (max_size_check_filter.nil? || queue['name'] !~ (max_size_check_filter)) && !queue['messages_ready'].nil? && (queue['messages_ready'] > options[:max_queue_size])
109
+
110
+ if errs.empty?
111
+ report(
112
+ host: event_host,
113
+ service: svc,
114
+ state: 'ok',
115
+ description: 'Queue is looking good',
116
+ )
117
+ else
118
+ report(
119
+ host: event_host,
120
+ service: svc,
121
+ state: 'critical',
122
+ description: errs.join('; '),
123
+ )
124
+ end
125
+
126
+ stats = (queue['message_stats'] || {}).merge(
127
+ 'messages' => queue['messages'],
128
+ 'messages_details' => queue['messages_details'],
129
+ 'messages_ready' => queue['messages_ready'],
130
+ 'messages_ready_details' => queue['messages_ready_details'],
131
+ 'messages_unacknowledged' => queue['messages_unacknowledged'],
132
+ 'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
133
+ 'consumers' => queue['consumers'],
134
+ 'memory' => queue['memory'],
135
+ )
136
+
137
+ stats.each_pair do |k, v|
138
+ service = "#{svc}.#{k}"
139
+ metric = if k =~ (/details$/) && !v.nil?
140
+ v['rate']
141
+ else
142
+ v
143
+ end
144
+
145
+ # TODO: Set state via thresholds which can be configured
146
+
147
+ report(
148
+ host: event_host,
149
+ service: service,
150
+ metric: metric,
151
+ description: 'RabbitMQ monitor',
152
+ )
153
+ end
154
+ end
155
+ end
156
+ end
106
157
 
107
- json.each do |queue|
108
- svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
109
- errs = []
158
+ def check_overview
159
+ uri = URI(overview_url)
160
+ response = safe_get(uri, event_host)
110
161
 
111
- if queue['messages_ready']!=nil and queue['messages_ready'] > 0 and queue['consumers'] == 0
112
- errs << "Queue has jobs but no consumers"
113
- end
162
+ return if response.nil?
114
163
 
115
- if (max_size_check_filter.nil? or queue['name'] !~ max_size_check_filter) and queue['messages_ready']!=nil and queue['messages_ready'] > options[:max_queue_size]
116
- errs << "Queue has #{queue['messages_ready']} jobs"
117
- end
164
+ json = JSON.parse(response.body)
118
165
 
119
- if errs.empty?
120
- report(:host => event_host,
121
- :service => svc,
122
- :state => "ok",
123
- :description => "Queue is looking good"
166
+ if response.status != 200
167
+ report(
168
+ host: event_host,
169
+ service: 'rabbitmq',
170
+ state: 'critical',
171
+ description: "HTTP connection error: #{response.status} - #{response.body}",
124
172
  )
125
173
  else
126
- report(:host => event_host,
127
- :service => svc,
128
- :state => "critical",
129
- :description => errs.join("; ")
174
+ report(
175
+ host: event_host,
176
+ service: 'rabbitmq monitoring',
177
+ state: 'ok',
178
+ description: 'HTTP connection ok',
130
179
  )
131
- end
132
180
 
133
- stats = (queue['message_stats'] || {}).merge(
134
- 'messages' => queue['messages'],
135
- 'messages_details' => queue['messages_details'],
136
- 'messages_ready' => queue['messages_ready'],
137
- 'messages_ready_details' => queue['messages_ready_details'],
138
- 'messages_unacknowledged' => queue['messages_unacknowledged'],
139
- 'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
140
- 'consumers' => queue['consumers'],
141
- 'memory' => queue['memory'],
142
- )
143
-
144
- stats.each_pair do |k,v|
145
- service = "#{svc}.#{k}"
146
- if k =~ /details$/ and v!=nil
147
- metric = v['rate']
148
- else
149
- metric = v
181
+ %w[message_stats queue_totals object_totals].each do |stat|
182
+ # NOTE: / BUG ?
183
+ # Brand new servers can have blank message stats. Is this ok?
184
+ # I can't decide.
185
+ next if json[stat].empty?
186
+
187
+ json[stat].each_pair do |k, v|
188
+ service = "rabbitmq.#{stat}.#{k}"
189
+ metric = if k =~ /details$/
190
+ v['rate']
191
+ else
192
+ v
193
+ end
194
+
195
+ # TODO: Set state via thresholds which can be configured
196
+
197
+ report(
198
+ host: event_host,
199
+ service: service,
200
+ metric: metric,
201
+ description: 'RabbitMQ monitor',
202
+ )
203
+ end
150
204
  end
151
-
152
- # TODO: Set state via thresholds which can be configured
153
-
154
- report(:host => event_host,
155
- :service => service,
156
- :metric => metric,
157
- :description => "RabbitMQ monitor"
158
- )
159
205
  end
160
206
  end
161
- end
162
- end
163
207
 
164
- def check_overview
165
- uri = URI(overview_url)
166
- response = safe_get(uri, event_host)
167
-
168
- return if response.nil?
169
-
170
- json = JSON.parse(response.body)
171
-
172
- if response.status != 200
173
- report(:host => event_host,
174
- :service => "rabbitmq",
175
- :state => "critical",
176
- :description => "HTTP connection error: #{response.status} - #{response.body}"
177
- )
178
- else
179
- report(:host => event_host,
180
- :service => "rabbitmq monitoring",
181
- :state => "ok",
182
- :description => "HTTP connection ok"
183
- )
184
-
185
- %w( message_stats queue_totals object_totals ).each do |stat|
186
- # NOTE / BUG ?
187
- # Brand new servers can have blank message stats. Is this ok?
188
- # I can't decide.
189
- next if json[stat].empty?
190
- json[stat].each_pair do |k,v|
191
- service = "rabbitmq.#{stat}.#{k}"
192
- if k =~ /details$/
193
- metric = v['rate']
194
- else
195
- metric = v
208
+ def check_node
209
+ opts[:node].each do |n|
210
+ uri = URI(node_url(n))
211
+ response = safe_get(uri, event_host)
212
+
213
+ break if response.nil?
214
+
215
+ if response.status != 200
216
+ if response.status == 404
217
+ report(
218
+ host: event_host,
219
+ service: "rabbitmq.node.#{n}",
220
+ state: 'critical',
221
+ description: 'Node was not found in the cluster',
222
+ )
223
+ else
224
+ report(
225
+ host: event_host,
226
+ service: "rabbitmq.node.#{n}",
227
+ state: 'critical',
228
+ description: "HTTP error: #{response.status} - #{response.body}",
229
+ )
230
+ end
231
+ break
196
232
  end
197
233
 
198
- # TODO: Set state via thresholds which can be configured
234
+ json = JSON.parse(response.body)
199
235
 
200
- report(:host => event_host,
201
- :service => service,
202
- :metric => metric,
203
- :description => "RabbitMQ monitor"
204
- )
205
- end
206
- end
207
- end
208
- end
209
-
210
- def check_node
211
- opts[:node].each do |n|
212
- uri = URI(node_url(n))
213
- response = safe_get(uri, event_host)
236
+ if json['mem_alarm']
237
+ report(
238
+ host: event_host,
239
+ service: "rabbitmq.node.#{n}",
240
+ state: 'critical',
241
+ description: 'Memory alarm has triggered; job submission throttled',
242
+ )
243
+ break
244
+ end
214
245
 
215
- return if response.nil?
246
+ if json['disk_free_alarm']
247
+ report(
248
+ host: event_host,
249
+ service: "rabbitmq.node.#{n}",
250
+ state: 'critical',
251
+ description: 'Disk free alarm has triggered; job submission throttled',
252
+ )
253
+ break
254
+ end
216
255
 
217
- if response.status != 200
218
- if response.status == 404
219
- report(:host => event_host,
220
- :service => "rabbitmq.node.#{n}",
221
- :state => "critical",
222
- :description => "Node was not found in the cluster"
223
- )
224
- else
225
- report(:host => event_host,
226
- :service => "rabbitmq.node.#{n}",
227
- :state => "critical",
228
- :description => "HTTP error: #{response.status} - #{response.body}"
256
+ report(
257
+ host: event_host,
258
+ service: "rabbitmq.node.#{n}",
259
+ state: 'ok',
260
+ description: 'Node looks OK to me',
229
261
  )
230
262
  end
231
- return
232
- end
233
-
234
- json = JSON.parse(response.body)
235
-
236
- if json['mem_alarm']
237
- report(:host => event_host,
238
- :service => "rabbitmq.node.#{n}",
239
- :state => "critical",
240
- :description => "Memory alarm has triggered; job submission throttled"
241
- )
242
- return
243
263
  end
244
264
 
245
- if json['disk_free_alarm']
246
- report(:host => event_host,
247
- :service => "rabbitmq.node.#{n}",
248
- :state => "critical",
249
- :description => "Disk free alarm has triggered; job submission throttled"
250
- )
251
- return
265
+ def tick
266
+ check_overview
267
+ check_node if opts[:node]
268
+ check_queues
252
269
  end
253
-
254
- report(:host => event_host,
255
- :service => "rabbitmq.node.#{n}",
256
- :state => "ok",
257
- :description => "Node looks OK to me"
258
- )
259
270
  end
260
271
  end
261
-
262
- def tick
263
- check_overview
264
- check_node if opts[:node]
265
- check_queues
266
- end
267
272
  end
268
273
  Riemann::Tools::Rabbitmq.run
269
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'rubygems/package_task'
3
5
  require 'rdoc/task'