riemann-tools 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +13 -0
  4. data/.github/workflows/codeql-analysis.yml +72 -0
  5. data/.rubocop.yml +32 -0
  6. data/CHANGELOG.md +31 -2
  7. data/README.markdown +8 -24
  8. data/Rakefile +4 -2
  9. data/SECURITY.md +42 -0
  10. data/bin/riemann-apache-status +92 -78
  11. data/bin/riemann-bench +54 -49
  12. data/bin/riemann-cloudant +44 -40
  13. data/bin/riemann-consul +82 -76
  14. data/bin/riemann-dir-files-count +53 -47
  15. data/bin/riemann-dir-space +53 -47
  16. data/bin/riemann-diskstats +78 -75
  17. data/bin/riemann-fd +68 -48
  18. data/bin/riemann-freeswitch +108 -103
  19. data/bin/riemann-haproxy +46 -40
  20. data/bin/riemann-health +4 -343
  21. data/bin/riemann-kvminstance +18 -13
  22. data/bin/riemann-memcached +35 -29
  23. data/bin/riemann-net +4 -104
  24. data/bin/riemann-nginx-status +74 -67
  25. data/bin/riemann-ntp +4 -33
  26. data/bin/riemann-portcheck +40 -31
  27. data/bin/riemann-proc +96 -90
  28. data/bin/riemann-varnish +51 -45
  29. data/bin/riemann-zookeeper +38 -34
  30. data/lib/riemann/tools/health.rb +347 -0
  31. data/lib/riemann/tools/net.rb +104 -0
  32. data/lib/riemann/tools/ntp.rb +41 -0
  33. data/lib/riemann/tools/version.rb +1 -1
  34. data/lib/riemann/tools.rb +37 -40
  35. data/riemann-tools.gemspec +4 -1
  36. data/tools/riemann-aws/{Rakefile.rb → Rakefile} +2 -0
  37. data/tools/riemann-aws/bin/riemann-aws-billing +72 -66
  38. data/tools/riemann-aws/bin/riemann-aws-rds-status +55 -41
  39. data/tools/riemann-aws/bin/riemann-aws-sqs-status +37 -31
  40. data/tools/riemann-aws/bin/riemann-aws-status +63 -51
  41. data/tools/riemann-aws/bin/riemann-elb-metrics +149 -148
  42. data/tools/riemann-aws/bin/riemann-s3-list +70 -65
  43. data/tools/riemann-aws/bin/riemann-s3-status +85 -82
  44. data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +2 -0
  45. data/tools/riemann-chronos/bin/riemann-chronos +136 -119
  46. data/tools/riemann-docker/{Rakefile.rb → Rakefile} +2 -0
  47. data/tools/riemann-docker/bin/riemann-docker +163 -174
  48. data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +2 -0
  49. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +155 -147
  50. data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +2 -0
  51. data/tools/riemann-marathon/bin/riemann-marathon +138 -122
  52. data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +2 -0
  53. data/tools/riemann-mesos/bin/riemann-mesos +125 -110
  54. data/tools/riemann-munin/{Rakefile.rb → Rakefile} +2 -0
  55. data/tools/riemann-munin/bin/riemann-munin +28 -22
  56. data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +2 -0
  57. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +226 -222
  58. data/tools/riemann-riak/{Rakefile.rb → Rakefile} +2 -0
  59. data/tools/riemann-riak/bin/riemann-riak +281 -289
  60. data/tools/riemann-riak/riak_status/riak_status.rb +39 -39
  61. metadata +65 -16
@@ -1,269 +1,273 @@
1
1
  #!/usr/bin/env ruby
2
- Process.setproctitle($0)
2
+ # frozen_string_literal: true
3
3
 
4
- require 'riemann/tools'
4
+ Process.setproctitle($PROGRAM_NAME)
5
5
 
6
- class Riemann::Tools::Rabbitmq
7
- include Riemann::Tools
6
+ require 'riemann/tools'
8
7
 
9
- require 'faraday'
10
- require 'json'
11
- require 'uri'
8
+ module Riemann
9
+ module Tools
10
+ class Rabbitmq
11
+ include Riemann::Tools
12
12
 
13
+ require 'faraday'
14
+ require 'json'
15
+ require 'uri'
13
16
 
14
- opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
15
- opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
17
+ opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
18
+ opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
16
19
 
17
- opt :monitor_user, 'RabbitMQ monitoring user', type: :string
18
- opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
19
- opt :monitor_port, 'RabbitMQ monitoring port', type: :int, default: 15672
20
- opt :monitor_host, 'RabbitMQ monitoring host', type: :string, default: "localhost"
21
- opt :monitor_use_tls, 'RabbitMQ use tls', type: :bool, default: false
20
+ opt :monitor_user, 'RabbitMQ monitoring user', type: :string
21
+ opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
22
+ opt :monitor_port, 'RabbitMQ monitoring port', type: :int, default: 15_672
23
+ opt :monitor_host, 'RabbitMQ monitoring host', type: :string, default: 'localhost'
24
+ opt :monitor_use_tls, 'RabbitMQ use tls', type: :bool, default: false
22
25
 
23
- opt :max_queue_size, "max number of items in a queue that is acceptable", type: :int, default: 1_000_000
24
- opt :ignore_max_size_queues, "A regular expression to match queues that shouldn't be size-checked", type: :string
26
+ opt :max_queue_size, 'max number of items in a queue that is acceptable', type: :int, default: 1_000_000
27
+ opt :ignore_max_size_queues, "A regular expression to match queues that shouldn't be size-checked", type: :string
25
28
 
26
- opt :node, "Specify a node to monitor", type: :strings
29
+ opt :node, 'Specify a node to monitor', type: :strings
27
30
 
28
- def base_url
29
- protocol = "http"
30
- if (options[:monitor_use_tls]) && (options[:monitor_use_tls]==true)
31
- protocol = "https"
32
- end
33
- "#{protocol}://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api"
34
- end
31
+ def base_url
32
+ protocol = 'http'
33
+ protocol = 'https' if options[:monitor_use_tls] && (options[:monitor_use_tls] == true)
34
+ "#{protocol}://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api"
35
+ end
35
36
 
36
- def overview_url
37
- "#{base_url}/overview"
38
- end
37
+ def overview_url
38
+ "#{base_url}/overview"
39
+ end
39
40
 
40
- def node_url(n)
41
- "#{base_url}/nodes/#{n}"
42
- end
41
+ def node_url(node)
42
+ "#{base_url}/nodes/#{node}"
43
+ end
43
44
 
44
- def queues_url
45
- "#{base_url}/queues"
46
- end
45
+ def queues_url
46
+ "#{base_url}/queues"
47
+ end
47
48
 
48
- def event_host
49
- if options[:event_host]
50
- return options[:event_host]
51
- else
52
- return options[:monitor_host]
53
- end
54
- end
49
+ def event_host
50
+ options[:event_host] || :monitor_host
51
+ end
55
52
 
56
- def safe_get(uri, event_host)
57
- # Handle connection timeouts
58
- response = nil
59
- begin
60
- connection = Faraday.new(uri)
61
- response = connection.get do |req|
62
- req.options[:timeout] = options[:read_timeout]
63
- req.options[:open_timeout] = options[:open_timeout]
53
+ def safe_get(uri, event_host)
54
+ # Handle connection timeouts
55
+ response = nil
56
+ begin
57
+ connection = Faraday.new(uri)
58
+ response = connection.get do |req|
59
+ req.options[:timeout] = options[:read_timeout]
60
+ req.options[:open_timeout] = options[:open_timeout]
61
+ end
62
+ report(
63
+ host: event_host,
64
+ service: 'rabbitmq monitoring',
65
+ state: 'ok',
66
+ description: 'Monitoring operational',
67
+ )
68
+ rescue StandardError => e
69
+ report(
70
+ host: event_host,
71
+ service: 'rabbitmq monitoring',
72
+ state: 'critical',
73
+ description: "HTTP connection error: #{e.class} - #{e.message}",
74
+ )
64
75
  end
65
- report(:host => event_host,
66
- :service => "rabbitmq monitoring",
67
- :state => 'ok',
68
- :description => "Monitoring operational"
69
- )
70
- rescue => e
71
- report(:host => event_host,
72
- :service => "rabbitmq monitoring",
73
- :state => "critical",
74
- :description => "HTTP connection error: #{e.class} - #{e.message}"
75
- )
76
+ response
76
77
  end
77
- response
78
- end
79
-
80
- def check_queues
81
- response = safe_get(queues_url, event_host)
82
- max_size_check_filter = if options[:ignore_max_size_queues]
83
- Regexp.new(options[:ignore_max_size_queues])
84
- else
85
- nil
86
- end
87
78
 
88
- return if response.nil?
79
+ def check_queues
80
+ response = safe_get(queues_url, event_host)
81
+ max_size_check_filter = (Regexp.new(options[:ignore_max_size_queues]) if options[:ignore_max_size_queues])
89
82
 
90
- json = JSON.parse(response.body)
83
+ return if response.nil?
91
84
 
92
- if response.status != 200
93
- report(:host => event_host,
94
- :service => "rabbitmq.queue",
95
- :state => "critical",
96
- :description => "HTTP connection error to /api/queues: #{response.status} - #{response.body}"
97
- )
98
- else
99
- report(:host => event_host,
100
- :service => "rabbitmq.queue",
101
- :state => "ok",
102
- :description => "HTTP connection ok"
103
- )
85
+ if response.status != 200
86
+ report(
87
+ host: event_host,
88
+ service: 'rabbitmq.queue',
89
+ state: 'critical',
90
+ description: "HTTP connection error to /api/queues: #{response.status} - #{response.body}",
91
+ )
92
+ else
93
+ report(
94
+ host: event_host,
95
+ service: 'rabbitmq.queue',
96
+ state: 'ok',
97
+ description: 'HTTP connection ok',
98
+ )
104
99
 
105
- json = JSON.parse(response.body)
100
+ json = JSON.parse(response.body)
101
+
102
+ json.each do |queue|
103
+ svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
104
+ errs = []
105
+
106
+ errs << 'Queue has jobs but no consumers' if !queue['messages_ready'].nil? && (queue['messages_ready']).positive? && (queue['consumers']).zero?
107
+
108
+ errs << "Queue has #{queue['messages_ready']} jobs" if (max_size_check_filter.nil? || queue['name'] !~ (max_size_check_filter)) && !queue['messages_ready'].nil? && (queue['messages_ready'] > options[:max_queue_size])
109
+
110
+ if errs.empty?
111
+ report(
112
+ host: event_host,
113
+ service: svc,
114
+ state: 'ok',
115
+ description: 'Queue is looking good',
116
+ )
117
+ else
118
+ report(
119
+ host: event_host,
120
+ service: svc,
121
+ state: 'critical',
122
+ description: errs.join('; '),
123
+ )
124
+ end
125
+
126
+ stats = (queue['message_stats'] || {}).merge(
127
+ 'messages' => queue['messages'],
128
+ 'messages_details' => queue['messages_details'],
129
+ 'messages_ready' => queue['messages_ready'],
130
+ 'messages_ready_details' => queue['messages_ready_details'],
131
+ 'messages_unacknowledged' => queue['messages_unacknowledged'],
132
+ 'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
133
+ 'consumers' => queue['consumers'],
134
+ 'memory' => queue['memory'],
135
+ )
136
+
137
+ stats.each_pair do |k, v|
138
+ service = "#{svc}.#{k}"
139
+ metric = if k =~ (/details$/) && !v.nil?
140
+ v['rate']
141
+ else
142
+ v
143
+ end
144
+
145
+ # TODO: Set state via thresholds which can be configured
146
+
147
+ report(
148
+ host: event_host,
149
+ service: service,
150
+ metric: metric,
151
+ description: 'RabbitMQ monitor',
152
+ )
153
+ end
154
+ end
155
+ end
156
+ end
106
157
 
107
- json.each do |queue|
108
- svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
109
- errs = []
158
+ def check_overview
159
+ uri = URI(overview_url)
160
+ response = safe_get(uri, event_host)
110
161
 
111
- if queue['messages_ready']!=nil and queue['messages_ready'] > 0 and queue['consumers'] == 0
112
- errs << "Queue has jobs but no consumers"
113
- end
162
+ return if response.nil?
114
163
 
115
- if (max_size_check_filter.nil? or queue['name'] !~ max_size_check_filter) and queue['messages_ready']!=nil and queue['messages_ready'] > options[:max_queue_size]
116
- errs << "Queue has #{queue['messages_ready']} jobs"
117
- end
164
+ json = JSON.parse(response.body)
118
165
 
119
- if errs.empty?
120
- report(:host => event_host,
121
- :service => svc,
122
- :state => "ok",
123
- :description => "Queue is looking good"
166
+ if response.status != 200
167
+ report(
168
+ host: event_host,
169
+ service: 'rabbitmq',
170
+ state: 'critical',
171
+ description: "HTTP connection error: #{response.status} - #{response.body}",
124
172
  )
125
173
  else
126
- report(:host => event_host,
127
- :service => svc,
128
- :state => "critical",
129
- :description => errs.join("; ")
174
+ report(
175
+ host: event_host,
176
+ service: 'rabbitmq monitoring',
177
+ state: 'ok',
178
+ description: 'HTTP connection ok',
130
179
  )
131
- end
132
180
 
133
- stats = (queue['message_stats'] || {}).merge(
134
- 'messages' => queue['messages'],
135
- 'messages_details' => queue['messages_details'],
136
- 'messages_ready' => queue['messages_ready'],
137
- 'messages_ready_details' => queue['messages_ready_details'],
138
- 'messages_unacknowledged' => queue['messages_unacknowledged'],
139
- 'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
140
- 'consumers' => queue['consumers'],
141
- 'memory' => queue['memory'],
142
- )
143
-
144
- stats.each_pair do |k,v|
145
- service = "#{svc}.#{k}"
146
- if k =~ /details$/ and v!=nil
147
- metric = v['rate']
148
- else
149
- metric = v
181
+ %w[message_stats queue_totals object_totals].each do |stat|
182
+ # NOTE: / BUG ?
183
+ # Brand new servers can have blank message stats. Is this ok?
184
+ # I can't decide.
185
+ next if json[stat].empty?
186
+
187
+ json[stat].each_pair do |k, v|
188
+ service = "rabbitmq.#{stat}.#{k}"
189
+ metric = if k =~ /details$/
190
+ v['rate']
191
+ else
192
+ v
193
+ end
194
+
195
+ # TODO: Set state via thresholds which can be configured
196
+
197
+ report(
198
+ host: event_host,
199
+ service: service,
200
+ metric: metric,
201
+ description: 'RabbitMQ monitor',
202
+ )
203
+ end
150
204
  end
151
-
152
- # TODO: Set state via thresholds which can be configured
153
-
154
- report(:host => event_host,
155
- :service => service,
156
- :metric => metric,
157
- :description => "RabbitMQ monitor"
158
- )
159
205
  end
160
206
  end
161
- end
162
- end
163
207
 
164
- def check_overview
165
- uri = URI(overview_url)
166
- response = safe_get(uri, event_host)
167
-
168
- return if response.nil?
169
-
170
- json = JSON.parse(response.body)
171
-
172
- if response.status != 200
173
- report(:host => event_host,
174
- :service => "rabbitmq",
175
- :state => "critical",
176
- :description => "HTTP connection error: #{response.status} - #{response.body}"
177
- )
178
- else
179
- report(:host => event_host,
180
- :service => "rabbitmq monitoring",
181
- :state => "ok",
182
- :description => "HTTP connection ok"
183
- )
184
-
185
- %w( message_stats queue_totals object_totals ).each do |stat|
186
- # NOTE / BUG ?
187
- # Brand new servers can have blank message stats. Is this ok?
188
- # I can't decide.
189
- next if json[stat].empty?
190
- json[stat].each_pair do |k,v|
191
- service = "rabbitmq.#{stat}.#{k}"
192
- if k =~ /details$/
193
- metric = v['rate']
194
- else
195
- metric = v
208
+ def check_node
209
+ opts[:node].each do |n|
210
+ uri = URI(node_url(n))
211
+ response = safe_get(uri, event_host)
212
+
213
+ break if response.nil?
214
+
215
+ if response.status != 200
216
+ if response.status == 404
217
+ report(
218
+ host: event_host,
219
+ service: "rabbitmq.node.#{n}",
220
+ state: 'critical',
221
+ description: 'Node was not found in the cluster',
222
+ )
223
+ else
224
+ report(
225
+ host: event_host,
226
+ service: "rabbitmq.node.#{n}",
227
+ state: 'critical',
228
+ description: "HTTP error: #{response.status} - #{response.body}",
229
+ )
230
+ end
231
+ break
196
232
  end
197
233
 
198
- # TODO: Set state via thresholds which can be configured
234
+ json = JSON.parse(response.body)
199
235
 
200
- report(:host => event_host,
201
- :service => service,
202
- :metric => metric,
203
- :description => "RabbitMQ monitor"
204
- )
205
- end
206
- end
207
- end
208
- end
209
-
210
- def check_node
211
- opts[:node].each do |n|
212
- uri = URI(node_url(n))
213
- response = safe_get(uri, event_host)
236
+ if json['mem_alarm']
237
+ report(
238
+ host: event_host,
239
+ service: "rabbitmq.node.#{n}",
240
+ state: 'critical',
241
+ description: 'Memory alarm has triggered; job submission throttled',
242
+ )
243
+ break
244
+ end
214
245
 
215
- return if response.nil?
246
+ if json['disk_free_alarm']
247
+ report(
248
+ host: event_host,
249
+ service: "rabbitmq.node.#{n}",
250
+ state: 'critical',
251
+ description: 'Disk free alarm has triggered; job submission throttled',
252
+ )
253
+ break
254
+ end
216
255
 
217
- if response.status != 200
218
- if response.status == 404
219
- report(:host => event_host,
220
- :service => "rabbitmq.node.#{n}",
221
- :state => "critical",
222
- :description => "Node was not found in the cluster"
223
- )
224
- else
225
- report(:host => event_host,
226
- :service => "rabbitmq.node.#{n}",
227
- :state => "critical",
228
- :description => "HTTP error: #{response.status} - #{response.body}"
256
+ report(
257
+ host: event_host,
258
+ service: "rabbitmq.node.#{n}",
259
+ state: 'ok',
260
+ description: 'Node looks OK to me',
229
261
  )
230
262
  end
231
- return
232
- end
233
-
234
- json = JSON.parse(response.body)
235
-
236
- if json['mem_alarm']
237
- report(:host => event_host,
238
- :service => "rabbitmq.node.#{n}",
239
- :state => "critical",
240
- :description => "Memory alarm has triggered; job submission throttled"
241
- )
242
- return
243
263
  end
244
264
 
245
- if json['disk_free_alarm']
246
- report(:host => event_host,
247
- :service => "rabbitmq.node.#{n}",
248
- :state => "critical",
249
- :description => "Disk free alarm has triggered; job submission throttled"
250
- )
251
- return
265
+ def tick
266
+ check_overview
267
+ check_node if opts[:node]
268
+ check_queues
252
269
  end
253
-
254
- report(:host => event_host,
255
- :service => "rabbitmq.node.#{n}",
256
- :state => "ok",
257
- :description => "Node looks OK to me"
258
- )
259
270
  end
260
271
  end
261
-
262
- def tick
263
- check_overview
264
- check_node if opts[:node]
265
- check_queues
266
- end
267
272
  end
268
273
  Riemann::Tools::Rabbitmq.run
269
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'rubygems/package_task'
3
5
  require 'rdoc/task'