riemann-tools 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +13 -0
  4. data/.github/workflows/codeql-analysis.yml +72 -0
  5. data/.rubocop.yml +32 -0
  6. data/CHANGELOG.md +31 -2
  7. data/README.markdown +8 -24
  8. data/Rakefile +4 -2
  9. data/SECURITY.md +42 -0
  10. data/bin/riemann-apache-status +92 -78
  11. data/bin/riemann-bench +54 -49
  12. data/bin/riemann-cloudant +44 -40
  13. data/bin/riemann-consul +82 -76
  14. data/bin/riemann-dir-files-count +53 -47
  15. data/bin/riemann-dir-space +53 -47
  16. data/bin/riemann-diskstats +78 -75
  17. data/bin/riemann-fd +68 -48
  18. data/bin/riemann-freeswitch +108 -103
  19. data/bin/riemann-haproxy +46 -40
  20. data/bin/riemann-health +4 -343
  21. data/bin/riemann-kvminstance +18 -13
  22. data/bin/riemann-memcached +35 -29
  23. data/bin/riemann-net +4 -104
  24. data/bin/riemann-nginx-status +74 -67
  25. data/bin/riemann-ntp +4 -33
  26. data/bin/riemann-portcheck +40 -31
  27. data/bin/riemann-proc +96 -90
  28. data/bin/riemann-varnish +51 -45
  29. data/bin/riemann-zookeeper +38 -34
  30. data/lib/riemann/tools/health.rb +347 -0
  31. data/lib/riemann/tools/net.rb +104 -0
  32. data/lib/riemann/tools/ntp.rb +41 -0
  33. data/lib/riemann/tools/version.rb +1 -1
  34. data/lib/riemann/tools.rb +37 -40
  35. data/riemann-tools.gemspec +4 -1
  36. data/tools/riemann-aws/{Rakefile.rb → Rakefile} +2 -0
  37. data/tools/riemann-aws/bin/riemann-aws-billing +72 -66
  38. data/tools/riemann-aws/bin/riemann-aws-rds-status +55 -41
  39. data/tools/riemann-aws/bin/riemann-aws-sqs-status +37 -31
  40. data/tools/riemann-aws/bin/riemann-aws-status +63 -51
  41. data/tools/riemann-aws/bin/riemann-elb-metrics +149 -148
  42. data/tools/riemann-aws/bin/riemann-s3-list +70 -65
  43. data/tools/riemann-aws/bin/riemann-s3-status +85 -82
  44. data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +2 -0
  45. data/tools/riemann-chronos/bin/riemann-chronos +136 -119
  46. data/tools/riemann-docker/{Rakefile.rb → Rakefile} +2 -0
  47. data/tools/riemann-docker/bin/riemann-docker +163 -174
  48. data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +2 -0
  49. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +155 -147
  50. data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +2 -0
  51. data/tools/riemann-marathon/bin/riemann-marathon +138 -122
  52. data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +2 -0
  53. data/tools/riemann-mesos/bin/riemann-mesos +125 -110
  54. data/tools/riemann-munin/{Rakefile.rb → Rakefile} +2 -0
  55. data/tools/riemann-munin/bin/riemann-munin +28 -22
  56. data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +2 -0
  57. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +226 -222
  58. data/tools/riemann-riak/{Rakefile.rb → Rakefile} +2 -0
  59. data/tools/riemann-riak/bin/riemann-riak +281 -289
  60. data/tools/riemann-riak/riak_status/riak_status.rb +39 -39
  61. metadata +65 -16
@@ -1,217 +1,206 @@
1
1
  #!/usr/bin/env ruby
2
- Process.setproctitle($0)
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
3
5
 
4
6
  # Reports current CPU, disk, load average, and memory use to riemann.
5
7
 
6
8
  require 'riemann/tools'
7
9
 
8
- class Riemann::Tools::DockerHealth
9
- require 'docker'
10
- require 'socket'
11
- include Riemann::Tools
12
- include Docker
13
-
14
- opt :docker_host, "Docker Container Host (see https://github.com/swipely/docker-api#host)", :type => String, :default => nil
15
- opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
16
- opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
17
- opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
18
- opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
19
- opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
20
- opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
21
- opt :host_hostname, "Suffix of host", :type => String, :default => nil
22
- opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'memory', 'disk', 'basic']
23
-
24
- def get_containers
25
- Docker::Container.all
26
- end
27
-
28
- def get_container_name(container)
29
- container.json['Name'][1..-1]
30
- end
31
-
32
- def initialize
33
-
34
- if (opts[:docker_host] != nil)
35
- Docker.url = opts[:docker_host]
36
- end
37
-
38
- @hostname = opts[:host_hostname]
39
- if (@hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?)
40
- @hostname = Socket.gethostname
41
- end
42
-
43
- @cpu_coefficient = 1000 * 1000 * 1000
44
-
45
- @limits = {
46
- :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
47
- :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
48
- :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
49
- }
50
-
51
- @last_cpu_reads = Hash.new
52
- @last_uptime_reads = Hash.new
53
-
54
- opts[:checks].each do |check|
55
- case check
56
- when 'disk'
57
- @disk_enabled = true
58
- when 'cpu'
59
- @cpu_enabled = true
60
- when 'memory'
61
- @memory_enabled = true
62
- when 'basic'
63
- @basic_inspection_enabled = true
10
+ module Riemann
11
+ module Tools
12
+ class DockerHealth
13
+ require 'docker'
14
+ require 'socket'
15
+ include Riemann::Tools
16
+ include Docker
17
+
18
+ opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
19
+ default: nil
20
+ opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
21
+ opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
22
+ opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
23
+ opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
24
+ opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
25
+ opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
26
+ opt :host_hostname, 'Suffix of host', type: String, default: nil
27
+ opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
28
+
29
+ def containers
30
+ Docker::Container.all
64
31
  end
65
- end
66
- end
67
-
68
- def alert(container, service, state, metric, description)
69
32
 
70
- opts = { :service => service.to_s,
71
- :state => state.to_s,
72
- :metric => metric.to_f,
73
- :description => description }
74
-
75
- if (container != nil)
76
- opts[:host] = "#{@hostname}-#{container}"
77
- else
78
- opts[:host] = @hostname
79
- end
80
-
81
- report(opts)
82
- end
83
-
84
- def report_pct(container, service, fraction, report = '', name = nil)
85
- if fraction
33
+ def get_container_name(container)
34
+ container.json['Name'][1..]
35
+ end
86
36
 
87
- if (name == nil)
88
- name = service
37
+ def initialize
38
+ Docker.url = opts[:docker_host] unless opts[:docker_host].nil?
39
+
40
+ @hostname = opts[:host_hostname]
41
+ @hostname = Socket.gethostname if @hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?
42
+
43
+ @cpu_coefficient = 1000 * 1000 * 1000
44
+
45
+ @limits = {
46
+ cpu: { critical: opts[:cpu_critical], warning: opts[:cpu_warning] },
47
+ disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
48
+ memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
49
+ }
50
+
51
+ @last_cpu_reads = {}
52
+ @last_uptime_reads = {}
53
+
54
+ opts[:checks].each do |check|
55
+ case check
56
+ when 'disk'
57
+ @disk_enabled = true
58
+ when 'cpu'
59
+ @cpu_enabled = true
60
+ when 'memory'
61
+ @memory_enabled = true
62
+ when 'basic'
63
+ @basic_inspection_enabled = true
64
+ end
65
+ end
89
66
  end
90
67
 
91
- if fraction > @limits[service][:critical]
92
- alert container, name, :critical, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
93
- elsif fraction > @limits[service][:warning]
94
- alert container, name, :warning, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
95
- else
96
- alert container, name, :ok, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
68
+ def alert(container, service, state, metric, description)
69
+ opts = {
70
+ service: service.to_s,
71
+ state: state.to_s,
72
+ metric: metric.to_f,
73
+ description: description,
74
+ }
75
+
76
+ opts[:host] = if !container.nil?
77
+ "#{@hostname}-#{container}"
78
+ else
79
+ @hostname
80
+ end
81
+
82
+ report(opts)
97
83
  end
98
- end
99
- end
100
84
 
85
+ def report_pct(container, service, fraction, report = '', name = nil)
86
+ return unless fraction
101
87
 
102
- def cpu(id, name, stats)
88
+ name = service if name.nil?
103
89
 
104
- current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
90
+ if fraction > @limits[service][:critical]
91
+ alert container, name, :critical, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
92
+ elsif fraction > @limits[service][:warning]
93
+ alert container, name, :warning, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
94
+ else
95
+ alert container, name, :ok, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
96
+ end
97
+ end
105
98
 
106
- unless current
107
- alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
108
- return false
109
- end
99
+ def cpu(id, name, stats)
100
+ current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
110
101
 
111
- current_time = Time.parse(stats['read']);
112
- if (@last_cpu_reads[id] != nil)
113
- last = @last_cpu_reads[id]
114
- used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
102
+ unless current
103
+ alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
104
+ return false
105
+ end
115
106
 
116
- report_pct name, :cpu, used
117
- end
107
+ current_time = Time.parse(stats['read'])
108
+ unless @last_cpu_reads[id].nil?
109
+ last = @last_cpu_reads[id]
110
+ used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
118
111
 
119
- @last_cpu_reads[id] = { v: current, t: current_time }
120
- end
112
+ report_pct name, :cpu, used
113
+ end
121
114
 
122
- def memory(id, name, stats)
123
- memory_stats = stats['memory_stats']
124
- usage = memory_stats['usage'].to_f
125
- total = memory_stats['limit'].to_f
126
- fraction = (usage / total)
115
+ @last_cpu_reads[id] = { v: current, t: current_time }
116
+ end
127
117
 
128
- report_pct name, :memory, fraction, "#{usage} / #{total}"
129
- end
118
+ def memory(_id, name, stats)
119
+ memory_stats = stats['memory_stats']
120
+ usage = memory_stats['usage'].to_f
121
+ total = memory_stats['limit'].to_f
122
+ fraction = (usage / total)
130
123
 
131
- def disk
132
- `df -P`.split(/\n/).each do |r|
133
- f = r.split(/\s+/)
134
- next if f[0] == 'Filesystem'
135
- next unless f[0] =~ /\// # Needs at least one slash in the mount path
124
+ report_pct name, :memory, fraction, "#{usage} / #{total}"
125
+ end
136
126
 
137
- # Calculate capacity
138
- x = f[4].to_f/100
139
- report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
140
- end
141
- end
127
+ def disk
128
+ `df -P`.split(/\n/).each do |r|
129
+ f = r.split(/\s+/)
130
+ next if f[0] == 'Filesystem'
131
+ next unless f[0] =~ %r{/} # Needs at least one slash in the mount path
142
132
 
143
- def basic_inspection(id, name, inspection)
133
+ # Calculate capacity
134
+ x = f[4].to_f / 100
135
+ report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
136
+ end
137
+ end
144
138
 
145
- state = inspection['State']
146
- json_state = JSON.generate(state)
139
+ def basic_inspection(id, name, inspection)
140
+ state = inspection['State']
141
+ json_state = JSON.generate(state)
147
142
 
148
- running = state['Running']
143
+ running = state['Running']
149
144
 
150
- alert(name, "status",
151
- running ? "ok" : "critical",
145
+ alert(
146
+ name, 'status',
147
+ running ? 'ok' : 'critical',
152
148
  running ? 1 : 0,
153
- json_state)
154
-
155
- if (running)
156
- start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
157
- now = DateTime.now.to_time.utc.to_i
158
- uptime = now - start_time
159
-
160
- if (@last_uptime_reads[id] != nil)
161
- last = @last_uptime_reads[id]
162
- restarted = start_time != last
163
- alert(name, "uptime",
164
- restarted ? "critical" : "ok",
165
- uptime,
166
- "last 'StartedAt' measure was #{last} (#{Time.at(last).utc.to_s}), " +
167
- "now it's #{start_time} (#{Time.at(start_time).utc.to_s})")
168
- end
169
-
170
- @last_uptime_reads[id] = start_time
171
- end
172
- end
173
-
174
- def tick
149
+ json_state,
150
+ )
151
+
152
+ return unless running
153
+
154
+ start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
155
+ now = DateTime.now.to_time.utc.to_i
156
+ uptime = now - start_time
157
+
158
+ unless @last_uptime_reads[id].nil?
159
+ last = @last_uptime_reads[id]
160
+ restarted = start_time != last
161
+ alert(
162
+ name, 'uptime',
163
+ restarted ? 'critical' : 'ok',
164
+ uptime,
165
+ "last 'StartedAt' measure was #{last} (#{Time.at(last).utc}), " \
166
+ "now it's #{start_time} (#{Time.at(start_time).utc})",
167
+ )
168
+ end
175
169
 
176
- # Disk is the same in every container
177
- if @disk_enabled
178
- disk()
179
- end
170
+ @last_uptime_reads[id] = start_time
171
+ end
180
172
 
181
- # Get CPU, Memory and Load of each container
182
- containers = get_containers()
183
- threads = []
173
+ def tick
174
+ # Disk is the same in every container
175
+ disk if @disk_enabled
184
176
 
185
- containers.each do |ctr|
186
- threads << Thread.new(ctr) do |container|
177
+ # Get CPU, Memory and Load of each container
178
+ threads = []
187
179
 
188
- id = container.id
189
- name = get_container_name(container)
180
+ containers.each do |ctr|
181
+ threads << Thread.new(ctr) do |container|
182
+ id = container.id
183
+ name = get_container_name(container)
190
184
 
191
- stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", {stream:false}))
185
+ stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
192
186
 
193
- if @basic_inspection_enabled
194
- inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
195
- basic_inspection(id, name, inspection)
187
+ if @basic_inspection_enabled
188
+ inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
189
+ basic_inspection(id, name, inspection)
190
+ end
191
+ cpu(id, name, stats) if @cpu_enabled
192
+ memory(id, name, stats) if @memory_enabled
196
193
  end
197
- if @cpu_enabled
198
- cpu(id, name, stats)
199
- end
200
- if @memory_enabled
201
- memory(id, name, stats)
202
194
  end
203
- end
204
- end
205
195
 
206
- threads.each do |thread|
207
- begin
208
- thread.join
209
- rescue => e
210
- $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
196
+ threads.each do |thread|
197
+ thread.join
198
+ rescue StandardError => e
199
+ warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
200
+ end
211
201
  end
212
202
  end
213
203
  end
214
204
  end
215
205
 
216
206
  Riemann::Tools::DockerHealth.run
217
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'rubygems/package_task'
3
5
  require 'rdoc/task'