riemann-tools 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +13 -0
  4. data/.github/workflows/codeql-analysis.yml +72 -0
  5. data/.rubocop.yml +32 -0
  6. data/CHANGELOG.md +31 -2
  7. data/README.markdown +8 -24
  8. data/Rakefile +4 -2
  9. data/SECURITY.md +42 -0
  10. data/bin/riemann-apache-status +92 -78
  11. data/bin/riemann-bench +54 -49
  12. data/bin/riemann-cloudant +44 -40
  13. data/bin/riemann-consul +82 -76
  14. data/bin/riemann-dir-files-count +53 -47
  15. data/bin/riemann-dir-space +53 -47
  16. data/bin/riemann-diskstats +78 -75
  17. data/bin/riemann-fd +68 -48
  18. data/bin/riemann-freeswitch +108 -103
  19. data/bin/riemann-haproxy +46 -40
  20. data/bin/riemann-health +4 -343
  21. data/bin/riemann-kvminstance +18 -13
  22. data/bin/riemann-memcached +35 -29
  23. data/bin/riemann-net +4 -104
  24. data/bin/riemann-nginx-status +74 -67
  25. data/bin/riemann-ntp +4 -33
  26. data/bin/riemann-portcheck +40 -31
  27. data/bin/riemann-proc +96 -90
  28. data/bin/riemann-varnish +51 -45
  29. data/bin/riemann-zookeeper +38 -34
  30. data/lib/riemann/tools/health.rb +347 -0
  31. data/lib/riemann/tools/net.rb +104 -0
  32. data/lib/riemann/tools/ntp.rb +41 -0
  33. data/lib/riemann/tools/version.rb +1 -1
  34. data/lib/riemann/tools.rb +37 -40
  35. data/riemann-tools.gemspec +4 -1
  36. data/tools/riemann-aws/{Rakefile.rb → Rakefile} +2 -0
  37. data/tools/riemann-aws/bin/riemann-aws-billing +72 -66
  38. data/tools/riemann-aws/bin/riemann-aws-rds-status +55 -41
  39. data/tools/riemann-aws/bin/riemann-aws-sqs-status +37 -31
  40. data/tools/riemann-aws/bin/riemann-aws-status +63 -51
  41. data/tools/riemann-aws/bin/riemann-elb-metrics +149 -148
  42. data/tools/riemann-aws/bin/riemann-s3-list +70 -65
  43. data/tools/riemann-aws/bin/riemann-s3-status +85 -82
  44. data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +2 -0
  45. data/tools/riemann-chronos/bin/riemann-chronos +136 -119
  46. data/tools/riemann-docker/{Rakefile.rb → Rakefile} +2 -0
  47. data/tools/riemann-docker/bin/riemann-docker +163 -174
  48. data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +2 -0
  49. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +155 -147
  50. data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +2 -0
  51. data/tools/riemann-marathon/bin/riemann-marathon +138 -122
  52. data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +2 -0
  53. data/tools/riemann-mesos/bin/riemann-mesos +125 -110
  54. data/tools/riemann-munin/{Rakefile.rb → Rakefile} +2 -0
  55. data/tools/riemann-munin/bin/riemann-munin +28 -22
  56. data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +2 -0
  57. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +226 -222
  58. data/tools/riemann-riak/{Rakefile.rb → Rakefile} +2 -0
  59. data/tools/riemann-riak/bin/riemann-riak +281 -289
  60. data/tools/riemann-riak/riak_status/riak_status.rb +39 -39
  61. metadata +65 -16
@@ -1,217 +1,206 @@
1
1
  #!/usr/bin/env ruby
2
- Process.setproctitle($0)
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
3
5
 
4
6
  # Reports current CPU, disk, load average, and memory use to riemann.
5
7
 
6
8
  require 'riemann/tools'
7
9
 
8
- class Riemann::Tools::DockerHealth
9
- require 'docker'
10
- require 'socket'
11
- include Riemann::Tools
12
- include Docker
13
-
14
- opt :docker_host, "Docker Container Host (see https://github.com/swipely/docker-api#host)", :type => String, :default => nil
15
- opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
16
- opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
17
- opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
18
- opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
19
- opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
20
- opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
21
- opt :host_hostname, "Suffix of host", :type => String, :default => nil
22
- opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'memory', 'disk', 'basic']
23
-
24
- def get_containers
25
- Docker::Container.all
26
- end
27
-
28
- def get_container_name(container)
29
- container.json['Name'][1..-1]
30
- end
31
-
32
- def initialize
33
-
34
- if (opts[:docker_host] != nil)
35
- Docker.url = opts[:docker_host]
36
- end
37
-
38
- @hostname = opts[:host_hostname]
39
- if (@hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?)
40
- @hostname = Socket.gethostname
41
- end
42
-
43
- @cpu_coefficient = 1000 * 1000 * 1000
44
-
45
- @limits = {
46
- :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
47
- :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
48
- :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
49
- }
50
-
51
- @last_cpu_reads = Hash.new
52
- @last_uptime_reads = Hash.new
53
-
54
- opts[:checks].each do |check|
55
- case check
56
- when 'disk'
57
- @disk_enabled = true
58
- when 'cpu'
59
- @cpu_enabled = true
60
- when 'memory'
61
- @memory_enabled = true
62
- when 'basic'
63
- @basic_inspection_enabled = true
10
+ module Riemann
11
+ module Tools
12
+ class DockerHealth
13
+ require 'docker'
14
+ require 'socket'
15
+ include Riemann::Tools
16
+ include Docker
17
+
18
+ opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
19
+ default: nil
20
+ opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
21
+ opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
22
+ opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
23
+ opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
24
+ opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
25
+ opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
26
+ opt :host_hostname, 'Suffix of host', type: String, default: nil
27
+ opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
28
+
29
+ def containers
30
+ Docker::Container.all
64
31
  end
65
- end
66
- end
67
-
68
- def alert(container, service, state, metric, description)
69
32
 
70
- opts = { :service => service.to_s,
71
- :state => state.to_s,
72
- :metric => metric.to_f,
73
- :description => description }
74
-
75
- if (container != nil)
76
- opts[:host] = "#{@hostname}-#{container}"
77
- else
78
- opts[:host] = @hostname
79
- end
80
-
81
- report(opts)
82
- end
83
-
84
- def report_pct(container, service, fraction, report = '', name = nil)
85
- if fraction
33
+ def get_container_name(container)
34
+ container.json['Name'][1..]
35
+ end
86
36
 
87
- if (name == nil)
88
- name = service
37
+ def initialize
38
+ Docker.url = opts[:docker_host] unless opts[:docker_host].nil?
39
+
40
+ @hostname = opts[:host_hostname]
41
+ @hostname = Socket.gethostname if @hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?
42
+
43
+ @cpu_coefficient = 1000 * 1000 * 1000
44
+
45
+ @limits = {
46
+ cpu: { critical: opts[:cpu_critical], warning: opts[:cpu_warning] },
47
+ disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
48
+ memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
49
+ }
50
+
51
+ @last_cpu_reads = {}
52
+ @last_uptime_reads = {}
53
+
54
+ opts[:checks].each do |check|
55
+ case check
56
+ when 'disk'
57
+ @disk_enabled = true
58
+ when 'cpu'
59
+ @cpu_enabled = true
60
+ when 'memory'
61
+ @memory_enabled = true
62
+ when 'basic'
63
+ @basic_inspection_enabled = true
64
+ end
65
+ end
89
66
  end
90
67
 
91
- if fraction > @limits[service][:critical]
92
- alert container, name, :critical, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
93
- elsif fraction > @limits[service][:warning]
94
- alert container, name, :warning, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
95
- else
96
- alert container, name, :ok, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
68
+ def alert(container, service, state, metric, description)
69
+ opts = {
70
+ service: service.to_s,
71
+ state: state.to_s,
72
+ metric: metric.to_f,
73
+ description: description,
74
+ }
75
+
76
+ opts[:host] = if !container.nil?
77
+ "#{@hostname}-#{container}"
78
+ else
79
+ @hostname
80
+ end
81
+
82
+ report(opts)
97
83
  end
98
- end
99
- end
100
84
 
85
+ def report_pct(container, service, fraction, report = '', name = nil)
86
+ return unless fraction
101
87
 
102
- def cpu(id, name, stats)
88
+ name = service if name.nil?
103
89
 
104
- current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
90
+ if fraction > @limits[service][:critical]
91
+ alert container, name, :critical, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
92
+ elsif fraction > @limits[service][:warning]
93
+ alert container, name, :warning, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
94
+ else
95
+ alert container, name, :ok, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
96
+ end
97
+ end
105
98
 
106
- unless current
107
- alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
108
- return false
109
- end
99
+ def cpu(id, name, stats)
100
+ current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
110
101
 
111
- current_time = Time.parse(stats['read']);
112
- if (@last_cpu_reads[id] != nil)
113
- last = @last_cpu_reads[id]
114
- used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
102
+ unless current
103
+ alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
104
+ return false
105
+ end
115
106
 
116
- report_pct name, :cpu, used
117
- end
107
+ current_time = Time.parse(stats['read'])
108
+ unless @last_cpu_reads[id].nil?
109
+ last = @last_cpu_reads[id]
110
+ used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
118
111
 
119
- @last_cpu_reads[id] = { v: current, t: current_time }
120
- end
112
+ report_pct name, :cpu, used
113
+ end
121
114
 
122
- def memory(id, name, stats)
123
- memory_stats = stats['memory_stats']
124
- usage = memory_stats['usage'].to_f
125
- total = memory_stats['limit'].to_f
126
- fraction = (usage / total)
115
+ @last_cpu_reads[id] = { v: current, t: current_time }
116
+ end
127
117
 
128
- report_pct name, :memory, fraction, "#{usage} / #{total}"
129
- end
118
+ def memory(_id, name, stats)
119
+ memory_stats = stats['memory_stats']
120
+ usage = memory_stats['usage'].to_f
121
+ total = memory_stats['limit'].to_f
122
+ fraction = (usage / total)
130
123
 
131
- def disk
132
- `df -P`.split(/\n/).each do |r|
133
- f = r.split(/\s+/)
134
- next if f[0] == 'Filesystem'
135
- next unless f[0] =~ /\// # Needs at least one slash in the mount path
124
+ report_pct name, :memory, fraction, "#{usage} / #{total}"
125
+ end
136
126
 
137
- # Calculate capacity
138
- x = f[4].to_f/100
139
- report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
140
- end
141
- end
127
+ def disk
128
+ `df -P`.split(/\n/).each do |r|
129
+ f = r.split(/\s+/)
130
+ next if f[0] == 'Filesystem'
131
+ next unless f[0] =~ %r{/} # Needs at least one slash in the mount path
142
132
 
143
- def basic_inspection(id, name, inspection)
133
+ # Calculate capacity
134
+ x = f[4].to_f / 100
135
+ report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
136
+ end
137
+ end
144
138
 
145
- state = inspection['State']
146
- json_state = JSON.generate(state)
139
+ def basic_inspection(id, name, inspection)
140
+ state = inspection['State']
141
+ json_state = JSON.generate(state)
147
142
 
148
- running = state['Running']
143
+ running = state['Running']
149
144
 
150
- alert(name, "status",
151
- running ? "ok" : "critical",
145
+ alert(
146
+ name, 'status',
147
+ running ? 'ok' : 'critical',
152
148
  running ? 1 : 0,
153
- json_state)
154
-
155
- if (running)
156
- start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
157
- now = DateTime.now.to_time.utc.to_i
158
- uptime = now - start_time
159
-
160
- if (@last_uptime_reads[id] != nil)
161
- last = @last_uptime_reads[id]
162
- restarted = start_time != last
163
- alert(name, "uptime",
164
- restarted ? "critical" : "ok",
165
- uptime,
166
- "last 'StartedAt' measure was #{last} (#{Time.at(last).utc.to_s}), " +
167
- "now it's #{start_time} (#{Time.at(start_time).utc.to_s})")
168
- end
169
-
170
- @last_uptime_reads[id] = start_time
171
- end
172
- end
173
-
174
- def tick
149
+ json_state,
150
+ )
151
+
152
+ return unless running
153
+
154
+ start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
155
+ now = DateTime.now.to_time.utc.to_i
156
+ uptime = now - start_time
157
+
158
+ unless @last_uptime_reads[id].nil?
159
+ last = @last_uptime_reads[id]
160
+ restarted = start_time != last
161
+ alert(
162
+ name, 'uptime',
163
+ restarted ? 'critical' : 'ok',
164
+ uptime,
165
+ "last 'StartedAt' measure was #{last} (#{Time.at(last).utc}), " \
166
+ "now it's #{start_time} (#{Time.at(start_time).utc})",
167
+ )
168
+ end
175
169
 
176
- # Disk is the same in every container
177
- if @disk_enabled
178
- disk()
179
- end
170
+ @last_uptime_reads[id] = start_time
171
+ end
180
172
 
181
- # Get CPU, Memory and Load of each container
182
- containers = get_containers()
183
- threads = []
173
+ def tick
174
+ # Disk is the same in every container
175
+ disk if @disk_enabled
184
176
 
185
- containers.each do |ctr|
186
- threads << Thread.new(ctr) do |container|
177
+ # Get CPU, Memory and Load of each container
178
+ threads = []
187
179
 
188
- id = container.id
189
- name = get_container_name(container)
180
+ containers.each do |ctr|
181
+ threads << Thread.new(ctr) do |container|
182
+ id = container.id
183
+ name = get_container_name(container)
190
184
 
191
- stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", {stream:false}))
185
+ stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
192
186
 
193
- if @basic_inspection_enabled
194
- inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
195
- basic_inspection(id, name, inspection)
187
+ if @basic_inspection_enabled
188
+ inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
189
+ basic_inspection(id, name, inspection)
190
+ end
191
+ cpu(id, name, stats) if @cpu_enabled
192
+ memory(id, name, stats) if @memory_enabled
196
193
  end
197
- if @cpu_enabled
198
- cpu(id, name, stats)
199
- end
200
- if @memory_enabled
201
- memory(id, name, stats)
202
194
  end
203
- end
204
- end
205
195
 
206
- threads.each do |thread|
207
- begin
208
- thread.join
209
- rescue => e
210
- $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
196
+ threads.each do |thread|
197
+ thread.join
198
+ rescue StandardError => e
199
+ warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
200
+ end
211
201
  end
212
202
  end
213
203
  end
214
204
  end
215
205
 
216
206
  Riemann::Tools::DockerHealth.run
217
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'rubygems/package_task'
3
5
  require 'rdoc/task'