riemann-tools 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +13 -0
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.rubocop.yml +32 -0
- data/CHANGELOG.md +31 -2
- data/README.markdown +8 -24
- data/Rakefile +4 -2
- data/SECURITY.md +42 -0
- data/bin/riemann-apache-status +92 -78
- data/bin/riemann-bench +54 -49
- data/bin/riemann-cloudant +44 -40
- data/bin/riemann-consul +82 -76
- data/bin/riemann-dir-files-count +53 -47
- data/bin/riemann-dir-space +53 -47
- data/bin/riemann-diskstats +78 -75
- data/bin/riemann-fd +68 -48
- data/bin/riemann-freeswitch +108 -103
- data/bin/riemann-haproxy +46 -40
- data/bin/riemann-health +4 -343
- data/bin/riemann-kvminstance +18 -13
- data/bin/riemann-memcached +35 -29
- data/bin/riemann-net +4 -104
- data/bin/riemann-nginx-status +74 -67
- data/bin/riemann-ntp +4 -33
- data/bin/riemann-portcheck +40 -31
- data/bin/riemann-proc +96 -90
- data/bin/riemann-varnish +51 -45
- data/bin/riemann-zookeeper +38 -34
- data/lib/riemann/tools/health.rb +347 -0
- data/lib/riemann/tools/net.rb +104 -0
- data/lib/riemann/tools/ntp.rb +41 -0
- data/lib/riemann/tools/version.rb +1 -1
- data/lib/riemann/tools.rb +37 -40
- data/riemann-tools.gemspec +4 -1
- data/tools/riemann-aws/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-aws/bin/riemann-aws-billing +72 -66
- data/tools/riemann-aws/bin/riemann-aws-rds-status +55 -41
- data/tools/riemann-aws/bin/riemann-aws-sqs-status +37 -31
- data/tools/riemann-aws/bin/riemann-aws-status +63 -51
- data/tools/riemann-aws/bin/riemann-elb-metrics +149 -148
- data/tools/riemann-aws/bin/riemann-s3-list +70 -65
- data/tools/riemann-aws/bin/riemann-s3-status +85 -82
- data/tools/riemann-chronos/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-chronos/bin/riemann-chronos +136 -119
- data/tools/riemann-docker/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-docker/bin/riemann-docker +163 -174
- data/tools/riemann-elasticsearch/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +155 -147
- data/tools/riemann-marathon/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-marathon/bin/riemann-marathon +138 -122
- data/tools/riemann-mesos/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-mesos/bin/riemann-mesos +125 -110
- data/tools/riemann-munin/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-munin/bin/riemann-munin +28 -22
- data/tools/riemann-rabbitmq/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +226 -222
- data/tools/riemann-riak/{Rakefile.rb → Rakefile} +2 -0
- data/tools/riemann-riak/bin/riemann-riak +281 -289
- data/tools/riemann-riak/riak_status/riak_status.rb +39 -39
- metadata +65 -16
@@ -1,217 +1,206 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
3
5
|
|
4
6
|
# Reports current CPU, disk, load average, and memory use to riemann.
|
5
7
|
|
6
8
|
require 'riemann/tools'
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
container.json['Name'][1..-1]
|
30
|
-
end
|
31
|
-
|
32
|
-
def initialize
|
33
|
-
|
34
|
-
if (opts[:docker_host] != nil)
|
35
|
-
Docker.url = opts[:docker_host]
|
36
|
-
end
|
37
|
-
|
38
|
-
@hostname = opts[:host_hostname]
|
39
|
-
if (@hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?)
|
40
|
-
@hostname = Socket.gethostname
|
41
|
-
end
|
42
|
-
|
43
|
-
@cpu_coefficient = 1000 * 1000 * 1000
|
44
|
-
|
45
|
-
@limits = {
|
46
|
-
:cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
|
47
|
-
:disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
|
48
|
-
:memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
|
49
|
-
}
|
50
|
-
|
51
|
-
@last_cpu_reads = Hash.new
|
52
|
-
@last_uptime_reads = Hash.new
|
53
|
-
|
54
|
-
opts[:checks].each do |check|
|
55
|
-
case check
|
56
|
-
when 'disk'
|
57
|
-
@disk_enabled = true
|
58
|
-
when 'cpu'
|
59
|
-
@cpu_enabled = true
|
60
|
-
when 'memory'
|
61
|
-
@memory_enabled = true
|
62
|
-
when 'basic'
|
63
|
-
@basic_inspection_enabled = true
|
10
|
+
module Riemann
|
11
|
+
module Tools
|
12
|
+
class DockerHealth
|
13
|
+
require 'docker'
|
14
|
+
require 'socket'
|
15
|
+
include Riemann::Tools
|
16
|
+
include Docker
|
17
|
+
|
18
|
+
opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
|
19
|
+
default: nil
|
20
|
+
opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
|
21
|
+
opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
|
22
|
+
opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
|
23
|
+
opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
|
24
|
+
opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
|
25
|
+
opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
|
26
|
+
opt :host_hostname, 'Suffix of host', type: String, default: nil
|
27
|
+
opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
|
28
|
+
|
29
|
+
def containers
|
30
|
+
Docker::Container.all
|
64
31
|
end
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
def alert(container, service, state, metric, description)
|
69
32
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
:description => description }
|
74
|
-
|
75
|
-
if (container != nil)
|
76
|
-
opts[:host] = "#{@hostname}-#{container}"
|
77
|
-
else
|
78
|
-
opts[:host] = @hostname
|
79
|
-
end
|
80
|
-
|
81
|
-
report(opts)
|
82
|
-
end
|
83
|
-
|
84
|
-
def report_pct(container, service, fraction, report = '', name = nil)
|
85
|
-
if fraction
|
33
|
+
def get_container_name(container)
|
34
|
+
container.json['Name'][1..]
|
35
|
+
end
|
86
36
|
|
87
|
-
|
88
|
-
|
37
|
+
def initialize
|
38
|
+
Docker.url = opts[:docker_host] unless opts[:docker_host].nil?
|
39
|
+
|
40
|
+
@hostname = opts[:host_hostname]
|
41
|
+
@hostname = Socket.gethostname if @hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?
|
42
|
+
|
43
|
+
@cpu_coefficient = 1000 * 1000 * 1000
|
44
|
+
|
45
|
+
@limits = {
|
46
|
+
cpu: { critical: opts[:cpu_critical], warning: opts[:cpu_warning] },
|
47
|
+
disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
|
48
|
+
memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
|
49
|
+
}
|
50
|
+
|
51
|
+
@last_cpu_reads = {}
|
52
|
+
@last_uptime_reads = {}
|
53
|
+
|
54
|
+
opts[:checks].each do |check|
|
55
|
+
case check
|
56
|
+
when 'disk'
|
57
|
+
@disk_enabled = true
|
58
|
+
when 'cpu'
|
59
|
+
@cpu_enabled = true
|
60
|
+
when 'memory'
|
61
|
+
@memory_enabled = true
|
62
|
+
when 'basic'
|
63
|
+
@basic_inspection_enabled = true
|
64
|
+
end
|
65
|
+
end
|
89
66
|
end
|
90
67
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
68
|
+
def alert(container, service, state, metric, description)
|
69
|
+
opts = {
|
70
|
+
service: service.to_s,
|
71
|
+
state: state.to_s,
|
72
|
+
metric: metric.to_f,
|
73
|
+
description: description,
|
74
|
+
}
|
75
|
+
|
76
|
+
opts[:host] = if !container.nil?
|
77
|
+
"#{@hostname}-#{container}"
|
78
|
+
else
|
79
|
+
@hostname
|
80
|
+
end
|
81
|
+
|
82
|
+
report(opts)
|
97
83
|
end
|
98
|
-
end
|
99
|
-
end
|
100
84
|
|
85
|
+
def report_pct(container, service, fraction, report = '', name = nil)
|
86
|
+
return unless fraction
|
101
87
|
|
102
|
-
|
88
|
+
name = service if name.nil?
|
103
89
|
|
104
|
-
|
90
|
+
if fraction > @limits[service][:critical]
|
91
|
+
alert container, name, :critical, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
|
92
|
+
elsif fraction > @limits[service][:warning]
|
93
|
+
alert container, name, :warning, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
|
94
|
+
else
|
95
|
+
alert container, name, :ok, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
|
96
|
+
end
|
97
|
+
end
|
105
98
|
|
106
|
-
|
107
|
-
|
108
|
-
return false
|
109
|
-
end
|
99
|
+
def cpu(id, name, stats)
|
100
|
+
current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
|
110
101
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
102
|
+
unless current
|
103
|
+
alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
|
104
|
+
return false
|
105
|
+
end
|
115
106
|
|
116
|
-
|
117
|
-
|
107
|
+
current_time = Time.parse(stats['read'])
|
108
|
+
unless @last_cpu_reads[id].nil?
|
109
|
+
last = @last_cpu_reads[id]
|
110
|
+
used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
|
118
111
|
|
119
|
-
|
120
|
-
|
112
|
+
report_pct name, :cpu, used
|
113
|
+
end
|
121
114
|
|
122
|
-
|
123
|
-
|
124
|
-
usage = memory_stats['usage'].to_f
|
125
|
-
total = memory_stats['limit'].to_f
|
126
|
-
fraction = (usage / total)
|
115
|
+
@last_cpu_reads[id] = { v: current, t: current_time }
|
116
|
+
end
|
127
117
|
|
128
|
-
|
129
|
-
|
118
|
+
def memory(_id, name, stats)
|
119
|
+
memory_stats = stats['memory_stats']
|
120
|
+
usage = memory_stats['usage'].to_f
|
121
|
+
total = memory_stats['limit'].to_f
|
122
|
+
fraction = (usage / total)
|
130
123
|
|
131
|
-
|
132
|
-
|
133
|
-
f = r.split(/\s+/)
|
134
|
-
next if f[0] == 'Filesystem'
|
135
|
-
next unless f[0] =~ /\// # Needs at least one slash in the mount path
|
124
|
+
report_pct name, :memory, fraction, "#{usage} / #{total}"
|
125
|
+
end
|
136
126
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
127
|
+
def disk
|
128
|
+
`df -P`.split(/\n/).each do |r|
|
129
|
+
f = r.split(/\s+/)
|
130
|
+
next if f[0] == 'Filesystem'
|
131
|
+
next unless f[0] =~ %r{/} # Needs at least one slash in the mount path
|
142
132
|
|
143
|
-
|
133
|
+
# Calculate capacity
|
134
|
+
x = f[4].to_f / 100
|
135
|
+
report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
|
136
|
+
end
|
137
|
+
end
|
144
138
|
|
145
|
-
|
146
|
-
|
139
|
+
def basic_inspection(id, name, inspection)
|
140
|
+
state = inspection['State']
|
141
|
+
json_state = JSON.generate(state)
|
147
142
|
|
148
|
-
|
143
|
+
running = state['Running']
|
149
144
|
|
150
|
-
|
151
|
-
|
145
|
+
alert(
|
146
|
+
name, 'status',
|
147
|
+
running ? 'ok' : 'critical',
|
152
148
|
running ? 1 : 0,
|
153
|
-
json_state
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
def tick
|
149
|
+
json_state,
|
150
|
+
)
|
151
|
+
|
152
|
+
return unless running
|
153
|
+
|
154
|
+
start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
|
155
|
+
now = DateTime.now.to_time.utc.to_i
|
156
|
+
uptime = now - start_time
|
157
|
+
|
158
|
+
unless @last_uptime_reads[id].nil?
|
159
|
+
last = @last_uptime_reads[id]
|
160
|
+
restarted = start_time != last
|
161
|
+
alert(
|
162
|
+
name, 'uptime',
|
163
|
+
restarted ? 'critical' : 'ok',
|
164
|
+
uptime,
|
165
|
+
"last 'StartedAt' measure was #{last} (#{Time.at(last).utc}), " \
|
166
|
+
"now it's #{start_time} (#{Time.at(start_time).utc})",
|
167
|
+
)
|
168
|
+
end
|
175
169
|
|
176
|
-
|
177
|
-
|
178
|
-
disk()
|
179
|
-
end
|
170
|
+
@last_uptime_reads[id] = start_time
|
171
|
+
end
|
180
172
|
|
181
|
-
|
182
|
-
|
183
|
-
|
173
|
+
def tick
|
174
|
+
# Disk is the same in every container
|
175
|
+
disk if @disk_enabled
|
184
176
|
|
185
|
-
|
186
|
-
|
177
|
+
# Get CPU, Memory and Load of each container
|
178
|
+
threads = []
|
187
179
|
|
188
|
-
|
189
|
-
|
180
|
+
containers.each do |ctr|
|
181
|
+
threads << Thread.new(ctr) do |container|
|
182
|
+
id = container.id
|
183
|
+
name = get_container_name(container)
|
190
184
|
|
191
|
-
|
185
|
+
stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
|
192
186
|
|
193
|
-
|
194
|
-
|
195
|
-
|
187
|
+
if @basic_inspection_enabled
|
188
|
+
inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
|
189
|
+
basic_inspection(id, name, inspection)
|
190
|
+
end
|
191
|
+
cpu(id, name, stats) if @cpu_enabled
|
192
|
+
memory(id, name, stats) if @memory_enabled
|
196
193
|
end
|
197
|
-
if @cpu_enabled
|
198
|
-
cpu(id, name, stats)
|
199
|
-
end
|
200
|
-
if @memory_enabled
|
201
|
-
memory(id, name, stats)
|
202
194
|
end
|
203
|
-
end
|
204
|
-
end
|
205
195
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
196
|
+
threads.each do |thread|
|
197
|
+
thread.join
|
198
|
+
rescue StandardError => e
|
199
|
+
warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
|
200
|
+
end
|
211
201
|
end
|
212
202
|
end
|
213
203
|
end
|
214
204
|
end
|
215
205
|
|
216
206
|
Riemann::Tools::DockerHealth.run
|
217
|
-
|