riemann-tools-fb 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.markdown +18 -0
- data/bin/riemann-bench +70 -0
- data/bin/riemann-diskstats +86 -0
- data/bin/riemann-fd +66 -0
- data/bin/riemann-health +270 -0
- data/bin/riemann-net +101 -0
- data/bin/riemann-proc +48 -0
- data/lib/riemann/tools.rb +132 -0
- metadata +86 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 25dce28ceece7fa46dec1e11f34c25cd635755da
|
4
|
+
data.tar.gz: d9a177ddf05988aa078e2af1e5cd715487791856
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 81e9be74305a81fc5befef9f73afd37e34146a3a9ce117f88f398322226175a6d1742d8cda90bf250689961fed69cd710b2337a1a2e404cc9df577fec7d9a1f7
|
7
|
+
data.tar.gz: b1c56956e9f9dce4c891a59fba51a5769c38ce327a16c320737f4eb64bca7a0ae637d747fa00c1b17a9b65e81f8893c08e23988ee731e1a0955cee99cb6cfec6
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2011 Kyle Kingsbury
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
Riemann Tools
|
2
|
+
=============
|
3
|
+
|
4
|
+
Tiny programs to submit events to Riemann.
|
5
|
+
|
6
|
+
Riemann-health, for example, submits events about the current CPU, load,
|
7
|
+
memory, and disk use. Bench submits randomly distributed metrics for load
|
8
|
+
testing. I've got a whole bunch of these internally for monitoring Redis, Riak,
|
9
|
+
queues, etc. Most have internal configuration dependencies, so it'll be a while
|
10
|
+
before I can extract them for re-use.
|
11
|
+
|
12
|
+
Get started
|
13
|
+
==========
|
14
|
+
|
15
|
+
``` bash
|
16
|
+
gem install riemann-tools
|
17
|
+
riemann-health --host my.riemann.server
|
18
|
+
```
|
data/bin/riemann-bench
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Connects to a server (first arg) and populates it with a constant stream of
|
4
|
+
# events for testing.
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'riemann/client'
|
8
|
+
require 'pp'
|
9
|
+
|
10
|
+
class Riemann::Bench
|
11
|
+
attr_accessor :client, :hosts, :services, :states
|
12
|
+
def initialize
|
13
|
+
@hosts = [nil] + (0...10).map { |i| "host#{i}" }
|
14
|
+
@hosts = %w(a b c d e f g h i j)
|
15
|
+
@services = %w(test1 test2 test3 foo bar baz xyzzy attack cat treat)
|
16
|
+
@states = {}
|
17
|
+
@client = Riemann::Client.new(:host => (ARGV.first || 'localhost'))
|
18
|
+
end
|
19
|
+
|
20
|
+
def evolve(state)
|
21
|
+
m = state[:metric] + (rand - 0.5) * 0.1
|
22
|
+
m = [[0,m].max, 1].min
|
23
|
+
|
24
|
+
s = case m
|
25
|
+
when 0...0.75
|
26
|
+
'ok'
|
27
|
+
when 0.75...0.9
|
28
|
+
'warning'
|
29
|
+
when 0.9..1.0
|
30
|
+
'critical'
|
31
|
+
end
|
32
|
+
|
33
|
+
{
|
34
|
+
:metric => m,
|
35
|
+
:state => s,
|
36
|
+
:host => state[:host],
|
37
|
+
:service => state[:service],
|
38
|
+
:description => "at #{Time.now}"
|
39
|
+
}
|
40
|
+
end
|
41
|
+
|
42
|
+
def tick
|
43
|
+
# pp @states
|
44
|
+
hosts.product(services).each do |id|
|
45
|
+
client << (states[id] = evolve(states[id]))
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def run
|
50
|
+
start
|
51
|
+
loop do
|
52
|
+
sleep 0.05
|
53
|
+
tick
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def start
|
58
|
+
hosts.product(services).each do |host, service|
|
59
|
+
states[[host, service]] = {
|
60
|
+
:metric => 0.5,
|
61
|
+
:state => 'ok',
|
62
|
+
:description => "Starting up",
|
63
|
+
:host => host,
|
64
|
+
:service => service
|
65
|
+
}
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
Riemann::Bench.new.run
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
require 'rubygems'
|
4
|
+
require 'riemann/tools'
|
5
|
+
|
6
|
+
class Riemann::Tools::Diskstats
|
7
|
+
include Riemann::Tools
|
8
|
+
|
9
|
+
opt :devices, "Devices to monitor", :type => :strings, :default => nil
|
10
|
+
opt :ignore_devices, "Devices to ignore", :type => :strings, :default =>nil
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@old_state = nil
|
14
|
+
end
|
15
|
+
|
16
|
+
def state
|
17
|
+
f = File.read('/proc/diskstats')
|
18
|
+
state = f.split("\n").reject { |d| d =~ /(ram|loop)/ }.inject({}) do |s, line|
|
19
|
+
if line =~ /^(?:\s+\d+){2}\s+([\w\d\-]+) (.*)$/
|
20
|
+
dev = $1
|
21
|
+
|
22
|
+
['reads reqs',
|
23
|
+
'reads merged',
|
24
|
+
'reads sector',
|
25
|
+
'reads time',
|
26
|
+
'writes reqs',
|
27
|
+
'writes merged',
|
28
|
+
'writes sector',
|
29
|
+
'writes time',
|
30
|
+
'io reqs',
|
31
|
+
'io time',
|
32
|
+
'io weighted'
|
33
|
+
].map do |service|
|
34
|
+
"#{dev} #{service}"
|
35
|
+
end.zip(
|
36
|
+
$2.split(/\s+/).map { |str| str.to_i }
|
37
|
+
).each do |service, value|
|
38
|
+
s[service] = value
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
s
|
43
|
+
end
|
44
|
+
|
45
|
+
# Filter interfaces
|
46
|
+
if is = opts[:devices]
|
47
|
+
state = state.select do |service, value|
|
48
|
+
is.include? service.split(' ').first
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
if ign = opts[:ignore_devices]
|
53
|
+
state = state.reject do |service, value|
|
54
|
+
ign.include? service.split(' ').first
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
state
|
59
|
+
end
|
60
|
+
|
61
|
+
# Takes a fresh counter sample via #state and reports, for every service that
# was also present in the previous sample, the per-second rate of change over
# opts[:interval]. Services ending in "io time" additionally get an
# "io util" event: the same delta divided by interval * 1000 (presumably
# converting milliseconds of I/O time into a busy fraction -- confirm against
# the kernel's diskstats documentation).
#
# The first call only records a baseline and reports nothing.
def tick
  current = state

  if @old_state
    interval = opts[:interval]

    current.each do |service, metric|
      previous = @old_state[service]
      # A device that appeared since the last sample (or a short line that
      # produced no value) has nothing to diff against. Skipping it keeps one
      # new device from raising on every tick and blocking the baseline update.
      next if previous.nil? || metric.nil?

      delta = (metric - previous).to_f

      report(
        :service => "diskstats " + service,
        :metric => (delta / interval),
        :state => "ok"
      )

      next unless service =~ /io time$/

      report(
        :service => "diskstats " + service.gsub(/time/, 'util'),
        :metric => (delta / (interval * 1000)),
        :state => "ok"
      )
    end
  end

  @old_state = current
end
|
84
|
+
end
|
85
|
+
|
86
|
+
Riemann::Tools::Diskstats.run
|
data/bin/riemann-fd
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Reports current file descriptor use to riemann.
|
4
|
+
# By default it reports the total system fd usage; it can also report the usage of individual processes.
|
5
|
+
|
6
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
7
|
+
|
8
|
+
class Riemann::Tools::Health
|
9
|
+
include Riemann::Tools
|
10
|
+
|
11
|
+
opt :fd_sys_warning, "open file descriptor threshold for system", :default => 800
|
12
|
+
opt :fd_sys_critical, "open file descriptor critical threshold for system", :default => 900
|
13
|
+
opt :fd_proc_warning, "open file descriptor threshold for process", :default => 800
|
14
|
+
opt :fd_proc_critical, "open file descriptor critical threshold for process", :default => 900
|
15
|
+
opt :processes, "list of processes to measure fd usage in addition to system total", :type => :ints
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@limits = {
|
19
|
+
:fd => {:critical => opts[:fd_sys_critical], :warning => opts[:fd_sys_warning]},
|
20
|
+
:process => {:critical => opts[:fd_proc_critical], :warning => opts[:fd_proc_warning]},
|
21
|
+
}
|
22
|
+
ostype = `uname -s`.chomp.downcase
|
23
|
+
puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
|
24
|
+
@fd = method :linux_fd
|
25
|
+
end
|
26
|
+
|
27
|
+
def alert(service, state, metric, description)
|
28
|
+
report(
|
29
|
+
:service => service.to_s,
|
30
|
+
:state => state.to_s,
|
31
|
+
:metric => metric.to_f,
|
32
|
+
:description => description
|
33
|
+
)
|
34
|
+
end
|
35
|
+
|
36
|
+
def linux_fd
|
37
|
+
sys_used = Integer(`lsof | wc -l`)
|
38
|
+
if sys_used > @limits[:fd][:critical]
|
39
|
+
alert "fd sys", :critical, sys_used, "system is using #{sys_used} fds"
|
40
|
+
elsif sys_used > @limits[:fd][:warning]
|
41
|
+
alert "fd sys", :warning, sys_used, "system is using #{sys_used} fds"
|
42
|
+
else
|
43
|
+
alert "fd sys", :ok, sys_used, "system is using #{sys_used} fds"
|
44
|
+
end
|
45
|
+
|
46
|
+
unless opts[:processes].nil?
|
47
|
+
opts[:processes].each do |process|
|
48
|
+
used = Integer(`lsof -p #{process} | wc -l`)
|
49
|
+
name, pid = `ps axo comm,pid | grep -w #{process}`.split
|
50
|
+
if used > @limits[:process][:critical]
|
51
|
+
alert "fd #{name} #{process}", :critical, used, "process #{name} #{process} is using #{used} fds"
|
52
|
+
elsif used > @limits[:process][:warning]
|
53
|
+
alert "fd #{name} #{process}", :warning, used, "process #{name} #{process} is using #{used} fds"
|
54
|
+
else
|
55
|
+
alert "fd #{name} #{process}", :ok, used, "process #{name} #{process} is using #{used} fds"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def tick
|
62
|
+
@fd.call
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
Riemann::Tools::Health.run
|
data/bin/riemann-health
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Reports current CPU, disk, load average, and memory use to riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
class Riemann::Tools::Health
|
8
|
+
include Riemann::Tools
|
9
|
+
|
10
|
+
opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
|
11
|
+
opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
|
12
|
+
opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
|
13
|
+
opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
|
14
|
+
opt :load_warning, "Load warning threshold (load average / core)", :default => 3
|
15
|
+
opt :load_critical, "Load critical threshold (load average / core)", :default => 8
|
16
|
+
opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
|
17
|
+
opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
|
18
|
+
opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'load', 'memory', 'disk']
|
19
|
+
|
20
|
+
def initialize
|
21
|
+
@limits = {
|
22
|
+
:cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
|
23
|
+
:disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
|
24
|
+
:load => {:critical => opts[:load_critical], :warning => opts[:load_warning]},
|
25
|
+
:memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
|
26
|
+
}
|
27
|
+
case (ostype = `uname -s`.chomp.downcase)
|
28
|
+
when 'darwin'
|
29
|
+
@cores = `sysctl -n hw.ncpu`.to_i
|
30
|
+
@cpu = method :darwin_cpu
|
31
|
+
@disk = method :disk
|
32
|
+
@load = method :darwin_load
|
33
|
+
@memory = method :darwin_memory
|
34
|
+
darwin_top
|
35
|
+
when 'freebsd'
|
36
|
+
@cores = `sysctl -n hw.ncpu`.to_i
|
37
|
+
@cpu = method :freebsd_cpu
|
38
|
+
@disk = method :disk
|
39
|
+
@load = method :freebsd_load
|
40
|
+
@memory = method :freebsd_memory
|
41
|
+
else
|
42
|
+
@cores = cores
|
43
|
+
puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
|
44
|
+
@cpu = method :linux_cpu
|
45
|
+
@disk = method :disk
|
46
|
+
@load = method :linux_load
|
47
|
+
@memory = method :linux_memory
|
48
|
+
end
|
49
|
+
|
50
|
+
opts[:checks].each do |check|
|
51
|
+
case check
|
52
|
+
when "disk"
|
53
|
+
@disk_enabled = true
|
54
|
+
when "load"
|
55
|
+
@load_enabled = true
|
56
|
+
when "cpu"
|
57
|
+
@cpu_enabled = true
|
58
|
+
when "memory"
|
59
|
+
@memory_enabled = true
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def alert(service, state, metric, description)
|
65
|
+
report(
|
66
|
+
:service => service.to_s,
|
67
|
+
:state => state.to_s,
|
68
|
+
:metric => metric.to_f,
|
69
|
+
:description => description
|
70
|
+
)
|
71
|
+
end
|
72
|
+
|
73
|
+
def cores
|
74
|
+
i = 0;
|
75
|
+
File.read("/proc/cpuinfo").split(/\n\n/).inject({}) do |cores, p|
|
76
|
+
physical_id = p[/physical id\s+:\s+(\d+)/, 1]
|
77
|
+
core_id = p[/core id\s+:\s+(\d+)/, 1]
|
78
|
+
if physical_id and core_id
|
79
|
+
cores["#{physical_id}:#{core_id}"] = true
|
80
|
+
elsif physical_id
|
81
|
+
cores["#{physical_id}:"] = true
|
82
|
+
else
|
83
|
+
cores[i += 1] = true;
|
84
|
+
end
|
85
|
+
|
86
|
+
cores
|
87
|
+
end.size
|
88
|
+
end
|
89
|
+
|
90
|
+
def report_pct(service, fraction, report)
|
91
|
+
if fraction
|
92
|
+
if fraction > @limits[service][:critical]
|
93
|
+
alert service, :critical, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
|
94
|
+
elsif fraction > @limits[service][:warning]
|
95
|
+
alert service, :warning, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
|
96
|
+
else
|
97
|
+
alert service, :ok, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
def linux_cpu
|
103
|
+
new = File.read('/proc/stat')
|
104
|
+
unless new[/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
|
105
|
+
alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
|
106
|
+
return false
|
107
|
+
end
|
108
|
+
u2, n2, s2, i2 = [$1, $2, $3, $4].map { |e| e.to_i }
|
109
|
+
|
110
|
+
if @old_cpu
|
111
|
+
u1, n1, s1, i1 = @old_cpu
|
112
|
+
|
113
|
+
used = (u2+n2+s2) - (u1+n1+s1)
|
114
|
+
total = used + i2-i1
|
115
|
+
fraction = used.to_f / total
|
116
|
+
|
117
|
+
report_pct :cpu, fraction, "user+nice+sytem\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
|
118
|
+
end
|
119
|
+
|
120
|
+
@old_cpu = [u2, n2, s2, i2]
|
121
|
+
end
|
122
|
+
|
123
|
+
def linux_load
|
124
|
+
load = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores
|
125
|
+
if load > @limits[:load][:critical]
|
126
|
+
alert "load", :critical, load, "1-minute load average/core is #{load}"
|
127
|
+
elsif load > @limits[:load][:warning]
|
128
|
+
alert "load", :warning, load, "1-minute load average/core is #{load}"
|
129
|
+
else
|
130
|
+
alert "load", :ok, load, "1-minute load average/core is #{load}"
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def linux_memory
|
135
|
+
m = File.read('/proc/meminfo').split(/\n/).inject({}) { |info, line|
|
136
|
+
x = line.split(/:?\s+/)
|
137
|
+
# Assume kB...
|
138
|
+
info[x[0]] = x[1].to_i
|
139
|
+
info
|
140
|
+
}
|
141
|
+
|
142
|
+
free = m['MemFree'].to_i + m['Buffers'].to_i + m['Cached'].to_i
|
143
|
+
total = m['MemTotal'].to_i
|
144
|
+
fraction = 1 - (free.to_f / total)
|
145
|
+
|
146
|
+
report_pct :memory, fraction, "used\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
|
147
|
+
end
|
148
|
+
|
149
|
+
def freebsd_cpu
|
150
|
+
u2, n2, s2, t2, i2 = `sysctl -n kern.cp_time 2>/dev/null`.split.map{ |e| e.to_i } #FreeBSD has 5 cpu stats
|
151
|
+
|
152
|
+
if @old_cpu
|
153
|
+
u1, n1, s1, t1, i1 = @old_cpu
|
154
|
+
|
155
|
+
used = (u2+n2+s2+t2) - (u1+n1+s1+t1)
|
156
|
+
total = used + i2-i1
|
157
|
+
fraction = used.to_f / total
|
158
|
+
|
159
|
+
report_pct :cpu, fraction, "user+nice+sytem+interrupt\n\n#{`ps -axo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
|
160
|
+
end
|
161
|
+
|
162
|
+
@old_cpu = [u2, n2, s2, t2, i2]
|
163
|
+
end
|
164
|
+
|
165
|
+
def freebsd_load
|
166
|
+
m = `uptime`.split(':')[-1].chomp.gsub(/\s+/,'').split(',')
|
167
|
+
load = m[0].to_f / @cores
|
168
|
+
if load > @limits[:load][:critical]
|
169
|
+
alert "load", :critical, load, "1-minute load average/core is #{load}"
|
170
|
+
elsif load > @limits[:load][:warning]
|
171
|
+
alert "load", :warning, load, "1-minute load average/core is #{load}"
|
172
|
+
else
|
173
|
+
alert "load", :ok, load, "1-minute load average/core is #{load}"
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def freebsd_memory
|
178
|
+
meminfo = `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
|
179
|
+
fraction = (meminfo[1].to_f + meminfo[2].to_f) / meminfo[0].to_f
|
180
|
+
|
181
|
+
report_pct :memory, fraction, "used\n\n#{`ps -axo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
|
182
|
+
end
|
183
|
+
|
184
|
+
# Runs `top -l 1` once and caches the parsed figures in @topdata:
#   :stamp  - Time.now.to_i when the sample was taken
#   :load   - first figure of the "Load Avg:" line (the 1-minute average that
#             darwin_load reports)
#   :cpu    - busy fraction, 1 - idle% / 100
#   :memory - (wired + active + used) / (wired + active + used + inactive + free)
# Keys whose line is missing or does not match are simply left unset.
def darwin_top
  raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
  @topdata = { :stamp => Time.now.to_i }
  units = 'BKMGT'

  raw.each_line do |ln|
    # Capture the whole first figure; the previous pattern kept only the last
    # character of the third one.
    if (load_avg = ln.match(/Load Avg: ([0-9.]+), [0-9.]+, [0-9.]+/i))
      @topdata[:load] = load_avg[1].to_f
    elsif (cpu = ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i))
      @topdata[:cpu] = 1 - (cpu[1].to_f / 100)
    elsif (mem = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i))
      # Captures come as (amount, unit) pairs. The pattern is case-insensitive,
      # so normalise the unit letter before looking up its power of 1024.
      wired, active, inactive, used, free = mem.captures.each_slice(2).map do |amount, unit|
        amount.to_i * (1024**units.index(unit.upcase))
      end
      busy = wired + active + used
      @topdata[:memory] = busy.to_f / (busy + inactive + free)
    end
  end
end
|
202
|
+
|
203
|
+
def darwin_cpu
|
204
|
+
darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
|
205
|
+
unless @topdata[:cpu]
|
206
|
+
alert 'cpu', :unknown, nil, "unable to get CPU stats from top"
|
207
|
+
return false
|
208
|
+
end
|
209
|
+
report_pct :cpu, @topdata[:cpu], "usage\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
|
210
|
+
end
|
211
|
+
|
212
|
+
def darwin_load
|
213
|
+
darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
|
214
|
+
unless @topdata[:load]
|
215
|
+
alert 'load', :unknown, nil, "unable to get load ave from top"
|
216
|
+
return false
|
217
|
+
end
|
218
|
+
metric = @topdata[:load] / @cores
|
219
|
+
if metric > @limits[:load][:critical]
|
220
|
+
alert "load", :critical, metric, "1-minute load average per core is #{metric}"
|
221
|
+
elsif metric > @limits[:load][:warning]
|
222
|
+
alert "load", :warning, metric, "1-minute load average per core is #{metric}"
|
223
|
+
else
|
224
|
+
alert "load", :ok, metric, "1-minute load average per core is #{metric}"
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
def darwin_memory
|
229
|
+
darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
|
230
|
+
unless @topdata[:memory]
|
231
|
+
alert 'memory', :unknown, nil, "unable to get memory data from top"
|
232
|
+
return false
|
233
|
+
end
|
234
|
+
report_pct :memory, @topdata[:memory], "usage\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
|
235
|
+
end
|
236
|
+
|
237
|
+
# Reports the used fraction of every filesystem listed by `df -P` whose device
# column starts with "/". Each one is sent through #alert as "disk <mount>"
# with state :critical, :warning or :ok according to @limits[:disk].
def disk
  limits = @limits[:disk]

  `df -P`.split(/\n/).each do |row|
    # Limit the split to the six df columns so a mount point containing
    # spaces stays in one piece.
    fields = row.split(/\s+/, 6)
    # Skips the header line as well as pseudo filesystems (tmpfs, proc, ...).
    next unless fields[0] =~ /^\//

    capacity = fields[4]
    mount = fields[5]
    used = capacity.to_f / 100

    level =
      if used > limits[:critical]
        :critical
      elsif used > limits[:warning]
        :warning
      else
        :ok
      end

    alert "disk #{mount}", level, used, "#{capacity} used"
  end
end
|
253
|
+
|
254
|
+
def tick
|
255
|
+
if @cpu_enabled
|
256
|
+
@cpu.call
|
257
|
+
end
|
258
|
+
if @memory_enabled
|
259
|
+
@memory.call
|
260
|
+
end
|
261
|
+
if @disk_enabled
|
262
|
+
@disk.call
|
263
|
+
end
|
264
|
+
if @load_enabled
|
265
|
+
@load.call
|
266
|
+
end
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
Riemann::Tools::Health.run
|
data/bin/riemann-net
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gathers net statistics and submits them to Riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
class Riemann::Tools::Net
|
8
|
+
include Riemann::Tools
|
9
|
+
|
10
|
+
opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
|
11
|
+
opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
@old_state = nil
|
15
|
+
@interfaces = opts[:interfaces].map(&:dup) if opts[:interfaces]
|
16
|
+
@ignore_interfaces = opts[:ignore_interfaces].map(&:dup)
|
17
|
+
end
|
18
|
+
|
19
|
+
def state
|
20
|
+
f = File.read('/proc/net/dev')
|
21
|
+
state = f.split("\n").inject({}) do |s, line|
|
22
|
+
if line =~ /\s*(\w+?):\s*([\s\d]+)\s*/
|
23
|
+
iface = $1
|
24
|
+
|
25
|
+
['rx bytes',
|
26
|
+
'rx packets',
|
27
|
+
'rx errs',
|
28
|
+
'rx drop',
|
29
|
+
'rx fifo',
|
30
|
+
'rx frame',
|
31
|
+
'rx compressed',
|
32
|
+
'rx multicast',
|
33
|
+
'tx bytes',
|
34
|
+
'tx packets',
|
35
|
+
'tx errs',
|
36
|
+
'tx drops',
|
37
|
+
'tx fifo',
|
38
|
+
'tx colls',
|
39
|
+
'tx carrier',
|
40
|
+
'tx compressed'].map do |service|
|
41
|
+
"#{iface} #{service}"
|
42
|
+
end.zip(
|
43
|
+
$2.split(/\s+/).map { |str| str.to_i }
|
44
|
+
).each do |service, value|
|
45
|
+
s[service] = value
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
s
|
50
|
+
end
|
51
|
+
|
52
|
+
# Filter interfaces
|
53
|
+
if is = @interfaces
|
54
|
+
state = state.select do |service, value|
|
55
|
+
is.include? service.split(' ').first
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
state = state.reject do |service, value|
|
60
|
+
@ignore_interfaces.include? service.split(' ').first
|
61
|
+
end
|
62
|
+
|
63
|
+
state
|
64
|
+
end
|
65
|
+
|
66
|
+
# Takes a fresh counter sample via #state and reports, for every service that
# was also present in the previous sample, the per-second rate of change over
# opts[:interval].
#
# Drop and error counters are reported as 'warning' whenever the cumulative
# counter itself (not the delta) is above zero; everything else is 'ok'.
# The first call only records a baseline and reports nothing.
def tick
  current = state

  if @old_state
    current.each do |service, metric|
      previous = @old_state[service]
      # An interface that appeared since the last sample has nothing to diff
      # against. Skipping it keeps one new interface from raising on every
      # tick and blocking the baseline update.
      next if previous.nil? || metric.nil?

      delta = metric - previous

      # "drops?" covers both 'rx drop' and 'tx drops', the two spellings used
      # for the service names built in #state.
      svc_state =
        if service =~ /(?:drops?|errs)$/ && metric > 0
          'warning'
        else
          'ok'
        end

      report(
        :service => service.dup,
        :metric => (delta.to_f / opts[:interval]),
        :state => svc_state
      )
    end
  end

  @old_state = current
end
|
99
|
+
end
|
100
|
+
|
101
|
+
Riemann::Tools::Net.run
|
data/bin/riemann-proc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Reports running process count to riemann.
|
4
|
+
|
5
|
+
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
+
|
7
|
+
class Riemann::Tools::Proc
|
8
|
+
include Riemann::Tools
|
9
|
+
|
10
|
+
opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
|
11
|
+
opt :proc_min_critical, "running process count minimum", :default => 1
|
12
|
+
opt :proc_max_critical, "running process count maximum", :default => 1
|
13
|
+
|
14
|
+
def initialize
|
15
|
+
@limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }
|
16
|
+
|
17
|
+
abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]
|
18
|
+
|
19
|
+
ostype = `uname -s`.chomp.downcase
|
20
|
+
puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
|
21
|
+
@check = method :linux_proc
|
22
|
+
end
|
23
|
+
|
24
|
+
def alert(service, state, metric, description)
|
25
|
+
report(
|
26
|
+
:service => service.to_s,
|
27
|
+
:state => state.to_s,
|
28
|
+
:metric => metric.to_f,
|
29
|
+
:description => description
|
30
|
+
)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Counts the processes whose command line (`ps axo args`) matches
# opts[:proc_regex] and alerts as "proc <regex>": :critical when the count is
# above @limits[:critical][:max] or below @limits[:critical][:min], otherwise
# :ok. Lines mentioning grep or riemann-proc itself are excluded from the count.
def linux_proc
  # Stdlib; required here because only this method needs it.
  require 'shellwords'

  process = opts[:proc_regex]
  # The pattern comes straight from the command line. Escape it so spaces and
  # shell metacharacters reach grep as a single argument, and pass it with -e
  # so a leading "-" is not read as a grep option.
  pattern = Shellwords.escape(process)
  running = Integer(`ps axo args | grep -e #{pattern} | grep -v grep | grep -v riemann-proc | wc -l`)

  limits = @limits[:critical]
  level = running > limits[:max] || running < limits[:min] ? :critical : :ok

  alert "proc #{process}", level, running, "process #{process} is running #{running} instances"
end
|
42
|
+
|
43
|
+
def tick
|
44
|
+
@check.call
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
Riemann::Tools::Proc.run
|
@@ -0,0 +1,132 @@
|
|
1
|
+
module Riemann
|
2
|
+
module Tools
|
3
|
+
require 'rubygems'
|
4
|
+
require 'trollop'
|
5
|
+
require 'riemann/client'
|
6
|
+
require 'timeout'
|
7
|
+
|
8
|
+
def self.included(base)
|
9
|
+
base.instance_eval do
|
10
|
+
def run
|
11
|
+
new.run
|
12
|
+
end
|
13
|
+
|
14
|
+
def opt(*a)
|
15
|
+
a.unshift :opt
|
16
|
+
@opts ||= []
|
17
|
+
@opts << a
|
18
|
+
end
|
19
|
+
|
20
|
+
def options
|
21
|
+
p = Trollop::Parser.new
|
22
|
+
@opts.each do |o|
|
23
|
+
p.send *o
|
24
|
+
end
|
25
|
+
Trollop::with_standard_exception_handling(p) do
|
26
|
+
p.parse ARGV
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
opt :host, "Riemann host", :default => '127.0.0.1'
|
31
|
+
opt :port, "Riemann port", :default => 5555
|
32
|
+
opt :event_host, "Event hostname", :type => String
|
33
|
+
opt :interval, "Seconds between updates", :default => 5
|
34
|
+
opt :tag, "Tag to add to events", :type => String, :multi => true
|
35
|
+
opt :ttl, "TTL for events", :type => Integer
|
36
|
+
opt :attribute, "Attribute to add to the event", :type => String, :multi => true
|
37
|
+
opt :timeout, "Timeout (in seconds) when waiting for acknowledgements", :default => 30
|
38
|
+
opt :tcp, "Use TCP transport instead of UDP (improves reliability, slight overhead.", :default => true
|
39
|
+
opt :daemon, "Run in background", :default => false
|
40
|
+
opt :logfile, "logfile path", :type => String, :default => '/tmp/riemann-tools.log'
|
41
|
+
opt :pidfile, "pidfile path", :type => String, :default => '/tmp/riemann-tools.pid'
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
# Returns parsed options (cached) from command line.
|
46
|
+
def options
|
47
|
+
@options ||= self.class.options
|
48
|
+
end
|
49
|
+
alias :opts :options
|
50
|
+
|
51
|
+
# Returns the custom event attributes given as repeated "key=value" options
# (options[:attribute]) as a Hash of String => String, memoized in @attributes.
#
# Only the first "=" separates key from value, so values may themselves
# contain "=". Entries without "=" or with an empty value are ignored.
def attributes
  @attributes ||= Array(options[:attribute]).each_with_object({}) do |attr, parsed|
    key, value = attr.split('=', 2)
    # Skip malformed entries instead of handing nil to Hash[], which raises
    # ArgumentError on current Rubies.
    next if key.nil? || value.nil? || value.empty?

    parsed[key] = value
  end
end
|
59
|
+
|
60
|
+
def report(event)
|
61
|
+
if options[:tag]
|
62
|
+
# Work around a bug with beefcake which can't take frozen strings.
|
63
|
+
event[:tags] = options[:tag].map(&:dup)
|
64
|
+
end
|
65
|
+
|
66
|
+
event[:ttl] ||= (options[:ttl] || (options[:interval] * 2))
|
67
|
+
|
68
|
+
if options[:event_host]
|
69
|
+
event[:host] = options[:event_host].dup
|
70
|
+
end
|
71
|
+
|
72
|
+
event = event.merge(attributes)
|
73
|
+
|
74
|
+
begin
|
75
|
+
Timeout::timeout(options[:timeout]) do
|
76
|
+
riemann << event
|
77
|
+
end
|
78
|
+
rescue Timeout::Error
|
79
|
+
riemann.connect
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def new_riemann_client
|
84
|
+
r = Riemann::Client.new(
|
85
|
+
:host => options[:host],
|
86
|
+
:port => options[:port]
|
87
|
+
)
|
88
|
+
if options[:tcp]
|
89
|
+
r.tcp
|
90
|
+
else
|
91
|
+
r
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def riemann
|
96
|
+
@riemann ||= new_riemann_client
|
97
|
+
end
|
98
|
+
alias :r :riemann
|
99
|
+
|
100
|
+
def run
|
101
|
+
daemonize if options[:daemon]
|
102
|
+
t0 = Time.now
|
103
|
+
loop do
|
104
|
+
begin
|
105
|
+
tick
|
106
|
+
rescue => e
|
107
|
+
$stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
|
108
|
+
end
|
109
|
+
|
110
|
+
# Sleep.
|
111
|
+
sleep(options[:interval] - ((Time.now - t0) % options[:interval]))
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
def daemonize
|
116
|
+
exit if fork
|
117
|
+
Process.setsid
|
118
|
+
$0 = self.class.name.downcase.gsub('::','_')
|
119
|
+
$stdout.reopen(opts[:logfile], 'w')
|
120
|
+
$stdout.sync = true
|
121
|
+
$stderr.reopen($stdout)
|
122
|
+
exit if fork
|
123
|
+
f = File.new(opts[:pidfile], 'w')
|
124
|
+
f.write Process.pid
|
125
|
+
f.close
|
126
|
+
end
|
127
|
+
|
128
|
+
def tick
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
132
|
+
end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: riemann-tools-fb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Fede Borgnia
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: riemann-client
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.2.3
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.2.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: trollop
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.16.2
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.16.2
|
41
|
+
description: Utilities which submit events to Riemann.
|
42
|
+
email: fborgnia@gmail.com
|
43
|
+
executables:
|
44
|
+
- riemann-bench
|
45
|
+
- riemann-diskstats
|
46
|
+
- riemann-proc
|
47
|
+
- riemann-health
|
48
|
+
- riemann-fd
|
49
|
+
- riemann-net
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files: []
|
52
|
+
files:
|
53
|
+
- lib/riemann/tools.rb
|
54
|
+
- bin/riemann-bench
|
55
|
+
- bin/riemann-diskstats
|
56
|
+
- bin/riemann-fd
|
57
|
+
- bin/riemann-health
|
58
|
+
- bin/riemann-net
|
59
|
+
- bin/riemann-proc
|
60
|
+
- LICENSE
|
61
|
+
- README.markdown
|
62
|
+
homepage: https://github.com/fborgnia/riemann-tools
|
63
|
+
licenses:
|
64
|
+
- MIT
|
65
|
+
metadata: {}
|
66
|
+
post_install_message:
|
67
|
+
rdoc_options: []
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 1.9.3
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
requirements: []
|
81
|
+
rubyforge_project: riemann-tools
|
82
|
+
rubygems_version: 2.0.3
|
83
|
+
signing_key:
|
84
|
+
specification_version: 4
|
85
|
+
summary: Utilities which submit events to Riemann.
|
86
|
+
test_files: []
|