riemann-tools-fb 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 25dce28ceece7fa46dec1e11f34c25cd635755da
4
+ data.tar.gz: d9a177ddf05988aa078e2af1e5cd715487791856
5
+ SHA512:
6
+ metadata.gz: 81e9be74305a81fc5befef9f73afd37e34146a3a9ce117f88f398322226175a6d1742d8cda90bf250689961fed69cd710b2337a1a2e404cc9df577fec7d9a1f7
7
+ data.tar.gz: b1c56956e9f9dce4c891a59fba51a5769c38ce327a16c320737f4eb64bca7a0ae637d747fa00c1b17a9b65e81f8893c08e23988ee731e1a0955cee99cb6cfec6
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,18 @@
1
+ Riemann Tools
2
+ =============
3
+
4
+ Tiny programs to submit events to Riemann.
5
+
6
+ Riemann-health, for example, submits events about the current CPU, load,
7
+ memory, and disk use. Bench submits randomly distributed metrics for load
8
+ testing. I've got a whole bunch of these internally for monitoring Redis, Riak,
9
+ queues, etc. Most have internal configuration dependencies, so it'll be a while
10
+ before I can extract them for re-use.
11
+
12
+ Get started
13
+ ==========
14
+
15
+ ``` bash
16
+ gem install riemann-tools
17
+ riemann-health --host my.riemann.server
18
+ ```
data/bin/riemann-bench ADDED
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Connects to a server (first arg) and populates it with a constant stream of
4
+ # events for testing.
5
+
6
+ require 'rubygems'
7
+ require 'riemann/client'
8
+ require 'pp'
9
+
10
# Load generator for a Riemann server: keeps one state per (host, service)
# pair, nudges each metric by a small random step on every tick, and sends
# the resulting events through the client.
class Riemann::Bench
  attr_accessor :client, :hosts, :services, :states

  # Sets up the fixture hosts and services plus a client pointed at the host
  # given as the first command-line argument (default 'localhost').
  def initialize
    # An earlier "[nil] + host0..host9" list used to be assigned here and was
    # overwritten on the very next line; that dead store has been removed.
    @hosts = %w(a b c d e f g h i j)
    @services = %w(test1 test2 test3 foo bar baz xyzzy attack cat treat)
    @states = {}
    @client = Riemann::Client.new(:host => (ARGV.first || 'localhost'))
  end

  # Returns the successor of +state+ (a Hash with :metric, :host, :service).
  # The metric moves by at most +/-0.05, is clamped to 0..1, and the state
  # string is derived from the new metric.
  def evolve(state)
    m = state[:metric] + (rand - 0.5) * 0.1
    m = [[0, m].max, 1].min

    s = case m
        when 0...0.75
          'ok'
        when 0.75...0.9
          'warning'
        when 0.9..1.0
          'critical'
        end

    {
      :metric => m,
      :state => s,
      :host => state[:host],
      :service => state[:service],
      :description => "at #{Time.now}"
    }
  end

  # Evolves every (host, service) state once and sends each new event.
  def tick
    hosts.product(services).each do |id|
      client << (states[id] = evolve(states[id]))
    end
  end

  # Seeds the initial states, then ticks forever with a 50 ms pause.
  def run
    start
    loop do
      sleep 0.05
      tick
    end
  end

  # Creates the initial state (metric 0.5, 'ok') for every host/service pair.
  def start
    hosts.product(services).each do |host, service|
      states[[host, service]] = {
        :metric => 0.5,
        :state => 'ok',
        :description => "Starting up",
        :host => host,
        :service => service
      }
    end
  end
end
69
+
70
+ Riemann::Bench.new.run
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ require 'rubygems'
4
+ require 'riemann/tools'
5
+
6
# Samples /proc/diskstats on every tick and reports, per device and counter,
# the change since the previous sample divided by the polling interval.
class Riemann::Tools::Diskstats
  include Riemann::Tools

  opt :devices, "Devices to monitor", :type => :strings, :default => nil
  opt :ignore_devices, "Devices to ignore", :type => :strings, :default =>nil

  # Labels applied, in order, to the numeric columns that follow the device
  # name on each line. Extra columns on a line are ignored.
  FIELDS = [
    'reads reqs',
    'reads merged',
    'reads sector',
    'reads time',
    'writes reqs',
    'writes merged',
    'writes sector',
    'writes time',
    'io reqs',
    'io time',
    'io weighted'
  ].freeze

  def initialize
    @old_state = nil
  end

  # Returns a Hash of "<device> <field>" => Integer for every line of
  # /proc/diskstats that is not a ram/loop device, restricted by the
  # --devices / --ignore-devices options when given.
  def state
    state = {}

    File.read('/proc/diskstats').split("\n").each do |line|
      next if line =~ /(ram|loop)/
      next unless line =~ /^(?:\s+\d+){2}\s+([\w\d\-]+) (.*)$/

      dev = $1
      values = $2.split(/\s+/).map { |str| str.to_i }

      FIELDS.zip(values).each do |field, value|
        # A line with fewer columns than FIELDS yields nil here; skip it
        # rather than store a value that cannot be subtracted later.
        next if value.nil?
        state["#{dev} #{field}"] = value
      end
    end

    # Filter devices
    if is = opts[:devices]
      state = state.select do |service, value|
        is.include? service.split(' ').first
      end
    end

    if ign = opts[:ignore_devices]
      state = state.reject do |service, value|
        ign.include? service.split(' ').first
      end
    end

    state
  end

  # Reports one event per counter with the per-second delta since the last
  # tick. The first tick only records a baseline.
  def tick
    state = self.state

    if @old_state
      state.each do |service, metric|
        previous = @old_state[service]
        # A device that appeared since the last sample has no baseline yet.
        # Without this guard the subtraction raises, @old_state is never
        # refreshed, and every later tick fails the same way.
        next if previous.nil?

        delta = metric - previous

        report(
          :service => "diskstats " + service,
          :metric => (delta.to_f / opts[:interval]),
          :state => "ok"
        )

        if service =~ /io time$/
          # Same delta divided by interval*1000 (the original code's
          # utilisation figure) under an "io util" service name.
          report(:service => "diskstats " + service.gsub(/time/, 'util'),
                 :metric => (delta.to_f / (opts[:interval]*1000)),
                 :state => "ok")
        end
      end
    end

    @old_state = state
  end
end
85
+
86
+ Riemann::Tools::Diskstats.run
data/bin/riemann-fd ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current file descriptor use to riemann.
4
+ # By default reports the total system fd usage, can also report usage of individual processes
5
+
6
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
7
+
8
# Reports open file descriptor counts (via lsof) to Riemann: always the
# system-wide total, plus one event per PID listed in --processes.
class Riemann::Tools::Health
  include Riemann::Tools

  opt :fd_sys_warning, "open file descriptor threshold for system", :default => 800
  opt :fd_sys_critical, "open file descriptor critical threshold for system", :default => 900
  opt :fd_proc_warning, "open file descriptor threshold for process", :default => 800
  opt :fd_proc_critical, "open file descriptor critical threshold for process", :default => 900
  opt :processes, "list of processes to measure fd usage in addition to system total", :type => :ints

  # Captures the thresholds from the parsed options and selects the probe.
  # Only a Linux probe exists; other systems get a warning and use it anyway.
  def initialize
    @limits = {
      :fd => {:critical => opts[:fd_sys_critical], :warning => opts[:fd_sys_warning]},
      :process => {:critical => opts[:fd_proc_critical], :warning => opts[:fd_proc_warning]},
    }
    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
    @fd = method :linux_fd
  end

  # Sends one event, coercing service/state to strings and metric to a float.
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts lsof output lines for the whole system and for each configured
  # PID, and reports each count with a state derived from its thresholds.
  def linux_fd
    sys_used = Integer(`lsof | wc -l`)
    alert "fd sys", threshold_state(sys_used, @limits[:fd]), sys_used, "system is using #{sys_used} fds"

    (opts[:processes] || []).each do |process|
      used = Integer(`lsof -p #{process} | wc -l`)
      # Ask ps for exactly this PID. The previous `ps | grep -w PID` lookup
      # could pick up another process whose command name or PID column
      # merely contained the same number. An unknown PID yields "".
      name = `ps -p #{process} -o comm=`.strip
      alert "fd #{name} #{process}", threshold_state(used, @limits[:process]), used, "process #{name} #{process} is using #{used} fds"
    end
  end

  def tick
    @fd.call
  end

  private

  # Maps +used+ to :critical, :warning or :ok; a value must strictly exceed
  # a threshold in +limits+ to trigger it.
  def threshold_state(used, limits)
    if used > limits[:critical]
      :critical
    elsif used > limits[:warning]
      :warning
    else
      :ok
    end
  end
end
65
+
66
+ Riemann::Tools::Health.run
@@ -0,0 +1,270 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current CPU, disk, load average, and memory use to riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Reports CPU, disk, load average and memory use to Riemann. The probes are
# chosen from `uname -s`: Darwin and FreeBSD have their own, everything else
# uses the Linux (/proc based) ones.
class Riemann::Tools::Health
  include Riemann::Tools

  opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
  opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
  opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
  opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
  opt :load_warning, "Load warning threshold (load average / core)", :default => 3
  opt :load_critical, "Load critical threshold (load average / core)", :default => 8
  opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
  opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
  opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'load', 'memory', 'disk']

  # Captures thresholds, binds the OS-specific probe methods, and records
  # which of the four checks were requested via --checks.
  def initialize
    @limits = {
      :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
      :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
      :load => {:critical => opts[:load_critical], :warning => opts[:load_warning]},
      :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
    }
    case (ostype = `uname -s`.chomp.downcase)
    when 'darwin'
      @cores = `sysctl -n hw.ncpu`.to_i
      @cpu = method :darwin_cpu
      @disk = method :disk
      @load = method :darwin_load
      @memory = method :darwin_memory
      darwin_top
    when 'freebsd'
      @cores = `sysctl -n hw.ncpu`.to_i
      @cpu = method :freebsd_cpu
      @disk = method :disk
      @load = method :freebsd_load
      @memory = method :freebsd_memory
    else
      @cores = cores
      puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
      @cpu = method :linux_cpu
      @disk = method :disk
      @load = method :linux_load
      @memory = method :linux_memory
    end

    checks = opts[:checks]
    @disk_enabled = checks.include?("disk")
    @load_enabled = checks.include?("load")
    @cpu_enabled = checks.include?("cpu")
    @memory_enabled = checks.include?("memory")
  end

  # Sends one event, coercing service/state to strings and metric to a float
  # (a nil metric therefore becomes 0.0).
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts distinct cores from /proc/cpuinfo: one per "physical id:core id"
  # pair, or per physical id, or per stanza when neither field is present.
  def cores
    i = 0
    File.read("/proc/cpuinfo").split(/\n\n/).inject({}) do |cores, p|
      physical_id = p[/physical id\s+:\s+(\d+)/, 1]
      core_id = p[/core id\s+:\s+(\d+)/, 1]
      if physical_id && core_id
        cores["#{physical_id}:#{core_id}"] = true
      elsif physical_id
        cores["#{physical_id}:"] = true
      else
        cores[i += 1] = true
      end

      cores
    end.size
  end

  # Reports +fraction+ (0..1) for +service+ (a key of @limits) with a
  # percentage prefix on the description. Does nothing when fraction is nil.
  def report_pct(service, fraction, report)
    return unless fraction

    alert service, threshold_state(service, fraction), fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
  end

  # CPU use since the previous tick from the aggregate "cpu" line of
  # /proc/stat. The first call only records a baseline.
  def linux_cpu
    new = File.read('/proc/stat')
    unless new[/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
      alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
      return false
    end
    u2, n2, s2, i2 = [$1, $2, $3, $4].map { |e| e.to_i }

    if @old_cpu
      u1, n1, s1, i1 = @old_cpu

      used = (u2+n2+s2) - (u1+n1+s1)
      total = used + i2-i1

      # If no jiffies elapsed between samples the division would yield NaN;
      # skip the report for this tick instead of sending a NaN metric.
      if total > 0
        fraction = used.to_f / total
        report_pct :cpu, fraction, "user+nice+sytem\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
      end
    end

    @old_cpu = [u2, n2, s2, i2]
  end

  # 1-minute load average from /proc/loadavg divided by the core count.
  def linux_load
    load = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores
    alert "load", threshold_state(:load, load), load, "1-minute load average/core is #{load}"
  end

  # Memory in use as 1 - (MemFree + Buffers + Cached) / MemTotal.
  def linux_memory
    m = File.read('/proc/meminfo').split(/\n/).inject({}) { |info, line|
      x = line.split(/:?\s+/)
      # Assume kB...
      info[x[0]] = x[1].to_i
      info
    }

    free = m['MemFree'].to_i + m['Buffers'].to_i + m['Cached'].to_i
    total = m['MemTotal'].to_i
    fraction = 1 - (free.to_f / total)

    report_pct :memory, fraction, "used\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # CPU use since the previous tick from `sysctl kern.cp_time`.
  def freebsd_cpu
    u2, n2, s2, t2, i2 = `sysctl -n kern.cp_time 2>/dev/null`.split.map{ |e| e.to_i } #FreeBSD has 5 cpu stats

    if @old_cpu
      u1, n1, s1, t1, i1 = @old_cpu

      used = (u2+n2+s2+t2) - (u1+n1+s1+t1)
      total = used + i2-i1

      # Same zero-elapsed guard as linux_cpu.
      if total > 0
        fraction = used.to_f / total
        report_pct :cpu, fraction, "user+nice+sytem+interrupt\n\n#{`ps -axo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
      end
    end

    @old_cpu = [u2, n2, s2, t2, i2]
  end

  # First load figure printed by `uptime`, divided by the core count.
  def freebsd_load
    m = `uptime`.split(':')[-1].chomp.gsub(/\s+/,'').split(',')
    load = m[0].to_f / @cores
    alert "load", threshold_state(:load, load), load, "1-minute load average/core is #{load}"
  end

  # (wired + active pages) / total pages, from sysctl vm.stats.vm.
  def freebsd_memory
    meminfo = `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
    fraction = (meminfo[1].to_f + meminfo[2].to_f) / meminfo[0].to_f

    report_pct :memory, fraction, "used\n\n#{`ps -axo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Runs `top -l 1` once and caches load, cpu and memory figures in
  # @topdata together with a timestamp, so the three darwin_* probes can
  # share one sample per interval.
  def darwin_top
    raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
    @topdata = {:stamp => Time.now.to_i }
    raw.each_line do |ln|
      # Capture the FIRST figure in full: the events describe a 1-minute
      # load. The previous pattern "([0-9.])+" on the third figure kept only
      # that figure's last character.
      if ln.match(/Load Avg: ([0-9.]+), [0-9.]+, [0-9.]+/i)
        @topdata[:load] = $1.to_f
      elsif ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i)
        @topdata[:cpu] = 1 - ($1.to_f / 100)
      elsif mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i)
        wired = darwin_bytes(mdat[1], mdat[2])
        active = darwin_bytes(mdat[3], mdat[4])
        inactive = darwin_bytes(mdat[5], mdat[6])
        used = darwin_bytes(mdat[7], mdat[8])
        free = darwin_bytes(mdat[9], mdat[10])
        @topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
      end
    end
  end

  # CPU use from the cached top sample (refreshed when older than interval).
  def darwin_cpu
    darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
    unless @topdata[:cpu]
      alert 'cpu', :unknown, nil, "unable to get CPU stats from top"
      return false
    end
    report_pct :cpu, @topdata[:cpu], "usage\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Load per core from the cached top sample.
  def darwin_load
    darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
    unless @topdata[:load]
      alert 'load', :unknown, nil, "unable to get load ave from top"
      return false
    end
    metric = @topdata[:load] / @cores
    alert "load", threshold_state(:load, metric), metric, "1-minute load average per core is #{metric}"
  end

  # Memory use from the cached top sample.
  def darwin_memory
    darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
    unless @topdata[:memory]
      alert 'memory', :unknown, nil, "unable to get memory data from top"
      return false
    end
    report_pct :memory, @topdata[:memory], "usage\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # One event per `df -P` row whose first column starts with "/", using the
  # capacity column as the fraction and the sixth column as the service name.
  def disk
    `df -P`.split(/\n/).each do |r|
      f = r.split(/\s+/)
      # This also skips the "Filesystem ..." header row, so no separate
      # header check is needed.
      next unless f[0] =~ /^\//
      x = f[4].to_f/100

      alert "disk #{f[5]}", threshold_state(:disk, x), x, "#{f[4]} used"
    end
  end

  # Runs each enabled check, in cpu, memory, disk, load order.
  def tick
    @cpu.call if @cpu_enabled
    @memory.call if @memory_enabled
    @disk.call if @disk_enabled
    @load.call if @load_enabled
  end

  private

  # Maps +value+ to :critical, :warning or :ok using @limits[kind]; a value
  # must strictly exceed a threshold to trigger it.
  def threshold_state(kind, value)
    if value > @limits[kind][:critical]
      :critical
    elsif value > @limits[kind][:warning]
      :warning
    else
      :ok
    end
  end

  # Converts a count plus a B/K/M/G/T suffix to bytes. The suffix is
  # upcased because the PhysMem pattern is case-insensitive and a lowercase
  # unit would otherwise make String#index return nil.
  def darwin_bytes(count, unit)
    count.to_i * (1024 ** "BKMGT".index(unit.upcase))
  end
end
269
+
270
+ Riemann::Tools::Health.run
data/bin/riemann-net ADDED
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers net statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Samples /proc/net/dev on every tick and reports, per interface and
# counter, the change since the previous sample divided by the interval.
class Riemann::Tools::Net
  include Riemann::Tools

  opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
  opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']

  # Labels applied, in order, to the numeric columns after "<iface>:".
  # NOTE: 'rx drop' and 'tx drops' are spelled differently; the names are
  # kept as-is because they become the reported service names.
  FIELDS = [
    'rx bytes',
    'rx packets',
    'rx errs',
    'rx drop',
    'rx fifo',
    'rx frame',
    'rx compressed',
    'rx multicast',
    'tx bytes',
    'tx packets',
    'tx errs',
    'tx drops',
    'tx fifo',
    'tx colls',
    'tx carrier',
    'tx compressed'
  ].freeze

  def initialize
    @old_state = nil
    @interfaces = opts[:interfaces].map(&:dup) if opts[:interfaces]
    @ignore_interfaces = opts[:ignore_interfaces].map(&:dup)
  end

  # Returns a Hash of "<iface> <field>" => Integer for every interface line
  # in /proc/net/dev, restricted by --interfaces / --ignore-interfaces.
  def state
    state = {}

    File.read('/proc/net/dev').split("\n").each do |line|
      # Anchor at line start and accept any non-space, non-colon name. The
      # previous unanchored /(\w+?):/ reported only the trailing word
      # characters of names such as "br-lan" or "eth0.100".
      next unless line =~ /^\s*([^\s:]+):\s*([\s\d]+)\s*$/

      iface = $1
      values = $2.split(/\s+/).map { |str| str.to_i }

      FIELDS.zip(values).each do |field, value|
        # A line with fewer columns than FIELDS yields nil here; skip it.
        next if value.nil?
        state["#{iface} #{field}"] = value
      end
    end

    # Filter interfaces
    if is = @interfaces
      state = state.select do |service, value|
        is.include? service.split(' ').first
      end
    end

    state = state.reject do |service, value|
      @ignore_interfaces.include? service.split(' ').first
    end

    state
  end

  # Reports one event per counter with the per-second delta since the last
  # tick. The first tick only records a baseline.
  def tick
    state = self.state

    if @old_state
      state.each do |service, metric|
        previous = @old_state[service]
        # An interface that appeared since the last sample has no baseline.
        # Without this guard the subtraction raises, @old_state is never
        # refreshed, and every later tick fails the same way.
        next if previous.nil?

        delta = metric - previous

        # "drops?" so that 'tx drops' is flagged as well as 'rx drop'.
        # NOTE(review): the state is driven by the cumulative counter, not
        # the delta, so it stays 'warning' once any drop/error has occurred.
        # Kept as in the original; confirm whether that is intended.
        svc_state = if service =~ /drops?$/ || service =~ /errs$/
                      metric > 0 ? 'warning' : 'ok'
                    else
                      'ok'
                    end

        report(
          :service => service.dup,
          :metric => (delta.to_f / opts[:interval]),
          :state => svc_state
        )
      end
    end

    @old_state = state
  end
end
100
+
101
+ Riemann::Tools::Net.run
data/bin/riemann-proc ADDED
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports running process count to riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Reports how many running processes match --proc-regex and flags the count
# as critical when it falls outside the configured min/max.
class Riemann::Tools::Proc
  # Needed for Shellwords.escape in linux_proc; required here so the class
  # carries its own dependency.
  require 'shellwords'

  include Riemann::Tools

  opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
  opt :proc_min_critical, "running process count minimum", :default => 1
  opt :proc_max_critical, "running process count maximum", :default => 1

  # Captures the limits, aborts when no --proc-regex was given, and selects
  # the probe. Only a Linux probe exists; other systems get a warning.
  def initialize
    @limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }

    abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]

    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
    @check = method :linux_proc
  end

  # Sends one event, coercing service/state to strings and metric to a float.
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts `ps axo args` lines matching the pattern, excluding grep itself
  # and this tool, and reports :critical when the count is out of range.
  def linux_proc
    process = opts[:proc_regex]
    # The pattern is user input that goes through a shell: escape it so
    # spaces and metacharacters reach grep as one argument, and pass it via
    # -e so a leading "-" is not taken as a grep option.
    pattern = Shellwords.escape(process)
    running = Integer(`ps axo args | grep -e #{pattern} | grep -v grep | grep -v riemann-proc | wc -l`)

    out_of_range = running > @limits[:critical][:max] || running < @limits[:critical][:min]
    state = out_of_range ? :critical : :ok

    alert "proc #{process}", state, running, "process #{process} is running #{running} instances"
  end

  def tick
    @check.call
  end
end
47
+
48
+ Riemann::Tools::Proc.run
@@ -0,0 +1,132 @@
1
module Riemann
  # Mixin for the command-line tools: including it gives a class the `opt`
  # DSL and a `run` entry point, and gives instances option parsing, event
  # reporting, and a fixed-interval main loop that calls `tick`.
  module Tools
    require 'rubygems'
    require 'trollop'
    require 'riemann/client'
    require 'timeout'

    # Installs the class-level API (run, opt, options) on the including
    # class and registers the options shared by every tool.
    def self.included(base)
      base.instance_eval do
        # Instantiates the tool and enters its main loop.
        def run
          new.run
        end

        # Records one option declaration; arguments are forwarded verbatim
        # to Trollop::Parser#opt when the command line is parsed.
        def opt(*a)
          a.unshift :opt
          @opts ||= []
          @opts << a
        end

        # Builds a parser from the recorded declarations and parses ARGV.
        def options
          p = Trollop::Parser.new
          @opts.each do |o|
            p.send(*o)
          end
          Trollop::with_standard_exception_handling(p) do
            p.parse ARGV
          end
        end

        opt :host, "Riemann host", :default => '127.0.0.1'
        opt :port, "Riemann port", :default => 5555
        opt :event_host, "Event hostname", :type => String
        opt :interval, "Seconds between updates", :default => 5
        opt :tag, "Tag to add to events", :type => String, :multi => true
        opt :ttl, "TTL for events", :type => Integer
        opt :attribute, "Attribute to add to the event", :type => String, :multi => true
        opt :timeout, "Timeout (in seconds) when waiting for acknowledgements", :default => 30
        opt :tcp, "Use TCP transport instead of UDP (improves reliability, slight overhead.", :default => true
        opt :daemon, "Run in background", :default => false
        opt :logfile, "logfile path", :type => String, :default => '/tmp/riemann-tools.log'
        opt :pidfile, "pidfile path", :type => String, :default => '/tmp/riemann-tools.pid'
      end
    end

    # Returns parsed options (cached) from command line.
    def options
      @options ||= self.class.options
    end
    alias :opts :options

    # Returns the --attribute key=value pairs as a Hash (cached).
    # Entries without "=" or with an empty value are ignored. Only the
    # first "=" separates key from value, so values may contain "=".
    def attributes
      @attributes ||= options[:attribute].each_with_object({}) do |attr, pairs|
        k, v = attr.split('=', 2)
        # Previously a malformed entry put nil into the array given to
        # Hash[], which is not a valid [key, value] pair.
        next if k.nil? || v.nil? || v.empty?
        pairs[k] = v
      end
    end

    # Decorates +event+ with tags, ttl (default: twice the interval), host
    # and custom attributes, then sends it. If sending exceeds --timeout the
    # event is dropped and the client is asked to reconnect.
    def report(event)
      if options[:tag]
        # Work around a bug with beefcake which can't take frozen strings.
        event[:tags] = options[:tag].map(&:dup)
      end

      event[:ttl] ||= (options[:ttl] || (options[:interval] * 2))

      if options[:event_host]
        event[:host] = options[:event_host].dup
      end

      event = event.merge(attributes)

      begin
        Timeout::timeout(options[:timeout]) do
          riemann << event
        end
      rescue Timeout::Error
        riemann.connect
      end
    end

    # Builds a client for --host/--port; returns its TCP transport when
    # --tcp is set, otherwise the client itself.
    def new_riemann_client
      r = Riemann::Client.new(
        :host => options[:host],
        :port => options[:port]
      )
      if options[:tcp]
        r.tcp
      else
        r
      end
    end

    # The (cached) client used by report.
    def riemann
      @riemann ||= new_riemann_client
    end
    alias :r :riemann

    # Main loop: optionally daemonizes, then calls tick forever. Errors from
    # tick are printed to stderr and do not stop the loop. The sleep is
    # computed from the start time so ticks stay aligned to the interval.
    def run
      daemonize if options[:daemon]
      t0 = Time.now
      loop do
        begin
          tick
        rescue => e
          $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
        end

        # Sleep.
        sleep(options[:interval] - ((Time.now - t0) % options[:interval]))
      end
    end

    # Double-fork daemonization: detaches from the session, redirects
    # stdout/stderr to --logfile (truncating it) and writes the final PID
    # to --pidfile.
    def daemonize
      exit if fork
      Process.setsid
      $0 = self.class.name.downcase.gsub('::','_')
      $stdout.reopen(opts[:logfile], 'w')
      $stdout.sync = true
      $stderr.reopen($stdout)
      exit if fork
      # Block form closes the pidfile even if the write raises.
      File.open(opts[:pidfile], 'w') do |f|
        f.write Process.pid
      end
    end

    # Hook called once per interval; tools override it.
    def tick
    end

  end
end
+ end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: riemann-tools-fb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Fede Borgnia
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: riemann-client
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.3
27
+ - !ruby/object:Gem::Dependency
28
+ name: trollop
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.16.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.16.2
41
+ description: Utilities which submit events to Riemann.
42
+ email: fborgnia@gmail.com
43
+ executables:
44
+ - riemann-bench
45
+ - riemann-diskstats
46
+ - riemann-proc
47
+ - riemann-health
48
+ - riemann-fd
49
+ - riemann-net
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - lib/riemann/tools.rb
54
+ - bin/riemann-bench
55
+ - bin/riemann-diskstats
56
+ - bin/riemann-fd
57
+ - bin/riemann-health
58
+ - bin/riemann-net
59
+ - bin/riemann-proc
60
+ - LICENSE
61
+ - README.markdown
62
+ homepage: https://github.com/fborgnia/riemann-tools
63
+ licenses:
64
+ - MIT
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>='
73
+ - !ruby/object:Gem::Version
74
+ version: 1.9.3
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project: riemann-tools
82
+ rubygems_version: 2.0.3
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: Utilities which submit events to Riemann.
86
+ test_files: []