riemann-tools-fb 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 25dce28ceece7fa46dec1e11f34c25cd635755da
4
+ data.tar.gz: d9a177ddf05988aa078e2af1e5cd715487791856
5
+ SHA512:
6
+ metadata.gz: 81e9be74305a81fc5befef9f73afd37e34146a3a9ce117f88f398322226175a6d1742d8cda90bf250689961fed69cd710b2337a1a2e404cc9df577fec7d9a1f7
7
+ data.tar.gz: b1c56956e9f9dce4c891a59fba51a5769c38ce327a16c320737f4eb64bca7a0ae637d747fa00c1b17a9b65e81f8893c08e23988ee731e1a0955cee99cb6cfec6
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,18 @@
1
+ Riemann Tools
2
+ =============
3
+
4
+ Tiny programs to submit events to Riemann.
5
+
6
+ Riemann-health, for example, submits events about the current CPU, load,
7
+ memory, and disk use. Bench submits randomly distributed metrics for load
8
+ testing. I've got a whole bunch of these internally for monitoring Redis, Riak,
9
+ queues, etc. Most have internal configuration dependencies, so it'll be a while
10
+ before I can extract them for re-use.
11
+
12
+ Get started
13
+ ==========
14
+
15
+ ``` bash
16
+ gem install riemann-tools
17
+ riemann-health --host my.riemann.server
18
+ ```
data/bin/riemann-bench ADDED
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Connects to a server (first arg) and populates it with a constant stream of
4
+ # events for testing.
5
+
6
+ require 'rubygems'
7
+ require 'riemann/client'
8
+ require 'pp'
9
+
10
# Benchmark driver: keeps one synthetic event per (host, service) pair and
# continuously streams randomly drifting versions of those events to a
# Riemann server.
class Riemann::Bench
  attr_accessor :client, :hosts, :services, :states

  # Sets up the fixed host and service names, an empty state table, and a
  # client pointed at the first command-line argument (or localhost).
  def initialize
    # A previous "[nil] + host0..host9" list was assigned here and overwritten
    # on the very next line without being read; only the effective value stays.
    @hosts    = %w(a b c d e f g h i j)
    @services = %w(test1 test2 test3 foo bar baz xyzzy attack cat treat)
    @states   = {}
    @client   = Riemann::Client.new(:host => (ARGV.first || 'localhost'))
  end

  # Returns the successor of +state+ (a Hash with :metric, :host, :service).
  # The metric moves by a random step in [-0.05, 0.05) and is clamped to 0..1;
  # the state string is derived from the new metric.
  def evolve(state)
    metric = state[:metric] + (rand - 0.5) * 0.1
    metric = [[0, metric].max, 1].min

    level = case metric
            when 0...0.75   then 'ok'
            when 0.75...0.9 then 'warning'
            when 0.9..1.0   then 'critical'
            end

    {
      :metric      => metric,
      :state       => level,
      :host        => state[:host],
      :service     => state[:service],
      :description => "at #{Time.now}"
    }
  end

  # Evolves every (host, service) state once and sends each new event.
  def tick
    hosts.product(services).each do |key|
      successor = evolve(states[key])
      states[key] = successor
      client << successor
    end
  end

  # Seeds the state table, then ticks forever with a 50 ms pause between rounds.
  def run
    start
    loop do
      sleep 0.05
      tick
    end
  end

  # Creates the initial 'ok' state (metric 0.5) for every (host, service) pair.
  def start
    hosts.product(services).each do |host, service|
      states[[host, service]] = {
        :metric      => 0.5,
        :state       => 'ok',
        :description => "Starting up",
        :host        => host,
        :service     => service
      }
    end
  end
end

Riemann::Bench.new.run
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ require 'rubygems'
4
+ require 'riemann/tools'
5
+
6
+ class Riemann::Tools::Diskstats
7
+ include Riemann::Tools
8
+
9
+ opt :devices, "Devices to monitor", :type => :strings, :default => nil
10
+ opt :ignore_devices, "Devices to ignore", :type => :strings, :default =>nil
11
+
12
+ def initialize
13
+ @old_state = nil
14
+ end
15
+
16
+ def state
17
+ f = File.read('/proc/diskstats')
18
+ state = f.split("\n").reject { |d| d =~ /(ram|loop)/ }.inject({}) do |s, line|
19
+ if line =~ /^(?:\s+\d+){2}\s+([\w\d\-]+) (.*)$/
20
+ dev = $1
21
+
22
+ ['reads reqs',
23
+ 'reads merged',
24
+ 'reads sector',
25
+ 'reads time',
26
+ 'writes reqs',
27
+ 'writes merged',
28
+ 'writes sector',
29
+ 'writes time',
30
+ 'io reqs',
31
+ 'io time',
32
+ 'io weighted'
33
+ ].map do |service|
34
+ "#{dev} #{service}"
35
+ end.zip(
36
+ $2.split(/\s+/).map { |str| str.to_i }
37
+ ).each do |service, value|
38
+ s[service] = value
39
+ end
40
+ end
41
+
42
+ s
43
+ end
44
+
45
+ # Filter interfaces
46
+ if is = opts[:devices]
47
+ state = state.select do |service, value|
48
+ is.include? service.split(' ').first
49
+ end
50
+ end
51
+
52
+ if ign = opts[:ignore_devices]
53
+ state = state.reject do |service, value|
54
+ ign.include? service.split(' ').first
55
+ end
56
+ end
57
+
58
+ state
59
+ end
60
+
61
+ def tick
62
+ state = self.state
63
+
64
+ if @old_state
65
+ state.each do |service, metric|
66
+ delta = metric - @old_state[service]
67
+
68
+ report(
69
+ :service => "diskstats " + service,
70
+ :metric => (delta.to_f / opts[:interval]),
71
+ :state => "ok"
72
+ )
73
+
74
+ if service =~ /io time$/
75
+ report(:service => "diskstats " + service.gsub(/time/, 'util'),
76
+ :metric => (delta.to_f / (opts[:interval]*1000)),
77
+ :state => "ok")
78
+ end
79
+ end
80
+ end
81
+
82
+ @old_state = state
83
+ end
84
+ end
85
+
86
+ Riemann::Tools::Diskstats.run
data/bin/riemann-fd ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current file descriptor use to riemann.
4
+ # By default it reports the total system fd usage; it can also report the usage of individual processes.
5
+
6
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
7
+
8
+ class Riemann::Tools::Health
9
+ include Riemann::Tools
10
+
11
+ opt :fd_sys_warning, "open file descriptor threshold for system", :default => 800
12
+ opt :fd_sys_critical, "open file descriptor critical threshold for system", :default => 900
13
+ opt :fd_proc_warning, "open file descriptor threshold for process", :default => 800
14
+ opt :fd_proc_critical, "open file descriptor critical threshold for process", :default => 900
15
+ opt :processes, "list of processes to measure fd usage in addition to system total", :type => :ints
16
+
17
+ def initialize
18
+ @limits = {
19
+ :fd => {:critical => opts[:fd_sys_critical], :warning => opts[:fd_sys_warning]},
20
+ :process => {:critical => opts[:fd_proc_critical], :warning => opts[:fd_proc_warning]},
21
+ }
22
+ ostype = `uname -s`.chomp.downcase
23
+ puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
24
+ @fd = method :linux_fd
25
+ end
26
+
27
+ def alert(service, state, metric, description)
28
+ report(
29
+ :service => service.to_s,
30
+ :state => state.to_s,
31
+ :metric => metric.to_f,
32
+ :description => description
33
+ )
34
+ end
35
+
36
+ def linux_fd
37
+ sys_used = Integer(`lsof | wc -l`)
38
+ if sys_used > @limits[:fd][:critical]
39
+ alert "fd sys", :critical, sys_used, "system is using #{sys_used} fds"
40
+ elsif sys_used > @limits[:fd][:warning]
41
+ alert "fd sys", :warning, sys_used, "system is using #{sys_used} fds"
42
+ else
43
+ alert "fd sys", :ok, sys_used, "system is using #{sys_used} fds"
44
+ end
45
+
46
+ unless opts[:processes].nil?
47
+ opts[:processes].each do |process|
48
+ used = Integer(`lsof -p #{process} | wc -l`)
49
+ name, pid = `ps axo comm,pid | grep -w #{process}`.split
50
+ if used > @limits[:process][:critical]
51
+ alert "fd #{name} #{process}", :critical, used, "process #{name} #{process} is using #{used} fds"
52
+ elsif used > @limits[:process][:warning]
53
+ alert "fd #{name} #{process}", :warning, used, "process #{name} #{process} is using #{used} fds"
54
+ else
55
+ alert "fd #{name} #{process}", :ok, used, "process #{name} #{process} is using #{used} fds"
56
+ end
57
+ end
58
+ end
59
+ end
60
+
61
+ def tick
62
+ @fd.call
63
+ end
64
+ end
65
+
66
+ Riemann::Tools::Health.run
@@ -0,0 +1,270 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current CPU, disk, load average, and memory use to riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
+ class Riemann::Tools::Health
8
+ include Riemann::Tools
9
+
10
+ opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
11
+ opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
12
+ opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
13
+ opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
14
+ opt :load_warning, "Load warning threshold (load average / core)", :default => 3
15
+ opt :load_critical, "Load critical threshold (load average / core)", :default => 8
16
+ opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
17
+ opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
18
+ opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'load', 'memory', 'disk']
19
+
20
+ def initialize
21
+ @limits = {
22
+ :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
23
+ :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
24
+ :load => {:critical => opts[:load_critical], :warning => opts[:load_warning]},
25
+ :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
26
+ }
27
+ case (ostype = `uname -s`.chomp.downcase)
28
+ when 'darwin'
29
+ @cores = `sysctl -n hw.ncpu`.to_i
30
+ @cpu = method :darwin_cpu
31
+ @disk = method :disk
32
+ @load = method :darwin_load
33
+ @memory = method :darwin_memory
34
+ darwin_top
35
+ when 'freebsd'
36
+ @cores = `sysctl -n hw.ncpu`.to_i
37
+ @cpu = method :freebsd_cpu
38
+ @disk = method :disk
39
+ @load = method :freebsd_load
40
+ @memory = method :freebsd_memory
41
+ else
42
+ @cores = cores
43
+ puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
44
+ @cpu = method :linux_cpu
45
+ @disk = method :disk
46
+ @load = method :linux_load
47
+ @memory = method :linux_memory
48
+ end
49
+
50
+ opts[:checks].each do |check|
51
+ case check
52
+ when "disk"
53
+ @disk_enabled = true
54
+ when "load"
55
+ @load_enabled = true
56
+ when "cpu"
57
+ @cpu_enabled = true
58
+ when "memory"
59
+ @memory_enabled = true
60
+ end
61
+ end
62
+ end
63
+
64
+ def alert(service, state, metric, description)
65
+ report(
66
+ :service => service.to_s,
67
+ :state => state.to_s,
68
+ :metric => metric.to_f,
69
+ :description => description
70
+ )
71
+ end
72
+
73
+ def cores
74
+ i = 0;
75
+ File.read("/proc/cpuinfo").split(/\n\n/).inject({}) do |cores, p|
76
+ physical_id = p[/physical id\s+:\s+(\d+)/, 1]
77
+ core_id = p[/core id\s+:\s+(\d+)/, 1]
78
+ if physical_id and core_id
79
+ cores["#{physical_id}:#{core_id}"] = true
80
+ elsif physical_id
81
+ cores["#{physical_id}:"] = true
82
+ else
83
+ cores[i += 1] = true;
84
+ end
85
+
86
+ cores
87
+ end.size
88
+ end
89
+
90
+ def report_pct(service, fraction, report)
91
+ if fraction
92
+ if fraction > @limits[service][:critical]
93
+ alert service, :critical, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
94
+ elsif fraction > @limits[service][:warning]
95
+ alert service, :warning, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
96
+ else
97
+ alert service, :ok, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
98
+ end
99
+ end
100
+ end
101
+
102
+ def linux_cpu
103
+ new = File.read('/proc/stat')
104
+ unless new[/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
105
+ alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
106
+ return false
107
+ end
108
+ u2, n2, s2, i2 = [$1, $2, $3, $4].map { |e| e.to_i }
109
+
110
+ if @old_cpu
111
+ u1, n1, s1, i1 = @old_cpu
112
+
113
+ used = (u2+n2+s2) - (u1+n1+s1)
114
+ total = used + i2-i1
115
+ fraction = used.to_f / total
116
+
117
+ report_pct :cpu, fraction, "user+nice+sytem\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
118
+ end
119
+
120
+ @old_cpu = [u2, n2, s2, i2]
121
+ end
122
+
123
+ def linux_load
124
+ load = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores
125
+ if load > @limits[:load][:critical]
126
+ alert "load", :critical, load, "1-minute load average/core is #{load}"
127
+ elsif load > @limits[:load][:warning]
128
+ alert "load", :warning, load, "1-minute load average/core is #{load}"
129
+ else
130
+ alert "load", :ok, load, "1-minute load average/core is #{load}"
131
+ end
132
+ end
133
+
134
+ def linux_memory
135
+ m = File.read('/proc/meminfo').split(/\n/).inject({}) { |info, line|
136
+ x = line.split(/:?\s+/)
137
+ # Assume kB...
138
+ info[x[0]] = x[1].to_i
139
+ info
140
+ }
141
+
142
+ free = m['MemFree'].to_i + m['Buffers'].to_i + m['Cached'].to_i
143
+ total = m['MemTotal'].to_i
144
+ fraction = 1 - (free.to_f / total)
145
+
146
+ report_pct :memory, fraction, "used\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
147
+ end
148
+
149
+ def freebsd_cpu
150
+ u2, n2, s2, t2, i2 = `sysctl -n kern.cp_time 2>/dev/null`.split.map{ |e| e.to_i } #FreeBSD has 5 cpu stats
151
+
152
+ if @old_cpu
153
+ u1, n1, s1, t1, i1 = @old_cpu
154
+
155
+ used = (u2+n2+s2+t2) - (u1+n1+s1+t1)
156
+ total = used + i2-i1
157
+ fraction = used.to_f / total
158
+
159
+ report_pct :cpu, fraction, "user+nice+sytem+interrupt\n\n#{`ps -axo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
160
+ end
161
+
162
+ @old_cpu = [u2, n2, s2, t2, i2]
163
+ end
164
+
165
+ def freebsd_load
166
+ m = `uptime`.split(':')[-1].chomp.gsub(/\s+/,'').split(',')
167
+ load = m[0].to_f / @cores
168
+ if load > @limits[:load][:critical]
169
+ alert "load", :critical, load, "1-minute load average/core is #{load}"
170
+ elsif load > @limits[:load][:warning]
171
+ alert "load", :warning, load, "1-minute load average/core is #{load}"
172
+ else
173
+ alert "load", :ok, load, "1-minute load average/core is #{load}"
174
+ end
175
+ end
176
+
177
+ def freebsd_memory
178
+ meminfo = `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
179
+ fraction = (meminfo[1].to_f + meminfo[2].to_f) / meminfo[0].to_f
180
+
181
+ report_pct :memory, fraction, "used\n\n#{`ps -axo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
182
+ end
183
+
184
+ def darwin_top
185
+ raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
186
+ @topdata = {:stamp => Time.now.to_i }
187
+ raw.each_line do |ln|
188
+ if ln.match(/Load Avg: [0-9.]+, [0-9.]+, ([0-9.])+/i)
189
+ @topdata[:load] = $1.to_f
190
+ elsif ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i)
191
+ @topdata[:cpu] = 1 - ($1.to_f / 100)
192
+ elsif mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i)
193
+ wired = mdat[1].to_i * (1024 ** "BKMGT".index(mdat[2]))
194
+ active = mdat[3].to_i * (1024 ** "BKMGT".index(mdat[4]))
195
+ inactive = mdat[5].to_i * (1024 ** "BKMGT".index(mdat[6]))
196
+ used = mdat[7].to_i * (1024 ** "BKMGT".index(mdat[8]))
197
+ free = mdat[9].to_i * (1024 ** "BKMGT".index(mdat[10]))
198
+ @topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
199
+ end
200
+ end
201
+ end
202
+
203
+ def darwin_cpu
204
+ darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
205
+ unless @topdata[:cpu]
206
+ alert 'cpu', :unknown, nil, "unable to get CPU stats from top"
207
+ return false
208
+ end
209
+ report_pct :cpu, @topdata[:cpu], "usage\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
210
+ end
211
+
212
+ def darwin_load
213
+ darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
214
+ unless @topdata[:load]
215
+ alert 'load', :unknown, nil, "unable to get load ave from top"
216
+ return false
217
+ end
218
+ metric = @topdata[:load] / @cores
219
+ if metric > @limits[:load][:critical]
220
+ alert "load", :critical, metric, "1-minute load average per core is #{metric}"
221
+ elsif metric > @limits[:load][:warning]
222
+ alert "load", :warning, metric, "1-minute load average per core is #{metric}"
223
+ else
224
+ alert "load", :ok, metric, "1-minute load average per core is #{metric}"
225
+ end
226
+ end
227
+
228
+ def darwin_memory
229
+ darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
230
+ unless @topdata[:memory]
231
+ alert 'memory', :unknown, nil, "unable to get memory data from top"
232
+ return false
233
+ end
234
+ report_pct :memory, @topdata[:memory], "usage\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
235
+ end
236
+
237
+ def disk
238
+ `df -P`.split(/\n/).each do |r|
239
+ f = r.split(/\s+/)
240
+ next unless f[0] =~ /^\//
241
+ next if f[0] == 'Filesystem'
242
+ x = f[4].to_f/100
243
+
244
+ if x > @limits[:disk][:critical]
245
+ alert "disk #{f[5]}", :critical, x, "#{f[4]} used"
246
+ elsif x > @limits[:disk][:warning]
247
+ alert "disk #{f[5]}", :warning, x, "#{f[4]} used"
248
+ else
249
+ alert "disk #{f[5]}", :ok, x, "#{f[4]} used"
250
+ end
251
+ end
252
+ end
253
+
254
+ def tick
255
+ if @cpu_enabled
256
+ @cpu.call
257
+ end
258
+ if @memory_enabled
259
+ @memory.call
260
+ end
261
+ if @disk_enabled
262
+ @disk.call
263
+ end
264
+ if @load_enabled
265
+ @load.call
266
+ end
267
+ end
268
+ end
269
+
270
+ Riemann::Tools::Health.run
data/bin/riemann-net ADDED
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers net statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
+ class Riemann::Tools::Net
8
+ include Riemann::Tools
9
+
10
+ opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
11
+ opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']
12
+
13
+ def initialize
14
+ @old_state = nil
15
+ @interfaces = opts[:interfaces].map(&:dup) if opts[:interfaces]
16
+ @ignore_interfaces = opts[:ignore_interfaces].map(&:dup)
17
+ end
18
+
19
+ def state
20
+ f = File.read('/proc/net/dev')
21
+ state = f.split("\n").inject({}) do |s, line|
22
+ if line =~ /\s*(\w+?):\s*([\s\d]+)\s*/
23
+ iface = $1
24
+
25
+ ['rx bytes',
26
+ 'rx packets',
27
+ 'rx errs',
28
+ 'rx drop',
29
+ 'rx fifo',
30
+ 'rx frame',
31
+ 'rx compressed',
32
+ 'rx multicast',
33
+ 'tx bytes',
34
+ 'tx packets',
35
+ 'tx errs',
36
+ 'tx drops',
37
+ 'tx fifo',
38
+ 'tx colls',
39
+ 'tx carrier',
40
+ 'tx compressed'].map do |service|
41
+ "#{iface} #{service}"
42
+ end.zip(
43
+ $2.split(/\s+/).map { |str| str.to_i }
44
+ ).each do |service, value|
45
+ s[service] = value
46
+ end
47
+ end
48
+
49
+ s
50
+ end
51
+
52
+ # Filter interfaces
53
+ if is = @interfaces
54
+ state = state.select do |service, value|
55
+ is.include? service.split(' ').first
56
+ end
57
+ end
58
+
59
+ state = state.reject do |service, value|
60
+ @ignore_interfaces.include? service.split(' ').first
61
+ end
62
+
63
+ state
64
+ end
65
+
66
+ def tick
67
+ state = self.state
68
+
69
+ if @old_state
70
+ state.each do |service, metric|
71
+ delta = metric - @old_state[service]
72
+ svc_state = case service
73
+ when /drop$/
74
+ if metric > 0
75
+ 'warning'
76
+ else
77
+ 'ok'
78
+ end
79
+ when /errs$/
80
+ if metric > 0
81
+ 'warning'
82
+ else
83
+ 'ok'
84
+ end
85
+ else
86
+ 'ok'
87
+ end
88
+
89
+ report(
90
+ :service => service.dup,
91
+ :metric => (delta.to_f / opts[:interval]),
92
+ :state => svc_state
93
+ )
94
+ end
95
+ end
96
+
97
+ @old_state = state
98
+ end
99
+ end
100
+
101
+ Riemann::Tools::Net.run
data/bin/riemann-proc ADDED
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports running process count to riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
+ class Riemann::Tools::Proc
8
+ include Riemann::Tools
9
+
10
+ opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
11
+ opt :proc_min_critical, "running process count minimum", :default => 1
12
+ opt :proc_max_critical, "running process count maximum", :default => 1
13
+
14
+ def initialize
15
+ @limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }
16
+
17
+ abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]
18
+
19
+ ostype = `uname -s`.chomp.downcase
20
+ puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
21
+ @check = method :linux_proc
22
+ end
23
+
24
+ def alert(service, state, metric, description)
25
+ report(
26
+ :service => service.to_s,
27
+ :state => state.to_s,
28
+ :metric => metric.to_f,
29
+ :description => description
30
+ )
31
+ end
32
+
33
+ def linux_proc
34
+ process = opts[:proc_regex]
35
+ running = Integer(`ps axo args | grep #{process} | grep -v grep | grep -v riemann-proc | wc -l`)
36
+ if running > @limits[:critical][:max] or running < @limits[:critical][:min]
37
+ alert "proc #{process}", :critical, running, "process #{process} is running #{running} instances"
38
+ else
39
+ alert "proc #{process}", :ok, running, "process #{process} is running #{running} instances"
40
+ end
41
+ end
42
+
43
+ def tick
44
+ @check.call
45
+ end
46
+ end
47
+
48
+ Riemann::Tools::Proc.run
@@ -0,0 +1,132 @@
1
module Riemann
  # Mixin for small command-line programs that periodically submit events to
  # Riemann. Including it gives the class `opt` / `options` / `run` class
  # methods plus the instance helpers below; the includer overrides #tick.
  module Tools
    require 'rubygems'
    require 'trollop'
    require 'riemann/client'
    require 'timeout'

    # Adds the class-level DSL to +base+ and registers the common options.
    def self.included(base)
      base.instance_eval do
        # Instantiates the tool and enters its main loop.
        def run
          new.run
        end

        # Records one Trollop option declaration for later parsing.
        def opt(*a)
          a.unshift :opt
          @opts ||= []
          @opts << a
        end

        # Builds a Trollop parser from the recorded declarations and parses
        # ARGV with Trollop's standard error/help handling.
        def options
          p = Trollop::Parser.new
          @opts.each do |o|
            p.send(*o)
          end
          Trollop::with_standard_exception_handling(p) do
            p.parse ARGV
          end
        end

        opt :host, "Riemann host", :default => '127.0.0.1'
        opt :port, "Riemann port", :default => 5555
        opt :event_host, "Event hostname", :type => String
        opt :interval, "Seconds between updates", :default => 5
        opt :tag, "Tag to add to events", :type => String, :multi => true
        opt :ttl, "TTL for events", :type => Integer
        opt :attribute, "Attribute to add to the event", :type => String, :multi => true
        opt :timeout, "Timeout (in seconds) when waiting for acknowledgements", :default => 30
        opt :tcp, "Use TCP transport instead of UDP (improves reliability, slight overhead).", :default => true
        opt :daemon, "Run in background", :default => false
        opt :logfile, "logfile path", :type => String, :default => '/tmp/riemann-tools.log'
        opt :pidfile, "pidfile path", :type => String, :default => '/tmp/riemann-tools.pid'
      end
    end

    # Returns parsed options (cached) from command line.
    def options
      @options ||= self.class.options
    end
    alias :opts :options

    # Returns (cached) a Hash built from the --attribute key=value arguments.
    # Only the first "=" separates key from value, so values may themselves
    # contain "="; arguments without a key or a non-empty value are ignored.
    def attributes
      @attributes ||= options[:attribute].each_with_object({}) do |attr, parsed|
        key, value = attr.split('=', 2)
        next if key.nil? || value.nil? || value.empty?

        parsed[key] = value
      end
    end

    # Decorates +event+ with the configured tags, TTL (default: twice the
    # interval), host and custom attributes, then sends it. If sending takes
    # longer than --timeout seconds, the client is asked to reconnect.
    def report(event)
      if options[:tag]
        # Work around a bug with beefcake which can't take frozen strings.
        event[:tags] = options[:tag].map(&:dup)
      end

      event[:ttl] ||= (options[:ttl] || (options[:interval] * 2))

      if options[:event_host]
        event[:host] = options[:event_host].dup
      end

      event = event.merge(attributes)

      begin
        Timeout::timeout(options[:timeout]) do
          riemann << event
        end
      rescue Timeout::Error
        riemann.connect
      end
    end

    # Builds a client for --host/--port, switched to TCP when --tcp is set.
    def new_riemann_client
      client = Riemann::Client.new(
        :host => options[:host],
        :port => options[:port]
      )
      options[:tcp] ? client.tcp : client
    end

    # Returns the (cached) Riemann client.
    def riemann
      @riemann ||= new_riemann_client
    end
    alias :r :riemann

    # Main loop: optionally daemonizes, then calls #tick forever. Errors
    # raised by #tick are printed to stderr and do not stop the loop. The
    # sleep is computed from the start time so ticks stay aligned to
    # multiples of the interval.
    def run
      daemonize if options[:daemon]
      t0 = Time.now
      loop do
        begin
          tick
        rescue => e
          $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
        end

        # Sleep.
        sleep(options[:interval] - ((Time.now - t0) % options[:interval]))
      end
    end

    # Detaches from the terminal with a double fork, redirects stdout and
    # stderr to --logfile, and writes the final pid to --pidfile.
    def daemonize
      exit if fork
      Process.setsid
      $0 = self.class.name.downcase.gsub('::','_')
      $stdout.reopen(opts[:logfile], 'w')
      $stdout.sync = true
      $stderr.reopen($stdout)
      exit if fork
      # Block form closes the pidfile even if the write raises.
      File.open(opts[:pidfile], 'w') { |f| f.write Process.pid }
    end

    # Hook called once per interval; including classes override it.
    def tick
    end

  end
end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: riemann-tools-fb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Fede Borgnia
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: riemann-client
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.3
27
+ - !ruby/object:Gem::Dependency
28
+ name: trollop
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.16.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.16.2
41
+ description: Utilities which submit events to Riemann.
42
+ email: fborgnia@gmail.com
43
+ executables:
44
+ - riemann-bench
45
+ - riemann-diskstats
46
+ - riemann-proc
47
+ - riemann-health
48
+ - riemann-fd
49
+ - riemann-net
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - lib/riemann/tools.rb
54
+ - bin/riemann-bench
55
+ - bin/riemann-diskstats
56
+ - bin/riemann-fd
57
+ - bin/riemann-health
58
+ - bin/riemann-net
59
+ - bin/riemann-proc
60
+ - LICENSE
61
+ - README.markdown
62
+ homepage: https://github.com/fborgnia/riemann-tools
63
+ licenses:
64
+ - MIT
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>='
73
+ - !ruby/object:Gem::Version
74
+ version: 1.9.3
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project: riemann-tools
82
+ rubygems_version: 2.0.3
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: Utilities which submit events to Riemann.
86
+ test_files: []