riemann-monitors 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative "../lib/riemann-monitors"
4
+
5
+ class Riemann::Monitors::Diskstats
6
+ include Riemann::Monitors
7
+
8
+ opt :devices, "Devices to monitor", :type => :strings, :default => nil
9
+ opt :ignore_devices, "Devices to ignore", :type => :strings, :default =>nil
10
+
11
# Starts without a previous sample; @old_state stays nil until the first
# tick has stored one.
def initialize
  @old_state = nil
end
14
+
15
# Reads /proc/diskstats and flattens it into a hash keyed by
# "<device> <counter>" with Integer values.
#
# Lines mentioning "ram" or "loop" are dropped. When the :devices option is
# set only those devices are kept; devices named in :ignore_devices are
# removed afterwards.
def state
  counters = [
    'reads reqs', 'reads merged', 'reads sector', 'reads time',
    'writes reqs', 'writes merged', 'writes sector', 'writes time',
    'io reqs', 'io time', 'io weighted'
  ]

  stats = {}
  File.read('/proc/diskstats').split("\n").each do |line|
    next if line =~ /(ram|loop)/
    next unless line =~ /^(?:\s+\d+){2}\s+([\w\d\-]+) (.*)$/

    dev = $1
    values = $2.split(/\s+/).map { |str| str.to_i }
    # zip pairs counters positionally; extra columns on the line are ignored.
    counters.zip(values).each do |counter, value|
      stats["#{dev} #{counter}"] = value
    end
  end

  # Filter devices
  wanted = opts[:devices]
  if wanted
    stats = stats.select { |service, _value| wanted.include? service.split(' ').first }
  end

  ignored = opts[:ignore_devices]
  if ignored
    stats = stats.reject { |service, _value| ignored.include? service.split(' ').first }
  end

  stats
end
59
+
60
# Takes a new sample and reports it relative to the previous one.
#
# The first call only stores a baseline. On later calls:
# * "io reqs" is reported exactly as sampled;
# * every other counter is reported as its change since the previous sample
#   divided by opts[:interval];
# * "io time" additionally yields an "io util" event, the same change divided
#   by opts[:interval] * 1000.
#
# Counters without a previous value (for example a device that appeared
# between two samples) are skipped for this round instead of raising on
# `metric - nil`; they are reported from the next tick on.
def tick
  current = state

  if @old_state
    current.each do |service, metric|
      if service =~ /io reqs$/
        report(
          :service => "diskstats " + service,
          :metric => metric,
          :state => "ok"
        )
        next
      end

      previous = @old_state[service]
      next if previous.nil? || metric.nil?

      delta = metric - previous

      report(
        :service => "diskstats " + service,
        :metric => (delta.to_f / opts[:interval]),
        :state => "ok"
      )

      if service =~ /io time$/
        report(:service => "diskstats " + service.gsub(/time/, 'util'),
               :metric => (delta.to_f / (opts[:interval]*1000)),
               :state => "ok")
      end
    end
  end

  @old_state = current
end
92
+
93
+ end
94
+
95
+ Riemann::Monitors::Diskstats.run
data/bin/riemann-fd ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current file descriptor use to riemann.
4
+ # By default reports the total system fd usage, can also report usage of individual processes
5
+
6
+ require_relative "../lib/riemann-monitors"
7
+
8
+ class Riemann::Monitors::Health
9
+ include Riemann::Monitors
10
+
11
+ opt :fd_sys_warning, "open file descriptor threshold for system", :default => 800
12
+ opt :fd_sys_critical, "open file descriptor critical threshold for system", :default => 900
13
+ opt :fd_proc_warning, "open file descriptor threshold for process", :default => 800
14
+ opt :fd_proc_critical, "open file descriptor critical threshold for process", :default => 900
15
+ opt :processes, "list of processes to measure fd usage in addition to system total", :type => :ints
16
+
17
# Collects the warning/critical thresholds from the options and selects the
# collector. Only a Linux collector exists, so any other `uname -s` value
# prints a warning and still uses it.
def initialize
  system_limits = { :critical => opts[:fd_sys_critical], :warning => opts[:fd_sys_warning] }
  process_limits = { :critical => opts[:fd_proc_critical], :warning => opts[:fd_proc_warning] }
  @limits = { :fd => system_limits, :process => process_limits }

  ostype = `uname -s`.chomp.downcase
  if ostype != "linux"
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux"
  end
  @fd = method(:linux_fd)
end
26
+
27
# Reports a single event. Service and state are stringified and the metric
# is coerced with to_f (so a nil metric is sent as 0.0).
def alert(service, state, metric, description)
  report(:service => service.to_s, :state => state.to_s,
         :metric => metric.to_f, :description => description)
end
35
+
36
# Reports the system-wide `lsof | wc -l` count as "fd sys" and, for every pid
# in opts[:processes], the `lsof -p <pid> | wc -l` count as "fd <name> <pid>".
# Each value is graded against the matching pair in @limits.
#
# The process name is asked from ps for that exact pid. Grepping the whole
# process table for the number could pick up an unrelated line that merely
# contains it.
def linux_fd
  grade = lambda do |used, limits|
    if used > limits[:critical]
      :critical
    elsif used > limits[:warning]
      :warning
    else
      :ok
    end
  end

  sys_used = Integer(`lsof | wc -l`)
  alert "fd sys", grade.call(sys_used, @limits[:fd]), sys_used, "system is using #{sys_used} fds"

  Array(opts[:processes]).each do |process|
    used = Integer(`lsof -p #{process} | wc -l`)
    # "comm=" prints the command name without a header line.
    name = `ps -o comm= -p #{process}`.strip
    alert "fd #{name} #{process}", grade.call(used, @limits[:process]), used,
          "process #{name} #{process} is using #{used} fds"
  end
end
60
+
61
# Invokes the collector stored in @fd.
def tick
  @fd.()
end
64
+ end
65
+
66
+ Riemann::Monitors::Health.run
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
4
+
5
+ class Riemann::Monitors::FreeSWITCH
6
+ include Riemann::Monitors
7
+
8
+ opt :calls_warning, "Calls warning threshold", :default => 100
9
+ opt :calls_critical, "Calls critical threshold", :default => 300
10
+ opt :pid_file, "FreeSWITCH daemon pidfile", :type => String, :default => "/var/run/freeswitch/freeswitch.pid"
11
+
12
# Stores the call-count thresholds taken from the command-line options.
def initialize
  call_limits = { :critical => opts[:calls_critical], :warning => opts[:calls_warning] }
  @limits = { :calls => call_limits }
end
17
+
18
# Tells whether no process with the given pid exists.
#
# Non-positive pids are always considered dead: getpgid(0) answers for the
# calling process itself, so an empty pidfile ("".to_i == 0) would otherwise
# be reported as a running daemon.
#
# @param pid [Integer] process id to probe
# @return [Boolean] true when the process is gone (or the pid is not valid)
def dead_proc?(pid)
  return true unless pid > 0

  Process.getpgid(pid)
  false
rescue Errno::ESRCH
  true
end
26
+
27
# Reports a single event, stringifying service and state and coercing the
# metric with to_f (a nil metric therefore goes out as 0.0).
def alert(service, state, metric, description)
  report(:service => service.to_s, :state => state.to_s,
         :metric => metric.to_f, :description => description)
end
35
+
36
# Timeout lives in the standard library but is not loaded by default.
require 'timeout'

# Runs cmd in its own process group with stdout and stderr closed and waits
# at most `timeout` seconds for it.
#
# On timeout the whole process group gets SIGTERM and the child is handed to
# Process.detach so it is reaped once it exits; without that every timed-out
# run would leave a zombie behind.
#
# @param cmd [String] command line to spawn
# @param timeout [Numeric] seconds to wait
# @return [Boolean] true only if the command exited with status 0 in time
def exec_with_timeout(cmd, timeout)
  pid = Process.spawn(cmd, {[:err,:out] => :close, :pgroup => true})
  begin
    Timeout.timeout(timeout) do
      Process.waitpid(pid, 0)
      $?.exitstatus == 0
    end
  rescue Timeout::Error
    begin
      Process.kill(15, -Process.getpgid(pid))
    rescue Errno::ESRCH
      # The child exited between the timeout firing and the kill.
    end
    Process.detach(pid)
    puts "Killed pid: #{pid}"
    false
  end
end
49
+
50
# Queries fs_cli for the call, channel and conference counts, counts the
# daemon's threads via ps, and reports each figure plus two liveness checks
# (pidfile process and CLI responsiveness).
#
# Calls, channels and conferences are all graded against the :calls limits.
def tick
  # Counts as reported by FreeSWITCH itself
  fs_calls = %x[fs_cli -x "show calls count"| grep -Po '^\\d+'].to_i
  fs_channels = %x[fs_cli -x "show channels count"| grep -Po '^\\d+'].to_i
  fs_conferences = %x[fs_cli -x "conference list"| grep -Pco '^Conference'].to_i

  # An unreadable pidfile falls back to a dummy pid that never matches a process
  fs_pid = begin
    File.read(opts[:pid_file]).to_i
  rescue
    puts "Couldn't read pidfile: #{opts[:pid_file]}"
    -666
  end

  fs_threads = fs_pid > 0 ? %x[ps huH p #{fs_pid} | wc -l].to_i : 0

  grade = lambda do |count|
    if count > @limits[:calls][:critical]
      :critical
    elsif count > @limits[:calls][:warning]
      :warning
    else
      :ok
    end
  end

  alert "FreeSWITCH current calls", grade.call(fs_calls), fs_calls, "Number of calls are #{fs_calls}"
  alert "FreeSWITCH current channels", grade.call(fs_channels), fs_channels, "Number of channels are #{fs_channels}"
  alert "FreeSWITCH current conferences", grade.call(fs_conferences), fs_conferences, "Number of conferences are #{fs_conferences}"

  # fs_threads is always an Integer here, so the thread count is always sent.
  alert "FreeSWITCH current threads", :ok, fs_threads, "Number of threads are #{fs_threads}"

  if dead_proc?(fs_pid)
    alert "FreeSWITCH status", :critical, -1, "FreeSWITCH service status: not running"
  else
    alert "FreeSWITCH status", :ok, nil, "FreeSWITCH service status: running"
  end

  # Bounded by a timeout in case the CLI hangs
  if exec_with_timeout("fs_cli -x status", 2)
    alert "FreeSWITCH CLI status", :ok, nil, "FreeSWITCH CLI status: responsive"
  else
    alert "FreeSWITCH CLI status", :critical, -1, "FreeSWITCH CLI status: not responding"
  end
end
117
+ end
118
+
119
+ Riemann::Monitors::FreeSWITCH.run
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers haproxy CSV statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
+ class Riemann::Monitors::Haproxy
8
+ include Riemann::Monitors
9
+ require 'net/http'
10
+ require 'csv'
11
+
12
+ opt :stats_url, "Full url to haproxy stats (eg: https://user:password@host.com:9999/stats)", :required => true, :type => :string
13
+
14
# Builds the URI of the CSV export by appending ";csv" to the stats URL.
def initialize
  csv_endpoint = opts[:stats_url] + ';csv'
  @uri = URI(csv_endpoint)
end
17
+
18
# Fetches the CSV statistics and reports every column of every row as
# "haproxy <pxname> <svname> <column>" (value coerced with to_f), followed by
# one "... state" event per row that is ok for status UP/OPEN and critical
# otherwise.
#
# The parser options are passed as keywords: a positional options hash is
# rejected with ArgumentError by CSV.parse on Ruby 3.
def tick
  # Only the leading "# " marker in front of the header row is removed, so a
  # "# " appearing later in the payload can no longer cut the data short.
  body = get_csv.body.sub(/\A#\s*/, '')

  CSV.parse(body, :headers => true).each do |row|
    row = row.to_hash
    ns = "haproxy #{row['pxname']} #{row['svname']}"

    row.each do |property, metric|
      # A nil property comes from a header-less column (e.g. a trailing comma).
      next if property.nil? || property == 'pxname' || property == 'svname'

      report(
        :host => @uri.host,
        :service => "#{ns} #{property}",
        :metric => metric.to_f,
        :tags => ['haproxy']
      )
    end

    report(
      :host => @uri.host,
      :service => "#{ns} state",
      :state => (['UP', 'OPEN'].include?(row['status']) ? 'ok' : 'critical'),
      :tags => ['haproxy']
    )
  end
end
42
+
43
# Performs the GET against @uri and returns the Net::HTTP response.
# TLS is enabled for https URIs; basic auth is sent when the URI carries
# user information.
#
# Credentials are read through URI#user / URI#password, which split the user
# information on the first colon only, so a password containing ':' is sent
# intact.
def get_csv
  http = Net::HTTP.new(@uri.host, @uri.port)
  http.use_ssl = true if @uri.scheme == 'https'
  http.start do |h|
    get = Net::HTTP::Get.new(@uri.request_uri)
    get.basic_auth(@uri.user, @uri.password) if @uri.user
    h.request get
  end
end
55
+
56
+ end
57
+
58
+ Riemann::Monitors::Haproxy.run
@@ -0,0 +1,289 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current CPU, disk, load average, and memory use to riemann.
4
+
5
+ require_relative "../lib/riemann-monitors"
6
+
7
+ class Riemann::Monitors::Health
8
+ include Riemann::Monitors
9
+
10
+ opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
11
+ opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
12
+ opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
13
+ opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
14
+ opt :load_warning, "Load warning threshold (load average / core)", :default => 3
15
+ opt :load_critical, "Load critical threshold (load average / core)", :default => 8
16
+ opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
17
+ opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
18
+ opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'load', 'memory', 'disk']
19
+
20
# Reads the thresholds from the options, picks the per-OS collectors based on
# `uname -s` (darwin, freebsd, anything else is treated as Linux) and turns
# on the checks named in opts[:checks].
def initialize
  @limits = {
    :cpu => { :critical => opts[:cpu_critical], :warning => opts[:cpu_warning] },
    :disk => { :critical => opts[:disk_critical], :warning => opts[:disk_warning] },
    :load => { :critical => opts[:load_critical], :warning => opts[:load_warning] },
    :memory => { :critical => opts[:memory_critical], :warning => opts[:memory_warning] }
  }

  @ostype = `uname -s`.chomp.downcase
  # The disk collector is the same on every platform.
  @disk = method(:disk)

  case @ostype
  when 'darwin'
    @cores = `sysctl -n hw.ncpu`.to_i
    @cpu = method(:darwin_cpu)
    @load = method(:darwin_load)
    @memory = method(:darwin_memory)
    # Take a first top sample so the darwin collectors have data to read.
    darwin_top
  when 'freebsd'
    @cores = `sysctl -n hw.ncpu`.to_i
    @cpu = method(:freebsd_cpu)
    @load = method(:freebsd_load)
    @memory = method(:freebsd_memory)
  else
    @cores = cores
    puts "WARNING: OS '#{@ostype}' not explicitly supported. Falling back to Linux" unless @ostype == "linux"
    @cpu = method(:linux_cpu)
    @load = method(:linux_load)
    @memory = method(:linux_memory)
  end

  requested = opts[:checks]
  @disk_enabled = true if requested.include?("disk")
  @load_enabled = true if requested.include?("load")
  @cpu_enabled = true if requested.include?("cpu")
  @memory_enabled = true if requested.include?("memory")
end
63
+
64
# Reports a single event; service and state are stringified and the metric
# goes through to_f, which turns a nil metric into 0.0.
def alert(service, state, metric, description)
  report(:service => service.to_s, :state => state.to_s,
         :metric => metric.to_f, :description => description)
end
72
+
73
# Counts distinct cores described in /proc/cpuinfo.
#
# Stanzas are identified by "physical id:core id" when both are present and
# by "physical id:" when only that is present; a stanza with neither gets a
# fresh running number and is therefore always counted separately.
def cores
  anonymous = 0
  seen = {}

  File.read("/proc/cpuinfo").split(/\n\n/).each do |stanza|
    physical_id = stanza[/physical id\s+:\s+(\d+)/, 1]
    core_id = stanza[/core id\s+:\s+(\d+)/, 1]

    key =
      if physical_id && core_id
        "#{physical_id}:#{core_id}"
      elsif physical_id
        "#{physical_id}:"
      else
        anonymous += 1
      end

    seen[key] = true
  end

  seen.size
end
89
+
90
# Grades a fraction against @limits[service] and alerts with a description
# of the form "<percentage with two decimals>% <report>".
# Nothing is sent when fraction is nil or false.
def report_pct(service, fraction, report)
  return unless fraction

  limits = @limits[service]
  state =
    if fraction > limits[:critical]
      :critical
    elsif fraction > limits[:warning]
      :warning
    else
      :ok
    end

  alert service, state, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
end
101
+
102
# Reads the first four counters of the "cpu" line in /proc/stat and, once a
# previous sample exists, reports the share of the first three in the growth
# of all four. The description also carries the top ten processes by pcpu.
#
# Returns false (after an "unknown" alert) when no cpu line is found.
def linux_cpu
  sample = File.read('/proc/stat').match(/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)
  if sample.nil?
    alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
    return false
  end
  u2, n2, s2, i2 = sample.captures.map { |e| e.to_i }

  if @old_cpu
    u1, n1, s1, i1 = @old_cpu

    used = (u2 - u1) + (n2 - n1) + (s2 - s1)
    total = used + (i2 - i1)
    fraction = used.to_f / total

    report_pct :cpu, fraction, "user+nice+system\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  @old_cpu = [u2, n2, s2, i2]
end
122
+
123
# Divides the first figure of /proc/loadavg by @cores and alerts with the
# state chosen from the :load limits.
def linux_load
  load = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores

  limits = @limits[:load]
  state =
    if load > limits[:critical]
      :critical
    elsif load > limits[:warning]
      :warning
    else
      :ok
    end

  alert "load", state, load, "1-minute load average/core is #{load}"
end
133
+
134
# Parses /proc/meminfo and reports 1 - (MemFree + Buffers + Cached) / MemTotal
# as the used fraction; the description also carries the top ten processes
# by pmem.
def linux_memory
  meminfo = {}
  File.read('/proc/meminfo').split(/\n/).each do |line|
    fields = line.split(/:?\s+/)
    # Assume kB...
    meminfo[fields[0]] = fields[1].to_i
  end

  free = ['MemFree', 'Buffers', 'Cached'].inject(0) { |sum, key| sum + meminfo[key].to_i }
  total = meminfo['MemTotal'].to_i
  fraction = 1 - (free.to_f / total)

  report_pct :memory, fraction, "used\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
end
148
+
149
# Samples the five counters printed by `sysctl -n kern.cp_time` and, once a
# previous sample exists, reports the share of the first four in the growth
# of all five. The description also carries the top ten processes by pcpu.
#
# When sysctl yields fewer than five values (its stderr is discarded, so a
# failure shows up as empty output) an "unknown" alert is sent and false is
# returned, as linux_cpu does for a missing cpu line. The previous sample is
# kept, so a later good reading still produces a result instead of failing
# on nil arithmetic.
def freebsd_cpu
  sample = `sysctl -n kern.cp_time 2>/dev/null`.split.map{ |e| e.to_i } #FreeBSD has 5 cpu stats
  if sample.size < 5
    alert 'cpu', :unknown, nil, "sysctl kern.cp_time didn't return CPU counters"
    return false
  end
  u2, n2, s2, t2, i2 = sample

  if @old_cpu
    u1, n1, s1, t1, i1 = @old_cpu

    used = (u2+n2+s2+t2) - (u1+n1+s1+t1)
    total = used + i2-i1
    fraction = used.to_f / total

    report_pct :cpu, fraction, "user+nice+system+interrupt\n\n#{`ps -axo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  @old_cpu = [u2, n2, s2, t2, i2]
end
164
+
165
# Takes the text after the last ':' of `uptime`, reads its first
# comma-separated figure, divides it by @cores and alerts with the state
# chosen from the :load limits.
def freebsd_load
  averages = `uptime`.split(':')[-1].chomp.gsub(/\s+/,'').split(',')
  load = averages[0].to_f / @cores

  limits = @limits[:load]
  state =
    if load > limits[:critical]
      :critical
    elsif load > limits[:warning]
      :warning
    else
      :ok
    end

  alert "load", state, load, "1-minute load average/core is #{load}"
end
176
+
177
# Reports (wired + active) / total pages, using the three vm.stats.vm
# counters read through sysctl; the description also carries the top ten
# processes by pmem.
def freebsd_memory
  page_count, wired, active = `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
  fraction = (wired.to_f + active.to_f) / page_count.to_f

  report_pct :memory, fraction, "used\n\n#{`ps -axo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
end
183
+
184
# Runs `top -l 1` once and caches what could be parsed in @topdata:
#   :stamp  - Integer time of the sample (always set)
#   :load   - first figure of the "Load Avg" line
#   :cpu    - 1 - idle% / 100 from the "CPU usage" line
#   :memory - used fraction from either "PhysMem" layout
#
# The load capture takes the whole first figure. darwin_load labels this
# value the 1-minute average; the earlier pattern kept only the final
# character of the third figure.
def darwin_top
  raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
  @topdata = {:stamp => Time.now.to_i }

  # The patterns are case-insensitive, so the unit letter is upcased before
  # being looked up as a power of 1024.
  to_bytes = lambda { |amount, unit| amount.to_i * (1024 ** "BKMGT".index(unit.upcase)) }

  raw.each_line do |ln|
    if mdat = ln.match(/Load Avg: ([0-9.]+), [0-9.]+, [0-9.]+/i)
      @topdata[:load] = mdat[1].to_f
    elsif mdat = ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i)
      @topdata[:cpu] = 1 - (mdat[1].to_f / 100)
    elsif mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i)
      wired = to_bytes.call(mdat[1], mdat[2])
      active = to_bytes.call(mdat[3], mdat[4])
      inactive = to_bytes.call(mdat[5], mdat[6])
      used = to_bytes.call(mdat[7], mdat[8])
      free = to_bytes.call(mdat[9], mdat[10])
      @topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
    # This is for OSX Mavericks which
    # uses a different format for top
    # Example: PhysMem: 4662M used (1328M wired), 2782M unused.
    elsif mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) used \(([0-9]+)([BKMGT]) wired\), ([0-9]+)([BKMGT]) unused/i)
      used = to_bytes.call(mdat[1], mdat[2])
      unused = to_bytes.call(mdat[5], mdat[6])
      @topdata[:memory] = used.to_f / (used + unused)
    end
  end
end
210
+
211
# Reports the cached CPU figure, refreshing the top sample first when it is
# at least opts[:interval] seconds old. Returns false after an "unknown"
# alert when the sample holds no CPU figure.
def darwin_cpu
  sample_age = Time.now.to_i - @topdata[:stamp]
  darwin_top if sample_age >= opts[:interval]

  if @topdata[:cpu]
    report_pct :cpu, @topdata[:cpu], "usage\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  else
    alert 'cpu', :unknown, nil, "unable to get CPU stats from top"
    false
  end
end
219
+
220
# Reports the cached load figure divided by @cores, refreshing the top sample
# first when it is at least opts[:interval] seconds old. Returns false after
# an "unknown" alert when the sample holds no load figure.
def darwin_load
  sample_age = Time.now.to_i - @topdata[:stamp]
  darwin_top if sample_age >= opts[:interval]

  if @topdata[:load].nil?
    alert 'load', :unknown, nil, "unable to get load ave from top"
    return false
  end

  metric = @topdata[:load] / @cores
  limits = @limits[:load]
  state =
    if metric > limits[:critical]
      :critical
    elsif metric > limits[:warning]
      :warning
    else
      :ok
    end

  alert "load", state, metric, "1-minute load average per core is #{metric}"
end
235
+
236
# Reports the cached memory fraction, refreshing the top sample first when it
# is at least opts[:interval] seconds old. Returns false after an "unknown"
# alert when the sample holds no memory figure.
def darwin_memory
  sample_age = Time.now.to_i - @topdata[:stamp]
  darwin_top if sample_age >= opts[:interval]

  if @topdata[:memory]
    report_pct :memory, @topdata[:memory], "usage\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  else
    alert 'memory', :unknown, nil, "unable to get memory data from top"
    false
  end
end
244
+
245
# Returns the output of `df -P` with iso9660 filesystems excluded, using the
# exclusion flag spelling that matches @ostype.
def df
  if ['darwin', 'freebsd'].include?(@ostype)
    `df -P -t noiso9660`
  else
    `df -P --exclude-type=iso9660`
  end
end
253
+
254
# Walks the df output and alerts once per filesystem as "disk <mount point>",
# using the capacity column (a percentage) divided by 100 as the metric and
# grading it against the :disk limits.
#
# Each row is split into at most six fields so that the last one keeps the
# whole mount point, including any spaces in it.
def disk
  df.split(/\n/).each do |r|
    f = r.split(/\s+/, 6)
    next if f[0] == 'Filesystem'
    next unless f[0] =~ /\// # First column must contain a slash

    # Calculate capacity
    x = f[4].to_f/100

    state =
      if x > @limits[:disk][:critical]
        :critical
      elsif x > @limits[:disk][:warning]
        :warning
      else
        :ok
      end

    alert "disk #{f[5]}", state, x, "#{f[4]} used"
  end
end
272
+
273
# Runs the enabled collectors in a fixed order: cpu, memory, disk, load.
def tick
  @cpu.call if @cpu_enabled
  @memory.call if @memory_enabled
  @disk.call if @disk_enabled
  @load.call if @load_enabled
end
287
+ end
288
+
289
+ Riemann::Monitors::Health.run
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Checks the status of an HTTP request and submits it to Riemann
4
+ require_relative "../lib/riemann-monitors"
5
+
6
+ class Riemann::Monitors::HTTPStatus
7
+ include Riemann::Monitors
8
+ require "net/http"
9
+ require "uri"
10
+
11
+ opt :uri, "Target URI", type: String, default: 'http://localhost/'
12
+ opt :sitename, "Site name (abbreviation for identifying uri)", type: String
13
+ opt :method, "HTTP Method (get, head)", type: String, default: "head"
14
+
15
# Parses the target URI, derives the service name "http/<method>/<sitename>"
# (slashes in the site name become underscores) and maps the method option
# onto a Net::HTTP request class. Any method other than get or head raises.
def initialize
  @uri = URI.parse(opts[:uri])

  if opts.has_key?(:sitename)
    @sitename = opts[:sitename]&.gsub(/\//, '_')
  end

  verb = opts[:method]
  @servicename = ["http", verb, @sitename].join("/")

  @verbclass =
    case verb
    when /\Aget\z/i then Net::HTTP::Get
    when /\Ahead\z/i then Net::HTTP::Head
    else
      raise "This script only makes sense with get and head methods. You might want a custom script."
    end
end
28
+
29
# Performs one request and reports it. The event carries the numeric status
# code as metric (with state "ok" whenever a code is present), the roundtrip
# time when known and, for non-HEAD requests with a body, its length.
# Nothing is reported here when no response was obtained.
def tick
  response, roundtrip = get_connection
  return if response.nil?

  event = { service: @servicename, description: @sitename, tags: ["http"] }

  status = response.code&.to_i
  event.update(state: "ok", metric: status) unless status.nil?
  event[:roundtrip] = roundtrip unless roundtrip.nil?

  body = response.body
  event[:body_length] = body.length unless body.nil? || @verbclass == Net::HTTP::Head

  report(event)
end
49
+
50
# Sends one request of class @verbclass to @uri.
#
# * TLS is switched on for https URIs.
# * opts[:timeout], when set, is handed to Net::HTTP.start so that it applies
#   while the connection is being opened; assigning it inside the block
#   happens after the connection already exists.
# * The roundtrip is measured on the monotonic clock so wall-clock
#   adjustments cannot distort it.
#
# Any StandardError is reported as a critical event rather than raised.
#
# @return [Array] [response, roundtrip in seconds]; both stay nil when the
#   request raised before they were assigned
def get_connection()
  response = nil
  roundtrip = nil
  begin
    connection_options = { :use_ssl => @uri.scheme == 'https' }
    connection_options[:open_timeout] = opts[:timeout] if opts[:timeout]

    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    Net::HTTP.start(@uri.host, @uri.port, connection_options) do |http|
      # Without a block, request reads the body before returning.
      response = http.request(@verbclass.new(@uri))
    end
    roundtrip = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  rescue => e
    report(:service => @servicename,
           :state => "critical",
           :description => "http connection failure: #{e.class} - #{e.message}",
           :tags => ["http", "action"])
  end
  [response, roundtrip]
end
71
+ end
72
+
73
+ Riemann::Monitors::HTTPStatus.run