riemann-monitors 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative "../lib/riemann-monitors"
4
+
5
# Reports block-device I/O statistics read from /proc/diskstats.
#
# `opt`, `opts`, `report` and `run` are not defined in this file; they are
# presumably supplied by the included Riemann::Monitors mixin.
class Riemann::Monitors::Diskstats
  include Riemann::Monitors

  opt :devices, "Devices to monitor", :type => :strings, :default => nil
  opt :ignore_devices, "Devices to ignore", :type => :strings, :default => nil

  # Labels applied, in order, to the numeric columns that follow the device
  # name on a /proc/diskstats line. Columns beyond these are ignored.
  FIELDS = [
    'reads reqs',
    'reads merged',
    'reads sector',
    'reads time',
    'writes reqs',
    'writes merged',
    'writes sector',
    'writes time',
    'io reqs',
    'io time',
    'io weighted'
  ].freeze

  def initialize
    # Snapshot from the previous tick; nil until the first tick has run.
    @old_state = nil
  end

  # Takes a snapshot of /proc/diskstats.
  #
  # Lines mentioning "ram" or "loop" are dropped, as are lines that do not
  # look like "<number> <number> <device> <values...>".
  #
  # Returns a Hash mapping "<device> <field>" to the Integer value of that
  # column, restricted by the :devices / :ignore_devices options when given.
  def state
    stats = {}

    File.read('/proc/diskstats').split("\n").each do |line|
      next if line =~ /(ram|loop)/
      next unless line =~ /^(?:\s+\d+){2}\s+([\w\d\-]+) (.*)$/

      dev = $1
      values = $2.split(/\s+/).map { |str| str.to_i }

      FIELDS.zip(values).each do |field, value|
        # A line with fewer columns than FIELDS leaves the trailing fields
        # without a value; skip them instead of storing nil, which tick
        # could not do arithmetic on.
        next if value.nil?

        stats["#{dev} #{field}"] = value
      end
    end

    # Keep only the requested devices.
    if (wanted = opts[:devices])
      stats = stats.select do |service, _value|
        wanted.include? service.split(' ').first
      end
    end

    # Drop the explicitly ignored devices.
    if (ignored = opts[:ignore_devices])
      stats = stats.reject do |service, _value|
        ignored.include? service.split(' ').first
      end
    end

    stats
  end

  # Reports one event per field, using the previous snapshot as a baseline.
  #
  # Nothing is reported on the first tick. Afterwards:
  # * "io reqs" is reported with its raw value;
  # * every other field is reported as change per second of opts[:interval];
  # * "io time" additionally yields an "io util" service, the change divided
  #   by (interval * 1000).
  def tick
    state = self.state

    if @old_state
      state.each do |service, metric|
        if service =~ /io reqs$/
          report(
            :service => "diskstats " + service,
            :metric => metric,
            :state => "ok"
          )
          next
        end

        previous = @old_state[service]
        # A device (or field) that was absent from the previous snapshot has
        # no baseline yet; it will be reported from the next tick onwards.
        next if previous.nil?

        delta = metric - previous

        report(
          :service => "diskstats " + service,
          :metric => (delta.to_f / opts[:interval]),
          :state => "ok"
        )

        if service =~ /io time$/
          report(:service => "diskstats " + service.gsub(/time/, 'util'),
                 :metric => (delta.to_f / (opts[:interval]*1000)),
                 :state => "ok")
        end
      end
    end

    @old_state = state
  end

end
94
+
95
+ Riemann::Monitors::Diskstats.run
data/bin/riemann-fd ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current file descriptor use to riemann.
4
+ # By default reports the total system fd usage, can also report usage of individual processes
5
+
6
+ require_relative "../lib/riemann-monitors"
7
+
8
# Reports open file descriptor usage, counted with `lsof`, for the whole
# system and optionally for individual processes.
#
# `opt`, `opts`, `report` and `run` are not defined in this file; they are
# presumably supplied by the included Riemann::Monitors mixin.
class Riemann::Monitors::Health
  include Riemann::Monitors

  opt :fd_sys_warning, "open file descriptor threshold for system", :default => 800
  opt :fd_sys_critical, "open file descriptor critical threshold for system", :default => 900
  opt :fd_proc_warning, "open file descriptor threshold for process", :default => 800
  opt :fd_proc_critical, "open file descriptor critical threshold for process", :default => 900
  opt :processes, "list of processes to measure fd usage in addition to system total", :type => :ints

  def initialize
    @limits = {
      :fd => {:critical => opts[:fd_sys_critical], :warning => opts[:fd_sys_warning]},
      :process => {:critical => opts[:fd_proc_critical], :warning => opts[:fd_proc_warning]},
    }
    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
    # Only a Linux implementation exists; it is used on every OS.
    @fd = method :linux_fd
  end

  # Sends one event.
  #
  # service     - service name (converted with to_s)
  # state       - state name (converted with to_s)
  # metric      - numeric value (converted with to_f)
  # description - free-form text
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Reports the line count of `lsof` for the system and the line count of
  # `lsof -p PID` for every PID listed in opts[:processes].
  def linux_fd
    sys_used = Integer(`lsof | wc -l`)
    alert "fd sys", threshold_state(sys_used, @limits[:fd]), sys_used,
          "system is using #{sys_used} fds"

    # opts[:processes] is nil when the option was not given.
    Array(opts[:processes]).each do |process|
      used = Integer(`lsof -p #{process} | wc -l`)
      # Ask ps for exactly this PID. Grepping the full process table for the
      # number could pick up an unrelated line that merely contains it.
      # The result is empty when the process no longer exists.
      name = `ps -p #{process} -o comm=`.strip

      alert "fd #{name} #{process}", threshold_state(used, @limits[:process]), used,
            "process #{name} #{process} is using #{used} fds"
    end
  end

  def tick
    @fd.call
  end

  private

  # Classifies +value+ against a {:critical => x, :warning => y} pair.
  # Returns :critical, :warning or :ok; both comparisons are strict.
  def threshold_state(value, limits)
    if value > limits[:critical]
      :critical
    elsif value > limits[:warning]
      :warning
    else
      :ok
    end
  end
end
65
+
66
+ Riemann::Monitors::Health.run
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
4
+
5
# Reports FreeSWITCH call, channel, conference and thread counts plus daemon
# and CLI liveness, using `fs_cli`, `ps` and the daemon pidfile.
#
# `opt`, `opts`, `report` and `run` are not defined in this file; they are
# presumably supplied by the included Riemann::Monitors mixin.
class Riemann::Monitors::FreeSWITCH
  include Riemann::Monitors
  # exec_with_timeout uses Timeout, which is not loaded by default.
  require 'timeout'

  opt :calls_warning, "Calls warning threshold", :default => 100
  opt :calls_critical, "Calls critical threshold", :default => 300
  opt :pid_file, "FreeSWITCH daemon pidfile", :type => String, :default => "/var/run/freeswitch/freeswitch.pid"

  def initialize
    @limits = {
      :calls => {:critical => opts[:calls_critical], :warning => opts[:calls_warning]}
    }
  end

  # Returns true when no process with +pid+ exists.
  #
  # Non-positive pids are always considered dead: Process.getpgid(0) would
  # answer for the calling process itself, so a pidfile that parsed to 0
  # would otherwise look like a running daemon.
  def dead_proc?(pid)
    return true unless pid > 0

    begin
      Process.getpgid(pid)
      false
    rescue Errno::ESRCH
      true
    end
  end

  # Sends one event.
  #
  # service     - service name (converted with to_s)
  # state       - state name (converted with to_s)
  # metric      - numeric value (converted with to_f, so nil becomes 0.0)
  # description - free-form text
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Runs +cmd+ in its own process group with stdout/stderr closed.
  #
  # Returns true when the command exits with status 0 within +timeout+
  # seconds. On timeout the whole process group is sent SIGTERM and false is
  # returned.
  def exec_with_timeout(cmd, timeout)
    pid = Process.spawn(cmd, [:err, :out] => :close, :pgroup => true)
    begin
      Timeout.timeout(timeout) do
        _, status = Process.waitpid2(pid, 0)
        status.exitstatus == 0
      end
    rescue Timeout::Error
      begin
        Process.kill(15, -Process.getpgid(pid))
      rescue Errno::ESRCH
        # The child exited between the timeout firing and the kill.
      end
      # Reap the child in the background so it does not linger as a zombie.
      Process.detach(pid)
      puts "Killed pid: #{pid}"
      false
    end
  end

  def tick
    # Determine how many current calls I have according to FreeSWITCH
    fs_calls = %x[fs_cli -x "show calls count"| grep -Po '^\\d+'].to_i

    # Determine how many current channels I have according to FreeSWITCH
    fs_channels = %x[fs_cli -x "show channels count"| grep -Po '^\\d+'].to_i

    # Determine how many conferences I have according to FreeSWITCH
    fs_conferences = %x[fs_cli -x "conference list"| grep -Pco '^Conference'].to_i

    # Try to read the pidfile; fall back to a dummy negative PID on failure.
    begin
      fs_pid = File.read(opts[:pid_file]).to_i
    rescue
      puts "Couldn't read pidfile: #{opts[:pid_file]}"
      fs_pid = -666
    end

    fs_threads = fs_pid > 0 ? %x[ps huH p #{fs_pid} | wc -l].to_i : 0

    # Calls, channels and conferences are all judged against the calls limits.
    alert_count "calls", fs_calls
    alert_count "channels", fs_channels
    alert_count "conferences", fs_conferences

    # The thread count is informational and always reported as ok.
    alert "FreeSWITCH current threads", :ok, fs_threads, "Number of threads are #{fs_threads}"

    # Submit status to riemann
    if dead_proc?(fs_pid)
      alert "FreeSWITCH status", :critical, -1, "FreeSWITCH service status: not running"
    else
      alert "FreeSWITCH status", :ok, nil, "FreeSWITCH service status: running"
    end

    # Submit CLI status to riemann using timeout in case it's unresponsive
    if exec_with_timeout("fs_cli -x status", 2)
      alert "FreeSWITCH CLI status", :ok, nil, "FreeSWITCH CLI status: responsive"
    else
      alert "FreeSWITCH CLI status", :critical, -1, "FreeSWITCH CLI status: not responding"
    end

  end

  private

  # Reports "FreeSWITCH current <what>" with a state derived from
  # @limits[:calls]; both comparisons are strict.
  def alert_count(what, value)
    limits = @limits[:calls]
    state = if value > limits[:critical]
              :critical
            elsif value > limits[:warning]
              :warning
            else
              :ok
            end

    alert "FreeSWITCH current #{what}", state, value, "Number of #{what} are #{value}"
  end
end
118
+
119
+ Riemann::Monitors::FreeSWITCH.run
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers haproxy CSV statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Fetches the haproxy statistics page in CSV form and reports every column of
# every row as a metric, plus one state event per row.
#
# `opt`, `opts`, `report` and `run` are not defined in this file; they are
# presumably supplied by the included Riemann::Monitors mixin.
class Riemann::Monitors::Haproxy
  include Riemann::Monitors
  require 'net/http'
  require 'csv'

  opt :stats_url, "Full url to haproxy stats (eg: https://user:password@host.com:9999/stats)", :required => true, :type => :string

  def initialize
    # Appending ";csv" to the stats URL requests the CSV rendering.
    @uri = URI(opts[:stats_url]+';csv')
  end

  # Downloads and parses the CSV, then reports:
  # * "haproxy <pxname> <svname> <column>" with the column value as a Float,
  #   for every column except pxname and svname;
  # * "haproxy <pxname> <svname> state", "ok" when the status column is UP or
  #   OPEN and "critical" otherwise.
  def tick
    # The header line starts with "# "; take what follows it.
    # Options are passed as keywords: a positional options hash is rejected
    # by CSV.parse on Ruby 3.
    csv = CSV.parse(get_csv.body.split("# ")[1], :headers => true)
    csv.each do |row|
      row = row.to_hash
      ns = "haproxy #{row['pxname']} #{row['svname']}"
      row.each do |property, metric|
        next if property.nil? || property == 'pxname' || property == 'svname'

        report(
          :host => @uri.host,
          :service => "#{ns} #{property}",
          :metric => metric.to_f,
          :tags => ['haproxy']
        )
      end

      report(
        :host => @uri.host,
        :service => "#{ns} state",
        :state => (['UP', 'OPEN'].include?(row['status']) ? 'ok' : 'critical'),
        :tags => ['haproxy']
      )
    end
  end

  # Performs the GET request for @uri, using TLS for https URLs and HTTP basic
  # authentication when the URL carries user information.
  #
  # Returns the response object from Net::HTTP.
  def get_csv
    http = Net::HTTP.new(@uri.host, @uri.port)
    http.use_ssl = true if @uri.scheme == 'https'
    http.start do |h|
      get = Net::HTTP::Get.new(@uri.request_uri)
      unless @uri.userinfo.nil?
        # URI#user / URI#password split on the first colon only, so a
        # password that itself contains ":" is passed through intact.
        get.basic_auth @uri.user, @uri.password
      end
      h.request get
    end
  end

end
57
+
58
+ Riemann::Monitors::Haproxy.run
@@ -0,0 +1,289 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current CPU, disk, load average, and memory use to riemann.
4
+
5
+ require_relative "../lib/riemann-monitors"
6
+
7
# Reports CPU, disk, load average and memory usage, with per-OS collectors
# for Linux, FreeBSD and Darwin.
#
# `opt`, `opts`, `report` and `run` are not defined in this file; they are
# presumably supplied by the included Riemann::Monitors mixin.
class Riemann::Monitors::Health
  include Riemann::Monitors

  opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
  opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
  opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
  opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
  opt :load_warning, "Load warning threshold (load average / core)", :default => 3
  opt :load_critical, "Load critical threshold (load average / core)", :default => 8
  opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
  opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
  opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'load', 'memory', 'disk']

  # Unit letters used by Darwin's `top`, ordered so that a letter's index is
  # its power of 1024.
  DARWIN_UNITS = "BKMGT".freeze

  # Builds the threshold table, selects the collectors for the current OS
  # (anything other than darwin/freebsd is treated as Linux) and records which
  # checks are enabled.
  def initialize
    @limits = {
      :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
      :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
      :load => {:critical => opts[:load_critical], :warning => opts[:load_warning]},
      :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
    }
    case (@ostype = `uname -s`.chomp.downcase)
    when 'darwin'
      @cores = `sysctl -n hw.ncpu`.to_i
      @cpu = method :darwin_cpu
      @disk = method :disk
      @load = method :darwin_load
      @memory = method :darwin_memory
      # Prime @topdata so the darwin_* collectors can read its timestamp.
      darwin_top
    when 'freebsd'
      @cores = `sysctl -n hw.ncpu`.to_i
      @cpu = method :freebsd_cpu
      @disk = method :disk
      @load = method :freebsd_load
      @memory = method :freebsd_memory
    else
      @cores = cores
      puts "WARNING: OS '#{@ostype}' not explicitly supported. Falling back to Linux" unless @ostype == "linux"
      @cpu = method :linux_cpu
      @disk = method :disk
      @load = method :linux_load
      @memory = method :linux_memory
    end

    checks = opts[:checks]
    @disk_enabled = checks.include?("disk")
    @load_enabled = checks.include?("load")
    @cpu_enabled = checks.include?("cpu")
    @memory_enabled = checks.include?("memory")
  end

  # Sends one event.
  #
  # service     - service name (converted with to_s)
  # state       - state name (converted with to_s)
  # metric      - numeric value (converted with to_f, so nil becomes 0.0)
  # description - free-form text
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts distinct cores listed in /proc/cpuinfo.
  #
  # Entries are keyed by "physical id:core id" when both are present, by
  # "physical id:" when only that is present, and by a running counter
  # otherwise, so entries sharing a key are counted once.
  def cores
    anonymous = 0
    seen = {}

    File.read("/proc/cpuinfo").split(/\n\n/).each do |section|
      physical_id = section[/physical id\s+:\s+(\d+)/, 1]
      core_id = section[/core id\s+:\s+(\d+)/, 1]

      key = if physical_id && core_id
              "#{physical_id}:#{core_id}"
            elsif physical_id
              "#{physical_id}:"
            else
              anonymous += 1
            end
      seen[key] = true
    end

    seen.size
  end

  # Reports +fraction+ (0..1) for +service+, choosing the state from
  # @limits[service]. The description is the percentage with two decimals
  # followed by +report+. Does nothing when +fraction+ is nil or false.
  def report_pct(service, fraction, report)
    return unless fraction

    alert service, threshold_state(fraction, @limits[service]), fraction,
          "#{sprintf("%.2f", fraction * 100)}% #{report}"
  end

  # Reports the share of user+nice+system jiffies since the previous call,
  # taken from the aggregate "cpu" line of /proc/stat. The first call only
  # records a baseline.
  def linux_cpu
    cpu_line = /cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/.match(File.read('/proc/stat'))
    unless cpu_line
      alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
      return false
    end
    u2, n2, s2, i2 = cpu_line.captures.map { |e| e.to_i }

    if @old_cpu
      u1, n1, s1, i1 = @old_cpu

      used = (u2+n2+s2) - (u1+n1+s1)
      total = used + i2 - i1

      # With no elapsed jiffies the ratio would be 0/0; skip this sample.
      if total > 0
        report_pct :cpu, used.to_f / total, "user+nice+system\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
      end
    end

    @old_cpu = [u2, n2, s2, i2]
  end

  # Reports the first figure of /proc/loadavg divided by the core count.
  def linux_load
    per_core = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores
    alert_load per_core, "1-minute load average/core is #{per_core}"
  end

  # Reports memory use as 1 - (MemFree + Buffers + Cached) / MemTotal, read
  # from /proc/meminfo.
  def linux_memory
    meminfo = {}
    File.read('/proc/meminfo').split(/\n/).each do |line|
      key, value = line.split(/:?\s+/)
      # Assume kB...
      meminfo[key] = value.to_i
    end

    free = meminfo['MemFree'].to_i + meminfo['Buffers'].to_i + meminfo['Cached'].to_i
    total = meminfo['MemTotal'].to_i
    fraction = 1 - (free.to_f / total)

    report_pct :memory, fraction, "used\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Reports the share of non-idle ticks since the previous call, using the
  # five counters of `sysctl kern.cp_time` (the last one is treated as idle).
  # The first call only records a baseline.
  def freebsd_cpu
    counters = `sysctl -n kern.cp_time 2>/dev/null`.split.map{ |e| e.to_i } #FreeBSD has 5 cpu stats
    if counters.size < 5
      # Without all five values the arithmetic below would fail on nil.
      alert 'cpu', :unknown, nil, "unable to get CPU stats from sysctl"
      return false
    end
    u2, n2, s2, t2, i2 = counters

    if @old_cpu
      u1, n1, s1, t1, i1 = @old_cpu

      used = (u2+n2+s2+t2) - (u1+n1+s1+t1)
      total = used + i2 - i1

      # With no elapsed ticks the ratio would be 0/0; skip this sample.
      if total > 0
        report_pct :cpu, used.to_f / total, "user+nice+sytem+interrupt\n\n#{`ps -axo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
      end
    end

    @old_cpu = [u2, n2, s2, t2, i2]
  end

  # Reports the first load figure printed by `uptime`, divided by the core
  # count.
  def freebsd_load
    averages = `uptime`.split(':')[-1].chomp.gsub(/\s+/,'').split(',')
    per_core = averages[0].to_f / @cores
    alert_load per_core, "1-minute load average/core is #{per_core}"
  end

  # Reports (wired + active pages) / total pages from sysctl vm.stats.vm.
  def freebsd_memory
    meminfo = `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
    fraction = (meminfo[1].to_f + meminfo[2].to_f) / meminfo[0].to_f

    report_pct :memory, fraction, "used\n\n#{`ps -axo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Runs `top -l 1` once and caches load, CPU and memory figures in @topdata
  # together with a :stamp of when they were taken. Keys whose line could not
  # be parsed are simply absent.
  def darwin_top
    raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
    @topdata = {:stamp => Time.now.to_i }
    raw.each_line do |ln|
      # Capture the whole first figure: that is the 1-minute average the
      # load event describes.
      if (m = ln.match(/Load Avg: ([0-9.]+)/i))
        @topdata[:load] = m[1].to_f
      elsif (m = ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i))
        @topdata[:cpu] = 1 - (m[1].to_f / 100)
      elsif (m = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i))
        wired, active, inactive, used, free =
          m.captures.each_slice(2).map { |amount, unit| darwin_bytes(amount, unit) }
        @topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
      # This is for OSX Mavericks which
      # uses a different format for top
      # Example: PhysMem: 4662M used (1328M wired), 2782M unused.
      elsif (m = ln.match(/PhysMem: ([0-9]+)([BKMGT]) used \(([0-9]+)([BKMGT]) wired\), ([0-9]+)([BKMGT]) unused/i))
        used, _wired, unused =
          m.captures.each_slice(2).map { |amount, unit| darwin_bytes(amount, unit) }
        @topdata[:memory] = (used).to_f / (used + unused)
      end
    end
  end

  # Reports the non-idle CPU fraction from the cached `top` data.
  def darwin_cpu
    refresh_darwin_top
    unless @topdata[:cpu]
      alert 'cpu', :unknown, nil, "unable to get CPU stats from top"
      return false
    end
    report_pct :cpu, @topdata[:cpu], "usage\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Reports the cached load average divided by the core count.
  def darwin_load
    refresh_darwin_top
    unless @topdata[:load]
      alert 'load', :unknown, nil, "unable to get load ave from top"
      return false
    end
    per_core = @topdata[:load] / @cores
    alert_load per_core, "1-minute load average per core is #{per_core}"
  end

  # Reports the memory fraction from the cached `top` data.
  def darwin_memory
    refresh_darwin_top
    unless @topdata[:memory]
      alert 'memory', :unknown, nil, "unable to get memory data from top"
      return false
    end
    report_pct :memory, @topdata[:memory], "usage\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Returns the output of `df -P` with iso9660 filesystems excluded, using the
  # flag spelling appropriate for the current OS.
  def df
    case @ostype
    when 'darwin', 'freebsd'
      `df -P -t noiso9660`
    else
      `df -P --exclude-type=iso9660`
    end
  end

  # Reports "disk <column 6>" for each df row, using column 5 (a percentage)
  # divided by 100 as the metric.
  def disk
    df.split(/\n/).each do |row|
      columns = row.split(/\s+/)
      next if columns[0] == 'Filesystem'
      # Only rows whose first column contains a slash are reported.
      next unless columns[0] =~ /\//

      # Calculate capacity
      used = columns[4].to_f/100

      alert "disk #{columns[5]}", threshold_state(used, @limits[:disk]), used, "#{columns[4]} used"
    end
  end

  # Runs every enabled check, in the order cpu, memory, disk, load.
  def tick
    @cpu.call if @cpu_enabled
    @memory.call if @memory_enabled
    @disk.call if @disk_enabled
    @load.call if @load_enabled
  end

  private

  # Classifies +value+ against a {:critical => x, :warning => y} pair.
  # Returns :critical, :warning or :ok; both comparisons are strict.
  def threshold_state(value, limits)
    if value > limits[:critical]
      :critical
    elsif value > limits[:warning]
      :warning
    else
      :ok
    end
  end

  # Reports the "load" service with a state chosen from @limits[:load].
  def alert_load(per_core, description)
    alert "load", threshold_state(per_core, @limits[:load]), per_core, description
  end

  # Re-runs darwin_top when the cached data is at least opts[:interval]
  # seconds old.
  def refresh_darwin_top
    darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
  end

  # Converts an amount/unit pair from `top` (e.g. "4662", "M") to bytes.
  # The unit is upcased because the PhysMem patterns match case-insensitively.
  def darwin_bytes(amount, unit)
    amount.to_i * (1024 ** DARWIN_UNITS.index(unit.upcase))
  end
end
288
+
289
+ Riemann::Monitors::Health.run
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Checks the status of an HTTP request and submits it to Riemann
4
+ require_relative "../lib/riemann-monitors"
5
+
6
# Issues a GET or HEAD request against a URI and reports the status code,
# round-trip time and (for GET) body length.
#
# `opt`, `opts`, `report` and `run` are not defined in this file; they are
# presumably supplied by the included Riemann::Monitors mixin.
class Riemann::Monitors::HTTPStatus
  include Riemann::Monitors
  require "net/http"
  require "uri"

  opt :uri, "Target URI", type: String, default: 'http://localhost/'
  opt :sitename, "Site name (abbreviation for identifying uri)", type: String
  opt :method, "HTTP Method (get, head)", type: String, default: "head"

  # Parses the target URI, builds the service name "http/<method>/<sitename>"
  # (slashes in the site name are replaced by underscores) and picks the
  # request class.
  #
  # Raises RuntimeError when the method is neither get nor head.
  def initialize
    @uri = URI.parse(opts[:uri])
    @sitename = opts[:sitename]&.gsub(/\//, '_') if opts.has_key?(:sitename)
    @servicename = ["http", opts[:method], @sitename].join("/")
    @verbclass = case opts[:method]
                 when /\Aget\z/i
                   Net::HTTP::Get
                 when /\Ahead\z/i
                   Net::HTTP::Head
                 else
                   raise "This script only makes sense with get and head methods. You might want a custom script."
                 end
  end

  # Performs one request and, when a response was obtained, reports it.
  # The event's metric is the numeric status code and its state is always
  # "ok"; connection failures are reported by get_connection instead.
  def tick
    response, roundtrip = get_connection()
    return if response.nil?

    metrics = {service: @servicename,
               description: @sitename,
               tags: ["http"]}

    code = response.code&.to_i
    unless code.nil?
      metrics[:state] = "ok"
      metrics[:metric] = code
    end

    metrics[:roundtrip] = roundtrip unless roundtrip.nil?

    # HEAD responses carry no body worth measuring.
    if !response.body.nil? && @verbclass != Net::HTTP::Head
      metrics[:body_length] = response.body.length
    end

    report(metrics)
  end

  # Connects to the target and sends the request.
  #
  # Returns [response, roundtrip_seconds]. Either element is nil when an error
  # occurred before it was set; in that case a "critical" event describing the
  # exception has already been reported.
  def get_connection()
    response = nil
    roundtrip = nil
    begin
      # Monotonic clock: unaffected by wall-clock adjustments mid-request.
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      Net::HTTP.start(@uri.host, @uri.port, connection_options) do |http|
        response = http.request(@verbclass.new(@uri))
        response.read_body
      end
      roundtrip = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
    rescue => e
      report(:service => @servicename,
             :state => "critical",
             :description => "http connection failure: #{e.class} - #{e.message}",
             :tags => ["http", "action"])
    end
    [response, roundtrip]
  end

  private

  # Options for Net::HTTP.start. They must be supplied there, not inside the
  # block: by then the connection is already open, so a later open_timeout
  # assignment would have no effect. TLS is enabled for https URIs.
  def connection_options
    options = { use_ssl: @uri.scheme == 'https' }
    options[:open_timeout] = opts[:timeout] if opts[:timeout]
    options
  end
end
72
+
73
+ Riemann::Monitors::HTTPStatus.run