riemann-tools-dgvz 0.2.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,154 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
4
+
5
+ $0 = __FILE__
6
+
7
# Polls CloudWatch for the metrics of one or more Elastic Load Balancers and
# reports every statistic of the returned datapoint as a Riemann event.
#
# `opt`, `options` and `report` are not defined in this class; presumably they
# come from the included Riemann::Tools module -- confirm against lib/riemann/tools.
class Riemann::Tools::ELBMetrics
  include Riemann::Tools

  require 'fog'
  require 'time'

  opt :fog_credentials_file, "Fog credentials file", :type => String
  opt :fog_credential, "Fog credentials to use", :type => String
  opt :aws_access, "AWS Access Key", :type => String
  opt :aws_secret, "AWS Secret Key", :type => String
  opt :aws_region, "AWS Region", :type => String, :default => "eu-west-1"
  opt :aws_azs, "List of AZs to aggregate against", :type => :strings, :default => [ "all_az" ]
  opt :elbs, "List of ELBs to pull metrics from", :type => :strings, :required => true

  # Builds the per-metric query options.
  #
  # ELB metric types, from:
  # http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/CW_Support_For_AWS.html#elb-metricscollected
  #
  # Every entry uses the key "Unit"; the previous revision spelled it "Units"
  # for all but the first two metrics.
  #
  # @return [Hash{String => Hash}] metric name => { "Unit", "Statistics" };
  #   a new Hash is built on every call.
  def standard_metrics
    metric_options = {
      "Latency" => {
        "Unit" => "Seconds",
        "Statistics" => ["Maximum", "Minimum", "Average" ]
      },
      "RequestCount" => {
        "Unit" => "Count",
        "Statistics" => [ "Sum" ]
      },
      "HealthyHostCount" => {
        "Unit" => "Count",
        "Statistics" => [ "Minimum", "Maximum", "Average" ]
      },
      "UnHealthyHostCount" => {
        "Unit" => "Count",
        "Statistics" => [ "Minimum", "Maximum", "Average" ]
      }
    }

    # All HTTP response-code counters share the same shape.
    %w[
      HTTPCode_ELB_4XX HTTPCode_ELB_5XX
      HTTPCode_Backend_2XX HTTPCode_Backend_3XX
      HTTPCode_Backend_4XX HTTPCode_Backend_5XX
    ].each do |counter|
      metric_options[counter] = { "Unit" => "Count", "Statistics" => [ "Sum" ] }
    end

    metric_options
  end

  # Builds the query options shared by every metric: the AWS/ELB namespace and
  # a 60-second window ending now (UTC, ISO 8601) with a 60-second period.
  #
  # @return [Hash{String => Object}]
  def base_metrics
    # Sample the clock once so both ends of the window come from the same instant.
    now = Time.now.utc

    {
      "Namespace" => "AWS/ELB",
      "StartTime" => (now - 60).iso8601,
      "EndTime" => now.iso8601,
      "Period" => 60,
    }
  end

  # Queries every metric for every configured ELB and availability zone and
  # reports one event per statistic found in the first returned datapoint.
  def tick
    connection = cloudwatch_connection

    options[:elbs].each do |lb|
      metric_options = standard_metrics
      metric_base_options = base_metrics

      options[:aws_azs].each do |az|
        metric_options.keys.sort.each do |metric_type|
          query = metric_base_options.merge(metric_options[metric_type])
          query["MetricName"] = metric_type
          query["Dimensions"] = dimensions_for(lb, az)

          result = connection.get_metric_statistics(query)
          datapoints = result.body["GetMetricStatisticsResult"]["Datapoints"]

          # "If no response codes in the category 2XX-5XX range are sent to clients within
          # the given time period, values for these metrics will not be recorded in CloudWatch"
          #
          # BUG:
          # Metrics are reported every 60 seconds, but sometimes there isn't one there yet.
          # We can skip that, or do something else?
          next if datapoints.empty?

          # We should only ever have a single data point
          report_datapoint(lb, az, metric_type, datapoints[0])
        end
      end
    end
  end

  private

  # Creates the CloudWatch client, either from a Fog credentials file or from
  # the access key / secret / region options.
  def cloudwatch_connection
    if options[:fog_credentials_file]
      Fog.credentials_path = options[:fog_credentials_file]
      # Only select a credential set when one was named: calling to_sym on a
      # missing option would raise NoMethodError.
      Fog.credential = options[:fog_credential].to_sym if options[:fog_credential]
      Fog::AWS::CloudWatch.new
    else
      Fog::AWS::CloudWatch.new({
        :aws_access_key_id => options[:aws_access],
        :aws_secret_access_key => options[:aws_secret],
        :region => options[:aws_region]
      })
    end
  end

  # Dimensions for one query: the load balancer alone for the "all_az"
  # pseudo-zone, otherwise the load balancer plus the availability zone.
  def dimensions_for(lb, az)
    dimensions = [ { "Name" => "LoadBalancerName", "Value" => lb } ]
    dimensions << { "Name" => "AvailabilityZone" , "Value" => az } unless az == "all_az"
    dimensions
  end

  # Reports one event per statistic key of +datapoint+, skipping the "Unit"
  # and "Timestamp" entries, which are not statistics.
  def report_datapoint(lb, az, metric_type, datapoint)
    unit = datapoint["Unit"]

    datapoint.keys.sort.each do |stat_type|
      next if stat_type == "Unit"
      next if stat_type == "Timestamp"

      report(
        host: lb,
        service: "elb.#{az}.#{metric_type}.#{stat_type}",
        ttl: 60,
        description: "#{lb} #{metric_type} #{stat_type} (#{unit})",
        tags: [ "production", "elb_metrics" ],
        metric: datapoint[stat_type]
      )
    end
  end
end
153
+
154
+ Riemann::Tools::ELBMetrics.run
data/bin/riemann-fd ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current file descriptor use to riemann.
4
+ # By default reports the total system fd usage, can also report usage of individual processes
5
+
6
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
7
+
8
# Reports open file descriptor usage to Riemann: always the system-wide total,
# plus one event per PID listed in the :processes option.
#
# `opt`, `opts` and `report` are not defined in this class; presumably they come
# from the included Riemann::Tools module -- confirm against lib/riemann/tools.
class Riemann::Tools::Health
  include Riemann::Tools

  opt :fd_sys_warning, "open file descriptor threshold for system", :default => 800
  opt :fd_sys_critical, "open file descriptor critical threshold for system", :default => 900
  opt :fd_proc_warning, "open file descriptor threshold for process", :default => 800
  opt :fd_proc_critical, "open file descriptor critical threshold for process", :default => 900
  opt :processes, "list of processes to measure fd usage in addition to system total", :type => :ints

  # Captures the thresholds from the options and selects the probe. Only a
  # Linux probe exists, so any other OS gets a warning and the same probe.
  def initialize
    @limits = {
      :fd => {:critical => opts[:fd_sys_critical], :warning => opts[:fd_sys_warning]},
      :process => {:critical => opts[:fd_proc_critical], :warning => opts[:fd_proc_warning]},
    }
    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
    @fd = method :linux_fd
  end

  # Sends one event to Riemann.
  #
  # @param service [#to_s] service name
  # @param state [#to_s] state, e.g. :ok / :warning / :critical
  # @param metric [#to_f] numeric value
  # @param description [String] human-readable detail
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts `lsof` output lines system-wide and for each configured PID, and
  # reports each count with a state derived from the matching thresholds.
  def linux_fd
    sys_used = Integer(`lsof | wc -l`)
    alert "fd sys", fd_state(:fd, sys_used), sys_used, "system is using #{sys_used} fds"

    # :processes has no default, so it is nil when the flag is not given.
    Array(opts[:processes]).each do |process|
      used = Integer(`lsof -p #{process} | wc -l`)
      # Ask ps for exactly this PID instead of grepping the full process list:
      # grep -w could match the number elsewhere on a line, and splitting the
      # line on whitespace mangles command names that contain spaces.
      name = `ps -p #{process} -o comm=`.strip
      alert "fd #{name} #{process}", fd_state(:process, used), used, "process #{name} #{process} is using #{used} fds"
    end
  end

  # Runs the probe selected in #initialize.
  def tick
    @fd.call
  end

  private

  # Maps a descriptor count to :critical, :warning or :ok using the thresholds
  # stored under +scope+ (:fd or :process). Comparisons are strict.
  def fd_state(scope, used)
    if used > @limits[scope][:critical]
      :critical
    elsif used > @limits[scope][:warning]
      :warning
    else
      :ok
    end
  end
end
65
+
66
+ Riemann::Tools::Health.run
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
4
+
5
# Reports FreeSWITCH call count, channel count and daemon liveness to Riemann.
#
# `opt`, `opts` and `report` are not defined in this class; presumably they come
# from the included Riemann::Tools module -- confirm against lib/riemann/tools.
class Riemann::Tools::FreeSWITCH
  include Riemann::Tools

  opt :calls_warning, "Calls warning threshold", :default => 100
  opt :calls_critical, "Calls critical threshold", :default => 300
  opt :pid_file, "FreeSWITCH daemon pidfile", :type => String, :default => "/var/run/freeswitch/freeswitch.pid"

  # Captures the call thresholds from the options.
  def initialize
    @limits = {
      :calls => {:critical => opts[:calls_critical], :warning => opts[:calls_warning]}
    }
  end

  # Tells whether no process with the given PID exists.
  #
  # @param pid [Integer] PID read from the pidfile
  # @return [Boolean] true when the process is gone or the PID is not a valid
  #   process id (zero or negative), false otherwise
  def dead_proc?(pid)
    # getpgid(0) queries the calling process, so a pidfile that parsed to 0
    # (empty or garbage content) used to be reported as "running".
    return true unless pid > 0

    Process.getpgid(pid)
    false
  rescue Errno::ESRCH
    true
  rescue Errno::EPERM
    # Being denied permission still means the process exists.
    false
  end

  # Sends one event to Riemann.
  #
  # @param service [#to_s] service name
  # @param state [#to_s] state, e.g. :ok / :warning / :critical
  # @param metric [#to_f] numeric value (nil becomes 0.0)
  # @param description [String] human-readable detail
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Samples FreeSWITCH through fs_cli and the pidfile, then reports three events.
  def tick
    # Determine how many current calls I have according to FreeSWITCH
    fs_calls = %x[fs_cli -x "show calls count"| grep -Po '^\\d+'].to_i

    # Determine how many current channels I have according to FreeSWITCH
    fs_channels = %x[fs_cli -x "show channels count"| grep -Po '^\\d+'].to_i

    # Try to read pidfile. If it fails use Devil's dummy PID
    # (deliberately best-effort: an unreadable pidfile must not abort the tick).
    begin
      fs_pid = File.read(opts[:pid_file]).to_i
    rescue
      puts "Couldn't read pidfile: #{opts[:pid_file]}"
      fs_pid = -666
    end

    # Submit calls and channels to riemann
    report_count "calls", fs_calls
    report_count "channels", fs_channels

    # Submit status to riemann
    if dead_proc?(fs_pid)
      alert "FreeSWITCH status", :critical, -1, "FreeSWITCH service status: not running"
    else
      alert "FreeSWITCH status", :ok, nil, "FreeSWITCH service status: running"
    end
  end

  private

  # Reports one "FreeSWITCH current <kind>" event. Channels are judged against
  # the same :calls thresholds as calls, as there is no separate channel option.
  def report_count(kind, count)
    state =
      if count > @limits[:calls][:critical]
        :critical
      elsif count > @limits[:calls][:warning]
        :warning
      else
        :ok
      end

    alert "FreeSWITCH current #{kind}", state, count, "Number of #{kind} are #{count}"
  end
end
78
+
79
+ Riemann::Tools::FreeSWITCH.run
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers haproxy CSV statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Gathers haproxy CSV statistics over HTTP(S) and submits one Riemann event per
# proxy/server/column combination.
#
# `opt`, `opts` and `report` are not defined in this class; presumably they come
# from the included Riemann::Tools module -- confirm against lib/riemann/tools.
class Riemann::Tools::Haproxy
  include Riemann::Tools
  require 'net/http'
  require 'csv'

  opt :stats_url, "Full url to haproxy stats (eg: https://user:password@host.com:9999/stats)", :required => true, :type => :string

  # Builds the CSV endpoint URI by appending ";csv" to the configured stats URL.
  def initialize
    @uri = URI(opts[:stats_url]+';csv')
  end

  # Fetches the CSV export and reports every column of every row, except the
  # 'pxname' and 'svname' columns, which only name the service.
  def tick
    # Strip only the leading "# " that precedes the header row. Splitting the
    # whole body on "# " would cut it short at any later occurrence and yield
    # nil when the prefix is missing.
    body = get_csv.body.to_s.sub(/\A# /, '')

    # Options are passed as keywords: a positional options Hash is rejected by
    # CSV.parse on Ruby 3.
    CSV.parse(body, :headers => true).each do |row|
      row = row.to_hash
      ns = "haproxy #{row['pxname']} #{row['svname']}"
      # The state depends only on the row, so compute it once per row.
      state = ['UP', 'OPEN'].include?(row['status']) ? 'ok' : 'critical'

      row.each do |property, metric|
        # A nil property is a column without a header name.
        next if property.nil? || property == 'pxname' || property == 'svname'

        report(
          :host => @uri.host,
          :service => "#{ns} #{property}",
          :metric => metric.to_f,
          :state => state,
          :tags => ['haproxy']
        )
      end
    end
  end

  # Performs the GET request, using TLS for https URLs and HTTP basic auth when
  # the URL carries user info.
  #
  # @return [Net::HTTPResponse]
  def get_csv
    http = Net::HTTP.new(@uri.host, @uri.port)
    http.use_ssl = true if @uri.scheme == 'https'
    http.start do |h|
      get = Net::HTTP::Get.new(@uri.request_uri)
      unless @uri.userinfo.nil?
        # URI#user / URI#password split on the first ":" only, so a password
        # containing ":" is no longer truncated.
        get.basic_auth @uri.user, @uri.password
      end
      h.request get
    end
  end

end
51
+
52
+ Riemann::Tools::Haproxy.run
@@ -0,0 +1,272 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current CPU, disk, load average, and memory use to riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Reports current CPU, disk, load average, and memory use to riemann, using
# Darwin, FreeBSD or Linux probes chosen at start-up from `uname -s`.
#
# `opt`, `opts` and `report` are not defined in this class; presumably they come
# from the included Riemann::Tools module -- confirm against lib/riemann/tools.
class Riemann::Tools::Health
  include Riemann::Tools

  opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
  opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
  opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
  opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
  opt :load_warning, "Load warning threshold (load average / core)", :default => 3
  opt :load_critical, "Load critical threshold (load average / core)", :default => 8
  opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
  opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
  opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'load', 'memory', 'disk']

  # Captures thresholds, binds the OS-specific probe methods and records which
  # checks are enabled. Any OS other than darwin/freebsd uses the Linux probes.
  def initialize
    @limits = {
      :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
      :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
      :load => {:critical => opts[:load_critical], :warning => opts[:load_warning]},
      :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
    }
    case (ostype = `uname -s`.chomp.downcase)
    when 'darwin'
      @cores = `sysctl -n hw.ncpu`.to_i
      @cpu = method :darwin_cpu
      @disk = method :disk
      @load = method :darwin_load
      @memory = method :darwin_memory
      # Prime @topdata so the darwin_* probes have a timestamp to compare against.
      darwin_top
    when 'freebsd'
      @cores = `sysctl -n hw.ncpu`.to_i
      @cpu = method :freebsd_cpu
      @disk = method :disk
      @load = method :freebsd_load
      @memory = method :freebsd_memory
    else
      @cores = cores
      puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
      @cpu = method :linux_cpu
      @disk = method :disk
      @load = method :linux_load
      @memory = method :linux_memory
    end

    checks = opts[:checks]
    @disk_enabled = checks.include?("disk")
    @load_enabled = checks.include?("load")
    @cpu_enabled = checks.include?("cpu")
    @memory_enabled = checks.include?("memory")
  end

  # Sends one event to Riemann.
  #
  # @param service [#to_s] service name
  # @param state [#to_s] state, e.g. :ok / :warning / :critical / :unknown
  # @param metric [#to_f] numeric value (nil becomes 0.0)
  # @param description [String] human-readable detail
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts distinct cores listed in /proc/cpuinfo. Stanzas are keyed by
  # "physical id:core id" so repeated pairs count once; stanzas without a
  # physical id are each counted separately.
  #
  # @return [Integer]
  def cores
    anonymous = 0
    seen = {}

    File.read("/proc/cpuinfo").split(/\n\n/).each do |stanza|
      physical_id = stanza[/physical id\s+:\s+(\d+)/, 1]
      core_id = stanza[/core id\s+:\s+(\d+)/, 1]

      key =
        if physical_id && core_id
          "#{physical_id}:#{core_id}"
        elsif physical_id
          "#{physical_id}:"
        else
          anonymous += 1
        end

      seen[key] = true
    end

    seen.size
  end

  # Reports a 0..1 fraction for +service+ with a state derived from its
  # thresholds; does nothing when +fraction+ is nil or false.
  #
  # @param service [Symbol] key into @limits (:cpu or :memory in this file)
  # @param fraction [Float, nil] value to report
  # @param report [String] text appended to the percentage in the description
  def report_pct(service, fraction, report)
    return unless fraction

    alert service, threshold_state(service, fraction), fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
  end

  # Reports the CPU fraction used between this call and the previous one, from
  # the first four counters of the "cpu" line in /proc/stat. The first call
  # only records a baseline.
  def linux_cpu
    new = File.read('/proc/stat')
    unless new[/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
      alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
      return false
    end
    u2, n2, s2, i2 = [$1, $2, $3, $4].map { |e| e.to_i }

    if @old_cpu
      u1, n1, s1, i1 = @old_cpu

      used = (u2+n2+s2) - (u1+n1+s1)
      total = used + i2-i1

      # With no counters advanced the fraction would be 0/0 (NaN), which used
      # to be reported as an "ok" event; skip the sample instead.
      unless total.zero?
        fraction = used.to_f / total
        report_pct :cpu, fraction, "user+nice+system\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
      end
    end

    @old_cpu = [u2, n2, s2, i2]
  end

  # Reports the first value of /proc/loadavg divided by the core count.
  def linux_load
    load = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores
    report_load load, "1-minute load average/core is #{load}"
  end

  # Reports the fraction of memory in use, counting MemFree, Buffers and
  # Cached from /proc/meminfo as free.
  def linux_memory
    m = {}
    File.read('/proc/meminfo').split(/\n/).each do |line|
      x = line.split(/:?\s+/)
      # Assume kB...
      m[x[0]] = x[1].to_i
    end

    free = m['MemFree'].to_i + m['Buffers'].to_i + m['Cached'].to_i
    total = m['MemTotal'].to_i
    fraction = 1 - (free.to_f / total)

    report_pct :memory, fraction, "used\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Reports the CPU fraction used between this call and the previous one, from
  # the five counters of `sysctl kern.cp_time`. The first call only records a
  # baseline.
  def freebsd_cpu
    u2, n2, s2, t2, i2 = `sysctl -n kern.cp_time 2>/dev/null`.split.map{ |e| e.to_i } #FreeBSD has 5 cpu stats

    if @old_cpu
      u1, n1, s1, t1, i1 = @old_cpu

      used = (u2+n2+s2+t2) - (u1+n1+s1+t1)
      total = used + i2-i1

      # Same 0/0 guard as the Linux probe.
      unless total.zero?
        fraction = used.to_f / total
        report_pct :cpu, fraction, "user+nice+system+interrupt\n\n#{`ps -axo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
      end
    end

    @old_cpu = [u2, n2, s2, t2, i2]
  end

  # Reports the first load figure after the last ":" in `uptime` output,
  # divided by the core count.
  def freebsd_load
    m = `uptime`.split(':')[-1].chomp.gsub(/\s+/,'').split(',')
    load = m[0].to_f / @cores
    report_load load, "1-minute load average/core is #{load}"
  end

  # Reports (wired + active) pages over total pages, read via sysctl.
  def freebsd_memory
    meminfo = `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
    fraction = (meminfo[1].to_f + meminfo[2].to_f) / meminfo[0].to_f

    report_pct :memory, fraction, "used\n\n#{`ps -axo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Runs `top -l 1` once and caches load, CPU and memory figures in @topdata
  # together with the time of the sample.
  def darwin_top
    raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
    @topdata = {:stamp => Time.now.to_i }

    # The match is case-insensitive, so normalise the unit letter before
    # looking up its power of 1024.
    to_bytes = lambda { |amount, unit| amount.to_i * (1024 ** "BKMGT".index(unit.upcase)) }

    raw.each_line do |ln|
      if (load_match = ln.match(/Load Avg: ([0-9.]+), [0-9.]+, [0-9.]+/i))
        # Capture the first of the three values; darwin_load describes the
        # result as the 1-minute load. The old pattern `([0-9.])+` captured
        # only the final character of the third value.
        @topdata[:load] = load_match[1].to_f
      elsif (cpu_match = ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i))
        @topdata[:cpu] = 1 - (cpu_match[1].to_f / 100)
      elsif mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i)
        wired = to_bytes.call(mdat[1], mdat[2])
        active = to_bytes.call(mdat[3], mdat[4])
        inactive = to_bytes.call(mdat[5], mdat[6])
        used = to_bytes.call(mdat[7], mdat[8])
        free = to_bytes.call(mdat[9], mdat[10])
        @topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
      end
    end
  end

  # Reports the cached CPU usage from top, refreshing the cache when stale.
  def darwin_cpu
    refresh_darwin_top
    unless @topdata[:cpu]
      alert 'cpu', :unknown, nil, "unable to get CPU stats from top"
      return false
    end
    report_pct :cpu, @topdata[:cpu], "usage\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Reports the cached load from top divided by the core count.
  def darwin_load
    refresh_darwin_top
    unless @topdata[:load]
      alert 'load', :unknown, nil, "unable to get load ave from top"
      return false
    end
    metric = @topdata[:load] / @cores
    report_load metric, "1-minute load average per core is #{metric}"
  end

  # Reports the cached memory fraction from top.
  def darwin_memory
    refresh_darwin_top
    unless @topdata[:memory]
      alert 'memory', :unknown, nil, "unable to get memory data from top"
      return false
    end
    report_pct :memory, @topdata[:memory], "usage\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
  end

  # Reports one "disk <mount>" event per `df -P` row whose first column
  # contains a slash, using the capacity column as the used fraction.
  def disk
    `df -P`.split(/\n/).each do |r|
      f = r.split(/\s+/)
      next if f[0] == 'Filesystem'
      next unless f[0] =~ /\// # Needs at least one slash in the mount path

      # Calculate capacity
      x = f[4].to_f/100

      alert "disk #{f[5]}", threshold_state(:disk, x), x, "#{f[4]} used"
    end
  end

  # Runs every enabled check, in the order cpu, memory, disk, load.
  def tick
    @cpu.call if @cpu_enabled
    @memory.call if @memory_enabled
    @disk.call if @disk_enabled
    @load.call if @load_enabled
  end

  private

  # Maps +value+ to :critical, :warning or :ok using the thresholds stored in
  # @limits[service]. Comparisons are strict.
  def threshold_state(service, value)
    if value > @limits[service][:critical]
      :critical
    elsif value > @limits[service][:warning]
      :warning
    else
      :ok
    end
  end

  # Reports a per-core load value under the "load" service.
  def report_load(value, description)
    alert "load", threshold_state(:load, value), value, description
  end

  # Re-runs top unless the cached sample is younger than opts[:interval].
  def refresh_darwin_top
    darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
  end
end
271
+
272
+ Riemann::Tools::Health.run
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
4
+
5
# Reports to Riemann how many lines of `virsh list` output contain "i-".
#
# `report` is not defined in this class; presumably it comes from the included
# Riemann::Tools module -- confirm against lib/riemann/tools.
class Riemann::Tools::KVM
  include Riemann::Tools

  # Counts the matching `virsh list` lines and submits the count as a single
  # "KVM Running VMs" event with state "info".
  def tick
    # Instance count according to libvirt, as printed by `wc -l`.
    running_vms = %x[virsh list |grep i-|wc -l].to_i

    event = {
      :service => "KVM Running VMs",
      :metric => running_vms,
      :state => "info"
    }

    report(event)
  end
end
21
+
22
+ Riemann::Tools::KVM.run
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers memcached STATS and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Gathers memcached STATS and submits one Riemann event per reported stat.
#
# `opt`, `opts` and `report` are not defined in this class; presumably they come
# from the included Riemann::Tools module -- confirm against lib/riemann/tools.
class Riemann::Tools::Memcached
  include Riemann::Tools
  require 'socket'

  opt :memcached_host, "Memcached hostname", :default => 'localhost'
  opt :memcached_port, "Memcached port", :default => 11211

  # Opens a TCP connection, sends "stats", and reports every "STAT <name>
  # <value>" line up to the terminating "END".
  #
  # Changes from the previous revision:
  # - the first reply line is processed instead of being read and discarded;
  # - a closed connection (gets returning nil) ends the loop instead of raising;
  # - lines that are not STAT lines are skipped;
  # - the socket is closed even when reporting raises.
  def tick
    sock = TCPSocket.new(opts[:memcached_host], opts[:memcached_port])

    begin
      sock.print("stats\r\n")
      sock.flush

      while (line = sock.gets)
        break if line.strip == 'END'

        m = line.match(/STAT (\w+) (\S+)/)
        next unless m

        report(
          :host => opts[:memcached_host].dup,
          :service => "memcached #{m[1]}",
          :metric => m[2].to_f,
          :state => 'ok',
          :tags => ['memcached']
        )
      end
    ensure
      sock.close
    end
  end
end
36
+
37
+ Riemann::Tools::Memcached.run