riemann-monitors 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative "../lib/riemann-monitors"
4
+
5
# Monitor that submits a count of libvirt guests to Riemann.
class Riemann::Monitors::KVM
  include Riemann::Monitors

  # Runs the `virsh list` pipeline, which counts the output lines containing
  # "i-", and reports that count as the "KVM Running VMs" metric.
  def tick
    running_vms = `virsh list |grep i-|wc -l`.to_i

    report(service: "KVM Running VMs", metric: running_vms, state: "info")
  end
end
21
+
22
+ Riemann::Monitors::KVM.run
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers memcached STATS and submits them to Riemann.
4
+
5
+ require_relative "../lib/riemann-monitors"
6
+
7
# Monitor that sends the `stats` command to a memcached server and submits one
# Riemann event per `STAT <name> <value>` line of the reply.
class Riemann::Monitors::Memcached
  include Riemann::Monitors
  require 'socket'

  opt :memcached_host, "Memcached hostname", :default => 'localhost'
  opt :memcached_port, "Memcached port", :default => 11211

  # Opens a fresh connection, requests the statistics and reports every
  # `STAT` line as service "memcached/<name>" with the value converted by
  # `to_f`. The block form of TCPSocket.open closes the socket even when
  # reading or reporting raises.
  def tick
    TCPSocket.open(opts[:memcached_host], opts[:memcached_port]) do |sock|
      sock.print("stats\r\n")
      sock.flush

      # The first reply line is read and dropped, exactly as the previous
      # implementation did.
      # NOTE(review): on a stock server this is presumably `STAT pid` - confirm
      # that skipping it is intended.
      sock.gets

      # `gets` returns nil once the server closes the connection; treat that
      # like the END marker instead of crashing on nil.
      while (line = sock.gets)
        break if line.strip == 'END'

        m = line.match(/STAT (\w+) (\S+)/)
        next unless m # ignore anything that is not a STAT line

        report(
          :host => opts[:memcached_host].dup,
          :service => "memcached/#{m[1]}",
          :metric => m[2].to_f,
          :state => 'ok',
          :tags => ['memcached']
        )
      end
    end
  end
end
37
+
38
+ Riemann::Monitors::Memcached.run
data/bin/riemann-net ADDED
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers network interface statistics and submits them to Riemann.
4
+
5
+ require "pry"
6
+ require_relative "../lib/riemann-monitors"
7
+ require 'set'
8
+
9
# Monitor that samples the per-interface counters in /proc/net/dev and reports
# the per-second change of every counter between two consecutive ticks.
class Riemann::Monitors::Net
  include Riemann::Monitors

  opt :interfaces, "Interfaces to monitor", :type => :strings, :default => []
  opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']

  def initialize
    @old_state = nil
    # Counter names, in the column order they are zipped with in #state.
    @fields = ["rx bytes", "rx packets", "rx errs", "rx drop", "rx fifo", "rx frame", "rx compressed", "rx multicast",
               "tx bytes", "tx packets", "tx errs", "tx drops", "tx fifo", "tx colls", "tx carrier", "tx compressed"]
    @use_interfaces = Set.new(opts[:interfaces])
    @ignore_interfaces = Set.new(opts[:ignore_interfaces])
  end

  # Reads /proc/net/dev and returns a flat hash that maps
  # "<interface> <field>" (for example "eth0 rx bytes") to the integer counter.
  #
  # Only interfaces accepted by #monitored? are included.
  def state
    metrics = {}
    # Skip first two lines (column headers). File.foreach closes the file
    # once all lines have been read.
    File.foreach("/proc/net/dev").drop(2).each do |line|
      interface, counters = line.split(":", 2)
      next if counters.nil? # not an "<interface>: <counters>" line

      # Interface names are right-aligned in the file, so strip the padding
      # before comparing them with the configured names.
      interface = interface.strip
      next unless monitored?(interface)

      @fields.zip(counters.split.map(&:to_i)).each do |field, value|
        next if value.nil? # fewer columns than known fields
        metrics["#{interface} #{field}"] = value
      end
    end
    metrics
  end

  # Reports, for every counter seen in both this and the previous sample, the
  # change per second (delta divided by the configured interval). Counters that
  # disappeared since the previous sample are reported once as 'expired'.
  # The first tick only records the baseline and reports nothing.
  def tick
    state = self.state

    if @old_state
      # Report services from `@old_state` that don't exist in `state` as expired
      (@old_state.keys - state.keys).each do |service|
        report(:service => service.dup, :state => 'expired')
      end

      # Report delta for services that have values in both `@old_state` and `state`
      state.each do |service, metric|
        next unless @old_state.key?(service)

        delta = metric - @old_state[service]
        # Drop and error counters warn when non-zero. `drops?` also covers the
        # "tx drops" field name, which the old /drop$/ pattern never matched.
        # NOTE(review): the check uses the cumulative counter, not the delta,
        # so one drop keeps the service in 'warning' - confirm this is wanted.
        svc_state = (service =~ /(drops?|errs)\z/ && metric > 0) ? 'warning' : 'ok'

        report(
          :service => service.dup,
          :metric => (delta.to_f / opts[:interval]),
          :state => svc_state
        )
      end
    end

    @old_state = state
  end

  private

  # True when the interface should be sampled: it is not on the ignore list
  # and either no explicit interface list was given or it is on that list.
  def monitored?(interface)
    return false if @ignore_interfaces.include?(interface)
    @use_interfaces.empty? || @use_interfaces.include?(interface)
  end
end
80
+
81
+ Riemann::Monitors::Net.run
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers nginx status stub statistics and submits them to Riemann.
4
+ # See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
5
+
6
+ require_relative "../lib/riemann-monitors"
7
+
8
# Monitor that fetches the nginx stub-status page and reports its counters.
class Riemann::Monitors::NginxStatus
  include Riemann::Monitors
  require 'net/http'
  require 'uri'

  opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
  opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
  opt :active_warning, "Active connections warning threshold", :default => 0
  opt :active_critical, "Active connections critical threshold", :default => 0
  opt :reading_warning, "Reading connections warning threshold", :default => 0
  opt :reading_critical, "Reading connections critical threshold", :default => 0
  opt :writing_warning, "Writing connections warning threshold", :default => 0
  opt :writing_critical, "Writing connections critical threshold", :default => 0
  opt :waiting_warning, "Waiting connections warning threshold", :default => 0
  opt :waiting_critical, "Waiting connections critical threshold", :default => 0

  def initialize
    @uri = URI.parse(opts[:uri])

    # sample response:
    #
    # Active connections: 1
    # server accepts handled requests
    #  39 39 39
    # Reading: 0 Writing: 1 Waiting: 0
    #
    # @keys names the seven captures of @re, in order.
    @keys = %w{active accepted handled requests reading writing waiting}
    @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
  end

  # Maps a metric value to a Riemann state using the `<key>_critical` and
  # `<key>_warning` options. A threshold of 0 (the default) or a metric
  # without such options disables the check.
  #
  # @param key [String] metric name, e.g. "active"
  # @param value [Integer] current value of the metric
  # @return [String] 'critical', 'warning' or 'ok'
  def state(key, value)
    %w{critical warning}.each do |level|
      threshold = opts["#{key}_#{level}".to_sym]
      return level if threshold && threshold > 0 && value >= threshold
    end

    'ok'
  end

  # Fetches the status page, reports "nginx health" (critical when the request
  # fails or the body cannot be parsed, ok otherwise) and then one
  # "nginx <key>" event for each metric selected with --checks.
  def tick
    response = nil
    begin
      response = Net::HTTP.get(@uri)
    rescue => e
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Connection error: #{e.class} - #{e.message}"
      )
    end

    return if response.nil?

    parsed = @re.match(response)
    if parsed.nil?
      # Previously this case reported "ok" and then crashed on nil.
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Unexpected nginx status response"
      )
      return
    end

    report(
      :service => "nginx health",
      :state => "ok",
      :description => "Nginx status connection ok"
    )

    values = parsed.captures.map { |v| v.to_i }
    wanted = Array(opts[:checks])

    @keys.zip(values).each do |key, value|
      next unless wanted.include?(key)

      report({
        :service => "nginx #{key}",
        :metric => value,
        :state => state(key, value),
        :tags => ['nginx']
      })
    end
  end
end
83
+
84
+ Riemann::Monitors::NginxStatus.run
data/bin/riemann-ntp ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports NTP stats to Riemann.
4
+
5
+ require_relative "../lib/riemann-monitors"
6
+
7
# Monitor that runs `ntpq -p -n` and reports delay, offset and jitter for
# every peer line of its output.
class Riemann::Monitors::Ntp
  include Riemann::Monitors

  def initialize
    @hostname = Socket.gethostname
  end

  # Parses the peer table printed by `ntpq -p -n`. Columns 8, 9 and 10 of each
  # peer line are reported as "ntp/delay", "ntp/offset" and "ntp/jitter"; the
  # peer address (column 1) goes into the event description.
  def tick
    `ntpq -p -n`.each_line do |line|
      cols = line.split
      # Skip the header row and the "====" separator row.
      next if cols.any? { |col| col.start_with?("===", "remote") }
      # Skip blank or truncated lines that lack the jitter column.
      next if cols.length < 10

      # Remove only the leading tally character; the previous gsub chain also
      # removed these characters from inside the peer name.
      @ntp_host = cols[0].sub(/\A[*+#-]/, "")

      report_peer_metric("delay", cols[7])
      report_peer_metric("offset", cols[8])
      report_peer_metric("jitter", cols[9])
    end
  end

  private

  # Submits one peer measurement. This helper used to be a public method named
  # `send`, which replaced Object#send on every instance of this class.
  #
  # @param type [String] "delay", "offset" or "jitter"
  # @param metric [String] raw column text, converted with `to_f`
  def report_peer_metric(type, metric)
    report(host: @hostname,
           service: "ntp/#{type}",
           metric: metric.to_f,
           description: @ntp_host.to_s,
           tags: ["ntp"])
  end
end
34
+
35
+ Riemann::Monitors::Ntp.run
data/bin/riemann-proc ADDED
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports running process count to riemann.
4
+
5
+ require 'pry'
6
+ require 'set'
7
+ require_relative "../lib/riemann-monitors"
8
+
9
+ # $clk_tck = Etc.sysconf(2).to_f
10
+
11
# One sampled process: its pid, command name and two lists of
# [field name, value] pairs (`oldfields` and `newfields`).
ProcessInfo = Struct.new(:pid, :comm, :oldfields, :newfields) do
  # Translates a one-letter process state into a Riemann state string.
  #
  # @param state [String] state letter, e.g. "R" or "Z"
  # @return [String] "ok", "warning", "critical", or "unknown" for any other value
  def riemann_statemap(state)
    case state
    when "R", "S" then "ok"
    when "I" then "warning"
    when "T", "U", "Z" then "critical"
    else "unknown"
    end
  end

  # All collected [name, value] pairs, sorted. The result is memoized on first
  # use. A field list that was never assigned (nil) counts as empty.
  def fields
    @sorted_fields ||= (Array(oldfields) + Array(newfields)).sort
  end

  # def time_of(field)
  #   fields.assoc(field).last / $clk_tck
  # end

  # Yields one event hash per field. Every hash carries :service
  # ("process;<pid>;<name>"), :description (the field name) and :pname (comm).
  # The "State" field adds :state and :value, numeric fields add :metric and
  # everything else adds :value.
  #
  # @yieldparam params [Hash] event attributes for one field
  # @return [Enumerator] when called without a block
  def summarize
    return enum_for(:summarize) unless block_given?

    fields.each do |name, metric|
      params = {}
      params[:service] = "process;#{pid};#{name}"
      params[:description] = name
      params[:pname] = comm
      if name == "State"
        params[:state] = riemann_statemap(metric)
        params[:value] = metric
      # elsif name.end_with?("time")
      #   params[:metric] = time_of(name)
      elsif metric.is_a?(Numeric)
        params[:metric] = metric
      else
        params[:value] = metric
      end
      yield(params)
    end
  end
end
51
+
52
# Monitor that samples every process whose command name matches a regular
# expression and reports selected fields from /proc/<pid>/status and
# /proc/<pid>/stat, plus the number of instances per command name.
class Riemann::Monitors::Proc
  include Riemann::Monitors

  opt(:proc_regex, "regular expression that matches the process to be monitored", type: :string, default: ".*")
  opt(:newstats, "fields from /proc/#/status to collect (always includes name and status character)", multi: :strings,
      default: ["VmRSS", "VmSize", "VmPeak", "VmHWM", "VmLib", "VmSwap", "Threads"])
  opt(:oldstats, "fields from /proc/#/stat to collect (use names from proc(5) manpage)", multi: :strings, default: ["utime", "stime"])
  opt(:metricfield, "field to assign to metric", default: "VmRSS")
  opt(:proc_min_critical, "running process count minimum", :default => 0)
  opt(:proc_max_critical, "running process count maximum", :default => 65536)

  def initialize
    # Validate before the value is handed to Regexp.new below.
    abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]

    @process_regex = Regexp.new(opts[:proc_regex])
    @newfields = Set.new(opts[:newstats]).add("Name").add("State")
    @oldfields = Set.new(opts[:oldstats])
    # Names for the numeric columns of /proc/<pid>/stat, one per line.
    @allstatfields = File.readlines(File.join(__dir__, "..", "data", "statfields")).map(&:chomp)

    @limits = { :critical => Range.new(opts[:proc_min_critical], opts[:proc_max_critical]) }
  end

  # Takes one sample of all matching processes and reports:
  # * "process.samplerate" - seconds spent collecting the sample,
  # * "process.instances"  - instance count per command name, critical when
  #   the count is outside the configured min/max range,
  # * one event per collected field of every process (see ProcessInfo#summarize).
  def tick
    @sample_start = Time.now

    processes = matching_processes.select { |p|
      begin
        p.newfields = read_status_fields(p.pid)
        p.oldfields = read_stat_fields(p.pid)
        true
      rescue Errno::ENOENT, Errno::ESRCH
        # The process exited while it was being sampled; leave it out.
        false
      end
    }
    @sample_end = Time.now

    report(service: "process.samplerate", state: "ok", metric: (@sample_end.to_f - @sample_start.to_f), time: @sample_end)

    processes.group_by { |p| p.comm }.each do |pname, ps|
      running = ps.count
      instances_state = @limits[:critical].cover?(running) ? "ok" : "critical"
      report(service: "process.instances", state: instances_state, description: pname, metric: running)
    end

    processes.each do |p|
      p.summarize do |point|
        point[:time] = @sample_end
        report(point)
      end
    end
  end

  private

  # Builds a ProcessInfo (pid and comm only) for every /proc/<pid>/comm whose
  # content matches the configured regular expression.
  def matching_processes
    Dir.glob("/proc/[0-9]*/comm").map { |cfile|
      begin
        comm = File.read(cfile).chomp
      rescue Errno::ENOENT, Errno::ESRCH
        next # process exited between the glob and the read
      end
      next unless @process_regex.match(comm)

      info = ProcessInfo.new
      info.pid = cfile[/\d+/].to_i
      info.comm = comm
      info
    }.compact
  end

  # Returns [name, value] pairs for the requested /proc/<pid>/status fields.
  # A value made only of digits becomes an Integer, a line with more than two
  # value columns is kept as a tab-joined string, and otherwise the first
  # column is used (nil when the field has no value).
  def read_status_fields(pid)
    File.readlines("/proc/#{pid}/status").map { |line|
      parts = line.split(/\s+/)
      next if parts.empty?

      name, rest = parts[0].chop, parts[1..-1] # chop removes the trailing ":"
      next unless @newfields.include?(name)

      value = rest.first
      if rest.length > 2
        value = rest.join("\t")
      elsif value && value[/\A\d+\z/]
        value = value.to_i
      end
      [name, value]
    }.compact
  end

  # Returns [name, value] pairs for the requested /proc/<pid>/stat fields.
  def read_stat_fields(pid)
    statstr = File.read("/proc/#{pid}/stat")
    # Take the trailing run of digits, minus signs and spaces by matching on
    # the reversed string, then turn it into integers. These are zipped with
    # the names loaded from data/statfields.
    stat_tail = statstr.reverse[/[-0-9 ]+/].reverse.split(" ").map { |n| n.to_i }
    @allstatfields.zip(stat_tail).select { |name, _value| @oldfields.include?(name) }
  end
end
130
+
131
+ Riemann::Monitors::Proc.run
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports varnish stats to Riemann.
4
+
5
+ require 'open3'
6
+ require_relative "../lib/riemann-monitors"
7
+
8
# Monitor that reports a fixed set of `varnishstat` counters.
class Riemann::Monitors::Varnish
  include Riemann::Monitors

  opt :varnish_host, "Varnish hostname", :default => `hostname`.chomp

  # Detects the installed varnish major version from `varnishstat -V` and
  # picks the counter names to query for that version.
  def initialize
    cmd = 'varnishstat -V'
    # capture2e merges stdout and stderr, so the banner is found on whichever
    # stream this varnishstat build writes it to.
    banner, _status = Open3.capture2e(cmd)
    version = /varnishstat \(varnish-(\d+)/.match(banner)
    abort "FATAL: could not determine the varnish version from `#{cmd}`" unless version
    @ver = version[1].to_i

    if @ver >= 4
      @vstats = [ "MAIN.sess_conn",
                  "MAIN.sess_drop",
                  "MAIN.client_req",
                  "MAIN.cache_hit",
                  "MAIN.cache_miss" ]
    else
      @vstats = [ "client_conn",
                  "client_drop",
                  "client_req",
                  "cache_hit",
                  "cache_miss" ]
    end
  end

  # Runs `varnishstat -1` for the selected counters and reports each output
  # line as "varnish <name>": column 1 is the name, column 2 the value and
  # columns 4 and up the description.
  def tick
    # Version 4 and later get one -f flag per field, older versions get a
    # single comma-separated list.
    field_args = @ver >= 4 ? @vstats.join(" -f ") : @vstats.join(",")

    `varnishstat -1 -f #{field_args}`.each_line do |stat|
      m = stat.split
      next if m.length < 2 # blank or malformed line

      report(
        :host => opts[:varnish_host].dup,
        :service => "varnish #{m[0]}",
        :metric => m[1].to_f,
        :state => "ok",
        :description => Array(m[3..-1]).join(' '),
        :tags => ["varnish"]
      )
    end
  end
end
53
+
54
+ Riemann::Monitors::Varnish.run
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers zookeeper STATS and submits them to Riemann.
4
+
5
+ require_relative "../lib/riemann-monitors"
6
+
7
# Monitor that sends the `mntr` command to a ZooKeeper server and submits one
# Riemann event per `<key><TAB><value>` line of the reply.
class Riemann::Monitors::Zookeeper
  include Riemann::Monitors
  require 'socket'

  opt :zookeeper_host, "Zookeeper hostname", :default => 'localhost'
  opt :zookeeper_port, "Zookeeper port", :default => 2181

  # Opens a fresh connection, sends `mntr` and reads until the server closes
  # the connection. Each matching line is reported as "zookeeper <key>" with
  # the value converted by `to_f`. The block form of TCPSocket.open closes the
  # socket even when reading or reporting raises.
  def tick
    TCPSocket.open(opts[:zookeeper_host], opts[:zookeeper_port]) do |sock|
      sock.sync = true
      sock.print("mntr")
      sock.flush

      while (line = sock.gets)
        m = line.match(/^(\w+)\t+(.*)/)
        # Skip lines that are not key/value pairs instead of crashing on nil.
        next unless m

        report(
          :host => opts[:zookeeper_host].dup,
          :service => "zookeeper #{m[1]}",
          :metric => m[2].to_f,
          :state => 'ok',
          :tags => ['zookeeper']
        )
      end
    end
  end
end
40
+
41
+ Riemann::Monitors::Zookeeper.run
data/data/statfields ADDED
@@ -0,0 +1,49 @@
1
+ ppid
2
+ pgrp
3
+ session
4
+ tty_nr
5
+ tpgid
6
+ flags
7
+ minflt
8
+ cminflt
9
+ majflt
10
+ cmajflt
11
+ utime
12
+ stime
13
+ cutime
14
+ cstime
15
+ priority
16
+ nice
17
+ num_threads
18
+ itrealvalue
19
+ starttime
20
+ vsize
21
+ rss
22
+ rsslim
23
+ startcode
24
+ endcode
25
+ startstack
26
+ kstkesp
27
+ kstkeip
28
+ signal
29
+ blocked
30
+ sigignore
31
+ sigcatch
32
+ wchan
33
+ nswap
34
+ cnswap
35
+ exit_signal
36
+ processor
37
+ rt_priority
38
+ policy
39
+ delayacct_blkio_ticks
40
+ guest_time
41
+ cguest_time
42
+ start_data
43
+ end_data
44
+ start_brk
45
+ arg_start
46
+ arg_end
47
+ env_start
48
+ env_end
49
+ exit_code
@@ -0,0 +1,111 @@
1
class Hash
  # Returns true when every given key is present in this hash (and therefore
  # also when no keys are given).
  def has_keys?(*wanted)
    wanted.each { |candidate| return false unless key?(candidate) }
    true
  end
end
6
+
7
module Riemann
  # Mixin shared by all monitors. Including it gives the class a command-line
  # option DSL (`opt`), a `run` entry point, and gives instances the parsed
  # options, a Riemann client and the `report` helper. A monitor overrides
  # #tick to collect and report its data.
  module Monitors
    # Adds the class-level methods to the including class and registers the
    # options that every monitor understands.
    def self.included(base)
      base.instance_eval do
        # Instantiates the monitor and enters its reporting loop.
        def run
          new.run
        end

        # Records an option declaration; it is replayed on the parser in
        # `options`.
        def opt(*a)
          a.unshift :opt
          @opts ||= []
          @opts << a
        end

        # Builds a Trollop parser from the recorded declarations and parses ARGV.
        def options
          p = Trollop::Parser.new
          @opts.each do |o|
            p.send(*o)
          end
          Trollop::with_standard_exception_handling(p) do
            p.parse ARGV
          end
        end

        opt :host, "Riemann host", :default => '127.0.0.1'
        opt :port, "Riemann port", :default => 5555
        opt :event_host, "Event hostname", :type => String
        opt :interval, "Seconds between updates", :default => 5
        opt :tag, "Tag to add to events", :type => String, :multi => true
        opt :ttl, "TTL for events", :type => Integer
        opt :attribute, "Attribute to add to the event", :type => String, :multi => true
        opt :timeout, "Timeout (in seconds) when waiting for acknowledgements", :default => 30
        opt :tcp, "Use TCP transport instead of UDP (improves reliability, slight overhead.", :default => true
        opt :ssl, "Use SSL.", default: false
        opt :ssl_ca_file, "SSL certificate authority cert", :default => File.join(Dir.home, ".config", "riemann-tools", "ca.crt")
        opt :ssl_cert_file, "SSL client certificate public key", :default => File.join(Dir.home, ".config", "riemann-tools", "#{Socket.gethostname}.crt")
        opt :ssl_key_file, "SSL client certificate private key", :default => File.join(Dir.home, ".config", "riemann-tools", "#{Socket.gethostname}.key")
      end
    end

    # Returns parsed options (cached) from command line.
    def options
      @options ||= self.class.options
    end
    alias :opts :options

    # Extra event attributes given as `--attribute key=value` (cached).
    #
    # The text is split at the first "=" only, so values may contain "=".
    # Entries without "=" are ignored; previously a single such entry made
    # every call raise ArgumentError.
    #
    # @return [Hash{Symbol => String}]
    def attributes
      @attributes ||= Array(options[:attribute]).each_with_object({}) do |attr, parsed|
        k, v = attr.split("=", 2)
        parsed[k.to_sym] = v if k && v
      end
    end

    # Completes an event and queues it on the Riemann client.
    #
    # The hash is modified in place: command-line tags are appended to :tags,
    # :ttl defaults to the --ttl option or twice the interval, :host defaults
    # to --event-host, and the --attribute pairs are merged in.
    #
    # @param event_hash [Hash] event fields such as :service, :metric, :state
    def report(event_hash)
      event_hash[:tags] = (event_hash[:tags] || []) + (options[:tag] || [])
      event_hash[:ttl] ||= (options[:ttl] || (options[:interval] * 2))
      event_hash[:host] ||= options[:event_host]
      event_hash.merge!(attributes)
      riemann.add_event(event_hash)
    end

    # Creates a client for the configured server. SSL settings are passed only
    # when --ssl is set and all three SSL file options are present.
    def new_riemann_client
      riemann_options = {
        :server => "#{options[:host]}:#{options[:port]}",
        :connect_timeout => options[:timeout]
      }
      ssl_files_given = [:ssl_ca_file, :ssl_cert_file, :ssl_key_file].all? { |key| options.key?(key) }
      if ssl_files_given && options[:ssl]
        # These are given to OpenSSL::SSL::SSLContext
        riemann_options[:ssl] = {
          ca_file: File.expand_path(options[:ssl_ca_file]),
          cert: OpenSSL::X509::Certificate.new(File.read(File.expand_path(options[:ssl_cert_file]))),
          # PKey.read detects the key type, so non-RSA keys load as well.
          key: OpenSSL::PKey.read(File.read(File.expand_path(options[:ssl_key_file]))),
          verify_mode: OpenSSL::SSL::VERIFY_PEER,
          ssl_version: :TLSv1_2
        }
      end
      Riemann::Experiment::Client.new(riemann_options)
    end

    # The Riemann client, created on first use.
    def riemann
      @riemann ||= new_riemann_client
    end
    alias :r :riemann

    # Calls #tick, then `riemann.send_message(ok: true)`, once per interval,
    # forever. Errors from either call are printed to stderr and the loop
    # carries on.
    def run
      t0 = Time.now
      loop do
        begin
          tick
          riemann.send_message(ok: true)
        rescue => e
          $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
        end

        # Sleep until the next multiple of the interval counted from t0, so a
        # slow tick does not shift the schedule.
        sleep(options[:interval] - ((Time.now - t0) % options[:interval]))
      end
    end

    # Collects and reports data; does nothing unless overridden by the monitor.
    def tick
    end
  end
end
@@ -0,0 +1,6 @@
1
module Riemann::Monitors
  # Gem version as a dotted string.
  VERSION = "0.0.1"
  # Numeric components of the version, taken from the string above.
  VERSION_MAJOR, VERSION_MINOR, VERSION_TINY = VERSION.split(".").map(&:to_i)
end
@@ -0,0 +1,8 @@
1
+ require 'riemann-ruby-experiments'
2
+ require 'openssl'
3
+ require 'trollop'
4
+ require 'json'
5
+
6
+ require_relative 'riemann-monitors/version'
7
+ require_relative 'riemann-monitors/main'
8
+
data/project.yaml ADDED
@@ -0,0 +1,12 @@
1
+ ---
2
+ name: "riemann-monitors"
3
+ toplevel_module: Riemann::Monitors
4
+ dependencies:
5
+ riemann-ruby-experiments: ">= 0.0.2"
6
+ trollop: ">= 1.16.2"
7
+ json: ">= 1.8.3"
8
+ dev_dependencies:
9
+ rake: ">=10.3"
10
+ yard: "~> 0.8.7"
11
+ asciidoctor: "~> 1.5.2"
12
+