riemann-monitors 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative "../lib/riemann-monitors"
4
+
5
# Reports how many lines of `virsh list` output contain "i-" as a single
# "KVM Running VMs" event.
class Riemann::Monitors::KVM
  include Riemann::Monitors

  # Runs the shell pipeline once and submits the resulting line count.
  def tick
    # `wc -l` prints the number of matching lines; to_i parses that output.
    instance_count = `virsh list |grep i-|wc -l`.to_i

    report(service: "KVM Running VMs", metric: instance_count, state: "info")
  end
end
21
+
22
+ Riemann::Monitors::KVM.run
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers memcached STATS and submits them to Riemann.
4
+
5
+ require_relative "../lib/riemann-monitors"
6
+
7
# Sends the memcached "stats" command and reports one event per STAT line.
class Riemann::Monitors::Memcached
  include Riemann::Monitors
  require 'socket'

  opt :memcached_host, "Memcached hostname", :default => 'localhost'
  opt :memcached_port, "Memcached port", :default => 11211

  # Opens a fresh connection, issues "stats", and reports every
  # "STAT <name> <value>" line until "END" (or until the peer closes).
  # The socket is always closed, even when reading or reporting raises.
  def tick
    sock = TCPSocket.new(opts[:memcached_host], opts[:memcached_port])
    begin
      sock.print("stats\r\n")
      sock.flush

      # The first response line is read and dropped, exactly as before.
      # NOTE(review): this looks like it discards the first STAT line of the
      # reply -- confirm whether that is intentional.
      sock.gets

      # gets returns nil when the server closes the connection before "END";
      # treat that as end of data instead of crashing on nil.strip.
      while (line = sock.gets)
        break if line.strip == 'END'

        m = line.match(/STAT (\w+) (\S+)/)
        next unless m # skip anything that is not a STAT line (e.g. ERROR)

        report(
          :host => opts[:memcached_host].dup,
          :service => "memcached/#{m[1]}",
          :metric => m[2].to_f,
          :state => 'ok',
          :tags => ['memcached']
        )
      end
    ensure
      sock.close
    end
  end
end
37
+
38
+ Riemann::Monitors::Memcached.run
data/bin/riemann-net ADDED
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers network interface statistics and submits them to Riemann.
4
+
5
+ require "pry"
6
+ require_relative "../lib/riemann-monitors"
7
+ require 'set'
8
+
9
# Samples the per-interface counters in /proc/net/dev and reports the
# per-second rate of change of each counter between two consecutive ticks.
class Riemann::Monitors::Net
  include Riemann::Monitors

  opt :interfaces, "Interfaces to monitor", :type => :strings, :default => []
  opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']

  def initialize
    @old_state = nil
    # Column names, in the order the counters appear after "<iface>:".
    @fields = ["rx bytes", "rx packets", "rx errs", "rx drop", "rx fifo", "rx frame", "rx compressed", "rx multicast",
               "tx bytes", "tx packets", "tx errs", "tx drops", "tx fifo", "tx colls", "tx carrier", "tx compressed"]
    @use_interfaces = Set.new(opts[:interfaces])
    @ignore_interfaces = Set.new(opts[:ignore_interfaces])
  end

  # Reads /proc/net/dev and returns a flat Hash of
  # "<interface> <field>" => Integer counter for every monitored interface.
  #
  # The hash is flat (one entry per service) because #tick subtracts the
  # previous value from the current one per key.
  def state
    metrics = {}

    # File.foreach closes the file once enumeration finishes.
    # The first two lines are column headers.
    File.foreach("/proc/net/dev").drop(2).each do |line|
      # Split on the colon rather than on whitespace: the interface name may
      # be left-padded, and the first counter may follow the colon directly.
      name, counters = line.split(":", 2)
      next unless counters

      interface = name.strip
      next unless monitored?(interface)

      @fields.zip(counters.split.map(&:to_i)).each do |field, value|
        next if value.nil? # line had fewer columns than @fields
        metrics["#{interface} #{field}"] = value
      end
    end

    metrics
  end

  # Reports "expired" for services that disappeared since the last tick and
  # a per-second delta for services present in both samples. The first tick
  # only records a baseline.
  def tick
    current = state

    if @old_state
      # Services from the previous sample that no longer exist.
      @old_state.each_key do |service|
        next if current.key?(service)
        report(:service => service.dup, :state => 'expired')
      end

      current.each do |service, metric|
        next unless @old_state.key?(service)

        delta = metric - @old_state[service]
        # "drops?" so that both "rx drop" and "tx drops" are covered.
        # The check uses the cumulative counter, as the original did.
        svc_state = (service =~ /(drops?|errs)$/ && metric > 0) ? 'warning' : 'ok'

        report(
          :service => service.dup,
          :metric => (delta.to_f / opts[:interval]),
          :state => svc_state
        )
      end
    end

    @old_state = current
  end

  private

  # An interface is monitored unless it is ignored; an empty --interfaces
  # list means "all interfaces" rather than "none".
  def monitored?(interface)
    return false if @ignore_interfaces.include?(interface)
    @use_interfaces.empty? || @use_interfaces.include?(interface)
  end
end
80
+
81
+ Riemann::Monitors::Net.run
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers nginx status stub statistics and submits them to Riemann.
4
+ # See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
5
+
6
+ require_relative "../lib/riemann-monitors"
7
+
8
# Fetches the nginx stub-status page and reports each counter it contains,
# plus an "nginx health" event describing whether the page could be read.
class Riemann::Monitors::NginxStatus
  include Riemann::Monitors
  require 'net/http'
  require 'uri'

  opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
  opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
  opt :active_warning, "Active connections warning threshold", :default => 0
  opt :active_critical, "Active connections critical threshold", :default => 0
  opt :reading_warning, "Reading connections warning threshold", :default => 0
  opt :reading_critical, "Reading connections critical threshold", :default => 0
  opt :writing_warning, "Writing connections warning threshold", :default => 0
  opt :writing_critical, "Writing connections critical threshold", :default => 0
  opt :waiting_warning, "Waiting connections warning threshold", :default => 0
  opt :waiting_critical, "Waiting connections critical threshold", :default => 0

  def initialize
    @uri = URI.parse(opts[:uri])

    # sample response:
    #
    # Active connections: 1
    # server accepts handled requests
    #  39 39 39
    # Reading: 0 Writing: 1 Waiting: 0
    @keys = %w{active accepted handled requests reading writing waiting}
    @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
  end

  # Maps a metric value to 'critical', 'warning' or 'ok' using the
  # "<key>_critical" / "<key>_warning" options. A threshold of 0 (the
  # default) or a missing option disables that level.
  def state(key, value)
    %w[critical warning].each do |level|
      threshold = opts["#{key}_#{level}".to_sym]
      return level if threshold && threshold > 0 && value >= threshold
    end

    'ok'
  end

  # Fetches and parses the status page, then reports the selected metrics.
  def tick
    begin
      response = Net::HTTP.get(@uri)
    rescue => e
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Connection error: #{e.class} - #{e.message}"
      )
      return
    end

    return if response.nil?

    # Net::HTTP.get returns the body whatever it contains, so the body may
    # not be a stub-status page at all; report that instead of crashing.
    match = @re.match(response)
    unless match
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Unparsable nginx status response"
      )
      return
    end

    report(
      :service => "nginx health",
      :state => "ok",
      :description => "Nginx status connection ok"
    )

    values = match.captures.map(&:to_i)
    wanted = Array(opts[:checks])

    @keys.zip(values).each do |key, value|
      # Honour --checks; its default lists every key.
      next unless wanted.include?(key)

      report({
        :service => "nginx #{key}",
        :metric => value,
        :state => state(key, value),
        :tags => ['nginx']
      })
    end
  end
end
83
+
84
+ Riemann::Monitors::NginxStatus.run
data/bin/riemann-ntp ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports NTP stats to Riemann.
4
+
5
+ require_relative "../lib/riemann-monitors"
6
+
7
# Runs `ntpq -p -n` and reports delay, offset and jitter for every peer line.
class Riemann::Monitors::Ntp
  include Riemann::Monitors

  def initialize
    @hostname = Socket.gethostname
  end

  # Parses the ntpq peer table. Columns 7, 8 and 9 (zero-based) are reported
  # as delay, offset and jitter; column 0 is the peer.
  def tick
    `ntpq -p -n`.each_line do |line|
      cols = line.split
      # Skip the header and separator rows, as before.
      next if cols.grep(/^===/).any? || cols.grep(/^remote/).any?
      # Skip blank or truncated rows, which would otherwise crash on
      # cols[0] or be reported as bogus 0.0 metrics.
      next if cols.length < 10

      # Remove the "*", "+" and "-" marker characters from the peer column.
      peer = cols[0].gsub(/[*+-]/, "")

      report_peer_metric(peer, "delay", cols[7])
      report_peer_metric(peer, "offset", cols[8])
      report_peer_metric(peer, "jitter", cols[9])
    end
  end

  private

  # Submits one "ntp/<type>" event. This helper used to be named `send`,
  # which shadowed Object#send on every instance of this class.
  def report_peer_metric(peer, type, metric)
    report(host: @hostname,
           service: "ntp/#{type}",
           metric: metric.to_f,
           description: peer.to_s,
           tags: ["ntp"])
  end
end
34
+
35
+ Riemann::Monitors::Ntp.run
data/bin/riemann-proc ADDED
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports running process count to riemann.
4
+
5
+ require 'pry'
6
+ require 'set'
7
+ require_relative "../lib/riemann-monitors"
8
+
9
+ # $clk_tck = Etc.sysconf(2).to_f
10
+
11
# One sampled process: its pid, command name, and two lists of
# [name, value] pairs (`oldfields` and `newfields`).
ProcessInfo = Struct.new(:pid, :comm, :oldfields, :newfields) do
  # Translates a one-letter process state into a Riemann state string.
  # Letters without a mapping yield "unknown".
  def riemann_statemap(state)
    case state
    when "R", "S" then "ok"
    when "I" then "warning"
    when "T", "U", "Z" then "critical"
    else "unknown"
    end
  end

  # All [name, value] pairs from both lists, sorted.
  #
  # Recomputed on every call so that assigning oldfields/newfields after a
  # first call is reflected, and tolerant of either list still being nil.
  def fields
    (Array(oldfields) + Array(newfields)).sort
  end

  # Yields one event Hash per field:
  #   :service     => "process;<pid>;<name>"
  #   :description => the field name
  #   :pname       => the command name
  # plus :state and :value for the "State" field, :metric for numeric
  # values (passed through unchanged), or :value for anything else.
  #
  # Returns an Enumerator when called without a block.
  def summarize
    return enum_for(:summarize) unless block_given?

    fields.each do |name, metric|
      params = {
        service: "process;#{pid};#{name}",
        description: name,
        pname: comm
      }

      if name == "State"
        params[:state] = riemann_statemap(metric)
        params[:value] = metric
      elsif metric.is_a?(Numeric)
        params[:metric] = metric
      else
        params[:value] = metric
      end

      yield(params)
    end
  end
end
51
+
52
# Samples /proc for processes whose command name matches --proc-regex and
# reports per-command instance counts plus selected per-process fields.
class Riemann::Monitors::Proc
  include Riemann::Monitors

  opt(:proc_regex, "regular expression that matches the process to be monitored", type: :string, default: ".*")
  opt(:newstats, "fields from /proc/#/status to collect (always includes name and status character)", multi: :strings,
      default: ["VmRSS", "VmSize", "VmPeak", "VmHWM", "VmLib", "VmSwap", "Threads"])
  # These names are matched against /proc/#/stat (see #read_stat_fields),
  # so the help text names that file.
  opt(:oldstats, "fields from /proc/#/stat to collect (use names from proc(5) manpage)", multi: :strings, default: ["utime", "stime"])
  opt(:metricfield, "field to assign to metric", default: "VmRSS")
  opt(:proc_min_critical, "running process count minimum", :default => 0)
  opt(:proc_max_critical, "running process count maximum", :default => 65536)

  def initialize
    # Validate before the option is used; Regexp.new(nil) would raise first.
    abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]

    @process_regex = Regexp.new(opts[:proc_regex])
    # NOTE(review): flatten because a repeated multi-value option appears to
    # be parsed as an array of arrays -- confirm against the option parser.
    @newfields = Set.new(Array(opts[:newstats]).flatten).add("Name").add("State")
    @oldfields = Set.new(Array(opts[:oldstats]).flatten)
    # Names for the numeric columns of /proc/#/stat, one per line.
    @allstatfields = File.readlines(File.join(__dir__, "..", "data", "statfields")).map(&:chomp)

    @limits = { :critical => Range.new(opts[:proc_min_critical], opts[:proc_max_critical]) }
  end

  # Takes one sample of all matching processes and reports:
  # * "process.samplerate" -- how long the sampling took, in seconds
  # * "process.instances"  -- one event per command name with its count
  # * one event per collected field of every process (ProcessInfo#summarize)
  def tick
    @sample_start = Time.now
    processes = Dir.glob("/proc/[0-9]*/comm").map { |cfile| sample_process(cfile) }.compact
    @sample_end = Time.now

    report(service: "process.samplerate", state: "ok", metric: (@sample_end.to_f - @sample_start.to_f), time: @sample_end)

    processes.group_by { |pr| pr.comm }.each do |pname, ps|
      running = ps.count
      state = @limits[:critical].cover?(running) ? "ok" : "critical"
      report(service: "process.instances", state: state, description: pname, metric: running)
    end

    processes.each do |pr|
      pr.summarize do |point|
        point[:time] = @sample_end
        report(point)
      end
    end
  end

  private

  # Builds a ProcessInfo for the process owning `cfile` (/proc/<pid>/comm).
  # Returns nil when the command name does not match, or when the process
  # disappeared while it was being read.
  def sample_process(cfile)
    pid = cfile[/\d+/].to_i
    comm = File.read(cfile).chomp
    return nil unless @process_regex.match(comm)

    ProcessInfo.new(pid, comm, read_stat_fields(pid), read_status_fields(pid))
  rescue Errno::ENOENT, Errno::ESRCH
    # The process exited between the glob and the reads; skip it rather
    # than losing the whole tick.
    nil
  end

  # Returns [name, value] pairs for the requested /proc/<pid>/status lines.
  # A lone all-digit value becomes an Integer; more than two tokens are
  # joined with tabs; otherwise the first token is kept.
  def read_status_fields(pid)
    File.readlines("/proc/#{pid}/status").map { |line|
      parts = line.split(/\s+/)
      next if parts.empty?

      name, rest = parts[0].chop, parts[1..-1] # chop drops the trailing ":"
      next unless @newfields.include?(name)

      value = rest.first
      if rest.length > 2
        value = rest.join("\t")
      elsif value && value[/\A\d+\z/] # value is nil when the line has no data
        value = value.to_i
      end
      [name, value]
    }.compact
  end

  # Returns [name, value] pairs for the requested /proc/<pid>/stat columns.
  def read_stat_fields(pid)
    statstr = File.read("/proc/#{pid}/stat")
    # Take the last run of digits, spaces and minus signs in the line, so
    # that whatever precedes it is ignored, then name the columns by position.
    stat_tail = statstr.reverse[/[-0-9 ]+/].reverse.split(" ").map { |n| n.to_i }
    @allstatfields.zip(stat_tail).select { |name, _value| @oldfields.include?(name) }
  end
end
130
+
131
+ Riemann::Monitors::Proc.run
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports varnish stats to Riemann.
4
+
5
+ require 'open3'
6
+ require_relative "../lib/riemann-monitors"
7
+
8
# Reports a fixed set of varnishstat counters to Riemann.
class Riemann::Monitors::Varnish
  include Riemann::Monitors

  opt :varnish_host, "Varnish hostname", :default => `hostname`.chomp

  # Detects the varnish major version from `varnishstat -V` and selects the
  # counter names to query for that version.
  def initialize
    out, err, _status = Open3.capture3('varnishstat -V')
    # The original read the banner from stderr only; stdout is searched too
    # as a fallback. NOTE(review): confirm which stream your build uses.
    m = /varnishstat \(varnish-(\d+)/.match(err + out)
    abort "FATAL: could not determine varnish version from `varnishstat -V`" unless m
    @ver = m[1].to_i

    if @ver >= 4
      @vstats = [ "MAIN.sess_conn",
                  "MAIN.sess_drop",
                  "MAIN.client_req",
                  "MAIN.cache_hit",
                  "MAIN.cache_miss" ]
    else
      @vstats = [ "client_conn",
                  "client_drop",
                  "client_req",
                  "cache_hit",
                  "cache_miss" ]
    end
  end

  # Runs `varnishstat -1` for the selected counters and reports one event
  # per output line: column 0 is the name, column 1 the value, and columns
  # 3 onward the description.
  def tick
    if @ver >= 4
      stats = `varnishstat -1 -f #{@vstats.join(" -f ")}`
    else
      stats = `varnishstat -1 -f #{@vstats.join(",")}`
    end

    stats.each_line do |stat|
      m = stat.split
      next if m.length < 2 # blank or malformed line

      report(
        :host => opts[:varnish_host].dup,
        :service => "varnish #{m[0]}",
        :metric => m[1].to_f,
        :state => "ok",
        # m[3..-1] is nil when the line has fewer than three columns.
        :description => Array(m[3..-1]).join(' '),
        :tags => ["varnish"]
      )
    end
  end
end
53
+
54
+ Riemann::Monitors::Varnish.run
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers zookeeper STATS and submits them to Riemann.
4
+
5
+ require_relative "../lib/riemann-monitors"
6
+
7
# Sends the "mntr" command to ZooKeeper and reports one event per
# "<name>\t<value>" line of the reply.
class Riemann::Monitors::Zookeeper
  include Riemann::Monitors
  require 'socket'

  opt :zookeeper_host, "Zookeeper hostname", :default => 'localhost'
  opt :zookeeper_port, "Zookeeper port", :default => 2181

  # Opens a fresh connection, issues "mntr", and reads until the server
  # closes the connection. The socket is always closed, even on error.
  def tick
    sock = TCPSocket.new(opts[:zookeeper_host], opts[:zookeeper_port])
    begin
      sock.sync = true
      sock.print("mntr")
      sock.flush

      # gets returns nil once the peer has closed the connection.
      while (line = sock.gets)
        m = line.match(/^(\w+)\t+(.*)/)
        next unless m # skip lines that are not "<name>\t<value>"

        report(
          :host => opts[:zookeeper_host].dup,
          :service => "zookeeper #{m[1]}",
          # to_f yields 0.0 for values that do not start with a number.
          :metric => m[2].to_f,
          :state => 'ok',
          :tags => ['zookeeper']
        )
      end
    ensure
      sock.close
    end
  end
end
40
+
41
+ Riemann::Monitors::Zookeeper.run
data/data/statfields ADDED
@@ -0,0 +1,49 @@
1
+ ppid
2
+ pgrp
3
+ session
4
+ tty_nr
5
+ tpgid
6
+ flags
7
+ minflt
8
+ cminflt
9
+ majflt
10
+ cmajflt
11
+ utime
12
+ stime
13
+ cutime
14
+ cstime
15
+ priority
16
+ nice
17
+ num_threads
18
+ itrealvalue
19
+ starttime
20
+ vsize
21
+ rss
22
+ rsslim
23
+ startcode
24
+ endcode
25
+ startstack
26
+ kstkesp
27
+ kstkeip
28
+ signal
29
+ blocked
30
+ sigignore
31
+ sigcatch
32
+ wchan
33
+ nswap
34
+ cnswap
35
+ exit_signal
36
+ processor
37
+ rt_priority
38
+ policy
39
+ delayacct_blkio_ticks
40
+ guest_time
41
+ cguest_time
42
+ start_data
43
+ end_data
44
+ start_brk
45
+ arg_start
46
+ arg_end
47
+ env_start
48
+ env_end
49
+ exit_code
@@ -0,0 +1,111 @@
1
class Hash
  # Returns true when every given key is present in the hash
  # (and therefore true when called with no keys at all).
  def has_keys?(*rest)
    rest.each do |wanted|
      return false unless has_key?(wanted)
    end
    true
  end
end
6
+
7
module Riemann
  # Mixin shared by every monitor script. Including it gives the class an
  # `opt` DSL for declaring command-line options, a `run` class method, and
  # instance helpers for reporting events on a fixed interval.
  module Monitors
    # Adds the class-level `run`, `opt` and `options` methods to the
    # including class and declares the options common to all monitors.
    def self.included(base)
      base.instance_eval do
        # Instantiates the monitor and enters its reporting loop.
        def run
          new.run
        end

        # Records an option declaration; it is replayed onto the parser
        # when `options` is called.
        def opt(*a)
          a.unshift :opt
          @opts ||= []
          @opts << a
        end

        # Builds a parser from the recorded declarations and parses ARGV.
        def options
          p = Trollop::Parser.new
          @opts.each do |o|
            p.send(*o)
          end
          Trollop::with_standard_exception_handling(p) do
            p.parse ARGV
          end
        end

        opt :host, "Riemann host", :default => '127.0.0.1'
        opt :port, "Riemann port", :default => 5555
        opt :event_host, "Event hostname", :type => String
        opt :interval, "Seconds between updates", :default => 5
        opt :tag, "Tag to add to events", :type => String, :multi => true
        opt :ttl, "TTL for events", :type => Integer
        opt :attribute, "Attribute to add to the event", :type => String, :multi => true
        opt :timeout, "Timeout (in seconds) when waiting for acknowledgements", :default => 30
        opt :tcp, "Use TCP transport instead of UDP (improves reliability, slight overhead.", :default => true
        opt :ssl, "Use SSL.", default: false
        opt :ssl_ca_file, "SSL certificate authority cert", :default => File.join(Dir.home, ".config", "riemann-tools", "ca.crt")
        opt :ssl_cert_file, "SSL client certificate public key", :default => File.join(Dir.home, ".config", "riemann-tools", "#{Socket.gethostname}.crt")
        opt :ssl_key_file, "SSL client certificate private key", :default => File.join(Dir.home, ".config", "riemann-tools", "#{Socket.gethostname}.key")
      end
    end

    # Returns parsed options (cached) from command line.
    def options
      @options ||= self.class.options
    end
    alias :opts :options

    # Returns the --attribute values as a Hash of Symbol => String (cached).
    #
    # Each value is split on its first "=" only, so "url=a=b" becomes
    # { url: "a=b" }. Entries without "=" or with an empty key are ignored
    # instead of being passed on as nil.
    def attributes
      @attributes ||= begin
        parsed = {}
        (options[:attribute] || []).each do |attr|
          k, v = attr.split("=", 2)
          next if k.nil? || k.empty? || v.nil?
          parsed[k.to_sym] = v
        end
        parsed
      end
    end

    # Fills in defaults on `event_hash` (in place) and queues it on the
    # client: appends the --tag values, sets :ttl to --ttl or twice the
    # interval unless already set, sets :host to --event-host unless already
    # set, and merges the --attribute values.
    def report(event_hash)
      event_hash[:tags] = (event_hash[:tags] || []) + (options[:tag] || [])
      event_hash[:ttl] ||= (options[:ttl] || (options[:interval] * 2))
      event_hash[:host] ||= options[:event_host]
      event_hash.merge!(attributes)
      riemann.add_event(event_hash)
    end

    # Builds a client from the --host/--port/--timeout options, adding the
    # SSL settings when --ssl is set.
    def new_riemann_client
      riemann_options = {
        :server => "#{options[:host]}:#{options[:port]}",
        :connect_timeout => options[:timeout]
      }
      if options.has_keys?(:ssl_ca_file, :ssl_cert_file, :ssl_key_file) && options[:ssl]
        # These are given to OpenSSL::SSL::SSLContext
        riemann_options[:ssl] = {
          ca_file: File.expand_path(options[:ssl_ca_file]),
          cert: OpenSSL::X509::Certificate.new(File.read(File.expand_path(options[:ssl_cert_file]))),
          key: OpenSSL::PKey::RSA.new(File.read(File.expand_path(options[:ssl_key_file]))),
          verify_mode: OpenSSL::SSL::VERIFY_PEER,
          ssl_version: :TLSv1_2
        }
      end
      Riemann::Experiment::Client.new(riemann_options)
    end

    # The client, created on first use.
    def riemann
      @riemann ||= new_riemann_client
    end
    alias :r :riemann

    # Calls #tick forever, once per interval. An error raised by a tick is
    # printed to stderr and does not stop the loop.
    def run
      t0 = Time.now
      loop do
        begin
          tick
          riemann.send_message(ok: true)
        rescue => e
          $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
        end

        # Sleep until the next multiple of the interval measured from t0,
        # so that slow ticks do not make the schedule drift.
        sleep(options[:interval] - ((Time.now - t0) % options[:interval]))
      end
    end

    # Overridden by each monitor; does nothing by default.
    def tick
    end
  end
end
@@ -0,0 +1,6 @@
1
# Version constants for the gem: the individual numeric components and
# the full dotted version string.
module Riemann::Monitors
  VERSION_MAJOR = 0
  VERSION_MINOR = 0
  VERSION_TINY = 1

  VERSION = "0.0.1"
end
@@ -0,0 +1,8 @@
1
+ require 'riemann-ruby-experiments'
2
+ require 'openssl'
3
+ require 'trollop'
4
+ require 'json'
5
+
6
+ require_relative 'riemann-monitors/version'
7
+ require_relative 'riemann-monitors/main'
8
+
data/project.yaml ADDED
@@ -0,0 +1,12 @@
1
+ ---
2
+ name: "riemann-monitors"
3
+ toplevel_module: Riemann::Monitors
4
+ dependencies:
5
+ riemann-ruby-experiments: ">= 0.0.2"
6
+ trollop: ">= 1.16.2"
7
+ json: ">= 1.8.3"
8
+ dev_dependencies:
9
+ rake: ">=10.3"
10
+ yard: "~> 0.8.7"
11
+ asciidoctor: "~> 1.5.2"
12
+