riemann-tools-dgvz 0.2.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/riemann-munin ADDED
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers munin statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Relays values fetched from a munin node to Riemann.
#
# On every tick the node is asked for the services named with --services
# (or, when that option is absent, for every service the node lists) and one
# event tagged "munin" is reported per service/field pair.
class Riemann::Tools::Munin
  include Riemann::Tools
  require 'munin-ruby'

  opt :munin_host, "Munin hostname", :default => 'localhost'
  opt :munin_port, "Munin port", :default => 4949
  opt :services, "Munin services to translate (if not specified, all services are relayed)", :type => :strings

  # Builds the munin node handle from --munin-host / --munin-port.
  # (Previously the handle was created without arguments, so both options
  # were silently ignored.)
  def initialize
    @munin = ::Munin::Node.new(opts[:munin_host], opts[:munin_port])
  end

  # Fetches the selected services and reports every field as a float metric
  # with state 'ok'.
  def tick
    requested = opts[:services] || @munin.list
    requested.each do |name|
      # fetch yields service => { field => value } pairs; the block
      # parameters are named distinctly so they do not shadow `name`.
      @munin.fetch(name).each do |service, parts|
        parts.each do |part, metric|
          report(
            :service => "#{service} #{part}",
            :metric  => metric.to_f,
            :state   => 'ok',
            :tags    => ['munin']
          )
        end
      end
    end
  end
end

Riemann::Tools::Munin.run
data/bin/riemann-net ADDED
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers network interface statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Reports per-interface network counters from /proc/net/dev as rates.
#
# Each tick reads the cumulative counters, subtracts the values seen on the
# previous tick and reports the difference divided by the tick interval.
# The first tick only records a baseline and reports nothing.
class Riemann::Tools::Net
  include Riemann::Tools

  opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
  opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']

  # Names given to the numeric columns of a /proc/net/dev line, in the order
  # they are zipped against the parsed values. These strings are part of the
  # emitted service names and must not be altered.
  COUNTERS = ['rx bytes',
              'rx packets',
              'rx errs',
              'rx drop',
              'rx fifo',
              'rx frame',
              'rx compressed',
              'rx multicast',
              'tx bytes',
              'tx packets',
              'tx errs',
              'tx drops',
              'tx fifo',
              'tx colls',
              'tx carrier',
              'tx compressed'].freeze

  # Matches "<iface>: <numbers...>" anchored at the start of the line.
  # The interface name is everything up to the colon, so names containing
  # '-' or '.' (e.g. "br-lan", "eth0.100") are captured whole instead of
  # being truncated to their trailing word characters.
  IFACE_LINE = /\A\s*([^\s:]+):\s*([\s\d]+)\s*/

  def initialize
    @old_state = nil
    @interfaces = opts[:interfaces].map(&:dup) if opts[:interfaces]
    @ignore_interfaces = opts[:ignore_interfaces].map(&:dup)
  end

  # Reads /proc/net/dev and returns a hash of "<iface> <counter>" => Integer,
  # restricted to --interfaces (when given) and minus --ignore-interfaces.
  def state
    counters = {}

    File.read('/proc/net/dev').split("\n").each do |line|
      match = IFACE_LINE.match(line)
      next unless match # header lines carry no "<name>:" prefix

      iface = match[1]
      values = match[2].split(/\s+/).map { |str| str.to_i }
      COUNTERS.zip(values).each do |counter, value|
        counters["#{iface} #{counter}"] = value
      end
    end

    # Filter interfaces
    if (wanted = @interfaces)
      counters = counters.select do |service, _value|
        wanted.include? service.split(' ').first
      end
    end

    counters.reject do |service, _value|
      @ignore_interfaces.include? service.split(' ').first
    end
  end

  # Reports one rate event per counter, then stores the current readings as
  # the baseline for the next tick.
  def tick
    state = self.state

    if @old_state
      state.each do |service, metric|
        previous = @old_state[service]
        # An interface that appeared since the last tick (or a short line
        # that left a column unset) has nothing to diff against yet.
        next if previous.nil? || metric.nil?

        delta = metric - previous

        # Drop and error counters warn as soon as the cumulative count is
        # non-zero. "drops?" covers both 'rx drop' and 'tx drops'; the old
        # /drop$/ pattern never matched the latter.
        svc_state =
          if service =~ /(drops?|errs)$/ && metric > 0
            'warning'
          else
            'ok'
          end

        report(
          :service => service.dup,
          :metric  => (delta.to_f / opts[:interval]),
          :state   => svc_state
        )
      end
    end

    @old_state = state
  end
end

Riemann::Tools::Net.run
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers nginx status stub statistics and submits them to Riemann.
4
+ # See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
5
+
6
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
7
+
8
# Polls an nginx stub-status page and reports its counters to Riemann.
#
# Every tick emits an "nginx health" event describing whether the page could
# be fetched and parsed, followed by one "nginx <key>" event per metric
# selected with --checks.
class Riemann::Tools::NginxStatus
  include Riemann::Tools
  require 'net/http'
  require 'uri'

  opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
  opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
  opt :active_warning, "Active connections warning threshold", :default => 0
  opt :active_critical, "Active connections critical threshold", :default => 0
  opt :reading_warning, "Reading connections warning threshold", :default => 0
  opt :reading_critical, "Reading connections critical threshold", :default => 0
  opt :writing_warning, "Writing connections warning threshold", :default => 0
  opt :writing_critical, "Writing connections critical threshold", :default => 0
  opt :waiting_warning, "Waiting connections warning threshold", :default => 0
  opt :waiting_critical, "Waiting connections critical threshold", :default => 0

  def initialize
    @uri = URI.parse(opts[:uri])

    # sample response:
    #
    # Active connections: 1
    # server accepts handled requests
    #  39 39 39
    # Reading: 0 Writing: 1 Waiting: 0
    #
    # @keys names the seven capture groups of @re, in order.
    @keys = %w{active accepted handled requests reading writing waiting}
    @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
  end

  # Maps a metric value onto 'critical', 'warning' or 'ok' using the
  # "<key>_critical" / "<key>_warning" options. A threshold of 0 (the
  # default) disables that level; keys without threshold options are 'ok'.
  def state(key, value)
    %w{critical warning}.each do |level|
      threshold = opts["#{key}_#{level}".to_sym]
      next if threshold.nil?
      return level if threshold > 0 && value >= threshold
    end

    'ok'
  end

  # Fetches the status page, reports health, then reports each metric.
  def tick
    response = nil
    begin
      response = Net::HTTP.get(@uri)
    rescue => e
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Connection error: #{e.class} - #{e.message}"
      )
    end

    return if response.nil?

    # Parse before declaring health: a page that does not look like stub
    # status output (wrong URI, error page, ...) used to raise NoMethodError
    # right after an "ok" health event had been sent.
    match = @re.match(response)
    if match.nil?
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Unexpected nginx status response: #{response[0, 200].inspect}"
      )
      return
    end

    report(
      :service => "nginx health",
      :state => "ok",
      :description => "Nginx status connection ok"
    )

    values = match.captures.map { |v| v.to_i }
    selected = opts[:checks]

    @keys.zip(values).each do |key, value|
      # Honour --checks; its default lists every key, so nothing is
      # filtered unless the user narrows the selection.
      next unless selected.include?(key)

      report({
        :service => "nginx #{key}",
        :metric => value,
        :state => state(key, value),
        :tags => ['nginx']
      })
    end
  end
end

Riemann::Tools::NginxStatus.run
data/bin/riemann-proc ADDED
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports running process count to riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Reports how many running processes match a pattern.
#
# The count is 'ok' while it lies within
# [--proc-min-critical, --proc-max-critical] and 'critical' otherwise.
class Riemann::Tools::Proc
  include Riemann::Tools
  require 'shellwords'

  opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
  opt :proc_min_critical, "running process count minimum", :default => 1
  opt :proc_max_critical, "running process count maximum", :default => 1

  # Records the limits, aborts when no pattern was supplied, and selects the
  # counting strategy. Only a Linux strategy exists; other systems get a
  # warning on stdout and use it anyway.
  def initialize
    @limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }

    abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]

    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
    @check = method :linux_proc
  end

  # Sends one event, coercing service/state to strings and metric to float.
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts matching command lines in `ps axo args` (excluding the grep
  # helpers and riemann-proc itself) and alerts with the resulting state.
  def linux_proc
    process = opts[:proc_regex]

    # The pattern is shell-escaped rather than wrapped in hand-written
    # single quotes, so a pattern containing a quote cannot terminate the
    # argument early; `-e` keeps a leading '-' from being read as a flag.
    pattern = Shellwords.escape(process)
    running = Integer(`ps axo args | grep -e #{pattern} | grep -v grep | grep -v riemann-proc | wc -l`)

    limits = @limits[:critical]
    state = (running > limits[:max] || running < limits[:min]) ? :critical : :ok

    alert "proc #{process}", state, running, "process #{process} is running #{running} instances"
  end

  def tick
    @check.call
  end
end

Riemann::Tools::Proc.run
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+
4
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
5
+
6
# Polls the /api/overview endpoint of a RabbitMQ monitoring port and relays
# the message_stats, queue_totals and object_totals sections to Riemann.
#
# Connectivity is tracked on the "rabbitmq monitoring" service; the metrics
# themselves are reported as "rabbitmq.<section>.<key>".
class Riemann::Tools::Rabbitmq
  include Riemann::Tools

  require 'faraday'
  require 'json'
  require 'uri'

  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1

  opt :monitor_user, 'RabbitMQ monitoring user', type: :string
  opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
  opt :monitor_port, 'RabbitMQ monitoring port', default: 15672
  opt :monitor_host, 'RabbitMQ monitoring host', default: "localhost"

  # URL of the overview endpoint, with the credentials embedded.
  def monitor_url
    "http://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api/overview"
  end

  # Host name attached to every event: :event_host when set, otherwise the
  # monitored host.
  def event_host
    options[:event_host] || options[:monitor_host]
  end

  # Performs the GET with the configured timeouts. Any exception is turned
  # into a critical "rabbitmq monitoring" event and nil is returned.
  def safe_get(uri, event_host)
    # Handle connection timeouts
    response = nil
    begin
      connection = Faraday.new(uri)
      response = connection.get do |req|
        req.options[:timeout] = options[:read_timeout]
        req.options[:open_timeout] = options[:open_timeout]
      end
    rescue => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "HTTP connection error: #{e.class} - #{e.message}"
      )
    end
    response
  end

  # Fetches the overview document and reports health plus all metrics.
  def tick
    uri = URI(monitor_url)
    response = safe_get(uri, event_host)

    return if response.nil?

    # Check the status before touching the body: error responses are not
    # guaranteed to be JSON. The event goes to "rabbitmq monitoring" (it used
    # to go to "rabbitmq") so the later ok event on that service clears it.
    if response.status != 200
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "HTTP connection error: #{response.status} - #{response.body}"
      )
      return
    end

    begin
      json = JSON.parse(response.body)
    rescue JSON::ParserError => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "Invalid JSON in response: #{e.message}"
      )
      return
    end

    report(:host => event_host,
      :service => "rabbitmq monitoring",
      :state => "ok",
      :description => "HTTP connection ok"
    )

    %w( message_stats queue_totals object_totals ).each do |stat|
      # NOTE / BUG ?
      # Brand new servers can have blank message stats. Is this ok?
      # I can't decide.
      section = json[stat]
      # A section may also be missing altogether; treat that like blank.
      next if section.nil? || section.empty?

      section.each_pair do |k, v|
        # "*details" entries are hashes; their 'rate' member is the metric.
        metric = k =~ /details$/ ? v['rate'] : v

        # TODO: Set state via thresholds which can be configured

        report(:host => event_host,
          :service => "rabbitmq.#{stat}.#{k}",
          :metric => metric,
          :description => "RabbitMQ monitor"
        )
      end
    end
  end
end
Riemann::Tools::Rabbitmq.run
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
4
+
5
# Polls a list of Resmon endpoints and relays their metrics to Riemann.
#
# The endpoints come from --resmon-hostfile (one URI per line). For each one
# a "resmon" health event is reported, followed by one event per <metric>
# element found under //ResmonResults/ResmonResult.
class Riemann::Tools::Resmon
  include Riemann::Tools
  require 'nokogiri'
  require 'faraday'

  opt :resmon_hostfile, 'File with hostnames running Resmon (one URI per line)', type: :string
  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
  opt :fqdn, 'Use FQDN for event host'

  # Loads the endpoint list. Aborts with a usage hint when no host file was
  # given (File.read(nil) would otherwise raise a bare TypeError), and drops
  # blank lines so they are not later parsed as empty URIs.
  def initialize
    hostfile = options[:resmon_hostfile]
    abort "FATAL: specify a Resmon host file with --resmon-hostfile, see --help for usage" unless hostfile

    @hosts = File.read(hostfile).split("\n").map { |line| line.strip }.reject { |line| line.empty? }
    super
  end

  # Work out the hostname to submit with the event: the full name with
  # --fqdn, otherwise only the part before the first dot.
  def get_event_host(host)
    return host if options[:fqdn]

    host.split('.')[0]
  end

  # Handles HTTP connections and GET requests safely: any exception becomes
  # a critical "resmon" event and nil is returned.
  def safe_get(uri, event_host)
    # Handle connection timeouts
    response = nil
    begin
      connection = Faraday.new(uri)
      response = connection.get do |req|
        req.options[:timeout] = options[:read_timeout]
        req.options[:open_timeout] = options[:open_timeout]
      end
    rescue => e
      report(:host => event_host,
        :service => "resmon",
        :state => "critical",
        :description => "HTTP connection error: #{e.class} - #{e.message}"
      )
    end
    response
  end

  # Queries every endpoint and reports its health and metrics.
  def tick
    @hosts.each do |host|
      uri = URI(host)
      event_host = get_event_host(uri.host)

      response = safe_get(uri, event_host)
      next if response.nil?

      # Handle non-200 responses
      if response.status != 200
        report(:host => event_host,
          :service => "resmon",
          :state => "critical",
          :description => "HTTP connection error: #{response.status} - #{response.body}"
        )
        next
      end

      report(:host => event_host,
        :service => "resmon",
        :state => "ok",
        :description => "Resmon connection ok"
      )

      doc = Nokogiri::XML(response.body)
      doc.xpath('//ResmonResults/ResmonResult').each do |result|
        timestamp = result.xpath('last_update').first.text

        result.xpath('metric').each do |metric|
          hash = {
            host: event_host,
            service: "#{result.attributes['module'].value}`#{result.attributes['service'].value}`#{metric.attributes['name'].value}",
            time: timestamp.to_i
          }

          # The metric's type attribute decides how its text is reported:
          # integer, float, or (for strings) as the event description.
          case metric.attributes['type'].value
          when /[iIlL]/
            hash[:metric] = metric.text.to_i
          when 'n'
            hash[:metric] = metric.text.to_f
          when 's'
            hash[:description] = metric.text
          when '0'
            raise 'dunno what 0 is yet'
          end

          report(hash)
        end
      end
    end
  end
end

Riemann::Tools::Resmon.run