riemann-tools.haf 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers munin statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Relays values fetched from a Munin node to Riemann, one event per
# service/field pair, each tagged 'munin'.
class Riemann::Tools::Munin
  include Riemann::Tools
  require 'munin-ruby'

  opt :munin_host, "Munin hostname", :default => 'localhost'
  opt :munin_port, "Munin port", :default => 4949
  opt :services, "Munin services to translate (if not specified, all services are relayed)", :type => :strings

  # Opens the connection to the Munin node selected by --munin-host and
  # --munin-port. Previously both options were declared but never used, so
  # the tool always talked to the library's default node.
  def initialize
    @munin = ::Munin::Node.new(opts[:munin_host], opts[:munin_port])
  end

  # Fetches every requested service (or every service the node lists when
  # --services is not given) and reports each field as its own event.
  def tick
    services = opts[:services] || @munin.list
    services.each do |service|
      # fetch yields name => { field => value }; use a distinct block
      # parameter so the outer `service` is not shadowed.
      @munin.fetch(service).each do |name, parts|
        parts.each do |part, metric|
          report(
            :service => "#{name} #{part}",
            :metric => metric.to_f,
            :state => 'ok',
            :tags => ['munin']
          )
        end
      end
    end
  end
end

# Script entry point.
Riemann::Tools::Munin.run
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers network interface statistics from /proc/net/dev and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Samples the per-interface counters in /proc/net/dev and reports, for each
# counter, its change since the previous sample divided by the polling
# interval.
class Riemann::Tools::Net
  include Riemann::Tools

  opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
  opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']

  # Counter names in the column order of a /proc/net/dev line. They are part
  # of the reported service names, so the historical 'rx drop' / 'tx drops'
  # spelling difference is kept as-is.
  COUNTERS = ['rx bytes',
              'rx packets',
              'rx errs',
              'rx drop',
              'rx fifo',
              'rx frame',
              'rx compressed',
              'rx multicast',
              'tx bytes',
              'tx packets',
              'tx errs',
              'tx drops',
              'tx fifo',
              'tx colls',
              'tx carrier',
              'tx compressed'].freeze

  # Copies the interface filters out of the options and clears the baseline
  # sample so the first tick only records state.
  def initialize
    @old_state = nil
    @interfaces = opts[:interfaces].map(&:dup) if opts[:interfaces]
    @ignore_interfaces = opts[:ignore_interfaces].map(&:dup)
  end

  # Reads /proc/net/dev and returns a Hash of "<iface> <counter>" => Integer,
  # restricted to --interfaces (when given) and minus --ignore-interfaces.
  def state
    counters = {}

    File.read('/proc/net/dev').each_line do |line|
      # Anchor at the start of the line and accept any non-space, non-colon
      # characters so names like "br-lan" or "eth0.100" are captured whole
      # rather than being cut down to their last word-character run.
      next unless line =~ /\A\s*([^\s:]+):\s*([\s\d]+)/

      iface = $1
      values = $2.split(/\s+/).map { |str| str.to_i }

      COUNTERS.zip(values).each do |name, value|
        # A line with fewer columns than expected must not store nil counters.
        next if value.nil?

        counters["#{iface} #{name}"] = value
      end
    end

    # Filter interfaces
    if @interfaces
      counters = counters.select do |service, _value|
        @interfaces.include? service.split(' ').first
      end
    end

    counters.reject do |service, _value|
      @ignore_interfaces.include? service.split(' ').first
    end
  end

  # Takes a new sample and, when a previous one exists, reports the rate of
  # change of every counter present in both samples.
  def tick
    state = self.state

    if @old_state
      state.each do |service, metric|
        previous = @old_state[service]
        # An interface that appeared since the last sample has no baseline
        # yet; subtracting nil would raise, so wait for the next tick.
        next if previous.nil?

        delta = metric - previous

        # /drops?$/ covers both 'rx drop' and 'tx drops'; the old /drop$/
        # never matched the tx counter.
        # NOTE(review): the state is derived from the cumulative counter,
        # not from delta, so it stays 'warning' once any drop/error has been
        # counted -- confirm this is intended.
        svc_state = case service
                    when /drops?$/, /errs$/
                      metric > 0 ? 'warning' : 'ok'
                    else
                      'ok'
                    end

        report(
          :service => service.dup,
          :metric => (delta.to_f / opts[:interval]),
          :state => svc_state
        )
      end
    end

    @old_state = state
  end
end

# Script entry point.
Riemann::Tools::Net.run
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers nginx status stub statistics and submits them to Riemann.
4
+ # See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
5
+
6
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
7
+
8
# Polls an nginx stub_status page and reports its counters to Riemann,
# together with an "nginx health" event describing whether the page could be
# fetched and understood.
class Riemann::Tools::NginxStatus
  include Riemann::Tools
  require 'net/http'
  require 'uri'

  opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
  opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
  opt :active_warning, "Active connections warning threshold", :default => 0
  opt :active_critical, "Active connections critical threshold", :default => 0
  opt :reading_warning, "Reading connections warning threshold", :default => 0
  opt :reading_critical, "Reading connections critical threshold", :default => 0
  opt :writing_warning, "Writing connections warning threshold", :default => 0
  opt :writing_critical, "Writing connections critical threshold", :default => 0
  opt :waiting_warning, "Waiting connections warning threshold", :default => 0
  opt :waiting_critical, "Waiting connections critical threshold", :default => 0

  # Parses the status URI and prepares the response pattern.
  def initialize
    @uri = URI.parse(opts[:uri])

    # sample response:
    #
    # Active connections: 1
    # server accepts handled requests
    #  39 39 39
    # Reading: 0 Writing: 1 Waiting: 0
    @keys = %w{active accepted handled requests reading writing waiting}
    @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
  end

  # Returns 'critical', 'warning' or 'ok' for +value+ of metric +key+.
  # A threshold only applies when an option "<key>_<level>" exists and is
  # greater than zero; critical is checked before warning.
  def state(key, value)
    %w{critical warning}.each do |level|
      threshold = opts["#{key}_#{level}".to_sym]
      next unless threshold

      return level if threshold > 0 && value >= threshold
    end

    'ok'
  end

  # Fetches the status page, reports connection health, then reports each
  # metric selected by --checks.
  def tick
    begin
      # Root-qualified so the lookup cannot resolve to a Net constant
      # reachable through the included module.
      response = ::Net::HTTP.get(@uri)
    rescue => e
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Connection error: #{e.class} - #{e.message}"
      )
      return
    end

    # A page that is not stub_status output used to crash the tick with a
    # NoMethodError after "ok" had already been reported; surface it as a
    # health problem instead.
    match = @re.match(response.to_s)
    if match.nil?
      report(
        :service => "nginx health",
        :state => "critical",
        :description => "Unexpected response from nginx status page"
      )
      return
    end

    report(
      :service => "nginx health",
      :state => "ok",
      :description => "Nginx status connection ok"
    )

    values = match.captures.map { |v| v.to_i }
    # Honour --checks; its default lists every key, so default behaviour is
    # unchanged.
    checks = opts[:checks] || @keys

    @keys.zip(values).each do |key, value|
      next unless checks.include?(key)

      report({
        :service => "nginx #{key}",
        :metric => value,
        :state => state(key, value),
        :tags => ['nginx']
      })
    end
  end
end

# Script entry point.
Riemann::Tools::NginxStatus.run
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports running process count to riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Counts the running processes whose command line matches --proc-regex and
# reports the count to Riemann, flagged critical when it falls outside the
# configured minimum/maximum.
class Riemann::Tools::Proc
  include Riemann::Tools
  require 'shellwords'

  opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
  opt :proc_min_critical, "running process count minimum", :default => 1
  opt :proc_max_critical, "running process count maximum", :default => 1

  # Reads the limits from the options, aborts when no pattern was given, and
  # selects the check implementation (only a Linux one exists; other systems
  # get a warning and the same implementation).
  def initialize
    @limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }

    abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]

    ostype = `uname -s`.chomp.downcase
    puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
    @check = method :linux_proc
  end

  # Reports one event, coercing service/state to strings and metric to Float.
  def alert(service, state, metric, description)
    report(
      :service => service.to_s,
      :state => state.to_s,
      :metric => metric.to_f,
      :description => description
    )
  end

  # Counts matching lines of `ps axo args` (excluding grep itself and
  # riemann-proc) and alerts :critical when the count is outside
  # [min, max], :ok otherwise.
  def linux_proc
    process = opts[:proc_regex]
    # Quote the pattern for the shell: unquoted, a pattern containing
    # spaces, '|', '(' or ';' was split or interpreted by the shell rather
    # than handed to grep. `-e` keeps a leading '-' from being read as a
    # grep option.
    pattern = ::Shellwords.escape(process)
    running = Integer(`ps axo args | grep -e #{pattern} | grep -v grep | grep -v riemann-proc | wc -l`)

    limits = @limits[:critical]
    state = (running > limits[:max] || running < limits[:min]) ? :critical : :ok

    alert "proc #{process}", state, running, "process #{process} is running #{running} instances"
  end

  # Runs the check selected in initialize.
  def tick
    @check.call
  end
end

# Script entry point.
Riemann::Tools::Proc.run
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env ruby
2
+ # Gathers RabbitMQ overview statistics from the management API and submits them to Riemann.
3
+
4
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
5
+
6
# Polls the RabbitMQ management API's /api/overview endpoint and reports the
# message, queue and object totals it returns to Riemann.
class Riemann::Tools::Rabbitmq
  include Riemann::Tools

  require 'faraday'
  require 'json'
  require 'uri'


  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1

  opt :monitor_user, 'RabbitMQ monitoring user', type: :string
  opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
  opt :monitor_port, 'RabbitMQ monitoring port', default: 15672
  opt :monitor_host, 'RabbitMQ monitoring host', default: "localhost"

  # Builds the overview URL, with the monitoring credentials embedded.
  def monitor_url
    "http://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api/overview"
  end

  # Host name attached to every event: the :event_host option when set,
  # otherwise the monitored host.
  def event_host
    options[:event_host] || options[:monitor_host]
  end

  # Performs a GET on +uri+ with the configured timeouts. Returns the
  # response, or nil after reporting a critical "rabbitmq monitoring" event
  # for +event_host+ when the request raises.
  def safe_get(uri, event_host)
    # Handle connection timeouts
    response = nil
    begin
      connection = Faraday.new(uri)
      response = connection.get do |req|
        req.options[:timeout] = options[:read_timeout]
        req.options[:open_timeout] = options[:open_timeout]
      end
    rescue => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "HTTP connection error: #{e.class} - #{e.message}"
      )
    end
    response
  end

  # Fetches the overview document and reports one event per statistic.
  def tick
    uri = URI(monitor_url)
    response = safe_get(uri, event_host)

    return if response.nil?

    # Check the status before touching the body: an error response is not
    # guaranteed to be JSON, and parsing it first raised instead of
    # reporting the failure.
    if response.status != 200
      # NOTE(review): this event uses service "rabbitmq" while the other
      # health events use "rabbitmq monitoring" -- confirm whether that is
      # intended before renaming.
      report(:host => event_host,
        :service => "rabbitmq",
        :state => "critical",
        :description => "HTTP connection error: #{response.status} - #{response.body}"
      )
      return
    end

    begin
      json = JSON.parse(response.body)
    rescue JSON::ParserError => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "Invalid JSON in response: #{e.class} - #{e.message}"
      )
      return
    end

    report(:host => event_host,
      :service => "rabbitmq monitoring",
      :state => "ok",
      :description => "HTTP connection ok"
    )

    %w( message_stats queue_totals object_totals ).each do |stat|
      # NOTE / BUG ?
      # Brand new servers can have blank message stats. Is this ok?
      # I can't decide.
      stats = json[stat]
      # The section may also be missing altogether, not just empty.
      next if stats.nil? || stats.empty?

      stats.each_pair do |k, v|
        service = "rabbitmq.#{stat}.#{k}"
        # "*_details" entries are hashes; their 'rate' field is the metric.
        metric = k =~ /details$/ ? v['rate'] : v

        # TODO: Set state via thresholds which can be configured

        report(:host => event_host,
          :service => service,
          :metric => metric,
          :description => "RabbitMQ monitor"
        )
      end
    end
  end
end

# Script entry point.
Riemann::Tools::Rabbitmq.run
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers redis INFO statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Reports every property returned by Redis INFO (for the configured section)
# to Riemann as an event tagged 'redis'.
class Riemann::Tools::Redis
  include Riemann::Tools
  require 'redis'

  opt :redis_host, "Redis hostname", :default => 'localhost'
  opt :redis_port, "Redis port", :default => 6379
  opt :redis_password, "Redis password", :default => ''
  opt :redis_url, "Redis URL", :default => ''
  opt :redis_socket, "Redis socket", :default => ''
  opt :redis_section, "Redis INFO section", :default => 'default'

  # INFO properties whose values are text rather than numbers.
  STRING_VALUES = %w{ redis_version redis_git_sha1 redis_mode os
                      multiplexing_api gcc_version run_id used_memory_human
                      used_memory_peak_human mem_allocator
                      rdb_last_bgsave_status aof_last_bgrewrite_status role }

  # Connects by URL, else by socket path, else by host/port (first
  # non-empty option wins), then authenticates when a password is set.
  def initialize
    connection =
      if opts[:redis_url] != ''
        { :url => opts[:redis_url] }
      elsif opts[:redis_socket] != ''
        { :path => opts[:redis_socket] }
      else
        { :host => opts[:redis_host], :port => opts[:redis_port] }
      end

    @redis = ::Redis.new(connection)
    @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
    @section = opts[:redis_section]
  end

  # Reports one event per INFO property. Text properties carry their value
  # in the description, except the two *_status properties, which carry it
  # in the state. A "not permitted" command error triggers
  # re-authentication; other command errors are ignored.
  def tick
    @redis.info(@section).each do |property, value|
      event = {
        :host => opts[:redis_host].dup,
        :service => "redis #{property}",
        :metric => value.to_f,
        :state => value.to_s,
        :tags => ['redis']
      }

      if STRING_VALUES.include?(property) || property.match(/^db\d+/)
        status_fields = %w{ rdb_last_bgsave_status aof_last_bgrewrite_status }
        target = status_fields.include?(property) ? :state : :description
        event[target] = value
      end

      # run_id is an identifier, not a measurement.
      event[:metric] = 0 if property == "run_id"

      report(event)
    end
  rescue ::Redis::CommandError => e
    if e.message == "ERR operation not permitted"
      @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
    end
  end
end

# Script entry point.
Riemann::Tools::Redis.run
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers Redis SLOWLOG statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Reports entries of the Redis SLOWLOG to Riemann as 'warning' events and
# optionally resets the log after each poll.
class Riemann::Tools::RedisSlowlog
  include Riemann::Tools
  require 'redis'

  opt :redis_url, "Redis URL", :default => 'redis://127.0.0.1:6379/'
  opt :redis_password, "Redis password", :default => ''
  opt :slowlog_len, "Number of SLOWLOG entries to get", :default => 10
  opt :slowlog_reset, "Reset SLOWLOG after querying it", :default => false

  # Connects to Redis by URL, remembers the slowlog settings, and
  # authenticates when a password is configured.
  def initialize
    @redis = ::Redis.new(url: opts[:redis_url])

    @slowlog_len = opts[:slowlog_len]
    @slowlog_reset = opts[:slowlog_reset]

    @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
  end

  # Fetches up to @slowlog_len entries and reports each one: the third field
  # becomes the metric, the second the event time, and the command is put in
  # the description. Resets the log afterwards when --slowlog-reset is set.
  def tick
    entries = @redis.slowlog("GET", @slowlog_len)

    entries.each do |entry|
      _id, timestamp, us, cmd = entry

      report(
        host: @redis.client.host,
        service: "redis",
        time: timestamp,
        metric: us.to_f,
        state: 'warning',
        tags: ['redis', 'slowlog'],
        description: cmd.inspect
      )
    end

    @redis.slowlog("RESET") if @slowlog_reset
  end
end

# Script entry point.
Riemann::Tools::RedisSlowlog.run