riemann-tools.haf 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers munin statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
# Relays values fetched from a munin node to Riemann.
#
# On every tick each selected munin service is fetched and every
# (service, part, value) triple is reported as one event tagged 'munin'.
# `opts` and `report` are not defined here; presumably they come from the
# Riemann::Tools mixin.
class Riemann::Tools::Munin
  include Riemann::Tools
  require 'munin-ruby'

  opt :munin_host, "Munin hostname", :default => 'localhost'
  opt :munin_port, "Munin port", :default => 4949
  opt :services, "Munin services to translate (if not specified, all services are relayed)", :type => :strings

  # Opens the connection to the munin node selected by --munin-host and
  # --munin-port. Previously both options were declared but ignored, so the
  # node library's built-in default address was always used.
  def initialize
    @munin = ::Munin::Node.new(opts[:munin_host], opts[:munin_port])
  end

  # Fetches every requested service (or everything the node lists when
  # --services is not given) and reports one event per value.
  def tick
    requested = opts[:services] || @munin.list
    requested.each do |name|
      # fetch yields its own service name; use a distinct block parameter so
      # the outer loop variable is not shadowed.
      @munin.fetch(name).each do |service, parts|
        parts.each do |part, metric|
          report(
            :service => "#{service} #{part}",
            :metric => metric.to_f,
            :state => 'ok',
            :tags => ['munin']
          )
        end
      end
    end
  end
end
35
+
36
+ Riemann::Tools::Munin.run
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers network interface statistics from /proc/net/dev and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
+ class Riemann::Tools::Net
8
+ include Riemann::Tools
9
+
10
+ opt :interfaces, "Interfaces to monitor", :type => :strings, :default => nil
11
+ opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']
12
+
13
+ def initialize
14
+ @old_state = nil
15
+ @interfaces = opts[:interfaces].map(&:dup) if opts[:interfaces]
16
+ @ignore_interfaces = opts[:ignore_interfaces].map(&:dup)
17
+ end
18
+
19
+ def state
20
+ f = File.read('/proc/net/dev')
21
+ state = f.split("\n").inject({}) do |s, line|
22
+ if line =~ /\s*(\w+?):\s*([\s\d]+)\s*/
23
+ iface = $1
24
+
25
+ ['rx bytes',
26
+ 'rx packets',
27
+ 'rx errs',
28
+ 'rx drop',
29
+ 'rx fifo',
30
+ 'rx frame',
31
+ 'rx compressed',
32
+ 'rx multicast',
33
+ 'tx bytes',
34
+ 'tx packets',
35
+ 'tx errs',
36
+ 'tx drops',
37
+ 'tx fifo',
38
+ 'tx colls',
39
+ 'tx carrier',
40
+ 'tx compressed'].map do |service|
41
+ "#{iface} #{service}"
42
+ end.zip(
43
+ $2.split(/\s+/).map { |str| str.to_i }
44
+ ).each do |service, value|
45
+ s[service] = value
46
+ end
47
+ end
48
+
49
+ s
50
+ end
51
+
52
+ # Filter interfaces
53
+ if is = @interfaces
54
+ state = state.select do |service, value|
55
+ is.include? service.split(' ').first
56
+ end
57
+ end
58
+
59
+ state = state.reject do |service, value|
60
+ @ignore_interfaces.include? service.split(' ').first
61
+ end
62
+
63
+ state
64
+ end
65
+
66
+ def tick
67
+ state = self.state
68
+
69
+ if @old_state
70
+ state.each do |service, metric|
71
+ delta = metric - @old_state[service]
72
+ svc_state = case service
73
+ when /drop$/
74
+ if metric > 0
75
+ 'warning'
76
+ else
77
+ 'ok'
78
+ end
79
+ when /errs$/
80
+ if metric > 0
81
+ 'warning'
82
+ else
83
+ 'ok'
84
+ end
85
+ else
86
+ 'ok'
87
+ end
88
+
89
+ report(
90
+ :service => service.dup,
91
+ :metric => (delta.to_f / opts[:interval]),
92
+ :state => svc_state
93
+ )
94
+ end
95
+ end
96
+
97
+ @old_state = state
98
+ end
99
+ end
100
+
101
+ Riemann::Tools::Net.run
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers nginx status stub statistics and submits them to Riemann.
4
+ # See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
5
+
6
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
7
+
8
+ class Riemann::Tools::NginxStatus
9
+ include Riemann::Tools
10
+ require 'net/http'
11
+ require 'uri'
12
+
13
+ opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
14
+ opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
15
+ opt :active_warning, "Active connections warning threshold", :default => 0
16
+ opt :active_critical, "Active connections critical threshold", :default => 0
17
+ opt :reading_warning, "Reading connections warning threshold", :default => 0
18
+ opt :reading_critical, "Reading connections critical threshold", :default => 0
19
+ opt :writing_warning, "Writing connections warning threshold", :default => 0
20
+ opt :writing_critical, "Writing connections critical threshold", :default => 0
21
+ opt :waiting_warning, "Waiting connections warning threshold", :default => 0
22
+ opt :waiting_critical, "Waiting connections critical threshold", :default => 0
23
+
24
+ def initialize
25
+ @uri = URI.parse(opts[:uri])
26
+
27
+ # sample response:
28
+ #
29
+ # Active connections: 1
30
+ # server accepts handled requests
31
+ # 39 39 39
32
+ # Reading: 0 Writing: 1 Waiting: 0
33
+ @keys = %w{active accepted handled requests reading writing waiting}
34
+ @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
35
+ end
36
+
37
+ def state(key, value)
38
+ if opts.has_key? "#{key}_critical".to_sym
39
+ critical_threshold = opts["#{key}_critical".to_sym]
40
+ return 'critical' if critical_threshold > 0 and value >= critical_threshold
41
+ end
42
+
43
+ if opts.has_key? "#{key}_warning".to_sym
44
+ warning_threshold = opts["#{key}_warning".to_sym]
45
+ return 'warning' if warning_threshold > 0 and value >= warning_threshold
46
+ end
47
+
48
+ return 'ok'
49
+ end
50
+
51
+ def tick
52
+ response = nil
53
+ begin
54
+ response = Net::HTTP.get(@uri)
55
+ rescue => e
56
+ report(
57
+ :service => "nginx health",
58
+ :state => "critical",
59
+ :description => "Connection error: #{e.class} - #{e.message}"
60
+ )
61
+ end
62
+
63
+ return if response.nil?
64
+
65
+ report(
66
+ :service => "nginx health",
67
+ :state => "ok",
68
+ :description => "Nginx status connection ok"
69
+ )
70
+
71
+ values = @re.match(response).to_a[1,7].map { |v| v.to_i }
72
+
73
+ @keys.zip(values).each do |key, value|
74
+ report({
75
+ :service => "nginx #{key}",
76
+ :metric => value,
77
+ :state => state(key, value),
78
+ :tags => ['nginx']
79
+ })
80
+ end
81
+ end
82
+ end
83
+
84
+ Riemann::Tools::NginxStatus.run
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports running process count to riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
+ class Riemann::Tools::Proc
8
+ include Riemann::Tools
9
+
10
+ opt :proc_regex, "regular expression that matches the process to be monitored", type: :string
11
+ opt :proc_min_critical, "running process count minimum", :default => 1
12
+ opt :proc_max_critical, "running process count maximum", :default => 1
13
+
14
+ def initialize
15
+ @limits = { :critical => { :min => opts[:proc_min_critical], :max => opts[:proc_max_critical] } }
16
+
17
+ abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]
18
+
19
+ ostype = `uname -s`.chomp.downcase
20
+ puts "WARNING: OS '#{ostype}' not explicitly supported. Falling back to Linux" unless ostype == "linux"
21
+ @check = method :linux_proc
22
+ end
23
+
24
+ def alert(service, state, metric, description)
25
+ report(
26
+ :service => service.to_s,
27
+ :state => state.to_s,
28
+ :metric => metric.to_f,
29
+ :description => description
30
+ )
31
+ end
32
+
33
+ def linux_proc
34
+ process = opts[:proc_regex]
35
+ running = Integer(`ps axo args | grep #{process} | grep -v grep | grep -v riemann-proc | wc -l`)
36
+ if running > @limits[:critical][:max] or running < @limits[:critical][:min]
37
+ alert "proc #{process}", :critical, running, "process #{process} is running #{running} instances"
38
+ else
39
+ alert "proc #{process}", :ok, running, "process #{process} is running #{running} instances"
40
+ end
41
+ end
42
+
43
+ def tick
44
+ @check.call
45
+ end
46
+ end
47
+
48
+ Riemann::Tools::Proc.run
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+
4
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
5
+
6
# Polls the RabbitMQ management API /api/overview endpoint and reports its
# message, queue and object statistics to Riemann.
#
# `options` and `report` are not defined here; presumably they come from the
# Riemann::Tools mixin.
class Riemann::Tools::Rabbitmq
  include Riemann::Tools

  require 'faraday'
  require 'json'
  require 'uri'


  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1

  opt :monitor_user, 'RabbitMQ monitoring user', type: :string
  opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
  opt :monitor_port, 'RabbitMQ monitoring port', default: 15672
  opt :monitor_host, 'RabbitMQ monitoring host', default: "localhost"

  # URL of the overview endpoint, with the credentials embedded.
  def monitor_url
    "http://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api/overview"
  end

  # Host name attached to events: --event-host when set, otherwise the
  # monitored RabbitMQ host.
  def event_host
    options[:event_host] || options[:monitor_host]
  end

  # Performs the GET with the configured timeouts. Any error is reported as a
  # critical "rabbitmq monitoring" event and nil is returned.
  def safe_get(uri, event_host)
    # Handle connection timeouts
    response = nil
    begin
      connection = Faraday.new(uri)
      response = connection.get do |req|
        req.options[:timeout] = options[:read_timeout]
        req.options[:open_timeout] = options[:open_timeout]
      end
    rescue => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "HTTP connection error: #{e.class} - #{e.message}"
      )
    end
    response
  end

  # Fetches the overview document and reports one event per statistic.
  def tick
    uri = URI(monitor_url)
    response = safe_get(uri, event_host)

    return if response.nil?

    # Check the status before parsing: error responses (e.g. a 401 page) are
    # not necessarily JSON and must still produce the critical event.
    # NOTE(review): this event uses service "rabbitmq" while the matching ok
    # event uses "rabbitmq monitoring" — confirm whether that is intended.
    if response.status != 200
      report(:host => event_host,
        :service => "rabbitmq",
        :state => "critical",
        :description => "HTTP connection error: #{response.status} - #{response.body}"
      )
      return
    end

    begin
      json = JSON.parse(response.body)
    rescue JSON::ParserError => e
      report(:host => event_host,
        :service => "rabbitmq monitoring",
        :state => "critical",
        :description => "Invalid JSON in response: #{e.message}"
      )
      return
    end

    report(:host => event_host,
      :service => "rabbitmq monitoring",
      :state => "ok",
      :description => "HTTP connection ok"
    )

    %w( message_stats queue_totals object_totals ).each do |stat|
      # NOTE / BUG ?
      # Brand new servers can have blank message stats. Is this ok?
      # I can't decide.
      stats = json[stat]
      # The section may also be missing entirely; treat that like blank.
      next if stats.nil? || stats.empty?

      stats.each_pair do |k, v|
        service = "rabbitmq.#{stat}.#{k}"
        # "*_details" entries are objects; their 'rate' member is the metric.
        metric = k =~ /details$/ ? v['rate'] : v

        # TODO: Set state via thresholds which can be configured

        report(:host => event_host,
          :service => service,
          :metric => metric,
          :description => "RabbitMQ monitor"
        )
      end
    end
  end
end
99
+ Riemann::Tools::Rabbitmq.run
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers redis INFO statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
+ class Riemann::Tools::Redis
8
+ include Riemann::Tools
9
+ require 'redis'
10
+
11
+ opt :redis_host, "Redis hostname", :default => 'localhost'
12
+ opt :redis_port, "Redis port", :default => 6379
13
+ opt :redis_password, "Redis password", :default => ''
14
+ opt :redis_url, "Redis URL", :default => ''
15
+ opt :redis_socket, "Redis socket", :default => ''
16
+ opt :redis_section, "Redis INFO section", :default => 'default'
17
+
18
+ STRING_VALUES = %w{ redis_version redis_git_sha1 redis_mode os
19
+ multiplexing_api gcc_version run_id used_memory_human
20
+ used_memory_peak_human mem_allocator
21
+ rdb_last_bgsave_status aof_last_bgrewrite_status role }
22
+
23
+ def initialize
24
+ options = if opts[:redis_url] != ''
25
+ { :url => opts[:redis_url] }
26
+ elsif opts[:redis_socket] != ''
27
+ { :path => opts[:redis_socket] }
28
+ else
29
+ { :host => opts[:redis_host], :port => opts[:redis_port] }
30
+ end
31
+ @redis = ::Redis.new(options)
32
+ @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
33
+ @section = opts[:redis_section]
34
+ end
35
+
36
+ def tick
37
+ begin
38
+ @redis.info(@section).each do |property, value|
39
+ data = {
40
+ :host => opts[:redis_host].dup,
41
+ :service => "redis #{property}",
42
+ :metric => value.to_f,
43
+ :state => value.to_s,
44
+ :tags => ['redis']
45
+ }
46
+
47
+ if STRING_VALUES.include?(property) || property.match(/^db\d+/)
48
+ if %w{ rdb_last_bgsave_status aof_last_bgrewrite_status }.include?(property)
49
+ data[:state] = value
50
+ else
51
+ data[:description] = value
52
+ end
53
+ end
54
+
55
+ if property == "run_id"
56
+ data[:metric] = 0
57
+ end
58
+
59
+ report(data)
60
+ end
61
+ rescue ::Redis::CommandError => e
62
+ if e.message == "ERR operation not permitted"
63
+ @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
64
+ end
65
+ end
66
+
67
+ end
68
+
69
+ end
70
+
71
+ Riemann::Tools::Redis.run
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gathers Redis SLOWLOG statistics and submits them to Riemann.
4
+
5
+ require File.expand_path('../../lib/riemann/tools', __FILE__)
6
+
7
+ class Riemann::Tools::RedisSlowlog
8
+ include Riemann::Tools
9
+ require 'redis'
10
+
11
+ opt :redis_url, "Redis URL", :default => 'redis://127.0.0.1:6379/'
12
+ opt :redis_password, "Redis password", :default => ''
13
+ opt :slowlog_len, "Number of SLOWLOG entries to get", :default => 10
14
+ opt :slowlog_reset, "Reset SLOWLOG after querying it", :default => false
15
+
16
+ def initialize
17
+ @redis = ::Redis.new(url: opts[:redis_url])
18
+
19
+ @slowlog_len = opts[:slowlog_len]
20
+ @slowlog_reset = opts[:slowlog_reset]
21
+
22
+ @redis.auth(opts[:redis_password]) unless opts[:redis_password] == ''
23
+ end
24
+
25
+ def tick
26
+ @redis.slowlog("GET", @slowlog_len).each do |id, timestamp, us, cmd|
27
+ data = {
28
+ :host => @redis.client.host,
29
+ :service => "redis",
30
+ :time => timestamp,
31
+ :metric => us.to_f,
32
+ :state => 'warning',
33
+ :tags => ['redis', 'slowlog'],
34
+ :description => cmd.inspect
35
+ }
36
+ report(data)
37
+ end
38
+
39
+ @redis.slowlog("RESET") if @slowlog_reset
40
+ end
41
+
42
+ end
43
+
44
+ Riemann::Tools::RedisSlowlog.run