riemann-tools 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.markdown +63 -5
- data/bin/riemann-consul +106 -0
- data/bin/riemann-haproxy +7 -1
- data/bin/riemann-health +4 -1
- data/bin/riemann-net +1 -1
- data/lib/riemann/tools.rb +2 -2
- metadata +33 -125
- data/bin/riemann-aws-billing +0 -79
- data/bin/riemann-aws-rds-status +0 -48
- data/bin/riemann-aws-status +0 -64
- data/bin/riemann-elasticsearch +0 -91
- data/bin/riemann-elb-metrics +0 -154
- data/bin/riemann-munin +0 -36
- data/bin/riemann-rabbitmq +0 -267
- data/bin/riemann-resmon +0 -103
- data/bin/riemann-riak +0 -329
- data/bin/riemann-riak-keys +0 -12
- data/bin/riemann-riak-ring +0 -8
data/bin/riemann-aws-billing
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'fog'
|
3
|
-
|
4
|
-
|
5
|
-
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
-
|
7
|
-
$0 = __FILE__
|
8
|
-
|
9
|
-
# Polls CloudWatch for the AWS/Billing "EstimatedCharges" metric of every
# configured service and hands one event per returned datapoint to `report`.
class Riemann::Tools::AWSBilling
  include Riemann::Tools

  opt :fog_credentials_file, "Fog credentials file", :type => String
  opt :fog_credential, "Fog credentials to use", :type => String

  opt :access_key, "AWS access key", :type => String
  opt :secret_key, "Secret access key", :type => String
  opt :services, "AWS services: AmazonEC2 AmazonS3 AWSDataTransfer", :type => :strings, :multi => true, :default => ["AmazonEC2", "AmazonS3", "AWSDataTransfer"]

  opt :time_start, "Start time in seconds of the metrics period (2hrs ago default)", :type => Integer, :default => 7200
  opt :time_end, "End time in seconds of the metrics period ", :type => Integer, :default => 60

  # Builds the CloudWatch client, either from a Fog credentials file or from
  # the explicit access/secret key options.
  def initialize
    if options[:fog_credentials_file]
      Fog.credentials_path = opts[:fog_credentials_file]
      # --fog-credential is optional; calling to_sym on nil would raise, so
      # Fog's credential selection is left untouched when it is not given.
      Fog.credential = opts[:fog_credential].to_sym if opts[:fog_credential]
      @cloudwatch = Fog::AWS::CloudWatch.new
    else
      @cloudwatch = Fog::AWS::CloudWatch.new(:aws_secret_access_key => opts[:secret_key],
                                             :aws_access_key_id => opts[:access_key])
    end
  end

  # Queries CloudWatch once per service and reports each datapoint.
  def tick
    # The query window is derived from the current time on every tick. It used
    # to be computed once in the constructor, and only on the access-key path,
    # which left it nil with a credentials file and stale after the first tick.
    now = Time.now.utc
    start_time = (now - opts[:time_start]).iso8601
    end_time = (now - opts[:time_end]).iso8601

    # flatten: a multi-occurrence :strings option may arrive as an array of
    # arrays, while the default is a flat array; both shapes are accepted.
    opts[:services].flatten.each do |service|
      billing_datapoints(service, start_time, end_time).each do |metrics|
        event = {
          host: nil,
          service: "AWScloudwatch.Billing." + service,
          time: metrics["Timestamp"].to_i,
          description: "AWS Estimate Charges for #{service}",
          tags: ["aws_billing"],
          state: "ok",
          metric: metrics["Maximum"]
        }

        report event
      end
    end
  end

  private

  # Returns the 'Datapoints' list of the hourly maximum USD EstimatedCharges
  # for +service+ between +start_time+ and +end_time+ (ISO 8601 strings).
  def billing_datapoints(service, start_time, end_time)
    query = {
      'Statistics' => ["Maximum"],
      'StartTime' => start_time,
      'EndTime' => end_time,
      'Period' => 3600,
      'Unit' => "None",
      'MetricName' => "EstimatedCharges",
      'Namespace' => "AWS/Billing",
      'Dimensions' => [
        { 'Name' => "ServiceName", 'Value' => service },
        { 'Name' => "Currency", 'Value' => "USD" }
      ]
    }

    @cloudwatch.get_metric_statistics(query).body['GetMetricStatisticsResult']['Datapoints']
  end
end
|
78
|
-
|
79
|
-
Riemann::Tools::AWSBilling.run
|
data/bin/riemann-aws-rds-status
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'rubygems'
|
3
|
-
require 'fog'
|
4
|
-
require 'date'
|
5
|
-
require 'time'
|
6
|
-
require 'json'
|
7
|
-
|
8
|
-
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
9
|
-
|
10
|
-
$0 = __FILE__ # Let's not expose our AWS keys in the process list
|
11
|
-
|
12
|
-
# Reports a fixed set of CloudWatch AWS/RDS metrics for one DB instance.
class Riemann::Tools::AWS
  include Riemann::Tools

  # CloudWatch metric names queried on every tick.
  RDS_METRICS = ['DatabaseConnections', 'FreeableMemory', 'FreeStorageSpace', 'NetworkReceiveThroughput', 'NetworkTransmitThroughput', 'ReadThroughput', 'CPUUtilization'].freeze

  opt :access_key, "AWS access key", :type => String
  opt :secret_key, "Secret access key", :type => String
  opt :region, "AWS region", :type => String, :default => 'eu-west-1'
  opt :dbinstance_identifier, "DBInstanceIdentifier", :type => String

  # Aborts when no DB instance was named, otherwise builds the CloudWatch
  # client from the key and region options.
  def initialize
    abort "FATAL: specify a DB instance name, see --help for usage" unless opts[:dbinstance_identifier]
    @cloudwatch = Fog::AWS::CloudWatch.new(:aws_access_key_id => opts[:access_key],
                                           :aws_secret_access_key => opts[:secret_key],
                                           :region => opts[:region])
  end

  # Fetches the per-minute average of each metric over the last two minutes
  # and reports the most recent datapoint, if any.
  def tick
    time = Time.new
    RDS_METRICS.each do |metric|
      result = @cloudwatch.get_metric_statistics({
        "Namespace" => 'AWS/RDS',
        "MetricName" => "#{metric}",
        "Statistics" => 'Average',
        "Dimensions" => [{"Name" => "DBInstanceIdentifier", "Value" => "#{opts[:dbinstance_identifier]}"}],
        "StartTime" => (time-120).to_time.iso8601,
        "EndTime" => time.to_time.iso8601,
        "Period" => 60
      })
      metrics_result = result.data[:body]['GetMetricStatisticsResult']
      # NOTE(review): this prints every raw result to stdout on each tick and
      # looks like leftover debug output; kept because removing it changes
      # what the process emits.
      puts JSON.dump(metrics_result)

      datapoints = metrics_result['Datapoints']
      next if datapoints.empty?

      # The 120 s window with a 60 s period can yield two datapoints, and the
      # first one is not necessarily the newest, so pick by timestamp.
      # Assumes Fog exposes 'Timestamp' as a Time; to_f keeps this crash-free
      # if it is missing -- TODO confirm against the Fog parser.
      datapoint = datapoints.max_by { |dp| dp['Timestamp'].to_f }

      ev = {:metric => datapoint['Average'],
            :service => "#{opts[:dbinstance_identifier]}.#{metric} (#{datapoint['Unit']})",
            :description => JSON.dump(metrics_result),
            :state => "ok",
            :ttl => 300}

      report ev
    end
  end
end
|
47
|
-
|
48
|
-
Riemann::Tools::AWS.run
|
data/bin/riemann-aws-status
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'rubygems'
|
3
|
-
require 'fog'
|
4
|
-
require 'date'
|
5
|
-
|
6
|
-
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
7
|
-
|
8
|
-
$0 = __FILE__ # Let's not expose our AWS keys in the process list
|
9
|
-
|
10
|
-
# Reports scheduled EC2 instance events (retirement, reboot, ...) for every
# running instance, one event per scheduled item.
class Riemann::Tools::AWS
  include Riemann::Tools

  opt :access_key, "AWS access key", :type => String
  opt :secret_key, "Secret access key", :type => String
  opt :region, "AWS region", :type => String, :default => 'eu-west-1'

  opt :retirement_critical, "Number of days before retirement. Defaults to 2", :default => 2
  # An explicit Integer type is given because a nil default carries no type
  # information for the option parser to infer a numeric argument from.
  opt :event_warning, "Number of days before event. Defaults to nil (i.e. when the event appears)", :type => Integer, :default => nil

  # Builds the Fog compute client from the key and region options.
  def initialize
    @compute = Fog::Compute.new(:aws_access_key_id => opts[:access_key],
                                :aws_secret_access_key => opts[:secret_key],
                                :region => opts[:region],
                                :provider => 'AWS')
  end

  # Joins instance status entries with the running servers and reports each
  # scheduled event under the server's private DNS name.
  def tick
    status_by_id = @compute.describe_instance_status.body["instanceStatusSet"].each_with_object({}) do |entry, acc|
      acc[entry.delete("instanceId")] = entry
    end

    running = @compute.servers.select { |server| server.state == "running" }
    hosts = running.each_with_object({}) do |server, acc|
      acc[server.private_dns_name] = status_by_id.delete(server.id)
    end

    hosts.each do |host, status|
      # A running server may have no status entry; there is nothing to report.
      next unless status

      (status['eventsSet'] || []).each do |event|
        report(:host => host,
               :service => "aws_instance_status",
               :description => "#{event['code']}\n\nstart #{event['notBefore']}\nend #{event['notAfter']}\n\n#{event['description']}",
               :state => event_state(event),
               :ttl => 300)
      end
    end
  end

  private

  # Maps a scheduled event to a state string:
  #   "critical" - instance retirement starting within :retirement_critical days
  #   "warning"  - no numeric :event_warning configured, unknown start date,
  #                or the event starts within :event_warning days
  #   "ok"       - :event_warning is set and the event is still further away
  def event_state(event)
    starts_on = event['notBefore'] ? Date.parse(event['notBefore'].to_s) : nil
    today = Date.today
    warning_lead = opts[:event_warning]

    if event['code'] == 'instance-retirement' && starts_on && today >= starts_on - opts[:retirement_critical]
      "critical"
    elsif !warning_lead.is_a?(Numeric) || starts_on.nil? || today >= starts_on - warning_lead
      "warning"
    else
      "ok"
    end
  end
end
|
63
|
-
|
64
|
-
Riemann::Tools::AWS.run
|
data/bin/riemann-elasticsearch
DELETED
@@ -1,91 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
4
|
-
|
5
|
-
# Polls the Elasticsearch `_cluster/health` endpoint and reports the cluster
# state plus one event per remaining field of the health document.
class Riemann::Tools::Elasticsearch
  include Riemann::Tools

  require 'faraday'
  require 'json'
  require 'uri'

  opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
  opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
  opt :path_prefix, 'Elasticsearch path prefix for proxied installations e.g. "els" for target http://localhost/els/_cluster/health', default: "/"
  opt :es_host, 'Elasticsearch host', default: "localhost"
  opt :es_port, 'Elasticsearch port', type: :int, default: 9200

  # Cluster colour => reported state. Any other status maps to nil.
  STATE_FOR_STATUS = { "green" => "ok", "yellow" => "warning", "red" => "critical" }.freeze

  # Performs a GET on +uri+ with the configured timeouts.
  # Returns the response, or nil after reporting a critical event when the
  # request raises.
  def safe_get(uri)
    Faraday.new(uri).get do |req|
      req.options[:timeout] = options[:read_timeout]
      req.options[:open_timeout] = options[:open_timeout]
    end
  rescue => e
    report(:host => uri.host,
           :service => "elasticsearch health",
           :state => "critical",
           :description => "HTTP connection error: #{e.class} - #{e.message}"
    )
    nil
  end

  # Builds the health URL from host, port and the optional path prefix.
  # Leading and trailing slashes of the prefix are stripped on a copy, so the
  # shared option string is never modified in place.
  def health_url
    prefix = options[:path_prefix].to_s.gsub(%r{\A/+|/+\z}, '')
    separator = prefix.empty? ? '' : '/'
    "http://#{options[:es_host]}:#{options[:es_port]}#{separator}#{prefix}/_cluster/health"
  end

  # Fetches the health document and reports it; HTTP failures and unparsable
  # bodies are reported as critical instead.
  def tick
    uri = URI(health_url)
    response = safe_get(uri)

    return if response.nil?

    if response.status != 200
      report(:host => uri.host,
             :service => "elasticsearch health",
             :state => "critical",
             :description => "HTTP connection error: #{response.status} - #{response.body}"
      )
      return
    end

    begin
      json = JSON.parse(response.body)
    rescue JSON::ParserError => e
      # A 200 is not guaranteed to carry JSON (e.g. a proxy error page);
      # report it rather than letting the exception escape the tick.
      report(:host => uri.host,
             :service => "elasticsearch health",
             :state => "critical",
             :description => "Invalid JSON in health response: #{e.class} - #{e.message}"
      )
      return
    end

    cluster_name = json.delete("cluster_name")
    cluster_status = json.delete("status")

    report(:host => uri.host,
           :service => "elasticsearch health",
           :state => STATE_FOR_STATUS[cluster_status],
           :description => "Elasticsearch cluster: #{cluster_name} - #{cluster_status}")

    # Every remaining field of the health document becomes its own event.
    json.each_pair do |k, v|
      report(:host => uri.host,
             :service => "elasticsearch #{k}",
             :metric => v,
             :description => "Elasticsearch cluster #{k}"
      )
    end
  end
end
|
91
|
-
Riemann::Tools::Elasticsearch.run
|
data/bin/riemann-elb-metrics
DELETED
@@ -1,154 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
4
|
-
|
5
|
-
$0 = __FILE__
|
6
|
-
|
7
|
-
# Pulls CloudWatch AWS/ELB metrics for the configured load balancers and
# availability zones and reports one event per returned statistic.
class Riemann::Tools::ELBMetrics
  include Riemann::Tools

  require 'fog'
  require 'time'

  opt :fog_credentials_file, "Fog credentials file", :type => String
  opt :fog_credential, "Fog credentials to use", :type => String
  opt :aws_access, "AWS Access Key", :type => String
  opt :aws_secret, "AWS Secret Key", :type => String
  opt :aws_region, "AWS Region", :type => String, :default => "eu-west-1"
  opt :aws_azs, "List of AZs to aggregate against", :type => :strings, :default => [ "all_az" ]
  opt :elbs, "List of ELBs to pull metrics from", :type => :strings, :required => true

  # Returns a fresh hash of metric name => per-metric query options.
  # ELB metric types, from:
  # http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/CW_Support_For_AWS.html#elb-metricscollected
  def standard_metrics
    # Every entry uses the "Unit" key; eight entries previously used "Units",
    # inconsistent with the first two.
    metric_options = {
      "Latency" => { "Unit" => "Seconds", "Statistics" => ["Maximum", "Minimum", "Average"] },
      "RequestCount" => { "Unit" => "Count", "Statistics" => ["Sum"] },
      "HealthyHostCount" => { "Unit" => "Count", "Statistics" => ["Minimum", "Maximum", "Average"] },
      "UnHealthyHostCount" => { "Unit" => "Count", "Statistics" => ["Minimum", "Maximum", "Average"] }
    }

    %w[HTTPCode_ELB_4XX HTTPCode_ELB_5XX
       HTTPCode_Backend_2XX HTTPCode_Backend_3XX
       HTTPCode_Backend_4XX HTTPCode_Backend_5XX].each do |name|
      metric_options[name] = { "Unit" => "Count", "Statistics" => ["Sum"] }
    end

    metric_options
  end

  # Returns the query options shared by all metrics: namespace, a window
  # covering the last 60 seconds, and a 60 second period.
  def base_metrics
    {
      "Namespace" => "AWS/ELB",
      "StartTime" => (Time.now.utc - 60).iso8601,
      "EndTime" => Time.now.utc.iso8601,
      "Period" => 60,
    }
  end

  # Queries every metric for every ELB/AZ pair and reports each statistic of
  # the first returned datapoint.
  def tick
    connection = cloudwatch_connection

    options[:elbs].each do |lb|
      metric_options = standard_metrics
      metric_base_options = base_metrics

      options[:aws_azs].each do |az|
        metric_options.keys.sort.each do |metric_type|
          merged_options = metric_base_options.merge(metric_options[metric_type])
          merged_options["MetricName"] = metric_type
          merged_options["Dimensions"] = dimensions_for(lb, az)

          result = connection.get_metric_statistics(merged_options)
          datapoints = result.body["GetMetricStatisticsResult"]["Datapoints"]

          # "If no response codes in the category 2XX-5XX range are sent to clients within
          # the given time period, values for these metrics will not be recorded in CloudWatch"
          #
          # BUG:
          # Metrics are reported every 60 seconds, but sometimes there isn't one there yet.
          # We can skip that, or do something else?
          next if datapoints.empty?

          # We should only ever have a single data point
          datapoint = datapoints[0]
          unit = datapoint["Unit"]

          datapoint.keys.sort.each do |stat_type|
            next if stat_type == "Unit" || stat_type == "Timestamp"

            event = {
              host: lb,
              service: "elb.#{az}.#{metric_type}.#{stat_type}",
              ttl: 60,
              description: "#{lb} #{metric_type} #{stat_type} (#{unit})",
              tags: [ "production", "elb_metrics" ],
              metric: datapoint[stat_type]
            }

            report(event)
          end
        end
      end
    end
  end

  private

  # Builds a CloudWatch client from a Fog credentials file when one is
  # configured, otherwise from the explicit key/region options.
  def cloudwatch_connection
    if options[:fog_credentials_file]
      Fog.credentials_path = options[:fog_credentials_file]
      # --fog-credential is optional; calling to_sym on nil would raise.
      Fog.credential = options[:fog_credential].to_sym if options[:fog_credential]
      Fog::AWS::CloudWatch.new
    else
      Fog::AWS::CloudWatch.new({
        :aws_access_key_id => options[:aws_access],
        :aws_secret_access_key => options[:aws_secret],
        :region => options[:aws_region]
      })
    end
  end

  # Returns the CloudWatch dimensions for +lb+; the pseudo zone "all_az"
  # means no AvailabilityZone dimension is added.
  def dimensions_for(lb, az)
    dimensions = [ { "Name" => "LoadBalancerName", "Value" => lb } ]
    dimensions << { "Name" => "AvailabilityZone" , "Value" => az } unless az == "all_az"
    dimensions
  end
end
|
153
|
-
|
154
|
-
Riemann::Tools::ELBMetrics.run
|
data/bin/riemann-munin
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# Gathers munin statistics and submits them to Riemann.
|
4
|
-
|
5
|
-
require File.expand_path('../../lib/riemann/tools', __FILE__)
|
6
|
-
|
7
|
-
# Gathers statistics from a munin node and reports every fetched value.
class Riemann::Tools::Munin
  include Riemann::Tools
  require 'munin-ruby'

  opt :munin_host, "Munin hostname", :default => 'localhost'
  opt :munin_port, "Munin port", :default => 4949
  opt :services, "Munin services to translate (if not specified, all services are relayed)", :type => :strings

  # Connects to the munin node named by --munin-host / --munin-port.
  # These options were previously declared but never passed to the node, so
  # the node's own built-in defaults were always used.
  def initialize
    @munin = ::Munin::Node.new(opts[:munin_host], opts[:munin_port])
  end

  # Fetches each requested service (or every service the node lists) and
  # reports one event per service part, with the value coerced to a float.
  def tick
    requested = opts[:services] || @munin.list
    requested.each do |requested_service|
      @munin.fetch(requested_service).each do |service, parts|
        parts.each do |part, metric|
          report(
            :service => "#{service} #{part}",
            :metric => metric.to_f,
            :state => 'ok',
            :tags => ['munin']
          )
        end
      end
    end
  end
end
|
35
|
-
|
36
|
-
Riemann::Tools::Munin.run
|