riemann-tools 0.2.14 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.docker/Dockerfile +7 -0
- data/.docker/publish.sh +35 -0
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +42 -0
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.rubocop.yml +32 -0
- data/.travis.yml +31 -0
- data/CHANGELOG.md +430 -0
- data/Gemfile +6 -0
- data/ISSUE_TEMPLATE.md +15 -0
- data/README.markdown +13 -16
- data/Rakefile +23 -0
- data/SECURITY.md +42 -0
- data/bin/riemann-apache-status +92 -77
- data/bin/riemann-bench +54 -48
- data/bin/riemann-cloudant +44 -39
- data/bin/riemann-consul +82 -75
- data/bin/riemann-dir-files-count +53 -46
- data/bin/riemann-dir-space +53 -46
- data/bin/riemann-diskstats +78 -74
- data/bin/riemann-fd +68 -47
- data/bin/riemann-freeswitch +108 -102
- data/bin/riemann-haproxy +46 -39
- data/bin/riemann-health +4 -335
- data/bin/riemann-kvminstance +18 -12
- data/bin/riemann-memcached +35 -28
- data/bin/riemann-net +4 -103
- data/bin/riemann-nginx-status +74 -66
- data/bin/riemann-ntp +4 -32
- data/bin/riemann-portcheck +40 -30
- data/bin/riemann-proc +96 -89
- data/bin/riemann-varnish +51 -44
- data/bin/riemann-zookeeper +38 -33
- data/lib/riemann/tools/health.rb +347 -0
- data/lib/riemann/tools/net.rb +104 -0
- data/lib/riemann/tools/ntp.rb +41 -0
- data/lib/riemann/tools/utils.rb +17 -0
- data/lib/riemann/tools/version.rb +7 -0
- data/lib/riemann/tools.rb +38 -31
- data/riemann-tools.gemspec +42 -0
- data/tools/riemann-aws/LICENSE +21 -0
- data/tools/riemann-aws/README.md +54 -0
- data/tools/riemann-aws/Rakefile +37 -0
- data/tools/riemann-aws/bin/riemann-aws-billing +93 -0
- data/tools/riemann-aws/bin/riemann-aws-rds-status +68 -0
- data/tools/riemann-aws/bin/riemann-aws-sqs-status +50 -0
- data/tools/riemann-aws/bin/riemann-aws-status +83 -0
- data/tools/riemann-aws/bin/riemann-elb-metrics +168 -0
- data/tools/riemann-aws/bin/riemann-s3-list +87 -0
- data/tools/riemann-aws/bin/riemann-s3-status +102 -0
- data/tools/riemann-chronos/LICENSE +21 -0
- data/tools/riemann-chronos/README.md +10 -0
- data/tools/riemann-chronos/Rakefile +37 -0
- data/tools/riemann-chronos/bin/riemann-chronos +161 -0
- data/tools/riemann-docker/LICENSE +21 -0
- data/tools/riemann-docker/README.md +10 -0
- data/tools/riemann-docker/Rakefile +36 -0
- data/tools/riemann-docker/bin/riemann-docker +206 -0
- data/tools/riemann-elasticsearch/LICENSE +21 -0
- data/tools/riemann-elasticsearch/README.md +10 -0
- data/tools/riemann-elasticsearch/Rakefile +37 -0
- data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +174 -0
- data/tools/riemann-marathon/LICENSE +21 -0
- data/tools/riemann-marathon/README.md +10 -0
- data/tools/riemann-marathon/Rakefile +37 -0
- data/tools/riemann-marathon/bin/riemann-marathon +163 -0
- data/tools/riemann-mesos/LICENSE +21 -0
- data/tools/riemann-mesos/README.md +10 -0
- data/tools/riemann-mesos/Rakefile +37 -0
- data/tools/riemann-mesos/bin/riemann-mesos +146 -0
- data/tools/riemann-munin/LICENSE +21 -0
- data/tools/riemann-munin/README.md +10 -0
- data/tools/riemann-munin/Rakefile +36 -0
- data/tools/riemann-munin/bin/riemann-munin +43 -0
- data/tools/riemann-rabbitmq/LICENSE +21 -0
- data/tools/riemann-rabbitmq/README.md +10 -0
- data/tools/riemann-rabbitmq/Rakefile +37 -0
- data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +273 -0
- data/tools/riemann-riak/LICENSE +21 -0
- data/tools/riemann-riak/README.md +10 -0
- data/tools/riemann-riak/Rakefile +36 -0
- data/tools/riemann-riak/bin/riemann-riak +323 -0
- data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
- data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
- data/tools/riemann-riak/riak_status/key_count.erl +13 -0
- data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
- data/tools/riemann-riak/riak_status/ringready.erl +9 -0
- metadata +186 -37
@@ -0,0 +1,206 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
|
+
|
6
|
+
# Reports current CPU, disk, load average, and memory use to riemann.
|
7
|
+
|
8
|
+
require 'riemann/tools'
|
9
|
+
|
10
|
+
module Riemann
|
11
|
+
module Tools
|
12
|
+
class DockerHealth
|
13
|
+
require 'docker'
|
14
|
+
require 'socket'
|
15
|
+
include Riemann::Tools
|
16
|
+
include Docker
|
17
|
+
|
18
|
+
opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
|
19
|
+
default: nil
|
20
|
+
opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
|
21
|
+
opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
|
22
|
+
opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
|
23
|
+
opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
|
24
|
+
opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
|
25
|
+
opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
|
26
|
+
opt :host_hostname, 'Suffix of host', type: String, default: nil
|
27
|
+
opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
|
28
|
+
|
29
|
+
# Returns whatever `Docker::Container.all` yields; `tick` iterates this list
# and inspects each entry.
def containers
  Docker::Container.all
end
|
32
|
+
|
33
|
+
# Returns the container's inspect 'Name' with its first character dropped
# (presumably the leading "/" Docker prefixes names with -- confirm).
def get_container_name(container)
  full_name = container.json['Name']
  full_name[1..-1]
end
|
36
|
+
|
37
|
+
# Caches everything the checks need from the parsed command-line options:
# the Docker endpoint, the host name used in events, the warning/critical
# thresholds and which checks are enabled.
def initialize
  docker_host = opts[:docker_host]
  Docker.url = docker_host unless docker_host.nil?

  # Fall back to the machine's host name unless a non-empty string was given.
  configured = opts[:host_hostname]
  @hostname = configured.is_a?(String) && !configured.empty? ? configured : Socket.gethostname

  @cpu_coefficient = 1_000_000_000

  @limits = %i[cpu disk memory].each_with_object({}) do |check, limits|
    limits[check] = {
      critical: opts[:"#{check}_critical"],
      warning: opts[:"#{check}_warning"],
    }
  end

  # Previous samples, keyed by container id.
  @last_cpu_reads = {}
  @last_uptime_reads = {}

  # A flag is only ever set to true; checks that were not requested stay unset.
  flags = {
    'disk' => :@disk_enabled,
    'cpu' => :@cpu_enabled,
    'memory' => :@memory_enabled,
    'basic' => :@basic_inspection_enabled,
  }
  opts[:checks].each do |check|
    flag = flags[check]
    instance_variable_set(flag, true) if flag
  end
end
|
67
|
+
|
68
|
+
# Sends one event through `report`.
#
# container   - container name appended to the host ("<hostname>-<container>"),
#               or nil to report against the bare host name.
# service     - event service (converted with to_s).
# state       - event state (converted with to_s).
# metric      - event metric (converted with to_f, so nil becomes 0.0).
# description - free-form event description.
def alert(container, service, state, metric, description)
  host = container.nil? ? @hostname : "#{@hostname}-#{container}"

  report(
    service: service.to_s,
    state: state.to_s,
    metric: metric.to_f,
    description: description,
    host: host,
  )
end
|
84
|
+
|
85
|
+
# Reports a fraction (0.0..1.0) as a percentage, choosing the state by
# comparing it with the thresholds stored in @limits[service].
#
# container - container name handed to `alert` (nil for host-level checks).
# service   - key into @limits (:cpu, :disk or :memory).
# fraction  - value to report; nothing is sent when it is nil/false.
# report    - text appended after the percentage in the description.
# name      - service name for the event; defaults to `service`.
def report_pct(container, service, fraction, report = '', name = nil)
  return unless fraction

  thresholds = @limits[service]
  level =
    if fraction > thresholds[:critical]
      :critical
    elsif fraction > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert(container, name.nil? ? service : name, level, fraction,
        "#{format('%.2f', fraction * 100)}% #{report}")
end
|
98
|
+
|
99
|
+
# Reports the CPU use of one container as a fraction, derived from the change
# in per-CPU-averaged `total_usage` between two consecutive calls.
#
# id    - container id, used as the key for the previous sample.
# name  - container name used in the reported event.
# stats - parsed response of the container stats endpoint.
#
# Returns false (after sending an :unknown alert) when the usage counters are
# missing; otherwise returns the sample stored for the next call.
def cpu(id, name, stats)
  usage = stats.dig('precpu_stats', 'cpu_usage') || {}
  total = usage['total_usage']
  per_cpu = usage['percpu_usage']

  # Check the raw fields before dividing: a nil counter or an empty per-CPU
  # list would otherwise raise before the "unknown" alert could be sent.
  if total.nil? || per_cpu.nil? || per_cpu.empty?
    alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
    return false
  end

  current = total / per_cpu.count
  # NOTE(review): the counters come from 'precpu_stats' while the timestamp is
  # 'read' -- confirm that pairing is intended.
  current_time = Time.parse(stats['read'])

  last = @last_cpu_reads[id]
  unless last.nil?
    elapsed = current_time - last[:t]
    # Two samples with the same timestamp would divide by zero and report
    # Infinity/NaN, so only report when time has actually advanced.
    report_pct name, :cpu, (current - last[:v]) / elapsed / @cpu_coefficient if elapsed.positive?
  end

  @last_cpu_reads[id] = { v: current, t: current_time }
end
|
117
|
+
|
118
|
+
# Reports the memory use of one container as usage / limit.
#
# _id   - container id (unused).
# name  - container name used in the reported event.
# stats - parsed response of the container stats endpoint.
#
# Returns false (after sending an :unknown alert) when no positive memory
# limit is available.
def memory(_id, name, stats)
  memory_stats = stats['memory_stats'] || {}
  usage = memory_stats['usage'].to_f
  total = memory_stats['limit'].to_f

  # A missing or zero limit would make the division yield NaN/Infinity, which
  # compares false against every threshold and would be reported as "ok".
  unless total.positive?
    alert name, :memory, :unknown, nil, 'no memory limit found in docker remote api stats'
    return false
  end

  report_pct name, :memory, usage / total, "#{usage} / #{total}"
end
|
126
|
+
|
127
|
+
# Runs `df -P` and reports the used fraction of every listed filesystem whose
# first column contains a slash (the header row and slash-less entries such as
# "tmpfs" are skipped).
def disk
  `df -P`.split("\n").each do |row|
    device, _blocks, _used, available, capacity, mount = row.split(/\s+/)
    next if device == 'Filesystem'
    next unless device =~ %r{/} # Needs at least one slash in the first column

    # The capacity column looks like "42%"; to_f ignores the trailing sign.
    used_fraction = capacity.to_f / 100
    report_pct(nil, :disk, used_fraction, "#{available.to_i / 1024} mb left", "disk #{mount}")
  end
end
|
138
|
+
|
139
|
+
# Reports whether a container is running and, from the second call on,
# whether its 'StartedAt' timestamp changed since the previous call.
#
# id         - container id, used as the key for the previous start time.
# name       - container name used in the reported events.
# inspection - parsed container inspect document; only 'State' is read.
def basic_inspection(id, name, inspection)
  state = inspection['State']
  running = state['Running']

  # The whole State document is attached as the event description.
  alert(name, 'status', running ? 'ok' : 'critical', running ? 1 : 0, JSON.generate(state))
  return unless running

  started_at = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
  uptime = DateTime.now.to_time.utc.to_i - started_at

  previous = @last_uptime_reads[id]
  unless previous.nil?
    description = "last 'StartedAt' measure was #{previous} (#{Time.at(previous).utc}), " \
                  "now it's #{started_at} (#{Time.at(started_at).utc})"
    # A different start time than last seen is reported as critical.
    alert(name, 'uptime', started_at == previous ? 'ok' : 'critical', uptime, description)
  end

  @last_uptime_reads[id] = started_at
end
|
172
|
+
|
173
|
+
# Runs every enabled check once: the host-level disk check, then one thread
# per container for the inspection, CPU and memory checks.
#
# Errors raised inside a container thread surface on `join` and are written
# to stderr so one failing container does not abort the others.
def tick
  # Disk is the same in every container
  disk if @disk_enabled

  # The stats endpoint is only needed by the cpu and memory checks; skip the
  # request entirely when neither is enabled.
  needs_stats = @cpu_enabled || @memory_enabled

  threads = containers.map do |ctr|
    Thread.new(ctr) do |container|
      id = container.id
      name = get_container_name(container)

      # Inspect first so status/uptime is still reported if the stats
      # request below fails.
      if @basic_inspection_enabled
        inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
        basic_inspection(id, name, inspection)
      end

      next unless needs_stats

      stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
      cpu(id, name, stats) if @cpu_enabled
      memory(id, name, stats) if @memory_enabled
    end
  end

  threads.each do |thread|
    thread.join
  rescue StandardError => e
    warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
  end
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
Riemann::Tools::DockerHealth.run
|
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2011 Kyle Kingsbury
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rubygems/package_task'
|
5
|
+
require 'rdoc/task'
|
6
|
+
require 'find'
|
7
|
+
|
8
|
+
# Don't include resource forks in tarballs on Mac OS X.
|
9
|
+
ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
|
10
|
+
ENV['COPYFILE_DISABLE'] = 'true'
|
11
|
+
|
12
|
+
# Gemspec
|
13
|
+
# Gem specification for the riemann-elasticsearch tool.
# The deprecated `rubyforge_project=` and `has_rdoc=` setters are no longer
# called (RubyGems ignores them).
gemspec = Gem::Specification.new do |s|
  s.name = 'riemann-elasticsearch'
  s.version = '0.2.4'
  s.author = 'Gavin Sandie'
  s.email = 'beach@vicecity.co.uk'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits elasticsearch stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'faraday', '>= 0.8.5'
  s.add_dependency 'json'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Only the files under bin/ are executables. Dir.entries('bin/') also
  # returned "." and "..", which are not programs.
  s.executables |= FileList['bin/*'].map { |path| File.basename(path) }

  s.required_ruby_version = '>= 1.8.7'
end

# Keep the (empty) block: presumably Gem::PackageTask only defines its tasks
# when a block is given -- confirm before removing it.
Gem::PackageTask.new gemspec do |_pkg|
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
|
+
|
6
|
+
require 'riemann/tools'
|
7
|
+
|
8
|
+
module Riemann
|
9
|
+
module Tools
|
10
|
+
class Elasticsearch
|
11
|
+
include Riemann::Tools
|
12
|
+
require 'faraday'
|
13
|
+
require 'json'
|
14
|
+
require 'uri'
|
15
|
+
|
16
|
+
opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
|
17
|
+
opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
|
18
|
+
opt :path_prefix,
|
19
|
+
'Elasticsearch path prefix for proxied installations e.g. "els" for target http://localhost/els/_cluster/health', default: '/'
|
20
|
+
opt :es_host, 'Elasticsearch host', default: 'localhost'
|
21
|
+
opt :es_port, 'Elasticsearch port', type: :int, default: 9200
|
22
|
+
opt :es_search_index, 'Elasticsearch index to fetch search statistics for', default: '_all'
|
23
|
+
|
24
|
+
# Handles HTTP connections and GET requests safely
|
25
|
+
# Issues a GET for +uri+ through Faraday with the configured read/open
# timeouts.
#
# Returns the response, or nil when any StandardError was raised; in that
# case a critical 'elasticsearch health' event describing the error is sent.
def safe_get(uri)
  Faraday.new(uri).get do |request|
    request.options[:timeout] = options[:read_timeout]
    request.options[:open_timeout] = options[:open_timeout]
  end
rescue StandardError => e
  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: 'critical',
    description: "HTTP connection error: #{e.class} - #{e.message}",
  )
  nil
end
|
44
|
+
|
45
|
+
# Builds "http://<es_host>:<es_port>[/<path_prefix>]/<path>".
#
# At most one leading and one trailing slash are removed from the configured
# path prefix. The prefix is copied rather than edited in place: the option
# string is shared between calls and raises FrozenError on mutation when it
# is frozen (for example a literal default under `frozen_string_literal`).
def make_es_url(path)
  prefix = options[:path_prefix].sub(%r{\A/}, '').sub(%r{/\z}, '')
  base = "http://#{options[:es_host]}:#{options[:es_port]}"

  prefix.empty? ? "#{base}/#{path}" : "#{base}/#{prefix}/#{path}"
end
|
51
|
+
|
52
|
+
# URL of the `_cluster/health` endpoint.
def health_url
  make_es_url '_cluster/health'
end
|
55
|
+
|
56
|
+
# URL of the `_stats/store` endpoint.
def indices_url
  make_es_url '_stats/store'
end
|
59
|
+
|
60
|
+
# URL of the `_stats/search` endpoint for the configured search index.
def search_url
  make_es_url("#{options[:es_search_index]}/_stats/search")
end
|
64
|
+
|
65
|
+
# Tells whether +response+ is unusable and, if so, reports why.
#
# response - the object returned by `safe_get`; may be nil.
# uri      - the requested URI; its host is used for the event.
#
# Returns false for a successful response. Otherwise sends a critical
# 'elasticsearch health' event and returns true; an explicit boolean is
# returned so callers' `return if bad?(...)` does not depend on what
# `report` happens to return.
def bad?(response, uri)
  # Safe navigation: a nil response must take the failure path, not raise.
  return false if response&.success?

  description =
    if response.nil?
      'HTTP response is empty!'
    else
      "HTTP connection error: #{response.status} - #{response.body}"
    end

  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: 'critical',
    description: description,
  )
  true
end
|
77
|
+
|
78
|
+
# Fetches the store statistics and reports, for every index in the response,
# the size in bytes of its primaries and of its total.
def tick_indices
  uri = URI(indices_url)
  response = safe_get(uri)
  return if bad?(response, uri)

  # Assuming that a 200 will give json
  JSON.parse(response.body)['indices'].each_pair do |index, stats|
    %w[primaries total].each do |scope|
      report(
        host: uri.host,
        service: "elasticsearch index/#{index}/#{scope}/size_in_bytes",
        metric: stats[scope]['store']['size_in_bytes'],
      )
    end
  end
end
|
100
|
+
|
101
|
+
# Fetches the search statistics and reports the average query and fetch
# time in milliseconds (time_in_millis / total) for every entry under
# '_all' in the response.
#
# When a total is zero or absent the average is reported as 0.0, because the
# division would otherwise produce a NaN or Infinity metric.
def tick_search
  uri = URI(search_url)
  response = safe_get(uri)
  return if bad?(response, uri)

  es_search_index = options[:es_search_index]
  # Assuming that a 200 will give json
  json = JSON.parse(response.body)

  json['_all'].each_pair do |_type, data|
    search = data['search']

    %w[query fetch].each do |phase|
      count = search["#{phase}_total"]
      average = count.to_i.zero? ? 0.0 : search["#{phase}_time_in_millis"].to_f / count

      report(
        host: uri.host,
        service: "elasticsearch search/#{es_search_index}/#{phase}",
        metric: average,
      )
    end
  end
end
|
127
|
+
|
128
|
+
# One polling cycle: index sizes and search timings first (any error there is
# reported as a critical 'elasticsearch error' event), then the cluster
# health document, whose remaining fields are each reported as a metric.
def tick
  begin
    tick_indices
    tick_search
  rescue StandardError => e
    report(
      host: options[:es_host],
      service: 'elasticsearch error',
      state: 'critical',
      description: "Elasticsearch cluster error: #{e.message}",
    )
  end

  uri = URI(health_url)
  response = safe_get(uri)
  return if bad?(response, uri)

  # Assuming that a 200 will give json
  health = JSON.parse(response.body)
  cluster_name = health.delete('cluster_name')
  cluster_status = health.delete('status')

  # Any status other than green/yellow/red leaves the state nil.
  state =
    case cluster_status
    when 'green' then 'ok'
    when 'yellow' then 'warning'
    when 'red' then 'critical'
    end

  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: state,
    description: "Elasticsearch cluster: #{cluster_name} - #{cluster_status}",
  )

  # Name and status were removed above; every other field becomes an event.
  health.each do |key, value|
    report(
      host: uri.host,
      service: "elasticsearch #{key}",
      metric: value,
      description: "Elasticsearch cluster #{key}",
    )
  end
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
Riemann::Tools::Elasticsearch.run
|
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2011 Kyle Kingsbury
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rubygems/package_task'
|
5
|
+
require 'rdoc/task'
|
6
|
+
require 'find'
|
7
|
+
|
8
|
+
# Don't include resource forks in tarballs on Mac OS X.
|
9
|
+
ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
|
10
|
+
ENV['COPYFILE_DISABLE'] = 'true'
|
11
|
+
|
12
|
+
# Gemspec
|
13
|
+
# Gem specification for the riemann-marathon tool.
# The deprecated `rubyforge_project=` and `has_rdoc=` setters are no longer
# called (RubyGems ignores them).
gemspec = Gem::Specification.new do |s|
  s.name = 'riemann-marathon'
  s.version = '0.1.3'
  s.author = 'Giulio Eulisse'
  s.email = 'giulio.eulisse@cern.ch'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits Marathon stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'faraday', '>= 0.8.5'
  s.add_dependency 'json'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Only the files under bin/ are executables. Dir.entries('bin/') also
  # returned "." and "..", which are not programs.
  s.executables |= FileList['bin/*'].map { |path| File.basename(path) }

  s.required_ruby_version = '>= 1.8.7'
end

# Keep the (empty) block: presumably Gem::PackageTask only defines its tasks
# when a block is given -- confirm before removing it.
Gem::PackageTask.new gemspec do |_pkg|
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
Process.setproctitle($PROGRAM_NAME)
|
5
|
+
|
6
|
+
require 'riemann/tools'
|
7
|
+
|
8
|
+
module Riemann
|
9
|
+
module Tools
|
10
|
+
class Marathon
|
11
|
+
include Riemann::Tools
|
12
|
+
|
13
|
+
require 'faraday'
|
14
|
+
require 'json'
|
15
|
+
require 'uri'
|
16
|
+
|
17
|
+
opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
|
18
|
+
opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
|
19
|
+
opt :path_prefix,
|
20
|
+
'Marathon path prefix for proxied installations e.g. "marathon" for target http://localhost/marathon/metrics', default: '/'
|
21
|
+
opt :marathon_host, 'Marathon host', default: 'localhost'
|
22
|
+
opt :marathon_port, 'Marathon port', type: :int, default: 8080
|
23
|
+
|
24
|
+
# Forces the polling interval to 60 and the event TTL to 120, overwriting
# whatever the parsed options contained for those two keys.
def initialize
  { interval: 60, ttl: 120 }.each do |key, value|
    options[key] = value
  end
end
|
28
|
+
|
29
|
+
# Handles HTTP connections and GET requests safely
|
30
|
+
# Issues a GET for +uri+ through Faraday with the configured read/open
# timeouts.
#
# Returns the response, or nil when any StandardError was raised; in that
# case a critical 'marathon health' event describing the error is sent.
def safe_get(uri)
  Faraday.new(uri).get do |request|
    request.options[:timeout] = options[:read_timeout]
    request.options[:open_timeout] = options[:open_timeout]
  end
rescue StandardError => e
  report(
    host: uri.host,
    service: 'marathon health',
    state: 'critical',
    description: "HTTP connection error: #{e.class} - #{e.message}",
  )
  nil
end
|
49
|
+
|
50
|
+
# Builds "http://<marathon_host>:<marathon_port>[/<path_prefix>]/metrics".
#
# At most one leading and one trailing slash are removed from the configured
# path prefix. The prefix is copied rather than edited in place: the option
# string is shared between calls and raises FrozenError on mutation when it
# is frozen (for example a literal default under `frozen_string_literal`).
def health_url
  prefix = options[:path_prefix].sub(%r{\A/}, '').sub(%r{/\z}, '')
  base = "http://#{options[:marathon_host]}:#{options[:marathon_port]}"

  prefix.empty? ? "#{base}/metrics" : "#{base}/#{prefix}/metrics"
end
|
56
|
+
|
57
|
+
# Builds "http://<marathon_host>:<marathon_port>[/<path_prefix>]/v2/apps".
#
# At most one leading and one trailing slash are removed from the configured
# path prefix. The prefix is copied rather than edited in place: the option
# string is shared between calls and raises FrozenError on mutation when it
# is frozen (for example a literal default under `frozen_string_literal`).
def apps_url
  prefix = options[:path_prefix].sub(%r{\A/}, '').sub(%r{/\z}, '')
  base = "http://#{options[:marathon_host]}:#{options[:marathon_port]}"

  prefix.empty? ? "#{base}/v2/apps" : "#{base}/#{prefix}/v2/apps"
end
|
63
|
+
|
64
|
+
# One polling cycle: the metrics endpoint first, then the apps endpoint.
def tick
  tick_health
  tick_apps
end
|
68
|
+
|
69
|
+
# Polls the metrics endpoint.
#
# A non-200 answer is reported as a critical 'marathon health' event. On 200
# an 'ok' health event is sent, then for every nested object in the JSON
# document: one marker event per entry ("marathon_metric <type> <name>") and
# one metric event per numeric field of that entry. Nothing is reported when
# `safe_get` returned nil.
def tick_health
  uri = URI(health_url)
  response = safe_get(uri)
  return if response.nil?

  unless response.status == 200
    return report(
      host: uri.host,
      service: 'marathon health',
      state: 'critical',
      description: "HTTP connection error: #{response.status} - #{response.body}",
    )
  end

  # Assuming that a 200 will give json
  metrics = JSON.parse(response.body)

  report(
    host: uri.host,
    service: 'marathon health',
    state: 'ok',
  )

  metrics.each_pair do |type, services|
    # Top-level scalars carry no per-service entries.
    next unless services.respond_to? :each_pair

    services.each_pair do |service, counters|
      report(
        host: uri.host,
        service: "marathon_metric #{type} #{service}",
        metric: 1,
        tags: ['metric_name'],
        ttl: 600,
      )
      next unless counters.respond_to? :each_pair

      counters.select { |_key, value| value.is_a? Numeric }.each do |key, value|
        report(
          host: uri.host,
          service: "marathon #{service} #{key}",
          metric: value,
          tags: ['metric', type.to_s],
          ttl: 600,
        )
      end
    end
  end
end
|
121
|
+
|
122
|
+
# Polls the apps endpoint.
#
# A non-200 answer is reported as a critical 'marathon health' event. On 200
# an 'ok' health event is sent, followed by one metric event per numeric
# field of every entry in the document's 'apps' list. Nothing is reported
# when `safe_get` returned nil.
def tick_apps
  uri = URI(apps_url)
  response = safe_get(uri)
  return if response.nil?

  unless response.status == 200
    return report(
      host: uri.host,
      service: 'marathon health',
      state: 'critical',
      description: "HTTP connection error: #{response.status} - #{response.body}",
    )
  end

  # Assuming that a 200 will give json
  document = JSON.parse(response.body)

  report(
    host: uri.host,
    service: 'marathon health',
    state: 'ok',
  )

  document['apps'].each do |app|
    app.each_pair do |field, value|
      next unless value.is_a? Numeric

      # The app id is appended directly after "apps", with no separator added.
      report(
        host: uri.host,
        service: "marathon apps#{app['id']}/#{field}",
        metric: value,
        ttl: 120,
      )
    end
  end
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
Riemann::Tools::Marathon.run
|