sensu-plugins-prometheus-checks 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/.rubocop.yml +16 -0
- data/.ruby-version +1 -0
- data/.simplecov +8 -0
- data/.travis.yml +16 -0
- data/Dockerfile +9 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +353 -0
- data/Rakefile +6 -0
- data/bin/check_prometheus.rb +24 -0
- data/docker-compose.yml +53 -0
- data/lib/sensu/plugins/events/dispatcher.rb +52 -0
- data/lib/sensu/plugins/prometheus/checks.rb +49 -0
- data/lib/sensu/plugins/prometheus/checks/namespace.rb +21 -0
- data/lib/sensu/plugins/prometheus/checks/output.rb +47 -0
- data/lib/sensu/plugins/prometheus/checks/runner.rb +267 -0
- data/lib/sensu/plugins/prometheus/checks/version.rb +9 -0
- data/lib/sensu/plugins/prometheus/client.rb +62 -0
- data/lib/sensu/plugins/prometheus/metrics.rb +205 -0
- data/lib/sensu/plugins/utils/log.rb +24 -0
- data/sensu-plugins-prometheus-checks.gemspec +35 -0
- data/test.rb +41 -0
- data/test.sh +5 -0
- metadata +199 -0
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'sensu/plugins/prometheus/checks/runner'
|
5
|
+
|
6
|
+
# Path of the YAML configuration: first command-line argument, falling back
# to 'config.yml' in the current directory.
config_file = ARGV.fetch(0, 'config.yml')

unless File.exist?(config_file)
  abort("Can't find configuration file at '#{config_file}'")
end

# Build the runner from the parsed configuration and execute every check.
# A RuntimeError raised while building or running is printed and ends the
# process with exit code 1.
runner = begin
  instance = Sensu::Plugins::Prometheus::Checks::Runner.new(
    YAML.load_file(config_file)
  )
  instance.run
  instance
rescue RuntimeError => e
  puts "ERROR: #{e}"
  exit(1)
end

# Print the summary produced by the runner and use its status as exit code.
puts("\n")
puts(runner.output)
exit(runner.status)
|
data/docker-compose.yml
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
version: '2'
|
2
|
+
|
3
|
+
networks:
|
4
|
+
default:
|
5
|
+
external:
|
6
|
+
name: check_prometheus
|
7
|
+
|
8
|
+
services:
|
9
|
+
promtheues:
|
10
|
+
image: quay.io/prometheus/prometheus:v1.0.1
|
11
|
+
container_name: prometheus
|
12
|
+
volumes:
|
13
|
+
- ./spec/docker/prometheus/:/etc/prometheus/
|
14
|
+
command:
|
15
|
+
- '-config.file=/etc/prometheus/prometheus.yml'
|
16
|
+
- '-storage.local.path=/prometheus'
|
17
|
+
expose:
|
18
|
+
- 19090
|
19
|
+
ports:
|
20
|
+
- 19090:9090
|
21
|
+
node-exporter1:
|
22
|
+
image: prom/node-exporter:0.12.0
|
23
|
+
expose:
|
24
|
+
- 19100
|
25
|
+
ports:
|
26
|
+
- 19100:9100
|
27
|
+
volumes:
|
28
|
+
- ./spec/docker/node-exporter/node1/:/etc/node-exporter/
|
29
|
+
command:
|
30
|
+
- "-collectors.enabled=textfile"
|
31
|
+
- "-collector.textfile.directory=/etc/node-exporter/"
|
32
|
+
node-exporter2:
|
33
|
+
image: prom/node-exporter:0.12.0
|
34
|
+
expose:
|
35
|
+
- 19101
|
36
|
+
ports:
|
37
|
+
- 19101:9100
|
38
|
+
volumes:
|
39
|
+
- ./spec/docker/node-exporter/node2/:/etc/node-exporter/
|
40
|
+
command:
|
41
|
+
- "-collectors.enabled=textfile"
|
42
|
+
- "-collector.textfile.directory=/etc/node-exporter/"
|
43
|
+
node-exporter3:
|
44
|
+
image: prom/node-exporter:0.12.0
|
45
|
+
expose:
|
46
|
+
- 19102
|
47
|
+
ports:
|
48
|
+
- 19102:9100
|
49
|
+
volumes:
|
50
|
+
- ./spec/docker/node-exporter/node3/:/etc/node-exporter/
|
51
|
+
command:
|
52
|
+
- "-collectors.enabled=textfile"
|
53
|
+
- "-collector.textfile.directory=/etc/node-exporter/"
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'socket'
|
3
|
+
require 'sensu/plugins/utils/log'
|
4
|
+
|
5
|
+
module Sensu
  module Plugins
    module Events
      # Helper class to dispatch events into Sensu socket.
      #
      # The target defaults to 127.0.0.1:3030 and can be overridden with the
      # SENSU_SOCKET_ADDRESS and SENSU_SOCKET_PORT environment variables.
      class Dispatcher
        include Sensu::Plugins::Utils::Log

        def initialize
          @sensu_address = '127.0.0.1'
          @sensu_port = 3030
          read_env_address_and_port

          log.debug("Sensu at '#{@sensu_address}':'#{@sensu_port}'")
        end

        # Send a single event, serialized as JSON, to Sensu's socket. When
        # the environment variable PROM_DEBUG is set the event is only
        # logged and nothing is sent.
        #
        # Raises RuntimeError when the connection or the write fails.
        def dispatch(event)
          if ENV.key?('PROM_DEBUG')
            log.debug("PROM_DEBUG set, not dispatching event to Sensu: #{event}")
            return
          end

          # :nocov:
          begin
            # Block form guarantees the socket is closed even when
            # serializing or writing the event raises.
            TCPSocket.open(@sensu_address, @sensu_port) do |socket|
              socket.puts(JSON.generate(event))
            end
          rescue SystemCallError, SocketError => e
            # SocketError covers address resolution failures, which are not
            # SystemCallError subclasses.
            log.error("Sensu is refusing connections! Error: '#{e}'")
            raise("Sensu is not avilable at '#{@sensu_address}:#{@sensu_port}'")
          end
          # :nocov:
        end

        private

        # Read Sensu address and port from environment, keeping the current
        # values when the variables are not set.
        def read_env_address_and_port
          @sensu_address = ENV['SENSU_SOCKET_ADDRESS'] \
            if ENV.key?('SENSU_SOCKET_ADDRESS')
          @sensu_port = ENV['SENSU_SOCKET_PORT'].to_i \
            if ENV.key?('SENSU_SOCKET_PORT')
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Sensu
  module Plugins
    module Prometheus
      # Helper methods that turn a measured value into a numeric check
      # status. Meant to be mixed into the object that evaluates checks.
      module Checks
        # Compare a result against warning and critical thresholds, all
        # coerced with `to_f`. Returns 0 below the warning level, 2 at or
        # above the critical level, 1 at or above the warning level, and 3
        # when none of the comparisons holds.
        def evaluate(result, warn, crit)
          measured = result.to_f
          warn_level = warn.to_f
          crit_level = crit.to_f

          return 0 if measured < warn_level
          return 2 if measured >= crit_level
          return 1 if measured >= warn_level

          3
        end

        # Returns 0 when result and value are equal as floats, otherwise 2.
        def equals(result, value)
          result.to_f == value.to_f ? 0 : 2
        end

        # Returns 0 when result is lower than value as floats, otherwise 2.
        def below(result, value)
          result.to_f < value.to_f ? 0 : 2
        end

        # Returns 0 when result is greater than value as floats, otherwise 2.
        def above(result, value)
          result.to_f > value.to_f ? 0 : 2
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Sensu
  module Plugins
    module Prometheus
      module Checks
        # Exposes every entry of a hash as a reader method on the instance,
        # so the entries can be referenced by name from the object's binding.
        class Namespace
          # Defines one singleton method per key, each returning the value
          # stored under that key.
          def initialize(hash)
            hash.each do |name, content|
              define_singleton_method(name) { content }
            end
          end

          # Returns this object's `binding`.
          def namespace_binding
            binding
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
require 'sensu/plugins/prometheus/checks/namespace'
|
5
|
+
|
6
|
+
module Sensu
  module Plugins
    module Prometheus
      module Checks
        # Handles the final output of checks, creates a custom message based on
        # template and informed variables.
        class Output
          # ERB templates keyed by check name. Templates read `value` and,
          # where needed, entries of the check configuration through `cfg`.
          TEMPLATES = {
            'disk' => \
              "Disk: <%= value %>%, Mountpoint: <%= cfg['mount'] %> |disk=<%= value %>",
            'disk_all' => \
              "Disk: <%= cfg['mount'] %>, Inode Usage: <%= value %>% |inodes=<%= value %>",
            'load_per_cpu' => \
              'Load: <%= value %>|load=<%= value %>',
            'load_per_cluster' => \
              'Cluster Load: <%= value %> |load=<%= value %>',
            'load_per_cluster_minus_n' => \
              'Cluster Load: <%= value %> |load=<%= value %>',
            'inode' => \
              "Disk: <%= cfg['mount'] %>, Inodes: <%= value %>% |inodes=<%= value %>",
            'memory' => \
              'Memory <%= value %>% |memory=<%= value %>',
            # no trailing double quote here: it used to leak into the
            # rendered message after the memory value
            'memory_per_cluster' => \
              'Cluster Memory: <%= value %>% |memory=<%= value %>',
            'predict_disk_all' => \
              "Disks to run out of space in the next <%= cfg['days'] %> days: <%= value %>",
            'service' => \
              "Service: <%= cfg['name'] %> (<%= cfg['state'] %>=<%= value %>)"
          }.freeze

          # Render the template registered under `template_name` (converted
          # with `to_s`) using `vars`, whose keys become the names available
          # inside the template.
          #
          # Returns the rendered String. Raises RuntimeError when no
          # non-empty template exists for the given name.
          def render(template_name, vars)
            template_name = template_name.to_s
            template = TEMPLATES[template_name]
            raise "Can't find template for '#{template_name}'" \
              if template.nil? || template.empty?

            ns = Sensu::Plugins::Prometheus::Checks::Namespace.new(vars)
            ERB.new(template).result(ns.namespace_binding)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,267 @@
|
|
1
|
+
require 'sensu/plugins/utils/log'
|
2
|
+
require 'sensu/plugins/events/dispatcher'
|
3
|
+
require 'sensu/plugins/prometheus/client'
|
4
|
+
require 'sensu/plugins/prometheus/metrics'
|
5
|
+
require 'sensu/plugins/prometheus/checks'
|
6
|
+
require 'sensu/plugins/prometheus/checks/output'
|
7
|
+
|
8
|
+
module Sensu
  module Plugins
    module Prometheus
      module Checks
        # Execute the configured checks, evaluate the results and set a final
        # output and status.
        class Runner
          include Sensu::Plugins::Utils::Log
          include Sensu::Plugins::Prometheus::Checks

          # Values assumed for a 'service' check when its configuration does
          # not set them.
          SERVICE_DEFAULTS = {
            'state' => 'active',
            'state_required' => 1
          }.freeze

          attr_reader :status, :output, :events

          # Does basic configuration validation and start the object the methods
          # on this class will consume.
          #
          # `config` is the parsed configuration hash; missing 'checks' and
          # 'custom' entries are filled in with empty lists. Raises
          # RuntimeError when the configuration is nil/empty or lacks the
          # 'config' section.
          def initialize(config)
            raise 'Configuration is empty, abort!' \
              if config.nil? || config.empty?
            raise "Configuration does not specify 'config' section!" \
              unless config.key?('config')

            config['checks'] = [] unless config.key?('checks')
            config['custom'] = [] unless config.key?('custom')

            @config = config
            @events = []
            @status = 0
            @output = ''
            @source_nodename_map = nil

            @prometheus = Sensu::Plugins::Prometheus::Client.new
            @metrics = Sensu::Plugins::Prometheus::Metrics.new(@prometheus)
            @tmpl = Sensu::Plugins::Prometheus::Checks::Output.new
            @dispatcher = Sensu::Plugins::Events::Dispatcher.new
          end

          # Drives the evaluation of regular and custom checks, then calls for
          # the final analysis on collected events.
          def run
            evaluate_checks if @config.key?('checks')
            evaluate_custom if @config.key?('custom')
            evaluate_and_dispatch_events
          end

          private

          # Wrap around Metrics object and capture exception. Returns whatever
          # the metrics method returns, or an empty list when calling it
          # raises NoMethodError (the error is logged).
          def collect_metrics(name, check_cfg)
            metrics = []
            begin
              # invoke method name method on metric object
              metrics = @metrics.send(name, check_cfg)
            # :nocov:
            rescue NoMethodError => e
              log.error(
                "Method '#{name}' is not present on Metrics object: '#{e}'"
              )
            end
            # :nocov:
            metrics
          end

          # Apply evaluation to all pre-defined checks, appending one event
          # per collected metric.
          def evaluate_checks
            log.info("Evaluating Checks: '#{@config['checks'].length}'")

            @config['checks'].each do |check|
              check_name = check['check']
              check_cfg = check['cfg']

              collect_metrics(check_name, check_cfg).each do |metric|
                # Per-metric copies: the check's own name and configuration
                # must stay untouched so every metric of the same check is
                # evaluated the same way.
                event_name = check_name
                event_cfg = check_cfg

                # on service it will come with "state_required" flag
                if check_name == 'service'
                  # adding defaults in case they are not set; configured
                  # values take precedence over the defaults
                  event_cfg = SERVICE_DEFAULTS.merge(check_cfg)
                  # giving a service hint by adding it's name
                  event_name = "service_#{event_cfg['name']}"
                  status = equals(metric['value'], event_cfg['state_required'])
                else
                  # normal threshold evaluation
                  status = evaluate(
                    metric['value'],
                    event_cfg['warn'],
                    event_cfg['crit']
                  )
                end

                template_variables = metric
                template_variables['cfg'] = event_cfg

                append_event(
                  "check_#{event_name}",
                  @tmpl.render(check['check'], template_variables),
                  status,
                  metric['source']
                )
              end
            end
          end

          # Apply checks based on custom Prometheus queries and custom check
          # config.
          def evaluate_custom
            log.info("Evaluating Custom: '#{@config['custom'].length}'")

            @config['custom'].each do |custom|
              # a custom check without 'msg' simply has no messages defined
              messages = custom['msg'] || {}

              # invoke "custom" method in metrics object
              collect_metrics('custom', custom).each do |metric|
                value = metric['value']
                name = custom['name']

                if custom.key?('check') && !custom['check']['type'].to_s.empty?
                  # calling local method to determine metric status
                  # NOTE(review): the method name comes straight from the
                  # configuration; confirm the configuration is trusted.
                  status = send(
                    custom['check']['type'],
                    value,
                    custom['check']['value']
                  )
                elsif custom.key?('cfg')
                  # normal threshold evaluation
                  status = evaluate(
                    value,
                    custom['cfg']['warn'],
                    custom['cfg']['crit']
                  )
                else
                  log.warn(
                    "Custom check does not have 'check' or 'cfg', can't be evaluated"
                  )
                  status = 3
                end

                log.debug("Custom Check: name='#{name}', value='#{value}'")

                # making sure the custom check has the status defined
                output = if messages.key?(status)
                           messages[status].to_s
                         else
                           'No output message defined for this check'
                         end

                append_event(
                  "custom_#{name}",
                  output,
                  status,
                  metric['source']
                )
              end
            end
          end

          # Classify and select whitelisted events to dispatch. Also prepares
          # the final status and output message.
          def evaluate_and_dispatch_events
            non_successful_events = []
            settings = @config['config']
            # the whitelist does not change between events, build it once
            whitelist = settings.key?('whitelist') ? /#{settings['whitelist']}/ : nil

            # iterating over a reversed copy, so removing non-whitelisted
            # events from @events does not disturb the iteration
            @events.reverse.each do |event|
              # skipping events that are not whitelisted
              if whitelist && event['source'] !~ whitelist
                @events.delete(event)
                log.debug(
                  "Skipping event! Source '#{event['source']}' does not " \
                  "match /#{settings['whitelist']}/"
                )
                next
              end
              # removing source key to use local's sensu source name (hostname)
              if settings['use_default_source']
                log.debug("Removing 'source' from event, using Sensu's default")
                event.delete('source')
              end
              # selecting the non-succesful events
              non_successful_events << event if event['status'] != 0
              # dispatching event to Sensu
              @dispatcher.dispatch(event)
            end

            # setting up final status and output message
            amount_checks = @config['checks'].length + @config['custom'].length
            amount_events = @events.length

            if non_successful_events.empty?
              @status = 0
              @output = \
                "OK: Ran #{amount_checks} checks succesfully on #{amount_events} events!"
            else
              log.debug("#{non_successful_events.length} failed events")
              @status = 1
              # highest status first
              non_successful_events.sort_by { |e| e['status'] }.reverse.each do |event|
                @output << ' | ' unless @output.empty?
                @output << "Source: #{event['source']}, " \
                           "Check: #{event['name']}, " \
                           "Output: #{event['output']}, " \
                           "Status: #{event['status']}"
              end
            end

            log.debug("Ran #{amount_checks}, and collected #{amount_events} events")
            log.debug("Final Status: #{@status}")
            log.debug("Final Output: #{@output}")
          end

          # Query Prometheus to discover the nodenames per instance, found on
          # the last day, and sanitize query events into a hash, returned by
          # this method. Keys are the 'instance' labels, values the nodename
          # up to its first dot.
          def source_nodename_map
            map = {}
            @prometheus.query('max_over_time(node_uname_info[1d])').each do |result|
              source = result['metric']['instance']
              nodename = result['metric']['nodename'].split('.', 2).first
              log.info("[node_exporter] instance: '#{source}', nodename: '#{nodename}'")
              map[source] = nodename
            end
            log.warn('Unable to query the node_exporter intances from Prometheus') \
              if map.empty?
            map
          end

          # Remove chars that are not allowed in Sensu, replacing each run of
          # them with a single underscore.
          def sensu_safe(string)
            string.gsub(/[^\w\.-]+/, '_')
          end

          # Append an event on the pool, making string safe for Sensu, checking
          # "source" against "source_nodename_map" and composing "address" using
          # configuration "domain" entry.
          def append_event(name, output, status, source)
            log.info(
              "[#{status}] check: '#{name}', output: '#{output}', source: '#{source}'"
            )

            # source-nodename mapping is queried once, on first use
            @source_nodename_map ||= source_nodename_map

            # translating node_exporter hostname into nodename plus domain
            nodename = @source_nodename_map[source] || source
            address = "#{nodename}.#{@config['config']['domain']}"

            @events << {
              'address' => sensu_safe(address),
              'name' => sensu_safe(name),
              'occurrences' => @config['config']['occurrences'] || 1,
              'output' => output,
              'reported_by' => @config['config']['reported_by'],
              'status' => status,
              'source' => sensu_safe(nodename)
            }
          end
        end
      end
    end
  end
end
|