sensu-plugins-prometheus-checks 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
# Rake entry point: loads the gem packaging tasks and wires RSpec in as the
# default task.
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Defines the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# Plain `rake` runs the specs.
task default: :spec
@@ -0,0 +1,24 @@
1
#!/usr/bin/env ruby

require 'yaml'
require 'sensu/plugins/prometheus/checks/runner'

# The configuration path is the first command-line argument; without one,
# "config.yml" in the current directory is used.
config_file = ARGV.first || 'config.yml'

unless File.exist?(config_file)
  abort("Can't find configuration file at '#{config_file}'")
end

# Build the runner from the parsed YAML and execute it. A RuntimeError raised
# while building or running is printed and ends the process with exit code 1.
runner =
  begin
    checks_runner = Sensu::Plugins::Prometheus::Checks::Runner.new(
      YAML.load_file(config_file)
    )
    checks_runner.run
    checks_runner
  rescue RuntimeError => e
    puts "ERROR: #{e}"
    exit(1)
  end

# Print the runner's final message and reuse its status as the exit code.
puts("\n")
puts(runner.output)
exit(runner.status)
@@ -0,0 +1,53 @@
1
version: '2'

# All services join the "check_prometheus" network, which is declared as
# external (it is not created by this compose file).
networks:
  default:
    external:
      name: check_prometheus

services:
  # NOTE(review): the service key looks like a typo of "prometheus"; it is
  # kept as-is because renaming it changes the compose service name.
  promtheues:
    image: quay.io/prometheus/prometheus:v1.0.1
    container_name: prometheus
    volumes:
      - ./spec/docker/prometheus/:/etc/prometheus/
    command:
      - '-config.file=/etc/prometheus/prometheus.yml'
      - '-storage.local.path=/prometheus'
    # "expose" takes the container-side port; 19090 is only the host-side
    # port of the mapping below.
    expose:
      - 9090
    ports:
      - "19090:9090"
  # Three node-exporter instances, each serving only the textfile collector
  # from its own mounted directory, published on distinct host ports.
  node-exporter1:
    image: prom/node-exporter:0.12.0
    expose:
      - 9100
    ports:
      - "19100:9100"
    volumes:
      - ./spec/docker/node-exporter/node1/:/etc/node-exporter/
    command:
      - "-collectors.enabled=textfile"
      - "-collector.textfile.directory=/etc/node-exporter/"
  node-exporter2:
    image: prom/node-exporter:0.12.0
    expose:
      - 9100
    ports:
      - "19101:9100"
    volumes:
      - ./spec/docker/node-exporter/node2/:/etc/node-exporter/
    command:
      - "-collectors.enabled=textfile"
      - "-collector.textfile.directory=/etc/node-exporter/"
  node-exporter3:
    image: prom/node-exporter:0.12.0
    expose:
      - 9100
    ports:
      - "19102:9100"
    volumes:
      - ./spec/docker/node-exporter/node3/:/etc/node-exporter/
    command:
      - "-collectors.enabled=textfile"
      - "-collector.textfile.directory=/etc/node-exporter/"
@@ -0,0 +1,52 @@
1
+ require 'json'
2
+ require 'socket'
3
+ require 'sensu/plugins/utils/log'
4
+
5
module Sensu
  module Plugins
    module Events
      # Helper class to dispatch events into Sensu socket.
      #
      # The target defaults to 127.0.0.1:3030 and can be overridden with the
      # SENSU_SOCKET_ADDRESS and SENSU_SOCKET_PORT environment variables.
      class Dispatcher
        include Sensu::Plugins::Utils::Log

        # Set the default address and port, then apply the environment
        # overrides.
        def initialize
          @sensu_address = '127.0.0.1'
          @sensu_port = 3030
          read_env_address_and_port

          log.debug("Sensu at '#{@sensu_address}':'#{@sensu_port}'")
        end

        # Send a single event, serialized as JSON, into Sensu's socket, unless
        # environment variable PROM_DEBUG is set (the event is only logged
        # then).
        #
        # Raises RuntimeError when the connection can't be established or the
        # write fails with a system call or address resolution error.
        def dispatch(event)
          if ENV.key?('PROM_DEBUG')
            log.debug("PROM_DEBUG set, not dispatching event to Sensu: #{event}")
            return
          end

          # :nocov:
          begin
            # block form closes the socket even when the write raises
            TCPSocket.open(@sensu_address, @sensu_port) do |socket|
              socket.puts(JSON.generate(event))
            end
          rescue SystemCallError, SocketError => e
            # SocketError covers an address that can't be resolved, which is
            # not a SystemCallError
            log.error("Sensu is refusing connections! Error: '#{e}'")
            raise("Sensu is not avilable at '#{@sensu_address}:#{@sensu_port}'")
          end
          # :nocov:
        end

        private

        # Read Sensu address and port from environment. The port is converted
        # with `to_i`.
        def read_env_address_and_port
          @sensu_address = ENV['SENSU_SOCKET_ADDRESS'] \
            if ENV.key?('SENSU_SOCKET_ADDRESS')
          @sensu_port = ENV['SENSU_SOCKET_PORT'].to_i \
            if ENV.key?('SENSU_SOCKET_PORT')
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Sensu
  module Plugins
    module Prometheus
      # Status helpers for check evaluation, meant to be mixed into the object
      # that evaluates checks. Every argument is converted with `to_f` before
      # it is compared.
      module Checks
        # Classify result against the warning and critical levels.
        #
        # Returns 0 when result is below warn, 2 when it is at or above crit,
        # 1 when it is at or above warn, and 3 when none of the comparisons
        # hold.
        def evaluate(result, warn, crit)
          current = result.to_f
          warning = warn.to_f
          critical = crit.to_f

          return 0 if current < warning
          return 2 if current >= critical
          return 1 if current >= warning

          3
        end

        # Return 0 when result and value are the same number, 2 otherwise.
        def equals(result, value)
          result.to_f == value.to_f ? 0 : 2
        end

        # Return 0 when result is strictly below value, 2 otherwise.
        def below(result, value)
          result.to_f < value.to_f ? 0 : 2
        end

        # Return 0 when result is strictly above value, 2 otherwise.
        def above(result, value)
          result.to_f > value.to_f ? 0 : 2
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Sensu
  module Plugins
    module Prometheus
      module Checks
        # Exposes the entries of a hash as reader methods on a single object,
        # so they can be referenced by name through the object's binding.
        class Namespace
          # Define, on this instance only, one method per hash key that
          # returns the matching value.
          def initialize(hash)
            hash.each_pair do |name, content|
              define_singleton_method(name) { content }
            end
          end

          # Public access to this object's `binding`.
          def namespace_binding
            binding
          end
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'erb'
2
+ require 'ostruct'
3
+
4
+ require 'sensu/plugins/prometheus/checks/namespace'
5
+
6
module Sensu
  module Plugins
    module Prometheus
      module Checks
        # Handles the final output of checks, creates a custom message based on
        # template and informed variables.
        class Output
          # ERB templates, keyed by check name. Templates reference `value`
          # and, for some checks, entries of the `cfg` hash.
          TEMPLATES = {
            'disk' => \
              "Disk: <%= value %>%, Mountpoint: <%= cfg['mount'] %> |disk=<%= value %>",
            # NOTE(review): wording and perfdata label mirror 'inode' although
            # the check is named 'disk_all' - confirm this is intended.
            'disk_all' => \
              "Disk: <%= cfg['mount'] %>, Inode Usage: <%= value %>% |inodes=<%= value %>",
            'load_per_cpu' => \
              'Load: <%= value %>|load=<%= value %>',
            'load_per_cluster' => \
              'Cluster Load: <%= value %> |load=<%= value %>',
            'load_per_cluster_minus_n' => \
              'Cluster Load: <%= value %> |load=<%= value %>',
            'inode' => \
              "Disk: <%= cfg['mount'] %>, Inodes: <%= value %>% |inodes=<%= value %>",
            'memory' => \
              'Memory <%= value %>% |memory=<%= value %>',
            # a stray trailing double quote was removed from this template
            'memory_per_cluster' => \
              'Cluster Memory: <%= value %>% |memory=<%= value %>',
            'predict_disk_all' => \
              "Disks to run out of space in the next <%= cfg['days'] %> days: <%= value %>",
            'service' => \
              "Service: <%= cfg['name'] %> (<%= cfg['state'] %>=<%= value %>)"
          }.freeze

          # Render the template registered under template_name (converted with
          # `to_s`), using the entries of the vars hash as template variables.
          #
          # Returns the rendered string. Raises RuntimeError when there is no
          # template under that name or the template is empty.
          def render(template_name, vars)
            template_name = template_name.to_s
            template = TEMPLATES[template_name]
            raise "Can't find template for '#{template_name}'" \
              if template.nil? || template.empty?

            # vars become methods of the namespace object, whose binding is
            # what the template is evaluated against
            ns = Sensu::Plugins::Prometheus::Checks::Namespace.new(vars)
            ERB.new(template).result(ns.namespace_binding)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,267 @@
1
+ require 'sensu/plugins/utils/log'
2
+ require 'sensu/plugins/events/dispatcher'
3
+ require 'sensu/plugins/prometheus/client'
4
+ require 'sensu/plugins/prometheus/metrics'
5
+ require 'sensu/plugins/prometheus/checks'
6
+ require 'sensu/plugins/prometheus/checks/output'
7
+
8
module Sensu
  module Plugins
    module Prometheus
      module Checks
        # Execute the configured checks, evaluate the results and set a final
        # output and status.
        class Runner
          include Sensu::Plugins::Utils::Log
          include Sensu::Plugins::Prometheus::Checks

          # Values used by "service" checks when "cfg" does not set them.
          SERVICE_DEFAULTS = {
            'state' => 'active',
            'state_required' => 1
          }.freeze

          # Local comparison methods a custom check may select with
          # "check.type". Restricting the names keeps configuration from
          # invoking arbitrary methods through `send`.
          CUSTOM_CHECK_TYPES = %w[equals below above].freeze

          attr_reader :status, :output, :events

          # Does basic configuration validation and start the object the methods
          # on this class will consume.
          #
          # config is expected to be a hash with a 'config' section and
          # optional 'checks' and 'custom' lists; missing or nil sections are
          # filled in on the informed hash itself.
          #
          # Raises RuntimeError when the configuration is empty or has no
          # 'config' section.
          def initialize(config)
            # an empty YAML file is loaded as `false`, treat it as empty too
            raise 'Configuration is empty, abort!' \
              if !config || (config.respond_to?(:empty?) && config.empty?)
            raise "Configuration does not specify 'config' section!" \
              unless config.respond_to?(:key?) && config.key?('config')

            # `||=` also covers sections declared without any content
            config['config'] ||= {}
            config['checks'] ||= []
            config['custom'] ||= []

            @config = config
            @events = []
            @status = 0
            @output = ''
            @source_nodename_map = nil

            @prometheus = Sensu::Plugins::Prometheus::Client.new
            @metrics = Sensu::Plugins::Prometheus::Metrics.new(@prometheus)
            @tmpl = Sensu::Plugins::Prometheus::Checks::Output.new
            @dispatcher = Sensu::Plugins::Events::Dispatcher.new
          end

          # Drives the evaluation of regular and custom checks, then calls for
          # the final analysis on collected events.
          def run
            evaluate_checks
            evaluate_custom
            evaluate_and_dispatch_events
          end

          private

          # Wrap around Metrics object and capture exception. Returns what the
          # metrics method named "name" returns, or an empty list when that
          # call raises NoMethodError or returns nil.
          def collect_metrics(name, check_cfg)
            # invoke method name method on metric object
            @metrics.send(name, check_cfg) || []
          # :nocov:
          rescue NoMethodError => e
            log.error(
              "Method '#{name}' is not present on Metrics object: '#{e}'"
            )
            []
            # :nocov:
          end

          # Apply evaluation to all pre-defined checks, appending one event
          # per collected metric.
          def evaluate_checks
            log.info("Evaluating Checks: '#{@config['checks'].length}'")

            @config['checks'].each do |check|
              check_name = check['check']
              raw_cfg = check['cfg'] || {}
              service = check_name == 'service'

              # Defaults are the merge receiver, so configured values win.
              # Names are resolved once per check, outside of the metrics
              # loop, so every metric of a check is evaluated the same way.
              check_cfg = service ? SERVICE_DEFAULTS.merge(raw_cfg) : raw_cfg
              # giving a service hint by adding it's name
              event_name = service ? "service_#{check_cfg['name']}" : check_name

              collect_metrics(check_name, raw_cfg).each do |metric|
                status = if service
                           equals(metric['value'], check_cfg['state_required'])
                         else
                           # normal threshold evaluation
                           evaluate(
                             metric['value'],
                             check_cfg['warn'],
                             check_cfg['crit']
                           )
                         end

                append_event(
                  "check_#{event_name}",
                  @tmpl.render(check_name, metric.merge('cfg' => check_cfg)),
                  status,
                  metric['source']
                )
              end
            end
          end

          # Apply checks based on custom Prometheus queries and custom check
          # config, appending one event per collected metric.
          def evaluate_custom
            log.info("Evaluating Custom: '#{@config['custom'].length}'")

            @config['custom'].each do |custom|
              name = custom['name']
              messages = custom['msg'] || {}

              # invoke "custom" method in metrics object
              collect_metrics('custom', custom).each do |metric|
                value = metric['value']
                status = custom_status(custom, value)

                log.debug("Custom Check: name='#{name}', value='#{value}'")

                # making sure the custom check has the status defined
                output = if messages.key?(status)
                           messages[status].to_s
                         else
                           'No output message defined for this check'
                         end

                append_event("custom_#{name}", output, status, metric['source'])
              end
            end
          end

          # Determine the status of a custom check value: through the local
          # method named on "check.type" when informed, otherwise through the
          # "cfg" thresholds. Returns 3 when neither can be applied.
          def custom_status(custom, value)
            check = custom['check'] || {}
            type = check['type'].to_s
            cfg = custom['cfg']

            if CUSTOM_CHECK_TYPES.include?(type)
              # calling local method to determine metric status
              send(type, value, check['value'])
            elsif !type.empty?
              log.warn(
                "Custom check type '#{type}' is not supported, can't be evaluated"
              )
              3
            elsif cfg
              # normal threshold evaluation
              evaluate(value, cfg['warn'], cfg['crit'])
            else
              log.warn(
                "Custom check does not have 'check' or 'cfg', can't be evaluated"
              )
              3
            end
          end

          # Classify and select whitelisted events to dispatch. Also prepares
          # the final status and output message.
          def evaluate_and_dispatch_events
            settings = @config['config']
            discard_non_whitelisted_events(settings)

            failures = []

            # events are dispatched from the last to the first collected
            @events.reverse_each do |event|
              # kept for the summary, the key may be removed right below
              source = event['source']

              # removing source key to use local's sensu source name (hostname)
              if settings['use_default_source']
                log.debug("Removing 'source' from event, using Sensu's default")
                event.delete('source')
              end

              # selecting the non-succesful events
              failures << [event, source] if event['status'] != 0
              # dispatching event to Sensu
              @dispatcher.dispatch(event)
            end

            # setting up final status and output message
            amount_checks = @config['checks'].length + @config['custom'].length
            amount_events = @events.length

            if failures.empty?
              @status = 0
              @output = \
                "OK: Ran #{amount_checks} checks succesfully on #{amount_events} events!"
            else
              log.debug("#{failures.length} failed events")
              @status = 1
              # highest status first
              summaries = failures.sort_by { |event, _| event['status'] }.reverse.map do |event, source|
                "Source: #{source}, " \
                  "Check: #{event['name']}, " \
                  "Output: #{event['output']}, " \
                  "Status: #{event['status']}"
              end
              @output = summaries.join(' | ')
            end

            log.debug("Ran #{amount_checks}, and collected #{amount_events} events")
            log.debug("Final Status: #{@status}")
            log.debug("Final Output: #{@output}")
          end

          # Remove from the pool the events whose source does not match the
          # configured "whitelist" regular expression. Nothing is removed when
          # no whitelist is configured.
          def discard_non_whitelisted_events(settings)
            return unless settings.key?('whitelist')

            whitelist = Regexp.new(settings['whitelist'].to_s)
            @events.select! do |event|
              next true if event['source'] =~ whitelist

              log.debug(
                "Skipping event! Source '#{event['source']}' does not " \
                "match /#{settings['whitelist']}/"
              )
              false
            end
          end

          # Query Prometheus to discover the nodenames per instance, found on
          # the last day, and sanitize query events into a hash, returned by
          # this method. Keys are the "instance" labels, values are the
          # "nodename" labels up to the first dot.
          def source_nodename_map
            map = {}
            @prometheus.query('max_over_time(node_uname_info[1d])').each do |result|
              source = result['metric']['instance']
              nodename = result['metric']['nodename'].split('.', 2)[0]
              log.info("[node_exporter] instance: '#{source}', nodename: '#{nodename}'")
              map[source] = nodename
            end
            log.warn('Unable to query the node_exporter intances from Prometheus') \
              if map.empty?
            map
          end

          # Remove chars that are not allowed in Sensu: anything other than
          # word characters, dots and dashes is replaced by an underscore.
          def sensu_safe(string)
            string.to_s.gsub(/[^\w\.-]+/, '_')
          end

          # Append an event on the pool, making string safe for Sensu, checking
          # "source" against "source_nodename_map" and composing "address" using
          # configuration "domain" entry.
          def append_event(name, output, status, source)
            log.info(
              "[#{status}] check: '#{name}', output: '#{output}', source: '#{source}'"
            )

            # source-nodename mapping is queried once, on the first event
            @source_nodename_map ||= source_nodename_map

            # translating node_exporter hostname into nodename plus domain
            nodename = @source_nodename_map[source] || source
            address = "#{nodename}.#{@config['config']['domain']}"

            @events << {
              'address' => sensu_safe(address),
              'name' => sensu_safe(name),
              'occurrences' => @config['config']['occurrences'] || 1,
              'output' => output,
              'reported_by' => @config['config']['reported_by'],
              'status' => status,
              'source' => sensu_safe(nodename)
            }
          end
        end
      end
    end
  end
end