interferon 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/bin/interferon +6 -12
- data/lib/interferon.rb +61 -113
- data/lib/interferon/destinations/datadog.rb +34 -29
- data/lib/interferon/group_sources/filesystem.rb +4 -4
- data/lib/interferon/loaders.rb +6 -4
- data/lib/interferon/version.rb +1 -1
- data/spec/lib/interferon/destinations/datadog_spec.rb +2 -10
- data/spec/lib/interferon_spec.rb +68 -51
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e06dab2517dc06edf86da60cb55249bf63d031d
|
4
|
+
data.tar.gz: e370782305dda94dca3215c57dfc8bceb633ed77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bef7463ead0dc65ab0c2e8018feb13a85a419ecc02e2403e9ee15150cc29089d8d471609ae9c7ae421c0f6c112fd39f5746b9cff7288c43535ffb32cf4c88d2d
|
7
|
+
data.tar.gz: f8b07e1239591b50e07facca30186412c74eda5b19bd4d128b634bf1a8c477b449aaa7894ff6fc3181ab781f1011901778e37a663f6d337a56abc21dc9bd8462
|
data/README.md
CHANGED
@@ -29,6 +29,7 @@ It accepts the following parameters:
|
|
29
29
|
* `group_sources` -- a list of sources which can return groups of people to alert
|
30
30
|
* `host_sources` -- a list of sources which can read inventory systems and return lists of hosts to monitor
|
31
31
|
* `destinations` -- a list of alerting providers, which can monitor metrics and dispatch alerts as specified in your alerts dsl files
|
32
|
+
* `processes` -- number of processes to run the alert generation on (optional; default is to use all available cores)
|
32
33
|
|
33
34
|
For more information, see [config.example.yaml](config.example.yaml) file in this repo.
|
34
35
|
|
data/bin/interferon
CHANGED
@@ -10,11 +10,11 @@ options = {}
|
|
10
10
|
optparse = OptionParser.new do |opts|
|
11
11
|
opts.banner = %(Usage: interferon --config /path/to/interferon/config)
|
12
12
|
|
13
|
-
opts.on('-c
|
13
|
+
opts.on('-c', '--config config', String, 'Path to interferon config') do |key|
|
14
14
|
options[:config] = key
|
15
15
|
end
|
16
16
|
|
17
|
-
opts.on('-n', '--dry-run', "Don
|
17
|
+
opts.on('-n', '--dry-run', "Don't update alert destinations") do
|
18
18
|
options[:dry_run] = true
|
19
19
|
end
|
20
20
|
|
@@ -26,7 +26,7 @@ end
|
|
26
26
|
|
27
27
|
def parseconfig(filename)
|
28
28
|
begin
|
29
|
-
|
29
|
+
config = YAML.parse(File.read(filename))
|
30
30
|
rescue Errno::ENOENT => e
|
31
31
|
raise ArgumentError, "config file does not exist:\n#{e.inspect}"
|
32
32
|
rescue Errno::EACCES => e
|
@@ -34,7 +34,7 @@ def parseconfig(filename)
|
|
34
34
|
rescue YAML::SyntaxError => e
|
35
35
|
raise "config file #{filename} contains invalid YAML:\n#{e.inspect}"
|
36
36
|
end
|
37
|
-
|
37
|
+
config.to_ruby
|
38
38
|
end
|
39
39
|
|
40
40
|
# parse command line arguments
|
@@ -55,13 +55,7 @@ end
|
|
55
55
|
|
56
56
|
ENV['DEBUG'] = '1' if config['verbose_logging']
|
57
57
|
|
58
|
-
|
59
|
-
|
60
|
-
config['group_sources'] || {},
|
61
|
-
config['host_sources'],
|
62
|
-
config['destinations']
|
63
|
-
)
|
64
|
-
|
65
|
-
a.run(options[:dry_run])
|
58
|
+
interferon = Interferon::Interferon.new(config, options[:dry_run])
|
59
|
+
interferon.run
|
66
60
|
|
67
61
|
puts 'interferon signaling complete!'
|
data/lib/interferon.rb
CHANGED
@@ -23,28 +23,26 @@ module Interferon
|
|
23
23
|
# groups_sources is a hash from type => options for each group source
|
24
24
|
# host_sources is a hash from type => options for each host source
|
25
25
|
# destinations is a similar hash from type => options for each alerter
|
26
|
-
def initialize(
|
27
|
-
|
28
|
-
@
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@
|
26
|
+
def initialize(config, dry_run = false)
|
27
|
+
@alerts_repo_path = config['alerts_repo_path']
|
28
|
+
@group_sources = config['group_sources'] || {}
|
29
|
+
@host_sources = config['host_sources']
|
30
|
+
@destinations = config['destinations']
|
31
|
+
@processes = config['processes']
|
32
32
|
@dry_run = dry_run
|
33
|
-
@processes = processes
|
34
33
|
@request_shutdown = false
|
35
34
|
end
|
36
35
|
|
37
|
-
def run
|
36
|
+
def run
|
38
37
|
Signal.trap('TERM') do
|
39
|
-
log.info
|
38
|
+
log.info('SIGTERM received. shutting down gracefully...')
|
40
39
|
@request_shutdown = true
|
41
40
|
end
|
42
|
-
@dry_run = dry_run
|
43
41
|
run_desc = @dry_run ? 'dry run' : 'run'
|
44
|
-
log.info
|
42
|
+
log.info("beginning alerts #{run_desc}")
|
45
43
|
|
46
44
|
alerts = read_alerts
|
47
|
-
groups = read_groups(@
|
45
|
+
groups = read_groups(@group_sources)
|
48
46
|
hosts = read_hosts(@host_sources)
|
49
47
|
|
50
48
|
@destinations.each do |dest|
|
@@ -55,9 +53,9 @@ module Interferon
|
|
55
53
|
update_alerts(@destinations, hosts, alerts, groups)
|
56
54
|
|
57
55
|
if @request_shutdown
|
58
|
-
log.info
|
56
|
+
log.info("interferon #{run_desc} shut down by SIGTERM")
|
59
57
|
else
|
60
|
-
log.info
|
58
|
+
log.info("interferon #{run_desc} complete")
|
61
59
|
end
|
62
60
|
end
|
63
61
|
|
@@ -75,14 +73,14 @@ module Interferon
|
|
75
73
|
begin
|
76
74
|
alert = Alert.new(alert_file)
|
77
75
|
rescue StandardError => e
|
78
|
-
log.warn
|
76
|
+
log.warn("error reading alert file #{alert_file}: #{e}")
|
79
77
|
failed += 1
|
80
78
|
else
|
81
79
|
alerts << alert
|
82
80
|
end
|
83
81
|
end
|
84
82
|
|
85
|
-
log.info
|
83
|
+
log.info("read #{alerts.count} alerts files from #{path}")
|
86
84
|
|
87
85
|
statsd.gauge('alerts.read.count', alerts.count)
|
88
86
|
statsd.gauge('alerts.read.failed', failed)
|
@@ -106,12 +104,16 @@ module Interferon
|
|
106
104
|
people_count += people.count
|
107
105
|
end
|
108
106
|
|
109
|
-
log.info
|
110
|
-
|
107
|
+
log.info(
|
108
|
+
"read #{people_count} people in #{source_groups.count} groups " \
|
109
|
+
"from source #{source.class.name}"
|
110
|
+
)
|
111
111
|
end
|
112
112
|
|
113
|
-
log.info
|
114
|
-
|
113
|
+
log.info(
|
114
|
+
"total of #{groups.values.flatten.count} people in #{groups.count} groups " \
|
115
|
+
"from #{sources.count} sources"
|
116
|
+
)
|
115
117
|
|
116
118
|
statsd.gauge('groups.sources', sources.count)
|
117
119
|
statsd.gauge('groups.count', groups.count)
|
@@ -131,36 +133,37 @@ module Interferon
|
|
131
133
|
hosts << source_hosts
|
132
134
|
|
133
135
|
statsd.gauge('hosts.count', source_hosts.count, tags: ["source:#{source.class.name}"])
|
134
|
-
log.info
|
136
|
+
log.info("read #{source_hosts.count} hosts from source #{source.class.name}")
|
135
137
|
end
|
136
138
|
|
137
139
|
hosts.flatten!
|
138
|
-
log.info
|
140
|
+
log.info("total of #{hosts.count} entities from #{sources.count} sources")
|
139
141
|
|
140
142
|
hosts
|
141
143
|
end
|
142
144
|
|
143
145
|
def update_alerts(destinations, hosts, alerts, groups)
|
146
|
+
alerts_queue, alert_errors = build_alerts_queue(hosts, alerts, groups)
|
147
|
+
if @dry_run && !alert_errors.empty?
|
148
|
+
raise "Alerts failed to apply or evaluate for all hosts: #{alerts.map(&:to_s).join(', ')}"
|
149
|
+
end
|
150
|
+
|
144
151
|
loader = DestinationsLoader.new([@alerts_repo_path])
|
145
152
|
loader.get_all(destinations).each do |dest|
|
146
153
|
break if @request_shutdown
|
147
|
-
log.info
|
148
|
-
update_alerts_on_destination(dest,
|
154
|
+
log.info("updating alerts on #{dest.class.name}")
|
155
|
+
update_alerts_on_destination(dest, alerts_queue)
|
149
156
|
end
|
150
157
|
end
|
151
158
|
|
152
|
-
def update_alerts_on_destination(dest,
|
159
|
+
def update_alerts_on_destination(dest, alerts_queue)
|
153
160
|
# track some counters/stats per destination
|
154
161
|
start_time = Time.new.to_f
|
155
162
|
|
156
163
|
# get already-defined alerts
|
157
164
|
existing_alerts = dest.existing_alerts
|
158
165
|
|
159
|
-
|
160
|
-
do_dry_run_update(dest, hosts, alerts, existing_alerts, groups)
|
161
|
-
else
|
162
|
-
do_regular_update(dest, hosts, alerts, existing_alerts, groups)
|
163
|
-
end
|
166
|
+
run_update(dest, alerts_queue, existing_alerts)
|
164
167
|
|
165
168
|
unless @request_shutdown
|
166
169
|
# run time summary
|
@@ -170,7 +173,7 @@ module Interferon
|
|
170
173
|
run_time,
|
171
174
|
tags: ["destination:#{dest.class.name}"]
|
172
175
|
)
|
173
|
-
log.info
|
176
|
+
log.info("#{dest.class.name} : run completed in %.2f seconds" % run_time)
|
174
177
|
|
175
178
|
# report destination stats
|
176
179
|
dest.report_stats
|
@@ -179,73 +182,7 @@ module Interferon
|
|
179
182
|
raise dest.api_errors.to_s if @dry_run && !dest.api_errors.empty?
|
180
183
|
end
|
181
184
|
|
182
|
-
def
|
183
|
-
# Track these to clean up dry-run alerts from previous runs
|
184
|
-
existing_dry_run_alerts = []
|
185
|
-
existing_alerts.each do |name, alert|
|
186
|
-
if name.start_with?(DRY_RUN_ALERTS_NAME_PREFIX)
|
187
|
-
existing_dry_run_alerts << [alert['name'], [alert['id']]]
|
188
|
-
existing_alerts.delete(name)
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
alerts_queue = build_alerts_queue(hosts, alerts, groups)
|
193
|
-
updates_queue = alerts_queue.reject do |_name, alert_people_pair|
|
194
|
-
!dest.need_update(alert_people_pair, existing_alerts)
|
195
|
-
end
|
196
|
-
|
197
|
-
# Add dry-run prefix to alerts and delete id to avoid impacting real alerts
|
198
|
-
existing_alerts.keys.each do |name|
|
199
|
-
existing_alert = existing_alerts[name]
|
200
|
-
dry_run_alert_name = DRY_RUN_ALERTS_NAME_PREFIX + name
|
201
|
-
existing_alert['name'] = dry_run_alert_name
|
202
|
-
existing_alert['id'] = [nil]
|
203
|
-
existing_alerts[dry_run_alert_name] = existing_alerts.delete(name)
|
204
|
-
end
|
205
|
-
|
206
|
-
# Build new queue with dry-run prefixes and ensure they are silenced
|
207
|
-
alerts_queue.each do |_name, alert_people_pair|
|
208
|
-
alert, _people = alert_people_pair
|
209
|
-
dry_run_alert_name = DRY_RUN_ALERTS_NAME_PREFIX + alert['name']
|
210
|
-
alert.change_name(dry_run_alert_name)
|
211
|
-
alert.silence
|
212
|
-
end
|
213
|
-
|
214
|
-
# Create alerts in destination
|
215
|
-
created_alerts = create_alerts(dest, updates_queue)
|
216
|
-
|
217
|
-
# Existing alerts are pruned until all that remains are
|
218
|
-
# alerts that aren't being generated anymore
|
219
|
-
to_remove = existing_alerts.dup
|
220
|
-
alerts_queue.each do |_name, alert_people_pair|
|
221
|
-
alert, _people = alert_people_pair
|
222
|
-
old_alerts = to_remove[alert['name']]
|
223
|
-
|
224
|
-
next if old_alerts.nil?
|
225
|
-
if old_alerts['id'].length == 1
|
226
|
-
to_remove.delete(alert['name'])
|
227
|
-
else
|
228
|
-
old_alerts['id'] = old_alerts['id'].drop(1)
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
# Clean up alerts no longer being generated
|
233
|
-
to_remove.each do |_name, alert|
|
234
|
-
break if @request_shutdown
|
235
|
-
dest.remove_alert(alert)
|
236
|
-
end
|
237
|
-
|
238
|
-
# Clean up dry-run created alerts
|
239
|
-
(created_alerts + existing_dry_run_alerts).each do |alert_id_pair|
|
240
|
-
alert_ids = alert_id_pair[1]
|
241
|
-
alert_ids.each do |alert_id|
|
242
|
-
dest.remove_alert_by_id(alert_id)
|
243
|
-
end
|
244
|
-
end
|
245
|
-
end
|
246
|
-
|
247
|
-
def do_regular_update(dest, hosts, alerts, existing_alerts, groups)
|
248
|
-
alerts_queue = build_alerts_queue(hosts, alerts, groups)
|
185
|
+
def run_update(dest, alerts_queue, existing_alerts)
|
249
186
|
updates_queue = alerts_queue.reject do |_name, alert_people_pair|
|
250
187
|
!dest.need_update(alert_people_pair, existing_alerts)
|
251
188
|
end
|
@@ -253,6 +190,9 @@ module Interferon
|
|
253
190
|
# Create alerts in destination
|
254
191
|
create_alerts(dest, updates_queue)
|
255
192
|
|
193
|
+
# Do not continue to remove alerts during dry-run
|
194
|
+
return if @dry_run
|
195
|
+
|
256
196
|
# Existing alerts are pruned until all that remains are
|
257
197
|
# alerts that aren't being generated anymore
|
258
198
|
to_remove = existing_alerts.dup
|
@@ -281,12 +221,12 @@ module Interferon
|
|
281
221
|
concurrency = dest.concurrency || 10
|
282
222
|
unless @request_shutdown
|
283
223
|
threads = Array.new(concurrency) do |i|
|
284
|
-
log.info
|
224
|
+
log.info("thread #{i} created")
|
285
225
|
t = Thread.new do
|
286
226
|
while (name = alerts_to_create.shift)
|
287
227
|
break if @request_shutdown
|
288
228
|
cur_alert, people = alerts_queue[name]
|
289
|
-
log.debug
|
229
|
+
log.debug("creating alert for #{cur_alert[:name]}")
|
290
230
|
alert_key_ids << dest.create_alert(cur_alert, people)
|
291
231
|
end
|
292
232
|
end
|
@@ -300,16 +240,20 @@ module Interferon
|
|
300
240
|
|
301
241
|
def build_alerts_queue(hosts, alerts, groups)
|
302
242
|
alerts_queue = {}
|
243
|
+
all_alert_generation_errors = []
|
244
|
+
|
303
245
|
# create or update alerts; mark when we've done that
|
304
246
|
result = Parallel.map(alerts, in_processes: @processes) do |alert|
|
305
247
|
break if @request_shutdown
|
306
248
|
alerts_generated = {}
|
249
|
+
alert_generation_errors = []
|
307
250
|
counters = {
|
308
251
|
errors: 0,
|
309
252
|
evals: 0,
|
310
253
|
applies: 0,
|
311
254
|
hosts: hosts.length,
|
312
255
|
}
|
256
|
+
|
313
257
|
last_eval_error = nil
|
314
258
|
|
315
259
|
hosts.each do |hostinfo|
|
@@ -317,7 +261,7 @@ module Interferon
|
|
317
261
|
alert.evaluate(hostinfo)
|
318
262
|
counters[:evals] += 1
|
319
263
|
rescue StandardError => e
|
320
|
-
log.debug
|
264
|
+
log.debug("Evaluation of alert #{alert} failed in the context of host #{hostinfo}")
|
321
265
|
counters[:errors] += 1
|
322
266
|
last_eval_error = e
|
323
267
|
next
|
@@ -325,7 +269,7 @@ module Interferon
|
|
325
269
|
|
326
270
|
# don't define an alert that doesn't apply to this hostinfo
|
327
271
|
unless alert[:applies]
|
328
|
-
log.debug
|
272
|
+
log.debug("alert #{alert[:name]} doesn't apply to #{hostinfo.inspect}")
|
329
273
|
next
|
330
274
|
end
|
331
275
|
|
@@ -348,17 +292,19 @@ module Interferon
|
|
348
292
|
statsd.gauge('alerts.evaluate.applies', counters[:applies], tags: ["alert:#{alert}"])
|
349
293
|
|
350
294
|
if counters[:applies] > 0
|
351
|
-
log.info
|
295
|
+
log.info("alert #{alert} applies to #{counters[:applies]} of #{counters[:hosts]} hosts")
|
352
296
|
end
|
353
297
|
|
354
298
|
# did the alert fail to evaluate on all hosts?
|
355
299
|
if counters[:errors] == counters[:hosts] && !last_eval_error.nil?
|
356
|
-
log.error
|
357
|
-
log.error
|
358
|
-
|
300
|
+
log.error("alert #{alert} failed to evaluate in the context of all hosts!")
|
301
|
+
log.error("last error on alert #{alert}: #{last_eval_error}")
|
359
302
|
statsd.gauge('alerts.evaluate.failed_on_all', 1, tags: ["alert:#{alert}"])
|
360
|
-
log.debug
|
361
|
-
|
303
|
+
log.debug(
|
304
|
+
"alert #{alert}: " \
|
305
|
+
"error #{last_eval_error}\n#{last_eval_error.backtrace.join("\n")}"
|
306
|
+
)
|
307
|
+
alert_generation_errors << alert
|
362
308
|
else
|
363
309
|
statsd.gauge('alerts.evaluate.failed_on_all', 0, tags: ["alert:#{alert}"])
|
364
310
|
end
|
@@ -366,17 +312,19 @@ module Interferon
|
|
366
312
|
# did the alert apply to any hosts?
|
367
313
|
if counters[:applies] == 0
|
368
314
|
statsd.gauge('alerts.evaluate.never_applies', 1, tags: ["alert:#{alert}"])
|
369
|
-
log.warn
|
315
|
+
log.warn("alert #{alert} did not apply to any hosts")
|
316
|
+
alert_generation_errors << alert
|
370
317
|
else
|
371
318
|
statsd.gauge('alerts.evaluate.never_applies', 0, tags: ["alert:#{alert}"])
|
372
319
|
end
|
373
|
-
alerts_generated
|
320
|
+
[alerts_generated, alert_generation_errors]
|
374
321
|
end
|
375
322
|
|
376
|
-
result.each do |
|
377
|
-
alerts_queue.merge!
|
323
|
+
result.each do |generated_alerts, alert_generation_errors|
|
324
|
+
alerts_queue.merge!(generated_alerts)
|
325
|
+
all_alert_generation_errors += alert_generation_errors
|
378
326
|
end
|
379
|
-
alerts_queue
|
327
|
+
[alerts_queue, all_alert_generation_errors]
|
380
328
|
end
|
381
329
|
end
|
382
330
|
end
|
@@ -119,10 +119,10 @@ module Interferon::Destinations
|
|
119
119
|
@stats[:manually_created_alerts] = \
|
120
120
|
@existing_alerts.reject { |_n, a| a['message'].include?(ALERT_KEY) }.length
|
121
121
|
|
122
|
-
log.info
|
123
|
-
@existing_alerts.length
|
124
|
-
@stats[:manually_created_alerts]
|
125
|
-
|
122
|
+
log.info(
|
123
|
+
"datadog: found #{@existing_alerts.length} existing alerts; " \
|
124
|
+
"#{@stats[:manually_created_alerts]} were manually created"
|
125
|
+
)
|
126
126
|
end
|
127
127
|
|
128
128
|
@existing_alerts
|
@@ -197,13 +197,25 @@ Options:
|
|
197
197
|
EOM
|
198
198
|
log.info("creating new alert #{alert['name']}: #{new_alert_text}")
|
199
199
|
|
200
|
-
|
201
|
-
alert['monitor_type'],
|
202
|
-
datadog_query,
|
200
|
+
monitor_options = {
|
203
201
|
name: alert['name'],
|
204
|
-
message:
|
205
|
-
options: alert_options
|
206
|
-
|
202
|
+
message: message,
|
203
|
+
options: alert_options,
|
204
|
+
}
|
205
|
+
|
206
|
+
if @dry_run
|
207
|
+
@dog.validate_monitor(
|
208
|
+
alert['monitor_type'],
|
209
|
+
datadog_query,
|
210
|
+
monitor_options
|
211
|
+
)
|
212
|
+
else
|
213
|
+
@dog.monitor(
|
214
|
+
alert['monitor_type'],
|
215
|
+
datadog_query,
|
216
|
+
monitor_options
|
217
|
+
)
|
218
|
+
end
|
207
219
|
end
|
208
220
|
|
209
221
|
def update_datadog_alert(alert, datadog_query, message, alert_options, existing_alert)
|
@@ -229,21 +241,23 @@ EOM
|
|
229
241
|
diff = Diffy::Diff.new(existing_alert_text, new_alert_text, context: 1)
|
230
242
|
log.info("updating existing alert #{id} (#{alert['name']}):\n#{diff}")
|
231
243
|
|
244
|
+
monitor_options = {
|
245
|
+
name: alert['name'],
|
246
|
+
message: message,
|
247
|
+
options: alert_options,
|
248
|
+
}
|
249
|
+
|
232
250
|
if @dry_run
|
233
|
-
resp = @dog.
|
251
|
+
resp = @dog.validate_monitor(
|
234
252
|
alert['monitor_type'],
|
235
253
|
datadog_query,
|
236
|
-
|
237
|
-
message: self.class.generate_message(alert, []),
|
238
|
-
options: alert_options
|
254
|
+
monitor_options
|
239
255
|
)
|
240
256
|
elsif self.class.same_monitor_type(alert['monitor_type'], existing_alert['type'])
|
241
257
|
resp = @dog.update_monitor(
|
242
258
|
id,
|
243
259
|
datadog_query,
|
244
|
-
|
245
|
-
message: message,
|
246
|
-
options: alert_options
|
260
|
+
monitor_options
|
247
261
|
)
|
248
262
|
|
249
263
|
# Unmute existing alerts that have been unsilenced.
|
@@ -259,9 +273,7 @@ EOM
|
|
259
273
|
resp = @dog.monitor(
|
260
274
|
alert['monitor_type'],
|
261
275
|
datadog_query,
|
262
|
-
|
263
|
-
message: message,
|
264
|
-
options: alert_options
|
276
|
+
monitor_options
|
265
277
|
)
|
266
278
|
end
|
267
279
|
end
|
@@ -273,6 +285,7 @@ EOM
|
|
273
285
|
@stats[:alerts_to_be_deleted] += 1
|
274
286
|
log.info("deleting alert: #{alert['name']}")
|
275
287
|
|
288
|
+
# Safety to protect against accidental dry_run deletion
|
276
289
|
unless @dry_run
|
277
290
|
alert['id'].each do |alert_id|
|
278
291
|
resp = @dog.delete_monitor(alert_id)
|
@@ -290,14 +303,6 @@ EOM
|
|
290
303
|
end
|
291
304
|
end
|
292
305
|
|
293
|
-
def remove_alert_by_id(alert_id)
|
294
|
-
# This should only be used by dry-run to clean up created dry-run alerts
|
295
|
-
log.debug("deleting alert, id: #{alert_id}")
|
296
|
-
resp = @dog.delete_monitor(alert_id)
|
297
|
-
code = resp[0].to_i
|
298
|
-
log_datadog_response_code(resp, code, :deleting)
|
299
|
-
end
|
300
|
-
|
301
306
|
def need_update(alert_people_pair, existing_alerts_from_api)
|
302
307
|
alert, people = alert_people_pair
|
303
308
|
existing = existing_alerts_from_api[alert['name']]
|
@@ -386,7 +391,7 @@ EOM
|
|
386
391
|
" response was #{resp[0]}:'#{resp[1].inspect}'")
|
387
392
|
end
|
388
393
|
|
389
|
-
|
394
|
+
# unknown (prob. datadog) error:
|
390
395
|
elsif code > 400 || code == -1
|
391
396
|
@stats[:api_unknown_errors] += 1
|
392
397
|
unless alert.nil?
|
@@ -16,7 +16,7 @@ module Interferon::GroupSources
|
|
16
16
|
@paths.each do |path|
|
17
17
|
path = File.expand_path(path)
|
18
18
|
unless Dir.exist?(path)
|
19
|
-
log.warn
|
19
|
+
log.warn("no such directory #{path} for reading group files")
|
20
20
|
next
|
21
21
|
end
|
22
22
|
|
@@ -24,9 +24,9 @@ module Interferon::GroupSources
|
|
24
24
|
begin
|
25
25
|
group = YAML.parse(File.read(group_file))
|
26
26
|
rescue YAML::SyntaxError => e
|
27
|
-
log.error
|
27
|
+
log.error("syntax error in group file #{group_file}: #{e}")
|
28
28
|
rescue StandardError => e
|
29
|
-
log.warn
|
29
|
+
log.warn("error reading group file #{group_file}: #{e}")
|
30
30
|
else
|
31
31
|
group = group.to_ruby
|
32
32
|
if group['people']
|
@@ -44,7 +44,7 @@ module Interferon::GroupSources
|
|
44
44
|
if groups.include?(group)
|
45
45
|
groups[aliased_group] = groups[group]
|
46
46
|
else
|
47
|
-
log.warn
|
47
|
+
log.warn("Alias not found for #{group} but used by #{aliased_group} in #{group_file}")
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
data/lib/interferon/loaders.rb
CHANGED
@@ -35,12 +35,12 @@ module Interferon
|
|
35
35
|
options = source['options'] || {}
|
36
36
|
|
37
37
|
if type.nil?
|
38
|
-
log.warn
|
38
|
+
log.warn("#{@loader_for} ##{idx} does not have a 'type' set; 'type' is required")
|
39
39
|
next
|
40
40
|
end
|
41
41
|
|
42
42
|
unless enabled
|
43
|
-
log.info
|
43
|
+
log.info("skipping #{@loader_for} #{type} because it's not enabled")
|
44
44
|
next
|
45
45
|
end
|
46
46
|
|
@@ -68,9 +68,11 @@ module Interferon
|
|
68
68
|
require full_path
|
69
69
|
klass = @module.const_get(class_name)
|
70
70
|
rescue LoadError => e
|
71
|
-
log.debug
|
71
|
+
log.debug("LoadError looking for #{@loader_for} file #{type} at #{full_path}: #{e}")
|
72
72
|
rescue NameError => e
|
73
|
-
log.debug
|
73
|
+
log.debug(
|
74
|
+
"NameError looking for #{@loader_for} class #{class_name} in #{full_path}: #{e}"
|
75
|
+
)
|
74
76
|
end
|
75
77
|
|
76
78
|
break if klass
|
data/lib/interferon/version.rb
CHANGED
@@ -88,8 +88,8 @@ describe Interferon::Destinations::Datadog do
|
|
88
88
|
datadog.create_alert(mock_alert, mock_people)
|
89
89
|
end
|
90
90
|
|
91
|
-
it '
|
92
|
-
expect_any_instance_of(Dogapi::Client).to receive(:
|
91
|
+
it 'calls validate monitor in dry-run' do
|
92
|
+
expect_any_instance_of(Dogapi::Client).to receive(:validate_monitor).and_return([200, ''])
|
93
93
|
expect(datadog_dry_run).to receive(:existing_alerts).and_return(mock_response)
|
94
94
|
datadog_dry_run.create_alert(mock_alert, mock_people)
|
95
95
|
end
|
@@ -114,12 +114,4 @@ describe Interferon::Destinations::Datadog do
|
|
114
114
|
datadog.remove_alert(mock_alert)
|
115
115
|
end
|
116
116
|
end
|
117
|
-
|
118
|
-
describe '.remove_alert_by_id' do
|
119
|
-
it 'calls dogapi delete_monitor' do
|
120
|
-
expect_any_instance_of(Dogapi::Client).to receive(:delete_monitor)
|
121
|
-
.with(mock_alert_id).and_return([200, ''])
|
122
|
-
datadog.remove_alert_by_id(mock_alert_id)
|
123
|
-
end
|
124
|
-
end
|
125
117
|
end
|
data/spec/lib/interferon_spec.rb
CHANGED
@@ -71,130 +71,135 @@ describe Interferon::Destinations::Datadog do
|
|
71
71
|
end
|
72
72
|
|
73
73
|
context 'dry_run_update_alerts_on_destination' do
|
74
|
-
let(:interferon) { Interferon::Interferon.new(
|
74
|
+
let(:interferon) { Interferon::Interferon.new({ 'processes' => 0 }, true) }
|
75
75
|
|
76
76
|
before do
|
77
77
|
allow_any_instance_of(MockAlert).to receive(:evaluate)
|
78
78
|
allow(dest).to receive(:remove_alert)
|
79
|
-
allow(dest).to receive(:remove_alert_by_id)
|
80
79
|
allow(dest).to receive(:report_stats)
|
81
80
|
end
|
82
81
|
|
83
82
|
it 'does not re-run existing alerts' do
|
84
|
-
|
83
|
+
mock_alerts = mock_existing_alerts
|
85
84
|
expect(dest).not_to receive(:create_alert)
|
86
|
-
expect(dest).not_to receive(:remove_alert_by_id)
|
87
85
|
|
88
|
-
interferon.
|
89
|
-
|
86
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(
|
87
|
+
['host'],
|
88
|
+
[mock_alerts['name1'], mock_alerts['name2']].map { |x| test_alert_from_json(x) },
|
89
|
+
{}
|
90
90
|
)
|
91
|
+
|
92
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
91
93
|
end
|
92
94
|
|
93
95
|
it 'runs added alerts' do
|
94
|
-
|
95
|
-
|
96
|
+
mock_alerts = mock_existing_alerts
|
97
|
+
alerts = [mock_alerts['name1'], mock_alerts['name2']].map { |x| test_alert_from_json(x) }
|
98
|
+
alerts << create_test_alert('name3', 'testquery3', '')
|
99
|
+
|
100
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], alerts, {})
|
101
|
+
|
96
102
|
expect(dest).to receive(:create_alert).once.and_call_original
|
97
|
-
expect(dest).to receive(:remove_alert_by_id).with('3').once
|
98
103
|
|
99
|
-
interferon.update_alerts_on_destination(
|
100
|
-
dest, ['host'], [alerts['name1'], alerts['name2'], added], {}
|
101
|
-
)
|
104
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
102
105
|
end
|
103
106
|
|
104
107
|
it 'runs updated alerts' do
|
105
108
|
added = create_test_alert('name1', 'testquery3', '')
|
109
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [added], {})
|
106
110
|
expect(dest).to receive(:create_alert).once.and_call_original
|
107
|
-
expect(dest).to receive(:remove_alert_by_id).with('1').once
|
108
111
|
|
109
|
-
interferon.update_alerts_on_destination(dest,
|
112
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
110
113
|
end
|
111
114
|
|
112
|
-
it '
|
113
|
-
expect(dest).
|
115
|
+
it 'does not delete old alerts' do
|
116
|
+
expect(dest).to_not receive(:remove_alert)
|
117
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [], {})
|
114
118
|
|
115
|
-
interferon.update_alerts_on_destination(dest,
|
119
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
116
120
|
end
|
117
121
|
|
118
|
-
it '
|
122
|
+
it 'does not delete duplicate old alerts' do
|
119
123
|
alert1 = mock_alert_json('name1', 'testquery1', '', nil, [1, 2, 3])
|
120
124
|
alert2 = mock_alert_json('name2', 'testquery2', '')
|
121
125
|
existing_alerts = { 'name1' => alert1, 'name2' => alert2 }
|
126
|
+
|
122
127
|
dest = MockDest.new(existing_alerts)
|
123
|
-
allow(dest).to receive(:remove_alert)
|
124
|
-
allow(dest).to receive(:remove_alert_by_id)
|
125
128
|
allow(dest).to receive(:report_stats)
|
126
129
|
|
127
|
-
|
128
|
-
|
130
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [], {})
|
131
|
+
|
132
|
+
expect(dest).to_not receive(:remove_alert)
|
129
133
|
|
130
|
-
interferon.update_alerts_on_destination(dest,
|
134
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
131
135
|
end
|
132
136
|
|
133
|
-
it '
|
137
|
+
it 'does not delete duplicate old alerts when creating new alert' do
|
134
138
|
alert1 = mock_alert_json('name1', 'testquery1', '', nil, [1, 2, 3])
|
135
139
|
alert2 = mock_alert_json('name2', 'testquery2', '')
|
136
140
|
existing_alerts = { 'name1' => alert1, 'name2' => alert2 }
|
141
|
+
|
137
142
|
dest = MockDest.new(existing_alerts)
|
138
|
-
allow(dest).to receive(:remove_alert)
|
139
|
-
allow(dest).to receive(:remove_alert_by_id)
|
140
143
|
allow(dest).to receive(:report_stats)
|
141
144
|
|
142
145
|
added = create_test_alert('name1', 'testquery1', '')
|
146
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [added], {})
|
143
147
|
|
144
|
-
|
145
|
-
# during dry run
|
146
|
-
expect(dest).to_not receive(:remove_alert).with(existing_alerts['name1'])
|
147
|
-
expect(dest).to receive(:remove_alert).with(existing_alerts['name2'])
|
148
|
+
expect(dest).to_not receive(:remove_alert)
|
148
149
|
|
149
|
-
interferon.update_alerts_on_destination(dest,
|
150
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
150
151
|
end
|
151
152
|
end
|
152
153
|
|
153
154
|
context 'update_alerts_on_destination' do
|
154
|
-
let(:interferon) { Interferon::Interferon.new(
|
155
|
+
let(:interferon) { Interferon::Interferon.new({ 'processes' => 0 }, false) }
|
155
156
|
|
156
157
|
before do
|
157
158
|
allow_any_instance_of(MockAlert).to receive(:evaluate)
|
158
159
|
allow(dest).to receive(:remove_alert)
|
159
|
-
allow(dest).to receive(:remove_alert_by_id)
|
160
160
|
allow(dest).to receive(:report_stats)
|
161
161
|
end
|
162
162
|
|
163
163
|
it 'does not re-run existing alerts' do
|
164
|
-
|
164
|
+
mock_alerts = mock_existing_alerts
|
165
165
|
expect(dest).not_to receive(:create_alert)
|
166
|
-
expect(dest).not_to receive(:remove_alert_by_id)
|
167
166
|
|
168
|
-
interferon.
|
169
|
-
|
167
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(
|
168
|
+
['host'],
|
169
|
+
[mock_alerts['name1'], mock_alerts['name2']].map { |x| test_alert_from_json(x) },
|
170
|
+
{}
|
170
171
|
)
|
172
|
+
|
173
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
171
174
|
end
|
172
175
|
|
173
176
|
it 'runs added alerts' do
|
174
|
-
|
175
|
-
|
177
|
+
mock_alerts = mock_existing_alerts
|
178
|
+
alerts = [mock_alerts['name1'], mock_alerts['name2']].map { |x| test_alert_from_json(x) }
|
179
|
+
alerts << create_test_alert('name3', 'testquery3', '')
|
180
|
+
|
181
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], alerts, {})
|
182
|
+
|
176
183
|
expect(dest).to receive(:create_alert).once.and_call_original
|
177
|
-
expect(dest).not_to receive(:remove_alert_by_id).with('3')
|
178
184
|
|
179
|
-
interferon.update_alerts_on_destination(
|
180
|
-
dest, ['host'], [alerts['name1'], alerts['name2'], added], {}
|
181
|
-
)
|
185
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
182
186
|
end
|
183
187
|
|
184
188
|
it 'runs updated alerts' do
|
185
189
|
added = create_test_alert('name1', 'testquery3', '')
|
190
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [added], {})
|
186
191
|
expect(dest).to receive(:create_alert).once.and_call_original
|
187
|
-
expect(dest).not_to receive(:remove_alert_by_id).with('1')
|
188
192
|
|
189
|
-
interferon.update_alerts_on_destination(dest,
|
193
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
190
194
|
end
|
191
195
|
|
192
196
|
it 'deletes old alerts' do
|
193
197
|
alerts = mock_existing_alerts
|
198
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [], {})
|
194
199
|
expect(dest).to receive(:remove_alert).with(alerts['name1'])
|
195
200
|
expect(dest).to receive(:remove_alert).with(alerts['name2'])
|
196
201
|
|
197
|
-
interferon.update_alerts_on_destination(dest,
|
202
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
198
203
|
end
|
199
204
|
|
200
205
|
it 'deletes duplicate old alerts' do
|
@@ -203,13 +208,14 @@ describe Interferon::Destinations::Datadog do
|
|
203
208
|
existing_alerts = { 'name1' => alert1, 'name2' => alert2 }
|
204
209
|
dest = MockDest.new(existing_alerts)
|
205
210
|
allow(dest).to receive(:remove_alert)
|
206
|
-
allow(dest).to receive(:remove_alert_by_id)
|
207
211
|
allow(dest).to receive(:report_stats)
|
208
212
|
|
213
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [], {})
|
214
|
+
|
209
215
|
expect(dest).to receive(:remove_alert).with(existing_alerts['name1'])
|
210
216
|
expect(dest).to receive(:remove_alert).with(existing_alerts['name2'])
|
211
217
|
|
212
|
-
interferon.update_alerts_on_destination(dest,
|
218
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
213
219
|
end
|
214
220
|
|
215
221
|
it 'deletes duplicate old alerts when creating new alert' do
|
@@ -220,19 +226,21 @@ describe Interferon::Destinations::Datadog do
|
|
220
226
|
allow(dest).to receive(:report_stats)
|
221
227
|
|
222
228
|
added = create_test_alert('name1', 'testquery1', '')
|
229
|
+
alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [added], {})
|
223
230
|
|
224
231
|
expect(dest).to receive(:remove_alert).with(
|
225
232
|
mock_alert_json('name1', 'testquery1', '', nil, [2, 3])
|
226
233
|
)
|
227
234
|
expect(dest).to receive(:remove_alert).with(existing_alerts['name2'])
|
228
235
|
|
229
|
-
interferon.update_alerts_on_destination(dest,
|
236
|
+
interferon.update_alerts_on_destination(dest, alerts_queue)
|
230
237
|
end
|
231
238
|
end
|
232
239
|
|
233
240
|
def mock_existing_alerts
|
234
|
-
|
235
|
-
|
241
|
+
mock_message = Interferon::Destinations::Datadog::ALERT_KEY
|
242
|
+
alert1 = mock_alert_json('name1', 'testquery1', mock_message)
|
243
|
+
alert2 = mock_alert_json('name2', 'testquery2', mock_message)
|
236
244
|
{ 'name1' => alert1, 'name2' => alert2 }
|
237
245
|
end
|
238
246
|
|
@@ -274,6 +282,15 @@ describe Interferon::Destinations::Datadog do
|
|
274
282
|
}
|
275
283
|
end
|
276
284
|
|
285
|
+
def test_alert_from_json(mock_alert_json)
|
286
|
+
create_test_alert(
|
287
|
+
mock_alert_json['name'],
|
288
|
+
mock_alert_json['query'],
|
289
|
+
mock_alert_json['message'].sub(/#{Interferon::Destinations::Datadog::ALERT_KEY}$/, ''),
|
290
|
+
mock_alert_json['options']
|
291
|
+
)
|
292
|
+
end
|
293
|
+
|
277
294
|
def create_test_alert(name, datadog_query, message, options = {})
|
278
295
|
options = DEFAULT_OPTIONS.merge(options)
|
279
296
|
|