interferon 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/bin/interferon +6 -12
- data/lib/interferon.rb +61 -113
- data/lib/interferon/destinations/datadog.rb +34 -29
- data/lib/interferon/group_sources/filesystem.rb +4 -4
- data/lib/interferon/loaders.rb +6 -4
- data/lib/interferon/version.rb +1 -1
- data/spec/lib/interferon/destinations/datadog_spec.rb +2 -10
- data/spec/lib/interferon_spec.rb +68 -51
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1e06dab2517dc06edf86da60cb55249bf63d031d
+  data.tar.gz: e370782305dda94dca3215c57dfc8bceb633ed77
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bef7463ead0dc65ab0c2e8018feb13a85a419ecc02e2403e9ee15150cc29089d8d471609ae9c7ae421c0f6c112fd39f5746b9cff7288c43535ffb32cf4c88d2d
+  data.tar.gz: f8b07e1239591b50e07facca30186412c74eda5b19bd4d128b634bf1a8c477b449aaa7894ff6fc3181ab781f1011901778e37a663f6d337a56abc21dc9bd8462
data/README.md
CHANGED
@@ -29,6 +29,7 @@ It accepts the following parameters:
 * `group_sources` -- a list of sources which can return groups of people to alert
 * `host_sources` -- a list of sources which can read inventory systems and return lists of hosts to monitor
 * `destinations` -- a list of alerting providers, which can monitor metrics and dispatch alerts as specified in your alerts dsl files
+* `processes` -- number of processes to run the alert generation on (optional; default is to use all available cores)

 For more information, see [config.example.yaml](config.example.yaml) file in this repo.

data/bin/interferon
CHANGED
@@ -10,11 +10,11 @@ options = {}
 optparse = OptionParser.new do |opts|
   opts.banner = %(Usage: interferon --config /path/to/interferon/config)

-  opts.on('-c
+  opts.on('-c', '--config config', String, 'Path to interferon config') do |key|
     options[:config] = key
   end

-  opts.on('-n', '--dry-run', "Don
+  opts.on('-n', '--dry-run', "Don't update alert destinations") do
     options[:dry_run] = true
   end

@@ -26,7 +26,7 @@ end

 def parseconfig(filename)
   begin
-
+    config = YAML.parse(File.read(filename))
   rescue Errno::ENOENT => e
     raise ArgumentError, "config file does not exist:\n#{e.inspect}"
   rescue Errno::EACCES => e
@@ -34,7 +34,7 @@ def parseconfig(filename)
   rescue YAML::SyntaxError => e
     raise "config file #{filename} contains invalid YAML:\n#{e.inspect}"
   end
-
+  config.to_ruby
 end

 # parse command line arguments
@@ -55,13 +55,7 @@ end

 ENV['DEBUG'] = '1' if config['verbose_logging']

-
-
-  config['group_sources'] || {},
-  config['host_sources'],
-  config['destinations']
-)
-
-a.run(options[:dry_run])
+interferon = Interferon::Interferon.new(config, options[:dry_run])
+interferon.run

 puts 'interferon signaling complete!'
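Taken together, the bin/interferon changes mean the parsed config hash is now handed to the constructor whole, with the dry-run flag as a second argument, instead of being unpacked into positional arguments. A minimal sketch of the new entry point, assuming the 0.2.0 gem is installed; the config path and dry-run value below are placeholders:

```ruby
#!/usr/bin/env ruby
# Sketch of the 0.2.0 invocation, mirroring the updated bin/interferon above.
require 'yaml'
require 'interferon'

# Placeholder path; parseconfig in bin/interferon does the same YAML round-trip.
config = YAML.parse(File.read('/etc/interferon/config.yaml')).to_ruby
ENV['DEBUG'] = '1' if config['verbose_logging']

# 0.2.0 takes the whole config hash (including the new optional 'processes'
# key) plus a dry_run flag, instead of separate positional arguments.
interferon = Interferon::Interferon.new(config, false)
interferon.run
```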
data/lib/interferon.rb
CHANGED
@@ -23,28 +23,26 @@ module Interferon
     # groups_sources is a hash from type => options for each group source
     # host_sources is a hash from type => options for each host source
     # destinations is a similar hash from type => options for each alerter
-    def initialize(
-
-      @
-      @
-      @
-      @
+    def initialize(config, dry_run = false)
+      @alerts_repo_path = config['alerts_repo_path']
+      @group_sources = config['group_sources'] || {}
+      @host_sources = config['host_sources']
+      @destinations = config['destinations']
+      @processes = config['processes']
       @dry_run = dry_run
-      @processes = processes
       @request_shutdown = false
     end

-    def run
+    def run
       Signal.trap('TERM') do
-        log.info
+        log.info('SIGTERM received. shutting down gracefully...')
         @request_shutdown = true
       end
-      @dry_run = dry_run
       run_desc = @dry_run ? 'dry run' : 'run'
-      log.info
+      log.info("beginning alerts #{run_desc}")

       alerts = read_alerts
-      groups = read_groups(@
+      groups = read_groups(@group_sources)
       hosts = read_hosts(@host_sources)

       @destinations.each do |dest|
@@ -55,9 +53,9 @@ module Interferon
       update_alerts(@destinations, hosts, alerts, groups)

       if @request_shutdown
-        log.info
+        log.info("interferon #{run_desc} shut down by SIGTERM")
       else
-        log.info
+        log.info("interferon #{run_desc} complete")
       end
     end

@@ -75,14 +73,14 @@ module Interferon
        begin
          alert = Alert.new(alert_file)
        rescue StandardError => e
-          log.warn
+          log.warn("error reading alert file #{alert_file}: #{e}")
          failed += 1
        else
          alerts << alert
        end
      end

-      log.info
+      log.info("read #{alerts.count} alerts files from #{path}")

      statsd.gauge('alerts.read.count', alerts.count)
      statsd.gauge('alerts.read.failed', failed)
@@ -106,12 +104,16 @@ module Interferon
          people_count += people.count
        end

-        log.info
-
+        log.info(
+          "read #{people_count} people in #{source_groups.count} groups " \
+          "from source #{source.class.name}"
+        )
      end

-      log.info
-
+      log.info(
+        "total of #{groups.values.flatten.count} people in #{groups.count} groups " \
+        "from #{sources.count} sources"
+      )

      statsd.gauge('groups.sources', sources.count)
      statsd.gauge('groups.count', groups.count)
@@ -131,36 +133,37 @@ module Interferon
        hosts << source_hosts

        statsd.gauge('hosts.count', source_hosts.count, tags: ["source:#{source.class.name}"])
-        log.info
+        log.info("read #{source_hosts.count} hosts from source #{source.class.name}")
      end

      hosts.flatten!
-      log.info
+      log.info("total of #{hosts.count} entities from #{sources.count} sources")

      hosts
    end

    def update_alerts(destinations, hosts, alerts, groups)
+      alerts_queue, alert_errors = build_alerts_queue(hosts, alerts, groups)
+      if @dry_run && !alert_errors.empty?
+        raise "Alerts failed to apply or evaluate for all hosts: #{alerts.map(&:to_s).join(', ')}"
+      end
+
      loader = DestinationsLoader.new([@alerts_repo_path])
      loader.get_all(destinations).each do |dest|
        break if @request_shutdown
-        log.info
-        update_alerts_on_destination(dest,
+        log.info("updating alerts on #{dest.class.name}")
+        update_alerts_on_destination(dest, alerts_queue)
      end
    end

-    def update_alerts_on_destination(dest,
+    def update_alerts_on_destination(dest, alerts_queue)
      # track some counters/stats per destination
      start_time = Time.new.to_f

      # get already-defined alerts
      existing_alerts = dest.existing_alerts

-
-        do_dry_run_update(dest, hosts, alerts, existing_alerts, groups)
-      else
-        do_regular_update(dest, hosts, alerts, existing_alerts, groups)
-      end
+      run_update(dest, alerts_queue, existing_alerts)

      unless @request_shutdown
        # run time summary
@@ -170,7 +173,7 @@ module Interferon
          run_time,
          tags: ["destination:#{dest.class.name}"]
        )
-        log.info
+        log.info("#{dest.class.name} : run completed in %.2f seconds" % run_time)

        # report destination stats
        dest.report_stats
@@ -179,73 +182,7 @@ module Interferon
      raise dest.api_errors.to_s if @dry_run && !dest.api_errors.empty?
    end

-    def
-      # Track these to clean up dry-run alerts from previous runs
-      existing_dry_run_alerts = []
-      existing_alerts.each do |name, alert|
-        if name.start_with?(DRY_RUN_ALERTS_NAME_PREFIX)
-          existing_dry_run_alerts << [alert['name'], [alert['id']]]
-          existing_alerts.delete(name)
-        end
-      end
-
-      alerts_queue = build_alerts_queue(hosts, alerts, groups)
-      updates_queue = alerts_queue.reject do |_name, alert_people_pair|
-        !dest.need_update(alert_people_pair, existing_alerts)
-      end
-
-      # Add dry-run prefix to alerts and delete id to avoid impacting real alerts
-      existing_alerts.keys.each do |name|
-        existing_alert = existing_alerts[name]
-        dry_run_alert_name = DRY_RUN_ALERTS_NAME_PREFIX + name
-        existing_alert['name'] = dry_run_alert_name
-        existing_alert['id'] = [nil]
-        existing_alerts[dry_run_alert_name] = existing_alerts.delete(name)
-      end
-
-      # Build new queue with dry-run prefixes and ensure they are silenced
-      alerts_queue.each do |_name, alert_people_pair|
-        alert, _people = alert_people_pair
-        dry_run_alert_name = DRY_RUN_ALERTS_NAME_PREFIX + alert['name']
-        alert.change_name(dry_run_alert_name)
-        alert.silence
-      end
-
-      # Create alerts in destination
-      created_alerts = create_alerts(dest, updates_queue)
-
-      # Existing alerts are pruned until all that remains are
-      # alerts that aren't being generated anymore
-      to_remove = existing_alerts.dup
-      alerts_queue.each do |_name, alert_people_pair|
-        alert, _people = alert_people_pair
-        old_alerts = to_remove[alert['name']]
-
-        next if old_alerts.nil?
-        if old_alerts['id'].length == 1
-          to_remove.delete(alert['name'])
-        else
-          old_alerts['id'] = old_alerts['id'].drop(1)
-        end
-      end
-
-      # Clean up alerts not longer being generated
-      to_remove.each do |_name, alert|
-        break if @request_shutdown
-        dest.remove_alert(alert)
-      end
-
-      # Clean up dry-run created alerts
-      (created_alerts + existing_dry_run_alerts).each do |alert_id_pair|
-        alert_ids = alert_id_pair[1]
-        alert_ids.each do |alert_id|
-          dest.remove_alert_by_id(alert_id)
-        end
-      end
-    end
-
-    def do_regular_update(dest, hosts, alerts, existing_alerts, groups)
-      alerts_queue = build_alerts_queue(hosts, alerts, groups)
+    def run_update(dest, alerts_queue, existing_alerts)
      updates_queue = alerts_queue.reject do |_name, alert_people_pair|
        !dest.need_update(alert_people_pair, existing_alerts)
      end
@@ -253,6 +190,9 @@ module Interferon
      # Create alerts in destination
      create_alerts(dest, updates_queue)

+      # Do not continue to remove alerts during dry-run
+      return if @dry_run
+
      # Existing alerts are pruned until all that remains are
      # alerts that aren't being generated anymore
      to_remove = existing_alerts.dup
@@ -281,12 +221,12 @@ module Interferon
      concurrency = dest.concurrency || 10
      unless @request_shutdown
        threads = Array.new(concurrency) do |i|
-          log.info
+          log.info("thread #{i} created")
          t = Thread.new do
            while (name = alerts_to_create.shift)
              break if @request_shutdown
              cur_alert, people = alerts_queue[name]
-              log.debug
+              log.debug("creating alert for #{cur_alert[:name]}")
              alert_key_ids << dest.create_alert(cur_alert, people)
            end
          end
@@ -300,16 +240,20 @@ module Interferon

    def build_alerts_queue(hosts, alerts, groups)
      alerts_queue = {}
+      all_alert_generation_errors = []
+
      # create or update alerts; mark when we've done that
      result = Parallel.map(alerts, in_processes: @processes) do |alert|
        break if @request_shutdown
        alerts_generated = {}
+        alert_generation_errors = []
        counters = {
          errors: 0,
          evals: 0,
          applies: 0,
          hosts: hosts.length,
        }
+
        last_eval_error = nil

        hosts.each do |hostinfo|
@@ -317,7 +261,7 @@ module Interferon
            alert.evaluate(hostinfo)
            counters[:evals] += 1
          rescue StandardError => e
-            log.debug
+            log.debug("Evaluation of alert #{alert} failed in the context of host #{hostinfo}")
            counters[:errors] += 1
            last_eval_error = e
            next
@@ -325,7 +269,7 @@ module Interferon

          # don't define an alert that doesn't apply to this hostinfo
          unless alert[:applies]
-            log.debug
+            log.debug("alert #{alert[:name]} doesn't apply to #{hostinfo.inspect}")
            next
          end

@@ -348,17 +292,19 @@ module Interferon
        statsd.gauge('alerts.evaluate.applies', counters[:applies], tags: ["alert:#{alert}"])

        if counters[:applies] > 0
-          log.info
+          log.info("alert #{alert} applies to #{counters[:applies]} of #{counters[:hosts]} hosts")
        end

        # did the alert fail to evaluate on all hosts?
        if counters[:errors] == counters[:hosts] && !last_eval_error.nil?
-          log.error
-          log.error
-
+          log.error("alert #{alert} failed to evaluate in the context of all hosts!")
+          log.error("last error on alert #{alert}: #{last_eval_error}")
          statsd.gauge('alerts.evaluate.failed_on_all', 1, tags: ["alert:#{alert}"])
-          log.debug
-
+          log.debug(
+            "alert #{alert}: " \
+            "error #{last_eval_error}\n#{last_eval_error.backtrace.join("\n")}"
+          )
+          alert_generation_errors << alert
        else
          statsd.gauge('alerts.evaluate.failed_on_all', 0, tags: ["alert:#{alert}"])
        end
@@ -366,17 +312,19 @@ module Interferon
        # did the alert apply to any hosts?
        if counters[:applies] == 0
          statsd.gauge('alerts.evaluate.never_applies', 1, tags: ["alert:#{alert}"])
-          log.warn
+          log.warn("alert #{alert} did not apply to any hosts")
+          alert_generation_errors << alert
        else
          statsd.gauge('alerts.evaluate.never_applies', 0, tags: ["alert:#{alert}"])
        end
-        alerts_generated
+        [alerts_generated, alert_generation_errors]
      end

-      result.each do |
-        alerts_queue.merge!
+      result.each do |generated_alerts, alert_generation_errors|
+        alerts_queue.merge!(generated_alerts)
+        all_alert_generation_errors += alert_generation_errors
      end
-      alerts_queue
+      [alerts_queue, all_alert_generation_errors]
    end
  end
end
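The structural change in this file is that build_alerts_queue now returns an [alerts_queue, generation_errors] pair; update_alerts builds the queue once, raises during a dry run if any alert failed to apply or evaluate everywhere, and hands the same queue to every destination. A small illustrative sketch of consuming that pair; the hash contents below are made-up stand-ins, not the gem's real alert objects:

```ruby
# Illustrative stand-ins for the pair returned by build_alerts_queue in 0.2.0:
# the queue maps an alert name to an [alert, people] pair, and the error list
# holds alerts that applied to no host or failed to evaluate on every host.
alerts_queue = {
  'disk_space_low' => [{ 'name' => 'disk_space_low' }, ['ops-team']],
}
generation_errors = []

queue, errors = [alerts_queue, generation_errors]
raise "dry run failed for: #{errors.map(&:to_s).join(', ')}" unless errors.empty?

queue.each do |name, (alert, people)|
  puts "#{name}: would notify #{people.join(', ')} (#{alert['name']})"
end
```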
data/lib/interferon/destinations/datadog.rb
CHANGED
@@ -119,10 +119,10 @@ module Interferon::Destinations
      @stats[:manually_created_alerts] = \
        @existing_alerts.reject { |_n, a| a['message'].include?(ALERT_KEY) }.length

-      log.info
-        @existing_alerts.length
-        @stats[:manually_created_alerts]
-
+      log.info(
+        "datadog: found #{@existing_alerts.length} existing alerts; " \
+        "#{@stats[:manually_created_alerts]} were manually created"
+      )
    end

    @existing_alerts
@@ -197,13 +197,25 @@ Options:
 EOM
      log.info("creating new alert #{alert['name']}: #{new_alert_text}")

-
-        alert['monitor_type'],
-        datadog_query,
+      monitor_options = {
        name: alert['name'],
-        message:
-        options: alert_options
-
+        message: message,
+        options: alert_options,
+      }
+
+      if @dry_run
+        @dog.validate_monitor(
+          alert['monitor_type'],
+          datadog_query,
+          monitor_options
+        )
+      else
+        @dog.monitor(
+          alert['monitor_type'],
+          datadog_query,
+          monitor_options
+        )
+      end
    end

    def update_datadog_alert(alert, datadog_query, message, alert_options, existing_alert)
@@ -229,21 +241,23 @@ EOM
      diff = Diffy::Diff.new(existing_alert_text, new_alert_text, context: 1)
      log.info("updating existing alert #{id} (#{alert['name']}):\n#{diff}")

+      monitor_options = {
+        name: alert['name'],
+        message: message,
+        options: alert_options,
+      }
+
      if @dry_run
-        resp = @dog.
+        resp = @dog.validate_monitor(
          alert['monitor_type'],
          datadog_query,
-
-          message: self.class.generate_message(alert, []),
-          options: alert_options
+          monitor_options
        )
      elsif self.class.same_monitor_type(alert['monitor_type'], existing_alert['type'])
        resp = @dog.update_monitor(
          id,
          datadog_query,
-
-          message: message,
-          options: alert_options
+          monitor_options
        )

        # Unmute existing alerts that have been unsilenced.
@@ -259,9 +273,7 @@ EOM
        resp = @dog.monitor(
          alert['monitor_type'],
          datadog_query,
-
-          message: message,
-          options: alert_options
+          monitor_options
        )
      end
    end
@@ -273,6 +285,7 @@ EOM
      @stats[:alerts_to_be_deleted] += 1
      log.info("deleting alert: #{alert['name']}")

+      # Safety to protect aginst accident dry_run deletion
      unless @dry_run
        alert['id'].each do |alert_id|
          resp = @dog.delete_monitor(alert_id)
@@ -290,14 +303,6 @@ EOM
      end
    end

-    def remove_alert_by_id(alert_id)
-      # This should only be used by dry-run to clean up created dry-run alerts
-      log.debug("deleting alert, id: #{alert_id}")
-      resp = @dog.delete_monitor(alert_id)
-      code = resp[0].to_i
-      log_datadog_response_code(resp, code, :deleting)
-    end
-
    def need_update(alert_people_pair, existing_alerts_from_api)
      alert, people = alert_people_pair
      existing = existing_alerts_from_api[alert['name']]
@@ -386,7 +391,7 @@ EOM
                " response was #{resp[0]}:'#{resp[1].inspect}'")
      end

-
+    # unknown (prob. datadog) error:
    elsif code > 400 || code == -1
      @stats[:api_unknown_errors] += 1
      unless alert.nil?
data/lib/interferon/group_sources/filesystem.rb
CHANGED
@@ -16,7 +16,7 @@ module Interferon::GroupSources
      @paths.each do |path|
        path = File.expand_path(path)
        unless Dir.exist?(path)
-          log.warn
+          log.warn("no such directory #{path} for reading group files")
          next
        end

@@ -24,9 +24,9 @@ module Interferon::GroupSources
          begin
            group = YAML.parse(File.read(group_file))
          rescue YAML::SyntaxError => e
-            log.error
+            log.error("syntax error in group file #{group_file}: #{e}")
          rescue StandardError => e
-            log.warn
+            log.warn("error reading group file #{group_file}: #{e}")
          else
            group = group.to_ruby
            if group['people']
@@ -44,7 +44,7 @@ module Interferon::GroupSources
            if groups.include?(group)
              groups[aliased_group] = groups[group]
            else
-              log.warn
+              log.warn("Alias not found for #{group} but used by #{aliased_group} in #{group_file}")
            end
          end

data/lib/interferon/loaders.rb
CHANGED
@@ -35,12 +35,12 @@ module Interferon
        options = source['options'] || {}

        if type.nil?
-          log.warn
+          log.warn("#{@loader_for} ##{idx} does not have a 'type' set; 'type' is required")
          next
        end

        unless enabled
-          log.info
+          log.info("skipping #{@loader_for} #{type} because it's not enabled")
          next
        end

@@ -68,9 +68,11 @@ module Interferon
          require full_path
          klass = @module.const_get(class_name)
        rescue LoadError => e
-          log.debug
+          log.debug("LoadError looking for #{@loader_for} file #{type} at #{full_path}: #{e}")
        rescue NameError => e
-          log.debug
+          log.debug(
+            "NameError looking for #{@loader_for} class #{class_name} in #{full_path}: #{e}"
+          )
        end

        break if klass
data/lib/interferon/version.rb
CHANGED
data/spec/lib/interferon/destinations/datadog_spec.rb
CHANGED
@@ -88,8 +88,8 @@ describe Interferon::Destinations::Datadog do
      datadog.create_alert(mock_alert, mock_people)
    end

-    it '
-      expect_any_instance_of(Dogapi::Client).to receive(:
+    it 'calls validate monitor in dry-run' do
+      expect_any_instance_of(Dogapi::Client).to receive(:validate_monitor).and_return([200, ''])
      expect(datadog_dry_run).to receive(:existing_alerts).and_return(mock_response)
      datadog_dry_run.create_alert(mock_alert, mock_people)
    end
@@ -114,12 +114,4 @@ describe Interferon::Destinations::Datadog do
      datadog.remove_alert(mock_alert)
    end
  end
-
-  describe '.remove_alert_by_id' do
-    it 'calls dogapi delete_monitor' do
-      expect_any_instance_of(Dogapi::Client).to receive(:delete_monitor)
-        .with(mock_alert_id).and_return([200, ''])
-      datadog.remove_alert_by_id(mock_alert_id)
-    end
-  end
end
data/spec/lib/interferon_spec.rb
CHANGED
@@ -71,130 +71,135 @@ describe Interferon::Destinations::Datadog do
  end

  context 'dry_run_update_alerts_on_destination' do
-    let(:interferon) { Interferon::Interferon.new(
+    let(:interferon) { Interferon::Interferon.new({ 'processes' => 0 }, true) }

    before do
      allow_any_instance_of(MockAlert).to receive(:evaluate)
      allow(dest).to receive(:remove_alert)
-      allow(dest).to receive(:remove_alert_by_id)
      allow(dest).to receive(:report_stats)
    end

    it 'does not re-run existing alerts' do
-
+      mock_alerts = mock_existing_alerts
      expect(dest).not_to receive(:create_alert)
-      expect(dest).not_to receive(:remove_alert_by_id)

-      interferon.
-
+      alerts_queue, _error_count = interferon.build_alerts_queue(
+        ['host'],
+        [mock_alerts['name1'], mock_alerts['name2']].map { |x| test_alert_from_json(x) },
+        {}
      )
+
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

    it 'runs added alerts' do
-
-
+      mock_alerts = mock_existing_alerts
+      alerts = [mock_alerts['name1'], mock_alerts['name2']].map { |x| test_alert_from_json(x) }
+      alerts << create_test_alert('name3', 'testquery3', '')
+
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], alerts, {})
+
      expect(dest).to receive(:create_alert).once.and_call_original
-      expect(dest).to receive(:remove_alert_by_id).with('3').once

-      interferon.update_alerts_on_destination(
-        dest, ['host'], [alerts['name1'], alerts['name2'], added], {}
-      )
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

    it 'runs updated alerts' do
      added = create_test_alert('name1', 'testquery3', '')
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [added], {})
      expect(dest).to receive(:create_alert).once.and_call_original
-      expect(dest).to receive(:remove_alert_by_id).with('1').once

-      interferon.update_alerts_on_destination(dest,
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

-    it '
-      expect(dest).
+    it 'does not delete old alerts' do
+      expect(dest).to_not receive(:remove_alert)
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [], {})

-      interferon.update_alerts_on_destination(dest,
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

-    it '
+    it 'does not delete duplicate old alerts' do
      alert1 = mock_alert_json('name1', 'testquery1', '', nil, [1, 2, 3])
      alert2 = mock_alert_json('name2', 'testquery2', '')
      existing_alerts = { 'name1' => alert1, 'name2' => alert2 }
+
      dest = MockDest.new(existing_alerts)
-      allow(dest).to receive(:remove_alert)
-      allow(dest).to receive(:remove_alert_by_id)
      allow(dest).to receive(:report_stats)

-
-
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [], {})
+
+      expect(dest).to_not receive(:remove_alert)

-      interferon.update_alerts_on_destination(dest,
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

-    it '
+    it 'does not delete duplicate old alerts when creating new alert' do
      alert1 = mock_alert_json('name1', 'testquery1', '', nil, [1, 2, 3])
      alert2 = mock_alert_json('name2', 'testquery2', '')
      existing_alerts = { 'name1' => alert1, 'name2' => alert2 }
+
      dest = MockDest.new(existing_alerts)
-      allow(dest).to receive(:remove_alert)
-      allow(dest).to receive(:remove_alert_by_id)
      allow(dest).to receive(:report_stats)

      added = create_test_alert('name1', 'testquery1', '')
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [added], {})

-
-      # during dry run
-      expect(dest).to_not receive(:remove_alert).with(existing_alerts['name1'])
-      expect(dest).to receive(:remove_alert).with(existing_alerts['name2'])
+      expect(dest).to_not receive(:remove_alert)

-      interferon.update_alerts_on_destination(dest,
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end
  end

  context 'update_alerts_on_destination' do
-    let(:interferon) { Interferon::Interferon.new(
+    let(:interferon) { Interferon::Interferon.new({ 'processes' => 0 }, false) }

    before do
      allow_any_instance_of(MockAlert).to receive(:evaluate)
      allow(dest).to receive(:remove_alert)
-      allow(dest).to receive(:remove_alert_by_id)
      allow(dest).to receive(:report_stats)
    end

    it 'does not re-run existing alerts' do
-
+      mock_alerts = mock_existing_alerts
      expect(dest).not_to receive(:create_alert)
-      expect(dest).not_to receive(:remove_alert_by_id)

-      interferon.
-
+      alerts_queue, _error_count = interferon.build_alerts_queue(
+        ['host'],
+        [mock_alerts['name1'], mock_alerts['name2']].map { |x| test_alert_from_json(x) },
+        {}
      )
+
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

    it 'runs added alerts' do
-
-
+      mock_alerts = mock_existing_alerts
+      alerts = [mock_alerts['name1'], mock_alerts['name2']].map { |x| test_alert_from_json(x) }
+      alerts << create_test_alert('name3', 'testquery3', '')
+
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], alerts, {})
+
      expect(dest).to receive(:create_alert).once.and_call_original
-      expect(dest).not_to receive(:remove_alert_by_id).with('3')

-      interferon.update_alerts_on_destination(
-        dest, ['host'], [alerts['name1'], alerts['name2'], added], {}
-      )
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

    it 'runs updated alerts' do
      added = create_test_alert('name1', 'testquery3', '')
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [added], {})
      expect(dest).to receive(:create_alert).once.and_call_original
-      expect(dest).not_to receive(:remove_alert_by_id).with('1')

-      interferon.update_alerts_on_destination(dest,
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

    it 'deletes old alerts' do
      alerts = mock_existing_alerts
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [], {})
      expect(dest).to receive(:remove_alert).with(alerts['name1'])
      expect(dest).to receive(:remove_alert).with(alerts['name2'])

-      interferon.update_alerts_on_destination(dest,
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

    it 'deletes duplicate old alerts' do
@@ -203,13 +208,14 @@ describe Interferon::Destinations::Datadog do
      existing_alerts = { 'name1' => alert1, 'name2' => alert2 }
      dest = MockDest.new(existing_alerts)
      allow(dest).to receive(:remove_alert)
-      allow(dest).to receive(:remove_alert_by_id)
      allow(dest).to receive(:report_stats)

+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [], {})
+
      expect(dest).to receive(:remove_alert).with(existing_alerts['name1'])
      expect(dest).to receive(:remove_alert).with(existing_alerts['name2'])

-      interferon.update_alerts_on_destination(dest,
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end

    it 'deletes duplicate old alerts when creating new alert' do
@@ -220,19 +226,21 @@ describe Interferon::Destinations::Datadog do
      allow(dest).to receive(:report_stats)

      added = create_test_alert('name1', 'testquery1', '')
+      alerts_queue, _error_count = interferon.build_alerts_queue(['host'], [added], {})

      expect(dest).to receive(:remove_alert).with(
        mock_alert_json('name1', 'testquery1', '', nil, [2, 3])
      )
      expect(dest).to receive(:remove_alert).with(existing_alerts['name2'])

-      interferon.update_alerts_on_destination(dest,
+      interferon.update_alerts_on_destination(dest, alerts_queue)
    end
  end

  def mock_existing_alerts
-
-
+    mock_message = Interferon::Destinations::Datadog::ALERT_KEY
+    alert1 = mock_alert_json('name1', 'testquery1', mock_message)
+    alert2 = mock_alert_json('name2', 'testquery2', mock_message)
    { 'name1' => alert1, 'name2' => alert2 }
  end

@@ -274,6 +282,15 @@ describe Interferon::Destinations::Datadog do
    }
  end

+  def test_alert_from_json(mock_alert_json)
+    create_test_alert(
+      mock_alert_json['name'],
+      mock_alert_json['query'],
+      mock_alert_json['message'].sub(/#{Interferon::Destinations::Datadog::ALERT_KEY}$/, ''),
+      mock_alert_json['options']
+    )
+  end
+
  def create_test_alert(name, datadog_query, message, options = {})
    options = DEFAULT_OPTIONS.merge(options)
