interferon 0.1.0 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rubocop.yml +4 -0
- data/.rubocop_todo.yml +83 -0
- data/.travis.yml +4 -1
- data/bin/interferon +10 -9
- data/interferon.gemspec +18 -17
- data/lib/interferon/alert.rb +4 -10
- data/lib/interferon/alert_dsl.rb +12 -7
- data/lib/interferon/destinations/datadog.rb +103 -103
- data/lib/interferon/group_sources/filesystem.rb +5 -5
- data/lib/interferon/host_sources/aws_dynamo.rb +17 -19
- data/lib/interferon/host_sources/aws_elasticache.rb +20 -22
- data/lib/interferon/host_sources/aws_rds.rb +33 -33
- data/lib/interferon/host_sources/optica.rb +12 -10
- data/lib/interferon/host_sources/optica_services.rb +17 -15
- data/lib/interferon/host_sources/test_host_source.rb +1 -1
- data/lib/interferon/loaders.rb +4 -5
- data/lib/interferon/logging.rb +2 -3
- data/lib/interferon/version.rb +1 -1
- data/lib/interferon/work_hours_helper.rb +5 -5
- data/lib/interferon.rb +79 -80
- data/script/pre-commit +15 -20
- data/spec/fixtures/loaders/host_sources/test_host_source.rb +1 -1
- data/spec/fixtures/loaders/test_sources/order_test_source.rb +1 -1
- data/spec/fixtures/loaders/test_sources/test_source.rb +1 -1
- data/spec/fixtures/loaders2/test_sources/order_test_source.rb +1 -1
- data/spec/fixtures/loaders2/test_sources/secondary_source.rb +1 -1
- data/spec/fixtures/loaders2/test_sources/test_source.rb +1 -2
- data/spec/helpers/logging_helper.rb +2 -2
- data/spec/helpers/mock_alert.rb +1 -1
- data/spec/helpers/optica_helper.rb +70 -70
- data/spec/lib/interferon/destinations/datadog_spec.rb +58 -59
- data/spec/lib/interferon/group_sources/filesystem_spec.rb +29 -24
- data/spec/lib/interferon/host_sources/optica_services_spec.rb +11 -9
- data/spec/lib/interferon/host_sources/optica_spec.rb +6 -3
- data/spec/lib/interferon/loaders_spec.rb +19 -15
- data/spec/lib/interferon_spec.rb +61 -59
- data/spec/lib/work_hours_helper_spec.rb +15 -15
- data/spec/spec_helper.rb +1 -1
- metadata +61 -65
data/lib/interferon.rb
CHANGED
@@ -6,7 +6,7 @@ require 'interferon/loaders'
|
|
6
6
|
require 'interferon/alert'
|
7
7
|
require 'interferon/alert_dsl'
|
8
8
|
|
9
|
-
#require 'pry' #uncomment if you're debugging
|
9
|
+
# require 'pry' #uncomment if you're debugging
|
10
10
|
require 'erb'
|
11
11
|
require 'ostruct'
|
12
12
|
require 'parallel'
|
@@ -15,17 +15,16 @@ require 'yaml'
|
|
15
15
|
|
16
16
|
module Interferon
|
17
17
|
class Interferon
|
18
|
-
|
19
18
|
include Logging
|
20
19
|
attr_accessor :host_sources, :destinations, :host_info
|
21
20
|
|
22
|
-
DRY_RUN_ALERTS_NAME_PREFIX = '[-dry-run-]'
|
21
|
+
DRY_RUN_ALERTS_NAME_PREFIX = '[-dry-run-]'.freeze
|
23
22
|
|
24
23
|
# groups_sources is a hash from type => options for each group source
|
25
24
|
# host_sources is a hash from type => options for each host source
|
26
25
|
# destinations is a similar hash from type => options for each alerter
|
27
26
|
def initialize(alerts_repo_path, groups_sources, host_sources, destinations,
|
28
|
-
dry_run=false, processes=nil)
|
27
|
+
dry_run = false, processes = nil)
|
29
28
|
@alerts_repo_path = alerts_repo_path
|
30
29
|
@groups_sources = groups_sources
|
31
30
|
@host_sources = host_sources
|
@@ -36,8 +35,8 @@ module Interferon
|
|
36
35
|
end
|
37
36
|
|
38
37
|
def run(dry_run = false)
|
39
|
-
Signal.trap(
|
40
|
-
log.info
|
38
|
+
Signal.trap('TERM') do
|
39
|
+
log.info 'SIGTERM received. shutting down gracefully...'
|
41
40
|
@request_shutdown = true
|
42
41
|
end
|
43
42
|
@dry_run = dry_run
|
@@ -50,9 +49,7 @@ module Interferon
|
|
50
49
|
|
51
50
|
@destinations.each do |dest|
|
52
51
|
dest['options'] ||= {}
|
53
|
-
if @dry_run
|
54
|
-
dest['options']['dry_run'] = true
|
55
|
-
end
|
52
|
+
dest['options']['dry_run'] = true if @dry_run
|
56
53
|
end
|
57
54
|
|
58
55
|
update_alerts(@destinations, hosts, alerts, groups)
|
@@ -71,7 +68,7 @@ module Interferon
|
|
71
68
|
# validate that alerts path exists
|
72
69
|
path = File.expand_path(File.join(@alerts_repo_path, 'alerts'))
|
73
70
|
abort("no such directory #{path} for reading alert files") \
|
74
|
-
unless Dir.
|
71
|
+
unless Dir.exist?(path)
|
75
72
|
|
76
73
|
Dir.glob(File.join(path, '*.rb')) do |alert_file|
|
77
74
|
break if @request_shutdown
|
@@ -91,7 +88,7 @@ module Interferon
|
|
91
88
|
statsd.gauge('alerts.read.failed', failed)
|
92
89
|
|
93
90
|
abort("failed to read #{failed} alerts") if failed > 0
|
94
|
-
|
91
|
+
alerts
|
95
92
|
end
|
96
93
|
|
97
94
|
def read_groups(sources)
|
@@ -99,7 +96,7 @@ module Interferon
|
|
99
96
|
loader = GroupSourcesLoader.new([@alerts_repo_path])
|
100
97
|
loader.get_all(sources).each do |source|
|
101
98
|
break if @request_shutdown
|
102
|
-
source_groups = source.list_groups
|
99
|
+
source_groups = source.list_groups { groups }
|
103
100
|
|
104
101
|
# add all people to groups
|
105
102
|
people_count = 0
|
@@ -109,16 +106,18 @@ module Interferon
|
|
109
106
|
people_count += people.count
|
110
107
|
end
|
111
108
|
|
112
|
-
log.info "read #{people_count} people in #{source_groups.count} groups
|
109
|
+
log.info "read #{people_count} people in #{source_groups.count} groups" \
|
110
|
+
"from source #{source.class.name}"
|
113
111
|
end
|
114
112
|
|
115
|
-
log.info "total of #{groups.values.flatten.count} people in #{groups.count} groups
|
113
|
+
log.info "total of #{groups.values.flatten.count} people in #{groups.count} groups" \
|
114
|
+
"from #{sources.count} sources"
|
116
115
|
|
117
116
|
statsd.gauge('groups.sources', sources.count)
|
118
117
|
statsd.gauge('groups.count', groups.count)
|
119
118
|
statsd.gauge('groups.people', groups.values.flatten.count)
|
120
119
|
|
121
|
-
|
120
|
+
groups
|
122
121
|
end
|
123
122
|
|
124
123
|
def read_hosts(sources)
|
@@ -131,14 +130,14 @@ module Interferon
|
|
131
130
|
source_hosts = source.list_hosts
|
132
131
|
hosts << source_hosts
|
133
132
|
|
134
|
-
statsd.gauge('hosts.count', source_hosts.count, :
|
133
|
+
statsd.gauge('hosts.count', source_hosts.count, tags: ["source:#{source.class.name}"])
|
135
134
|
log.info "read #{source_hosts.count} hosts from source #{source.class.name}"
|
136
135
|
end
|
137
136
|
|
138
137
|
hosts.flatten!
|
139
138
|
log.info "total of #{hosts.count} entities from #{sources.count} sources"
|
140
139
|
|
141
|
-
|
140
|
+
hosts
|
142
141
|
end
|
143
142
|
|
144
143
|
def update_alerts(destinations, hosts, alerts, groups)
|
@@ -169,16 +168,15 @@ module Interferon
|
|
169
168
|
statsd.histogram(
|
170
169
|
@dry_run ? 'destinations.run_time.dry_run' : 'destinations.run_time',
|
171
170
|
run_time,
|
172
|
-
:
|
173
|
-
|
171
|
+
tags: ["destination:#{dest.class.name}"]
|
172
|
+
)
|
173
|
+
log.info "#{dest.class.name} : run completed in %.2f seconds" % run_time
|
174
174
|
|
175
175
|
# report destination stats
|
176
176
|
dest.report_stats
|
177
177
|
end
|
178
178
|
|
179
|
-
if @dry_run && !dest.api_errors.empty?
|
180
|
-
raise dest.api_errors.to_s
|
181
|
-
end
|
179
|
+
raise dest.api_errors.to_s if @dry_run && !dest.api_errors.empty?
|
182
180
|
end
|
183
181
|
|
184
182
|
def do_dry_run_update(dest, hosts, alerts, existing_alerts, groups)
|
@@ -192,8 +190,8 @@ module Interferon
|
|
192
190
|
end
|
193
191
|
|
194
192
|
alerts_queue = build_alerts_queue(hosts, alerts, groups)
|
195
|
-
updates_queue = alerts_queue.reject do |
|
196
|
-
!Interferon
|
193
|
+
updates_queue = alerts_queue.reject do |_name, alert_people_pair|
|
194
|
+
!Interferon.need_update(dest, alert_people_pair, existing_alerts)
|
197
195
|
end
|
198
196
|
|
199
197
|
# Add dry-run prefix to alerts and delete id to avoid impacting real alerts
|
@@ -206,7 +204,7 @@ module Interferon
|
|
206
204
|
end
|
207
205
|
|
208
206
|
# Build new queue with dry-run prefixes and ensure they are silenced
|
209
|
-
alerts_queue.each do |
|
207
|
+
alerts_queue.each do |_name, alert_people_pair|
|
210
208
|
alert = alert_people_pair[0]
|
211
209
|
dry_run_alert_name = DRY_RUN_ALERTS_NAME_PREFIX + alert['name']
|
212
210
|
alert.change_name(dry_run_alert_name)
|
@@ -216,23 +214,23 @@ module Interferon
|
|
216
214
|
# Create alerts in destination
|
217
215
|
created_alerts = create_alerts(dest, updates_queue)
|
218
216
|
|
219
|
-
# Existing alerts are pruned until all that remains are
|
217
|
+
# Existing alerts are pruned until all that remains are
|
218
|
+
# alerts that aren't being generated anymore
|
220
219
|
to_remove = existing_alerts.dup
|
221
|
-
alerts_queue.each do |
|
220
|
+
alerts_queue.each do |_name, alert_people_pair|
|
222
221
|
alert = alert_people_pair[0]
|
223
222
|
old_alerts = to_remove[alert['name']]
|
224
223
|
|
225
|
-
if
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
end
|
224
|
+
next if old_alerts.nil?
|
225
|
+
if old_alerts['id'].length == 1
|
226
|
+
to_remove.delete(alert['name'])
|
227
|
+
else
|
228
|
+
old_alerts['id'] = old_alerts['id'].drop(1)
|
231
229
|
end
|
232
230
|
end
|
233
231
|
|
234
232
|
# Clean up alerts not longer being generated
|
235
|
-
to_remove.each do |
|
233
|
+
to_remove.each do |_name, alert|
|
236
234
|
break if @request_shutdown
|
237
235
|
dest.remove_alert(alert)
|
238
236
|
end
|
@@ -244,35 +242,34 @@ module Interferon
|
|
244
242
|
dest.remove_alert_by_id(alert_id)
|
245
243
|
end
|
246
244
|
end
|
247
|
-
|
248
245
|
end
|
249
246
|
|
250
247
|
def do_regular_update(dest, hosts, alerts, existing_alerts, groups)
|
251
248
|
alerts_queue = build_alerts_queue(hosts, alerts, groups)
|
252
|
-
updates_queue = alerts_queue.reject do |
|
253
|
-
!Interferon
|
249
|
+
updates_queue = alerts_queue.reject do |_name, alert_people_pair|
|
250
|
+
!Interferon.need_update(dest, alert_people_pair, existing_alerts)
|
254
251
|
end
|
255
252
|
|
256
253
|
# Create alerts in destination
|
257
254
|
create_alerts(dest, updates_queue)
|
258
255
|
|
259
|
-
# Existing alerts are pruned until all that remains are
|
256
|
+
# Existing alerts are pruned until all that remains are
|
257
|
+
# alerts that aren't being generated anymore
|
260
258
|
to_remove = existing_alerts.dup
|
261
|
-
alerts_queue.each do |
|
259
|
+
alerts_queue.each do |_name, alert_people_pair|
|
262
260
|
alert = alert_people_pair[0]
|
263
261
|
old_alerts = to_remove[alert['name']]
|
264
262
|
|
265
|
-
if
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
end
|
263
|
+
next if old_alerts.nil?
|
264
|
+
if old_alerts['id'].length == 1
|
265
|
+
to_remove.delete(alert['name'])
|
266
|
+
else
|
267
|
+
old_alerts['id'] = old_alerts['id'].drop(1)
|
271
268
|
end
|
272
269
|
end
|
273
270
|
|
274
271
|
# Clean up alerts not longer being generated
|
275
|
-
to_remove.each do |
|
272
|
+
to_remove.each do |_name, alert|
|
276
273
|
break if @request_shutdown
|
277
274
|
dest.remove_alert(alert)
|
278
275
|
end
|
@@ -283,10 +280,10 @@ module Interferon
|
|
283
280
|
alerts_to_create = alerts_queue.keys
|
284
281
|
concurrency = dest.concurrency || 10
|
285
282
|
unless @request_shutdown
|
286
|
-
threads = concurrency
|
283
|
+
threads = Array.new(concurrency) do |i|
|
287
284
|
log.info "thread #{i} created"
|
288
285
|
t = Thread.new do
|
289
|
-
while name = alerts_to_create.shift
|
286
|
+
while (name = alerts_to_create.shift)
|
290
287
|
break if @request_shutdown
|
291
288
|
cur_alert, people = alerts_queue[name]
|
292
289
|
log.debug "creating alert for #{cur_alert[:name]}"
|
@@ -308,10 +305,10 @@ module Interferon
|
|
308
305
|
break if @request_shutdown
|
309
306
|
alerts_generated = {}
|
310
307
|
counters = {
|
311
|
-
:
|
312
|
-
:
|
313
|
-
:
|
314
|
-
:
|
308
|
+
errors: 0,
|
309
|
+
evals: 0,
|
310
|
+
applies: 0,
|
311
|
+
hosts: hosts.length,
|
315
312
|
}
|
316
313
|
last_eval_error = nil
|
317
314
|
|
@@ -347,8 +344,8 @@ module Interferon
|
|
347
344
|
end
|
348
345
|
|
349
346
|
# log some of the counters
|
350
|
-
statsd.gauge('alerts.evaluate.errors', counters[:errors], :
|
351
|
-
statsd.gauge('alerts.evaluate.applies', counters[:applies], :
|
347
|
+
statsd.gauge('alerts.evaluate.errors', counters[:errors], tags: ["alert:#{alert}"])
|
348
|
+
statsd.gauge('alerts.evaluate.applies', counters[:applies], tags: ["alert:#{alert}"])
|
352
349
|
|
353
350
|
if counters[:applies] > 0
|
354
351
|
log.info "alert #{alert} applies to #{counters[:applies]} of #{counters[:hosts]} hosts"
|
@@ -359,18 +356,19 @@ module Interferon
|
|
359
356
|
log.error "alert #{alert} failed to evaluate in the context of all hosts!"
|
360
357
|
log.error "last error on alert #{alert}: #{last_eval_error}"
|
361
358
|
|
362
|
-
statsd.gauge('alerts.evaluate.failed_on_all', 1, :
|
363
|
-
log.debug "alert #{alert}:
|
359
|
+
statsd.gauge('alerts.evaluate.failed_on_all', 1, tags: ["alert:#{alert}"])
|
360
|
+
log.debug "alert #{alert}: " \
|
361
|
+
"error #{last_eval_error}\n#{last_eval_error.backtrace.join("\n")}"
|
364
362
|
else
|
365
|
-
statsd.gauge('alerts.evaluate.failed_on_all', 0, :
|
363
|
+
statsd.gauge('alerts.evaluate.failed_on_all', 0, tags: ["alert:#{alert}"])
|
366
364
|
end
|
367
365
|
|
368
366
|
# did the alert apply to any hosts?
|
369
367
|
if counters[:applies] == 0
|
370
|
-
statsd.gauge('alerts.evaluate.never_applies', 1, :
|
368
|
+
statsd.gauge('alerts.evaluate.never_applies', 1, tags: ["alert:#{alert}"])
|
371
369
|
log.warn "alert #{alert} did not apply to any hosts"
|
372
370
|
else
|
373
|
-
statsd.gauge('alerts.evaluate.never_applies', 0, :
|
371
|
+
statsd.gauge('alerts.evaluate.never_applies', 0, tags: ["alert:#{alert}"])
|
374
372
|
end
|
375
373
|
alerts_generated
|
376
374
|
end
|
@@ -403,36 +401,37 @@ module Interferon
|
|
403
401
|
alert, people = alert_people_pair
|
404
402
|
|
405
403
|
prev_alert = {
|
406
|
-
:
|
407
|
-
:
|
408
|
-
:
|
409
|
-
:
|
410
|
-
:
|
411
|
-
:
|
412
|
-
:
|
413
|
-
:
|
414
|
-
:
|
404
|
+
monitor_type: normalize_monitor_type(alert_api_json['type']),
|
405
|
+
query: alert_api_json['query'].strip,
|
406
|
+
message: alert_api_json['message'].strip,
|
407
|
+
evaluation_delay: alert_api_json['options']['evaluation_delay'],
|
408
|
+
notify_no_data: alert_api_json['options']['notify_no_data'],
|
409
|
+
notify_audit: alert_api_json['options']['notify_audit'],
|
410
|
+
no_data_timeframe: alert_api_json['options']['no_data_timeframe'],
|
411
|
+
silenced: alert_api_json['options']['silenced'],
|
412
|
+
thresholds: alert_api_json['options']['thresholds'],
|
413
|
+
timeout_h: alert_api_json['options']['timeout_h'],
|
415
414
|
}
|
416
415
|
|
417
416
|
new_alert = {
|
418
|
-
:
|
419
|
-
:
|
420
|
-
:
|
421
|
-
:
|
422
|
-
:
|
423
|
-
:
|
424
|
-
:
|
425
|
-
:
|
426
|
-
:
|
417
|
+
monitor_type: normalize_monitor_type(alert['monitor_type']),
|
418
|
+
query: alert['metric']['datadog_query'],
|
419
|
+
message: dest.generate_message(alert['message'], people).strip,
|
420
|
+
evaluation_delay: alert['evaluation_delay'],
|
421
|
+
notify_no_data: alert['notify_no_data'],
|
422
|
+
notify_audit: alert['notify']['audit'],
|
423
|
+
no_data_timeframe: alert['no_data_timeframe'],
|
424
|
+
silenced: alert['silenced'],
|
425
|
+
thresholds: alert['thresholds'],
|
426
|
+
timeout_h: alert['timeout_h'],
|
427
427
|
}
|
428
428
|
|
429
|
-
|
430
|
-
|
429
|
+
unless alert['require_full_window'].nil?
|
430
|
+
prev_alert[:require_full_window] = alert_api_json['options']['require_full_window']
|
431
431
|
new_alert[:require_full_window] = alert['require_full_window']
|
432
432
|
end
|
433
433
|
|
434
434
|
prev_alert == new_alert
|
435
435
|
end
|
436
|
-
|
437
436
|
end
|
438
437
|
end
|
data/script/pre-commit
CHANGED
@@ -11,22 +11,20 @@ reasons = []
|
|
11
11
|
diff = `git diff-index --name-status --cached HEAD`
|
12
12
|
files = diff.split("\n").map(&:split)
|
13
13
|
|
14
|
-
added_files = files
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
added_files = files
|
15
|
+
.select { |(status, _name)| status == 'A' }
|
16
|
+
.map { |(_status, name)| name }
|
17
|
+
.compact
|
18
18
|
|
19
|
-
modified_files = files
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
modified_files = files
|
20
|
+
.select { |(status, _name)| status != 'D' } # ignore deleted files
|
21
|
+
.map { |(_status, name)| name }
|
22
|
+
.compact
|
23
23
|
|
24
24
|
# check for large files
|
25
25
|
added_files.each do |file|
|
26
26
|
size_in_kb = `du -k #{file}`.strip.split.first.to_i
|
27
|
-
if size_in_kb > 1024
|
28
|
-
reasons << "#{file} is greater than 1 MB in size"
|
29
|
-
end
|
27
|
+
reasons << "#{file} is greater than 1 MB in size" if size_in_kb > 1024
|
30
28
|
end
|
31
29
|
|
32
30
|
# Make sure Gemfile.lock was updated if Gemfile changed.
|
@@ -35,25 +33,22 @@ if modified_files.include?('Gemfile') && !modified_files.include?('Gemfile.lock'
|
|
35
33
|
end
|
36
34
|
|
37
35
|
# Check Ruby syntax
|
38
|
-
modified_files.select {|f| f.match
|
39
|
-
|
40
|
-
|
41
|
-
if $? != 0
|
42
|
-
reasons << "ruby file #{file} failed syntax check"
|
43
|
-
end
|
36
|
+
modified_files.select { |f| f.match(/\.((rb)|(rake))$/) }.each do |file|
|
37
|
+
`ruby -c #{file} 2>&1`
|
38
|
+
reasons << "ruby file #{file} failed syntax check" if $CHILD_STATUS != 0
|
44
39
|
end
|
45
40
|
|
46
41
|
# Check JSON syntax
|
47
|
-
modified_files.select {|f| f.match
|
42
|
+
modified_files.select { |f| f.match(/(\.json)$/) }.each do |file|
|
48
43
|
begin
|
49
44
|
JSON.parse(File.read(file))
|
50
45
|
rescue StandardError => e
|
51
|
-
reasons << "JSON file #{file} contains invalid JSON"
|
46
|
+
reasons << "JSON file #{file} contains invalid JSON: #{e}"
|
52
47
|
end
|
53
48
|
end
|
54
49
|
|
55
50
|
# Check YAML syntax
|
56
|
-
modified_files.select {|f| f.match
|
51
|
+
modified_files.select { |f| f.match(/(\.yaml)$/) }.each do |file|
|
57
52
|
begin
|
58
53
|
YAML.parse(File.read(file))
|
59
54
|
rescue YAML::SyntaxError => e
|