interferon 0.0.12 → 0.0.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 35b5382f5aabd274b548257bcc5f227faf1b951e
4
- data.tar.gz: abacd2609b40bf28b2783bbd21b24c384f991188
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MDdjYjBjNDk1OTJkMThiNjE5ZmZiNTgzNTI5NGU1ZDMzMzI5MGQ2ZQ==
5
+ data.tar.gz: !binary |-
6
+ NDIxMWM1MzQ2YTFlYjE4ZjIyOTUyMjk0ZDQ4ZjZhYTkzNWVlZDExZA==
5
7
  SHA512:
6
- metadata.gz: b215a4e355f66daa582d65b521fe0eef5842eb795551df1998844d82cb1892a64b7a95e3b60fff1ec2992eb2e88de2f584c274c9bd5570b6f0ae4026b05eeb5a
7
- data.tar.gz: 2bb6ee4093fe2bbe46d811a3cb79a357066073727a699d773b306a5355fd8f7b9d06a4f2bdb51082ac6570a3166820ce7f1b88d63f9fb76d6d032fc8b97073b0
8
+ metadata.gz: !binary |-
9
+ YmU0MWExYTUxNDY3ODcwNWI2OWNlZTliOWVjNWVlYjMwNzkyMzdiNWIzM2Vm
10
+ YzQ5NzY1MTQxNzkyMWIwYzJlM2I5MDc3ZTNlODFjOGVkZmU5ZmZmZjRmNGJm
11
+ YzYyZTdmODhkMjdmNjJmODUwODY5ODcwNzQ4OTliYjA5NzQ0Mzg=
12
+ data.tar.gz: !binary |-
13
+ YzkxYjVkNmFhNzQwNjdiZDdmYjY4ZmQ2ODU0MmZiMzVmODQ5NzJkN2RkMTNh
14
+ Nzc1YzY0ODA5NmU5ZDk3YzYxZmZjZTlhODg5YjdhMWU4OGNkNGM1Yzg2NTQx
15
+ MWE5ODhmZWU5YWJkNzcxNTlkMGM2YmUxYWVlYTZiOWYyZTNlZWY=
data/interferon.gemspec CHANGED
@@ -6,8 +6,8 @@ require 'interferon/version'
6
6
  Gem::Specification.new do |gem|
7
7
  gem.name = "interferon"
8
8
  gem.version = Interferon::VERSION
9
- gem.authors = ["Igor Serebryany"]
10
- gem.email = ["igor.serebryany@airbnb.com"]
9
+ gem.authors = ["Igor Serebryany", "Jimmy Ngo"]
10
+ gem.email = ["igor.serebryany@airbnb.com", "jimmy.ngo@airbnb.com"]
11
11
  gem.description = %q{: Store metrics alerts in code!}
12
12
  gem.summary = %q{: Store metrics alerts in code!}
13
13
  gem.homepage = "https://www.github.com/airbnb/interferon"
@@ -20,6 +20,10 @@ Gem::Specification.new do |gem|
20
20
  gem.add_runtime_dependency "dogapi", "~> 1.11", ">= 1.11.1"
21
21
  gem.add_runtime_dependency "aws-sdk", "~> 1.35", ">= 1.35.1"
22
22
  gem.add_runtime_dependency "dogstatsd-ruby", "~> 1.4", ">= 1.4.1"
23
+ gem.add_runtime_dependency "diffy", "~> 3.1.0", ">= 3.1.0"
24
+ gem.add_runtime_dependency "parallel", "~> 1.9", ">= 1.9.0"
25
+ gem.add_runtime_dependency "nokogiri", "< 1.7.0"
26
+ gem.add_runtime_dependency "tzinfo", "~> 1.2.2", ">= 1.2.2"
23
27
 
24
28
  gem.add_development_dependency "rspec", "~> 3.2"
25
29
  gem.add_development_dependency "pry", "~> 0.10"
@@ -30,6 +30,14 @@ module Interferon
30
30
  @dsl.name(name)
31
31
  end
32
32
 
33
# Marks this alert as silenced by calling `silenced(true)` on its DSL object.
#
# Raises a RuntimeError when @dsl is not set, i.e. when the alert has not
# been evaluated yet. Returns whatever `@dsl.silenced(true)` returns.
def silence
  raise "This alert has not yet been evaluated" unless @dsl

  @dsl.silenced(true)
end
40
+
33
41
  def [](attr)
34
42
  unless @dsl
35
43
  raise "This alert has not yet been evaluated"
@@ -1,3 +1,4 @@
1
+ require 'interferon/work_hours_helper'
1
2
 
2
3
  module Interferon
3
4
  module DSLMixin
@@ -47,6 +48,15 @@ module Interferon
47
48
  get_or_set(:@silenced_until, v && Time.parse(v), block, Time.at(0))
48
49
  end
49
50
 
51
# Asks WorkHoursHelper whether the current moment (Time.now.utc) counts as
# a work hour, forwarding +args+ untouched and returning its answer.
#
# Args can contain
#   :hours    => range of work hours (0 to 23h), for example (9..16)
#   :days     => range of week days (0 = sunday), for example (1..5) (Monday to Friday)
#   :timezone => example 'America/Los_Angeles'
# 9 to 5 Monday to Friday in PST is the default
def is_work_hour?(args = {})
  current_utc = Time.now.utc
  WorkHoursHelper.is_work_hour?(current_utc, args)
end
59
+
50
60
  def notify_no_data(v = nil, &block)
51
61
  get_or_set(:@notify_no_data, v, block, false)
52
62
  end
@@ -1,6 +1,9 @@
1
+ require 'diffy'
1
2
  require 'dogapi'
2
3
  require 'set'
3
4
 
5
+ Diffy::Diff.default_format = :text
6
+
4
7
  module Interferon::Destinations
5
8
  class Datadog
6
9
  include ::Interferon::Logging
@@ -32,14 +35,18 @@ module Interferon::Destinations
32
35
  @dog = Dogapi::Client.new(*args)
33
36
 
34
37
  @existing_alerts = nil
38
+ @dry_run = options['dry_run']
35
39
 
36
40
  # create datadog alerts 10 at a time
37
41
  @concurrency = 10
38
42
 
39
43
  @stats = {
40
44
  :alerts_created => 0,
45
+ :alerts_to_be_created => 0,
41
46
  :alerts_updated => 0,
47
+ :alerts_to_be_updated => 0,
42
48
  :alerts_deleted => 0,
49
+ :alerts_to_be_deleted => 0,
43
50
  :alerts_silenced => 0,
44
51
  :api_successes => 0,
45
52
  :api_client_errors => 0,
@@ -52,6 +59,10 @@ module Interferon::Destinations
52
59
  @api_errors ||= []
53
60
  end
54
61
 
62
# Assembles the full alert text: the given message, then ALERT_KEY, then one
# "@name" mention per entry in +people+, all joined with newlines.
#
# message - the alert's message text.
# people  - collection of names to mention; must respond to #map.
#
# Returns the joined String.
def generate_message(message, people)
  mentions = people.map { |person| "@#{person}" }
  parts = [message, ALERT_KEY, mentions]
  parts.flatten.join("\n")
end
65
+
55
66
  def existing_alerts
56
67
  unless @existing_alerts
57
68
  resp = @dog.get_all_alerts()
@@ -64,7 +75,16 @@ module Interferon::Destinations
64
75
  alerts = resp[1]['alerts']
65
76
 
66
77
  # key alerts by name
67
- @existing_alerts = Hash[alerts.map{ |a| [a['name'], a] }]
78
+ @existing_alerts = {}
79
+ alerts.each do |alert|
80
+ existing_alert = @existing_alerts[alert['name']]
81
+ if existing_alert.nil?
82
+ alert['id'] = [alert['id']]
83
+ @existing_alerts[alert['name']] = alert
84
+ else
85
+ existing_alert['id'] << alert['id']
86
+ end
87
+ end
68
88
 
69
89
  # count how many are manually created
70
90
  @stats[:manually_created_alerts] = \
@@ -81,21 +101,22 @@ module Interferon::Destinations
81
101
 
82
102
  def create_alert(alert, people)
83
103
  # create a message which includes the notifications
84
- message = [
85
- alert['message'],
86
- ALERT_KEY,
87
- people.map{ |p| "@#{p}" }
88
- ].flatten.join("\n")
104
+ message = generate_message(alert['message'], people)
89
105
 
90
106
  # create the hash of options to send to datadog
91
107
  alert_opts = {
92
108
  :name => alert['name'],
93
109
  :message => message,
94
- :silenced => alert['silenced'] || alert['silenced_until'] > Time.now,
110
+ :silenced => false,
95
111
  :notify_no_data => alert['notify_no_data'],
96
112
  :timeout_h => nil,
97
113
  }
98
114
 
115
+ # Set alert to be silenced if there is a silenced set or silenced_until set
116
+ if alert['silenced'] || alert['silenced_until'] > Time.now
117
+ alert_opts[:silenced] = true
118
+ end
119
+
99
120
  # allow an optional timeframe for "no data" alerts to be specified
100
121
  # (this feature is supported, even though it's not documented)
101
122
  alert_opts[:no_data_timeframe] = alert['no_data_timeframe'] if alert['no_data_timeframe']
@@ -103,10 +124,15 @@ module Interferon::Destinations
103
124
  # timeout is in seconds, but set it to 1 hour at least
104
125
  alert_opts[:timeout_h] = [1, (alert['timeout'].to_i / 3600)].max if alert['timeout']
105
126
 
127
+ datadog_query = alert['metric']['datadog_query'].strip
128
+ existing_alert = existing_alerts[alert['name']]
129
+
106
130
  # new alert, create it
107
- if existing_alerts[alert['name']].nil?
131
+ if existing_alert.nil?
108
132
  action = :creating
109
- log.debug("new alert #{alert['name']}")
133
+ @stats[:alerts_to_be_created] += 1
134
+ new_alert_text = "Query: #{datadog_query} Message: #{message.split().join(' ')}"
135
+ log.info("creating new alert #{alert['name']}: #{new_alert_text}")
110
136
 
111
137
  resp = @dog.alert(
112
138
  alert['metric']['datadog_query'].strip,
@@ -116,66 +142,67 @@ module Interferon::Destinations
116
142
  # existing alert, modify it
117
143
  else
118
144
  action = :updating
119
- id = existing_alerts[alert['name']]['id']
120
- log.debug("updating existing alert #{id} (#{alert['name']})")
121
-
122
- resp = @dog.update_alert(
123
- id,
124
- alert['metric']['datadog_query'].strip,
125
- alert_opts
126
- )
145
+ @stats[:alerts_to_be_updated] += 1
146
+ id = existing_alert['id'][0]
147
+
148
+ new_alert_text = "Query:\n#{datadog_query}\nMessage:\n#{message}"
149
+ existing_alert_text = "Query:\n#{existing_alert['query']}\nMessage:\n#{existing_alert['message']}\n"
150
+ diff = Diffy::Diff.new(existing_alert_text, new_alert_text, :context=>1)
151
+ log.info("updating existing alert #{id} (#{alert['name']}): #{diff}")
152
+
153
+ if @dry_run
154
+ resp = @dog.alert(
155
+ alert['metric']['datadog_query'].strip,
156
+ alert_opts,
157
+ )
158
+ else
159
+ resp = @dog.update_alert(
160
+ id,
161
+ alert['metric']['datadog_query'].strip,
162
+ alert_opts
163
+ )
164
+ # Unmute existing alerts that have been unsilenced.
165
+ # Datadog does not allow updates to silencing via the update_alert API call.
166
+ if existing_alert['silenced'] && !alert_opts[:silenced]
167
+ @dog.unmute_monitor(id)
168
+ end
169
+ end
127
170
  end
128
171
 
129
172
  # log whenever we've encountered errors
130
173
  code = resp[0].to_i
131
- if code != 200
132
- api_errors << "#{code.to_s} on alert #{alert['name']}"
133
- end
134
-
135
- # client error
136
- if code == 400
137
- statsd.gauge('datadog.api.unknown_error', 0, :tags => ["alert:#{alert}"])
138
- statsd.gauge('datadog.api.client_error', 1, :tags => ["alert:#{alert}"])
139
- statsd.gauge('datadog.api.success', 0, :tags => ["alert:#{alert}"])
140
-
141
- @stats[:api_client_errors] += 1
142
- log.error("client error while #{action} alert '#{alert['name']}';" \
143
- " query was '#{alert['metric']['datadog_query'].strip}'" \
144
- " response was #{resp[0]}:'#{resp[1].inspect}'")
145
-
146
- # unknown (prob. datadog) error:
147
- elsif code >= 400 || code == -1
148
- statsd.gauge('datadog.api.unknown_error', 1, :tags => ["alert:#{alert}"])
149
- statsd.gauge('datadog.api.client_error', 0, :tags => ["alert:#{alert}"])
150
- statsd.gauge('datadog.api.success', 0, :tags => ["alert:#{alert}"])
151
-
152
- @stats[:api_unknown_errors] += 1
153
- log.error("unknown error while #{action} alert '#{alert['name']}':" \
154
- " query was '#{alert['metric']['datadog_query'].strip}'" \
155
- " response was #{resp[0]}:'#{resp[1].inspect}'")
174
+ log_datadog_response_code(resp, code, action, alert)
156
175
 
157
176
  # assume this was a success
158
- else
159
- statsd.gauge('datadog.api.unknown_error', 0, :tags => ["alert:#{alert}"])
160
- statsd.gauge('datadog.api.client_error', 0, :tags => ["alert:#{alert}"])
161
- statsd.gauge('datadog.api.success', 1, :tags => ["alert:#{alert}"])
162
-
163
- @stats[:api_successes] += 1
177
+ if !(code >= 400 || code == -1)
178
+ # assume this was a success
164
179
  @stats[:alerts_created] += 1 if action == :creating
165
180
  @stats[:alerts_updated] += 1 if action == :updating
166
181
  @stats[:alerts_silenced] += 1 if alert_opts[:silenced]
167
182
  end
168
183
 
169
- id = resp[1].nil? ? nil : resp[1]['id']
184
+ id = resp[1].nil? ? nil : [resp[1]['id']]
170
185
  # lets key alerts by their name
171
186
  return [alert['name'], id]
172
187
  end
173
188
 
174
189
  def remove_alert(alert)
175
190
  if alert['message'].include?(ALERT_KEY)
176
- log.debug("deleting alert #{alert['id']} (#{alert['name']})")
177
- @dog.delete_alert(alert['id'])
178
- @stats[:alerts_deleted] += 1
191
+ @stats[:alerts_to_be_deleted] += 1
192
+ log.info("deleting alert: #{alert['name']}")
193
+
194
+ if !@dry_run
195
+ alert['id'].each do |alert_id|
196
+ resp = @dog.delete_alert(alert_id)
197
+ code = resp[0].to_i
198
+ log_datadog_response_code(resp, code, :deleting)
199
+
200
+ if !(code >= 300 || code == -1)
201
+ # assume this was a success
202
+ @stats[:alerts_deleted] += 1
203
+ end
204
+ end
205
+ end
179
206
  else
180
207
  log.warn("not deleting manually-created alert #{alert['id']} (#{alert['name']})")
181
208
  end
@@ -186,17 +213,62 @@ module Interferon::Destinations
186
213
  statsd.gauge("datadog.#{k}", v)
187
214
  end
188
215
 
189
- log.info "datadog: created %d updated %d and deleted %d alerts" % [
216
+ log.info "datadog: successfully created (%d/%d), updated (%d/%d), and deleted (%d/%d) alerts" % [
190
217
  @stats[:alerts_created],
218
+ @stats[:alerts_to_be_created],
191
219
  @stats[:alerts_updated],
220
+ @stats[:alerts_to_be_updated],
192
221
  @stats[:alerts_deleted],
222
+ @stats[:alerts_to_be_deleted],
193
223
  ]
194
224
  end
195
225
 
196
226
  def remove_alert_by_id(alert_id)
227
+ # This should only be used by dry-run to clean up created dry-run alerts
197
228
  log.debug("deleting alert, id: #{alert_id}")
198
- @dog.delete_alert(alert_id)
199
- @stats[:alerts_deleted] += 1
229
+ resp = @dog.delete_alert(alert_id)
230
+ code = resp[0].to_i
231
+ log_datadog_response_code(resp, code, :deleting)
232
+ end
233
+
234
# Records the outcome of a single Datadog API call.
#
# resp   - the response pair from the API client; resp[0] and resp[1] are
#          only echoed into error log lines.
# code   - Integer status code of the call (callers pass resp[0].to_i).
# action - label interpolated into error log lines (e.g. :creating).
# alert  - optional alert the call was about. When nil, only the @stats
#          counter is bumped: no api_errors entry, statsd gauge or log line
#          is produced.
#
# Classification: 400 is a client error; anything above 400, or -1, is an
# unknown error; every other code counts as a success.
#
# The return value is not meaningful; callers use this for side effects only.
def log_datadog_response_code(resp, code, action, alert = nil)
  # remember every non-200 response that belongs to a known alert
  api_errors << "#{code} on alert #{alert['name']}" if code != 200 && !alert.nil?

  outcome =
    if code == 400
      :client_error
    elsif code > 400 || code == -1
      :unknown_error
    else
      :success
    end

  stat_for_outcome = {
    :client_error  => :api_client_errors,
    :unknown_error => :api_unknown_errors,
    :success       => :api_successes,
  }
  @stats[stat_for_outcome[outcome]] += 1

  # without an alert there is nothing to tag the gauges or log lines with
  return if alert.nil?

  # NOTE(review): this interpolates the alert object itself, not
  # alert['name'] -- kept as-is to avoid changing emitted tags; confirm intent.
  tags = ["alert:#{alert}"]

  # exactly one of the three gauges is set to 1, the others are reset to 0
  [:unknown_error, :client_error, :success].each do |kind|
    statsd.gauge("datadog.api.#{kind}", kind == outcome ? 1 : 0, :tags => tags)
  end

  return if outcome == :success

  # the two error messages differ only in their label and one punctuation mark
  label, separator = outcome == :client_error ? ['client error', ';'] : ['unknown error', ':']
  log.error("#{label} while #{action} alert '#{alert['name']}'#{separator}" \
    " query was '#{alert['metric']['datadog_query'].strip}'" \
    " response was #{resp[0]}:'#{resp[1].inspect}'")
end
272
+
201
273
  end
202
274
  end
@@ -1,3 +1,4 @@
1
+ include ::Interferon::Logging
1
2
 
2
3
  module Interferon::GroupSources
3
4
  class Filesystem
@@ -10,6 +11,7 @@ module Interferon::GroupSources
10
11
 
11
12
  def list_groups
12
13
  groups = {}
14
+ aliases = {}
13
15
 
14
16
  @paths.each do |path|
15
17
  path = File.expand_path(path)
@@ -18,7 +20,7 @@ module Interferon::GroupSources
18
20
  next
19
21
  end
20
22
 
21
- Dir.glob(File.join(path, '*.{json,yml,yaml}')) do |group_file|
23
+ Dir.glob(File.join(path, '*.{json,yml,yaml}')).each do |group_file|
22
24
  begin
23
25
  group = YAML::parse(File.read(group_file))
24
26
  rescue YAML::SyntaxError => e
@@ -27,11 +29,25 @@ module Interferon::GroupSources
27
29
  log.warn "error reading group file #{group_file}: #{e}"
28
30
  else
29
31
  group = group.to_ruby
30
- groups[group['name']] = group['people'] || []
32
+ if group['people']
33
+ groups[group['name']] = group['people'] || []
34
+ elsif group['alias_for']
35
+ aliases[group['name']] = {:group => group['alias_for'], :group_file => group_file}
36
+ end
31
37
  end
32
38
  end
33
39
  end
34
40
 
41
+ aliases.each do |aliased_group, group_info|
42
+ group = group_info[:group]
43
+ group_file = group_info[:group_file]
44
+ if groups.include?(group)
45
+ groups[aliased_group] = groups[group]
46
+ else
47
+ log.warn "Alias not found for #{group} but used by #{aliased_group} in #{group_file}"
48
+ end
49
+ end
50
+
35
51
  return groups
36
52
  end
37
53
  end
@@ -1,3 +1,3 @@
1
1
  module Interferon
2
- VERSION = "0.0.12"
2
+ VERSION = "0.0.19"
3
3
  end
@@ -0,0 +1,21 @@
1
+ require 'tzinfo'
2
+
3
module Interferon
  # Decides whether a given instant falls inside a configurable window of
  # working days and hours in a given timezone.
  class WorkHoursHelper
    # Week days as returned by Time#wday (0 = Sunday): Monday to Friday.
    DEFAULT_WORK_DAYS = (1..5)
    # Hours as returned by Time#hour; 9..16 covers 09:00 through 16:59.
    DEFAULT_WORK_HOURS = (9..16)
    DEFAULT_WORK_TIMEZONE = 'America/Los_Angeles'
    DEFAULT_WORK_ARGS = {
      :hours => DEFAULT_WORK_HOURS,
      :days => DEFAULT_WORK_DAYS,
      :timezone => DEFAULT_WORK_TIMEZONE,
    }.freeze

    # Tells whether +time+ is a work hour.
    #
    # time - a UTC Time; it is passed to TZInfo's period_for_utc as-is.
    # args - optional overrides for the defaults above:
    #        :hours    => object answering include?(hour), e.g. (9..16)
    #        :days     => object answering include?(wday), e.g. (1..5)
    #        :timezone => TZInfo timezone identifier
    #
    # Returns true when both the local week day and the local hour are
    # included in the configured ranges, false otherwise. +args+ is not
    # modified.
    def self.is_work_hour?(time, args = {})
      # Defaults go first so that caller-supplied values win; merging the
      # other way round would overwrite every option the caller passed.
      args = DEFAULT_WORK_ARGS.merge(args || {})
      tz = TZInfo::Timezone.get(args[:timezone])

      # utc_total_offset includes the daylight-saving component; utc_offset
      # is only the zone's base offset and is an hour off during DST.
      time_in_tz = time + tz.period_for_utc(time).utc_total_offset

      args[:days].include?(time_in_tz.wday) && args[:hours].include?(time_in_tz.hour)
    end
  end
end