interferon 0.0.12 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 35b5382f5aabd274b548257bcc5f227faf1b951e
4
- data.tar.gz: abacd2609b40bf28b2783bbd21b24c384f991188
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MDdjYjBjNDk1OTJkMThiNjE5ZmZiNTgzNTI5NGU1ZDMzMzI5MGQ2ZQ==
5
+ data.tar.gz: !binary |-
6
+ NDIxMWM1MzQ2YTFlYjE4ZjIyOTUyMjk0ZDQ4ZjZhYTkzNWVlZDExZA==
5
7
  SHA512:
6
- metadata.gz: b215a4e355f66daa582d65b521fe0eef5842eb795551df1998844d82cb1892a64b7a95e3b60fff1ec2992eb2e88de2f584c274c9bd5570b6f0ae4026b05eeb5a
7
- data.tar.gz: 2bb6ee4093fe2bbe46d811a3cb79a357066073727a699d773b306a5355fd8f7b9d06a4f2bdb51082ac6570a3166820ce7f1b88d63f9fb76d6d032fc8b97073b0
8
+ metadata.gz: !binary |-
9
+ YmU0MWExYTUxNDY3ODcwNWI2OWNlZTliOWVjNWVlYjMwNzkyMzdiNWIzM2Vm
10
+ YzQ5NzY1MTQxNzkyMWIwYzJlM2I5MDc3ZTNlODFjOGVkZmU5ZmZmZjRmNGJm
11
+ YzYyZTdmODhkMjdmNjJmODUwODY5ODcwNzQ4OTliYjA5NzQ0Mzg=
12
+ data.tar.gz: !binary |-
13
+ YzkxYjVkNmFhNzQwNjdiZDdmYjY4ZmQ2ODU0MmZiMzVmODQ5NzJkN2RkMTNh
14
+ Nzc1YzY0ODA5NmU5ZDk3YzYxZmZjZTlhODg5YjdhMWU4OGNkNGM1Yzg2NTQx
15
+ MWE5ODhmZWU5YWJkNzcxNTlkMGM2YmUxYWVlYTZiOWYyZTNlZWY=
data/interferon.gemspec CHANGED
@@ -6,8 +6,8 @@ require 'interferon/version'
6
6
  Gem::Specification.new do |gem|
7
7
  gem.name = "interferon"
8
8
  gem.version = Interferon::VERSION
9
- gem.authors = ["Igor Serebryany"]
10
- gem.email = ["igor.serebryany@airbnb.com"]
9
+ gem.authors = ["Igor Serebryany", "Jimmy Ngo"]
10
+ gem.email = ["igor.serebryany@airbnb.com", "jimmy.ngo@airbnb.com"]
11
11
  gem.description = %q{: Store metrics alerts in code!}
12
12
  gem.summary = %q{: Store metrics alerts in code!}
13
13
  gem.homepage = "https://www.github.com/airbnb/interferon"
@@ -20,6 +20,10 @@ Gem::Specification.new do |gem|
20
20
  gem.add_runtime_dependency "dogapi", "~> 1.11", ">= 1.11.1"
21
21
  gem.add_runtime_dependency "aws-sdk", "~> 1.35", ">= 1.35.1"
22
22
  gem.add_runtime_dependency "dogstatsd-ruby", "~> 1.4", ">= 1.4.1"
23
+ gem.add_runtime_dependency "diffy", "~> 3.1.0", ">= 3.1.0"
24
+ gem.add_runtime_dependency "parallel", "~> 1.9", ">= 1.9.0"
25
+ gem.add_runtime_dependency "nokogiri", "< 1.7.0"
26
+ gem.add_runtime_dependency "tzinfo", "~> 1.2.2", ">= 1.2.2"
23
27
 
24
28
  gem.add_development_dependency "rspec", "~> 3.2"
25
29
  gem.add_development_dependency "pry", "~> 0.10"
@@ -30,6 +30,14 @@ module Interferon
30
30
  @dsl.name(name)
31
31
  end
32
32
 
33
# Marks this alert as silenced by calling `silenced(true)` on the evaluated DSL.
#
# Raises a RuntimeError when the alert has not been evaluated yet (no @dsl).
# Returns whatever `@dsl.silenced(true)` returns.
def silence
  raise "This alert has not yet been evaluated" unless @dsl

  @dsl.silenced(true)
end
40
+
33
41
  def [](attr)
34
42
  unless @dsl
35
43
  raise "This alert has not yet been evaluated"
@@ -1,3 +1,4 @@
1
+ require 'interferon/work_hours_helper'
1
2
 
2
3
  module Interferon
3
4
  module DSLMixin
@@ -47,6 +48,15 @@ module Interferon
47
48
  get_or_set(:@silenced_until, v && Time.parse(v), block, Time.at(0))
48
49
  end
49
50
 
51
# Tells whether the current time is inside working hours.
#
# The current time is taken in UTC and handed, together with +args+, to
# WorkHoursHelper.is_work_hour?, whose result is returned unchanged.
#
# args may contain:
#   :hours    => range of work hours (0 to 23h), for example (9..16)
#   :days     => range of week days (0 = sunday), for example (1..5) (Monday to Friday)
#   :timezone => example 'America/Los_Angeles'
# 9 to 5 Monday to Friday in PST is the default
def is_work_hour?(args = {})
  current_utc_time = Time.now.utc
  WorkHoursHelper.is_work_hour?(current_utc_time, args)
end
59
+
50
60
  def notify_no_data(v = nil, &block)
51
61
  get_or_set(:@notify_no_data, v, block, false)
52
62
  end
@@ -1,6 +1,9 @@
1
+ require 'diffy'
1
2
  require 'dogapi'
2
3
  require 'set'
3
4
 
5
+ Diffy::Diff.default_format = :text
6
+
4
7
  module Interferon::Destinations
5
8
  class Datadog
6
9
  include ::Interferon::Logging
@@ -32,14 +35,18 @@ module Interferon::Destinations
32
35
  @dog = Dogapi::Client.new(*args)
33
36
 
34
37
  @existing_alerts = nil
38
+ @dry_run = options['dry_run']
35
39
 
36
40
  # create datadog alerts 10 at a time
37
41
  @concurrency = 10
38
42
 
39
43
  @stats = {
40
44
  :alerts_created => 0,
45
+ :alerts_to_be_created => 0,
41
46
  :alerts_updated => 0,
47
+ :alerts_to_be_updated => 0,
42
48
  :alerts_deleted => 0,
49
+ :alerts_to_be_deleted => 0,
43
50
  :alerts_silenced => 0,
44
51
  :api_successes => 0,
45
52
  :api_client_errors => 0,
@@ -52,6 +59,10 @@ module Interferon::Destinations
52
59
  @api_errors ||= []
53
60
  end
54
61
 
62
# Builds the text stored on the Datadog alert: the alert message, then
# ALERT_KEY, then one "@name" mention per entry of +people+, each on its
# own line.
def generate_message(message, people)
  mentions = people.map { |person| "@#{person}" }
  [message, ALERT_KEY].concat(mentions).flatten.join("\n")
end
65
+
55
66
  def existing_alerts
56
67
  unless @existing_alerts
57
68
  resp = @dog.get_all_alerts()
@@ -64,7 +75,16 @@ module Interferon::Destinations
64
75
  alerts = resp[1]['alerts']
65
76
 
66
77
  # key alerts by name
67
- @existing_alerts = Hash[alerts.map{ |a| [a['name'], a] }]
78
+ @existing_alerts = {}
79
+ alerts.each do |alert|
80
+ existing_alert = @existing_alerts[alert['name']]
81
+ if existing_alert.nil?
82
+ alert['id'] = [alert['id']]
83
+ @existing_alerts[alert['name']] = alert
84
+ else
85
+ existing_alert['id'] << alert['id']
86
+ end
87
+ end
68
88
 
69
89
  # count how many are manually created
70
90
  @stats[:manually_created_alerts] = \
@@ -81,21 +101,22 @@ module Interferon::Destinations
81
101
 
82
102
  def create_alert(alert, people)
83
103
  # create a message which includes the notifications
84
- message = [
85
- alert['message'],
86
- ALERT_KEY,
87
- people.map{ |p| "@#{p}" }
88
- ].flatten.join("\n")
104
+ message = generate_message(alert['message'], people)
89
105
 
90
106
  # create the hash of options to send to datadog
91
107
  alert_opts = {
92
108
  :name => alert['name'],
93
109
  :message => message,
94
- :silenced => alert['silenced'] || alert['silenced_until'] > Time.now,
110
+ :silenced => false,
95
111
  :notify_no_data => alert['notify_no_data'],
96
112
  :timeout_h => nil,
97
113
  }
98
114
 
115
+ # Set alert to be silenced if there is a silenced set or silenced_until set
116
+ if alert['silenced'] || alert['silenced_until'] > Time.now
117
+ alert_opts[:silenced] = true
118
+ end
119
+
99
120
  # allow an optional timeframe for "no data" alerts to be specified
100
121
  # (this feature is supported, even though it's not documented)
101
122
  alert_opts[:no_data_timeframe] = alert['no_data_timeframe'] if alert['no_data_timeframe']
@@ -103,10 +124,15 @@ module Interferon::Destinations
103
124
  # timeout is in seconds, but set it to 1 hour at least
104
125
  alert_opts[:timeout_h] = [1, (alert['timeout'].to_i / 3600)].max if alert['timeout']
105
126
 
127
+ datadog_query = alert['metric']['datadog_query'].strip
128
+ existing_alert = existing_alerts[alert['name']]
129
+
106
130
  # new alert, create it
107
- if existing_alerts[alert['name']].nil?
131
+ if existing_alert.nil?
108
132
  action = :creating
109
- log.debug("new alert #{alert['name']}")
133
+ @stats[:alerts_to_be_created] += 1
134
+ new_alert_text = "Query: #{datadog_query} Message: #{message.split().join(' ')}"
135
+ log.info("creating new alert #{alert['name']}: #{new_alert_text}")
110
136
 
111
137
  resp = @dog.alert(
112
138
  alert['metric']['datadog_query'].strip,
@@ -116,66 +142,67 @@ module Interferon::Destinations
116
142
  # existing alert, modify it
117
143
  else
118
144
  action = :updating
119
- id = existing_alerts[alert['name']]['id']
120
- log.debug("updating existing alert #{id} (#{alert['name']})")
121
-
122
- resp = @dog.update_alert(
123
- id,
124
- alert['metric']['datadog_query'].strip,
125
- alert_opts
126
- )
145
+ @stats[:alerts_to_be_updated] += 1
146
+ id = existing_alert['id'][0]
147
+
148
+ new_alert_text = "Query:\n#{datadog_query}\nMessage:\n#{message}"
149
+ existing_alert_text = "Query:\n#{existing_alert['query']}\nMessage:\n#{existing_alert['message']}\n"
150
+ diff = Diffy::Diff.new(existing_alert_text, new_alert_text, :context=>1)
151
+ log.info("updating existing alert #{id} (#{alert['name']}): #{diff}")
152
+
153
+ if @dry_run
154
+ resp = @dog.alert(
155
+ alert['metric']['datadog_query'].strip,
156
+ alert_opts,
157
+ )
158
+ else
159
+ resp = @dog.update_alert(
160
+ id,
161
+ alert['metric']['datadog_query'].strip,
162
+ alert_opts
163
+ )
164
+ # Unmute existing alerts that have been unsilenced.
165
+ # Datadog does not allow updates to silencing via the update_alert API call.
166
+ if existing_alert['silenced'] && !alert_opts[:silenced]
167
+ @dog.unmute_monitor(id)
168
+ end
169
+ end
127
170
  end
128
171
 
129
172
  # log whenever we've encountered errors
130
173
  code = resp[0].to_i
131
- if code != 200
132
- api_errors << "#{code.to_s} on alert #{alert['name']}"
133
- end
134
-
135
- # client error
136
- if code == 400
137
- statsd.gauge('datadog.api.unknown_error', 0, :tags => ["alert:#{alert}"])
138
- statsd.gauge('datadog.api.client_error', 1, :tags => ["alert:#{alert}"])
139
- statsd.gauge('datadog.api.success', 0, :tags => ["alert:#{alert}"])
140
-
141
- @stats[:api_client_errors] += 1
142
- log.error("client error while #{action} alert '#{alert['name']}';" \
143
- " query was '#{alert['metric']['datadog_query'].strip}'" \
144
- " response was #{resp[0]}:'#{resp[1].inspect}'")
145
-
146
- # unknown (prob. datadog) error:
147
- elsif code >= 400 || code == -1
148
- statsd.gauge('datadog.api.unknown_error', 1, :tags => ["alert:#{alert}"])
149
- statsd.gauge('datadog.api.client_error', 0, :tags => ["alert:#{alert}"])
150
- statsd.gauge('datadog.api.success', 0, :tags => ["alert:#{alert}"])
151
-
152
- @stats[:api_unknown_errors] += 1
153
- log.error("unknown error while #{action} alert '#{alert['name']}':" \
154
- " query was '#{alert['metric']['datadog_query'].strip}'" \
155
- " response was #{resp[0]}:'#{resp[1].inspect}'")
174
+ log_datadog_response_code(resp, code, action, alert)
156
175
 
157
176
  # assume this was a success
158
- else
159
- statsd.gauge('datadog.api.unknown_error', 0, :tags => ["alert:#{alert}"])
160
- statsd.gauge('datadog.api.client_error', 0, :tags => ["alert:#{alert}"])
161
- statsd.gauge('datadog.api.success', 1, :tags => ["alert:#{alert}"])
162
-
163
- @stats[:api_successes] += 1
177
+ if !(code >= 400 || code == -1)
178
+ # assume this was a success
164
179
  @stats[:alerts_created] += 1 if action == :creating
165
180
  @stats[:alerts_updated] += 1 if action == :updating
166
181
  @stats[:alerts_silenced] += 1 if alert_opts[:silenced]
167
182
  end
168
183
 
169
- id = resp[1].nil? ? nil : resp[1]['id']
184
+ id = resp[1].nil? ? nil : [resp[1]['id']]
170
185
  # lets key alerts by their name
171
186
  return [alert['name'], id]
172
187
  end
173
188
 
174
189
  def remove_alert(alert)
175
190
  if alert['message'].include?(ALERT_KEY)
176
- log.debug("deleting alert #{alert['id']} (#{alert['name']})")
177
- @dog.delete_alert(alert['id'])
178
- @stats[:alerts_deleted] += 1
191
+ @stats[:alerts_to_be_deleted] += 1
192
+ log.info("deleting alert: #{alert['name']}")
193
+
194
+ if !@dry_run
195
+ alert['id'].each do |alert_id|
196
+ resp = @dog.delete_alert(alert_id)
197
+ code = resp[0].to_i
198
+ log_datadog_response_code(resp, code, :deleting)
199
+
200
+ if !(code >= 300 || code == -1)
201
+ # assume this was a success
202
+ @stats[:alerts_deleted] += 1
203
+ end
204
+ end
205
+ end
179
206
  else
180
207
  log.warn("not deleting manually-created alert #{alert['id']} (#{alert['name']})")
181
208
  end
@@ -186,17 +213,62 @@ module Interferon::Destinations
186
213
  statsd.gauge("datadog.#{k}", v)
187
214
  end
188
215
 
189
- log.info "datadog: created %d updated %d and deleted %d alerts" % [
216
+ log.info "datadog: successfully created (%d/%d), updated (%d/%d), and deleted (%d/%d) alerts" % [
190
217
  @stats[:alerts_created],
218
+ @stats[:alerts_to_be_created],
191
219
  @stats[:alerts_updated],
220
+ @stats[:alerts_to_be_updated],
192
221
  @stats[:alerts_deleted],
222
+ @stats[:alerts_to_be_deleted],
193
223
  ]
194
224
  end
195
225
 
196
226
  def remove_alert_by_id(alert_id)
227
+ # This should only be used by dry-run to clean up created dry-run alerts
197
228
  log.debug("deleting alert, id: #{alert_id}")
198
- @dog.delete_alert(alert_id)
199
- @stats[:alerts_deleted] += 1
229
+ resp = @dog.delete_alert(alert_id)
230
+ code = resp[0].to_i
231
+ log_datadog_response_code(resp, code, :deleting)
232
+ end
233
+
234
# Records the outcome of a single Datadog API call.
#
# resp   - response array returned by the Datadog client; resp[0] and resp[1]
#          are only used to build the error log line
# code   - numeric status code the caller derived from resp[0]
# action - verb used in the error log line (:creating, :updating, :deleting)
# alert  - the alert the call was made for, or nil. Without an alert only the
#          @stats counters change: nothing is appended to api_errors, no
#          gauges are emitted and nothing is logged.
#
# Classification: 400 is a client error, anything above 400 or -1 is an
# unknown error, every other code is counted as a success.
def log_datadog_response_code(resp, code, action, alert=nil)
  # any non-200 answer for a known alert is remembered, even if it is
  # classified as a success further down
  api_errors << "#{code} on alert #{alert['name']}" if code != 200 && !alert.nil?

  stat, label, separator =
    if code == 400
      [:api_client_errors, 'client', ';']
    elsif code > 400 || code == -1
      [:api_unknown_errors, 'unknown', ':']
    else
      [:api_successes, nil, nil]
    end

  @stats[stat] += 1
  return if alert.nil?

  # exactly one of the three gauges is set to 1, the other two to 0
  # NOTE(review): the tag interpolates the whole alert object rather than its
  # name -- confirm this is the intended tag value
  raised = label ? "datadog.api.#{label}_error" : 'datadog.api.success'
  %w[datadog.api.unknown_error datadog.api.client_error datadog.api.success].each do |gauge|
    statsd.gauge(gauge, gauge == raised ? 1 : 0, :tags => ["alert:#{alert}"])
  end
  return unless label

  log.error("#{label} error while #{action} alert '#{alert['name']}'#{separator}" \
            " query was '#{alert['metric']['datadog_query'].strip}'" \
            " response was #{resp[0]}:'#{resp[1].inspect}'")
end
272
+
201
273
  end
202
274
  end
@@ -1,3 +1,4 @@
1
+ include ::Interferon::Logging
1
2
 
2
3
  module Interferon::GroupSources
3
4
  class Filesystem
@@ -10,6 +11,7 @@ module Interferon::GroupSources
10
11
 
11
12
  def list_groups
12
13
  groups = {}
14
+ aliases = {}
13
15
 
14
16
  @paths.each do |path|
15
17
  path = File.expand_path(path)
@@ -18,7 +20,7 @@ module Interferon::GroupSources
18
20
  next
19
21
  end
20
22
 
21
- Dir.glob(File.join(path, '*.{json,yml,yaml}')) do |group_file|
23
+ Dir.glob(File.join(path, '*.{json,yml,yaml}')).each do |group_file|
22
24
  begin
23
25
  group = YAML::parse(File.read(group_file))
24
26
  rescue YAML::SyntaxError => e
@@ -27,11 +29,25 @@ module Interferon::GroupSources
27
29
  log.warn "error reading group file #{group_file}: #{e}"
28
30
  else
29
31
  group = group.to_ruby
30
- groups[group['name']] = group['people'] || []
32
+ if group['people']
33
+ groups[group['name']] = group['people'] || []
34
+ elsif group['alias_for']
35
+ aliases[group['name']] = {:group => group['alias_for'], :group_file => group_file}
36
+ end
31
37
  end
32
38
  end
33
39
  end
34
40
 
41
+ aliases.each do |aliased_group, group_info|
42
+ group = group_info[:group]
43
+ group_file = group_info[:group_file]
44
+ if groups.include?(group)
45
+ groups[aliased_group] = groups[group]
46
+ else
47
+ log.warn "Alias not found for #{group} but used by #{aliased_group} in #{group_file}"
48
+ end
49
+ end
50
+
35
51
  return groups
36
52
  end
37
53
  end
@@ -1,3 +1,3 @@
1
1
  module Interferon
2
- VERSION = "0.0.12"
2
+ VERSION = "0.0.19"
3
3
  end
@@ -0,0 +1,21 @@
1
+ require 'tzinfo'
2
+
3
module Interferon
  # Decides whether a point in time falls inside a configurable working window.
  class WorkHoursHelper
    # Week days counted as working days (0 = Sunday), Monday through Friday.
    DEFAULT_WORK_DAYS = (1..5)
    # Hours of the day (0..23) counted as working hours; hour 16 is included,
    # so the window closes at 17:00 local time.
    DEFAULT_WORK_HOURS = (9..16)
    DEFAULT_WORK_TIMEZONE = 'America/Los_Angeles'
    DEFAULT_WORK_ARGS = {
      :hours => DEFAULT_WORK_HOURS,
      :days => DEFAULT_WORK_DAYS,
      :timezone => DEFAULT_WORK_TIMEZONE,
    }.freeze

    # Returns true when +time+, converted to the configured timezone, lies on
    # a working day and within a working hour.
    #
    # time - the moment to test; it is passed to TZInfo's period_for_utc and
    #        is therefore expected to be a UTC time
    # args - optional overrides for the defaults above:
    #          :hours    => range of hours, e.g. (9..16)
    #          :days     => range of week days, e.g. (1..5)
    #          :timezone => TZInfo zone identifier, e.g. 'America/Los_Angeles'
    def self.is_work_hour?(time, args = {})
      # defaults first, so that anything the caller supplies wins
      args = DEFAULT_WORK_ARGS.merge(args)
      tz = TZInfo::Timezone.get args[:timezone]
      # utc_total_offset includes the daylight-saving shift; the base
      # utc_offset alone is one hour off while DST is in effect
      time_in_tz = time + tz.period_for_utc(time).utc_total_offset
      args[:days].include?(time_in_tz.wday) && args[:hours].include?(time_in_tz.hour)
    end
  end
end