interferon 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +11 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +52 -0
  5. data/LICENSE +21 -0
  6. data/README.md +96 -0
  7. data/bin/interferon +66 -0
  8. data/config.example.yaml +37 -0
  9. data/groups/data.yaml +11 -0
  10. data/groups/dataeng.yaml +4 -0
  11. data/groups/datainfra.yaml +10 -0
  12. data/groups/devhap.yaml +6 -0
  13. data/groups/discover.yaml +13 -0
  14. data/groups/growth.yaml +17 -0
  15. data/groups/host.yaml +12 -0
  16. data/groups/internalproducts.yml +13 -0
  17. data/groups/logstash.yaml +4 -0
  18. data/groups/mobile.yaml +17 -0
  19. data/groups/pagerduty_sysops.yaml +5 -0
  20. data/groups/panda.yaml +10 -0
  21. data/groups/payments.yaml +16 -0
  22. data/groups/payments_finance.yaml +8 -0
  23. data/groups/prodinfra.yaml +15 -0
  24. data/groups/search.yaml +10 -0
  25. data/groups/security.yaml +8 -0
  26. data/groups/sre.yaml +16 -0
  27. data/groups/teamx.yaml +8 -0
  28. data/groups/tns.yaml +14 -0
  29. data/groups/tools.yml +11 -0
  30. data/interferon.gemspec +26 -0
  31. data/lib/interferon.rb +241 -0
  32. data/lib/interferon/alert.rb +33 -0
  33. data/lib/interferon/alert_dsl.rb +94 -0
  34. data/lib/interferon/destinations/datadog.rb +169 -0
  35. data/lib/interferon/group_sources/filesystem.rb +38 -0
  36. data/lib/interferon/host_sources/aws_dynamo.rb +51 -0
  37. data/lib/interferon/host_sources/aws_elasticache.rb +69 -0
  38. data/lib/interferon/host_sources/aws_rds.rb +92 -0
  39. data/lib/interferon/host_sources/optica.rb +35 -0
  40. data/lib/interferon/host_sources/optica_services.rb +68 -0
  41. data/lib/interferon/loaders.rb +123 -0
  42. data/lib/interferon/logging.rb +26 -0
  43. data/lib/interferon/version.rb +3 -0
  44. data/script/convert.rb +29 -0
  45. data/script/pre-commit +73 -0
  46. data/spec/spec_helper.rb +62 -0
  47. metadata +179 -0
@@ -0,0 +1,169 @@
1
+ require 'dogapi'
2
+ require 'set'
3
+
4
+ module Interferon::Destinations
5
class Datadog
  include Interferon::Logging

  attr_accessor :concurrency

  # Marker text appended to the message of every alert this class creates.
  # existing_alerts and remove_alert look for it to tell framework-managed
  # alerts apart from manually created ones.
  ALERT_KEY = 'This alert was created via the alerts framework'

  # Builds a destination that talks to Datadog through Dogapi.
  #
  # options - Hash with string keys:
  #           'api_key', 'app_key' - required, handed to Dogapi::Client
  #           'dry_run'            - when truthy, create/update/delete calls
  #                                  are logged but not sent
  #
  # Raises ArgumentError when either credential is missing.
  def initialize(options)
    %w{app_key api_key}.each do |req|
      unless options[req]
        raise ArgumentError, "missing required argument #{req}"
      end
    end

    @dog = Dogapi::Client.new(options['api_key'], options['app_key'])
    @dry_run = !!options['dry_run']
    @existing_alerts = nil

    # create datadog alerts 10 at a time
    @concurrency = 10

    @stats = {
      :alerts_created => 0,
      :alerts_updated => 0,
      :alerts_deleted => 0,
      :alerts_silenced => 0,
      :api_successes => 0,
      :api_client_errors => 0,
      :api_unknown_errors => 0,
      :manually_created_alerts => 0,
    }
  end

  # Fetches all alerts from Datadog once and caches them keyed by name.
  #
  # Returns a Hash of alert name => alert hash as returned by the API.
  # Raises RuntimeError when the API response carries no 'alerts' list
  # (previously this surfaced as a NoMethodError on nil).
  def existing_alerts
    unless @existing_alerts
      resp = @dog.get_all_alerts()
      alerts = resp[1].is_a?(Hash) ? resp[1]['alerts'] : nil

      unless alerts
        raise "datadog: could not fetch existing alerts;" \
          " response was #{resp[0]}:'#{resp[1].inspect}'"
      end

      # key alerts by name
      @existing_alerts = Hash[alerts.map{ |a| [a['name'], a] }]

      # count how many are manually created
      @stats[:manually_created_alerts] =
        @existing_alerts.count{ |_name, a| !framework_alert?(a) }

      log.info "datadog: found %d existing alerts; %d were manually created" % [
        @existing_alerts.length,
        @stats[:manually_created_alerts],
      ]
    end

    return @existing_alerts
  end

  # Creates the alert in Datadog, or updates it when an alert with the same
  # name already exists. Nothing is sent in dry-run mode.
  #
  # alert  - object answering [] for 'name', 'message', 'silenced',
  #          'silenced_until', 'notify_no_data', 'no_data_timeframe',
  #          'timeout' and 'metric' (whose 'datadog_query' is the query)
  # people - list of handles to notify; each is appended as "@handle"
  #
  # Returns the alert name.
  def create_alert(alert, people)
    # create a message which includes the notifications
    message = [
      alert['message'],
      ALERT_KEY,
      people.map{ |p| "@#{p}" }
    ].flatten.join("\n")

    # an alert without 'silenced_until' is simply not time-silenced; comparing
    # nil against Time.now would raise NoMethodError
    silenced_until = alert['silenced_until']
    silenced = alert['silenced'] ||
      (!silenced_until.nil? && silenced_until > Time.now)

    # create the hash of options to send to datadog
    alert_opts = {
      :name => alert['name'],
      :message => message,
      :silenced => silenced,
      :notify_no_data => alert['notify_no_data'],
      :timeout_h => nil,
    }

    # allow an optional timeframe for "no data" alerts to be specified
    # (this feature is supported, even though it's not documented)
    alert_opts[:no_data_timeframe] = alert['no_data_timeframe'] if alert['no_data_timeframe']

    # timeout is in seconds, but set it to 1 hour at least
    alert_opts[:timeout_h] = [1, (alert['timeout'].to_i / 3600)].max if alert['timeout']

    resp = nil
    existing = existing_alerts[alert['name']]

    if existing.nil?
      # new alert, create it
      action = :creating
      log.debug("new alert #{alert['name']}")

      resp = @dog.alert(datadog_query(alert), alert_opts) unless @dry_run
    else
      # existing alert, modify it
      action = :updating
      id = existing['id']
      log.debug("updating existing alert #{id} (#{alert['name']})")

      resp = @dog.update_alert(id, datadog_query(alert), alert_opts) unless @dry_run
    end

    # resp stays nil in dry-run mode, so no API stats are recorded then
    record_api_result(alert, action, resp, alert_opts[:silenced]) if resp

    # lets key alerts by their name
    return alert['name']
  end

  # Deletes a framework-created alert; manually created alerts (those whose
  # message lacks ALERT_KEY) are left alone and only logged.
  #
  # alert - alert hash as returned by existing_alerts
  def remove_alert(alert)
    if framework_alert?(alert)
      log.debug("deleting alert #{alert['id']} (#{alert['name']})")
      @dog.delete_alert(alert['id']) unless @dry_run
      # NOTE(review): the counter is bumped in dry-run mode too, as before
      @stats[:alerts_deleted] += 1
    else
      log.warn("not deleting manually-created alert #{alert['id']} (#{alert['name']})")
    end
  end

  # Emits every counter in @stats as a statsd gauge and logs a summary line.
  # NOTE(review): statsd is not defined in this class; presumably it comes
  # from Interferon::Logging -- confirm.
  def report_stats
    @stats.each do |k,v|
      statsd.gauge("datadog.#{k}", v)
    end

    log.info "datadog: created %d updated %d and deleted %d alerts" % [
      @stats[:alerts_created],
      @stats[:alerts_updated],
      @stats[:alerts_deleted],
    ]
  end

  private

  # True when the alert's message carries the framework marker.
  def framework_alert?(alert)
    alert['message'].to_s.include?(ALERT_KEY)
  end

  # The whitespace-stripped query string sent to Datadog for this alert.
  def datadog_query(alert)
    alert['metric']['datadog_query'].strip
  end

  # Updates counters, gauges and logs according to the API response code.
  def record_api_result(alert, action, resp, silenced)
    code = resp[0].to_i

    if code == 400
      # client error
      gauge_api_outcome(alert['name'], :client_error)

      @stats[:api_client_errors] += 1
      log.error("client error while #{action} alert '#{alert['name']}';" \
        " query was '#{datadog_query(alert)}'")
    elsif code >= 400 || code == -1
      # unknown (prob. datadog) error:
      gauge_api_outcome(alert['name'], :unknown_error)

      @stats[:api_unknown_errors] += 1
      log.error("unknown error while #{action} alert '#{alert['name']}':" \
        " query was '#{datadog_query(alert)}'" \
        " response was #{resp[0]}:'#{resp[1].inspect}'")
    else
      # assume this was a success
      gauge_api_outcome(alert['name'], :success)

      @stats[:api_successes] += 1
      @stats[:alerts_created] += 1 if action == :creating
      @stats[:alerts_updated] += 1 if action == :updating
      @stats[:alerts_silenced] += 1 if silenced
    end
  end

  # Sends the three per-alert API gauges, setting only `outcome` to 1.
  # The tag uses the alert's name; interpolating the alert object itself
  # would produce an inspect-style string that differs on every run.
  def gauge_api_outcome(alert_name, outcome)
    tags = ["alert:#{alert_name}"]

    [:unknown_error, :client_error, :success].each do |kind|
      statsd.gauge("datadog.api.#{kind}", kind == outcome ? 1 : 0, :tags => tags)
    end
  end
end
169
+ end
@@ -0,0 +1,38 @@
1
+
2
+ module Interferon::GroupSources
3
# Loads notification groups from JSON/YAML files found in a list of
# directories.
#
# NOTE(review): this class calls `log` but does not include a logging module
# itself (the Datadog destination includes Interferon::Logging) -- confirm
# `log` is mixed in elsewhere, otherwise every warning path raises NameError.
class Filesystem
  # options - Hash; 'paths' (required) is the list of directories to scan.
  #
  # Raises ArgumentError when 'paths' is missing.
  def initialize(options)
    raise ArgumentError, "missing paths for loading groups from filesystem" \
      unless options['paths']

    @paths = options['paths']
  end

  # Reads every *.json, *.yaml and *.yml file directly inside each configured
  # directory. Missing directories and unreadable, empty or malformed files
  # are logged and skipped.
  #
  # Returns a Hash of group name => list of people ([] when the file has no
  # 'people' entry). When two files use the same name, the one read last wins.
  def list_groups
    groups = {}

    @paths.each do |path|
      path = File.expand_path(path)
      unless Dir.exist?(path)
        log.warn "no such directory #{path} for reading group files"
        next
      end

      # .yml is matched as well as .yaml: group files ship with both suffixes
      Dir.glob(File.join(path, '*.{json,yaml,yml}')) do |group_file|
        group = load_group(group_file)
        next unless group

        groups[group['name']] = group['people'] || []
      end
    end

    return groups
  end

  private

  # Parses one group file.
  #
  # Returns the group Hash, or nil (after logging) when the file cannot be
  # read, is empty, has a syntax error, or does not describe a named group.
  def load_group(group_file)
    document = YAML::parse(File.read(group_file))

    # YAML::parse returns a false value for an empty document
    unless document
      log.warn "error reading group file #{group_file}: file is empty"
      return nil
    end

    group = document.to_ruby
    unless group.is_a?(Hash) && group['name']
      log.warn "error reading group file #{group_file}: not a mapping with a 'name'"
      return nil
    end

    group
  rescue YAML::SyntaxError => e
    log.error "syntax error in group file #{group_file}: #{e}"
    nil
  rescue StandardError => e
    log.warn "error reading group file #{group_file}: #{e}"
    nil
  end
end
38
+ end
@@ -0,0 +1,51 @@
1
+ require 'aws'
2
+
3
+ module Interferon::HostSources
4
# Host source that reports every DynamoDB table in the configured regions.
class AwsDynamo
  # options - Hash with string keys; 'access_key_id' and 'secret_access_key'
  #           must be present, 'regions' optionally limits the regions scanned
  #           (all regions reported by AWS::regions are used otherwise).
  #
  # Raises ArgumentError naming the credentials that are absent.
  def initialize(options)
    absent = %w{access_key_id secret_access_key}.select { |name| !options.key?(name) }
    unless absent.empty?
      raise ArgumentError, "missing these required arguments for source AwsDynamo: #{absent.inspect}"
    end

    @access_key_id = options['access_key_id']
    @secret_access_key = options['secret_access_key']

    # fall back to every known region when none were requested
    requested = options['regions']
    @regions =
      if !requested || requested.empty?
        AWS::regions.map(&:name)
      else
        requested
      end
  end

  # Returns an Array with one Hash per table found, region by region.
  def list_hosts
    @regions.each_with_object([]) do |region, found|
      dynamo = AWS::DynamoDB.new(
        :access_key_id => @access_key_id,
        :secret_access_key => @secret_access_key,
        :region => region)

      AWS.memoize do
        dynamo.tables.each do |table|
          entry = {
            :source => 'aws_dynamo',
            :region => region,
            :table_name => table.name,

            :read_capacity => table.read_capacity_units,
            :write_capacity => table.write_capacity_units,

            # dynamodb does not support tagging
            :owners => [],
            :owner_groups => [],
          }
          found << entry
        end
      end
    end
  end
end
51
+ end
@@ -0,0 +1,69 @@
1
+ require 'aws'
2
+
3
+ module Interferon::HostSources
4
# Host source that reports every ElastiCache node in the configured regions.
class AwsElasticache
  # options - Hash with string keys; 'access_key_id' and 'secret_access_key'
  #           must be present, 'regions' optionally limits the regions scanned
  #           (all regions reported by AWS::regions are used otherwise).
  #
  # Raises ArgumentError naming the credentials that are absent.
  def initialize(options)
    absent = %w{access_key_id secret_access_key}.select { |name| !options.key?(name) }
    unless absent.empty?
      raise ArgumentError, "missing these required arguments for source AwsElasticache: #{absent.inspect}"
    end

    @access_key_id = options['access_key_id']
    @secret_access_key = options['secret_access_key']

    # fall back to every known region when none were requested
    requested = options['regions']
    @regions =
      if !requested || requested.empty?
        AWS::regions.map(&:name)
      else
        requested
      end
  end

  # Returns an Array with one Hash per cache node, carrying the details of
  # the cluster the node belongs to.
  def list_hosts
    @regions.each_with_object([]) do |region, found|
      api = AWS::ElastiCache.new(
        :access_key_id => @access_key_id,
        :secret_access_key => @secret_access_key,
        :region => region).client

      AWS.memoize do
        # read the list of cache clusters; we have to do our own pagination
        clusters = []
        request = {:show_cache_node_info => true}
        loop do
          page = api.describe_cache_clusters(request)
          clusters += page.data[:cache_clusters]

          marker = page.data[:marker]
          break unless marker
          request[:marker] = marker
        end

        # every node of every cluster becomes its own host entry
        clusters.each do |cluster|
          cluster[:cache_nodes].each do |node|
            entry = {
              :source => 'aws_elasticache',
              :region => region,

              :cluster_id => cluster[:cache_cluster_id],
              :cluster_status => cluster[:cache_cluster_status],
              :node_type => cluster[:cache_node_type],
              :peer_nodes => cluster[:num_cache_nodes],

              :node_status => node[:cache_node_status],

              # elasticache does not support tagging
              :owners => [],
              :owner_groups => [],
            }
            found << entry
          end
        end
      end
    end
  end
end
69
+ end
@@ -0,0 +1,92 @@
1
+ require 'aws'
2
+
3
+ module Interferon::HostSources
4
# Host source that reports every RDS instance in the configured regions,
# together with the ownership information stored in the instance's tags.
class AwsRds
  # options - Hash with string keys; 'access_key_id' and 'secret_access_key'
  #           must be present, 'regions' optionally limits the regions scanned
  #           (all regions reported by AWS::regions are used otherwise).
  #
  # Raises ArgumentError naming the credentials that are absent.
  def initialize(options)
    absent = %w{access_key_id secret_access_key}.select { |name| !options.key?(name) }
    unless absent.empty?
      raise ArgumentError, "missing these required arguments for source AwsRds: #{absent.inspect}"
    end

    @access_key_id = options['access_key_id']
    @secret_access_key = options['secret_access_key']

    # fall back to every known region when none were requested
    requested = options['regions']
    @regions =
      if !requested || requested.empty?
        AWS::regions.map(&:name)
      else
        requested
      end
  end

  # Returns an Array with one Hash per RDS instance found.
  def list_hosts
    @regions.each_with_object([]) do |region, found|
      rds = AWS::RDS.new(
        :access_key_id => @access_key_id,
        :secret_access_key => @secret_access_key,
        :region => region)

      AWS.memoize do
        rds.instances.each do |instance|
          found << host_record(rds, region, instance)
        end
      end
    end
  end

  private

  # Builds the host entry for one instance; tags are looked up by the
  # instance's ARN and supply owners, owner groups, db_env and db_role.
  def host_record(rds, region, instance)
    resource_arn = arn(region, instance.id)
    tag_list = rds.client.list_tags_for_resource(:resource_name => resource_arn)[:tag_list]
    tags = tag_list.each_with_object({}) { |tag, acc| acc[tag[:key]] = tag[:value] }

    # missing ownership tags are treated as empty comma-separated lists
    owners = tags['owners'] || ''
    owner_groups = tags['owner_groups'] || ''

    {
      :source => 'aws_rds',
      :region => region,
      :instance_id => instance.id,
      :db_name => instance.db_name,
      :engine => instance.engine,
      :engine_version => instance.engine_version,

      # metrics
      :allocated_storage => instance.allocated_storage,
      :iops => instance.iops,

      # replication info
      :is_replica => !instance.read_replica_source_db_instance_identifier.nil?,
      :replica_source_name => instance.read_replica_source_db_instance_identifier,
      :replica_names => instance.read_replica_db_instance_identifiers.join(','),
      :replicas => instance.read_replica_db_instance_identifiers.count,

      :owners => owners.split(','),
      :owner_groups => owner_groups.split(','),

      :db_env => tags['db_env'],
      :db_role => tags['db_role'],
    }
  end

  # ARN of a database instance in the given region of this account.
  def arn(region, instance_id)
    "arn:aws:rds:#{region}:#{account_number}:db:#{instance_id}"
  end

  # unfortunately, this appears to be the only way to get your account number:
  # it is the fifth colon-separated field of the caller's own user ARN, which
  # is read from get_user or, when that call is denied, from the second word
  # of the AccessDenied message. The result is cached.
  def account_number
    @account_number ||= begin
      own_arn =
        begin
          AWS::IAM.new(
            :access_key_id => @access_key_id,
            :secret_access_key => @secret_access_key).client.get_user()[:user][:arn]
        rescue AWS::IAM::Errors::AccessDenied => e
          e.message.split[1]
        end

      own_arn.split(':')[4]
    end
  end
end
92
+ end
@@ -0,0 +1,35 @@
1
+ require 'net/http'
2
+ require 'json'
3
+
4
+ module Interferon::HostSources
5
# Host source that reads the node list served by an optica instance over HTTP.
class Optica
  # NOTE(review): the Datadog destination includes Interferon::Logging by its
  # full name; confirm the bare `Logging` constant resolves from here.
  include Logging

  # options - Hash with string keys; 'host' is required, 'port' defaults to 80.
  #
  # Raises ArgumentError when 'host' is missing.
  def initialize(options)
    unless options['host']
      raise ArgumentError, "missing host for optica source"
    end

    @host = options['host']
    @port = options['port'] || 80
  end

  # Fetches '/' from the optica host (60 second open and read timeouts) and
  # converts each entry under 'nodes' in the JSON reply into a host Hash.
  #
  # Returns an Array of Hashes; owners and owner_groups are [] for nodes
  # without ownership data.
  def list_hosts
    http = Net::HTTP.new(@host, @port)
    http.read_timeout = 60
    http.open_timeout = 60

    reply = http.get('/')
    payload = JSON::parse(reply.body)

    return payload['nodes'].map do |_ip, node|
      ownership = node['ownership']

      {
        :source => 'optica',
        :hostname => node['hostname'],
        :role => node['role'],
        :environment => node['environment'],

        :owners => (ownership && ownership['people']) || [],
        :owner_groups => (ownership && ownership['groups']) || [],
      }
    end
  end
end
35
+ end