kennel 1.74.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Readme.md +289 -0
- data/lib/kennel.rb +90 -0
- data/lib/kennel/api.rb +83 -0
- data/lib/kennel/file_cache.rb +53 -0
- data/lib/kennel/github_reporter.rb +49 -0
- data/lib/kennel/importer.rb +135 -0
- data/lib/kennel/models/base.rb +29 -0
- data/lib/kennel/models/dashboard.rb +209 -0
- data/lib/kennel/models/monitor.rb +219 -0
- data/lib/kennel/models/project.rb +31 -0
- data/lib/kennel/models/record.rb +94 -0
- data/lib/kennel/models/slo.rb +92 -0
- data/lib/kennel/models/team.rb +12 -0
- data/lib/kennel/optional_validations.rb +21 -0
- data/lib/kennel/progress.rb +34 -0
- data/lib/kennel/settings_as_methods.rb +86 -0
- data/lib/kennel/subclass_tracking.rb +19 -0
- data/lib/kennel/syncer.rb +260 -0
- data/lib/kennel/tasks.rb +147 -0
- data/lib/kennel/template_variables.rb +38 -0
- data/lib/kennel/unmuted_alerts.rb +89 -0
- data/lib/kennel/utils.rb +159 -0
- data/lib/kennel/version.rb +4 -0
- data/template/Readme.md +247 -0
- metadata +109 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Kennel
  # Mixin that adds a `template_variables` setting plus helpers to render and validate it.
  #
  # The including class must respond to `settings` and `defaults` (called from `included`),
  # and its instances must respond to `template_variables` and `invalid!`.
  module TemplateVariables
    # Registers the `template_variables` setting on the including class, defaulting to an empty list.
    def self.included(base)
      base.settings :template_variables
      base.defaults(template_variables: -> { [] })
    end

    private

    # Expands shorthand variables: a String `"foo"` becomes
    # `{ default: "*", prefix: "foo", name: "foo" }`, anything else is passed through untouched.
    #
    # @return [Array] one entry per configured template variable, empty when none are configured
    def render_template_variables
      (template_variables || []).map do |v|
        v.is_a?(String) ? { default: "*", prefix: v, name: v } : v
      end
    end

    # check for queries that do not use the variables and would be misleading
    # TODO: do the same check for apm_query and their group_by
    #
    # A query passes when it mentions at least one `$name` variable followed by a word boundary,
    # so `$env` does not count as used by a query that only contains `$environment`.
    #
    # @param data [Hash] rendered definition, read via `data[:template_variables]` and `data[key]`
    # @param key [Symbol] key under which the list of widgets is stored in `data`
    # @return [nil] when nothing is wrong, otherwise whatever `invalid!` does (provided by the includer)
    def validate_template_variables(data, key)
      variables = (data[:template_variables] || []).map { |v| "$#{v.fetch(:name)}" }

      # Without variables there is nothing to enforce. Building the regex anyway would produce
      # an empty alternation that matches any word boundary and reports nonsensical failures.
      return if variables.empty?

      queries = data[key].flat_map do |widget|
        # group widgets nest their children one level down under definition.widgets
        ([widget] + (widget.dig(:definition, :widgets) || [])).flat_map { |w| widget_queries(w) }
      end.compact

      uses_variable = /(#{variables.map { |v| Regexp.escape(v) }.join("|")})\b/
      bad = queries.grep_v(uses_variable)
      return if bad.empty?

      invalid!(
        "queries #{bad.join(", ")} must use the template variables #{variables.join(", ")}\n" \
        "If that is not possible, add `validate: -> { false } # query foo in bar does not have baz tag`"
      )
    end

    # Collects the `:q` value of every request of a single widget (nil for requests without `:q`).
    #
    # @param widget [Hash] widget with an optional `definition.requests` Array or Hash
    # @return [Array<String, nil>]
    def widget_queries(widget)
      requests = widget.dig(:definition, :requests) || []
      (requests.is_a?(Hash) ? requests.values : requests).map { |r| r[:q] } # hostmap widgets have hash requests
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "kennel"
|
3
|
+
|
4
|
+
# Show Alerts that are not muted and their alerting scopes
|
5
|
+
module Kennel
  # Prints monitors that are alerting and not muted, together with their alerting groups.
  class UnmutedAlerts
    # Output color per group status; statuses not listed here are printed with :default.
    COLORS = { "Alert" => :red, "Warn" => :yellow, "No Data" => :cyan }.freeze

    class << self
      # Writes every unmuted alerting monitor carrying `tag` to `Kennel.out`:
      # its name, its url and one line per alerting group (status, group name, time since triggered).
      def print(api, tag)
        alerting = filtered_monitors(api, tag)
        return Kennel.out.puts("No unmuted alerts found") if alerting.empty?

        alerting.each do |monitor|
          Kennel.out.puts monitor[:name]
          Kennel.out.puts Utils.path_to_url("/monitors/#{monitor[:id]}")
          monitor[:state][:groups].each do |group|
            status = group[:status]
            elapsed = time_since(group[:last_triggered_ts])
            painted = Kennel::Utils.color(COLORS[status] || :default, status)
            Kennel.out.puts "#{painted}\t#{group[:name]}\t#{elapsed}"
          end
          Kennel.out.puts
        end
      end

      private

      # sort pod3 before pod11
      # Replaces the groups hash of the monitor with an array of its values in natural order.
      def sort_groups!(monitor)
        ordered = monitor[:state][:groups].values
        ordered.sort_by! do |group|
          group[:name].to_s.split(",").map { |part| Utils.natural_order(part) }
        end
        monitor[:state][:groups] = ordered
      end

      # Formats the seconds elapsed since unix timestamp `t` as HH:MM:SS (hours are not capped).
      def time_since(t)
        elapsed = Time.now.to_i - Integer(t)
        hours, remainder = elapsed.divmod(3600)
        minutes, seconds = remainder.divmod(60)
        format("%02d:%02d:%02d", hours, minutes, seconds)
      end

      # Downloads all monitors with the given tag and strips everything that is OK, silenced or
      # covered by an active downtime. Raises when no monitor carries the tag at all.
      def filtered_monitors(api, tag)
        found = Progress.progress("Downloading") do
          api.list("monitor", monitor_tags: tag, group_states: "all", with_downtimes: "true")
        end

        raise "No monitors for #{tag} found, check your spelling" if found.empty?

        # monitors that are fine or silenced as a whole are not interesting
        found.delete_if { |monitor| monitor[:overall_state] == "OK" }
        found.delete_if { |monitor| monitor[:options][:silenced].key?(:*) }

        # within the remaining monitors only alerting groups are interesting
        found.each do |monitor|
          monitor[:state][:groups].delete_if do |_, group|
            group[:status] == "OK" || group[:status] == "Ignored"
          end
        end

        found.each { |monitor| drop_silenced_groups!(monitor) }
        found.each { |monitor| drop_downtimed_groups!(monitor) }

        # monitors without any group left have nothing to report
        found.keep_if { |monitor| monitor[:state][:groups].any? }

        found.each { |monitor| sort_groups!(monitor) }
      end

      # Removes groups whose comma separated scope contains every part of some silenced scope.
      def drop_silenced_groups!(monitor)
        silenced_scopes = monitor[:options][:silenced].keys.map { |key| key.to_s.split(",") }
        monitor[:state][:groups].keep_if do |name, _|
          group_scope = name.to_s.split(",")
          silenced_scopes.none? { |parts| parts.all? { |part| group_scope.include?(part) } }
        end
      end

      # Removes groups that are listed by an active downtime; no-op without :matching_downtimes.
      def drop_downtimed_groups!(monitor)
        downtimes = monitor[:matching_downtimes]
        return unless downtimes

        muted = downtimes.select { |downtime| downtime[:active] }.flat_map { |downtime| downtime[:groups] }
        monitor[:state][:groups].delete_if { |name, _| muted.include?(name.to_s) }
      end
    end
  end
end
|
data/lib/kennel/utils.rb
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Kennel
  # Stateless helpers for string munging, terminal output, output capturing and small concurrency tasks.
  # Output goes through `Kennel.out` / `Kennel.err`, which are defined outside of this module.
  module Utils
    # ANSI SGR codes used by `color`
    COLORS = { red: 31, green: 32, yellow: 33, cyan: 36, magenta: 35, default: 0 }.freeze

    # IO that forwards everything written to it to each of the given streams.
    class TeeIO < IO
      # @param ios [Array<#write>] streams that receive a copy of every write
      def initialize(ios)
        @ios = ios
      end

      # Accepts any number of strings, like IO#write, so callers such as IO#puts that pass
      # several arguments work without falling back to the outdated single-argument interface.
      #
      # @return [Integer] number of bytes handed to each stream
      def write(*strings)
        strings.each do |string|
          @ios.each { |io| io.write string }
        end
        strings.inject(0) { |total, string| total + string.to_s.bytesize }
      end
    end

    class << self
      def snake_case(string)
        string
          .gsub(/::/, "_") # Foo::Bar -> foo_bar
          .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') # FOOBar -> foo_bar
          .gsub(/([a-z\d])([A-Z])/, '\1_\2') # fooBar -> foo_bar
          .tr("-", "_") # foo-bar -> foo_bar
          .downcase
      end

      # simplified version of https://apidock.com/rails/ActiveSupport/Inflector/parameterize
      def parameterize(string)
        string
          .downcase
          .gsub(/[^a-z0-9\-_]+/, "-") # remove unsupported
          .gsub(/-{2,}/, "-") # remove duplicates
          .gsub(/^-|-$/, "") # remove leading/trailing
      end

      # @return [Object, nil] the value itself, or nil when it is nil or empty
      def presence(value)
        value.nil? || value.empty? ? nil : value
      end

      # Prompts on Kennel.err and reads one line from STDIN.
      #
      # @return [Boolean] true only when the answer is exactly "y"; end of input counts as "no"
      def ask(question)
        # print instead of printf: the question is data, a `%` in it must not be parsed as a format directive
        Kennel.err.print color(:red, "#{question} - press 'y' to continue: ")
        begin
          STDIN.gets.to_s.chomp == "y" # gets returns nil at EOF
        rescue Interrupt # do not show a backtrace if user decides to Ctrl+C here
          Kennel.err.print "\n"
          exit 1
        end
      end

      # Wraps text in the ANSI escape sequence for the given color.
      #
      # @raise [KeyError] when color is not a key of COLORS
      def color(color, text)
        "\e[#{COLORS.fetch(color)}m#{text}\e[0m"
      end

      # Removes color sequences produced by `color` and any character that is followed by a backspace.
      def strip_shell_control(text)
        text.gsub(/\e\[\d+m(.*?)\e\[0m/, "\\1").gsub(/.#{Regexp.escape("\b")}/, "")
      end

      # Runs the block with Kennel.out swapped for a StringIO.
      #
      # @return [String] everything the block wrote to Kennel.out
      def capture_stdout
        old = Kennel.out
        Kennel.out = StringIO.new
        yield
        Kennel.out.string
      ensure
        Kennel.out = old
      end

      # Runs the block with Kennel.err swapped for a StringIO.
      #
      # @return [String] everything the block wrote to Kennel.err
      def capture_stderr
        old = Kennel.err
        Kennel.err = StringIO.new
        yield
        Kennel.err.string
      ensure
        Kennel.err = old
      end

      # Runs the block while copying everything written to Kennel.out and Kennel.err into one buffer,
      # the original streams still receive the output.
      #
      # @return [String] combined output in the order it was written
      def tee_output
        old_stdout = Kennel.out
        old_stderr = Kennel.err
        capture = StringIO.new
        Kennel.out = TeeIO.new([capture, Kennel.out])
        Kennel.err = TeeIO.new([capture, Kennel.err])
        yield
        capture.string
      ensure
        Kennel.out = old_stdout
        Kennel.err = old_stderr
      end

      # Runs a shell command with stderr merged into stdout.
      # NOTE(review): $CHILD_STATUS needs `require "English"`, which is not visible here; confirm it is loaded.
      #
      # @return [String] the command output
      # @raise [RuntimeError] when the command exits unsuccessfully
      def capture_sh(command)
        result = `#{command} 2>&1`
        raise "Command failed:\n#{command}\n#{result}" unless $CHILD_STATUS.success?
        result
      end

      # Prefixes the path with the datadog host when DATADOG_SUBDOMAIN is set, otherwise returns the path.
      def path_to_url(path)
        if subdomain = ENV["DATADOG_SUBDOMAIN"]
          "https://#{subdomain}.datadoghq.com#{path}"
        else
          path
        end
      end

      # Yields every item on one of up to `max` threads.
      #
      # @return [Array] block results in the order of `items`
      # @raise [StandardError] the first error (by item order) that a block raised
      def parallel(items, max: 10)
        threads = [items.size, max].min
        work = items.each_with_index.to_a
        done = Array.new(items.size)
        workers = Array.new(threads).map do
          Thread.new do
            loop do
              item, i = work.pop
              break unless i
              done[i] =
                begin
                  yield item
                rescue StandardError => e
                  work.clear # stop handing out new work once something failed
                  e
                end
            end
          end
        end
        workers.each(&:join)
        done.each { |d| raise d if d.is_a?(StandardError) }
      end

      # Splits a name into text and Integer parts so that e.g. pod3 sorts before pod11.
      def natural_order(name)
        name.split(/(\d+)/).each_with_index.map { |x, i| i.odd? ? x.to_i : x }
      end

      # Runs the block again when it raises one of `errors`, at most `times` extra attempts.
      # Every failed attempt is reported on Kennel.err, the last failure is re-raised.
      def retry(*errors, times:)
        yield
      rescue *errors => e
        times -= 1
        raise if times < 0
        Kennel.err.puts "Error #{e}, #{times} retries left"
        retry
      end

      # https://stackoverflow.com/questions/20235206/ruby-get-all-keys-in-a-hash-including-sub-keys/53876255#53876255
      def all_keys(items)
        case items
        when Hash then items.keys + items.values.flat_map { |v| all_keys(v) }
        when Array then items.flat_map { |i| all_keys(i) }
        else []
        end
      end

      # TODO: use awesome-print or similar, but it has too many monkey-patches
      # https://github.com/amazing-print/amazing_print/issues/36
      #
      # Renders `:key=>` as `key: ` and pads braces with spaces.
      def pretty_inspect(object)
        # non-mutating first pass: #inspect may hand back a frozen string (e.g. for true/false)
        string = object.inspect.gsub(/:([a-z_]+)=>/, "\\1: ")
        10.times do
          string.gsub!(/{(\S.*?\S)}/, "{ \\1 }") || break
        end
        string
      end
    end
  end
end
|
data/template/Readme.md
ADDED
@@ -0,0 +1,247 @@
|
|
1
|
+
![](github/cage.jpg?raw=true)
|
2
|
+
|
3
|
+
Manage Datadog Monitors / Dashboards / Slos as code
|
4
|
+
|
5
|
+
- DRY, searchable, audited, documented
|
6
|
+
- Changes are PR reviewed and applied on merge
|
7
|
+
- Updating shows diff before applying
|
8
|
+
- Automated import of existing resources
|
9
|
+
- Resources are grouped into projects that belong to teams and inherit tags
|
10
|
+
- No copy-pasting of ids to create new resources
|
11
|
+
- Automated cleanup when removing code
|
12
|
+
- [Helpers](#helpers) for automating common tasks
|
13
|
+
|
14
|
+
### Applying changes
|
15
|
+
|
16
|
+
![](github/screen.png?raw=true)
|
17
|
+
|
18
|
+
### Example code
|
19
|
+
|
20
|
+
```Ruby
|
21
|
+
# teams/foo.rb
|
22
|
+
module Teams
|
23
|
+
class Foo < Kennel::Models::Team
|
24
|
+
defaults(mention: -> { "@slack-my-team" })
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# projects/bar.rb
|
29
|
+
class Bar < Kennel::Models::Project
|
30
|
+
defaults(
|
31
|
+
team: -> { Teams::Foo.new }, # use mention and tags from the team
|
32
|
+
parts: -> {
|
33
|
+
[
|
34
|
+
Kennel::Models::Monitor.new(
|
35
|
+
self, # the current project
|
36
|
+
type: -> { "query alert" },
|
37
|
+
kennel_id: -> { "load-too-high" }, # pick a unique name
|
38
|
+
name: -> { "Foobar Load too high" }, # nice descriptive name that will show up in alerts and emails
|
39
|
+
message: -> {
|
40
|
+
<<~TEXT
|
41
|
+
This is bad!
|
42
|
+
#{super()} # inserts mention from team
|
43
|
+
TEXT
|
44
|
+
},
|
45
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:api} by {pod} > #{critical}" },
|
46
|
+
critical: -> { 20 }
|
47
|
+
)
|
48
|
+
]
|
49
|
+
}
|
50
|
+
)
|
51
|
+
end
|
52
|
+
```
|
53
|
+
|
54
|
+
|
55
|
+
## Structure
|
56
|
+
|
57
|
+
- `projects/` monitors/dashboards/etc scoped by project
|
58
|
+
- `teams/` team definitions
|
59
|
+
- `parts/` monitors/dashboards/etc that are used by multiple projects
|
60
|
+
- `generated/` projects as json, to show current state and proposed changes in PRs
|
61
|
+
|
62
|
+
## Workflows
|
63
|
+
|
64
|
+
### Setup
|
65
|
+
- clone the repo
|
66
|
+
- `gem install bundler && bundle install`
|
67
|
+
- `cp .env.example .env`
|
68
|
+
- open [Datadog API Settings](https://app.datadoghq.com/account/settings#api)
|
69
|
+
- copy any `API Key` and add it to `.env` as `DATADOG_API_KEY`
|
70
|
+
- find or create (check last page) your personal "Application Key" and add it to `.env` as `DATADOG_APP_KEY=`
|
71
|
+
- change the `DATADOG_SUBDOMAIN=app` in `.env` to your company's subdomain if you have one
|
72
|
+
- verify it works by running `rake plan`, it might show some diff, but should not crash
|
73
|
+
|
74
|
+
### Adding a team
|
75
|
+
|
76
|
+
- `mention` is used for all team monitors via `super()`
|
77
|
+
- `renotify_interval` is used for all team monitors (defaults to `0` / off)
|
78
|
+
- `tags` is used for all team monitors/dashboards (defaults to `team:<team-name>`)
|
79
|
+
|
80
|
+
```Ruby
|
81
|
+
# teams/my_team.rb
|
82
|
+
module Teams
|
83
|
+
class MyTeam < Kennel::Models::Team
|
84
|
+
defaults(
|
85
|
+
mention: -> { "@slack-my-team" }
|
86
|
+
)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
```
|
90
|
+
|
91
|
+
### Adding a new monitor
|
92
|
+
- use [datadog monitor UI](https://app.datadoghq.com/monitors#create) to create a monitor
|
93
|
+
- see below
|
94
|
+
|
95
|
+
### Updating an existing monitor
|
96
|
+
- use [datadog monitor UI](https://app.datadoghq.com/monitors/manage) to find a monitor
|
97
|
+
- get the `id` from the url
|
98
|
+
- run `URL='https://app.datadoghq.com/monitors/123' bundle exec rake kennel:import` and copy the output
|
99
|
+
- find or create a project in `projects/`
|
100
|
+
- add the monitor to `parts: [` list, for example:
|
101
|
+
```Ruby
|
102
|
+
# projects/my_project.rb
|
103
|
+
class MyProject < Kennel::Models::Project
|
104
|
+
defaults(
|
105
|
+
team: -> { Teams::MyTeam.new }, # use existing team or create new one in teams/
|
106
|
+
parts: -> {
|
107
|
+
[
|
108
|
+
Kennel::Models::Monitor.new(
|
109
|
+
self,
|
110
|
+
id: -> { 123456 }, # id from datadog url, not necessary when creating a new monitor
|
111
|
+
type: -> { "query alert" },
|
112
|
+
kennel_id: -> { "load-too-high" }, # make up a unique name
|
113
|
+
name: -> { "Foobar Load too high" }, # nice descriptive name that will show up in alerts and emails
|
114
|
+
message: -> {
|
115
|
+
# Explain what behavior to expect and how to fix the cause
|
116
|
+
# Use #{super()} to add team notifications.
|
117
|
+
<<~TEXT
|
118
|
+
Foobar will be slow and that could cause Barfoo to go down.
|
119
|
+
Add capacity or debug why it is suddenly slow.
|
120
|
+
#{super()}
|
121
|
+
TEXT
|
122
|
+
},
|
123
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:api} by {pod} > #{critical}" }, # replace actual value with #{critical} to keep them in sync
|
124
|
+
critical: -> { 20 }
|
125
|
+
)
|
126
|
+
]
|
127
|
+
}
|
128
|
+
)
|
129
|
+
end
|
130
|
+
```
|
131
|
+
- run `PROJECT=my_project bundle exec rake plan`, an Update to the existing monitor should be shown (not Create / Delete)
|
132
|
+
- alternatively: `bundle exec rake generate` to only locally update the generated `json` files
|
133
|
+
- review changes then `git commit`
|
134
|
+
- make a PR ... get reviewed ... merge
|
135
|
+
- datadog is updated by travis
|
136
|
+
|
137
|
+
### Adding a new dashboard
|
138
|
+
- go to [datadog dashboard UI](https://app.datadoghq.com/dashboard/lists) and click on _New Dashboard_ to create a dashboard
|
139
|
+
- see below
|
140
|
+
|
141
|
+
### Updating an existing dashboard
|
142
|
+
- go to [datadog dashboard UI](https://app.datadoghq.com/dashboard/lists) to find a dashboard
|
143
|
+
- get the `id` from the url
|
144
|
+
- run `URL='https://app.datadoghq.com/dashboard/bet-foo-bar' bundle exec rake kennel:import` and copy the output
|
145
|
+
- find or create a project in `projects/`
|
146
|
+
- add a dashboard to `parts: [` list, for example:
|
147
|
+
```Ruby
|
148
|
+
class MyProject < Kennel::Models::Project
|
149
|
+
defaults(
|
150
|
+
team: -> { Teams::MyTeam.new }, # use existing team or create new one in teams/
|
151
|
+
parts: -> {
|
152
|
+
[
|
153
|
+
Kennel::Models::Dashboard.new(
|
154
|
+
self,
|
155
|
+
id: -> { "abc-def-ghi" }, # id from datadog url, not needed when creating a new dashboard
|
156
|
+
title: -> { "My Dashboard" },
|
157
|
+
description: -> { "Overview of foobar" },
|
158
|
+
template_variables: -> { ["environment"] }, # see https://docs.datadoghq.com/api/?lang=ruby#timeboards
|
159
|
+
kennel_id: -> { "overview-dashboard" }, # make up a unique name
|
160
|
+
layout_type: -> { "ordered" },
|
161
|
+
definitions: -> {
|
162
|
+
[ # An array of arrays, each one is a graph in the dashboard, alternatively a hash for finer control
|
163
|
+
[
|
164
|
+
# title, viz, type, query, edit an existing graph and see the json definition
|
165
|
+
"Graph name", "timeseries", "area", "sum:mystats.foobar{$environment}"
|
166
|
+
],
|
167
|
+
[
|
168
|
+
# queries can be an Array as well, this will generate multiple requests
|
169
|
+
# for a single graph
|
170
|
+
"Graph name", "timeseries", "area", ["sum:mystats.foobar{$environment}", "sum:mystats.success{$environment}"],
|
171
|
+
# add events too ...
|
172
|
+
events: [{q: "tags:foobar,deploy", tags_execution: "and"}]
|
173
|
+
]
|
174
|
+
]
|
175
|
+
}
|
176
|
+
)
|
177
|
+
]
|
178
|
+
}
|
179
|
+
)
|
180
|
+
end
|
181
|
+
```
|
182
|
+
|
183
|
+
### Skipping validations
|
184
|
+
|
185
|
+
Some validations might be too strict for your use case or just wrong. Please [open an issue](https://github.com/grosser/kennel/issues) and
|
186
|
+
to unblock use the `validate: -> { false }` option.
|
187
|
+
|
188
|
+
### Linking with kennel_ids
|
189
|
+
|
190
|
+
To link to existing monitors via their kennel_id
|
191
|
+
|
192
|
+
- Screens `uptime` widgets can use `monitor: {id: "foo:bar"}`
|
193
|
+
- Screens `alert_graph` widgets can use `alert_id: "foo:bar"`
|
194
|
+
- Monitors `composite` can use `query: -> { "%{foo:bar} || %{foo:baz}" }`
|
195
|
+
|
196
|
+
### Debugging changes locally
|
197
|
+
|
198
|
+
- rebase on updated `master` to not undo other changes
|
199
|
+
- figure out project name by converting the class name to snake-case
|
200
|
+
- run `PROJECT=foo bundle exec rake kennel:update_datadog` to test changes for a single project
|
201
|
+
|
202
|
+
### Reuse
|
203
|
+
|
204
|
+
Add to `parts/<folder>`.
|
205
|
+
|
206
|
+
```Ruby
|
207
|
+
module Monitors
|
208
|
+
class LoadTooHigh < Kennel::Models::Monitor
|
209
|
+
defaults(
|
210
|
+
name: -> { "#{project.name} load too high" },
|
211
|
+
message: -> { "Shut it down!" },
|
212
|
+
type: -> { "query alert" },
|
213
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:#{project.kennel_id}} by {pod} > #{critical}" }
|
214
|
+
)
|
215
|
+
end
|
216
|
+
end
|
217
|
+
```
|
218
|
+
|
219
|
+
Reuse it in multiple projects.
|
220
|
+
|
221
|
+
```Ruby
|
222
|
+
class Database < Kennel::Models::Project
|
223
|
+
defaults(
|
224
|
+
team: -> { Kennel::Models::Team.new(mention: -> { '@slack-foo' }, kennel_id: -> { 'foo' }) },
|
225
|
+
parts: -> { [Monitors::LoadTooHigh.new(self, critical: -> { 13 })] }
|
226
|
+
)
|
227
|
+
end
|
228
|
+
```
|
229
|
+
|
230
|
+
## Helpers
|
231
|
+
|
232
|
+
### Listing un-muted alerts
|
233
|
+
|
234
|
+
Run `rake kennel:alerts TAG=service:my-service` to see all un-muted alerts for a given datadog monitor tag.
|
235
|
+
|
236
|
+
### Validating mentions work
|
237
|
+
|
238
|
+
`rake kennel:validate_mentions` should run as part of CI
|
239
|
+
|
240
|
+
### Grepping through all of datadog
|
241
|
+
|
242
|
+
`TYPE=monitor rake kennel:dump`
|
243
|
+
|
244
|
+
### Find all monitors with No-Data
|
245
|
+
|
246
|
+
`rake kennel:nodata TAG=team:foo`
|
247
|
+
|