kennel 1.75.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Readme.md +289 -0
- data/lib/kennel.rb +90 -0
- data/lib/kennel/api.rb +83 -0
- data/lib/kennel/file_cache.rb +53 -0
- data/lib/kennel/github_reporter.rb +49 -0
- data/lib/kennel/importer.rb +135 -0
- data/lib/kennel/models/base.rb +29 -0
- data/lib/kennel/models/dashboard.rb +209 -0
- data/lib/kennel/models/monitor.rb +219 -0
- data/lib/kennel/models/project.rb +31 -0
- data/lib/kennel/models/record.rb +94 -0
- data/lib/kennel/models/slo.rb +92 -0
- data/lib/kennel/models/team.rb +12 -0
- data/lib/kennel/optional_validations.rb +21 -0
- data/lib/kennel/progress.rb +34 -0
- data/lib/kennel/settings_as_methods.rb +86 -0
- data/lib/kennel/subclass_tracking.rb +19 -0
- data/lib/kennel/syncer.rb +260 -0
- data/lib/kennel/tasks.rb +148 -0
- data/lib/kennel/template_variables.rb +38 -0
- data/lib/kennel/unmuted_alerts.rb +89 -0
- data/lib/kennel/utils.rb +159 -0
- data/lib/kennel/version.rb +4 -0
- data/template/Readme.md +247 -0
- metadata +109 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Kennel
  # Mixin that adds a `template_variables` setting to the including class and
  # provides private helpers to render those variables and to validate that
  # widget queries actually reference them.
  module TemplateVariables
    # Hook run on `include`: registers the `template_variables` setting on
    # +base+ and gives it an empty-array default.
    def self.included(base)
      base.settings :template_variables
      base.defaults(template_variables: -> { [] })
    end

    private

    # Returns the configured template variables with every String entry
    # expanded to `{ default: "*", prefix: <string>, name: <string> }`.
    # Non-String entries are passed through unchanged; a nil setting renders
    # as an empty array.
    def render_template_variables
      (template_variables || []).map do |v|
        v.is_a?(String) ? { default: "*", prefix: v, name: v } : v
      end
    end

    # check for queries that do not use the variables and would be misleading
    # TODO: do the same check for apm_query and their group_by
    #
    # data - Hash holding :template_variables (entries must respond to
    #        fetch(:name)) and the widget list stored under +key+
    # key  - key of the widget list inside +data+
    #
    # A query is accepted when it mentions at least one of the variables
    # (as `$name` followed by a word boundary). Offending queries are reported
    # through `invalid!`, which the including class has to provide.
    def validate_template_variables(data, key)
      variables = (data[:template_variables] || []).map { |v| "$#{v.fetch(:name)}" }

      # Nothing to enforce without variables. Skipping here also avoids building
      # the degenerate regex /()\b/, which would flag queries that contain no
      # word characters and report an empty variable list.
      return if variables.empty?

      # a widget may nest further widgets under definition.widgets, check both levels
      queries = data[key].flat_map do |widget|
        ([widget] + (widget.dig(:definition, :widgets) || [])).flat_map { |w| widget_queries(w) }
      end.compact

      bad = queries.grep_v(/(#{variables.map { |v| Regexp.escape(v) }.join("|")})\b/)
      if bad.any?
        invalid!(
          "queries #{bad.join(", ")} must use the template variables #{variables.join(", ")}\n" \
          "If that is not possible, add `validate: -> { false } # query foo in bar does not have baz tag`"
        )
      end
    end

    # Returns the `:q` value of every request in the widget's definition;
    # requests without `:q` yield nil, widgets without requests yield [].
    def widget_queries(widget)
      requests = widget.dig(:definition, :requests) || []
      (requests.is_a?(Hash) ? requests.values : requests).map { |r| r[:q] } # hostmap widgets have hash requests
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "kennel"
|
3
|
+
|
4
|
+
# Show Alerts that are not muted and their alerting scopes
|
5
|
+
module Kennel
  # Show Alerts that are not muted and their alerting scopes
  class UnmutedAlerts
    # status reported for a group => color used when printing it
    COLORS = {
      "Alert" => :red,
      "Warn" => :yellow,
      "No Data" => :cyan
    }.freeze

    class << self
      # Prints every unmuted, alerting monitor carrying +tag+ to Kennel.out:
      # name, url, then one line per alerting group with colored status,
      # group name and time since it last triggered.
      #
      # api - object responding to `list(type, params)` (see filtered_monitors)
      # tag - monitor tag passed to the api as `monitor_tags`
      def print(api, tag)
        monitors = filtered_monitors(api, tag)
        if monitors.empty?
          Kennel.out.puts "No unmuted alerts found"
        else
          monitors.each do |m|
            Kennel.out.puts m[:name]
            Kennel.out.puts Utils.path_to_url("/monitors/#{m[:id]}")
            m[:state][:groups].each do |g|
              color = COLORS[g[:status]] || :default
              since = "\t#{time_since(g[:last_triggered_ts])}"
              Kennel.out.puts "#{Kennel::Utils.color(color, g[:status])}\t#{g[:name]}#{since}"
            end
            Kennel.out.puts
          end
        end
      end

      private

      # sort pod3 before pod11
      # Replaces the monitor's groups hash with an array of its values, ordered
      # naturally by each comma-separated part of the group name.
      def sort_groups!(monitor)
        groups = monitor[:state][:groups].values
        groups.sort_by! { |g| g[:name].to_s.split(",").map { |w| Utils.natural_order(w) } }
        monitor[:state][:groups] = groups
      end

      # Formats the seconds elapsed since unix timestamp +t+ as HH:MM:SS
      # (hours are not capped at 24). Raises if +t+ is not Integer()-convertible.
      def time_since(t)
        diff = Time.now.to_i - Integer(t)
        "%02d:%02d:%02d" % [diff / 3600, diff / 60 % 60, diff % 60]
      end

      # Silenced scopes of a monitor as a hash. Monitors whose options carry no
      # `silenced` entry are treated as having nothing silenced instead of
      # crashing the whole listing with a NoMethodError.
      def silenced_scopes(monitor)
        monitor.dig(:options, :silenced) || {}
      end

      # Downloads all monitors tagged +tag+ and narrows them down, in place, to
      # monitors with at least one alerting group that is neither silenced nor
      # covered by an active downtime. Each returned monitor has its
      # `[:state][:groups]` converted to a naturally sorted array.
      #
      # Raises RuntimeError when the tag matches no monitor at all.
      def filtered_monitors(api, tag)
        # Download all monitors with given tag
        monitors = Progress.progress("Downloading") do
          api.list("monitor", monitor_tags: tag, group_states: "all", with_downtimes: "true")
        end

        raise "No monitors for #{tag} found, check your spelling" if monitors.empty?

        # only keep monitors that are alerting
        monitors.reject! { |m| m[:overall_state] == "OK" }

        # only keep monitors that are not completely silenced
        monitors.reject! { |m| silenced_scopes(m).key?(:*) }

        # only keep groups that are alerting
        monitors.each { |m| m[:state][:groups].reject! { |_, g| g[:status] == "OK" || g[:status] == "Ignored" } }

        # only keep alerting groups that are not silenced
        monitors.each do |m|
          silenced = silenced_scopes(m).keys.map { |k| k.to_s.split(",") }
          m[:state][:groups].select! do |k, _|
            scope = k.to_s.split(",")
            # a group is silenced when every part of a silenced scope is part of its own scope
            silenced.none? { |s| (s - scope).empty? }
          end
        end

        # only keep monitors that are not covered by a downtime
        monitors.each do |m|
          next unless m[:matching_downtimes]
          downtime_groups = m[:matching_downtimes].select { |d| d[:active] }.flat_map { |d| d[:groups] }
          m[:state][:groups].reject! do |k, _|
            downtime_groups.include?(k.to_s)
          end
        end

        # only keep monitors with alerting groups
        monitors.select! { |m| m[:state][:groups].any? }

        # sort group alerts
        monitors.each { |m| sort_groups!(m) }
      end
    end
  end
end
|
data/lib/kennel/utils.rb
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Kennel
  # Stateless helpers: string munging, console prompts and colors, capturing
  # of Kennel.out / Kennel.err, shell execution, simple thread fan-out and
  # pretty-printing.
  module Utils
    # color name => ANSI SGR code
    COLORS = { red: 31, green: 32, yellow: 33, cyan: 36, magenta: 35, default: 0 }.freeze

    # IO subclass that forwards everything written to it to each of the given ios.
    class TeeIO < IO
      # ios - objects responding to `write`
      # NOTE(review): IO#initialize is not called, so the instance wraps no
      # file descriptor; presumably only write-based methods are meant to be
      # used on it - confirm before relying on anything else.
      def initialize(ios)
        @ios = ios
      end

      # Writes all given strings to every io and returns the number of bytes
      # written, like IO#write. Accepts several strings because IO#puts hands
      # the line and its terminator to `write` as separate arguments.
      def write(*strings)
        @ios.each do |io|
          strings.each { |string| io.write string }
        end
        strings.inject(0) { |bytes, string| bytes + string.to_s.bytesize }
      end
    end

    class << self
      # Converts a class-like name into a lowercase, underscore separated string.
      def snake_case(string)
        string
          .gsub(/::/, "_") # Foo::Bar -> foo_bar
          .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') # FOOBar -> foo_bar
          .gsub(/([a-z\d])([A-Z])/, '\1_\2') # fooBar -> foo_bar
          .tr("-", "_") # foo-bar -> foo_bar
          .downcase
      end

      # simplified version of https://apidock.com/rails/ActiveSupport/Inflector/parameterize
      def parameterize(string)
        string
          .downcase
          .gsub(/[^a-z0-9\-_]+/, "-") # remove unsupported
          .gsub(/-{2,}/, "-") # remove duplicates
          .gsub(/^-|-$/, "") # remove leading/trailing
      end

      # Returns nil for nil or empty values, the value itself otherwise.
      # Non-nil values must respond to `empty?`.
      def presence(value)
        value.nil? || value.empty? ? nil : value
      end

      # Prompts on Kennel.err and reads one line from STDIN.
      # Returns true only when the answer is exactly "y"; end of input counts
      # as "no". Exits with status 1 when interrupted.
      def ask(question)
        # print instead of printf: the question is data, a "%" in it must not be
        # interpreted as a format directive
        Kennel.err.print color(:red, "#{question} - press 'y' to continue: ")
        begin
          STDIN.gets.to_s.chomp == "y" # gets returns nil on EOF
        rescue Interrupt # do not show a backtrace if user decides to Ctrl+C here
          Kennel.err.print "\n"
          exit 1
        end
      end

      # Wraps text in the ANSI escape for +color+ followed by a reset.
      # Raises KeyError for colors that are not in COLORS.
      def color(color, text)
        "\e[#{COLORS.fetch(color)}m#{text}\e[0m"
      end

      # Removes color sequences as produced by `color` and undoes backspaces
      # by dropping each backspace together with the character before it.
      def strip_shell_control(text)
        text.gsub(/\e\[\d+m(.*?)\e\[0m/, "\\1").gsub(/.#{Regexp.escape("\b")}/, "")
      end

      # Runs the block with Kennel.out swapped for a StringIO and returns what
      # was written; the previous Kennel.out is always restored.
      def capture_stdout
        old = Kennel.out
        Kennel.out = StringIO.new
        yield
        Kennel.out.string
      ensure
        Kennel.out = old
      end

      # Same as capture_stdout, but for Kennel.err.
      def capture_stderr
        old = Kennel.err
        Kennel.err = StringIO.new
        yield
        Kennel.err.string
      ensure
        Kennel.err = old
      end

      # Runs the block while copying everything written to Kennel.out and
      # Kennel.err into one shared buffer (output still reaches the original
      # ios). Returns the buffer content and always restores both ios.
      def tee_output
        old_stdout = Kennel.out
        old_stderr = Kennel.err
        capture = StringIO.new
        Kennel.out = TeeIO.new([capture, Kennel.out])
        Kennel.err = TeeIO.new([capture, Kennel.err])
        yield
        capture.string
      ensure
        Kennel.out = old_stdout
        Kennel.err = old_stderr
      end

      # Runs +command+ in a shell with stderr merged into stdout and returns
      # the output. Raises RuntimeError including command and output when the
      # command does not exit successfully.
      # NOTE(review): $CHILD_STATUS is provided by the `English` stdlib, which
      # is not required in this file - presumably loaded elsewhere, confirm.
      def capture_sh(command)
        result = `#{command} 2>&1`
        raise "Command failed:\n#{command}\n#{result}" unless $CHILD_STATUS.success?
        result
      end

      # Prefixes +path+ with the datadog host of ENV["DATADOG_SUBDOMAIN"];
      # returns the bare path when that variable is not set.
      def path_to_url(path)
        if subdomain = ENV["DATADOG_SUBDOMAIN"]
          "https://#{subdomain}.datadoghq.com#{path}"
        else
          path
        end
      end

      # Yields every item from up to +max+ threads and returns the block
      # results in the order of +items+.
      # Work is taken from the end of the list. When a block raises a
      # StandardError the remaining work is dropped and, once all threads have
      # finished, the first error in item order is raised.
      def parallel(items, max: 10)
        threads = [items.size, max].min
        work = items.each_with_index.to_a
        done = Array.new(items.size)
        workers = Array.new(threads).map do
          Thread.new do
            loop do
              item, i = work.pop
              break unless i # list is empty
              done[i] =
                begin
                  yield item
                rescue StandardError => e
                  work.clear
                  e
                end
            end
          end
        end
        workers.each(&:join)
        done.each { |d| raise d if d.is_a?(StandardError) }
      end

      # Splits a name into text and integer parts so that sorting by the
      # result orders "pod3" before "pod11".
      def natural_order(name)
        name.split(/(\d+)/).each_with_index.map { |x, i| i.odd? ? x.to_i : x }
      end

      # Runs the block and re-runs it up to +times+ more times when it raises
      # one of +errors+, logging each failure to Kennel.err. The last error is
      # re-raised once the retries are used up.
      def retry(*errors, times:)
        yield
      rescue *errors => e
        times -= 1
        raise if times < 0
        Kennel.err.puts "Error #{e}, #{times} retries left"
        retry
      end

      # https://stackoverflow.com/questions/20235206/ruby-get-all-keys-in-a-hash-including-sub-keys/53876255#53876255
      # Collects the keys of all hashes nested anywhere inside hashes/arrays.
      def all_keys(items)
        case items
        when Hash then items.keys + items.values.flat_map { |v| all_keys(v) }
        when Array then items.flat_map { |i| all_keys(i) }
        else []
        end
      end

      # TODO: use awesome-print or similar, but it has too many monkey-patches
      # https://github.com/amazing-print/amazing_print/issues/36
      # Returns `object.inspect` with `:key=>` rewritten to `key: ` and
      # padding added inside braces (at most 10 passes for nested braces).
      def pretty_inspect(object)
        # non-bang gsub: inspect may hand out a frozen string (e.g. for true/false),
        # the copy is safe to mutate below
        string = object.inspect.gsub(/:([a-z_]+)=>/, "\\1: ")
        10.times do
          string.gsub!(/{(\S.*?\S)}/, "{ \\1 }") || break
        end
        string
      end
    end
  end
end
|
data/template/Readme.md
ADDED
@@ -0,0 +1,247 @@
|
|
1
|
+

|
2
|
+
|
3
|
+
Manage Datadog Monitors / Dashboards / Slos as code
|
4
|
+
|
5
|
+
- DRY, searchable, audited, documented
|
6
|
+
- Changes are PR reviewed and applied on merge
|
7
|
+
- Updating shows diff before applying
|
8
|
+
- Automated import of existing resources
|
9
|
+
- Resources are grouped into projects that belong to teams and inherit tags
|
10
|
+
- No copy-pasting of ids to create new resources
|
11
|
+
- Automated cleanup when removing code
|
12
|
+
- [Helpers](#helpers) for automating common tasks
|
13
|
+
|
14
|
+
### Applying changes
|
15
|
+
|
16
|
+

|
17
|
+
|
18
|
+
### Example code
|
19
|
+
|
20
|
+
```Ruby
|
21
|
+
# teams/foo.rb
|
22
|
+
module Teams
|
23
|
+
class Foo < Kennel::Models::Team
|
24
|
+
defaults(mention: -> { "@slack-my-team" })
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# projects/bar.rb
|
29
|
+
class Bar < Kennel::Models::Project
|
30
|
+
defaults(
|
31
|
+
team: -> { Teams::Foo.new }, # use mention and tags from the team
|
32
|
+
parts: -> {
|
33
|
+
[
|
34
|
+
Kennel::Models::Monitor.new(
|
35
|
+
self, # the current project
|
36
|
+
type: -> { "query alert" },
|
37
|
+
kennel_id: -> { "load-too-high" }, # pick a unique name
|
38
|
+
name: -> { "Foobar Load too high" }, # nice descriptive name that will show up in alerts and emails
|
39
|
+
message: -> {
|
40
|
+
<<~TEXT
|
41
|
+
This is bad!
|
42
|
+
#{super()} # inserts mention from team
|
43
|
+
TEXT
|
44
|
+
},
|
45
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:api} by {pod} > #{critical}" },
|
46
|
+
critical: -> { 20 }
|
47
|
+
)
|
48
|
+
]
|
49
|
+
}
|
50
|
+
)
|
51
|
+
end
|
52
|
+
```
|
53
|
+
|
54
|
+
|
55
|
+
## Structure
|
56
|
+
|
57
|
+
- `projects/` monitors/dashboards/etc scoped by project
|
58
|
+
- `teams/` team definitions
|
59
|
+
- `parts/` monitors/dashboards/etc that are used by multiple projects
|
60
|
+
- `generated/` projects as json, to show current state and proposed changes in PRs
|
61
|
+
|
62
|
+
## Workflows
|
63
|
+
|
64
|
+
### Setup
|
65
|
+
- clone the repo
|
66
|
+
- `gem install bundler && bundle install`
|
67
|
+
- `cp .env.example .env`
|
68
|
+
- open [Datadog API Settings](https://app.datadoghq.com/account/settings#api)
|
69
|
+
- copy any `API Key` and add it to `.env` as `DATADOG_API_KEY`
|
70
|
+
- find or create (check last page) your personal "Application Key" and add it to `.env` as `DATADOG_APP_KEY=`
|
71
|
+
- change the `DATADOG_SUBDOMAIN=app` in `.env` to your company's subdomain if you have one
|
72
|
+
- verify it works by running `rake plan`, it might show some diff, but should not crash
|
73
|
+
|
74
|
+
### Adding a team
|
75
|
+
|
76
|
+
- `mention` is used for all team monitors via `super()`
|
77
|
+
- `renotify_interval` is used for all team monitors (defaults to `0` / off)
|
78
|
+
- `tags` is used for all team monitors/dashboards (defaults to `team:<team-name>`)
|
79
|
+
|
80
|
+
```Ruby
|
81
|
+
# teams/my_team.rb
|
82
|
+
module Teams
|
83
|
+
class MyTeam < Kennel::Models::Team
|
84
|
+
defaults(
|
85
|
+
mention: -> { "@slack-my-team" }
|
86
|
+
)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
```
|
90
|
+
|
91
|
+
### Adding a new monitor
|
92
|
+
- use [datadog monitor UI](https://app.datadoghq.com/monitors#create) to create a monitor
|
93
|
+
- see below
|
94
|
+
|
95
|
+
### Updating an existing monitor
|
96
|
+
- use [datadog monitor UI](https://app.datadoghq.com/monitors/manage) to find a monitor
|
97
|
+
- get the `id` from the url
|
98
|
+
- run `URL='https://app.datadoghq.com/monitors/123' bundle exec rake kennel:import` and copy the output
|
99
|
+
- find or create a project in `projects/`
|
100
|
+
- add the monitor to `parts: [` list, for example:
|
101
|
+
```Ruby
|
102
|
+
# projects/my_project.rb
|
103
|
+
class MyProject < Kennel::Models::Project
|
104
|
+
defaults(
|
105
|
+
team: -> { Teams::MyTeam.new }, # use existing team or create new one in teams/
|
106
|
+
parts: -> {
|
107
|
+
[
|
108
|
+
Kennel::Models::Monitor.new(
|
109
|
+
self,
|
110
|
+
id: -> { 123456 }, # id from datadog url, not necessary when creating a new monitor
|
111
|
+
type: -> { "query alert" },
|
112
|
+
kennel_id: -> { "load-too-high" }, # make up a unique name
|
113
|
+
name: -> { "Foobar Load too high" }, # nice descriptive name that will show up in alerts and emails
|
114
|
+
message: -> {
|
115
|
+
# Explain what behavior to expect and how to fix the cause
|
116
|
+
# Use #{super()} to add team notifications.
|
117
|
+
<<~TEXT
|
118
|
+
Foobar will be slow and that could cause Barfoo to go down.
|
119
|
+
Add capacity or debug why it is suddenly slow.
|
120
|
+
#{super()}
|
121
|
+
TEXT
|
122
|
+
},
|
123
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:api} by {pod} > #{critical}" }, # replace actual value with #{critical} to keep them in sync
|
124
|
+
critical: -> { 20 }
|
125
|
+
)
|
126
|
+
]
|
127
|
+
}
|
128
|
+
)
|
129
|
+
end
|
130
|
+
```
|
131
|
+
- run `PROJECT=my_project bundle exec rake plan`, an Update to the existing monitor should be shown (not Create / Delete)
|
132
|
+
- alternatively: `bundle exec rake generate` to only locally update the generated `json` files
|
133
|
+
- review changes then `git commit`
|
134
|
+
- make a PR ... get reviewed ... merge
|
135
|
+
- datadog is updated by CI
|
136
|
+
|
137
|
+
### Adding a new dashboard
|
138
|
+
- go to [datadog dashboard UI](https://app.datadoghq.com/dashboard/lists) and click on _New Dashboard_ to create a dashboard
|
139
|
+
- see below
|
140
|
+
|
141
|
+
### Updating an existing dashboard
|
142
|
+
- go to [datadog dashboard UI](https://app.datadoghq.com/dashboard/lists) to find a dashboard
|
143
|
+
- get the `id` from the url
|
144
|
+
- run `URL='https://app.datadoghq.com/dashboard/bet-foo-bar' bundle exec rake kennel:import` and copy the output
|
145
|
+
- find or create a project in `projects/`
|
146
|
+
- add a dashboard to `parts: [` list, for example:
|
147
|
+
```Ruby
|
148
|
+
class MyProject < Kennel::Models::Project
|
149
|
+
defaults(
|
150
|
+
team: -> { Teams::MyTeam.new }, # use existing team or create new one in teams/
|
151
|
+
parts: -> {
|
152
|
+
[
|
153
|
+
Kennel::Models::Dashboard.new(
|
154
|
+
self,
|
155
|
+
id: -> { "abc-def-ghi" }, # id from datadog url, not needed when creating a new dashboard
|
156
|
+
title: -> { "My Dashboard" },
|
157
|
+
description: -> { "Overview of foobar" },
|
158
|
+
template_variables: -> { ["environment"] }, # see https://docs.datadoghq.com/api/?lang=ruby#timeboards
|
159
|
+
kennel_id: -> { "overview-dashboard" }, # make up a unique name
|
160
|
+
layout_type: -> { "ordered" },
|
161
|
+
definitions: -> {
|
162
|
+
[ # An array of arrays, each one is a graph in the dashboard, alternatively a hash for finer control
|
163
|
+
[
|
164
|
+
# title, viz, type, query, edit an existing graph and see the json definition
|
165
|
+
"Graph name", "timeseries", "area", "sum:mystats.foobar{$environment}"
|
166
|
+
],
|
167
|
+
[
|
168
|
+
# queries can be an Array as well, this will generate multiple requests
|
169
|
+
# for a single graph
|
170
|
+
"Graph name", "timeseries", "area", ["sum:mystats.foobar{$environment}", "sum:mystats.success{$environment}"],
|
171
|
+
# add events too ...
|
172
|
+
events: [{q: "tags:foobar,deploy", tags_execution: "and"}]
|
173
|
+
]
|
174
|
+
]
|
175
|
+
}
|
176
|
+
)
|
177
|
+
]
|
178
|
+
}
|
179
|
+
)
|
180
|
+
end
|
181
|
+
```
|
182
|
+
|
183
|
+
### Skipping validations
|
184
|
+
|
185
|
+
Some validations might be too strict for your use case or just wrong, please [open an issue](https://github.com/grosser/kennel/issues) and
|
186
|
+
to unblock use the `validate: -> { false }` option.
|
187
|
+
|
188
|
+
### Linking with kennel_ids
|
189
|
+
|
190
|
+
To link to existing monitors via their kennel_id
|
191
|
+
|
192
|
+
- Screens `uptime` widgets can use `monitor: {id: "foo:bar"}`
|
193
|
+
- Screens `alert_graph` widgets can use `alert_id: "foo:bar"`
|
194
|
+
- Monitors `composite` can use `query: -> { "%{foo:bar} || %{foo:baz}" }`
|
195
|
+
|
196
|
+
### Debugging changes locally
|
197
|
+
|
198
|
+
- rebase on updated `master` to not undo other changes
|
199
|
+
- figure out project name by converting the class name to snake_case
|
200
|
+
- run `PROJECT=foo bundle exec rake kennel:update_datadog` to test changes for a single project
|
201
|
+
|
202
|
+
### Reuse
|
203
|
+
|
204
|
+
Add to `parts/<folder>`.
|
205
|
+
|
206
|
+
```Ruby
|
207
|
+
module Monitors
|
208
|
+
class LoadTooHigh < Kennel::Models::Monitor
|
209
|
+
defaults(
|
210
|
+
name: -> { "#{project.name} load too high" },
|
211
|
+
message: -> { "Shut it down!" },
|
212
|
+
type: -> { "query alert" },
|
213
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:#{project.kennel_id}} by {pod} > #{critical}" }
|
214
|
+
)
|
215
|
+
end
|
216
|
+
end
|
217
|
+
```
|
218
|
+
|
219
|
+
Reuse it in multiple projects.
|
220
|
+
|
221
|
+
```Ruby
|
222
|
+
class Database < Kennel::Models::Project
|
223
|
+
defaults(
|
224
|
+
team: -> { Kennel::Models::Team.new(mention: -> { '@slack-foo' }, kennel_id: -> { 'foo' }) },
|
225
|
+
parts: -> { [Monitors::LoadTooHigh.new(self, critical: -> { 13 })] }
|
226
|
+
)
|
227
|
+
end
|
228
|
+
```
|
229
|
+
|
230
|
+
## Helpers
|
231
|
+
|
232
|
+
### Listing un-muted alerts
|
233
|
+
|
234
|
+
Run `rake kennel:alerts TAG=service:my-service` to see all un-muted alerts for a given datadog monitor tag.
|
235
|
+
|
236
|
+
### Validating mentions work
|
237
|
+
|
238
|
+
`rake kennel:validate_mentions` should run as part of CI
|
239
|
+
|
240
|
+
### Grepping through all of datadog
|
241
|
+
|
242
|
+
`TYPE=monitor rake kennel:dump`
|
243
|
+
|
244
|
+
### Find all monitors with No-Data
|
245
|
+
|
246
|
+
`rake kennel:nodata TAG=team:foo`
|
247
|
+
|