kennel 1.75.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Readme.md +289 -0
- data/lib/kennel.rb +90 -0
- data/lib/kennel/api.rb +83 -0
- data/lib/kennel/file_cache.rb +53 -0
- data/lib/kennel/github_reporter.rb +49 -0
- data/lib/kennel/importer.rb +135 -0
- data/lib/kennel/models/base.rb +29 -0
- data/lib/kennel/models/dashboard.rb +209 -0
- data/lib/kennel/models/monitor.rb +219 -0
- data/lib/kennel/models/project.rb +31 -0
- data/lib/kennel/models/record.rb +94 -0
- data/lib/kennel/models/slo.rb +92 -0
- data/lib/kennel/models/team.rb +12 -0
- data/lib/kennel/optional_validations.rb +21 -0
- data/lib/kennel/progress.rb +34 -0
- data/lib/kennel/settings_as_methods.rb +86 -0
- data/lib/kennel/subclass_tracking.rb +19 -0
- data/lib/kennel/syncer.rb +260 -0
- data/lib/kennel/tasks.rb +148 -0
- data/lib/kennel/template_variables.rb +38 -0
- data/lib/kennel/unmuted_alerts.rb +89 -0
- data/lib/kennel/utils.rb +159 -0
- data/lib/kennel/version.rb +4 -0
- data/template/Readme.md +247 -0
- metadata +109 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e6a61329e4c2b2ccec0021103dbca60ec7fb9658e3a45a0c4212a08e63ea1395
|
4
|
+
data.tar.gz: 50af562a677393894101f495b150be434f7af67851e774109aa6b4605bedffab
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c884d5d811aa4ed5df99e90382d2311391035570185149bae41f0ac0c080df6132877c4dcd9ffbba88a83e829203557353b405ed71b373e997337223192ed5b4
|
7
|
+
data.tar.gz: 4e453ebad3dae75cd38f1901ab6ea50d22f886d2b3004ff3d25c3ab64881f852d7592b73a4efd0eabe244179ee39570f0028a62f3a1aa1bb0c47765c131fb097
|
data/Readme.md
ADDED
@@ -0,0 +1,289 @@
|
|
1
|
+

|
2
|
+
|
3
|
+
Manage Datadog Monitors / Dashboards / Slos as code
|
4
|
+
|
5
|
+
- DRY, searchable, audited, documented
|
6
|
+
- Changes are PR reviewed and applied on merge
|
7
|
+
- Updating shows diff before applying
|
8
|
+
- Automated import of existing resources
|
9
|
+
- Resources are grouped into projects that belong to teams and inherit tags
|
10
|
+
- No copy-pasting of ids to create new resources
|
11
|
+
- Automated cleanup when removing code
|
12
|
+
- [Helpers](#helpers) for automating common tasks
|
13
|
+
|
14
|
+
### Applying changes
|
15
|
+
|
16
|
+

|
17
|
+
|
18
|
+
### Example code
|
19
|
+
|
20
|
+
```Ruby
|
21
|
+
# teams/foo.rb
|
22
|
+
module Teams
|
23
|
+
class Foo < Kennel::Models::Team
|
24
|
+
defaults(mention: -> { "@slack-my-team" })
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# projects/bar.rb
|
29
|
+
class Bar < Kennel::Models::Project
|
30
|
+
defaults(
|
31
|
+
team: -> { Teams::Foo.new }, # use mention and tags from the team
|
32
|
+
parts: -> {
|
33
|
+
[
|
34
|
+
Kennel::Models::Monitor.new(
|
35
|
+
self, # the current project
|
36
|
+
type: -> { "query alert" },
|
37
|
+
kennel_id: -> { "load-too-high" }, # pick a unique name
|
38
|
+
name: -> { "Foobar Load too high" }, # nice descriptive name that will show up in alerts and emails
|
39
|
+
message: -> {
|
40
|
+
<<~TEXT
|
41
|
+
This is bad!
|
42
|
+
#{super()} # inserts mention from team
|
43
|
+
TEXT
|
44
|
+
},
|
45
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:api} by {pod} > #{critical}" },
|
46
|
+
critical: -> { 20 }
|
47
|
+
)
|
48
|
+
]
|
49
|
+
}
|
50
|
+
)
|
51
|
+
end
|
52
|
+
```
|
53
|
+
|
54
|
+
<!-- NOT IN template/Readme.md -->
|
55
|
+
## Installation
|
56
|
+
|
57
|
+
- create a new private `kennel` repo for your organization (do not fork this repo)
|
58
|
+
- use the template folder as starting point:
|
59
|
+
```Bash
|
60
|
+
git clone git@github.com:your-org/kennel.git
|
61
|
+
git clone git@github.com:grosser/kennel.git seed
|
62
|
+
mv seed/template/* kennel/
|
63
|
+
cd kennel && git add . && git commit -m 'initial'
|
64
|
+
```
|
65
|
+
- add basic projects and teams so others can copy-paste to get started
|
66
|
+
- setup CI build for your repo (travis and Github Actions supported)
|
67
|
+
- uncomment `.travis.yml` section for datadog updates on merge (TODO: example setup for Github Actions)
|
68
|
+
- follow `Setup` in your repo's Readme.md
|
69
|
+
<!-- NOT IN -->
|
70
|
+
|
71
|
+
## Structure
|
72
|
+
|
73
|
+
- `projects/` monitors/dashboards/etc scoped by project
|
74
|
+
- `teams/` team definitions
|
75
|
+
- `parts/` monitors/dashboards/etc that are used by multiple projects
|
76
|
+
- `generated/` projects as json, to show current state and proposed changes in PRs
|
77
|
+
|
78
|
+
## Workflows
|
79
|
+
|
80
|
+
<!-- ONLY IN template/Readme.md
|
81
|
+
### Setup
|
82
|
+
- clone the repo
|
83
|
+
- `gem install bundler && bundle install`
|
84
|
+
- `cp .env.example .env`
|
85
|
+
- open [Datadog API Settings](https://app.datadoghq.com/account/settings#api)
|
86
|
+
- copy any `API Key` and add it to `.env` as `DATADOG_API_KEY`
|
87
|
+
- find or create (check last page) your personal "Application Key" and add it to `.env` as `DATADOG_APP_KEY=`
|
88
|
+
- change the `DATADOG_SUBDOMAIN=app` in `.env` to your company's subdomain if you have one
|
89
|
+
- verify it works by running `rake plan`, it might show some diff, but should not crash
|
90
|
+
-->
|
91
|
+
|
92
|
+
### Adding a team
|
93
|
+
|
94
|
+
- `mention` is used for all team monitors via `super()`
|
95
|
+
- `renotify_interval` is used for all team monitors (defaults to `0` / off)
|
96
|
+
- `tags` is used for all team monitors/dashboards (defaults to `team:<team-name>`)
|
97
|
+
|
98
|
+
```Ruby
|
99
|
+
# teams/my_team.rb
|
100
|
+
module Teams
|
101
|
+
class MyTeam < Kennel::Models::Team
|
102
|
+
defaults(
|
103
|
+
mention: -> { "@slack-my-team" }
|
104
|
+
)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
```
|
108
|
+
|
109
|
+
### Adding a new monitor
|
110
|
+
- use [datadog monitor UI](https://app.datadoghq.com/monitors#create) to create a monitor
|
111
|
+
- see below
|
112
|
+
|
113
|
+
### Updating an existing monitor
|
114
|
+
- use [datadog monitor UI](https://app.datadoghq.com/monitors/manage) to find a monitor
|
115
|
+
- get the `id` from the url
|
116
|
+
- run `URL='https://app.datadoghq.com/monitors/123' bundle exec rake kennel:import` and copy the output
|
117
|
+
- find or create a project in `projects/`
|
118
|
+
- add the monitor to `parts: [` list, for example:
|
119
|
+
```Ruby
|
120
|
+
# projects/my_project.rb
|
121
|
+
class MyProject < Kennel::Models::Project
|
122
|
+
defaults(
|
123
|
+
team: -> { Teams::MyTeam.new }, # use existing team or create new one in teams/
|
124
|
+
parts: -> {
|
125
|
+
[
|
126
|
+
Kennel::Models::Monitor.new(
|
127
|
+
self,
|
128
|
+
id: -> { 123456 }, # id from datadog url, not necessary when creating a new monitor
|
129
|
+
type: -> { "query alert" },
|
130
|
+
kennel_id: -> { "load-too-high" }, # make up a unique name
|
131
|
+
name: -> { "Foobar Load too high" }, # nice descriptive name that will show up in alerts and emails
|
132
|
+
message: -> {
|
133
|
+
# Explain what behavior to expect and how to fix the cause
|
134
|
+
# Use #{super()} to add team notifications.
|
135
|
+
<<~TEXT
|
136
|
+
Foobar will be slow and that could cause Barfoo to go down.
|
137
|
+
Add capacity or debug why it is suddenly slow.
|
138
|
+
#{super()}
|
139
|
+
TEXT
|
140
|
+
},
|
141
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:api} by {pod} > #{critical}" }, # replace actual value with #{critical} to keep them in sync
|
142
|
+
critical: -> { 20 }
|
143
|
+
)
|
144
|
+
]
|
145
|
+
}
|
146
|
+
)
|
147
|
+
end
|
148
|
+
```
|
149
|
+
- run `PROJECT=my_project bundle exec rake plan`, an Update to the existing monitor should be shown (not Create / Delete)
|
150
|
+
- alternatively: `bundle exec rake generate` to only locally update the generated `json` files
|
151
|
+
- review changes then `git commit`
|
152
|
+
- make a PR ... get reviewed ... merge
|
153
|
+
- datadog is updated by CI
|
154
|
+
|
155
|
+
### Adding a new dashboard
|
156
|
+
- go to [datadog dashboard UI](https://app.datadoghq.com/dashboard/lists) and click on _New Dashboard_ to create a dashboard
|
157
|
+
- see below
|
158
|
+
|
159
|
+
### Updating an existing dashboard
|
160
|
+
- go to [datadog dashboard UI](https://app.datadoghq.com/dashboard/lists) to find a dashboard
|
161
|
+
- get the `id` from the url
|
162
|
+
- run `URL='https://app.datadoghq.com/dashboard/bet-foo-bar' bundle exec rake kennel:import` and copy the output
|
163
|
+
- find or create a project in `projects/`
|
164
|
+
- add a dashboard to `parts: [` list, for example:
|
165
|
+
```Ruby
|
166
|
+
class MyProject < Kennel::Models::Project
|
167
|
+
defaults(
|
168
|
+
team: -> { Teams::MyTeam.new }, # use existing team or create new one in teams/
|
169
|
+
parts: -> {
|
170
|
+
[
|
171
|
+
Kennel::Models::Dashboard.new(
|
172
|
+
self,
|
173
|
+
id: -> { "abc-def-ghi" }, # id from datadog url, not needed when creating a new dashboard
|
174
|
+
title: -> { "My Dashboard" },
|
175
|
+
description: -> { "Overview of foobar" },
|
176
|
+
template_variables: -> { ["environment"] }, # see https://docs.datadoghq.com/api/?lang=ruby#timeboards
|
177
|
+
kennel_id: -> { "overview-dashboard" }, # make up a unique name
|
178
|
+
layout_type: -> { "ordered" },
|
179
|
+
definitions: -> {
|
180
|
+
[ # An array of arrays, each one is a graph in the dashboard, alternatively a hash for finer control
|
181
|
+
[
|
182
|
+
# title, viz, type, query, edit an existing graph and see the json definition
|
183
|
+
"Graph name", "timeseries", "area", "sum:mystats.foobar{$environment}"
|
184
|
+
],
|
185
|
+
[
|
186
|
+
# queries can be an Array as well, this will generate multiple requests
|
187
|
+
# for a single graph
|
188
|
+
"Graph name", "timeseries", "area", ["sum:mystats.foobar{$environment}", "sum:mystats.success{$environment}"],
|
189
|
+
# add events too ...
|
190
|
+
events: [{q: "tags:foobar,deploy", tags_execution: "and"}]
|
191
|
+
]
|
192
|
+
]
|
193
|
+
}
|
194
|
+
)
|
195
|
+
]
|
196
|
+
}
|
197
|
+
)
|
198
|
+
end
|
199
|
+
```
|
200
|
+
|
201
|
+
### Skipping validations
|
202
|
+
|
203
|
+
Some validations might be too strict for your use case or just wrong, please [open an issue](https://github.com/grosser/kennel/issues) and
|
204
|
+
to unblock use the `validate: -> { false }` option.
|
205
|
+
|
206
|
+
### Linking with kennel_ids
|
207
|
+
|
208
|
+
To link to existing monitors via their kennel_id
|
209
|
+
|
210
|
+
- Screens `uptime` widgets can use `monitor: {id: "foo:bar"}`
|
211
|
+
- Screens `alert_graph` widgets can use `alert_id: "foo:bar"`
|
212
|
+
- Monitors `composite` can use `query: -> { "%{foo:bar} || %{foo:baz}" }`
|
213
|
+
|
214
|
+
### Debugging changes locally
|
215
|
+
|
216
|
+
- rebase on updated `master` to not undo other changes
|
217
|
+
- figure out project name by converting the class name to snake-case
|
218
|
+
- run `PROJECT=foo bundle exec rake kennel:update_datadog` to test changes for a single project
|
219
|
+
|
220
|
+
### Reuse
|
221
|
+
|
222
|
+
Add to `parts/<folder>`.
|
223
|
+
|
224
|
+
```Ruby
|
225
|
+
module Monitors
|
226
|
+
class LoadTooHigh < Kennel::Models::Monitor
|
227
|
+
defaults(
|
228
|
+
name: -> { "#{project.name} load too high" },
|
229
|
+
message: -> { "Shut it down!" },
|
230
|
+
type: -> { "query alert" },
|
231
|
+
query: -> { "avg(last_5m):avg:system.load.5{hostgroup:#{project.kennel_id}} by {pod} > #{critical}" }
|
232
|
+
)
|
233
|
+
end
|
234
|
+
end
|
235
|
+
```
|
236
|
+
|
237
|
+
Reuse it in multiple projects.
|
238
|
+
|
239
|
+
```Ruby
|
240
|
+
class Database < Kennel::Models::Project
|
241
|
+
defaults(
|
242
|
+
team: -> { Kennel::Models::Team.new(mention: -> { '@slack-foo' }, kennel_id: -> { 'foo' }) },
|
243
|
+
parts: -> { [Monitors::LoadTooHigh.new(self, critical: -> { 13 })] }
|
244
|
+
)
|
245
|
+
end
|
246
|
+
```
|
247
|
+
|
248
|
+
## Helpers
|
249
|
+
|
250
|
+
### Listing un-muted alerts
|
251
|
+
|
252
|
+
Run `rake kennel:alerts TAG=service:my-service` to see all un-muted alerts for a given datadog monitor tag.
|
253
|
+
|
254
|
+
### Validating mentions work
|
255
|
+
|
256
|
+
`rake kennel:validate_mentions` should run as part of CI
|
257
|
+
|
258
|
+
### Grepping through all of datadog
|
259
|
+
|
260
|
+
`TYPE=monitor rake kennel:dump`
|
261
|
+
|
262
|
+
### Find all monitors with No-Data
|
263
|
+
|
264
|
+
`rake kennel:nodata TAG=team:foo`
|
265
|
+
|
266
|
+
<!-- NOT IN template/Readme.md -->
|
267
|
+
|
268
|
+
|
269
|
+
## Development
|
270
|
+
|
271
|
+
### Integration testing
|
272
|
+
|
273
|
+
```Bash
|
274
|
+
rake play
|
275
|
+
cd template
|
276
|
+
rake plan
|
277
|
+
```
|
278
|
+
|
279
|
+
Then make changes to play around, do not commit changes and make sure to revert with a `rake kennel:update_datadog` after deleting everything.
|
280
|
+
|
281
|
+
To make changes via the UI, make a new free datadog account and use its credentials instead.
|
282
|
+
|
283
|
+
Author
|
284
|
+
======
|
285
|
+
[Michael Grosser](http://grosser.it)<br/>
|
286
|
+
michael@grosser.it<br/>
|
287
|
+
License: MIT<br/>
|
288
|
+
[](https://travis-ci.org/grosser/kennel)
|
289
|
+
<!-- NOT IN -->
|
data/lib/kennel.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "faraday"
|
3
|
+
require "json"
|
4
|
+
require "English"
|
5
|
+
|
6
|
+
require "kennel/version"
|
7
|
+
require "kennel/utils"
|
8
|
+
require "kennel/progress"
|
9
|
+
require "kennel/syncer"
|
10
|
+
require "kennel/api"
|
11
|
+
require "kennel/github_reporter"
|
12
|
+
require "kennel/subclass_tracking"
|
13
|
+
require "kennel/settings_as_methods"
|
14
|
+
require "kennel/file_cache"
|
15
|
+
require "kennel/template_variables"
|
16
|
+
require "kennel/optional_validations"
|
17
|
+
require "kennel/unmuted_alerts"
|
18
|
+
|
19
|
+
require "kennel/models/base"
|
20
|
+
require "kennel/models/record"
|
21
|
+
|
22
|
+
# records
|
23
|
+
require "kennel/models/dashboard"
|
24
|
+
require "kennel/models/monitor"
|
25
|
+
require "kennel/models/slo"
|
26
|
+
|
27
|
+
# settings
|
28
|
+
require "kennel/models/project"
|
29
|
+
require "kennel/models/team"
|
30
|
+
|
31
|
+
# Top-level namespace and entry points: render all parts to `generated/`,
# show a plan, or apply changes through the syncer.
module Kennel
  # Library-specific error type (not raised within this module itself).
  class ValidationError < RuntimeError
  end

  # Output streams, replaceable through the accessors below.
  @out = $stdout
  @err = $stderr

  class << self
    attr_accessor :out, :err

    # Recreates the `generated/` folder from scratch and writes one
    # pretty-printed JSON file per part, at
    # `generated/<tracking_id with its first ":" replaced by "/">.json`.
    def generate
      FileUtils.rm_rf("generated")
      generated.each do |part|
        path = "generated/#{part.tracking_id.sub(":", "/")}.json"
        FileUtils.mkdir_p(File.dirname(path))
        File.write(path, JSON.pretty_generate(part.as_json) << "\n")
      end
    end

    # Delegates to the syncer's plan.
    def plan
      syncer.plan
    end

    # Shows the plan, then applies the update only when the syncer confirms.
    def update
      syncer.plan
      syncer.update if syncer.confirm
    end

    private

    # Memoized syncer built from the api client, all generated parts and
    # the optional PROJECT environment variable.
    def syncer
      @syncer ||= Syncer.new(api, generated, project: ENV["PROJECT"])
    end

    # Memoized api client; both keys must be present in the environment
    # (ENV.fetch raises KeyError otherwise).
    def api
      @api ||= Api.new(ENV.fetch("DATADOG_APP_KEY"), ENV.fetch("DATADOG_API_KEY"))
    end

    # Loads all definitions and collects the validated parts of every
    # project class, memoizing the result.
    #
    # Raises RuntimeError when a tracking_id is defined more than once.
    def generated
      @generated ||= begin
        Progress.progress "Generating" do
          load_all
          parts = Models::Project.recursive_subclasses.flat_map do |project_class|
            project_class.new.validated_parts
          end
          ensure_unique_tracking_ids(parts)
          parts
        end
      end
    end

    # Raises for the first tracking_id (in order of appearance) that is
    # shared by more than one part.
    def ensure_unique_tracking_ids(parts)
      parts.map(&:tracking_id).group_by(&:itself).each do |id, same|
        raise "#{id} is defined #{same.size} times" if same.size != 1
      end
    end

    # Requires every ruby file below teams/, parts/ and projects/
    # (relative to the current directory), sorted within each folder.
    def load_all
      ["teams", "parts", "projects"].each do |folder|
        Dir["#{folder}/**/*.rb"].sort.each { |f| require "./#{f}" }
      end
    end
  end
end
|
data/lib/kennel/api.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Kennel
  # Small client for the Datadog v1 HTTP API that authenticates every
  # request with the application and api key passed as query parameters.
  class Api
    # app_key / api_key are sent as `application_key` / `api_key` with each request.
    def initialize(app_key, api_key)
      @app_key = app_key
      @api_key = api_key
      @client = Faraday.new(url: "https://app.datadoghq.com") { |c| c.adapter :net_http_persistent }
    end

    # Fetches a single resource; "slo" replies are unwrapped from their :data key.
    def show(api_resource, id, params = {})
      reply = request :get, "/api/v1/#{api_resource}/#{id}", params: params
      api_resource == "slo" ? reply[:data] : reply
    end

    # Lists all resources of a kind.
    # - "slo" is fetched page by page, so :limit / :offset may not be given (ArgumentError)
    # - "dashboard" replies are unwrapped from their :dashboards key
    def list(api_resource, params = {})
      return list_all_pages(api_resource, params) if api_resource == "slo"

      result = request :get, "/api/v1/#{api_resource}", params: params
      api_resource == "dashboard" ? result.fetch(:dashboards) : result
    end

    # Creates a resource; "slo" replies are unwrapped to the first element of :data.
    def create(api_resource, attributes)
      reply = request :post, "/api/v1/#{api_resource}", body: attributes
      api_resource == "slo" ? reply[:data].first : reply
    end

    # Updates a resource and returns the parsed reply as is.
    def update(api_resource, id, attributes)
      request :put, "/api/v1/#{api_resource}/#{id}", body: attributes
    end

    # Deletes a resource; a 404 reply is not treated as an error.
    def delete(api_resource, id)
      request :delete, "/api/v1/#{api_resource}/#{id}", ignore_404: true
    end

    private

    # Collects the :data of every page, stopping at the first page that
    # holds fewer than `limit` entries.
    def list_all_pages(api_resource, params, limit: 1000)
      if params[:limit] || params[:offset]
        raise ArgumentError, "limit and offset cannot be set when listing #{api_resource}"
      end

      all = []
      offset = 0

      loop do
        page = request :get, "/api/v1/#{api_resource}", params: params.merge(limit: limit, offset: offset)
        data = page.fetch(:data)
        all.concat data
        break all if data.size < limit
        offset += limit
      end
    end

    # Performs the request and returns the parsed JSON reply with symbol
    # keys ({} for an empty body).
    #
    # GET requests answered with a 5xx status are attempted a second time.
    # Raises RuntimeError for unsuccessful replies, except a 404 when
    # ignore_404 is set.
    def request(method, path, body: nil, params: {}, ignore_404: false)
      params = params.merge(application_key: @app_key, api_key: @api_key)
      query = Faraday::FlatParamsEncoder.encode(params)
      response = nil
      tries = 2

      tries.times do |i|
        response = Utils.retry Faraday::ConnectionFailed, Faraday::TimeoutError, times: 2 do
          @client.public_send(method, "#{path}?#{query}") do |req|
            req.body = JSON.generate(body) if body
            req.headers["Content-type"] = "application/json"
          end
        end

        # only GET is repeated on server errors, other methods are sent once
        break if i == tries - 1 || method != :get || response.status < 500
        Kennel.err.puts "Retrying on server error #{response.status} for #{path}"
      end

      if !response.success? && (response.status != 404 || !ignore_404)
        raise error_message(method, path, body, response)
      end

      if response.body.empty?
        {}
      else
        JSON.parse(response.body, symbolize_names: true)
      end
    end

    # Builds the failure text: status line, the request body (when one was sent)
    # and the raw response body.
    def error_message(method, path, body, response)
      message = +"Error #{response.status} during #{method.upcase} #{path}\n"
      message << "request:\n#{JSON.pretty_generate(body)}\nresponse:\n" if body
      message << response.body
    end
  end
end
|