kennel 2.9.0 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kennel/api.rb +15 -15
- data/lib/kennel/models/monitor.rb +87 -42
- data/lib/kennel/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 88495b67d113181455cb6dc751d2f7820eababca5679cfb714bb614dc9370ad6
|
|
4
|
+
data.tar.gz: a8703613913aa4557c585e605f35a1aef9555902c65a5f1c1772b1038e9d4d34
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d730ee50ff704f25eb39fdd8daa9c95a2c0b43f9a9534c7f1a05a245bd3881ab5ce36f12ebf226354eeafe94c4a961e843eebbbcf0312b2b548577b26a4debdd
|
|
7
|
+
data.tar.gz: ff487e51479e7a463464e37ec09a10ef7c1dc7d695bcf18f97303048ab0a75759d71eda14aa95e512aa69b66c25baf016d778be449455c0ef069e081f5957373
|
data/lib/kennel/api.rb
CHANGED
|
@@ -5,8 +5,6 @@ module Kennel
|
|
|
5
5
|
class Api
|
|
6
6
|
CACHE_FILE = ENV.fetch("KENNEL_API_CACHE_FILE", "tmp/cache/details")
|
|
7
7
|
|
|
8
|
-
RateLimitParams = Data.define(:limit, :period, :remaining, :reset, :name)
|
|
9
|
-
|
|
10
8
|
def self.with_tracking(api_resource, reply)
|
|
11
9
|
klass = Models::Record.api_resource_map[api_resource]
|
|
12
10
|
return reply unless klass # do not blow up on unknown models
|
|
@@ -125,20 +123,8 @@ module Kennel
|
|
|
125
123
|
end
|
|
126
124
|
end
|
|
127
125
|
|
|
128
|
-
rate_limit = RateLimitParams.new(
|
|
129
|
-
limit: response.headers["x-ratelimit-limit"],
|
|
130
|
-
period: response.headers["x-ratelimit-period"],
|
|
131
|
-
remaining: response.headers["x-ratelimit-remaining"],
|
|
132
|
-
reset: response.headers["x-ratelimit-reset"],
|
|
133
|
-
name: response.headers["x-ratelimit-name"]
|
|
134
|
-
)
|
|
135
|
-
|
|
136
126
|
if response.status == 429
|
|
137
|
-
|
|
138
|
-
message += " (#{rate_limit.limit} requests per #{rate_limit.period} seconds)"
|
|
139
|
-
message += "; sleeping #{rate_limit.reset} seconds before trying again"
|
|
140
|
-
Kennel.err.puts message
|
|
141
|
-
sleep rate_limit.reset.to_f
|
|
127
|
+
sleep_until_rate_limit_resets(response)
|
|
142
128
|
redo
|
|
143
129
|
end
|
|
144
130
|
|
|
@@ -161,6 +147,20 @@ module Kennel
|
|
|
161
147
|
end
|
|
162
148
|
end
|
|
163
149
|
|
|
150
|
+
def sleep_until_rate_limit_resets(response)
|
|
151
|
+
limit = response.headers["x-ratelimit-limit"]
|
|
152
|
+
period = response.headers["x-ratelimit-period"]
|
|
153
|
+
reset = response.headers["x-ratelimit-reset"]
|
|
154
|
+
name = response.headers["x-ratelimit-name"]
|
|
155
|
+
|
|
156
|
+
message = "Datadog rate limit #{name.inspect} hit"
|
|
157
|
+
message += " (#{limit} requests per #{period} seconds)"
|
|
158
|
+
message += "; sleeping #{reset} seconds before trying again"
|
|
159
|
+
|
|
160
|
+
Kennel.err.puts message
|
|
161
|
+
sleep reset.to_f
|
|
162
|
+
end
|
|
163
|
+
|
|
164
164
|
# allow caching all requests to speedup/benchmark logic that includes repeated requests
|
|
165
165
|
def with_cache(enabled, key)
|
|
166
166
|
return yield unless enabled
|
data/lib/kennel/models/monitor.rb
CHANGED
|
@@ -6,7 +6,8 @@ module Kennel
|
|
|
6
6
|
|
|
7
7
|
OPTIONAL_SERVICE_CHECK_THRESHOLDS = [:ok, :warning].freeze
|
|
8
8
|
READONLY_ATTRIBUTES = superclass::READONLY_ATTRIBUTES + [
|
|
9
|
-
:multi, :matching_downtimes, :overall_state_modified, :overall_state, :restricted_roles, :draft_status, :assets
|
|
9
|
+
:multi, :matching_downtimes, :overall_state_modified, :overall_state, :restricted_roles, :draft_status, :assets,
|
|
10
|
+
:enable_logs_sample
|
|
10
11
|
]
|
|
11
12
|
TRACKING_FIELD = :message
|
|
12
13
|
|
|
@@ -25,13 +26,19 @@ module Kennel
|
|
|
25
26
|
group_retention_duration: nil,
|
|
26
27
|
groupby_simple_monitor: false,
|
|
27
28
|
variables: nil,
|
|
28
|
-
on_missing_data:
|
|
29
|
+
on_missing_data: nil,
|
|
29
30
|
notification_preset_name: nil,
|
|
30
31
|
notify_by: nil
|
|
31
32
|
}.freeze
|
|
32
33
|
DEFAULT_ESCALATION_MESSAGE = ["", nil].freeze
|
|
33
34
|
ALLOWED_PRIORITY_CLASSES = [NilClass, Integer].freeze
|
|
34
35
|
SKIP_NOTIFY_NO_DATA_TYPES = ["event alert", "event-v2 alert", "log alert"].freeze
|
|
36
|
+
MINUTES_PER_UNIT = {
|
|
37
|
+
"m" => 1,
|
|
38
|
+
"h" => 60,
|
|
39
|
+
"d" => 60 * 24,
|
|
40
|
+
"w" => 60 * 24 * 7
|
|
41
|
+
}.freeze
|
|
35
42
|
|
|
36
43
|
settings(
|
|
37
44
|
:query, :name, :message, :escalation_message, :critical, :type, :renotify_interval, :warning, :timeout_h, :evaluation_delay,
|
|
@@ -49,13 +56,11 @@ module Kennel
|
|
|
49
56
|
# datadog UI sets this to false by default, but true is safer
|
|
50
57
|
# except for log alerts which will always have "no error" gaps and should default to false
|
|
51
58
|
notify_no_data: -> { !SKIP_NOTIFY_NO_DATA_TYPES.include?(type) },
|
|
52
|
-
no_data_timeframe: -> { 60 },
|
|
53
59
|
notify_audit: -> { MONITOR_OPTION_DEFAULTS.fetch(:notify_audit) },
|
|
54
60
|
new_host_delay: -> { MONITOR_OPTION_DEFAULTS.fetch(:new_host_delay) },
|
|
55
61
|
new_group_delay: -> { nil },
|
|
56
62
|
group_retention_duration: -> { MONITOR_OPTION_DEFAULTS.fetch(:group_retention_duration) },
|
|
57
63
|
tags: -> { @project.tags },
|
|
58
|
-
timeout_h: -> { MONITOR_OPTION_DEFAULTS.fetch(:timeout_h) },
|
|
59
64
|
evaluation_delay: -> { MONITOR_OPTION_DEFAULTS.fetch(:evaluation_delay) },
|
|
60
65
|
critical_recovery: -> { nil },
|
|
61
66
|
warning_recovery: -> { nil },
|
|
@@ -70,6 +75,8 @@ module Kennel
|
|
|
70
75
|
)
|
|
71
76
|
|
|
72
77
|
def build_json
|
|
78
|
+
no_data_options = configure_no_data
|
|
79
|
+
|
|
73
80
|
data = super.merge(
|
|
74
81
|
name: "#{name}#{LOCK}",
|
|
75
82
|
type: type,
|
|
@@ -79,8 +86,7 @@ module Kennel
|
|
|
79
86
|
priority: priority,
|
|
80
87
|
options: {
|
|
81
88
|
timeout_h: timeout_h,
|
|
82
|
-
|
|
83
|
-
no_data_timeframe: notify_no_data ? no_data_timeframe : nil,
|
|
89
|
+
**no_data_options.except(:on_missing_data),
|
|
84
90
|
notify_audit: notify_audit,
|
|
85
91
|
require_full_window: require_full_window,
|
|
86
92
|
new_host_delay: new_host_delay,
|
|
@@ -88,33 +94,14 @@ module Kennel
|
|
|
88
94
|
include_tags: true,
|
|
89
95
|
escalation_message: Utils.presence(escalation_message.strip),
|
|
90
96
|
evaluation_delay: evaluation_delay,
|
|
91
|
-
locked: false, # deprecated: setting this to true will likely fail
|
|
92
97
|
renotify_interval: renotify_interval || 0,
|
|
93
|
-
variables: variables
|
|
98
|
+
variables: variables,
|
|
99
|
+
**configure_thresholds,
|
|
100
|
+
**no_data_options.slice(:on_missing_data) # moved here to avoid generated diff
|
|
94
101
|
}
|
|
95
102
|
)
|
|
96
103
|
|
|
97
104
|
options = data[:options]
|
|
98
|
-
if data.fetch(:type) != "composite"
|
|
99
|
-
thresholds = (options[:thresholds] = { critical: critical })
|
|
100
|
-
|
|
101
|
-
# warning, ok, critical_recovery, and warning_recovery are optional
|
|
102
|
-
[:warning, :ok, :critical_recovery, :warning_recovery].each do |key|
|
|
103
|
-
if (value = send(key))
|
|
104
|
-
thresholds[key] = value
|
|
105
|
-
end
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
thresholds[:critical] = critical unless
|
|
109
|
-
case data.fetch(:type)
|
|
110
|
-
when "service check"
|
|
111
|
-
# avoid diff for default values of 1
|
|
112
|
-
OPTIONAL_SERVICE_CHECK_THRESHOLDS.each { |t| thresholds[t] ||= 1 }
|
|
113
|
-
when "query alert"
|
|
114
|
-
# metric and query values are stored as float by datadog
|
|
115
|
-
thresholds.each { |k, v| thresholds[k] = Float(v) }
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
105
|
|
|
119
106
|
# set without causing lots of nulls to be stored
|
|
120
107
|
if (notify_by_value = notify_by)
|
|
@@ -145,30 +132,37 @@ module Kennel
|
|
|
145
132
|
# Add in statuses where we would re notify on. Possible values: alert, no data, warn
|
|
146
133
|
if options[:renotify_interval] != 0
|
|
147
134
|
statuses = ["alert"]
|
|
148
|
-
statuses << "no data" if options[:notify_no_data]
|
|
135
|
+
statuses << "no data" if options[:notify_no_data] || options[:on_missing_data] == "show_and_notify_no_data"
|
|
149
136
|
statuses << "warn" if options.dig(:thresholds, :warning)
|
|
150
137
|
options[:renotify_statuses] = statuses
|
|
151
138
|
end
|
|
152
139
|
|
|
153
|
-
# on_missing_data cannot be used with notify_no_data or no_data_timeframe
|
|
154
|
-
# TODO migrate everything to only use on_missing_data
|
|
155
|
-
if data.fetch(:type) == "event-v2 alert" || on_missing_data != "default"
|
|
156
|
-
options[:on_missing_data] = on_missing_data
|
|
157
|
-
options[:notify_no_data] = false # cannot set nil or it's an endless update loop
|
|
158
|
-
options.delete :no_data_timeframe
|
|
159
|
-
end
|
|
160
|
-
|
|
161
140
|
# only set when needed to avoid big diff
|
|
162
141
|
if (notification_preset_name = notification_preset_name())
|
|
163
142
|
options[:notification_preset_name] = notification_preset_name
|
|
164
143
|
end
|
|
165
144
|
|
|
166
|
-
# locked is deprecated, will fail if used
|
|
167
|
-
options.delete :locked
|
|
168
|
-
|
|
169
145
|
data
|
|
170
146
|
end
|
|
171
147
|
|
|
148
|
+
# TODO: migrate everything to only use on_missing_data by only sending notify_no_data when it was set by a user
|
|
149
|
+
# and enforce that it is not set at the same time as on_missing_data
|
|
150
|
+
def configure_no_data
|
|
151
|
+
notify = notify_no_data
|
|
152
|
+
action = on_missing_data
|
|
153
|
+
|
|
154
|
+
# on_missing_data cannot be used with notify_no_data or no_data_timeframe
|
|
155
|
+
if type == "event-v2 alert" || action
|
|
156
|
+
# TODO: mark setting notify_no_data or no_data_timeframe at all as invalid
|
|
157
|
+
{ on_missing_data: action || "default" }
|
|
158
|
+
else
|
|
159
|
+
{
|
|
160
|
+
notify_no_data: notify,
|
|
161
|
+
no_data_timeframe: notify ? no_data_timeframe : nil
|
|
162
|
+
}
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
|
|
172
166
|
def resolve_linked_tracking_ids!(id_map, **args)
|
|
173
167
|
case as_json[:type]
|
|
174
168
|
when "composite", "slo alert"
|
|
@@ -187,6 +181,25 @@ module Kennel
|
|
|
187
181
|
"cannot update type from #{actual_type} to #{type}"
|
|
188
182
|
end
|
|
189
183
|
|
|
184
|
+
# deprecated this setting is no longer returned by dd for new monitors
|
|
185
|
+
# datadog UI warns when setting no data timeframe to less than 2x the query window
|
|
186
|
+
# limited to 24h because `no_data_timeframe must not exceed group retention` and max group retention is 24h
|
|
187
|
+
def no_data_timeframe
|
|
188
|
+
default = 60
|
|
189
|
+
if type == "query alert" && (minutes = query_window_minutes)
|
|
190
|
+
(minutes * 2).clamp(default, 24 * 60)
|
|
191
|
+
else
|
|
192
|
+
default
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
# validate that monitors that alert on no data resolve in external services by using timeout_h, so it sends a
|
|
197
|
+
# notification when the no data group is removed from the monitor, which datadog does automatically after 24h
|
|
198
|
+
def timeout_h
|
|
199
|
+
sending_no_data_notifications = (on_missing_data ? on_missing_data == "show_and_notify_no_data" : notify_no_data)
|
|
200
|
+
sending_no_data_notifications ? 24 : MONITOR_OPTION_DEFAULTS.fetch(:timeout_h)
|
|
201
|
+
end
|
|
202
|
+
|
|
190
203
|
def self.api_resource
|
|
191
204
|
"monitor"
|
|
192
205
|
end
|
|
@@ -213,7 +226,9 @@ module Kennel
|
|
|
213
226
|
ignore_default(expected, actual, MONITOR_DEFAULTS)
|
|
214
227
|
|
|
215
228
|
options = actual.fetch(:options)
|
|
216
|
-
|
|
229
|
+
|
|
230
|
+
# we do not manage silenced: ignore it when diffing
|
|
231
|
+
options.delete(:silenced)
|
|
217
232
|
|
|
218
233
|
# fields are not returned when set to true
|
|
219
234
|
if ["service check", "event alert"].include?(actual[:type])
|
|
@@ -243,13 +258,38 @@ module Kennel
|
|
|
243
258
|
options.delete(:escalation_message)
|
|
244
259
|
expected_options.delete(:escalation_message)
|
|
245
260
|
end
|
|
261
|
+
|
|
246
262
|
# locked is deprecated: ignored when diffing
|
|
247
263
|
options.delete(:locked)
|
|
248
|
-
expected_options.delete(:locked)
|
|
249
264
|
end
|
|
250
265
|
|
|
251
266
|
private
|
|
252
267
|
|
|
268
|
+
def configure_thresholds
|
|
269
|
+
return {} if type == "composite"
|
|
270
|
+
|
|
271
|
+
thresholds = { critical: critical }
|
|
272
|
+
|
|
273
|
+
# set optional variables
|
|
274
|
+
[:warning, :ok, :critical_recovery, :warning_recovery].each do |key|
|
|
275
|
+
if (value = send(key))
|
|
276
|
+
thresholds[key] = value
|
|
277
|
+
end
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
# custom logic for some types
|
|
281
|
+
case type
|
|
282
|
+
when "service check"
|
|
283
|
+
# avoid diff for default values of 1
|
|
284
|
+
OPTIONAL_SERVICE_CHECK_THRESHOLDS.each { |t| thresholds[t] ||= 1 }
|
|
285
|
+
when "query alert"
|
|
286
|
+
# metric and query values are stored as float by datadog
|
|
287
|
+
thresholds.each { |k, v| thresholds[k] = Float(v) }
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
{ thresholds: thresholds }
|
|
291
|
+
end
|
|
292
|
+
|
|
253
293
|
def validate_json(data)
|
|
254
294
|
super
|
|
255
295
|
|
|
@@ -372,6 +412,11 @@ module Kennel
|
|
|
372
412
|
else # do nothing
|
|
373
413
|
end
|
|
374
414
|
end
|
|
415
|
+
|
|
416
|
+
def query_window_minutes
|
|
417
|
+
return unless (match = query.match(/^\s*\w+\(last_(?<count>\d+)(?<unit>[mhdw])\):/))
|
|
418
|
+
Integer(match["count"]) * MINUTES_PER_UNIT.fetch(match["unit"])
|
|
419
|
+
end
|
|
375
420
|
end
|
|
376
421
|
end
|
|
377
422
|
end
|
data/lib/kennel/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kennel
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.9.0
|
|
4
|
+
version: 2.11.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Michael Grosser
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-01-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: diff-lcs
|