kennel 2.9.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d9635f7b742f83290e0feaa0f3eb1a55ba4ad78ca7d95c38aaba6e0adfebc1f4
4
- data.tar.gz: 984fd58f8cabb3303e5f9084eb90405d6cf39c23b9c42b304d1aa6a07afca24b
3
+ metadata.gz: 88495b67d113181455cb6dc751d2f7820eababca5679cfb714bb614dc9370ad6
4
+ data.tar.gz: a8703613913aa4557c585e605f35a1aef9555902c65a5f1c1772b1038e9d4d34
5
5
  SHA512:
6
- metadata.gz: ef77dd9a72a9577b0bf8eb78b300c41e098543422d7313a800693fbdc3e5a9ba5db82687cd0551eb362bb9cd8990e9cb943987ea442a3b99cdd52d02b30de3eb
7
- data.tar.gz: 4d4e036cc03aec4619c6e407255189f263d9cd167406cd1bf7f0c131047417a65a777c8c449701622fed38e908d8c3f6aab7f9cd6f7514668371fc33bf0eabb4
6
+ metadata.gz: d730ee50ff704f25eb39fdd8daa9c95a2c0b43f9a9534c7f1a05a245bd3881ab5ce36f12ebf226354eeafe94c4a961e843eebbbcf0312b2b548577b26a4debdd
7
+ data.tar.gz: ff487e51479e7a463464e37ec09a10ef7c1dc7d695bcf18f97303048ab0a75759d71eda14aa95e512aa69b66c25baf016d778be449455c0ef069e081f5957373
data/lib/kennel/api.rb CHANGED
@@ -5,8 +5,6 @@ module Kennel
5
5
  class Api
6
6
  CACHE_FILE = ENV.fetch("KENNEL_API_CACHE_FILE", "tmp/cache/details")
7
7
 
8
- RateLimitParams = Data.define(:limit, :period, :remaining, :reset, :name)
9
-
10
8
  def self.with_tracking(api_resource, reply)
11
9
  klass = Models::Record.api_resource_map[api_resource]
12
10
  return reply unless klass # do not blow up on unknown models
@@ -125,20 +123,8 @@ module Kennel
125
123
  end
126
124
  end
127
125
 
128
- rate_limit = RateLimitParams.new(
129
- limit: response.headers["x-ratelimit-limit"],
130
- period: response.headers["x-ratelimit-period"],
131
- remaining: response.headers["x-ratelimit-remaining"],
132
- reset: response.headers["x-ratelimit-reset"],
133
- name: response.headers["x-ratelimit-name"]
134
- )
135
-
136
126
  if response.status == 429
137
- message = "Datadog rate limit #{rate_limit.name.inspect} hit"
138
- message += " (#{rate_limit.limit} requests per #{rate_limit.period} seconds)"
139
- message += "; sleeping #{rate_limit.reset} seconds before trying again"
140
- Kennel.err.puts message
141
- sleep rate_limit.reset.to_f
127
+ sleep_until_rate_limit_resets(response)
142
128
  redo
143
129
  end
144
130
 
@@ -161,6 +147,20 @@ module Kennel
161
147
  end
162
148
  end
163
149
 
150
+ def sleep_until_rate_limit_resets(response)
151
+ limit = response.headers["x-ratelimit-limit"]
152
+ period = response.headers["x-ratelimit-period"]
153
+ reset = response.headers["x-ratelimit-reset"]
154
+ name = response.headers["x-ratelimit-name"]
155
+
156
+ message = "Datadog rate limit #{name.inspect} hit"
157
+ message += " (#{limit} requests per #{period} seconds)"
158
+ message += "; sleeping #{reset} seconds before trying again"
159
+
160
+ Kennel.err.puts message
161
+ sleep reset.to_f
162
+ end
163
+
164
164
  # allow caching all requests to speedup/benchmark logic that includes repeated requests
165
165
  def with_cache(enabled, key)
166
166
  return yield unless enabled
@@ -6,7 +6,8 @@ module Kennel
6
6
 
7
7
  OPTIONAL_SERVICE_CHECK_THRESHOLDS = [:ok, :warning].freeze
8
8
  READONLY_ATTRIBUTES = superclass::READONLY_ATTRIBUTES + [
9
- :multi, :matching_downtimes, :overall_state_modified, :overall_state, :restricted_roles, :draft_status, :assets
9
+ :multi, :matching_downtimes, :overall_state_modified, :overall_state, :restricted_roles, :draft_status, :assets,
10
+ :enable_logs_sample
10
11
  ]
11
12
  TRACKING_FIELD = :message
12
13
 
@@ -25,13 +26,19 @@ module Kennel
25
26
  group_retention_duration: nil,
26
27
  groupby_simple_monitor: false,
27
28
  variables: nil,
28
- on_missing_data: "default", # "default" is "evaluate as zero"
29
+ on_missing_data: nil,
29
30
  notification_preset_name: nil,
30
31
  notify_by: nil
31
32
  }.freeze
32
33
  DEFAULT_ESCALATION_MESSAGE = ["", nil].freeze
33
34
  ALLOWED_PRIORITY_CLASSES = [NilClass, Integer].freeze
34
35
  SKIP_NOTIFY_NO_DATA_TYPES = ["event alert", "event-v2 alert", "log alert"].freeze
36
+ MINUTES_PER_UNIT = {
37
+ "m" => 1,
38
+ "h" => 60,
39
+ "d" => 60 * 24,
40
+ "w" => 60 * 24 * 7
41
+ }.freeze
35
42
 
36
43
  settings(
37
44
  :query, :name, :message, :escalation_message, :critical, :type, :renotify_interval, :warning, :timeout_h, :evaluation_delay,
@@ -49,13 +56,11 @@ module Kennel
49
56
  # datadog UI sets this to false by default, but true is safer
50
57
  # except for log alerts which will always have "no error" gaps and should default to false
51
58
  notify_no_data: -> { !SKIP_NOTIFY_NO_DATA_TYPES.include?(type) },
52
- no_data_timeframe: -> { 60 },
53
59
  notify_audit: -> { MONITOR_OPTION_DEFAULTS.fetch(:notify_audit) },
54
60
  new_host_delay: -> { MONITOR_OPTION_DEFAULTS.fetch(:new_host_delay) },
55
61
  new_group_delay: -> { nil },
56
62
  group_retention_duration: -> { MONITOR_OPTION_DEFAULTS.fetch(:group_retention_duration) },
57
63
  tags: -> { @project.tags },
58
- timeout_h: -> { MONITOR_OPTION_DEFAULTS.fetch(:timeout_h) },
59
64
  evaluation_delay: -> { MONITOR_OPTION_DEFAULTS.fetch(:evaluation_delay) },
60
65
  critical_recovery: -> { nil },
61
66
  warning_recovery: -> { nil },
@@ -70,6 +75,8 @@ module Kennel
70
75
  )
71
76
 
72
77
  def build_json
78
+ no_data_options = configure_no_data
79
+
73
80
  data = super.merge(
74
81
  name: "#{name}#{LOCK}",
75
82
  type: type,
@@ -79,8 +86,7 @@ module Kennel
79
86
  priority: priority,
80
87
  options: {
81
88
  timeout_h: timeout_h,
82
- notify_no_data: notify_no_data,
83
- no_data_timeframe: notify_no_data ? no_data_timeframe : nil,
89
+ **no_data_options.except(:on_missing_data),
84
90
  notify_audit: notify_audit,
85
91
  require_full_window: require_full_window,
86
92
  new_host_delay: new_host_delay,
@@ -88,33 +94,14 @@ module Kennel
88
94
  include_tags: true,
89
95
  escalation_message: Utils.presence(escalation_message.strip),
90
96
  evaluation_delay: evaluation_delay,
91
- locked: false, # deprecated: setting this to true will likely fail
92
97
  renotify_interval: renotify_interval || 0,
93
- variables: variables
98
+ variables: variables,
99
+ **configure_thresholds,
100
+ **no_data_options.slice(:on_missing_data) # moved here to avoid generated diff
94
101
  }
95
102
  )
96
103
 
97
104
  options = data[:options]
98
- if data.fetch(:type) != "composite"
99
- thresholds = (options[:thresholds] = { critical: critical })
100
-
101
- # warning, ok, critical_recovery, and warning_recovery are optional
102
- [:warning, :ok, :critical_recovery, :warning_recovery].each do |key|
103
- if (value = send(key))
104
- thresholds[key] = value
105
- end
106
- end
107
-
108
- thresholds[:critical] = critical unless
109
- case data.fetch(:type)
110
- when "service check"
111
- # avoid diff for default values of 1
112
- OPTIONAL_SERVICE_CHECK_THRESHOLDS.each { |t| thresholds[t] ||= 1 }
113
- when "query alert"
114
- # metric and query values are stored as float by datadog
115
- thresholds.each { |k, v| thresholds[k] = Float(v) }
116
- end
117
- end
118
105
 
119
106
  # set without causing lots of nulls to be stored
120
107
  if (notify_by_value = notify_by)
@@ -145,30 +132,37 @@ module Kennel
145
132
  # Add in statuses where we would re notify on. Possible values: alert, no data, warn
146
133
  if options[:renotify_interval] != 0
147
134
  statuses = ["alert"]
148
- statuses << "no data" if options[:notify_no_data]
135
+ statuses << "no data" if options[:notify_no_data] || options[:on_missing_data] == "show_and_notify_no_data"
149
136
  statuses << "warn" if options.dig(:thresholds, :warning)
150
137
  options[:renotify_statuses] = statuses
151
138
  end
152
139
 
153
- # on_missing_data cannot be used with notify_no_data or no_data_timeframe
154
- # TODO migrate everything to only use on_missing_data
155
- if data.fetch(:type) == "event-v2 alert" || on_missing_data != "default"
156
- options[:on_missing_data] = on_missing_data
157
- options[:notify_no_data] = false # cannot set nil or it's an endless update loop
158
- options.delete :no_data_timeframe
159
- end
160
-
161
140
  # only set when needed to avoid big diff
162
141
  if (notification_preset_name = notification_preset_name())
163
142
  options[:notification_preset_name] = notification_preset_name
164
143
  end
165
144
 
166
- # locked is deprecated, will fail if used
167
- options.delete :locked
168
-
169
145
  data
170
146
  end
171
147
 
148
+ # TODO: migrate everything to only use on_missing_data by only sending notify_no_data when it was set by a user
149
+ # and enforce that it is not set at the same time as on_missing_data
150
+ def configure_no_data
151
+ notify = notify_no_data
152
+ action = on_missing_data
153
+
154
+ # on_missing_data cannot be used with notify_no_data or no_data_timeframe
155
+ if type == "event-v2 alert" || action
156
+ # TODO: mark setting notify_no_data or no_data_timeframe at all as invalid
157
+ { on_missing_data: action || "default" }
158
+ else
159
+ {
160
+ notify_no_data: notify,
161
+ no_data_timeframe: notify ? no_data_timeframe : nil
162
+ }
163
+ end
164
+ end
165
+
172
166
  def resolve_linked_tracking_ids!(id_map, **args)
173
167
  case as_json[:type]
174
168
  when "composite", "slo alert"
@@ -187,6 +181,25 @@ module Kennel
187
181
  "cannot update type from #{actual_type} to #{type}"
188
182
  end
189
183
 
184
+ # deprecated this setting is no longer returned by dd for new monitors
185
+ # datadog UI warns when setting no data timeframe to less than 2x the query window
186
+ # limited to 24h because `no_data_timeframe must not exceed group retention` and max group retention is 24h
187
+ def no_data_timeframe
188
+ default = 60
189
+ if type == "query alert" && (minutes = query_window_minutes)
190
+ (minutes * 2).clamp(default, 24 * 60)
191
+ else
192
+ default
193
+ end
194
+ end
195
+
196
+ # validate that monitors that alert on no data resolve in external services by using timeout_h, so it sends a
197
+ # notification when the no data group is removed from the monitor, which datadog does automatically after 24h
198
+ def timeout_h
199
+ sending_no_data_notifications = (on_missing_data ? on_missing_data == "show_and_notify_no_data" : notify_no_data)
200
+ sending_no_data_notifications ? 24 : MONITOR_OPTION_DEFAULTS.fetch(:timeout_h)
201
+ end
202
+
190
203
  def self.api_resource
191
204
  "monitor"
192
205
  end
@@ -213,7 +226,9 @@ module Kennel
213
226
  ignore_default(expected, actual, MONITOR_DEFAULTS)
214
227
 
215
228
  options = actual.fetch(:options)
216
- options.delete(:silenced) # we do not manage silenced, so ignore it when diffing
229
+
230
+ # we do not manage silenced: ignore it when diffing
231
+ options.delete(:silenced)
217
232
 
218
233
  # fields are not returned when set to true
219
234
  if ["service check", "event alert"].include?(actual[:type])
@@ -243,13 +258,38 @@ module Kennel
243
258
  options.delete(:escalation_message)
244
259
  expected_options.delete(:escalation_message)
245
260
  end
261
+
246
262
  # locked is deprecated: ignored when diffing
247
263
  options.delete(:locked)
248
- expected_options.delete(:locked)
249
264
  end
250
265
 
251
266
  private
252
267
 
268
+ def configure_thresholds
269
+ return {} if type == "composite"
270
+
271
+ thresholds = { critical: critical }
272
+
273
+ # set optional variables
274
+ [:warning, :ok, :critical_recovery, :warning_recovery].each do |key|
275
+ if (value = send(key))
276
+ thresholds[key] = value
277
+ end
278
+ end
279
+
280
+ # custom logic for some types
281
+ case type
282
+ when "service check"
283
+ # avoid diff for default values of 1
284
+ OPTIONAL_SERVICE_CHECK_THRESHOLDS.each { |t| thresholds[t] ||= 1 }
285
+ when "query alert"
286
+ # metric and query values are stored as float by datadog
287
+ thresholds.each { |k, v| thresholds[k] = Float(v) }
288
+ end
289
+
290
+ { thresholds: thresholds }
291
+ end
292
+
253
293
  def validate_json(data)
254
294
  super
255
295
 
@@ -372,6 +412,11 @@ module Kennel
372
412
  else # do nothing
373
413
  end
374
414
  end
415
+
416
+ def query_window_minutes
417
+ return unless (match = query.match(/^\s*\w+\(last_(?<count>\d+)(?<unit>[mhdw])\):/))
418
+ Integer(match["count"]) * MINUTES_PER_UNIT.fetch(match["unit"])
419
+ end
375
420
  end
376
421
  end
377
422
  end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Kennel
3
- VERSION = "2.9.0"
3
+ VERSION = "2.11.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kennel
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.9.0
4
+ version: 2.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Grosser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-12-20 00:00:00.000000000 Z
11
+ date: 2026-01-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diff-lcs