debugbundle 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +17 -0
- data/Makefile +43 -0
- data/README.md +168 -0
- data/debugbundle.gemspec +30 -0
- data/lib/debugbundle/client.rb +724 -0
- data/lib/debugbundle/config.rb +144 -0
- data/lib/debugbundle/logging.rb +77 -0
- data/lib/debugbundle/rack/middleware.rb +94 -0
- data/lib/debugbundle/rack/relay_middleware.rb +37 -0
- data/lib/debugbundle/rails/railtie.rb +35 -0
- data/lib/debugbundle/rails/relay_endpoint.rb +100 -0
- data/lib/debugbundle/rails.rb +10 -0
- data/lib/debugbundle/redaction.rb +151 -0
- data/lib/debugbundle/relay/handler.rb +231 -0
- data/lib/debugbundle/relay.rb +4 -0
- data/lib/debugbundle/remote_config.rb +153 -0
- data/lib/debugbundle/runtime.rb +22 -0
- data/lib/debugbundle/sidekiq/server_middleware.rb +34 -0
- data/lib/debugbundle/suppression.rb +121 -0
- data/lib/debugbundle/transport.rb +190 -0
- data/lib/debugbundle/trigger_token.rb +122 -0
- data/lib/debugbundle/version.rb +5 -0
- data/lib/debugbundle.rb +93 -0
- data/spec/client_spec.rb +236 -0
- data/spec/debugbundle_spec.rb +54 -0
- data/spec/file_transport_spec.rb +54 -0
- data/spec/logger_integration_spec.rb +118 -0
- data/spec/rack_integration_spec.rb +44 -0
- data/spec/rack_middleware_spec.rb +206 -0
- data/spec/rails_railtie_spec.rb +96 -0
- data/spec/rails_relay_spec.rb +121 -0
- data/spec/redaction_spec.rb +42 -0
- data/spec/relay_spec.rb +178 -0
- data/spec/remote_config_spec.rb +402 -0
- data/spec/sidekiq_integration_spec.rb +66 -0
- data/spec/sidekiq_middleware_spec.rb +50 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/suppression_spec.rb +16 -0
- metadata +113 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'uri'
|
|
5
|
+
|
|
6
|
+
module DebugBundle
|
|
7
|
+
module Relay
|
|
8
|
+
# Event types a browser may submit through the relay; Handler#sanitize_event
# rejects every other type.
ACCEPTED_EVENT_TYPES = %w[
  frontend_exception error_suppressed frontend_breadcrumb request_event probe_event
].freeze
# Value stamped into the 'sdk_name' field of every relayed event.
BROWSER_SDK_NAME = '@debugbundle/sdk-browser'
# Default upper bound for a relay request body, in bytes (256 KiB).
DEFAULT_MAX_BODY_BYTES = 256 * 1024
# Default number of relay requests allowed per client key per minute.
DEFAULT_RATE_LIMIT_PER_MINUTE = 60

# Result of handling a relay request: a status code plus a body (Hash or nil).
Response = Struct.new(:status, :body, keyword_init: true)
|
|
20
|
+
|
|
21
|
+
class Handler
|
|
22
|
+
# Builds a relay handler. Every option is a keyword argument.
#
# project_mode          - :connected or :local_only (anything responding to to_sym).
# project_token         - token copied into each event and passed to the forward transport.
# endpoint              - used to build the default HTTP forward transport.
# local_events_dir      - directory used in :local_only mode.
# spool_dir             - directory used for the durable copy in :connected mode.
# durable_write         - when truthy, :connected mode also writes to spool_dir.
# service / environment - when set, override the values reported by the browser.
# allowed_origins       - origin allow-list; nil entries are dropped, the rest normalized.
# max_body_bytes        - largest accepted request body.
# rate_limit_per_minute - per-client request budget.
# rate_limit_store      - optional shared counter store; in-memory state is used without it.
# forward_transport     - callable used in :connected mode; defaults to an HttpTransport.
def initialize(
  project_mode: :connected,
  project_token: nil,
  endpoint: DebugBundle::Config::DEFAULT_ENDPOINT,
  local_events_dir: DebugBundle::Config::DEFAULT_LOCAL_EVENTS_DIR,
  spool_dir: DebugBundle::Config::DEFAULT_SPOOL_DIR,
  durable_write: true,
  service: nil,
  environment: nil,
  allowed_origins: nil,
  max_body_bytes: DEFAULT_MAX_BODY_BYTES,
  rate_limit_per_minute: DEFAULT_RATE_LIMIT_PER_MINUTE,
  rate_limit_store: nil,
  forward_transport: nil
)
  # Delivery settings.
  @project_mode = project_mode.to_sym
  @project_token = project_token
  @endpoint = endpoint
  @local_events_dir = local_events_dir
  @spool_dir = spool_dir
  @durable_write = durable_write

  # Overrides applied to every relayed event.
  @service = service
  @environment = environment

  # Admission limits.
  @allowed_origins = Array(allowed_origins).compact.map { |entry| normalize_origin(entry) }
  @max_body_bytes = max_body_bytes
  @rate_limit_per_minute = rate_limit_per_minute
  @rate_limit_store = rate_limit_store

  @forward_transport = forward_transport || Transport::HttpTransport.new(endpoint)
  # In-memory limiter: client key => list of request timestamps (epoch seconds).
  @rate_limit_state = Hash.new { |buckets, client_key| buckets[client_key] = [] }
end
|
|
52
|
+
|
|
53
|
+
# Processes one browser relay request and returns a Response.
#
# request is a Hash read with Symbol keys: :method (treated as 'POST' when
# absent), :headers, :body and :ip_address (or :ip).
#
# Status codes produced:
#   405 - method is not POST
#   403 - origin check failed
#   400 - wrong content type, invalid JSON, no batch array, or at least one
#         invalid event (valid events in the same batch are still delivered)
#   413 - body larger than @max_body_bytes
#   429 - rate limit exceeded
#   202 - every event in the batch was accepted
#   500 - any other StandardError, including delivery failures
def handle(request)
  return Response.new(status: 405, body: nil) unless request.fetch(:method, 'POST').to_s.upcase == 'POST'

  headers = normalize_headers(request[:headers] || {})
  return Response.new(status: 403, body: nil) unless origin_allowed?(headers)
  unless json_content_type?(headers['content-type'])
    return Response.new(status: 400,
                        body: invalid_body('Relay requests must use Content-Type: application/json.'))
  end

  raw_body = request[:body].to_s
  return Response.new(status: 413, body: nil) if raw_body.bytesize > @max_body_bytes
  return Response.new(status: 429, body: nil) if rate_limited?(request[:ip_address] || request[:ip])

  decoded = JSON.parse(raw_body)
  # Valid JSON that is not an object (array, number, null, ...) has no batch.
  # Treat it like a missing batch instead of letting the lookup raise.
  batch = decoded.is_a?(Hash) ? decoded['batch'] : nil
  unless batch.is_a?(Array)
    return Response.new(status: 400,
                        body: invalid_body('Relay request body must include a batch array.'))
  end

  accepted = []
  errors = []
  batch.each_with_index do |candidate, index|
    sanitized = sanitize_event(candidate)
    if sanitized
      accepted << sanitized
    else
      errors << "batch[#{index}]: Invalid browser relay event payload."
    end
  end

  deliver(accepted) unless accepted.empty?

  unless errors.empty?
    return Response.new(status: 400,
                        body: { 'accepted' => accepted.length,
                                'rejected' => errors.length, 'errors' => errors })
  end

  Response.new(status: 202, body: { 'accepted' => accepted.length, 'rejected' => 0, 'errors' => [] })
rescue JSON::ParserError
  Response.new(status: 400, body: invalid_body('Relay request body must be valid JSON.'))
rescue StandardError
  # Internal failures (including a KeyError raised during delivery) are
  # server errors, not client errors.
  Response.new(status: 500, body: nil)
end
|
|
102
|
+
|
|
103
|
+
private
|
|
104
|
+
|
|
105
|
+
# Hands accepted events to the transport selected by @project_mode.
#
# :local_only - writes the events via a FileTransport rooted at @local_events_dir.
# :connected  - optionally writes a copy under @spool_dir (when @durable_write),
#               then forwards through @forward_transport.
#
# Raises RuntimeError ('relay_forward_failed') when the forward result is not
# a 2xx status, and ArgumentError for any other project mode.
def deliver(events)
  # Sanitized events always carry a String service name, which is "" when the
  # browser sent none; skip blank candidates so the 'service' fallback applies.
  candidates = [@service, events.first.dig('service', 'name')]
  service_name = candidates.find { |name| !name.to_s.empty? } || 'service'

  case @project_mode
  when :local_only
    Transport::FileTransport.new(@local_events_dir).call(service_name: service_name, events: events)
  when :connected
    Transport::FileTransport.new(@spool_dir).call(service_name: service_name, events: events) if @durable_write

    result = Transport.coerce_result(@forward_transport.call(project_token: @project_token, events: events))
    raise 'relay_forward_failed' unless result.status_code.between?(200, 299)
  else
    raise ArgumentError, 'unsupported relay project mode'
  end
end
|
|
120
|
+
|
|
121
|
+
# Validates one batch entry and rebuilds it with a fixed set of fields.
#
# Returns nil unless candidate is a Hash with an accepted 'event_type' and
# Hash values under 'service' and 'payload'. Otherwise returns a new Hash in
# which scalar fields are stringified, 'sdk_name' is forced to
# BROWSER_SDK_NAME, @service / @environment override the reported values when
# set, the correlation is reduced by sanitize_correlation, the payload is
# passed through as-is, and @project_token is attached.
def sanitize_event(candidate)
  return nil unless candidate.is_a?(Hash)

  kind = candidate['event_type']
  return nil unless ACCEPTED_EVENT_TYPES.include?(kind)

  reported_service, body = candidate.values_at('service', 'payload')
  return nil unless reported_service.is_a?(Hash) && body.is_a?(Hash)

  text = ->(field) { candidate[field].to_s }
  {
    'schema_version' => text.call('schema_version'),
    'event_id' => text.call('event_id'),
    'event_type' => kind,
    'sdk_name' => BROWSER_SDK_NAME,
    'sdk_version' => text.call('sdk_version'),
    'occurred_at' => text.call('occurred_at'),
    'service' => {
      'name' => @service || reported_service['name'].to_s,
      'environment' => @environment || reported_service['environment'].to_s
    },
    'correlation' => sanitize_correlation(candidate['correlation']),
    'payload' => body,
    'project_token' => @project_token
  }
end
|
|
148
|
+
|
|
149
|
+
# Reduces a correlation object to its four known identifiers.
#
# Non-Hash input is treated as empty. Each of request_id, trace_id,
# session_id and user_id_hash is kept only when it is a String; otherwise the
# key is present with a nil value.
def sanitize_correlation(value)
  source = value.is_a?(Hash) ? value : {}
  %w[request_id trace_id session_id user_id_hash].to_h do |field|
    [field, string_or_nil(source[field])]
  end
end
|
|
158
|
+
|
|
159
|
+
# Returns value unchanged when it is a String, otherwise nil.
def string_or_nil(value)
  value if value.is_a?(String)
end
|
|
162
|
+
|
|
163
|
+
# Builds the response body for a request rejected as a whole: zero accepted,
# zero rejected, and the given message as the only error.
def invalid_body(message)
  body = { 'accepted' => 0, 'rejected' => 0 }
  body['errors'] = [message]
  body
end
|
|
166
|
+
|
|
167
|
+
# Returns a new Hash with every header name lower-cased and every name and
# value converted to a String. When two names collide after lower-casing, the
# later entry wins.
def normalize_headers(headers)
  headers.map { |name, value| [name.to_s.downcase, value.to_s] }.to_h
end
|
|
172
|
+
|
|
173
|
+
# True when the Content-Type header's media type is application/json.
#
# Only the part before the first ';' is compared (case-insensitively, with
# surrounding whitespace ignored), so parameters such as a charset are
# allowed. A value that merely contains the text "application/json" elsewhere
# (for example "text/plain; charset=application/json") is not accepted.
def json_content_type?(value)
  media_type = value.to_s.split(';', 2).first.to_s.strip.downcase
  media_type == 'application/json'
end
|
|
176
|
+
|
|
177
|
+
# Decides whether the request's origin may use the relay.
#
# headers is expected to have lower-cased String keys. The origin comes from
# the 'origin' header, or is derived from 'referer' when that header is
# absent. With a configured allow-list, the normalized origin must be in it.
# Without one, the origin's host must equal the request's Host header (port
# ignored, compared case-insensitively).
#
# Returns false for a missing origin, a missing or empty Host header, or an
# origin that cannot be parsed as a URI.
def origin_allowed?(headers)
  origin = headers['origin'] || origin_from_referer(headers['referer'])
  return false if origin.nil? || origin.empty?

  return @allowed_origins.include?(normalize_origin(origin)) if @allowed_origins.any?

  # Strip only a trailing ":port" so bracketed IPv6 literals such as
  # "[::1]:3000" keep their inner colons; an absent Host yields "".
  host = headers['host'].to_s.strip.sub(/:\d*\z/, '').downcase
  return false if host.empty?

  origin_host = URI.parse(origin).host
  !origin_host.nil? && origin_host.downcase == host
rescue URI::InvalidURIError
  false
end
|
|
190
|
+
|
|
191
|
+
# Derives an origin ("scheme://host[:port]") from a Referer header value.
#
# The port is included only when it differs from the scheme's default, which
# matches how an Origin header is written; without it, a page on
# http://localhost:3000 could never match an allowed origin that names that
# port and would be indistinguishable from other ports on the same host.
#
# Returns nil for an empty value, a value without scheme or host, or a value
# that is not a parseable URI.
def origin_from_referer(referer)
  return nil if referer.to_s.empty?

  parsed = URI.parse(referer)
  return nil unless parsed.scheme && parsed.host

  origin = "#{parsed.scheme}://#{parsed.host}"
  non_default_port = parsed.port && parsed.port != parsed.default_port
  non_default_port ? "#{origin}:#{parsed.port}" : origin
rescue URI::InvalidURIError
  nil
end
|
|
201
|
+
|
|
202
|
+
# Canonical form used when comparing origins: stringified, lower-cased, and
# without a single trailing slash.
#
# The slash is removed only at the very end of the string; a line-anchored
# pattern would also strip a slash that sits before an embedded newline.
def normalize_origin(origin)
  origin.to_s.downcase.delete_suffix('/')
end
|
|
205
|
+
|
|
206
|
+
# Returns true when the client identified by ip has used up its budget.
#
# A blank ip is counted under the key 'unknown'. When a shared store is
# configured the decision is delegated to shared_rate_limited?. Otherwise the
# in-memory list of timestamps for the key is trimmed to the last 60 seconds,
# and the request is recorded only when it is allowed.
def rate_limited?(ip)
  bucket = ip.to_s
  bucket = 'unknown' if bucket.empty?
  return shared_rate_limited?(bucket) if @rate_limit_store

  window_start = Time.now.to_i - 60
  recent = @rate_limit_state[bucket].select { |stamp| stamp > window_start }
  @rate_limit_state[bucket] = recent
  return true if recent.length >= @rate_limit_per_minute

  recent << Time.now.to_i
  false
end
|
|
217
|
+
|
|
218
|
+
# Rate-limit decision backed by @rate_limit_store.
#
# Increments a counter keyed by client and by the current minute (epoch
# seconds / 60) with a 60 second expiry. When increment returns nil and the
# store responds to write, the counter is seeded with 1. The request is
# limited once the counter exceeds @rate_limit_per_minute. Any StandardError
# from the store results in false (the request is allowed).
def shared_rate_limited?(key)
  minute_bucket = Time.now.to_i / 60
  cache_key = "debugbundle:relay-rate:#{key}:#{minute_bucket}"

  hits = @rate_limit_store.increment(cache_key, 1, expires_in: 60)
  if hits.nil? && @rate_limit_store.respond_to?(:write)
    @rate_limit_store.write(cache_key, 1, expires_in: 60)
    hits = 1
  end

  hits.to_i > @rate_limit_per_minute
rescue StandardError
  false
end
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'time'
|
|
4
|
+
|
|
5
|
+
module DebugBundle
|
|
6
|
+
module RemoteConfig
|
|
7
|
+
# Capture settings carried by a remote config snapshot. Built with keyword
# arguments; see minimal_capture_policy / balanced_capture_policy for the
# values used by the built-in presets.
CapturePolicy = Struct.new(
  *%i[
    preset
    capture_logs
    capture_request_events
    capture_breadcrumbs
    capture_probe_events
    immediate_client_error_statuses
  ],
  keyword_init: true
)
|
|
16
|
+
|
|
17
|
+
# A remotely configured probe directive: which labels it applies to, in
# which service/environment scope, and until when.
Directive = Struct.new(:id, :label_pattern, :service, :environment, :expires_at, keyword_init: true) do
  # True when the directive has not expired at `now` and matches the given
  # label, service and environment.
  #
  # expires_at must be comparable with now (callers pass values of the same
  # kind); a directive whose expiry equals now counts as expired.
  def active?(label:, service:, environment:, now:)
    return false if expires_at <= now
    return false unless match_scope?(self.service, service)
    return false unless match_scope?(self.environment, environment)

    match_label?(label_pattern, label)
  end

  private

  # A scope matches when it is the wildcard '*' or equals the value exactly.
  def match_scope?(pattern, value)
    pattern == '*' || pattern == value
  end

  # Label matching rules:
  #   '*'        matches every label
  #   'prefix.*' matches 'prefix' itself and anything starting with 'prefix.'
  #   otherwise  the pattern must equal the label
  # A pattern that is not a String (for example nil from a payload without
  # label_pattern) matches nothing, and a label without start_with? can only
  # match by equality, instead of raising NoMethodError.
  def match_label?(pattern, label)
    return true if pattern == '*'
    return false unless pattern.is_a?(String)

    if pattern.end_with?('.*')
      prefix = pattern.delete_suffix('.*')
      return label == prefix || (label.respond_to?(:start_with?) && label.start_with?("#{prefix}."))
    end

    pattern == label
  end
end
|
|
43
|
+
|
|
44
|
+
# One parsed remote configuration state. Built with keyword arguments.
Snapshot = Struct.new(
  *%i[probes_enabled remote_probes_enabled directives poll_interval_seconds capture_policy trigger_token_key],
  keyword_init: true
) do
  # Snapshot used before any remote config is known: probes on, remote probes
  # off, no directives, a 60 second poll interval, the balanced capture
  # policy and no trigger token key.
  def self.default
    defaults = {
      probes_enabled: true,
      remote_probes_enabled: false,
      directives: [],
      poll_interval_seconds: 60,
      capture_policy: RemoteConfig.balanced_capture_policy,
      trigger_token_key: nil
    }
    new(**defaults)
  end
end
|
|
64
|
+
|
|
65
|
+
# Returns a new CapturePolicy holding the 'minimal' preset values.
def self.minimal_capture_policy
  settings = {
    preset: 'minimal',
    capture_logs: 'error',
    capture_request_events: 'failures_only',
    capture_breadcrumbs: 'local_only',
    capture_probe_events: 'buffer_only',
    immediate_client_error_statuses: []
  }
  CapturePolicy.new(**settings)
end
|
|
75
|
+
|
|
76
|
+
# Returns a new CapturePolicy holding the 'balanced' preset values.
def self.balanced_capture_policy
  settings = {
    preset: 'balanced',
    capture_logs: 'warning',
    capture_request_events: 'failures_only',
    capture_breadcrumbs: 'exception_only',
    capture_probe_events: 'buffer_only',
    immediate_client_error_statuses: []
  }
  CapturePolicy.new(**settings)
end
|
|
86
|
+
|
|
87
|
+
# Builds a Snapshot from a remote config payload.
#
# payload may use String or Symbol keys; String keys are looked up first.
# Returns nil when payload is not a Hash.
#
# - capture_policy falls back to the balanced policy when absent or invalid.
# - active_probes entries that parse_directive rejects are dropped.
# - poll_interval_ms is converted to whole seconds (at least 1); when it is
#   missing, not positive, or not convertible to an integer (for example
#   true, a Hash or an Array), fallback_poll_interval_seconds is used.
# - probes_enabled is true unless either key form is exactly false.
# - remote_probes_enabled is true only when either key form is exactly true.
def self.parse(payload, fallback_poll_interval_seconds)
  return nil unless payload.is_a?(Hash)

  read = ->(name) { payload[name] || payload[name.to_sym] }

  capture_policy = parse_capture_policy(read.call('capture_policy')) || balanced_capture_policy
  directives = Array(read.call('active_probes')).filter_map { |entry| parse_directive(entry) }

  raw_interval = read.call('poll_interval_ms')
  # Values such as true or a Hash do not respond to to_i; treat them as unset.
  interval_ms = raw_interval.respond_to?(:to_i) ? raw_interval.to_i : 0
  poll_interval_seconds = interval_ms.positive? ? [interval_ms / 1000, 1].max : fallback_poll_interval_seconds

  Snapshot.new(
    probes_enabled: payload['probes_enabled'] != false && payload[:probes_enabled] != false,
    remote_probes_enabled: payload['remote_probes_enabled'] == true || payload[:remote_probes_enabled] == true,
    directives: directives,
    poll_interval_seconds: poll_interval_seconds,
    capture_policy: capture_policy,
    trigger_token_key: read.call('trigger_token_key')
  )
end
|
|
112
|
+
|
|
113
|
+
# Builds a CapturePolicy from a payload Hash with String or Symbol keys
# (String keys win). Returns nil when payload is not a Hash.
#
# Missing settings take the balanced values; immediate_client_error_statuses
# keeps only Integer entries.
def self.parse_capture_policy(payload)
  return nil unless payload.is_a?(Hash)

  setting = ->(name, fallback) { payload[name.to_s] || payload[name] || fallback }
  statuses = Array(payload['immediate_client_error_statuses'] || payload[:immediate_client_error_statuses])

  CapturePolicy.new(
    preset: setting.call(:preset, 'balanced'),
    capture_logs: setting.call(:capture_logs, 'warning'),
    capture_request_events: setting.call(:capture_request_events, 'failures_only'),
    capture_breadcrumbs: setting.call(:capture_breadcrumbs, 'exception_only'),
    capture_probe_events: setting.call(:capture_probe_events, 'buffer_only'),
    immediate_client_error_statuses: statuses.grep(Integer)
  )
end
|
|
128
|
+
|
|
129
|
+
# Builds a Directive from one active_probes entry (String or Symbol keys,
# String keys win).
#
# Returns nil when payload is not a Hash or its expires_at cannot be parsed
# by parse_time. service and environment default to the wildcard '*'.
def self.parse_directive(payload)
  return nil unless payload.is_a?(Hash)

  field = ->(name) { payload[name.to_s] || payload[name] }
  deadline = parse_time(field.call(:expires_at))
  return nil unless deadline

  Directive.new(
    id: field.call(:id),
    label_pattern: field.call(:label_pattern),
    service: field.call(:service) || '*',
    environment: field.call(:environment) || '*',
    expires_at: deadline
  )
end
|
|
144
|
+
|
|
145
|
+
# Parses an ISO 8601 timestamp String into a Time.
# Returns nil for non-String or empty input and for text Time.iso8601 rejects.
def self.parse_time(value)
  return nil unless value.is_a?(String)
  return nil if value.empty?

  Time.iso8601(value)
rescue ArgumentError
  nil
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'socket'
|
|
4
|
+
|
|
5
|
+
module DebugBundle
|
|
6
|
+
module Runtime
|
|
7
|
+
# Describes the running Ruby process as a String-keyed Hash: interpreter
# version and platform, pid, working directory, hostname, the current
# thread's object_id, and the engine name and version.
#
# If collecting any of these raises a StandardError, only the Ruby version
# is returned.
def self.payload
  engine = defined?(RUBY_ENGINE) ? RUBY_ENGINE : 'ruby'
  engine_version = defined?(RUBY_ENGINE_VERSION) ? RUBY_ENGINE_VERSION : RUBY_VERSION

  process_info = {
    'version' => RUBY_VERSION,
    'platform' => RUBY_PLATFORM,
    'pid' => Process.pid,
    'cwd' => Dir.pwd,
    'hostname' => Socket.gethostname,
    'thread_id' => Thread.current.object_id
  }
  process_info.merge('engine' => engine, 'engine_version' => engine_version)
rescue StandardError
  { 'version' => RUBY_VERSION }
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DebugBundle
|
|
4
|
+
module Sidekiq
|
|
5
|
+
# Sidekiq server middleware that reports job failures to a DebugBundle
# client and then lets the failure propagate.
class ServerMiddleware
  # options - optional Hash; a client may be supplied under :client or 'client'.
  # client  - explicit client; takes precedence over options.
  # Falls back to DebugBundle.client when neither provides one.
  def initialize(options = nil, client: nil)
    resolved_options = options.is_a?(Hash) ? options : {}
    @client = client || resolved_options[:client] || resolved_options['client'] || DebugBundle.client
  end

  # Runs the job block. A StandardError raised by the job is reported as an
  # unhandled exception and then re-raised unchanged; on success the block's
  # value is returned.
  def call(_worker, job, queue)
    yield
  rescue StandardError => e
    report_failure(e, job, queue)
    raise e
  end

  private

  # Sends the failure to the client with job metadata as context. Argument
  # values are never sent, only the class names of the first five arguments.
  #
  # Reporting is best-effort: an error raised while capturing is swallowed so
  # that it cannot replace the job's own exception.
  def report_failure(error, job, queue)
    @client.capture_exception(
      error,
      context: {
        queue: queue,
        job: {
          class: job['class'],
          queue: job['queue'] || queue,
          jid: job['jid'],
          # Falls back to job['retry'] when no 'retry_count' is present.
          retry_count: job['retry_count'] || job['retry'],
          args_summary: Array(job['args']).first(5).map { |value| value.class.name }
        },
        job_id: job['jid'],
        trace_id: job['trace_id']
      },
      handled: false
    )
  rescue StandardError
    nil
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
module DebugBundle
|
|
7
|
+
module Suppression
|
|
8
|
+
# Length of the window in which duplicate events are counted; also reported
# as 'window_seconds' on drained aggregates.
DUPLICATE_WINDOW_SECONDS      = 30.0
# Length of the short window used to count hits for loop detection.
LOOP_WINDOW_SECONDS           = 2.0
# Hits within one loop window beyond which suppression mode is switched on.
LOOP_THRESHOLD                = 10
# Quiet time after which a key in suppression mode starts over with fresh state.
LOOP_RESET_AFTER_SECONDS      = 60.0
# Minimum spacing between aggregates for a key that is in suppression mode.
LOOP_CHECKPOINT_SECONDS       = 30.0
# Number of events let through per duplicate window outside suppression mode.
MAX_NORMAL_EVENTS_PER_WINDOW  = 3
|
|
14
|
+
|
|
15
|
+
# Per-key bookkeeping kept by the Tracker. Built with keyword arguments.
State = Struct.new(
  *%i[
    window_started_at
    emitted_count
    pending_suppressed_count
    pending_first_seen_at
    pending_last_seen_at
    last_aggregate_emitted_at
    loop_window_started_at
    loop_hit_count
    suppression_mode
    last_seen_at
  ],
  keyword_init: true
)
|
|
28
|
+
|
|
29
|
+
# Decides, per key, whether an event should be captured or folded into a
# suppression aggregate.
class Tracker
  def initialize
    # key => State
    @states = {}
  end

  # Records one occurrence of key at time now and returns true when the event
  # should be captured, false when it is suppressed.
  #
  # now is a numeric timestamp; the same clock must be used for every call.
  # Outside suppression mode at most MAX_NORMAL_EVENTS_PER_WINDOW events pass
  # per duplicate window. More than LOOP_THRESHOLD hits inside one loop window
  # switch the key into suppression mode, which ends only after
  # LOOP_RESET_AFTER_SECONDS without any occurrence.
  def should_capture(key, now:)
    entry = (@states[key] ||= new_state(now))

    # A long enough quiet period ends suppression and starts from scratch.
    if entry.suppression_mode && (now - entry.last_seen_at) >= LOOP_RESET_AFTER_SECONDS
      entry = @states[key] = new_state(now)
    end

    if (now - entry.window_started_at) >= DUPLICATE_WINDOW_SECONDS
      entry.window_started_at = now
      entry.emitted_count = 0
    end

    if (now - entry.loop_window_started_at) >= LOOP_WINDOW_SECONDS
      entry.loop_window_started_at = now
      entry.loop_hit_count = 0
    end

    entry.loop_hit_count += 1
    entry.last_seen_at = now
    entry.suppression_mode = true if entry.loop_hit_count > LOOP_THRESHOLD

    allowed = !entry.suppression_mode && entry.emitted_count < MAX_NORMAL_EVENTS_PER_WINDOW
    if allowed
      entry.emitted_count += 1
    else
      mark_suppressed(entry, now)
    end
    allowed
  end

  # Returns one aggregate Hash per key that has suppressed events pending, and
  # clears those pending counters. Keys in suppression mode are skipped until
  # LOOP_CHECKPOINT_SECONDS have passed since their previous aggregate.
  def drain_aggregates(now:)
    @states.filter_map do |key, entry|
      next if entry.pending_suppressed_count.zero?
      next if entry.pending_first_seen_at.nil? || entry.pending_last_seen_at.nil?
      next if checkpoint_not_due?(entry, now)

      summary = {
        'fingerprint' => Digest::SHA256.hexdigest(key),
        'suppressed_count' => entry.pending_suppressed_count,
        'first_seen' => Time.at(entry.pending_first_seen_at).utc.iso8601,
        'last_seen' => Time.at(entry.pending_last_seen_at).utc.iso8601,
        'window_seconds' => DUPLICATE_WINDOW_SECONDS.to_i
      }

      entry.pending_suppressed_count = 0
      entry.pending_first_seen_at = nil
      entry.pending_last_seen_at = nil
      entry.last_aggregate_emitted_at = now
      summary
    end
  end

  private

  # Fresh bookkeeping for a key first seen at now.
  def new_state(now)
    State.new(
      window_started_at: now,
      loop_window_started_at: now,
      last_seen_at: now,
      emitted_count: 0,
      loop_hit_count: 0,
      pending_suppressed_count: 0,
      pending_first_seen_at: nil,
      pending_last_seen_at: nil,
      last_aggregate_emitted_at: nil,
      suppression_mode: false
    )
  end

  # Counts one suppressed occurrence. The first pending occurrence is dated
  # to the start of the current duplicate window.
  def mark_suppressed(entry, now)
    entry.pending_first_seen_at ||= entry.window_started_at
    entry.pending_suppressed_count += 1
    entry.pending_last_seen_at = now
  end

  # True while a key in suppression mode has emitted an aggregate less than
  # LOOP_CHECKPOINT_SECONDS ago.
  def checkpoint_not_due?(entry, now)
    return false unless entry.suppression_mode

    previous = entry.last_aggregate_emitted_at
    return false unless previous

    (now - previous) < LOOP_CHECKPOINT_SECONDS
  end
end
|
|
120
|
+
end
|
|
121
|
+
end
|