errsight 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +163 -0
- data/LICENSE +21 -0
- data/README.md +120 -0
- data/errsight.gemspec +26 -0
- data/lib/errsight/backtrace.rb +117 -0
- data/lib/errsight/capture_middleware.rb +95 -0
- data/lib/errsight/client.rb +241 -0
- data/lib/errsight/configuration.rb +57 -0
- data/lib/errsight/hub.rb +53 -0
- data/lib/errsight/integrations/active_job.rb +175 -0
- data/lib/errsight/integrations/active_record.rb +94 -0
- data/lib/errsight/integrations/rails_error_reporter.rb +107 -0
- data/lib/errsight/logger.rb +85 -0
- data/lib/errsight/middleware.rb +16 -0
- data/lib/errsight/railtie.rb +198 -0
- data/lib/errsight/scope.rb +166 -0
- data/lib/errsight/sidekiq.rb +248 -0
- data/lib/errsight/source_context.rb +107 -0
- data/lib/errsight/version.rb +3 -0
- data/lib/errsight.rb +193 -0
- metadata +79 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
require "net/http"
|
|
2
|
+
require "uri"
|
|
3
|
+
require "json"
|
|
4
|
+
require "openssl"
|
|
5
|
+
|
|
6
|
+
module Errsight
|
|
7
|
+
class Client
|
|
8
|
+
# Builds a client around +config+ and immediately starts the background
# flush worker.
#
# @param config [Object] settings object; this class reads max_queue_size,
#   batch_size, flush_interval, shutdown_timeout, timeout, events_endpoint,
#   api_key and logger from it.
def initialize(config)
  @config = config
  # Remembered so enqueue can notice when it runs inside a forked child.
  @pid = Process.pid

  # Pending events; every access is guarded by @mutex.
  @queue = []
  @mutex = Mutex.new

  @shutdown = false
  # After a 429 this holds the Time until which flush! skips sending.
  # Using a timestamp instead of sleeping keeps the flush thread ticking
  # (and responsive to shutdown!) while sends are paused.
  @rate_limited_until = nil

  # Lock + condition variable behind the worker's between-tick wait.
  # shutdown! sets the flag and broadcasts under this same lock, so the
  # wake-up cannot be lost the way a bare Thread#wakeup could be when it
  # fires before the worker has gone to sleep.
  @sleep_mutex = Mutex.new
  @sleep_cv = ConditionVariable.new

  start_flush_worker
end
|
|
28
|
+
|
|
29
|
+
# Appends +event+ to the in-memory queue, dropping it (with a warning) once
# the queue already holds max_queue_size entries.
#
# @param event [Object] event payload to send later
# @return [Array, nil] the queue when the event was accepted, nil when dropped
def enqueue(event)
  # Fork-safety gate. A plain pid comparison is the cheapest possible
  # check, which matters because every captured event passes through here.
  detect_fork! unless @pid == Process.pid

  @mutex.synchronize do
    if @queue.size < @config.max_queue_size
      @queue << event
    else
      @config.logger&.warn("[Errsight] Queue full (#{@config.max_queue_size}), dropping event")
      nil
    end
  end
end
|
|
42
|
+
|
|
43
|
+
# Sends at most one batch (batch_size events) taken from the front of the
# queue. Does nothing while rate limited or when the queue is empty.
#
# @return [Object, nil] whatever send_events returns, or nil when nothing
#   was sent
def flush!
  return if rate_limited?

  # Hold the lock only while slicing; the HTTP call happens outside it so
  # enqueue is never blocked behind network I/O.
  batch = @mutex.synchronize do
    @queue.slice!(0, @config.batch_size) unless @queue.empty?
  end

  send_events(batch) if batch&.any?
end
|
|
52
|
+
|
|
53
|
+
# Stops the flush worker, drains whatever is still queued, and closes the
# HTTP connection. Best-effort: any StandardError is swallowed so shutdown
# never raises into the host's exit path.
#
# Unlike a single flush! (which sends only one batch_size slice), the final
# drain keeps flushing until the queue is empty, a flush makes no progress
# (rate limited, or flush! failed), or the shutdown_timeout budget measured
# from the start of this call is used up. At least one flush is always
# attempted, even when the budget was spent waiting for the worker.
#
# @return [Object, nil]
def shutdown!
  budget = @config.shutdown_timeout
  now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  deadline = budget && (now.call + budget)

  # Set the flag and broadcast under the lock the worker uses for its
  # CV#wait: the worker either has not started waiting yet (and will see
  # @shutdown right away) or is waiting (and the broadcast wakes it).
  @sleep_mutex.synchronize do
    @shutdown = true
    @sleep_cv.broadcast
  end

  # Bound the wait. A worker stuck on a hung HTTP request is killed rather
  # than allowed to block the host's signal handling.
  worker = @flush_thread
  worker&.join(budget)
  worker&.kill if worker&.alive?

  # Final drain: covers a killed worker, a worker that was never alive, and
  # queues holding more than one batch.
  loop do
    pending = @mutex.synchronize { @queue.size }
    break if pending.zero?

    begin
      flush!
    rescue StandardError
      break
    end

    remaining = @mutex.synchronize { @queue.size }
    # No progress means flush! is rate limited or re-queued the batch;
    # retrying right away would only spin.
    break if remaining >= pending
    break if deadline && now.call >= deadline
  end

  # http_mutex is lazily created, so this is safe even if no request was
  # ever sent.
  http_mutex.synchronize { @http&.finish if @http&.started? }
rescue StandardError
  # best-effort
end
|
|
77
|
+
|
|
78
|
+
private
|
|
79
|
+
|
|
80
|
+
# Puma cluster mode (and Spring, Unicorn, custom forking servers) all
|
|
81
|
+
# boot the Rails app once in the parent and fork worker processes. Only
|
|
82
|
+
# the calling thread of fork() survives — every other thread, including
|
|
83
|
+
# our flush worker, vanishes in the child. The Client object is inherited
|
|
84
|
+
# with a stale @flush_thread reference and a @queue that mirrors the
|
|
85
|
+
# parent's state at fork time.
|
|
86
|
+
#
|
|
87
|
+
# Without this, every event captured in a Puma worker is enqueued into
|
|
88
|
+
# @queue but nothing ever flushes it. Customers see "the SDK is silently
|
|
89
|
+
# dropping all my events under cluster mode" with no error in logs.
|
|
90
|
+
#
|
|
91
|
+
# Inherited mutexes are also a hazard: if the parent happened to fork
|
|
92
|
+
# while another thread held @mutex or @http_mutex, the child inherits
|
|
93
|
+
# them in a stuck-locked state that no thread will ever release. We
|
|
94
|
+
# rebuild them here so the child starts with clean primitives.
|
|
95
|
+
#
|
|
96
|
+
# The inherited @http connection is a duplicated TCP socket fd shared
|
|
97
|
+
# with the parent; both processes writing to it would interleave bytes
|
|
98
|
+
# on the wire. Drop it so http_connection lazily reconnects on first
|
|
99
|
+
# send.
|
|
100
|
+
#
|
|
101
|
+
# Race tolerance: two threads in the child can race into detect_fork!
|
|
102
|
+
# before either has updated @pid, and each spawn a flush thread. Both
|
|
103
|
+
# survive (same pid_at_start, same @pid) and end up taking turns on
|
|
104
|
+
# @mutex.synchronize when slicing the queue. No correctness issue —
|
|
105
|
+
# each batch is handed to exactly one sender via the slice — just a
|
|
106
|
+
# transient 2x flush rate until one thread is GC'd. The pid_at_start
|
|
107
|
+
# guard exists for the actual fork case (parent's flush thread doesn't
|
|
108
|
+
# survive POSIX fork, but defensive against ports/runtimes where it
|
|
109
|
+
# might).
|
|
110
|
+
# Resets all per-process state after a fork and starts a fresh flush worker.
# Called from enqueue when the recorded pid no longer matches Process.pid.
#
# @return [Object] whatever start_flush_worker returns
def detect_fork!
  # Start from an empty queue and no connection, so the child never writes
  # to a connection object inherited from the parent.
  @queue = []
  @http = nil

  # Locks copied from the parent may be held by threads that do not exist
  # in this process; replace them with fresh ones.
  @mutex = Mutex.new
  @http_mutex = Mutex.new

  @flush_thread = nil
  # Published only after the queue and locks above are rebuilt.
  @pid = Process.pid

  # The wait primitives get the same treatment as the locks above.
  @sleep_mutex = Mutex.new
  @sleep_cv = ConditionVariable.new

  # A shutdown flag or 429 pause inherited from the parent says nothing
  # about this process, and a stale @shutdown=true would make the new
  # worker exit on its first loop check.
  @shutdown = false
  @rate_limited_until = nil

  start_flush_worker
end
|
|
129
|
+
|
|
130
|
+
# Spawns the background thread that calls flush! once per flush_interval
# and stores it in @flush_thread.
#
# @return [false] the value assigned to abort_on_exception
def start_flush_worker
  owner_pid = @pid

  worker = Thread.new do
    loop do
      # CV#wait releases @sleep_mutex while waiting and re-acquires it
      # before returning. Checking @shutdown under that lock pairs with
      # shutdown!, which sets the flag under the same lock before it
      # broadcasts: the flag is either seen here (no wait at all) or the
      # broadcast ends the wait early.
      @sleep_mutex.synchronize do
        @sleep_cv.wait(@sleep_mutex, @config.flush_interval) unless @shutdown
      end

      # Leave the loop on shutdown, or when a later detect_fork! has
      # rewritten @pid and installed a newer worker that now owns the queue.
      break if @shutdown || @pid != owner_pid

      begin
        flush!
      rescue StandardError
        nil
      end
    end

    # One last flush on the way out so events queued during the final
    # interval are not left behind on a clean exit.
    begin
      flush!
    rescue StandardError
      nil
    end
  end

  @flush_thread = worker
  worker.abort_on_exception = false
end
|
|
160
|
+
|
|
161
|
+
# Tells whether sends are currently paused after a 429 response.
#
# @return [Boolean, nil] true while the pause is active, false once it has
#   elapsed, nil when no pause was ever recorded
def rate_limited?
  # Read the ivar once so a concurrent reset cannot change it mid-check.
  paused_until = @rate_limited_until
  return paused_until unless paused_until

  Time.now < paused_until
end
|
|
165
|
+
|
|
166
|
+
# Cached once at process boot — set_default_paths reads the system trust
|
|
167
|
+
# store from disk and is expensive to redo per request.
|
|
168
|
+
# Memoized OpenSSL trust store loaded from the system default paths. Built
# on first use and then reused, because set_default_paths reads the trust
# store from disk and is too expensive to repeat per request.
#
# @return [OpenSSL::X509::Store]
def self.cert_store
  return @cert_store if @cert_store

  store = OpenSSL::X509::Store.new
  store.set_default_paths
  @cert_store = store
end
|
|
171
|
+
|
|
172
|
+
# Lazily created lock that serializes use of the shared @http connection.
#
# @return [Mutex]
def http_mutex
  return @http_mutex if @http_mutex

  @http_mutex = Mutex.new
end
|
|
175
|
+
|
|
176
|
+
# Returns a started Net::HTTP connection to the events endpoint, reusing the
# cached one while it is still open and points at the same host and port.
# Callers are expected to hold http_mutex (send_events does).
#
# @return [Net::HTTP] a started connection, also cached in @http
# @raise [StandardError] whatever Net::HTTP#start raises on connect failure
def http_connection
  uri = URI(@config.events_endpoint)
  if @http && @http.address == uri.host && @http.port == uri.port && @http.started?
    return @http
  end

  # Close a stale or re-targeted connection; finishing an already closed
  # connection raises, which is irrelevant here.
  begin
    @http&.finish
  rescue StandardError
    nil
  end

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = uri.scheme == "https"
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER
  http.cert_store = self.class.cert_store
  http.open_timeout = @config.timeout
  http.read_timeout = @config.timeout
  # Without this the write side keeps Net::HTTP's own default, so a stalled
  # upload of a large batch would ignore config.timeout. The setter exists
  # since Ruby 2.6, hence the respond_to? guard.
  http.write_timeout = @config.timeout if http.respond_to?(:write_timeout=)
  http.keep_alive_timeout = 30
  http.start
  @http = http
end
|
|
193
|
+
|
|
194
|
+
# Largest JSON body, in bytes, sent in a single request. Bigger multi-event
# batches are split in half recursively by send_events.
# NOTE(review): presumably chosen to stay under a server-side body limit —
# confirm against the API.
MAX_PAYLOAD_BYTES = 490 * 1024

# POSTs +events+ as one JSON array to the events endpoint.
#
# * Batches whose body exceeds MAX_PAYLOAD_BYTES are halved and sent
#   recursively (a single oversized event is still sent as-is).
# * On 429 the events are put back at the front of the queue and sends are
#   paused until @rate_limited_until.
# * Any other non-success response is logged and the events are not retried.
# * Any StandardError is logged and the cached connection is discarded so
#   the next send reconnects; the events of this call are not re-queued.
#
# @param events [Array] JSON-serializable events
# @return [Object, nil]
def send_events(events)
  uri = URI(@config.events_endpoint)
  body = JSON.generate(events)

  if body.bytesize > MAX_PAYLOAD_BYTES && events.size > 1
    mid = (events.size / 2.0).ceil
    send_events(events.first(mid))
    send_events(events.last(events.size - mid))
    return
  end

  request = Net::HTTP::Post.new(uri.path)
  request["Content-Type"] = "application/json"
  request["X-API-Key"] = @config.api_key
  request["Connection"] = "keep-alive"
  request.body = body

  response = http_mutex.synchronize { http_connection.request(request) }

  if response.code == "429"
    # Retry-After may be delta-seconds or an HTTP-date. Only a plain
    # non-negative integer is trusted; anything else (date, garbage,
    # negative, blank) falls back to the same 60s used when the header is
    # missing. A bare to_i would turn those into a pause of 0s or less and
    # resume sends on the very next tick.
    header = response["Retry-After"].to_s.strip
    retry_after = header.match?(/\A\d+\z/) ? header.to_i : 60
    # Cap the pause so a buggy or hostile upstream cannot park sends for
    # hours.
    retry_after = 600 if retry_after > 600
    @config.logger&.warn("[Errsight] Rate limited — re-queuing #{events.size} events, pausing sends for #{retry_after}s")
    @mutex.synchronize { @queue.unshift(*events) }
    # Record a "no sends until" timestamp instead of sleeping: flush!
    # checks rate_limited? first, so the flush thread keeps ticking and
    # stays responsive to shutdown!.
    @rate_limited_until = Time.now + retry_after
    return
  end

  unless response.is_a?(Net::HTTPSuccess) || response.code == "202"
    @config.logger&.warn("[Errsight] API error #{response.code}: #{response.body&.slice(0, 200)}")
  end
rescue StandardError => e
  @config.logger&.warn("[Errsight] Failed to send events: #{e.class}: #{e.message}")
  http_mutex.synchronize do
    begin
      @http&.finish
    rescue StandardError
      nil
    end
    @http = nil
  end
end
|
|
240
|
+
end
|
|
241
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
require "logger"

module Errsight
  # Runtime settings for the SDK. Defaults come from ERRSIGHT_* environment
  # variables where noted below; everything is overridable via accessors.
  class Configuration
    attr_accessor :api_key, :environment, :min_level, :host, :timeout,
                  :enabled, :batch_size, :flush_interval, :max_queue_size,
                  :logger, :attach_to_rails_logger, :release, :shutdown_timeout,
                  :breadcrumbs_active_record, :breadcrumbs_active_record_capture_binds,
                  :before_send

    def initialize
      @api_key = ENV["ERRSIGHT_API_KEY"]
      @environment = ENV.fetch("ERRSIGHT_ENV", "production")
      @min_level = :warning
      @host = ENV.fetch("ERRSIGHT_HOST", "https://errsight.com")
      @timeout = 5
      @enabled = true
      @batch_size = 10
      @flush_interval = 2 # seconds
      @max_queue_size = 1_000
      @logger = default_logger
      # Off by default. When true, every Rails.logger call at min_level or
      # above becomes an Errsight event, which floods the issue list with
      # framework deprecation noise and burns event quota on something that
      # belongs in a log aggregator. Customers who want log forwarding can
      # opt in.
      @attach_to_rails_logger = false
      @release = ENV["ERRSIGHT_RELEASE"]
      @shutdown_timeout = 5 # seconds to wait for flush thread on shutdown
      # Auto-collect SQL queries as breadcrumbs via the sql.active_record
      # notification. On by default; very high query-volume workloads can
      # disable it.
      @breadcrumbs_active_record = true
      # Bind values can carry PII (emails, tokens, customer IDs), so only
      # the parameterized SQL is shipped unless explicitly opted in.
      @breadcrumbs_active_record_capture_binds = false
      # Per-event filter. Receives the event hash and must return a
      # (possibly modified) hash to send, or nil to drop the event.
      @before_send = nil
    end

    # @return [Boolean] true only when enabled and a non-blank api_key is set
    def enabled?
      @enabled && api_key_present?
    end

    # Logs a debug hint when no api_key is configured. Never raises.
    #
    # @return [Object, nil]
    def validate!
      return if api_key_present?

      logger&.debug("[Errsight] api_key is not set; Errsight is disabled until one is configured (set ERRSIGHT_API_KEY or config.api_key).")
    end

    # @return [String] host (without trailing slash) plus the events path
    def events_endpoint
      "#{host.chomp('/')}/api/v1/events"
    end

    private

    # True when api_key is neither nil nor whitespace-only.
    def api_key_present?
      !api_key.nil? && !api_key.strip.empty?
    end

    # Rails' logger when Rails is loaded, otherwise a stdlib logger on
    # $stdout. Constants are root-qualified on purpose: inside this module a
    # bare `Logger` is looked up as Errsight::Logger first.
    # NOTE(review): the gem ships lib/errsight/logger.rb, which presumably
    # defines Errsight::Logger — confirm; if so the bare constant picked
    # that class instead of the stdlib one.
    def default_logger
      if defined?(::Rails) && ::Rails.respond_to?(:logger)
        ::Rails.logger
      else
        ::Logger.new($stdout)
      end
    end
  end
end
|
data/lib/errsight/hub.rb
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
module Errsight
  # Holds a stack of scopes; the last entry is the "current" scope. One hub
  # is kept per Thread.current[:errsight_hub] slot.
  #
  # Request and job middleware push a scope for the duration of a unit of
  # work, so set_user/set_tag/add_breadcrumb calls made inside it do not
  # leak into the next unit of work handled by the same thread.
  class Hub
    class << self
      # @return [Hub] the hub stored for the current thread, created on
      #   first use
      def current
        existing = Thread.current[:errsight_hub]
        return existing if existing

        Thread.current[:errsight_hub] = new
      end

      # Test helper: forget the stored hub so the next call to .current
      # starts with a clean scope stack.
      def reset_current!
        Thread.current[:errsight_hub] = nil
      end
    end

    def initialize
      @stack = [Scope.new]
    end

    # @return [Scope] the scope on top of the stack
    def current_scope
      @stack.last
    end

    # Pushes +scope+ as-is, or a dup of the current scope when none is
    # given (a given scope is how job middleware installs a rehydrated one).
    #
    # @return [Scope] the new current scope
    def push_scope(scope = nil)
      @stack << (scope || current_scope.dup)
      current_scope
    end

    # Removes the top scope, but never the last remaining one, so
    # current_scope can never become nil after an over-eager pop.
    #
    # @return [Scope, nil] the removed scope, or nil when nothing was popped
    def pop_scope
      return if @stack.size <= 1

      @stack.pop
    end

    # Runs the block with a pushed scope (see push_scope) and pops it
    # afterwards, even when the block raises.
    #
    # @yieldparam scope [Scope] the scope active inside the block
    def with_scope(scope = nil)
      push_scope(scope)
      yield current_scope
    ensure
      pop_scope
    end
  end
end
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
module Errsight
|
|
2
|
+
module Integrations
|
|
3
|
+
# ActiveJob integration. Closes the scope-propagation gap for queue
|
|
4
|
+
# adapters that aren't Sidekiq — Solid Queue (Rails 8 default),
|
|
5
|
+
# GoodJob, Delayed::Job's AJ adapter, etc.
|
|
6
|
+
#
|
|
7
|
+
# Two responsibilities:
|
|
8
|
+
#
|
|
9
|
+
# 1. Snapshot the current scope into the serialized job hash on
|
|
10
|
+
# enqueue, rehydrate it on dequeue. Same "user breadcrumbs only"
|
|
11
|
+
# rule as Sidekiq client/server middleware (Scope#to_h enforces
|
|
12
|
+
# this).
|
|
13
|
+
#
|
|
14
|
+
# 2. Capture exceptions raised inside `perform` with structured
|
|
15
|
+
# ActiveJob context (job class, queue, executions, adapter).
|
|
16
|
+
# Deduped against Rails.error.subscribe + Sidekiq middleware via
|
|
17
|
+
# the shared thread-local seen-set.
|
|
18
|
+
#
|
|
19
|
+
# When the customer uses Sidekiq+ActiveJob, both our Sidekiq middleware
|
|
20
|
+
# and this layer fire. The dedup ensures one event per error; the
|
|
21
|
+
# outermost capturer wins on tags. ActiveJob-wrapped Sidekiq jobs
|
|
22
|
+
# already get correct unwrapped worker name + args from the Sidekiq
|
|
23
|
+
# middleware, so the redundancy is harmless.
|
|
24
|
+
module ActiveJob
|
|
25
|
+
extend ::ActiveSupport::Concern if defined?(::ActiveSupport::Concern)
|
|
26
|
+
|
|
27
|
+
MAX_ARG_BYTES = 4_096
|
|
28
|
+
|
|
29
|
+
included do
|
|
30
|
+
around_perform { |job, block| job.send(:__errsight_around_perform, &block) }
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Snapshot scope at enqueue time. ActiveJob calls `serialize` to
|
|
34
|
+
# produce the wire payload that adapters store; our override
|
|
35
|
+
# appends a scope key. Adapter-agnostic: works for any backend
|
|
36
|
+
# that round-trips the serialized hash.
|
|
37
|
+
# Adds a snapshot of the current Errsight scope to the serialized job hash
# under "errsight_scope" (omitted when the snapshot is empty).
#
# Snapshotting is best-effort: if reading the scope raises, the job is
# serialized without the extra key instead of failing the host's enqueue.
# Errors raised by `super` itself still propagate.
#
# @return [Hash] the hash produced by super, possibly with "errsight_scope"
def serialize
  super.tap do |hash|
    begin
      snapshot = Errsight.current_scope.to_h
      hash["errsight_scope"] = snapshot unless snapshot.empty?
    rescue StandardError
      # Telemetry must never prevent a job from being enqueued.
    end
  end
end
|
|
43
|
+
|
|
44
|
+
# Capture the snapshot back. ActiveJob calls deserialize before
|
|
45
|
+
# perform; we stash it on the job instance rather than push to the
|
|
46
|
+
# hub here because perform happens later (around_perform is the
|
|
47
|
+
# right place to push so the scope unwinds at perform end).
|
|
48
|
+
# Lets the parent implementation restore the job, then keeps the scope
# snapshot from the payload on the instance. The snapshot is only stored
# here; it is read when the perform wrapper builds the job's scope.
#
# @param job_data [Hash] serialized job payload
# @return [Object, nil] the stored snapshot (nil when the payload has none)
def deserialize(job_data)
  super(job_data)
  stored = job_data["errsight_scope"]
  @__errsight_scope_snapshot = stored
end
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
|
|
55
|
+
# Runs the job body (the given block) inside the job's Errsight scope,
# reports anything it raises, and re-raises so the caller still sees the
# original exception.
#
# @yield the wrapped perform
# @return [Object] whatever Errsight.with_scope returns
def __errsight_around_perform
  job_scope = __errsight_scope_for_job

  Errsight.with_scope(job_scope) do
    begin
      yield
    rescue Exception => raised
      # Deliberately broad: the exception is only observed, then re-raised
      # untouched.
      __errsight_capture_active_job_error(raised)
      raise
    end
  end
ensure
  # Reset the per-thread dedup list at the job boundary so the next job on
  # this thread starts with an empty one.
  # NOTE(review): this runs before the exception reaches anything wrapping
  # this callback, so an outer capturer relying on the same list would no
  # longer see the entry — confirm that is intended.
  Thread.current[:errsight_captured_exceptions] = nil
end
|
|
71
|
+
|
|
72
|
+
# Builds the scope a job runs under: a copy of the hub's current scope with
# the snapshot captured at enqueue time merged on top. A snapshot that is
# not a Hash is ignored.
#
# @return [Errsight::Scope]
def __errsight_scope_for_job
  Errsight.hub.current_scope.dup.tap do |copy|
    saved = @__errsight_scope_snapshot
    copy.merge!(::Errsight::Scope.from_h(saved)) if saved.is_a?(Hash)
  end
end
|
|
78
|
+
|
|
79
|
+
# Reports +exception+ to Errsight with ActiveJob tags and metadata, at most
# once per exception object per thread-local dedup list.
#
# @param exception [Exception]
# @return [Object, nil] the capture result; nil when already reported or
#   when reporting itself failed
def __errsight_capture_active_job_error(exception)
  reported = (Thread.current[:errsight_captured_exceptions] ||= [])
  marker = exception.object_id
  return if reported.include?(marker)

  reported.push(marker)

  Errsight.capture_exception(
    exception,
    tags: __errsight_active_job_tags,
    metadata: { active_job: __errsight_active_job_metadata }
  )
rescue StandardError
  # A failure while reporting must not replace the job's own exception,
  # which the caller re-raises right after this returns.
  nil
end
|
|
93
|
+
|
|
94
|
+
# Tags attached to events captured from this job. Entries whose value is
# blank after to_s/strip are left out.
#
# @return [Hash{String => Object}]
def __errsight_active_job_tags
  candidates = [
    ["active_job.class", self.class.name],
    ["active_job.queue", queue_name.to_s],
    ["active_job.job_id", job_id.to_s],
    ["active_job.adapter", __errsight_adapter_name.to_s]
  ]

  candidates.each_with_object({}) do |(key, value), tags|
    tags[key] = value unless value.to_s.strip.empty?
  end
end
|
|
102
|
+
|
|
103
|
+
# Structured job details attached to captured events. Optional attributes
# are read only when the job responds to them, and nil values are dropped.
#
# @return [Hash{String => Object}]
def __errsight_active_job_metadata
  details = { "arguments" => __errsight_filter_arguments(arguments) }
  details["executions"] = executions if respond_to?(:executions)
  details["enqueued_at"] = enqueued_at&.iso8601(3) if respond_to?(:enqueued_at)
  details["scheduled_at"] = scheduled_at&.iso8601(3) if respond_to?(:scheduled_at)
  details["queue_name"] = queue_name
  details["priority"] = priority if respond_to?(:priority)
  details.compact
end
|
|
113
|
+
|
|
114
|
+
# Different Rails / ActiveJob versions expose adapter naming
|
|
115
|
+
# differently. Try the canonical method, fall back to inferring
|
|
116
|
+
# from the adapter class name. Wrapped in rescue because some
|
|
117
|
+
# custom adapters can raise on these accessors.
|
|
118
|
+
# Name of the queue adapter for this job class: queue_adapter_name when the
# class offers it, otherwise derived from the adapter's class name (last
# namespace segment, "Adapter" suffix removed, downcased). Any error from
# those accessors yields "unknown".
#
# @return [Object] usually a String
def __errsight_adapter_name
  job_class = self.class
  return job_class.queue_adapter_name if job_class.respond_to?(:queue_adapter_name)

  adapter_class_name = job_class.queue_adapter.class.name.to_s
  adapter_class_name.split("::").last.sub(/Adapter\z/, "").downcase
rescue StandardError
  "unknown"
end
|
|
127
|
+
|
|
128
|
+
# Same PII strategy as Sidekiq middleware: walk hashes through
|
|
129
|
+
# ActiveSupport::ParameterFilter so the host's filter_parameters
|
|
130
|
+
# config (passwords, tokens, ssn fields) is honored. Cap value
|
|
131
|
+
# sizes defensively — ActiveJob args can include serialized
|
|
132
|
+
# ActiveRecord objects via GlobalID, which are short, but custom
|
|
133
|
+
# serializers might emit large blobs.
|
|
134
|
+
# Filters every job argument through __errsight_filter_one.
#
# @param args [Object] job arguments; anything that is not an Array yields []
# @return [Array]
def __errsight_filter_arguments(args)
  args.is_a?(Array) ? args.map { |arg| __errsight_filter_one(arg) } : []
end
|
|
138
|
+
|
|
139
|
+
# Filters a single argument value: hashes go through the parameter filter,
# arrays are filtered element by element, anything else is size-capped.
#
# @param value [Object]
# @return [Object]
def __errsight_filter_one(value)
  return __errsight_filter_hash(value) if Hash === value
  return value.map { |element| __errsight_filter_one(element) } if Array === value

  __errsight_truncate(value)
end
|
|
146
|
+
|
|
147
|
+
# Stringifies the keys of +hash+, runs it through the class-level parameter
# filter when one is available, then filters each value recursively. If
# anything in that chain raises, a placeholder hash is returned instead.
#
# @param hash [Hash]
# @return [Hash]
def __errsight_filter_hash(hash)
  param_filter = self.class.send(:__errsight_parameter_filter)
  string_keyed = hash.transform_keys(&:to_s)
  string_keyed = param_filter.filter(string_keyed) if param_filter
  string_keyed.transform_values { |entry| __errsight_filter_one(entry) }
rescue StandardError
  { "_unfilterable" => "[#{hash.class.name}]" }
end
|
|
155
|
+
|
|
156
|
+
# Returns +value+ unchanged when its string form fits in MAX_ARG_BYTES,
# otherwise a short placeholder naming its class and byte size.
#
# @param value [Object]
# @return [Object, String]
def __errsight_truncate(value)
  byte_count = value.to_s.bytesize
  byte_count > MAX_ARG_BYTES ? "[truncated #{value.class.name} #{byte_count}b]" : value
end
|
|
161
|
+
|
|
162
|
+
class_methods do
|
|
163
|
+
# Cached at class level — ParameterFilter rebuilds aren't free
|
|
164
|
+
# and the filter list doesn't change at runtime.
|
|
165
|
+
# Memoized ActiveSupport::ParameterFilter built from the Rails app's
# filter_parameters, or nil when ActiveSupport::ParameterFilter, Rails or
# Rails.application is unavailable. The nil result is memoized as well.
#
# @return [ActiveSupport::ParameterFilter, nil]
def __errsight_parameter_filter
  unless defined?(@__errsight_parameter_filter)
    usable = defined?(::ActiveSupport::ParameterFilter) && defined?(::Rails) && ::Rails.application
    @__errsight_parameter_filter =
      (::ActiveSupport::ParameterFilter.new(::Rails.application.config.filter_parameters) if usable)
  end

  @__errsight_parameter_filter
end
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
require "active_support/notifications"
|
|
2
|
+
|
|
3
|
+
module Errsight
|
|
4
|
+
module Integrations
|
|
5
|
+
# Subscribes to sql.active_record and pushes each non-cached, non-schema
|
|
6
|
+
# query into the current scope's DB breadcrumb ring. When an exception
|
|
7
|
+
# is later captured in the same request/job, the event ships with the
|
|
8
|
+
# last 30 queries that ran — turning "ActiveRecord::RecordNotFound" from
|
|
9
|
+
# an opaque stack trace into "we ran these 5 queries before failing on
|
|
10
|
+
# this one."
|
|
11
|
+
#
|
|
12
|
+
# This is the killer demo for the Rails wedge. Sentry-ruby has SQL
|
|
13
|
+
# breadcrumbs but they come from a generic Notifications layer that
|
|
14
|
+
# treats every framework the same; here it's first-class and tuned for
|
|
15
|
+
# what Rails apps actually emit.
|
|
16
|
+
module ActiveRecord
|
|
17
|
+
MAX_SQL_BYTES = 2_048
|
|
18
|
+
MAX_BIND_VALUES = 20
|
|
19
|
+
INTERNAL_NAMES = %w[SCHEMA TRANSACTION].freeze
|
|
20
|
+
|
|
21
|
+
class << self
|
|
22
|
+
# Idempotent. The subscriber handle is stored at module level so a
|
|
23
|
+
# double-require (gem reloaded in dev, or required from both the
|
|
24
|
+
# Railtie and a manual `require`) doesn't double-fire crumbs.
|
|
25
|
+
# Subscribes to "sql.active_record" once and keeps the subscriber handle,
# so repeated calls do not register a second subscriber.
#
# @return [Object] the subscriber handle
def subscribe!
  @subscriber ||= ::ActiveSupport::Notifications.subscribe("sql.active_record") do |*event_args|
    handle(::ActiveSupport::Notifications::Event.new(*event_args))
  end
end
|
|
31
|
+
|
|
32
|
+
# Removes the subscriber registered by subscribe!, if there is one, and
# forgets the handle.
#
# @return [nil]
def unsubscribe!
  return unless @subscriber

  ::ActiveSupport::Notifications.unsubscribe(@subscriber)
  @subscriber = nil
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
# Turns one sql.active_record notification into a DB breadcrumb on the
# current scope. Internal (SCHEMA/TRANSACTION), cached and empty queries are
# skipped. Never raises: any StandardError is swallowed so the subscriber
# cannot break the host's request or job.
#
# @param event [#payload, #duration] notification event
# @return [Object, nil]
def handle(event)
  payload = event.payload
  return if INTERNAL_NAMES.include?(payload[:name])
  return if payload[:cached]

  sql = payload[:sql].to_s
  return if sql.empty?
  if sql.bytesize > MAX_SQL_BYTES
    # byteslice cuts on a byte boundary and can split a multibyte
    # character, leaving an invalid string that later makes JSON
    # generation fail. scrub("") drops such a dangling partial character.
    sql = sql.byteslice(0, MAX_SQL_BYTES).scrub("") + "…[truncated]"
  end

  duration_ms = event.duration.round(2)
  name = payload[:name].to_s
  message = name.empty? ? "SQL (#{duration_ms}ms)" : "#{name} (#{duration_ms}ms)"

  data = { sql: sql, duration_ms: duration_ms }
  data[:name] = name unless name.empty?
  data[:connection_id] = payload[:connection_id] if payload[:connection_id]

  # Bind values are attached only when explicitly enabled in the
  # configuration.
  if Errsight.configuration.breadcrumbs_active_record_capture_binds
    binds = extract_binds(payload[:type_casted_binds] || payload[:binds])
    data[:binds] = binds unless binds.empty?
  end

  Errsight.current_scope.add_db_breadcrumb(message: message, data: data)
rescue StandardError
  # A missing breadcrumb is far less bad than a crashed page.
end
|
|
70
|
+
|
|
71
|
+
# Bind values across Rails versions:
|
|
72
|
+
# - 7.x+: ActiveModel::Attribute instances responding to #value
|
|
73
|
+
# - older: [ActiveRecord::ConnectionAdapters::Column, value] tuples
|
|
74
|
+
# - type_casted_binds: already plain values (preferred when set)
|
|
75
|
+
# Cap to MAX_BIND_VALUES so a SELECT IN (1..10000) doesn't ship 10k
|
|
76
|
+
# values per breadcrumb.
|
|
77
|
+
# Reduces bind parameters to plain values, keeping at most MAX_BIND_VALUES.
# Handles the three shapes seen in the payload: objects responding to
# #value, [column, value] pairs, and already plain values.
#
# @param binds [Object] bind list; anything that is not an Array yields []
# @return [Array] extracted values, or [] when extraction raises
def extract_binds(binds)
  return [] unless binds.is_a?(Array)

  binds.take(MAX_BIND_VALUES).map do |bind|
    next bind.value if bind.respond_to?(:value)

    bind.is_a?(Array) ? bind.last : bind
  end
rescue StandardError
  []
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|