dispatch-rails 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -0
- data/lib/dispatch/rails/configuration.rb +18 -0
- data/lib/dispatch/rails/engine.rb +12 -0
- data/lib/dispatch/rails/solid_queue_subscriber.rb +168 -0
- data/lib/dispatch/rails/version.rb +1 -1
- data/lib/dispatch-rails.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 797a73d62a39b0ab3640364d8ef71186b61f3233da2788f7733b72c85e77cc75
|
|
4
|
+
data.tar.gz: 235fa20b827e25a6f0537e20d3cdd51221c0bebbed18013046901c9bdabf51ed
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f3cbf75d09473580ec5f66ffaaf7d7623631c1a453bbe9b67b86e091e4fbcb56cd7da1ea141080b6375b714802915c00c4fd9f64d7c649bb2de7e6d6b46d1c0b
|
|
7
|
+
data.tar.gz: f24f36ca5d2769053faa65cb5b77da6e09a28aea5b4e3166bcbb64e2473729cd9b54df4d6936572fe3ecb41937cb5e43e32f0b9de24f2c27b1879532da0232de
|
data/README.md
CHANGED
|
@@ -98,6 +98,40 @@ c.capture_at_exit = true # default; set false to skip the crash-at-exit report
|
|
|
98
98
|
c.shutdown_timeout = 3 # seconds to wait for the send queue to drain at exit; 0 skips
|
|
99
99
|
```
|
|
100
100
|
|
|
101
|
+
### SolidQueue infrastructure failures
|
|
102
|
+
|
|
103
|
+
Errors raised **inside** a job's `perform` are reported automatically (ActiveJob
|
|
104
|
+
sends them through `Rails.error`, which the SDK subscribes to). But SolidQueue's
|
|
105
|
+
own *infrastructure* failures never touch that path — SolidQueue writes them
|
|
106
|
+
straight into its own tables, so they don't reach `Rails.error` and stay
|
|
107
|
+
invisible in the dashboard. The classic case: a container runs out of memory,
|
|
108
|
+
Heroku SIGKILLs the worker, and the supervisor later prunes the dead process and
|
|
109
|
+
force-fails the jobs it had claimed with `SolidQueue::Processes::ProcessPrunedError`
|
|
110
|
+
— a failure you'd only find by opening the queue.
|
|
111
|
+
|
|
112
|
+
When SolidQueue is present, the SDK subscribes to its `ActiveSupport::Notifications`
|
|
113
|
+
and turns these into first-class events (all tagged `source: solid_queue`):
|
|
114
|
+
|
|
115
|
+
- **Lost jobs** (`fail_many_claimed`) — claimed jobs force-failed because their
|
|
116
|
+
worker died (pruned after a missed heartbeat, or orphaned by a dead process).
|
|
117
|
+
Reported as `Dispatch::Rails::SolidQueueJobsLost` with `tags.jobs_lost`,
|
|
118
|
+
`tags.process_ids`, and `tags.job_ids`. **This is the OOM-restart case.**
|
|
119
|
+
- **Thread crashes** (`thread_error`) — an exception that escaped a SolidQueue
|
|
120
|
+
thread (supervisor/dispatcher/worker/scheduler), reported with its real class
|
|
121
|
+
and backtrace. Captured even if your app overrides `SolidQueue.on_thread_error`
|
|
122
|
+
away from the default `Rails.error.report` (and deduped against that default when it is left in place).
|
|
123
|
+
- **Recurring-enqueue misses** (`enqueue_recurring_task`) — a cron task that
|
|
124
|
+
failed to enqueue, as `Dispatch::Rails::SolidQueueEnqueueError`.
|
|
125
|
+
|
|
126
|
+
```ruby
|
|
127
|
+
c.capture_solid_queue = true # default; no-op unless SolidQueue is running
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
It only does anything when SolidQueue is actually running, so apps that don't use
|
|
131
|
+
it pay nothing. Memory pressure itself (Heroku R14/R15) is a platform signal, not
|
|
132
|
+
an app exception — pair this with a Heroku memory alert to catch the root cause,
|
|
133
|
+
not just the orphaned jobs.
|
|
134
|
+
|
|
101
135
|
### Traffic heartbeats (confound signal)
|
|
102
136
|
|
|
103
137
|
On by default in enabled environments, the SDK ships lightweight per-`transaction`
|
data/lib/dispatch/rails/configuration.rb
CHANGED
|
@@ -20,6 +20,9 @@ module Dispatch
|
|
|
20
20
|
attr_accessor :capture_csp_violations, :capture_browser_reports, :reporting_endpoint_path
|
|
21
21
|
# Process lifecycle (crash-at-exit capture, rake failures, shutdown flush)
|
|
22
22
|
attr_accessor :capture_at_exit, :shutdown_timeout
|
|
23
|
+
# SolidQueue infrastructure failures (pruned/orphaned jobs, thread crashes,
|
|
24
|
+
# recurring-enqueue misses) that never flow through Rails.error/ActiveJob.
|
|
25
|
+
attr_accessor :capture_solid_queue
|
|
23
26
|
# Structured error responses (API-only)
|
|
24
27
|
attr_accessor :structured_error_responses, :annotate_error_body, :report_base_url
|
|
25
28
|
# Traffic heartbeats — per-transaction success counts that let the Dispatch
|
|
@@ -67,6 +70,13 @@ module Dispatch
|
|
|
67
70
|
@capture_at_exit = true
|
|
68
71
|
@shutdown_timeout = 3 # seconds to wait for the queue to drain at exit; 0 skips the flush
|
|
69
72
|
|
|
73
|
+
# SolidQueue infrastructure capture. On by default in enabled environments
|
|
74
|
+
# (no-op unless SolidQueue is actually running, so apps that don't use it
|
|
75
|
+
# pay nothing). These failures — a worker pruned after a missed heartbeat,
|
|
76
|
+
# jobs orphaned by a dead process, a recurring task that didn't enqueue —
|
|
77
|
+
# bypass Rails.error entirely, so without this they never reach Dispatch.
|
|
78
|
+
@capture_solid_queue = true
|
|
79
|
+
|
|
70
80
|
# Structured error responses (off by default — opt-in so we never alter a
|
|
71
81
|
# host app's error contract without being asked).
|
|
72
82
|
@structured_error_responses = false
|
|
@@ -145,6 +155,14 @@ module Dispatch
|
|
|
145
155
|
@capture_traffic && error_tracking_enabled? && environment_enabled?
|
|
146
156
|
end
|
|
147
157
|
|
|
158
|
+
# SolidQueue capture shares error capture's gating (credentials + the
|
|
159
|
+
# capture_exceptions master switch + enabled environments), plus its own
|
|
160
|
+
# toggle. The subscriber is wired unconditionally at boot and checks this
|
|
161
|
+
# at event time, so toggling it in an initializer always takes effect.
|
|
162
|
+
def solid_queue_tracking_enabled?
|
|
163
|
+
@capture_solid_queue && error_tracking_enabled? && environment_enabled?
|
|
164
|
+
end
|
|
165
|
+
|
|
148
166
|
def environment_enabled?
|
|
149
167
|
list = Array(@enabled_environments).map(&:to_s)
|
|
150
168
|
list.empty? || list.include?(effective_environment)
|
data/lib/dispatch/rails/engine.rb
CHANGED
|
@@ -45,6 +45,18 @@ module Dispatch
|
|
|
45
45
|
end
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
# SolidQueue infrastructure failures (a worker pruned after a missed
|
|
49
|
+
# heartbeat, jobs orphaned by a dead process, a recurring task that failed
|
|
50
|
+
# to enqueue) are recorded straight into SolidQueue's own tables and never
|
|
51
|
+
# flow through Rails.error/ActiveJob — so the subscriber above never sees
|
|
52
|
+
# them. Bridge SolidQueue's ActiveSupport::Notifications into capture.
|
|
53
|
+
# Subscribed unconditionally and idempotently; the subscriber no-ops unless
|
|
54
|
+
# config.solid_queue_tracking_enabled?, and the events only ever fire when
|
|
55
|
+
# SolidQueue is actually running.
|
|
56
|
+
initializer "dispatch-rails.solid_queue" do
|
|
57
|
+
Dispatch::Rails::SolidQueueSubscriber.install!
|
|
58
|
+
end
|
|
59
|
+
|
|
48
60
|
# Per-transaction traffic heartbeats. Mounted unconditionally; the middleware
|
|
49
61
|
# no-ops at request time unless config.traffic_tracking_enabled? (false in
|
|
50
62
|
# dev/test), so it never phones home outside enabled environments.
|
data/lib/dispatch/rails/solid_queue_subscriber.rb
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
require "active_support/notifications"
|
|
2
|
+
|
|
3
|
+
module Dispatch
|
|
4
|
+
module Rails
|
|
5
|
+
# Synthetic exceptions for SolidQueue infrastructure failures that have no
|
|
6
|
+
# Ruby exception of their own (the real cause is written onto SolidQueue's
|
|
7
|
+
# own tables, not raised). thread_error reports the genuine exception instead.
|
|
8
|
+
class SolidQueueError < StandardError; end
|
|
9
|
+
# A worker process died (pruned after a missed heartbeat, or its process went
|
|
10
|
+
# missing) and SolidQueue force-failed the jobs it had claimed. This is the
|
|
11
|
+
# OOM-restart case: Heroku SIGKILLs the dyno, the supervisor later prunes the
|
|
12
|
+
# dead process and fails its claimed jobs straight into solid_queue_failed_executions.
|
|
13
|
+
class SolidQueueJobsLost < SolidQueueError; end
|
|
14
|
+
# A recurring (cron) task could not be enqueued — a silent scheduling miss.
|
|
15
|
+
class SolidQueueEnqueueError < SolidQueueError; end
|
|
16
|
+
|
|
17
|
+
# Bridges SolidQueue's ActiveSupport::Notifications into Dispatch error
|
|
18
|
+
# capture.
|
|
19
|
+
#
|
|
20
|
+
# SolidQueue records its *infrastructure* failures — a worker pruned after a
|
|
21
|
+
# missed heartbeat, claimed jobs orphaned by a dead process, a recurring task
|
|
22
|
+
# that failed to enqueue — straight into its own tables. They are NOT raised
|
|
23
|
+
# through ActiveJob's perform path, so they never reach Rails.error and never
|
|
24
|
+
# reach the Dispatch::Rails::ErrorSubscriber. The result is an invisible class
|
|
25
|
+
# of failure: jobs that died are only discoverable by opening the queue.
|
|
26
|
+
#
|
|
27
|
+
# This subscriber listens for those events and turns each into a first-class
|
|
28
|
+
# Dispatch event. Captured events (all gated by config.capture_solid_queue and
|
|
29
|
+
# the usual environment gating):
|
|
30
|
+
#
|
|
31
|
+
# fail_many_claimed -> SolidQueueJobsLost (pruned / orphaned / dead-worker jobs)
|
|
32
|
+
# thread_error -> the real exception (a crash in a SolidQueue thread)
|
|
33
|
+
# enqueue_recurring_task -> SolidQueueEnqueueError (only when enqueue_error is set)
|
|
34
|
+
#
|
|
35
|
+
# On thread_error: SolidQueue fires this event BEFORE calling on_thread_error
|
|
36
|
+
# (see SolidQueue::AppExecutor#handle_thread_error), and Reporter marks the
|
|
37
|
+
# exception object on capture. So when the default on_thread_error
|
|
38
|
+
# (-> Rails.error.report) runs next, the Dispatch error subscriber dedups it
|
|
39
|
+
# via that marker — no double report. Capturing here means we still see these
|
|
40
|
+
# crashes even when the host app overrides on_thread_error away from
|
|
41
|
+
# Rails.error (a common customization that would otherwise hide them).
|
|
42
|
+
#
|
|
43
|
+
# Idempotent: install! subscribes once per process; repeat calls no-op. The
|
|
44
|
+
# handlers never raise back into SolidQueue's instrumentation.
|
|
45
|
+
module SolidQueueSubscriber
|
|
46
|
+
EVENTS = %w[
|
|
47
|
+
fail_many_claimed.solid_queue
|
|
48
|
+
thread_error.solid_queue
|
|
49
|
+
enqueue_recurring_task.solid_queue
|
|
50
|
+
].freeze
|
|
51
|
+
|
|
52
|
+
# How many job/process ids to inline into tags before truncating. The
|
|
53
|
+
# payload batches are bounded (prune runs in batches of 50), but we keep
|
|
54
|
+
# tag values short regardless.
|
|
55
|
+
MAX_IDS = 20
|
|
56
|
+
|
|
57
|
+
class << self
|
|
58
|
+
def install!
|
|
59
|
+
return if @installed
|
|
60
|
+
|
|
61
|
+
@installed = true
|
|
62
|
+
@subscriptions = EVENTS.map do |event|
|
|
63
|
+
ActiveSupport::Notifications.subscribe(event) do |name, _start, _finish, _id, payload|
|
|
64
|
+
handle(name, payload)
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
true
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Test/reset seam — unsubscribe so a fresh install! re-registers.
|
|
71
|
+
def uninstall!
|
|
72
|
+
Array(@subscriptions).each { |sub| ActiveSupport::Notifications.unsubscribe(sub) }
|
|
73
|
+
@subscriptions = nil
|
|
74
|
+
@installed = false
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def installed?
|
|
78
|
+
@installed == true
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
private
|
|
82
|
+
|
|
83
|
+
# A subscriber that raises surfaces as an InstrumentationSubscriberError
|
|
84
|
+
# inside SolidQueue's prune/dispatch loop, so every path is guarded.
|
|
85
|
+
# Reporter.capture already never raises; this guards the tag-building too.
|
|
86
|
+
def handle(name, payload)
|
|
87
|
+
return unless Dispatch::Rails.configuration.solid_queue_tracking_enabled?
|
|
88
|
+
|
|
89
|
+
case name
|
|
90
|
+
when "fail_many_claimed.solid_queue" then on_jobs_lost(payload)
|
|
91
|
+
when "thread_error.solid_queue" then on_thread_error(payload)
|
|
92
|
+
when "enqueue_recurring_task.solid_queue" then on_enqueue_recurring_task(payload)
|
|
93
|
+
end
|
|
94
|
+
rescue StandardError => e
|
|
95
|
+
warn "[dispatch-rails] solid_queue subscriber failed: #{e.class}: #{e.message}"
|
|
96
|
+
nil
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# The underlying ProcessPrunedError / ProcessMissingError isn't in the
|
|
100
|
+
# payload — SolidQueue writes it onto the failed_executions rows — so we
|
|
101
|
+
# synthesize a stable error (constant message for clean grouping) and
|
|
102
|
+
# carry the volatile ids/count as tags.
|
|
103
|
+
def on_jobs_lost(payload)
|
|
104
|
+
size = payload[:size].to_i
|
|
105
|
+
return if size.zero?
|
|
106
|
+
|
|
107
|
+
Reporter.capture(
|
|
108
|
+
SolidQueueJobsLost.new(
|
|
109
|
+
"SolidQueue force-failed claimed job(s) after a worker process died"
|
|
110
|
+
),
|
|
111
|
+
handled: false,
|
|
112
|
+
level: "error",
|
|
113
|
+
context: { tags: {
|
|
114
|
+
source: "solid_queue",
|
|
115
|
+
solid_queue_event: "fail_many_claimed",
|
|
116
|
+
jobs_lost: size.to_s,
|
|
117
|
+
process_ids: truncate_ids(payload[:process_ids]),
|
|
118
|
+
job_ids: truncate_ids(payload[:job_ids])
|
|
119
|
+
}.compact }
|
|
120
|
+
)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# The real exception that escaped a SolidQueue thread (supervisor,
|
|
124
|
+
# dispatcher, worker, scheduler) — report it as-is for an accurate class
|
|
125
|
+
# and backtrace. Dedups against the default on_thread_error via the
|
|
126
|
+
# Reporter capture marker (see class comment).
|
|
127
|
+
def on_thread_error(payload)
|
|
128
|
+
error = payload[:error]
|
|
129
|
+
return unless error.is_a?(Exception)
|
|
130
|
+
|
|
131
|
+
Reporter.capture(
|
|
132
|
+
error,
|
|
133
|
+
handled: false,
|
|
134
|
+
level: "error",
|
|
135
|
+
context: { tags: { source: "solid_queue", solid_queue_event: "thread_error" } }
|
|
136
|
+
)
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# A recurring task failed to enqueue. payload[:enqueue_error] is the
|
|
140
|
+
# message string (no exception object), present only on failure.
|
|
141
|
+
def on_enqueue_recurring_task(payload)
|
|
142
|
+
message = payload[:enqueue_error].to_s
|
|
143
|
+
return if message.empty?
|
|
144
|
+
|
|
145
|
+
task = payload[:task].to_s
|
|
146
|
+
Reporter.capture(
|
|
147
|
+
SolidQueueEnqueueError.new("SolidQueue failed to enqueue recurring task #{task}: #{message}"),
|
|
148
|
+
handled: false,
|
|
149
|
+
level: "error",
|
|
150
|
+
context: { tags: {
|
|
151
|
+
source: "solid_queue",
|
|
152
|
+
solid_queue_event: "enqueue_recurring_task",
|
|
153
|
+
task: task
|
|
154
|
+
}.reject { |_, v| v.nil? || v == "" } }
|
|
155
|
+
)
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def truncate_ids(ids)
|
|
159
|
+
list = Array(ids)
|
|
160
|
+
return nil if list.empty?
|
|
161
|
+
|
|
162
|
+
shown = list.first(MAX_IDS).join(",")
|
|
163
|
+
list.size > MAX_IDS ? "#{shown},+#{list.size - MAX_IDS} more" : shown
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
end
|
data/lib/dispatch-rails.rb
CHANGED
|
@@ -9,6 +9,7 @@ require "dispatch/rails/response_annotator"
|
|
|
9
9
|
require "dispatch/rails/heartbeat_aggregator"
|
|
10
10
|
require "dispatch/rails/heartbeat_middleware"
|
|
11
11
|
require "dispatch/rails/error_subscriber"
|
|
12
|
+
require "dispatch/rails/solid_queue_subscriber"
|
|
12
13
|
require "dispatch/rails/rake_handler"
|
|
13
14
|
require "dispatch/rails/engine"
|
|
14
15
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dispatch-rails
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.8.1
|
|
4
|
+
version: 0.9.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dispatch Team
|
|
@@ -57,6 +57,7 @@ files:
|
|
|
57
57
|
- lib/dispatch/rails/reporter.rb
|
|
58
58
|
- lib/dispatch/rails/reporting_endpoint_middleware.rb
|
|
59
59
|
- lib/dispatch/rails/response_annotator.rb
|
|
60
|
+
- lib/dispatch/rails/solid_queue_subscriber.rb
|
|
60
61
|
- lib/dispatch/rails/transport.rb
|
|
61
62
|
- lib/dispatch/rails/version.rb
|
|
62
63
|
homepage: https://dispatchit.app
|