pgbus 0.7.8 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +42 -0
- data/app/helpers/pgbus/streams_helper.rb +3 -1
- data/lib/pgbus/active_job/executor.rb +31 -2
- data/lib/pgbus/client/notify_stream.rb +37 -0
- data/lib/pgbus/client.rb +2 -0
- data/lib/pgbus/configuration.rb +36 -1
- data/lib/pgbus/engine.rb +15 -0
- data/lib/pgbus/event_bus/handler.rb +22 -2
- data/lib/pgbus/instrumentation.rb +15 -6
- data/lib/pgbus/integrations/appsignal/dashboards/pgbus_health.json +87 -0
- data/lib/pgbus/integrations/appsignal/dashboards/pgbus_streams.json +65 -0
- data/lib/pgbus/integrations/appsignal/dashboards/pgbus_throughput.json +81 -0
- data/lib/pgbus/integrations/appsignal/probe.rb +128 -0
- data/lib/pgbus/integrations/appsignal/subscriber.rb +303 -0
- data/lib/pgbus/integrations/appsignal.rb +52 -0
- data/lib/pgbus/outbox.rb +17 -13
- data/lib/pgbus/process/dispatcher.rb +38 -0
- data/lib/pgbus/process/worker.rb +20 -2
- data/lib/pgbus/recurring/scheduler.rb +10 -2
- data/lib/pgbus/streams/turbo_broadcastable.rb +2 -1
- data/lib/pgbus/streams.rb +28 -7
- data/lib/pgbus/version.rb +1 -1
- data/lib/pgbus/web/data_source.rb +43 -4
- data/lib/pgbus/web/streamer/listener.rb +9 -5
- data/lib/pgbus/web/streamer/stream_event_dispatcher.rb +45 -21
- data/lib/pgbus.rb +7 -2
- metadata +8 -1
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pgbus
|
|
4
|
+
module Integrations
|
|
5
|
+
module Appsignal
|
|
6
|
+
# Minutely probe that pushes pgbus-wide gauges into AppSignal.
|
|
7
|
+
#
|
|
8
|
+
# All readings come from Pgbus::Web::DataSource so the probe doesn't
|
|
9
|
+
# duplicate query logic. DataSource is built to be resilient — every
|
|
10
|
+
# method rescues StandardError and returns a safe default — but we
|
|
11
|
+
# still wrap each section in our own rescue so a probe iteration
|
|
12
|
+
# never raises out into the AppSignal probe runner.
|
|
13
|
+
module Probe
|
|
14
|
+
METRIC_PREFIX = "pgbus_"
|
|
15
|
+
private_constant :METRIC_PREFIX
|
|
16
|
+
|
|
17
|
+
class << self
  # Registers a fresh probe with AppSignal under the :pgbus key, once.
  #
  # @return [Boolean] true when this call performed the registration,
  #   false when the probe had already been installed.
  def install! # rubocop:disable Naming/PredicateMethod
    if @installed
      false
    else
      ::Appsignal::Probes.register(:pgbus, new_probe_instance)
      @installed = true
    end
  end

  # @return [Boolean] whether install! has run since the last reset!.
  def installed?
    @installed.equal?(true)
  end

  # Unregisters the :pgbus probe when the loaded AppSignal exposes
  # Probes.unregister, then clears the installed flag.
  def reset!
    can_unregister = defined?(::Appsignal::Probes) && ::Appsignal::Probes.respond_to?(:unregister)
    ::Appsignal::Probes.unregister(:pgbus) if can_unregister
    @installed = false
  end

  # Visible for testing — builds a new, runnable probe object.
  def new_probe_instance
    Runner.new
  end
end
|
|
41
|
+
|
|
42
|
+
# The actual probe object; AppSignal calls #call once per minute.
|
|
43
|
+
class Runner
  # @param data_source [Object, nil] object answering the reader methods
  #   used below (queues_with_metrics, processes, summary_stats, ...).
  #   When nil, a ::Pgbus::Web::DataSource is built lazily on first use,
  #   provided that constant is defined.
  def initialize(data_source: nil)
    @data_source = data_source
  end

  # Reports queue, process, summary and stream gauges, in that order.
  # Returns early without reporting anything when no data source exists.
  def call
    return unless data_source

    track_queues
    track_processes
    track_summary
    track_streams
  end

  private

  # Memoized data source: the injected one, or a newly built
  # ::Pgbus::Web::DataSource when that class is loaded (nil otherwise).
  def data_source
    @data_source ||= begin
      ::Pgbus::Web::DataSource.new if defined?(::Pgbus::Web::DataSource)
    end
  end

  # Emits per-queue gauges tagged with the queue name. The oldest-message
  # age gauge is only emitted when the data source supplies a value.
  def track_queues
    data_source.queues_with_metrics.each do |queue|
      labels = { queue: queue[:name] }
      gauge("queue_depth", queue[:queue_length], labels)
      gauge("queue_visible_depth", queue[:queue_visible_length], labels)
      gauge("queue_paused", (queue[:paused] ? 1 : 0), labels)
      oldest = queue[:oldest_msg_age_sec]
      gauge("queue_oldest_message_age_seconds", oldest, labels) if oldest
    end
  rescue StandardError => error
    log_failure("queue metrics", error)
  end

  # Emits the number of processes returned by the data source.
  def track_processes
    gauge("active_processes", data_source.processes.count)
  rescue StandardError => error
    log_failure("process metrics", error)
  end

  # Emits the bus-wide summary figures; nil figures are skipped by #gauge.
  def track_summary
    summary = data_source.summary_stats
    gauge("total_queues", summary[:total_queues])
    gauge("total_depth", summary[:total_depth])
    gauge("total_visible", summary[:total_visible])
    gauge("dlq_depth", summary[:dlq_depth])
    gauge("failed_events_total", summary[:failed_count])
    gauge("throughput_rate", summary[:throughput_rate])
    gauge("total_dead_tuples", summary[:total_dead_tuples])
    gauge("tables_needing_vacuum", summary[:tables_needing_vacuum])
    gauge("oldest_transaction_age_seconds", summary[:oldest_transaction_age_sec])
  rescue StandardError => error
    log_failure("summary metrics", error)
  end

  # Emits stream gauges, but only when the data source both implements
  # stream_stats_available? and answers true to it.
  def track_streams
    supported = data_source.respond_to?(:stream_stats_available?) &&
                data_source.stream_stats_available?
    return unless supported

    streams = data_source.stream_stats_summary
    gauge("stream_broadcasts_60m", streams[:broadcasts])
    gauge("stream_connects_60m", streams[:connects])
    gauge("stream_disconnects_60m", streams[:disconnects])
    gauge("stream_active_connections", streams[:active_estimate])
    gauge("stream_avg_fanout", streams[:avg_fanout])
    gauge("stream_avg_broadcast_ms", streams[:avg_broadcast_ms])
  rescue StandardError => error
    log_failure("stream metrics", error)
  end

  # Sends one gauge to AppSignal with the METRIC_PREFIX prepended to the
  # key. A nil value is silently dropped.
  def gauge(key, value, tags = {})
    ::Appsignal.set_gauge("#{METRIC_PREFIX}#{key}", value, tags) unless value.nil?
  end

  # Records a failed section at debug level so the probe never raises.
  def log_failure(label, error)
    Pgbus.logger.debug { "[Pgbus::AppSignal::Probe] #{label} failed: #{error.class}: #{error.message}" }
  end
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
5
|
+
module Pgbus
|
|
6
|
+
module Integrations
|
|
7
|
+
module Appsignal
|
|
8
|
+
# Translates Pgbus::Instrumentation events into AppSignal transactions
|
|
9
|
+
# and custom metrics.
|
|
10
|
+
#
|
|
11
|
+
# Job and event-handler events open a BACKGROUND_JOB transaction so they
|
|
12
|
+
# appear under AppSignal's "Performance > Background jobs" view, with
|
|
13
|
+
# action `<JobClass>#perform` or `<HandlerClass>#handle`. All other
|
|
14
|
+
# events are reported as counters or distributions only.
|
|
15
|
+
#
|
|
16
|
+
# All metric names are prefixed `pgbus_`. Tag keys avoid high-cardinality
|
|
17
|
+
# values (no msg_id, no event_id) so AppSignal's metric storage stays
|
|
18
|
+
# efficient.
|
|
19
|
+
module Subscriber
|
|
20
|
+
BACKGROUND_JOB = "background_job"
|
|
21
|
+
METRIC_PREFIX = "pgbus_"
|
|
22
|
+
private_constant :BACKGROUND_JOB, :METRIC_PREFIX
|
|
23
|
+
|
|
24
|
+
# Tracked so we can detach in reset! (used by specs).
|
|
25
|
+
@subscriptions = []
|
|
26
|
+
|
|
27
|
+
class << self
|
|
28
|
+
# Subscribes one handler per pgbus notification and remembers the
# subscriptions so reset! can detach them.
#
# @return [Boolean] false when already installed, true otherwise.
def install!
  return false if @installed

  # Event name => private handler method, in subscription order.
  handlers = {
    "pgbus.executor.execute" => :on_executor_execute,
    "pgbus.job_completed" => :on_job_completed,
    "pgbus.job_failed" => :on_job_failed,
    "pgbus.job_dead_lettered" => :on_job_dead_lettered,
    "pgbus.event_processed" => :on_event_processed,
    "pgbus.event_failed" => :on_event_failed,
    "pgbus.client.send_message" => :on_send_message,
    "pgbus.client.send_batch" => :on_send_batch,
    "pgbus.client.read_batch" => :on_read_batch,
    "pgbus.stream.broadcast" => :on_stream_broadcast,
    "pgbus.outbox.publish" => :on_outbox_publish,
    "pgbus.recurring.enqueue" => :on_recurring_enqueue,
    "pgbus.worker.recycle" => :on_worker_recycle
  }

  @subscriptions = handlers.map do |event_name, handler|
    subscribe(event_name) { |event| send(handler, event) }
  end
  @installed = true
end
|
|
48
|
+
|
|
49
|
+
# @return [Boolean] true only while the installed flag is exactly true.
def installed?
  @installed.equal?(true)
end
|
|
52
|
+
|
|
53
|
+
# Detaches every tracked subscription and marks the subscriber as not
# installed, so install! can run again.
def reset!
  Array(@subscriptions).each do |subscription|
    ActiveSupport::Notifications.unsubscribe(subscription)
  end
  @subscriptions = []
  @installed = false
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
# Subscribes to +name+ and hands each notification to the given block as
# an ActiveSupport::Notifications::Event, wrapped in #safely.
#
# @return the subscription object returned by ActiveSupport::Notifications.
def subscribe(name, &handler)
  ActiveSupport::Notifications.subscribe(name) do |*notification|
    wrapped = ActiveSupport::Notifications::Event.new(*notification)
    safely { handler.call(wrapped) }
  end
end
|
|
68
|
+
|
|
69
|
+
# Errors in the subscriber must never affect the producer thread.
|
|
70
|
+
# AppSignal can be misconfigured, the agent can be down, etc. — log
|
|
71
|
+
# and move on.
|
|
72
|
+
# Runs the block and turns any StandardError into a warning log line, so
# a failing handler never propagates to the code that emitted the event.
def safely
  yield
rescue StandardError => error
  Pgbus.logger.warn { "[Pgbus::AppSignal] subscriber error: #{error.class}: #{error.message}" }
end
|
|
79
|
+
|
|
80
|
+
# ── Job execution ───────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
# Reports a finished job execution: creates a background-job transaction
# named "<job_class>#perform", attaches queue start, tags and arguments,
# and records the event duration as a distribution value.
#
# NOTE(review): the ensure clause completes whatever transaction is
# current on this thread, even if Transaction.create raised — confirm no
# outer transaction can be active when this handler runs.
def on_executor_execute(event)
  job = event.payload
  job_class = job[:job_class]

  txn = ::Appsignal::Transaction.create(BACKGROUND_JOB)
  txn.set_action_if_nil("#{job_class || "UnknownJob"}#perform")
  apply_queue_start(txn, job[:enqueued_at])
  txn.add_tags(job_tags(job))
  txn.add_params_if_nil { { arguments: job[:arguments] } }

  metric_tags = { queue: job[:queue], job_class: job_class }
  ::Appsignal.add_distribution_value("#{METRIC_PREFIX}job_duration_ms", event.duration, metric_tags)
ensure
  ::Appsignal::Transaction.complete_current!
end
|
|
97
|
+
|
|
98
|
+
# Counts one processed job, tagged by queue and job class.
def on_job_completed(event)
  job = event.payload
  labels = { queue: job[:queue], job_class: job[:job_class], status: "processed" }
  ::Appsignal.increment_counter("#{METRIC_PREFIX}queue_job_count", 1, labels)
end
|
|
106
|
+
|
|
107
|
+
# Counts one failed job and, when the payload carries an exception object
# and AppSignal responds to set_error, forwards that exception.
def on_job_failed(event)
  job = event.payload
  labels = { queue: job[:queue], job_class: job[:job_class], status: "failed" }
  ::Appsignal.increment_counter("#{METRIC_PREFIX}queue_job_count", 1, labels)

  failure = job[:exception_object]
  return unless failure && ::Appsignal.respond_to?(:set_error)

  ::Appsignal.set_error(failure)
end
|
|
117
|
+
|
|
118
|
+
# Counts one dead-lettered job, tagged by queue and job class.
def on_job_dead_lettered(event)
  job = event.payload
  labels = { queue: job[:queue], job_class: job[:job_class], status: "dead_lettered" }
  ::Appsignal.increment_counter("#{METRIC_PREFIX}queue_job_count", 1, labels)
end
|
|
126
|
+
|
|
127
|
+
# ── Event handler ───────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
# Reports a handled event: creates a background-job transaction named
# "<handler>#handle", attaches queue start and tags, then records the
# duration distribution and a "processed" counter.
#
# NOTE(review): the ensure clause completes whatever transaction is
# current on this thread — confirm no outer transaction can be active.
def on_event_processed(event)
  details = event.payload
  handler = details[:handler]
  routing_key = details[:routing_key]

  txn = ::Appsignal::Transaction.create(BACKGROUND_JOB)
  txn.set_action_if_nil("#{handler || "UnknownHandler"}#handle")
  apply_queue_start(txn, details[:published_at])
  txn.add_tags(handler_tags(details))

  ::Appsignal.add_distribution_value(
    "#{METRIC_PREFIX}event_duration_ms",
    event.duration,
    { handler: handler, routing_key: routing_key }
  )
  ::Appsignal.increment_counter(
    "#{METRIC_PREFIX}event_count",
    1,
    { handler: handler, routing_key: routing_key, status: "processed" }
  )
ensure
  ::Appsignal::Transaction.complete_current!
end
|
|
148
|
+
|
|
149
|
+
# Counts one failed event and, when the payload carries an exception
# object and AppSignal responds to set_error, forwards that exception.
def on_event_failed(event)
  details = event.payload
  labels = { handler: details[:handler], routing_key: details[:routing_key], status: "failed" }
  ::Appsignal.increment_counter("#{METRIC_PREFIX}event_count", 1, labels)

  failure = details[:exception_object]
  return unless failure && ::Appsignal.respond_to?(:set_error)

  ::Appsignal.set_error(failure)
end
|
|
159
|
+
|
|
160
|
+
# ── Client (PGMQ wrapper) ───────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
# Counts one sent message and records how long the send took, both
# tagged with the queue from the payload.
def on_send_message(event)
  queue = event.payload[:queue]
  ::Appsignal.increment_counter("#{METRIC_PREFIX}messages_sent", 1, { queue: queue })
  ::Appsignal.add_distribution_value("#{METRIC_PREFIX}send_duration_ms", event.duration, { queue: queue })
end
|
|
175
|
+
|
|
176
|
+
# Counts the messages of a batch send and records the batch duration.
# The size comes from payload[:count], then payload[:batch_size], and
# falls back to 1 when neither is present.
def on_send_batch(event)
  batch = event.payload
  queue = batch[:queue]
  sent = batch[:count] || batch[:batch_size] || 1

  ::Appsignal.increment_counter("#{METRIC_PREFIX}messages_sent", sent, { queue: queue })
  ::Appsignal.add_distribution_value("#{METRIC_PREFIX}send_batch_duration_ms", event.duration, { queue: queue })
end
|
|
190
|
+
|
|
191
|
+
# Counts messages read in a batch. The size comes from payload[:count],
# then payload[:fetched], and falls back to 0 when neither is present.
def on_read_batch(event)
  batch = event.payload
  fetched = batch[:count] || batch[:fetched] || 0
  ::Appsignal.increment_counter("#{METRIC_PREFIX}messages_read", fetched, { queue: batch[:queue] })
end
|
|
200
|
+
|
|
201
|
+
# ── Streams ─────────────────────────────────────────────────────
|
|
202
|
+
|
|
203
|
+
# Counts one stream broadcast (tagged with the stream and whether it was
# deferred) and, when the payload reports a byte size, records it.
def on_stream_broadcast(event)
  info = event.payload
  stream = info[:stream]
  deferred_label = info[:deferred] ? "true" : "false"

  ::Appsignal.increment_counter(
    "#{METRIC_PREFIX}stream_broadcast_count",
    1,
    { stream: stream, deferred: deferred_label }
  )

  size = info[:bytes]
  return unless size

  ::Appsignal.add_distribution_value("#{METRIC_PREFIX}stream_broadcast_bytes", size, { stream: stream })
end
|
|
218
|
+
|
|
219
|
+
# ── Outbox ──────────────────────────────────────────────────────
|
|
220
|
+
|
|
221
|
+
# Counts one outbox publish and records its duration. Both metrics are
# tagged with payload[:kind], defaulting to "job" when it is absent.
def on_outbox_publish(event)
  kind = event.payload[:kind] || "job"
  ::Appsignal.increment_counter("#{METRIC_PREFIX}outbox_published", 1, { kind: kind })
  ::Appsignal.add_distribution_value("#{METRIC_PREFIX}outbox_publish_duration_ms", event.duration, { kind: kind })
end
|
|
234
|
+
|
|
235
|
+
# ── Recurring scheduler ─────────────────────────────────────────
|
|
236
|
+
|
|
237
|
+
# Counts one recurring-task enqueue, tagged by task key and class name.
def on_recurring_enqueue(event)
  task = event.payload
  labels = { task: task[:task], class_name: task[:class_name] }
  ::Appsignal.increment_counter("#{METRIC_PREFIX}recurring_enqueued", 1, labels)
end
|
|
245
|
+
|
|
246
|
+
# ── Worker lifecycle ────────────────────────────────────────────
|
|
247
|
+
|
|
248
|
+
# Counts one worker recycle, tagged with the reason from the payload.
def on_worker_recycle(event)
  labels = { reason: event.payload[:reason] }
  ::Appsignal.increment_counter("#{METRIC_PREFIX}worker_recycled", 1, labels)
end
|
|
256
|
+
|
|
257
|
+
# ── Helpers ─────────────────────────────────────────────────────
|
|
258
|
+
|
|
259
|
+
# AppSignal expects queue-start as Unix epoch milliseconds. Pgbus
|
|
260
|
+
# carries it as either an ISO-8601 String or a Time — both happen
|
|
261
|
+
# in practice (executor passes the JSON string, handler passes a
|
|
262
|
+
# parsed Time).
|
|
263
|
+
# Sets the transaction's queue-start time from +value+.
#
# Time and String inputs are converted to whole Unix-epoch milliseconds
# using exact Rational arithmetic. Going through Float (to_f * 1_000)
# can land just below the true value and truncate one millisecond short
# (1.005 s became 1004 ms).
#
# @param transaction [#set_queue_start] receiver of the millisecond value
# @param value [Time, String, Numeric, nil] queue-start timestamp. A
#   Numeric is passed through as-is (truncated to an Integer); any other
#   type, or nil, leaves the transaction untouched.
# @return [void]
def apply_queue_start(transaction, value)
  return unless value

  millis =
    case value
    when Time
      (value.to_r * 1_000).to_i
    when String
      (Time.parse(value).to_r * 1_000).to_i
    when Numeric
      value.to_i
    end
  transaction.set_queue_start(millis) if millis
rescue ArgumentError
  # Unparseable timestamp — skip rather than blow up.
end
|
|
279
|
+
|
|
280
|
+
# Builds the transaction tags for a job payload. Keys are strings; entries
# whose value is nil are dropped. "request_id" prefers the provider job id
# and falls back to the job id.
#
# @param payload [Hash] event payload with symbol keys
# @return [Hash{String => Object}]
def job_tags(payload)
  job_id = payload[:job_id]
  provider_job_id = payload[:provider_job_id]

  result = {
    "queue" => payload[:queue],
    "job_class" => payload[:job_class],
    "attempts" => payload[:read_ct]
  }
  result["active_job_id"] = job_id if job_id
  result["provider_job_id"] = provider_job_id if provider_job_id
  result["request_id"] = provider_job_id || job_id
  result.compact
end
|
|
291
|
+
|
|
292
|
+
# Builds the transaction tags for an event-handler payload. Keys are
# strings; entries whose value is nil are dropped.
#
# @param payload [Hash] event payload with symbol keys
# @return [Hash{String => Object}]
def handler_tags(payload)
  labels = {}
  labels["handler"] = payload[:handler]
  labels["routing_key"] = payload[:routing_key]
  labels["attempts"] = payload[:read_ct]
  labels.compact
end
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
end
|
|
302
|
+
end
|
|
303
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pgbus/integrations/appsignal/subscriber"
|
|
4
|
+
require "pgbus/integrations/appsignal/probe"
|
|
5
|
+
|
|
6
|
+
module Pgbus
|
|
7
|
+
module Integrations
|
|
8
|
+
# AppSignal integration for pgbus.
|
|
9
|
+
#
|
|
10
|
+
# Loaded automatically by Pgbus::Engine when the appsignal gem is present
|
|
11
|
+
# and config.appsignal_enabled is true (default). To opt out:
|
|
12
|
+
#
|
|
13
|
+
# Pgbus.configure do |c|
|
|
14
|
+
# c.appsignal_enabled = false
|
|
15
|
+
# end
|
|
16
|
+
#
|
|
17
|
+
# The integration:
|
|
18
|
+
# * Subscribes to pgbus.* ActiveSupport::Notifications and translates
|
|
19
|
+
# them into AppSignal background-job transactions and metrics.
|
|
20
|
+
# * Registers a minutely probe that reports queue depth, DLQ size,
|
|
21
|
+
# dead-tuple counts, MVCC horizon age, and stream stats from
|
|
22
|
+
# Pgbus::Web::DataSource.
|
|
23
|
+
#
|
|
24
|
+
# All metric names are prefixed `pgbus_` so they group cleanly in
|
|
25
|
+
# AppSignal's custom-metrics view.
|
|
26
|
+
module Appsignal
  module_function

  # Installs the notification subscriber and, when
  # `Pgbus.configuration.appsignal_probe_enabled` is set, the probe.
  #
  # @return [Boolean] true when this call installed the integration; false
  #   when the ::Appsignal constant is not defined or the integration was
  #   already installed.
  def install! # rubocop:disable Naming/PredicateMethod
    return false if !defined?(::Appsignal) || @installed

    Subscriber.install!
    if Pgbus.configuration.appsignal_probe_enabled
      Probe.install!
    end
    @installed = true
    Pgbus.logger.info { "[Pgbus] AppSignal integration installed" }
    true
  end

  # @return [Boolean] whether install! has completed since the last reset!.
  def installed?
    @installed.equal?(true)
  end

  # Test hook: resets the subscriber and the probe (when those constants
  # are defined) and clears the installed flag so install! can run again.
  def reset!
    if defined?(Subscriber)
      Subscriber.reset!
    end
    if defined?(Probe)
      Probe.reset!
    end
    @installed = false
  end
end
|
|
51
|
+
end
|
|
52
|
+
end
|
data/lib/pgbus/outbox.rb
CHANGED
|
@@ -5,22 +5,26 @@ module Pgbus
|
|
|
5
5
|
module_function
|
|
6
6
|
|
|
7
7
|
# Creates an outbox entry for a job message, instrumented as
# "pgbus.outbox.publish" with kind :job.
#
# @param queue_name target queue stored on the entry
# @param payload message payload stored on the entry
# @param headers optional headers (default nil)
# @param priority optional priority; falls back to
#   Pgbus.configuration.default_priority when nil
# @param delay delay stored on the entry (default 0)
# @return whatever Instrumentation.instrument returns for the block
def publish(queue_name, payload, headers: nil, priority: nil, delay: 0)
  Instrumentation.instrument("pgbus.outbox.publish", queue: queue_name, kind: :job) do
    attributes = {
      queue_name: queue_name,
      payload: payload,
      headers: headers,
      priority: priority || Pgbus.configuration.default_priority,
      delay: delay
    }
    OutboxEntry.create!(**attributes)
  end
end
|
|
16
18
|
|
|
17
19
|
# Creates an outbox entry for an event, instrumented as
# "pgbus.outbox.publish" with kind :event. The payload is first wrapped
# by EventBus::Publisher.build_event_data.
#
# @param routing_key routing key stored on the entry
# @param payload raw event payload
# @param headers optional headers (default nil)
# @return whatever Instrumentation.instrument returns for the block
def publish_event(routing_key, payload, headers: nil)
  Instrumentation.instrument("pgbus.outbox.publish", routing_key: routing_key, kind: :event) do
    attributes = {
      routing_key: routing_key,
      payload: EventBus::Publisher.build_event_data(payload),
      headers: headers
    }
    OutboxEntry.create!(**attributes)
  end
end
|
|
25
29
|
|
|
26
30
|
def flush!
|
|
@@ -15,6 +15,7 @@ module Pgbus
|
|
|
15
15
|
OUTBOX_CLEANUP_INTERVAL = 3600 # Run outbox cleanup every hour
|
|
16
16
|
JOB_LOCK_CLEANUP_INTERVAL = 300 # Run job lock cleanup every 5 minutes
|
|
17
17
|
STATS_CLEANUP_INTERVAL = 3600 # Run stats cleanup every hour
|
|
18
|
+
ORPHAN_STREAM_SWEEP_INTERVAL = 3600 # Run orphan stream sweep every hour
|
|
18
19
|
TABLE_MAINTENANCE_INTERVAL = Pgbus::TableMaintenance::MAINTENANCE_INTERVAL
|
|
19
20
|
|
|
20
21
|
# Page size for archive compaction. Each cycle deletes up to this
|
|
@@ -38,6 +39,7 @@ module Pgbus
|
|
|
38
39
|
@last_outbox_cleanup_at = monotonic_now
|
|
39
40
|
@last_job_lock_cleanup_at = monotonic_now
|
|
40
41
|
@last_stats_cleanup_at = monotonic_now
|
|
42
|
+
@last_orphan_stream_sweep_at = monotonic_now
|
|
41
43
|
@last_table_maintenance_at = monotonic_now
|
|
42
44
|
end
|
|
43
45
|
|
|
@@ -86,6 +88,8 @@ module Pgbus
|
|
|
86
88
|
run_if_due(now, :@last_outbox_cleanup_at, OUTBOX_CLEANUP_INTERVAL) { cleanup_outbox }
|
|
87
89
|
run_if_due(now, :@last_job_lock_cleanup_at, JOB_LOCK_CLEANUP_INTERVAL) { cleanup_job_locks }
|
|
88
90
|
run_if_due(now, :@last_stats_cleanup_at, STATS_CLEANUP_INTERVAL) { cleanup_stats }
|
|
91
|
+
sweep_interval = config.streams_orphan_sweep_interval
|
|
92
|
+
run_if_due(now, :@last_orphan_stream_sweep_at, sweep_interval) { sweep_orphan_streams } if sweep_interval
|
|
89
93
|
run_if_due(now, :@last_table_maintenance_at, TABLE_MAINTENANCE_INTERVAL) { run_table_maintenance }
|
|
90
94
|
end
|
|
91
95
|
|
|
@@ -315,6 +319,40 @@ module Pgbus
|
|
|
315
319
|
config.streams_default_retention.to_f
|
|
316
320
|
end
|
|
317
321
|
|
|
322
|
+
# Drops stream queues (names starting with "<streams_queue_prefix>_")
# that currently hold zero messages. Does nothing when the prefix is
# blank or streams_orphan_threshold is not configured. A failure on one
# queue is logged and the sweep continues with the next.
#
# NOTE(review): streams_orphan_threshold is only checked for presence
# here; its value is never compared against anything, so any empty
# stream queue is dropped regardless of age — confirm this is intended.
def sweep_orphan_streams
  stream_prefix = config.streams_queue_prefix
  return if stream_prefix.nil? || stream_prefix.empty?
  return unless config.streams_orphan_threshold

  connection = config.connects_to ? Pgbus::BusRecord.connection : ActiveRecord::Base.connection
  all_queues = connection.select_values("SELECT queue_name FROM pgmq.meta ORDER BY queue_name")
  name_prefix = "#{stream_prefix}_"
  dropped_count = 0

  all_queues.each do |queue|
    next unless queue.start_with?(name_prefix)

    length_sql = <<~SQL
      SELECT count(*) AS queue_length
      FROM pgmq.q_#{QueueNameValidator.sanitize!(queue)}
    SQL
    result = connection.select_one(length_sql, "Pgbus Orphan Check")

    next unless result
    next if result["queue_length"].to_i.positive?

    Pgbus.client.drop_queue(queue, prefixed: false)
    dropped_count += 1
    Pgbus.logger.info { "[Pgbus] Dropped orphan stream queue: #{queue}" }
  rescue StandardError => error
    Pgbus.logger.warn { "[Pgbus] Orphan stream sweep failed for #{queue}: #{error.message}" }
  end

  if dropped_count.positive?
    Pgbus.logger.debug { "[Pgbus] Orphan stream sweep complete: dropped #{dropped_count} queue(s)" }
  end
rescue StandardError => error
  Pgbus.logger.warn { "[Pgbus] Orphan stream sweep failed: #{error.message}" }
end
|
|
355
|
+
|
|
318
356
|
def cleanup_recurring_executions
|
|
319
357
|
retention = config.recurring_execution_retention
|
|
320
358
|
return unless retention&.positive?
|
data/lib/pgbus/process/worker.rb
CHANGED
|
@@ -302,15 +302,33 @@ module Pgbus
|
|
|
302
302
|
end
|
|
303
303
|
|
|
304
304
|
# When the worker is running and a recycle reason applies: flags the
# process as stopping, moves the lifecycle to :draining, emits a
# "pgbus.worker.recycle" notification and wakes the worker loop.
def check_recycle
  return unless @lifecycle.running?

  trigger = recycle_reason
  return unless trigger

  Pgbus.stopping = true
  @lifecycle.transition_to(:draining)

  # Snapshot taken after the transition, matching the reporting order.
  details = {
    reason: trigger,
    jobs_processed: @jobs_processed.value,
    memory_mb: current_memory_mb,
    lifetime_seconds: monotonic_now - @started_at_monotonic
  }
  Pgbus::Instrumentation.instrument("pgbus.worker.recycle", **details)
  @wake_signal.notify!
end
|
|
311
321
|
|
|
322
|
+
# Names the first recycle limit that has been exceeded, checked in the
# order jobs, memory, lifetime.
#
# @return [Symbol, nil] :max_jobs, :max_memory, :max_lifetime, or nil
#   when no limit is exceeded.
def recycle_reason
  if exceeded_max_jobs?
    :max_jobs
  elsif exceeded_max_memory?
    :max_memory
  elsif exceeded_max_lifetime?
    :max_lifetime
  end
end
|
|
329
|
+
|
|
312
330
|
# @return [Boolean] true when recycle_reason reports any reason.
def recycle_needed?
  return false if recycle_reason.nil?

  true
end
|
|
315
333
|
|
|
316
334
|
def exceeded_max_jobs?
|
|
@@ -41,8 +41,16 @@ module Pgbus
|
|
|
41
41
|
|
|
42
42
|
def tick(now)
|
|
43
43
|
schedule.due_tasks(now).each do |task, run_at|
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
Pgbus::Instrumentation.instrument(
|
|
45
|
+
"pgbus.recurring.enqueue",
|
|
46
|
+
task: task.key,
|
|
47
|
+
class_name: task.class_name,
|
|
48
|
+
queue: task.queue_name,
|
|
49
|
+
run_at: run_at
|
|
50
|
+
) do
|
|
51
|
+
schedule.enqueue_task(task, run_at: run_at)
|
|
52
|
+
@last_runs[task.key] = now
|
|
53
|
+
end
|
|
46
54
|
rescue StandardError => e
|
|
47
55
|
Pgbus.logger.error do
|
|
48
56
|
"[Pgbus] Error scheduling recurring task #{task.key}: #{e.class}: #{e.message}"
|
|
@@ -36,7 +36,8 @@ module Pgbus
|
|
|
36
36
|
module TurboBroadcastable
|
|
37
37
|
# Broadcasts +content+ to the stream named after +streamables+. The
# stream is opened as durable only when the configured
# streams_default_broadcast_mode is :durable.
def broadcast_stream_to(*streamables, content:)
  target = stream_name_from(streamables)
  durable = Pgbus.configuration.streams_default_broadcast_mode == :durable
  Pgbus.stream(target, durable: durable).broadcast(content)
end
|
|
41
42
|
end
|
|
42
43
|
|