pgbus 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +360 -331
- data/app/controllers/pgbus/dead_letter_controller.rb +3 -7
- data/app/frontend/pgbus/style.css +1 -1
- data/app/frontend/pgbus/tailwind.css +28 -1
- data/app/views/layouts/pgbus/application.html.erb +58 -12
- data/app/views/pgbus/dead_letter/_messages_table.html.erb +3 -5
- data/app/views/pgbus/insights/show.html.erb +6 -6
- data/app/views/pgbus/jobs/_enqueued_table.html.erb +2 -3
- data/lib/generators/pgbus/templates/pgbus.yml.erb +5 -3
- data/lib/generators/pgbus/update_generator.rb +75 -0
- data/lib/pgbus/circuit_breaker.rb +17 -3
- data/lib/pgbus/cli.rb +95 -3
- data/lib/pgbus/client.rb +91 -3
- data/lib/pgbus/configuration/capsule_dsl.rb +190 -0
- data/lib/pgbus/configuration.rb +305 -25
- data/lib/pgbus/failed_event_recorder.rb +15 -2
- data/lib/pgbus/generators/config_converter.rb +323 -0
- data/lib/pgbus/process/dispatcher.rb +42 -20
- data/lib/pgbus/process/supervisor.rb +11 -16
- data/lib/pgbus/process/worker.rb +20 -2
- data/lib/pgbus/version.rb +1 -1
- data/lib/pgbus/web/data_source.rb +50 -15
- data/lib/pgbus.rb +13 -1
- metadata +4 -1
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "yaml"
|
|
4
|
+
|
|
5
|
+
module Pgbus
  module Generators
    # Converts a +config/pgbus.yml+ file into a Ruby initializer
    # (+config/initializers/pgbus.rb+) using the Ruby DSL added in
    # Pgbus 0.5+ — capsule string DSL, ActiveSupport::Duration coercion,
    # auto-tuned pool size, named capsules, etc.
    #
    # Drops settings that:
    #   - match the gem default (no point restating it)
    #   - are deprecated (e.g. pool_size, which is now auto-tuned)
    #
    # Converts seconds to durations, picking the largest unit that divides
    # the value evenly (7.days, 10.minutes, 90.seconds).
    #
    # When the YAML has multiple environments with different values for
    # the same setting, emits Rails.env-aware code:
    #
    #   - all envs that set it agree       → unconditional line
    #   - 2 envs, development sets nil     → `unless Rails.env.development?` modifier
    #   - anything else                    → `case Rails.env when ... else c.X end`
    #
    # The `else c.X` branch keeps the current value in environments the YAML
    # did not mention; without it the case expression would evaluate to nil
    # there and overwrite the setting.
    #
    # Backwards compatible: the original YAML file is NOT touched. The
    # generator's CLI wrapper writes the new initializer and tells the
    # user to delete the YAML when ready.
    class ConfigConverter
      class Error < StandardError; end

      # Setters that accept ActiveSupport::Duration (PR 5).
      DURATION_SETTINGS = %w[
        visibility_timeout archive_retention idempotency_ttl
        outbox_retention stats_retention recurring_execution_retention
      ].freeze

      # Settings that no longer exist in the public API. The converter
      # silently drops these from the generated initializer so users on
      # legacy YAML get a clean migration.
      #
      #   - pool_size                -> auto-tuned from worker thread counts
      #   - notify_throttle_ms       -> Pgbus::Client::NOTIFY_THROTTLE_MS
      #   - circuit_breaker_*        -> Pgbus::CircuitBreaker constants
      #   - archive_compaction_*     -> Pgbus::Process::Dispatcher constants
      #   - dead_letter_queue_suffix -> Pgbus::DEAD_LETTER_SUFFIX (frozen)
      DEPRECATED_SETTINGS = %w[
        pool_size
        notify_throttle_ms
        circuit_breaker_threshold circuit_breaker_base_backoff circuit_breaker_max_backoff
        archive_compaction_interval archive_compaction_batch_size
        dead_letter_queue_suffix
      ].freeze

      # Settings whose default we know how to compute by inspecting
      # Pgbus::Configuration.new. Any setting not listed here is emitted
      # as-is (we can't tell if it matches the default).
      KNOWN_SETTINGS = %w[
        queue_prefix default_queue pool_timeout listen_notify
        visibility_timeout max_retries idempotency_ttl
        max_jobs_per_worker max_memory_mb max_worker_lifetime
        dispatch_interval prefetch_limit
        circuit_breaker_enabled
        archive_retention
        outbox_enabled outbox_poll_interval outbox_batch_size outbox_retention
        stats_enabled stats_retention
        recurring_schedule_interval recurring_execution_retention skip_recurring
        polling_interval default_priority priority_levels
        return_to_app_url workers
      ].freeze

      # Reads +path+ as YAML and returns the generated initializer source.
      #
      # @param path [String] location of the legacy pgbus.yml
      # @return [String] Ruby source of the initializer
      # @raise [Error] when the file is missing or is not a mapping of environments
      def self.from_yaml(path)
        raise Error, "config file not found: #{path}" unless File.exist?(path)

        parsed = YAML.safe_load_file(path, aliases: true, permitted_classes: [Symbol])
        from_hash(parsed)
      end

      # @param envs_hash [Hash, nil] { env_name => { setting => value } }
      # @return [String] Ruby source of the initializer
      # @raise [Error] when +envs_hash+ or one of its sections is not a mapping
      def self.from_hash(envs_hash)
        new(envs_hash).render
      end

      # @param envs_hash [Hash, nil] parsed YAML keyed by environment name.
      #   Sections whose name starts with "default" (the usual YAML anchor
      #   block) are ignored; if nothing else remains, the whole hash is
      #   treated as the "production" section.
      # @raise [Error] when the root or an environment section is not a mapping
      def initialize(envs_hash)
        unless envs_hash.nil? || envs_hash.is_a?(Hash)
          raise Error, "expected a mapping of environments, got #{envs_hash.class}"
        end

        @envs = (envs_hash || {}).reject { |env, _| env.to_s.start_with?("default") }
        @envs = { "production" => envs_hash } if @envs.empty? && envs_hash
        @envs = normalize_env_sections(@envs)
        @defaults = build_defaults
      end

      # Builds the full initializer source, ending with a newline.
      #
      # @return [String]
      def render
        lines = [
          "# frozen_string_literal: true",
          "#",
          "# Generated by `rails generate pgbus:update` from config/pgbus.yml.",
          "# Review and adjust as needed, then delete config/pgbus.yml.",
          "",
          "Pgbus.configure do |c|"
        ]
        # Body lines sit one level (two spaces) inside the configure block.
        render_body.each { |line| lines << "  #{line}" }
        lines << "end"
        "#{lines.join("\n")}\n"
      end

      private

      # Validates every environment section and stringifies its setting keys
      # so symbol-keyed YAML still matches DEPRECATED_SETTINGS / defaults.
      def normalize_env_sections(envs)
        envs.each_with_object({}) do |(env, settings), normalized|
          unless settings.nil? || settings.is_a?(Hash)
            raise Error, "settings for #{env.inspect} must be a mapping, got #{settings.class}"
          end

          normalized[env] = settings&.transform_keys(&:to_s)
        end
      end

      # Snapshot of gem defaults for every known setting the configuration
      # object exposes a reader for.
      def build_defaults
        config = Pgbus::Configuration.new
        KNOWN_SETTINGS.each_with_object({}) do |key, h|
          h[key] = config.public_send(key) if config.respond_to?(key)
        end
      end

      # Lines that go inside `Pgbus.configure do |c| ... end` (unindented).
      def render_body
        all_settings = collect_all_settings
        return [] if all_settings.empty?

        constant_settings, varying_settings = partition_by_variance(all_settings)
        special_keys = %w[workers]

        lines = []
        # Special: workers comes first (most user-visible)
        special_keys.each do |key|
          rendered = render_workers(constant_settings[key], varying_settings[key])
          lines.concat(rendered) if rendered
        end

        constant_settings.each do |key, value|
          next if special_keys.include?(key)
          next if drop?(key, value)

          lines << render_setting(key, value)
        end

        varying_settings.each do |key, env_values|
          next if special_keys.include?(key)
          next if env_values.values.all? { |v| drop?(key, v) }

          lines.concat(render_varying_setting(key, env_values))
        end

        lines
      end

      # { "key" => { env => value_or_:__missing__ } } for every key seen in any env.
      def collect_all_settings
        all_keys = @envs.values.flat_map { |env_settings| env_settings&.keys || [] }.uniq
        all_keys.to_h do |key|
          [key, @envs.transform_values { |env_settings| env_settings&.fetch(key, :__missing__) }]
        end
      end

      # Returns [constant_settings, varying_settings].
      #   constant_settings: { "key" => value }  (same value across all envs that set it)
      #   varying_settings:  { "key" => { env => value, ... } }
      def partition_by_variance(all_settings)
        constant = {}
        varying = {}
        all_settings.each do |key, env_values|
          present_values = env_values.reject { |_, v| v == :__missing__ }
          unique_values = present_values.values.uniq
          if unique_values.size <= 1
            constant[key] = unique_values.first
          else
            varying[key] = present_values
          end
        end
        [constant, varying]
      end

      # True when the setting should not appear in the generated initializer.
      def drop?(key, value)
        return true if DEPRECATED_SETTINGS.include?(key)
        return true if value == :__missing__
        return true if @defaults.key?(key) && @defaults[key] == value

        false
      end

      def render_setting(key, value)
        "c.#{key} = #{render_value(key, value)}"
      end

      # Renders a setting whose value differs between environments.
      def render_varying_setting(key, env_values)
        if env_values.size == 2 && env_values.key?("development")
          # Special case: "everything except dev" — common pattern
          non_dev_value = env_values.except("development").values.first
          if env_values["development"].nil? && non_dev_value
            return ["c.#{key} = #{render_value(key, non_dev_value)} unless Rails.env.development?"]
          end
        end

        # Fallback: case Rails.env block — `when` clauses indented one
        # level inside the case, `end` flush with `c.X`.
        lines = ["c.#{key} = case Rails.env"]
        env_values.each do |env, value|
          lines << "  when #{env.to_s.inspect} then #{render_value(key, value)}"
        end
        # Environments not listed keep whatever is already configured
        # instead of being reset to nil by a branch-less case.
        lines << "  else c.#{key}"
        lines << "end"
        lines
      end

      # Renders the workers setting; returns nil when nothing should be emitted.
      def render_workers(constant_workers, varying_workers)
        if varying_workers
          # Different worker config per env.
          lines = ["c.workers = case Rails.env"]
          varying_workers.each do |env, workers|
            lines << "  when #{env.to_s.inspect} then #{render_workers_literal(workers)}"
          end
          lines << "  else c.workers"
          lines << "end"
          return lines
        end

        rendered = render_workers_value(constant_workers)
        rendered unless rendered.nil? || rendered.empty?
      end

      # Ruby literal for one environment's workers inside a `case` branch:
      # the compact capsule string when every capsule only has queues/threads,
      # otherwise the inspected value so no capsule option is lost.
      def render_workers_literal(workers)
        simple = workers.is_a?(Array) && workers.all?(Hash) && workers_simple?(workers)
        simple ? workers_as_string(workers).inspect : workers.inspect
      end

      # Lines for a workers value shared by all environments, or nil to omit it.
      def render_workers_value(workers)
        return nil if workers.nil? || workers == :__missing__
        # Already a capsule string (or some other scalar): pass it through.
        return ["c.workers = #{workers.inspect}"] unless workers.is_a?(Array) && workers.all?(Hash)

        simple = workers_simple?(workers)

        # Drop if matches the gem default ([{queues: %w[default], threads: 5}]).
        # Compare via normalized form so YAML's string-keyed hashes match the
        # symbol-keyed default. Only simple lists qualify: normalization
        # ignores extra capsule options, which must not be discarded.
        return nil if simple && normalize_workers(workers) == normalize_workers(@defaults["workers"])

        if simple
          ["c.workers = #{workers_as_string(workers).inspect}"]
        else
          workers.map { |capsule| render_capsule_call(capsule) }
        end
      end

      # Normalize a workers array to symbol-keyed hashes with array-of-string
      # queues for stable comparison. Doesn't mutate input. Returns nil for
      # anything that is not an array.
      def normalize_workers(workers)
        return nil unless workers.is_a?(Array)

        workers.map do |capsule|
          {
            queues: Array(capsule[:queues] || capsule["queues"]).map(&:to_s),
            threads: (capsule[:threads] || capsule["threads"] || 5).to_i
          }
        end
      end

      # True when no capsule carries options beyond queues/threads.
      def workers_simple?(workers)
        workers.all? do |capsule|
          (capsule.keys.map(&:to_s) - %w[queues threads]).empty?
        end
      end

      # Capsule string DSL form, e.g. "default, mailers: 10; critical: 2".
      def workers_as_string(workers)
        workers.map do |capsule|
          queues = Array(capsule["queues"] || capsule[:queues]).join(", ")
          threads = capsule["threads"] || capsule[:threads] || 5
          "#{queues}: #{threads}"
        end.join("; ")
      end

      # One `c.capsule` call for a capsule that has options beyond queues/threads.
      def render_capsule_call(capsule)
        queues = Array(capsule["queues"] || capsule[:queues])
        threads = capsule["threads"] || capsule[:threads] || 5
        name = capsule["name"] || capsule[:name] || queues.first
        raise Error, "capsule #{capsule.inspect} has neither a name nor queues" if name.nil?

        opts = capsule.reject { |k, _| %w[queues threads name].include?(k.to_s) }

        parts = ["queues: #{queues.inspect}", "threads: #{threads}"]
        opts.each { |k, v| parts << "#{k}: #{v.inspect}" }

        # Symbol#inspect quotes names that are not bare identifiers
        # (:"my-queue"), so the generated line is always valid Ruby.
        "c.capsule #{name.to_s.to_sym.inspect}, #{parts.join(", ")}"
      end

      # Ruby literal for a setting value (durations and large integers prettified).
      def render_value(key, value)
        return "nil" if value.nil?

        if DURATION_SETTINGS.include?(key) && value.is_a?(Integer) && value.positive?
          duration_form = format_duration(value)
          return duration_form if duration_form
        end

        if value.is_a?(Integer) && value >= 1000
          format_integer_with_underscores(value)
        else
          value.inspect
        end
      end

      # "7.days" / "1.hour" / "90.seconds": largest unit dividing +seconds+ evenly.
      def format_duration(seconds)
        units = [
          [86_400, "day", "days"],
          [3600, "hour", "hours"],
          [60, "minute", "minutes"],
          [1, "second", "seconds"]
        ]

        units.each do |unit_seconds, singular, plural|
          next unless (seconds % unit_seconds).zero?

          count = seconds / unit_seconds
          unit_name = count == 1 ? singular : plural
          return "#{count}.#{unit_name}"
        end

        nil
      end

      # 1234567 -> "1_234_567"
      def format_integer_with_underscores(int)
        int.to_s.reverse.scan(/\d{1,3}/).join("_").reverse
      end
    end
  end
end
|
|
@@ -16,6 +16,12 @@ module Pgbus
|
|
|
16
16
|
JOB_LOCK_CLEANUP_INTERVAL = 300 # Run job lock cleanup every 5 minutes
|
|
17
17
|
STATS_CLEANUP_INTERVAL = 3600 # Run stats cleanup every hour
|
|
18
18
|
|
|
19
|
+
# Page size for archive compaction. Each cycle deletes up to this
|
|
20
|
+
# many archived rows per queue. Tuned via constant rather than
|
|
21
|
+
# configuration because the value rarely needs adjusting and a
|
|
22
|
+
# too-small value just delays cleanup, never breaks anything.
|
|
23
|
+
ARCHIVE_COMPACTION_BATCH_SIZE = 1000
|
|
24
|
+
|
|
19
25
|
attr_reader :config
|
|
20
26
|
|
|
21
27
|
def initialize(config: Pgbus.configuration)
|
|
@@ -72,7 +78,7 @@ module Pgbus
|
|
|
72
78
|
run_if_due(now, :@last_concurrency_at, CONCURRENCY_INTERVAL) { cleanup_concurrency }
|
|
73
79
|
run_if_due(now, :@last_batch_cleanup_at, BATCH_CLEANUP_INTERVAL) { cleanup_batches }
|
|
74
80
|
run_if_due(now, :@last_recurring_cleanup_at, RECURRING_CLEANUP_INTERVAL) { cleanup_recurring_executions }
|
|
75
|
-
run_if_due(now, :@last_archive_compaction_at,
|
|
81
|
+
run_if_due(now, :@last_archive_compaction_at, ARCHIVE_COMPACTION_INTERVAL) { compact_archives }
|
|
76
82
|
run_if_due(now, :@last_outbox_cleanup_at, OUTBOX_CLEANUP_INTERVAL) { cleanup_outbox }
|
|
77
83
|
run_if_due(now, :@last_job_lock_cleanup_at, JOB_LOCK_CLEANUP_INTERVAL) { cleanup_job_locks }
|
|
78
84
|
run_if_due(now, :@last_stats_cleanup_at, STATS_CLEANUP_INTERVAL) { cleanup_stats }
|
|
@@ -144,9 +150,11 @@ module Pgbus
|
|
|
144
150
|
end
|
|
145
151
|
|
|
146
152
|
def cleanup_job_locks
|
|
147
|
-
# Clean up orphaned uniqueness keys whose
|
|
148
|
-
# in
|
|
149
|
-
#
|
|
153
|
+
# Clean up truly orphaned uniqueness keys: rows whose referenced
|
|
154
|
+
# message no longer exists in the PGMQ queue. This handles crashes
|
|
155
|
+
# or queue truncation. It must NEVER delete a lock while the message
|
|
156
|
+
# is still in the queue, even if the lock is "old" — recurring jobs
|
|
157
|
+
# that fail and retry can hold locks for hours.
|
|
150
158
|
reaped = reap_orphaned_uniqueness_keys
|
|
151
159
|
Pgbus.logger.info { "[Pgbus] Reaped #{reaped} orphaned uniqueness keys" } if reaped.positive?
|
|
152
160
|
end
|
|
@@ -155,20 +163,17 @@ module Pgbus
|
|
|
155
163
|
keys = UniquenessKey.all.to_a
|
|
156
164
|
return 0 if keys.empty?
|
|
157
165
|
|
|
166
|
+
# Only consider locks that are old enough that we wouldn't be racing
|
|
167
|
+
# an in-flight enqueue. visibility_timeout * 2 is the floor — anything
|
|
168
|
+
# younger could be a freshly-acquired lock whose send_message hasn't
|
|
169
|
+
# committed yet.
|
|
158
170
|
threshold = Time.current - (config.visibility_timeout * 2)
|
|
159
171
|
|
|
160
172
|
orphaned = keys.select do |key|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
next true if key.msg_id.zero? && key.created_at && key.created_at < threshold
|
|
166
|
-
|
|
167
|
-
# For real msg_ids, only reap if stale (old enough that VT has
|
|
168
|
-
# long expired). The message itself may still be in the queue
|
|
169
|
-
# awaiting retry — age is the only safe signal without scanning
|
|
170
|
-
# every queue table.
|
|
171
|
-
key.created_at && key.created_at < threshold
|
|
173
|
+
next false unless key.created_at && key.created_at < threshold
|
|
174
|
+
next false unless key.queue_name
|
|
175
|
+
|
|
176
|
+
message_gone?(key)
|
|
172
177
|
end
|
|
173
178
|
|
|
174
179
|
return 0 if orphaned.empty?
|
|
@@ -179,6 +184,27 @@ module Pgbus
|
|
|
179
184
|
0
|
|
180
185
|
end
|
|
181
186
|
|
|
187
|
+
# Tells the reaper whether the queue message behind a uniqueness lock has
# definitely disappeared.
#
# The lookup goes through Pgbus.client.message_exists?: by msg_id when the
# lock carries a positive one, otherwise by the lock's uniqueness key.
# Only an explicit +false+ from the client counts as "gone"; +true+, +nil+
# (client could not tell) and any raised error all mean "keep the lock".
def message_gone?(key)
  numeric_id = key.msg_id.to_i
  lookup = numeric_id.positive? ? { msg_id: numeric_id } : { uniqueness_key: key.lock_key }

  Pgbus.client.message_exists?(key.queue_name, **lookup) == false
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Reap check failed for #{key.lock_key}: #{e.message}" }
  false
end
|
|
207
|
+
|
|
182
208
|
def cleanup_outbox
|
|
183
209
|
return unless config.outbox_enabled
|
|
184
210
|
|
|
@@ -191,16 +217,12 @@ module Pgbus
|
|
|
191
217
|
Pgbus.logger.warn { "[Pgbus] Outbox cleanup failed: #{e.message}" }
|
|
192
218
|
end
|
|
193
219
|
|
|
194
|
-
def archive_compaction_interval
|
|
195
|
-
config.archive_compaction_interval || ARCHIVE_COMPACTION_INTERVAL
|
|
196
|
-
end
|
|
197
|
-
|
|
198
220
|
def compact_archives
|
|
199
221
|
retention = config.archive_retention
|
|
200
222
|
return unless retention&.positive?
|
|
201
223
|
|
|
202
224
|
cutoff = Time.current - retention
|
|
203
|
-
batch_size =
|
|
225
|
+
batch_size = ARCHIVE_COMPACTION_BATCH_SIZE
|
|
204
226
|
prefix = config.queue_prefix
|
|
205
227
|
|
|
206
228
|
conn = config.connects_to ? Pgbus::BusRecord.connection : ActiveRecord::Base.connection
|
|
@@ -42,22 +42,17 @@ module Pgbus
|
|
|
42
42
|
private
|
|
43
43
|
|
|
44
44
|
def boot_processes
|
|
45
|
-
# Boot workers
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
# Boot event consumers if configured
|
|
57
|
-
boot_consumers
|
|
58
|
-
|
|
59
|
-
# Boot outbox poller if configured
|
|
60
|
-
boot_outbox_poller
|
|
45
|
+
# Boot workers (workers may be nil for scheduler-only or
|
|
46
|
+
# dispatcher-only deployments via --workers-only / --scheduler-only /
|
|
47
|
+
# --dispatcher-only CLI flags). Each role is gated by
|
|
48
|
+
# config.role_enabled?, which returns true unless +config.roles+ has
|
|
49
|
+
# been narrowed.
|
|
50
|
+
Array(config.workers).each { |worker_config| fork_worker(worker_config) } if config.role_enabled?(:workers)
|
|
51
|
+
|
|
52
|
+
fork_dispatcher if config.role_enabled?(:dispatcher)
|
|
53
|
+
boot_scheduler if config.role_enabled?(:scheduler)
|
|
54
|
+
boot_consumers if config.role_enabled?(:consumers)
|
|
55
|
+
boot_outbox_poller if config.role_enabled?(:outbox)
|
|
61
56
|
end
|
|
62
57
|
|
|
63
58
|
def fork_worker(worker_config)
|
data/lib/pgbus/process/worker.rb
CHANGED
|
@@ -133,7 +133,7 @@ module Pgbus
|
|
|
133
133
|
fetch_multi(active_queues, qty)
|
|
134
134
|
end
|
|
135
135
|
rescue StandardError => e
|
|
136
|
-
if
|
|
136
|
+
if undefined_queue_table_error?(e)
|
|
137
137
|
evict_missing_queues(e)
|
|
138
138
|
else
|
|
139
139
|
Pgbus.logger.error { "[Pgbus] Error fetching messages: #{e.message}" }
|
|
@@ -141,6 +141,24 @@ module Pgbus
|
|
|
141
141
|
[]
|
|
142
142
|
end
|
|
143
143
|
|
|
144
|
+
# Decides whether +error+ means "the queue table is missing".
#
# Two signals are accepted:
#   1. the error's direct #cause is a PG::UndefinedTable (only checked when
#      that constant is loaded);
#   2. the error message contains both "pgmq.q_" and "does not exist" —
#      a fallback for wrappers that do not keep the original exception as
#      #cause. The message text is locale-dependent, but requiring the
#      "pgmq.q_" prefix restricts matches to errors that mention a queue
#      table.
def undefined_queue_table_error?(error)
  if defined?(PG::UndefinedTable)
    underlying = error.respond_to?(:cause) ? error.cause : nil
    return true if underlying.is_a?(PG::UndefinedTable)
  end

  text = error.message
  text.include?("pgmq.q_") && text.include?("does not exist")
end
|
|
161
|
+
|
|
144
162
|
def fetch_prioritized(active_queues, qty)
|
|
145
163
|
remaining = qty
|
|
146
164
|
results = []
|
|
@@ -199,7 +217,7 @@ module Pgbus
|
|
|
199
217
|
def resolve_wildcard_queues
|
|
200
218
|
return unless @wildcard
|
|
201
219
|
|
|
202
|
-
dlq_suffix =
|
|
220
|
+
dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
|
|
203
221
|
prefix = "#{config.queue_prefix}_"
|
|
204
222
|
|
|
205
223
|
conn = Pgbus.configuration.connects_to ? Pgbus::BusRecord.connection : ActiveRecord::Base.connection
|
data/lib/pgbus/version.rb
CHANGED
|
@@ -16,7 +16,7 @@ module Pgbus
|
|
|
16
16
|
queues = queues_with_metrics
|
|
17
17
|
total_depth = queues.sum { |q| q[:queue_length] }
|
|
18
18
|
total_visible = queues.sum { |q| q[:queue_visible_length] }
|
|
19
|
-
dlq_suffix = Pgbus
|
|
19
|
+
dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
|
|
20
20
|
dlq_depth = queues.select { |q| q[:name].end_with?(dlq_suffix) }.sum { |q| q[:queue_length] }
|
|
21
21
|
|
|
22
22
|
throughput = compute_throughput(queues)
|
|
@@ -120,7 +120,7 @@ module Pgbus
|
|
|
120
120
|
end
|
|
121
121
|
|
|
122
122
|
def discard_all_enqueued
|
|
123
|
-
dlq_suffix = Pgbus
|
|
123
|
+
dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
|
|
124
124
|
queues = queues_with_metrics.reject { |q| q[:name].end_with?(dlq_suffix) }
|
|
125
125
|
total = 0
|
|
126
126
|
|
|
@@ -177,16 +177,24 @@ module Pgbus
|
|
|
177
177
|
event = failed_event(id)
|
|
178
178
|
return false unless event
|
|
179
179
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
180
|
+
# Prefer resetting the existing message's visibility timeout to 0
|
|
181
|
+
# so the worker picks it up immediately. This avoids creating a
|
|
182
|
+
# duplicate (the original is still in the queue waiting for retry).
|
|
183
|
+
# Falls back to enqueueing a fresh copy only if the original is gone
|
|
184
|
+
# (e.g., already moved to DLQ).
|
|
185
|
+
msg_id = event["msg_id"]
|
|
186
|
+
if msg_id && @client.message_exists?(event["queue_name"], msg_id: msg_id.to_i)
|
|
187
|
+
@client.set_visibility_timeout(event["queue_name"], msg_id.to_i, vt: 0)
|
|
188
|
+
else
|
|
189
|
+
payload = JSON.parse(event["payload"])
|
|
190
|
+
headers = event["headers"]
|
|
191
|
+
headers = JSON.parse(headers) if headers.is_a?(String)
|
|
185
192
|
@client.send_message(event["queue_name"], payload, headers: headers)
|
|
186
|
-
connection.exec_delete(
|
|
187
|
-
"DELETE FROM pgbus_failed_events WHERE id = $1", "Pgbus Delete Failed Event", [id.to_i]
|
|
188
|
-
)
|
|
189
193
|
end
|
|
194
|
+
|
|
195
|
+
connection.exec_delete(
|
|
196
|
+
"DELETE FROM pgbus_failed_events WHERE id = $1", "Pgbus Delete Failed Event", [id.to_i]
|
|
197
|
+
)
|
|
190
198
|
true
|
|
191
199
|
rescue StandardError => e
|
|
192
200
|
Pgbus.logger.debug { "[Pgbus::Web] Error retrying failed event #{id}: #{e.message}" }
|
|
@@ -195,7 +203,10 @@ module Pgbus
|
|
|
195
203
|
|
|
196
204
|
def discard_failed_event(id)
|
|
197
205
|
event = failed_event(id)
|
|
198
|
-
|
|
206
|
+
if event
|
|
207
|
+
release_lock_for_payload(event["payload"])
|
|
208
|
+
archive_failed_message(event)
|
|
209
|
+
end
|
|
199
210
|
|
|
200
211
|
connection.exec_delete(
|
|
201
212
|
"DELETE FROM pgbus_failed_events WHERE id = $1", "Pgbus Delete Failed Event", [id.to_i]
|
|
@@ -235,6 +246,7 @@ module Pgbus
|
|
|
235
246
|
|
|
236
247
|
def discard_all_failed
|
|
237
248
|
release_locks_for_failed_events
|
|
249
|
+
archive_all_failed_messages
|
|
238
250
|
|
|
239
251
|
result = connection.execute("DELETE FROM pgbus_failed_events")
|
|
240
252
|
result.cmd_tuples
|
|
@@ -247,7 +259,7 @@ module Pgbus
|
|
|
247
259
|
# Note: DLQ queue names from queues_with_metrics are already fully qualified
|
|
248
260
|
# (e.g., "pgbus_default_dlq"), so we use them directly without re-prefixing.
|
|
249
261
|
def dlq_messages(page: 1, per_page: 25)
|
|
250
|
-
dlq_suffix = Pgbus
|
|
262
|
+
dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
|
|
251
263
|
queues = queues_with_metrics.select { |q| q[:name].end_with?(dlq_suffix) }
|
|
252
264
|
offset = (page - 1) * per_page
|
|
253
265
|
|
|
@@ -262,7 +274,7 @@ module Pgbus
|
|
|
262
274
|
end
|
|
263
275
|
|
|
264
276
|
def dlq_message_detail(msg_id)
|
|
265
|
-
dlq_suffix = Pgbus
|
|
277
|
+
dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
|
|
266
278
|
queues = queues_with_metrics.select { |q| q[:name].end_with?(dlq_suffix) }
|
|
267
279
|
queues.each do |q|
|
|
268
280
|
row = connection.select_one(
|
|
@@ -280,7 +292,7 @@ module Pgbus
|
|
|
280
292
|
|
|
281
293
|
def retry_dlq_message(queue_name, msg_id)
|
|
282
294
|
# queue_name here is the full DLQ name (already prefixed)
|
|
283
|
-
dlq_suffix = Pgbus
|
|
295
|
+
dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
|
|
284
296
|
original_queue = queue_name.delete_suffix(dlq_suffix)
|
|
285
297
|
|
|
286
298
|
row = connection.select_one(
|
|
@@ -652,7 +664,7 @@ module Pgbus
|
|
|
652
664
|
end
|
|
653
665
|
|
|
654
666
|
def all_queue_messages(limit, offset)
|
|
655
|
-
dlq_suffix = Pgbus
|
|
667
|
+
dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
|
|
656
668
|
queues = queues_with_metrics.reject { |q| q[:name].end_with?(dlq_suffix) }
|
|
657
669
|
messages = queues.flat_map do |q|
|
|
658
670
|
query_queue_messages_raw(q[:name], limit + offset, 0)
|
|
@@ -849,6 +861,29 @@ module Pgbus
|
|
|
849
861
|
Pgbus.logger.debug { "[Pgbus::Web] Error releasing locks for failed events: #{e.message}" }
|
|
850
862
|
end
|
|
851
863
|
|
|
864
|
+
# Archives the queue message referenced by a failed_event row via
# @client.archive_message. Does nothing when the row lacks a queue name or
# msg_id; any error raised while archiving is logged at debug level and
# swallowed, so callers can treat this as best-effort.
def archive_failed_message(event)
  queue_name = event["queue_name"]
  msg_id = event["msg_id"]

  @client.archive_message(queue_name, msg_id.to_i) if queue_name && msg_id
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error archiving message for failed event #{event["id"]}: #{e.message}" }
end
|
|
875
|
+
|
|
876
|
+
# Archives the queue message behind every pgbus_failed_events row that has
# a msg_id, one row at a time through archive_failed_message. A failure
# while collecting the rows is logged at debug level and swallowed.
def archive_all_failed_messages
  sql = "SELECT id, queue_name, msg_id FROM pgbus_failed_events WHERE msg_id IS NOT NULL"

  connection.select_all(sql, "Pgbus Collect Failed Messages").to_a.each do |failed_event|
    archive_failed_message(failed_event)
  end
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error archiving failed messages: #{e.message}" }
end
|
|
886
|
+
|
|
852
887
|
# Release all uniqueness keys associated with a queue before purge/drop.
|
|
853
888
|
# Scans queue messages for uniqueness metadata and deletes matching rows.
|
|
854
889
|
def release_uniqueness_keys_for_queue(queue_name)
|
data/lib/pgbus.rb
CHANGED
|
@@ -3,6 +3,13 @@
|
|
|
3
3
|
require "zeitwerk"
|
|
4
4
|
|
|
5
5
|
module Pgbus
|
|
6
|
+
# Suffix appended to a queue name to derive its dead-letter companion
|
|
7
|
+
# (e.g. "pgbus_default" -> "pgbus_default_dlq"). Hard-coded here because
|
|
8
|
+
# changing it on a running deployment would orphan every existing DLQ
|
|
9
|
+
# message; nothing in the codebase or in user reports has ever needed
|
|
10
|
+
# this to be configurable.
|
|
11
|
+
DEAD_LETTER_SUFFIX = "_dlq"
|
|
12
|
+
|
|
6
13
|
class Error < StandardError; end
|
|
7
14
|
class ConfigurationError < Error; end
|
|
8
15
|
class SerializationError < Error; end
|
|
@@ -24,7 +31,12 @@ module Pgbus
|
|
|
24
31
|
def loader
|
|
25
32
|
@loader ||= begin
|
|
26
33
|
loader = Zeitwerk::Loader.for_gem
|
|
27
|
-
loader.inflector.inflect(
|
|
34
|
+
loader.inflector.inflect(
|
|
35
|
+
"pgbus" => "Pgbus",
|
|
36
|
+
"cli" => "CLI",
|
|
37
|
+
"dsl" => "DSL",
|
|
38
|
+
"capsule_dsl" => "CapsuleDSL"
|
|
39
|
+
)
|
|
28
40
|
loader.ignore("#{__dir__}/generators")
|
|
29
41
|
loader.ignore("#{__dir__}/active_job")
|
|
30
42
|
loader
|