pgbus 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,323 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+
5
module Pgbus
  module Generators
    # Converts a +config/pgbus.yml+ file into a Ruby initializer
    # (+config/initializers/pgbus.rb+) using the Ruby DSL added in
    # Pgbus 0.5+ — capsule string DSL, ActiveSupport::Duration coercion,
    # auto-tuned pool size, named capsules, etc.
    #
    # Accepted input shapes:
    #   - environment-keyed: every top-level value is a Hash (or empty), e.g.
    #     +production:+ / +development:+ sections. Sections whose name starts
    #     with "default" are treated as YAML anchors and skipped, unless they
    #     are the only sections present, in which case their merged content
    #     is rendered as the (single) configuration.
    #   - flat: top-level keys are the settings themselves. The whole hash is
    #     rendered as one unconditional configuration.
    #
    # Drops settings that:
    #   - match the gem default (no point restating it)
    #   - are deprecated (e.g. pool_size, which is now auto-tuned)
    #
    # Positive Integer values of DURATION_SETTINGS are rendered as durations
    # using the largest unit that divides them evenly (7.days, 10.minutes,
    # 90.seconds). Anything else is rendered as a plain literal.
    #
    # When environments disagree on a setting:
    #   - a setting present in only some environments, with one distinct
    #     value, is emitted unconditionally
    #   - exactly two environments where "development" is explicitly null
    #     → `unless Rails.env.development?` modifier
    #   - everything else → `case Rails.env when ... end`
    #
    # Backwards compatible: the original YAML file is NOT touched. The
    # generator's CLI wrapper writes the new initializer and tells the
    # user to delete the YAML when ready.
    class ConfigConverter
      class Error < StandardError; end

      # Setters that accept ActiveSupport::Duration (PR 5).
      DURATION_SETTINGS = %w[
        visibility_timeout archive_retention idempotency_ttl
        outbox_retention stats_retention recurring_execution_retention
      ].freeze

      # Settings that no longer exist in the public API. The converter
      # silently drops these from the generated initializer so users on
      # legacy YAML get a clean migration.
      #
      #   - pool_size                -> auto-tuned from worker thread counts
      #   - notify_throttle_ms       -> Pgbus::Client::NOTIFY_THROTTLE_MS
      #   - circuit_breaker_*        -> Pgbus::CircuitBreaker constants
      #   - archive_compaction_*     -> Pgbus::Process::Dispatcher constants
      #   - dead_letter_queue_suffix -> Pgbus::DEAD_LETTER_SUFFIX (frozen)
      DEPRECATED_SETTINGS = %w[
        pool_size
        notify_throttle_ms
        circuit_breaker_threshold circuit_breaker_base_backoff circuit_breaker_max_backoff
        archive_compaction_interval archive_compaction_batch_size
        dead_letter_queue_suffix
      ].freeze

      # Settings whose default we know how to compute by inspecting
      # Pgbus::Configuration.new. Any setting not listed here is emitted
      # as-is (we can't tell if it matches the default).
      KNOWN_SETTINGS = %w[
        queue_prefix default_queue pool_timeout listen_notify
        visibility_timeout max_retries idempotency_ttl
        max_jobs_per_worker max_memory_mb max_worker_lifetime
        dispatch_interval prefetch_limit
        circuit_breaker_enabled
        archive_retention
        outbox_enabled outbox_poll_interval outbox_batch_size outbox_retention
        stats_enabled stats_retention
        recurring_schedule_interval recurring_execution_retention skip_recurring
        polling_interval default_priority priority_levels
        return_to_app_url workers
      ].freeze

      # Sentinel marking "this environment does not define the setting",
      # as opposed to an explicit null in the YAML.
      MISSING = :__missing__

      # Thread count assumed for a capsule that does not specify one.
      DEFAULT_THREADS = 5

      # [seconds per unit, singular method name], largest unit first.
      DURATION_UNITS = [
        [86_400, "day"],
        [3600, "hour"],
        [60, "minute"],
        [1, "second"]
      ].freeze

      private_constant :MISSING, :DEFAULT_THREADS, :DURATION_UNITS

      # Reads +path+ and returns the generated initializer source.
      #
      # @param path [String] location of the YAML file
      # @return [String] Ruby source of the initializer
      # @raise [Error] when the file does not exist or is not a mapping
      def self.from_yaml(path)
        raise Error, "config file not found: #{path}" unless File.exist?(path)

        from_hash(YAML.safe_load_file(path, aliases: true, permitted_classes: [Symbol]))
      end

      # Returns the generated initializer source for an already-parsed config.
      #
      # @param envs_hash [Hash, nil] parsed YAML (environment-keyed or flat)
      # @return [String] Ruby source of the initializer
      # @raise [Error] when +envs_hash+ is neither nil/false nor a Hash
      def self.from_hash(envs_hash)
        new(envs_hash).render
      end

      # @param envs_hash [Hash, nil] parsed YAML (environment-keyed or flat)
      # @raise [Error] when +envs_hash+ is neither nil/false nor a Hash
      def initialize(envs_hash)
        @envs = extract_envs(envs_hash)
        @defaults = build_defaults
      end

      # Builds the complete initializer file, terminated by a newline.
      #
      # @return [String]
      def render
        header = [
          "# frozen_string_literal: true",
          "#",
          "# Generated by `rails generate pgbus:update` from config/pgbus.yml.",
          "# Review and adjust as needed, then delete config/pgbus.yml.",
          "",
          "Pgbus.configure do |c|"
        ]
        body = render_body.map { |line| "  #{line}" }

        "#{[*header, *body, "end"].join("\n")}\n"
      end

      private

      # Turns the raw parsed YAML into { "env name" => { "setting" => value } }
      # with string keys throughout.
      def extract_envs(raw)
        return {} unless raw # nil (or false from an empty document)
        raise Error, "expected the config to be a mapping, got #{raw.class}" unless raw.is_a?(Hash)

        config = raw.transform_keys(&:to_s)
        # Flat files have scalar/array values at the top level; there are no
        # environment sections to split, so render them as a single config.
        return { "production" => config } unless env_keyed?(config)

        shared, envs = config.partition { |name, _| name.start_with?("default") }.map(&:to_h)
        unless envs.empty?
          return envs.transform_values { |section| section&.transform_keys(&:to_s) }
        end

        # Only "default*" sections exist: their content is the configuration.
        merged = shared.values.compact.reduce({}) { |acc, section| acc.merge(section.transform_keys(&:to_s)) }
        { "production" => merged }
      end

      # True when every top-level value is a section (Hash) or an empty section.
      def env_keyed?(config)
        config.values.all? { |section| section.nil? || section.is_a?(Hash) }
      end

      # Reads the gem defaults for every known setting the configuration
      # object actually exposes.
      def build_defaults
        config = Pgbus::Configuration.new
        KNOWN_SETTINGS.each_with_object({}) do |key, defaults|
          defaults[key] = config.public_send(key) if config.respond_to?(key)
        end
      end

      # Returns the (unindented) lines that go inside the configure block.
      def render_body
        all_settings = collect_all_settings
        return [] if all_settings.empty?

        constant_settings, varying_settings = partition_by_variance(all_settings)
        special_keys = %w[workers]

        lines = []
        # Special: workers comes first (most user-visible)
        special_keys.each do |key|
          rendered = render_workers(constant_settings[key], varying_settings[key])
          lines.concat(rendered) if rendered
        end

        constant_settings.each do |key, value|
          next if special_keys.include?(key) || drop?(key, value)

          lines << render_setting(key, value)
        end

        varying_settings.each do |key, env_values|
          next if special_keys.include?(key)
          next if env_values.values.all? { |value| drop?(key, value) }

          lines.concat(render_varying_setting(key, env_values))
        end

        lines
      end

      # Returns { "setting" => { "env" => value_or_MISSING } } for every
      # setting mentioned by at least one environment.
      def collect_all_settings
        # An empty section (`test:` with no body) defines nothing; it must
        # yield MISSING for every key rather than an explicit nil.
        sections = @envs.transform_values { |section| section || {} }
        keys = sections.values.flat_map(&:keys).uniq

        keys.to_h do |key|
          [key, sections.transform_values { |section| section.fetch(key, MISSING) }]
        end
      end

      # Returns [constant_settings, varying_settings].
      #   constant_settings: { "key" => value } (one distinct value among the
      #                      environments that define the key)
      #   varying_settings:  { "key" => { env => value, ... } }
      def partition_by_variance(all_settings)
        constant = {}
        varying = {}
        all_settings.each do |key, env_values|
          present = env_values.reject { |_, value| value == MISSING }
          distinct = present.values.uniq
          if distinct.size <= 1
            constant[key] = distinct.first
          else
            varying[key] = present
          end
        end
        [constant, varying]
      end

      # True when the setting should not appear in the generated file.
      def drop?(key, value)
        return true if DEPRECATED_SETTINGS.include?(key)
        return true if value == MISSING
        return true if @defaults.key?(key) && @defaults[key] == value

        false
      end

      def render_setting(key, value)
        "c.#{key} = #{render_value(key, value)}"
      end

      # Renders a setting whose value differs between environments.
      def render_varying_setting(key, env_values)
        if env_values.size == 2 && env_values.key?("development")
          # Special case: "everything except dev" — common pattern
          dev_value = env_values["development"]
          other_value = env_values.reject { |env, _| env == "development" }.values.first
          if dev_value.nil? && other_value
            return ["c.#{key} = #{render_value(key, other_value)} unless Rails.env.development?"]
          end
        end

        # Fallback: case Rails.env block — `when` clauses indented one
        # level inside the case, `end` flush with `c.X` (standard Ruby
        # formatting for assigned case expressions).
        lines = ["c.#{key} = case Rails.env"]
        env_values.each do |env, value|
          lines << "  when #{env.to_s.inspect} then #{render_value(key, value)}"
        end
        lines << "end"
      end

      # Renders the workers setting, or nil when nothing needs to be emitted.
      def render_workers(constant_workers, varying_workers)
        return render_varying_workers(varying_workers) if varying_workers

        rendered = render_workers_value(constant_workers)
        rendered unless rendered.nil? || rendered.empty?
      end

      # Different worker config per env, as an assigned case expression.
      def render_varying_workers(varying_workers)
        lines = ["c.workers = case Rails.env"]
        varying_workers.each do |env, workers|
          lines << "  when #{env.to_s.inspect} then #{workers_literal(workers)}"
        end
        lines << "end"
      end

      # Renders a workers value shared by all environments. Returns nil when
      # it is absent or equal to the gem default.
      def render_workers_value(workers)
        return nil if workers.nil? || workers == MISSING

        # Drop if matches the gem default ([{queues: %w[default], threads: 5}]).
        # Compare via normalized form so YAML's string-keyed hashes match the
        # symbol-keyed default.
        return nil if normalize_workers(workers) == normalize_workers(@defaults["workers"])

        if capsule_list?(workers) && !workers_simple?(workers)
          # Capsules carrying extra options need one explicit call each.
          return workers.each_with_index.map { |capsule, idx| render_capsule_call(capsule, idx) }
        end

        ["c.workers = #{workers_literal(workers)}"]
      end

      # Ruby literal for a workers value used on the right-hand side of an
      # assignment: the compact string DSL when every capsule only has
      # queues/threads, otherwise the value itself so no option is lost.
      def workers_literal(workers)
        return "nil" if workers.nil?
        return workers.inspect unless capsule_list?(workers) && !workers.empty? && workers_simple?(workers)

        workers_as_string(workers).inspect
      end

      # True when +workers+ is an Array made only of Hash capsules.
      def capsule_list?(workers)
        workers.is_a?(Array) && workers.all? { |capsule| capsule.is_a?(Hash) }
      end

      # Normalize a workers array to symbol-keyed hashes with array-of-string
      # queues for stable comparison. Doesn't mutate input. Values that are
      # not a list of capsule hashes are returned untouched.
      def normalize_workers(workers)
        return workers unless capsule_list?(workers)

        workers.map do |capsule|
          {
            queues: Array(capsule[:queues] || capsule["queues"]).map(&:to_s),
            threads: (capsule[:threads] || capsule["threads"] || DEFAULT_THREADS).to_i
          }
        end
      end

      # True when no capsule has options other than queues and threads.
      def workers_simple?(workers)
        workers.all? do |capsule|
          capsule.is_a?(Hash) && (capsule.keys.map(&:to_s) - %w[queues threads]).empty?
        end
      end

      # Capsule string DSL form: "q1, q2: threads; q3: threads".
      def workers_as_string(workers)
        workers.map do |capsule|
          # Array() accepts both `queues: [a, b]` and a scalar `queues: a`.
          queues = Array(capsule["queues"] || capsule[:queues]).join(", ")
          threads = capsule["threads"] || capsule[:threads] || DEFAULT_THREADS
          "#{queues}: #{threads}"
        end.join("; ")
      end

      # Renders one `c.capsule` call. The capsule name falls back to its
      # first queue, then to a positional name so the output always parses.
      def render_capsule_call(capsule, idx)
        queues = Array(capsule["queues"] || capsule[:queues])
        threads = capsule["threads"] || capsule[:threads] || DEFAULT_THREADS
        name = capsule["name"] || capsule[:name] || queues.first || "capsule_#{idx}"
        opts = capsule.reject { |option, _| %w[queues threads name].include?(option.to_s) }

        parts = ["queues: #{queues.inspect}", "threads: #{threads}"]
        opts.each { |option, value| parts << "#{option}: #{value.inspect}" }

        # Symbol#inspect quotes names that are not bare identifiers
        # (:"my-queue"), which plain interpolation after ":" would not.
        "c.capsule #{name.to_s.to_sym.inspect}, #{parts.join(", ")}"
      end

      # Ruby literal for a setting value.
      def render_value(key, value)
        return "nil" if value.nil?

        if DURATION_SETTINGS.include?(key) && value.is_a?(Integer) && value.positive?
          duration_form = format_duration(value)
          return duration_form if duration_form
        end

        if value.is_a?(Integer) && value >= 1000
          format_integer_with_underscores(value)
        else
          value.inspect
        end
      end

      # "7.days", "1.hour", "90.seconds": the largest unit dividing +seconds+
      # evenly. Returns nil only if no unit matches.
      def format_duration(seconds)
        unit_seconds, unit = DURATION_UNITS.find { |size, _| (seconds % size).zero? }
        return nil unless unit_seconds

        count = seconds / unit_seconds
        "#{count}.#{count == 1 ? unit : "#{unit}s"}"
      end

      # 1234567 -> "1_234_567"
      def format_integer_with_underscores(int)
        int.to_s.reverse.scan(/\d{1,3}/).join("_").reverse
      end
    end
  end
end
@@ -16,6 +16,12 @@ module Pgbus
16
16
  JOB_LOCK_CLEANUP_INTERVAL = 300 # Run job lock cleanup every 5 minutes
17
17
  STATS_CLEANUP_INTERVAL = 3600 # Run stats cleanup every hour
18
18
 
19
+ # Page size for archive compaction. Each cycle deletes up to this
20
+ # many archived rows per queue. Tuned via constant rather than
21
+ # configuration because the value rarely needs adjusting and a
22
+ # too-small value just delays cleanup, never breaks anything.
23
+ ARCHIVE_COMPACTION_BATCH_SIZE = 1000
24
+
19
25
  attr_reader :config
20
26
 
21
27
  def initialize(config: Pgbus.configuration)
@@ -72,7 +78,7 @@ module Pgbus
72
78
  run_if_due(now, :@last_concurrency_at, CONCURRENCY_INTERVAL) { cleanup_concurrency }
73
79
  run_if_due(now, :@last_batch_cleanup_at, BATCH_CLEANUP_INTERVAL) { cleanup_batches }
74
80
  run_if_due(now, :@last_recurring_cleanup_at, RECURRING_CLEANUP_INTERVAL) { cleanup_recurring_executions }
75
- run_if_due(now, :@last_archive_compaction_at, archive_compaction_interval) { compact_archives }
81
+ run_if_due(now, :@last_archive_compaction_at, ARCHIVE_COMPACTION_INTERVAL) { compact_archives }
76
82
  run_if_due(now, :@last_outbox_cleanup_at, OUTBOX_CLEANUP_INTERVAL) { cleanup_outbox }
77
83
  run_if_due(now, :@last_job_lock_cleanup_at, JOB_LOCK_CLEANUP_INTERVAL) { cleanup_job_locks }
78
84
  run_if_due(now, :@last_stats_cleanup_at, STATS_CLEANUP_INTERVAL) { cleanup_stats }
@@ -144,9 +150,11 @@ module Pgbus
144
150
  end
145
151
 
146
152
  def cleanup_job_locks
147
- # Clean up orphaned uniqueness keys whose msg_id no longer exists
148
- # in any PGMQ queue. This handles the rare case where a message is
149
- # lost (e.g., queue table truncated) but the uniqueness key remains.
153
+ # Clean up truly orphaned uniqueness keys: rows whose referenced
154
+ # message no longer exists in the PGMQ queue. This handles crashes
155
+ # or queue truncation. It must NEVER delete a lock while the message
156
+ # is still in the queue, even if the lock is "old" — recurring jobs
157
+ # that fail and retry can hold locks for hours.
150
158
  reaped = reap_orphaned_uniqueness_keys
151
159
  Pgbus.logger.info { "[Pgbus] Reaped #{reaped} orphaned uniqueness keys" } if reaped.positive?
152
160
  end
@@ -155,20 +163,17 @@ module Pgbus
155
163
  keys = UniquenessKey.all.to_a
156
164
  return 0 if keys.empty?
157
165
 
166
+ # Only consider locks that are old enough that we wouldn't be racing
167
+ # an in-flight enqueue. visibility_timeout * 2 is the floor — anything
168
+ # younger could be a freshly-acquired lock whose send_message hasn't
169
+ # committed yet.
158
170
  threshold = Time.current - (config.visibility_timeout * 2)
159
171
 
160
172
  orphaned = keys.select do |key|
161
- # msg_id == 0 means pre-produce placeholder or :while_executing lock.
162
- # These are live locks — never reap them based on msg_id alone.
163
- # Only reap if old enough that the job is certainly gone.
164
- next false if key.msg_id.zero? && (!key.created_at || key.created_at >= threshold)
165
- next true if key.msg_id.zero? && key.created_at && key.created_at < threshold
166
-
167
- # For real msg_ids, only reap if stale (old enough that VT has
168
- # long expired). The message itself may still be in the queue
169
- # awaiting retry — age is the only safe signal without scanning
170
- # every queue table.
171
- key.created_at && key.created_at < threshold
173
+ next false unless key.created_at && key.created_at < threshold
174
+ next false unless key.queue_name
175
+
176
+ message_gone?(key)
172
177
  end
173
178
 
174
179
  return 0 if orphaned.empty?
@@ -179,6 +184,27 @@ module Pgbus
179
184
  0
180
185
  end
181
186
 
187
# Tells the reaper whether the message behind a uniqueness lock has
# verifiably left its queue.
#
# The lookup goes through Pgbus.client.message_exists?: by msg_id when the
# lock carries a positive one, otherwise by the lock's uniqueness key. Only
# an explicit +false+ answer counts as "gone". A +nil+ answer, a truthy
# answer, or any StandardError raised during the lookup all yield +false+,
# so a lock is never reaped on an inconclusive check.
def message_gone?(key)
  numeric_id = key.msg_id.to_i
  lookup = numeric_id.positive? ? { msg_id: numeric_id } : { uniqueness_key: key.lock_key }

  Pgbus.client.message_exists?(key.queue_name, **lookup) == false
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Reap check failed for #{key.lock_key}: #{e.message}" }
  false
end
207
+
182
208
  def cleanup_outbox
183
209
  return unless config.outbox_enabled
184
210
 
@@ -191,16 +217,12 @@ module Pgbus
191
217
  Pgbus.logger.warn { "[Pgbus] Outbox cleanup failed: #{e.message}" }
192
218
  end
193
219
 
194
- def archive_compaction_interval
195
- config.archive_compaction_interval || ARCHIVE_COMPACTION_INTERVAL
196
- end
197
-
198
220
  def compact_archives
199
221
  retention = config.archive_retention
200
222
  return unless retention&.positive?
201
223
 
202
224
  cutoff = Time.current - retention
203
- batch_size = config.archive_compaction_batch_size || 1000
225
+ batch_size = ARCHIVE_COMPACTION_BATCH_SIZE
204
226
  prefix = config.queue_prefix
205
227
 
206
228
  conn = config.connects_to ? Pgbus::BusRecord.connection : ActiveRecord::Base.connection
@@ -42,22 +42,17 @@ module Pgbus
42
42
  private
43
43
 
44
44
  def boot_processes
45
- # Boot workers
46
- config.workers.each do |worker_config|
47
- fork_worker(worker_config)
48
- end
49
-
50
- # Boot dispatcher
51
- fork_dispatcher
52
-
53
- # Boot recurring scheduler if configured
54
- boot_scheduler
55
-
56
- # Boot event consumers if configured
57
- boot_consumers
58
-
59
- # Boot outbox poller if configured
60
- boot_outbox_poller
45
+ # Boot workers (workers may be nil for scheduler-only or
46
+ # dispatcher-only deployments via --workers-only / --scheduler-only /
47
+ # --dispatcher-only CLI flags). Each role is gated by
48
+ # config.role_enabled?, which returns true unless +config.roles+ has
49
+ # been narrowed.
50
+ Array(config.workers).each { |worker_config| fork_worker(worker_config) } if config.role_enabled?(:workers)
51
+
52
+ fork_dispatcher if config.role_enabled?(:dispatcher)
53
+ boot_scheduler if config.role_enabled?(:scheduler)
54
+ boot_consumers if config.role_enabled?(:consumers)
55
+ boot_outbox_poller if config.role_enabled?(:outbox)
61
56
  end
62
57
 
63
58
  def fork_worker(worker_config)
@@ -133,7 +133,7 @@ module Pgbus
133
133
  fetch_multi(active_queues, qty)
134
134
  end
135
135
  rescue StandardError => e
136
- if e.message.include?("does not exist") && e.message.include?("pgmq.q_")
136
+ if undefined_queue_table_error?(e)
137
137
  evict_missing_queues(e)
138
138
  else
139
139
  Pgbus.logger.error { "[Pgbus] Error fetching messages: #{e.message}" }
@@ -141,6 +141,24 @@ module Pgbus
141
141
  []
142
142
  end
143
143
 
144
# Decides whether +error+ means "a queue table is missing".
#
# Two signals are accepted:
#   1. the error's direct #cause is a PG::UndefinedTable (only checked when
#      that constant is loaded);
#   2. the error message contains both "pgmq.q_" and "does not exist". This
#      keeps detection working when a wrapper exception does not preserve
#      the original cause. The text match depends on the server's message
#      wording, but the "pgmq.q_" requirement limits it to errors that name
#      a queue table.
#
# Returns true or false.
def undefined_queue_table_error?(error)
  if defined?(PG::UndefinedTable)
    underlying = error.respond_to?(:cause) ? error.cause : nil
    return true if underlying.is_a?(PG::UndefinedTable)
  end

  text = error.message
  text.include?("pgmq.q_") && text.include?("does not exist")
end
161
+
144
162
  def fetch_prioritized(active_queues, qty)
145
163
  remaining = qty
146
164
  results = []
@@ -199,7 +217,7 @@ module Pgbus
199
217
  def resolve_wildcard_queues
200
218
  return unless @wildcard
201
219
 
202
- dlq_suffix = config.dead_letter_queue_suffix
220
+ dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
203
221
  prefix = "#{config.queue_prefix}_"
204
222
 
205
223
  conn = Pgbus.configuration.connects_to ? Pgbus::BusRecord.connection : ActiveRecord::Base.connection
data/lib/pgbus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pgbus
4
- VERSION = "0.4.0"
4
+ VERSION = "0.5.0"
5
5
  end
@@ -16,7 +16,7 @@ module Pgbus
16
16
  queues = queues_with_metrics
17
17
  total_depth = queues.sum { |q| q[:queue_length] }
18
18
  total_visible = queues.sum { |q| q[:queue_visible_length] }
19
- dlq_suffix = Pgbus.configuration.dead_letter_queue_suffix
19
+ dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
20
20
  dlq_depth = queues.select { |q| q[:name].end_with?(dlq_suffix) }.sum { |q| q[:queue_length] }
21
21
 
22
22
  throughput = compute_throughput(queues)
@@ -120,7 +120,7 @@ module Pgbus
120
120
  end
121
121
 
122
122
  def discard_all_enqueued
123
- dlq_suffix = Pgbus.configuration.dead_letter_queue_suffix
123
+ dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
124
124
  queues = queues_with_metrics.reject { |q| q[:name].end_with?(dlq_suffix) }
125
125
  total = 0
126
126
 
@@ -177,16 +177,24 @@ module Pgbus
177
177
  event = failed_event(id)
178
178
  return false unless event
179
179
 
180
- payload = JSON.parse(event["payload"])
181
- headers = event["headers"]
182
- headers = JSON.parse(headers) if headers.is_a?(String)
183
-
184
- connection.transaction do
180
+ # Prefer resetting the existing message's visibility timeout to 0
181
+ # so the worker picks it up immediately. This avoids creating a
182
+ # duplicate (the original is still in the queue waiting for retry).
183
+ # Falls back to enqueueing a fresh copy only if the original is gone
184
+ # (e.g., already moved to DLQ).
185
+ msg_id = event["msg_id"]
186
+ if msg_id && @client.message_exists?(event["queue_name"], msg_id: msg_id.to_i)
187
+ @client.set_visibility_timeout(event["queue_name"], msg_id.to_i, vt: 0)
188
+ else
189
+ payload = JSON.parse(event["payload"])
190
+ headers = event["headers"]
191
+ headers = JSON.parse(headers) if headers.is_a?(String)
185
192
  @client.send_message(event["queue_name"], payload, headers: headers)
186
- connection.exec_delete(
187
- "DELETE FROM pgbus_failed_events WHERE id = $1", "Pgbus Delete Failed Event", [id.to_i]
188
- )
189
193
  end
194
+
195
+ connection.exec_delete(
196
+ "DELETE FROM pgbus_failed_events WHERE id = $1", "Pgbus Delete Failed Event", [id.to_i]
197
+ )
190
198
  true
191
199
  rescue StandardError => e
192
200
  Pgbus.logger.debug { "[Pgbus::Web] Error retrying failed event #{id}: #{e.message}" }
@@ -195,7 +203,10 @@ module Pgbus
195
203
 
196
204
  def discard_failed_event(id)
197
205
  event = failed_event(id)
198
- release_lock_for_payload(event["payload"]) if event
206
+ if event
207
+ release_lock_for_payload(event["payload"])
208
+ archive_failed_message(event)
209
+ end
199
210
 
200
211
  connection.exec_delete(
201
212
  "DELETE FROM pgbus_failed_events WHERE id = $1", "Pgbus Delete Failed Event", [id.to_i]
@@ -235,6 +246,7 @@ module Pgbus
235
246
 
236
247
  def discard_all_failed
237
248
  release_locks_for_failed_events
249
+ archive_all_failed_messages
238
250
 
239
251
  result = connection.execute("DELETE FROM pgbus_failed_events")
240
252
  result.cmd_tuples
@@ -247,7 +259,7 @@ module Pgbus
247
259
  # Note: DLQ queue names from queues_with_metrics are already fully qualified
248
260
  # (e.g., "pgbus_default_dlq"), so we use them directly without re-prefixing.
249
261
  def dlq_messages(page: 1, per_page: 25)
250
- dlq_suffix = Pgbus.configuration.dead_letter_queue_suffix
262
+ dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
251
263
  queues = queues_with_metrics.select { |q| q[:name].end_with?(dlq_suffix) }
252
264
  offset = (page - 1) * per_page
253
265
 
@@ -262,7 +274,7 @@ module Pgbus
262
274
  end
263
275
 
264
276
  def dlq_message_detail(msg_id)
265
- dlq_suffix = Pgbus.configuration.dead_letter_queue_suffix
277
+ dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
266
278
  queues = queues_with_metrics.select { |q| q[:name].end_with?(dlq_suffix) }
267
279
  queues.each do |q|
268
280
  row = connection.select_one(
@@ -280,7 +292,7 @@ module Pgbus
280
292
 
281
293
  def retry_dlq_message(queue_name, msg_id)
282
294
  # queue_name here is the full DLQ name (already prefixed)
283
- dlq_suffix = Pgbus.configuration.dead_letter_queue_suffix
295
+ dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
284
296
  original_queue = queue_name.delete_suffix(dlq_suffix)
285
297
 
286
298
  row = connection.select_one(
@@ -652,7 +664,7 @@ module Pgbus
652
664
  end
653
665
 
654
666
  def all_queue_messages(limit, offset)
655
- dlq_suffix = Pgbus.configuration.dead_letter_queue_suffix
667
+ dlq_suffix = Pgbus::DEAD_LETTER_SUFFIX
656
668
  queues = queues_with_metrics.reject { |q| q[:name].end_with?(dlq_suffix) }
657
669
  messages = queues.flat_map do |q|
658
670
  query_queue_messages_raw(q[:name], limit + offset, 0)
@@ -849,6 +861,29 @@ module Pgbus
849
861
  Pgbus.logger.debug { "[Pgbus::Web] Error releasing locks for failed events: #{e.message}" }
850
862
  end
851
863
 
864
# Archives the queue message that a failed_event row refers to.
#
# Does nothing (returns nil) when the row has no queue name or no msg_id.
# Any StandardError raised while archiving is logged at debug level and
# swallowed, so callers can treat this as best-effort.
def archive_failed_message(event)
  queue_name = event["queue_name"]
  msg_id = event["msg_id"]
  return unless queue_name && msg_id

  @client.archive_message(queue_name, msg_id.to_i)
rescue StandardError => e
  Pgbus.logger.debug do
    "[Pgbus::Web] Error archiving message for failed event #{event["id"]}: #{e.message}"
  end
end
875
+
876
# Archives the queue message behind every failed_event row that has a
# msg_id, one row at a time via archive_failed_message.
#
# A StandardError raised while loading the rows is logged at debug level
# and swallowed.
def archive_all_failed_messages
  sql = "SELECT id, queue_name, msg_id FROM pgbus_failed_events WHERE msg_id IS NOT NULL"
  failed_rows = connection.select_all(sql, "Pgbus Collect Failed Messages").to_a

  failed_rows.each { |row| archive_failed_message(row) }
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error archiving failed messages: #{e.message}" }
end
886
+
852
887
  # Release all uniqueness keys associated with a queue before purge/drop.
853
888
  # Scans queue messages for uniqueness metadata and deletes matching rows.
854
889
  def release_uniqueness_keys_for_queue(queue_name)
data/lib/pgbus.rb CHANGED
@@ -3,6 +3,13 @@
3
3
  require "zeitwerk"
4
4
 
5
5
  module Pgbus
6
+ # Suffix appended to a queue name to derive its dead-letter companion
7
+ # (e.g. "pgbus_default" -> "pgbus_default_dlq"). Hard-coded here because
8
+ # changing it on a running deployment would orphan every existing DLQ
9
+ # message; nothing in the codebase or in user reports has ever needed
10
+ # this to be configurable.
11
+ DEAD_LETTER_SUFFIX = "_dlq"
12
+
6
13
  class Error < StandardError; end
7
14
  class ConfigurationError < Error; end
8
15
  class SerializationError < Error; end
@@ -24,7 +31,12 @@ module Pgbus
24
31
  def loader
25
32
  @loader ||= begin
26
33
  loader = Zeitwerk::Loader.for_gem
27
- loader.inflector.inflect("pgbus" => "Pgbus", "cli" => "CLI", "dsl" => "DSL")
34
+ loader.inflector.inflect(
35
+ "pgbus" => "Pgbus",
36
+ "cli" => "CLI",
37
+ "dsl" => "DSL",
38
+ "capsule_dsl" => "CapsuleDSL"
39
+ )
28
40
  loader.ignore("#{__dir__}/generators")
29
41
  loader.ignore("#{__dir__}/active_job")
30
42
  loader