pgbus 0.7.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,7 +74,8 @@ module Pgbus
74
74
  add_outbox: "pgbus:add_outbox",
75
75
  add_recurring: "pgbus:add_recurring",
76
76
  add_failed_events_index: "pgbus:add_failed_events_index",
77
- tune_autovacuum: "pgbus:tune_autovacuum"
77
+ tune_autovacuum: "pgbus:tune_autovacuum",
78
+ tune_fillfactor: "pgbus:tune_fillfactor"
78
79
  }.freeze
79
80
 
80
81
  # Human-friendly description of each migration for the generator
@@ -90,7 +91,8 @@ module Pgbus
90
91
  add_outbox: "outbox entries table (transactional outbox)",
91
92
  add_recurring: "recurring tasks + executions tables",
92
93
  add_failed_events_index: "unique index on pgbus_failed_events (queue_name, msg_id)",
93
- tune_autovacuum: "autovacuum tuning for PGMQ queue and archive tables"
94
+ tune_autovacuum: "autovacuum tuning for PGMQ queue and archive tables",
95
+ tune_fillfactor: "fillfactor=70 on PGMQ queue tables (reduces page density during update churn)"
94
96
  }.freeze
95
97
 
96
98
  def initialize(connection)
@@ -113,7 +115,8 @@ module Pgbus
113
115
  *outbox_migrations,
114
116
  *recurring_migrations,
115
117
  *failed_events_index_migrations,
116
- *autovacuum_migrations
118
+ *autovacuum_migrations,
119
+ *fillfactor_migrations
117
120
  ]
118
121
  end
119
122
 
@@ -205,6 +208,15 @@ module Pgbus
205
208
  [:tune_autovacuum]
206
209
  end
207
210
 
211
+ # Fillfactor tuning: probe the first PGMQ queue table (ordered by name)
212
+ # for an existing fillfactor setting. If none is found, queue the migration.
213
# @return [Array<Symbol>] [:tune_fillfactor] when the pgmq schema exists and
#   the fillfactor probe does not report it as already applied; [] otherwise.
def fillfactor_migrations
  # Short-circuits: the fillfactor probe only runs when the schema exists.
  pending = pgmq_schema_exists? && !fillfactor_already_tuned?
  pending ? [:tune_fillfactor] : []
end
219
+
208
220
  # --- schema probes -------------------------------------------------
209
221
 
210
222
  def table_exists?(name)
@@ -247,6 +259,22 @@ module Pgbus
247
259
  rescue StandardError
248
260
  true # if we can't tell, assume already tuned (safe default)
249
261
  end
262
+
263
# Probes whether fillfactor tuning has already been applied.
#
# Only the first queue (ordered by name in pgmq.meta) is inspected; its
# table's reloptions are checked for a fillfactor entry.
#
# @return [Boolean] true when there are no queues, when the probed table
#   already carries a fillfactor option, or when the probe raises (safe
#   default: do not queue the migration); false otherwise.
def fillfactor_already_tuned?
  queue_name = connection.select_value("SELECT queue_name FROM pgmq.meta ORDER BY queue_name LIMIT 1")
  return true unless queue_name # no queues = nothing to tune, skip

  # Quote the name as a SQL string literal instead of interpolating it raw,
  # so a queue name containing a quote cannot break (or alter) the query.
  table_literal = connection.quote("q_#{queue_name}")

  # NOTE(review): PGMQ appears to create its queue tables with lower-cased
  # names while pgmq.meta keeps the name as given; lower() makes the probe
  # match for mixed-case queue names too — confirm against the PGMQ version
  # in use.
  result = connection.select_value(<<~SQL)
    SELECT reloptions::text LIKE '%fillfactor%'
    FROM pg_class
    WHERE relname = lower(#{table_literal})
      AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'pgmq')
  SQL

  # Adapters may hand back a Ruby boolean or the raw "t" string.
  [true, "t"].include?(result)
rescue StandardError
  true # if we can't tell, assume already tuned (safe default)
end
250
278
  end
251
279
  end
252
280
  end
@@ -15,6 +15,7 @@ module Pgbus
15
15
  OUTBOX_CLEANUP_INTERVAL = 3600 # Run outbox cleanup every hour
16
16
  JOB_LOCK_CLEANUP_INTERVAL = 300 # Run job lock cleanup every 5 minutes
17
17
  STATS_CLEANUP_INTERVAL = 3600 # Run stats cleanup every hour
18
+ TABLE_MAINTENANCE_INTERVAL = Pgbus::TableMaintenance::MAINTENANCE_INTERVAL
18
19
 
19
20
  # Page size for archive compaction. Each cycle deletes up to this
20
21
  # many archived rows per queue. Tuned via constant rather than
@@ -37,6 +38,7 @@ module Pgbus
37
38
  @last_outbox_cleanup_at = monotonic_now
38
39
  @last_job_lock_cleanup_at = monotonic_now
39
40
  @last_stats_cleanup_at = monotonic_now
41
+ @last_table_maintenance_at = monotonic_now
40
42
  end
41
43
 
42
44
  def run
@@ -84,6 +86,7 @@ module Pgbus
84
86
  run_if_due(now, :@last_outbox_cleanup_at, OUTBOX_CLEANUP_INTERVAL) { cleanup_outbox }
85
87
  run_if_due(now, :@last_job_lock_cleanup_at, JOB_LOCK_CLEANUP_INTERVAL) { cleanup_job_locks }
86
88
  run_if_due(now, :@last_stats_cleanup_at, STATS_CLEANUP_INTERVAL) { cleanup_stats }
89
+ run_if_due(now, :@last_table_maintenance_at, TABLE_MAINTENANCE_INTERVAL) { run_table_maintenance }
87
90
  end
88
91
 
89
92
  # Only update the timestamp when the block succeeds.
@@ -158,6 +161,19 @@ module Pgbus
158
161
  Pgbus.logger.debug { "[Pgbus] Cleaned up #{deleted} old stream stats" } if deleted.positive?
159
162
  end
160
163
 
164
# Runs bloat-driven maintenance on the PGMQ queue tables using the raw
# database connection, with reindexing enabled.
#
# The connection comes from Pgbus::BusRecord when config.connects_to is
# set, otherwise from ActiveRecord::Base. Any StandardError is logged as a
# warning and swallowed.
def run_table_maintenance
  record_class = config.connects_to ? Pgbus::BusRecord : ActiveRecord::Base
  vacuumed = TableMaintenance.run_maintenance(
    record_class.connection.raw_connection,
    threshold: TableMaintenance::BLOAT_THRESHOLD,
    reindex: true
  )
  return unless vacuumed.positive?

  Pgbus.logger.info { "[Pgbus] Table maintenance completed: #{vacuumed} table(s) vacuumed" }
rescue StandardError => error
  Pgbus.logger.warn { "[Pgbus] Table maintenance failed: #{error.message}" }
end
176
+
161
177
  def cleanup_job_locks
162
178
  # Clean up truly orphaned uniqueness keys: rows whose referenced
163
179
  # message no longer exists in the PGMQ queue. This handles crashes
@@ -145,9 +145,12 @@ module Pgbus
145
145
 
146
146
  def recurring_tasks_configured?
147
147
  return true if config.recurring_tasks&.any?
148
+
149
+ files = config.recurring_tasks_files
150
+ return true if files&.any? { |f| File.exist?(f.to_s) }
151
+
148
152
  return true if config.recurring_tasks_file && File.exist?(config.recurring_tasks_file.to_s)
149
153
 
150
- # Check default location
151
154
  if defined?(Rails) && Rails.respond_to?(:root) && Rails.root
152
155
  default_path = Rails.root.join("config", "recurring.yml")
153
156
  return File.exist?(default_path.to_s)
@@ -159,6 +162,13 @@ module Pgbus
159
162
  def load_recurring_config
160
163
  return if config.recurring_tasks&.any?
161
164
 
165
+ files = config.recurring_tasks_files
166
+ if files
167
+ tasks = Recurring::ConfigLoader.load_all(files)
168
+ config.recurring_tasks = tasks unless tasks.empty?
169
+ return if tasks.any?
170
+ end
171
+
162
172
  path = config.recurring_tasks_file
163
173
  path ||= defined?(Rails) && Rails.respond_to?(:root) && Rails.root ? Rails.root.join("config", "recurring.yml") : nil
164
174
  return unless path && File.exist?(path.to_s)
@@ -23,6 +23,30 @@ module Pgbus
23
23
  {}
24
24
  end
25
25
 
26
# Loads several recurring-task files and merges them into one Hash.
#
# Entries that respond to #to_path are converted with it, everything else
# with #to_s; nil and empty entries are dropped. Files are processed in the
# given order and later files override earlier ones on key collisions (each
# override is logged at debug level). Missing files are skipped with a
# warning; files whose parsed content is not a Hash are skipped with an
# error log.
#
# @param paths [Object] a single path or a list of paths
# @param env [String, nil] environment forwarded to #load; detected via
#   #detect_env when nil
# @return [Hash] merged task definitions ({} when there is nothing to load)
def load_all(paths, env: nil)
  files = Array(paths).compact.each_with_object([]) do |entry, list|
    name = entry.respond_to?(:to_path) ? entry.to_path : entry.to_s
    list << name unless name.empty?
  end
  return {} if files.empty?

  env ||= detect_env
  merged = {}

  files.each do |file|
    if File.exist?(file.to_s)
      tasks = load(file, env: env)

      if tasks.is_a?(Hash)
        overridden = tasks.each_key.select { |key| merged.key?(key) }
        overridden.each do |key|
          Pgbus.logger.debug { "[Pgbus] Recurring task '#{key}' overridden by #{file}" }
        end
        merged.merge!(tasks)
      else
        Pgbus.logger.error { "[Pgbus] Invalid recurring config in #{file}: expected Hash, got #{tasks.class}" }
      end
    else
      Pgbus.logger.warn { "[Pgbus] Recurring file not found, skipping: #{file}" }
    end
  end

  merged
end
49
+
26
50
  def detect_env
27
51
  if defined?(Rails) && Rails.respond_to?(:env) && Rails.env
28
52
  Rails.env.to_s
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
module Pgbus
  # Proactive table maintenance to reduce bloat on PGMQ queue tables.
  #
  # PGMQ's read operation UPDATEs three columns (vt, read_ct, last_read_at)
  # on every message read. With the default fillfactor of 100, every UPDATE
  # creates a new heap tuple AND a new index entry — the dead tuple and its
  # old index pointer remain until VACUUM. Under sustained load, autovacuum
  # can't keep up and B-tree indexes bloat.
  #
  # Setting fillfactor=70 on queue tables reserves 30% of each page for
  # update churn. Because `vt` is indexed and changes on every read, these
  # writes are not HOT updates, but leaving headroom on heap pages still
  # reduces page density for a table that is updated heavily between vacuum
  # passes.
  #
  # More importantly, this module provides targeted VACUUM: instead of
  # relying solely on autovacuum's global heuristics, callers periodically
  # check pg_stat_user_tables for queue tables with a high dead-tuple ratio
  # and vacuum (and optionally reindex) them explicitly.
  module TableMaintenance
    FILLFACTOR = 70
    BLOAT_THRESHOLD = 0.1
    MAINTENANCE_INTERVAL = 6 * 3600 # 6 hours

    # Table names matching this can be emitted unquoted, which keeps the
    # generated SQL identical to what earlier versions produced for them.
    PLAIN_IDENTIFIER = /\A[A-Za-z_][A-Za-z0-9_]*\z/.freeze

    class << self
      # Builds the ALTER TABLE statement that sets the fillfactor on one queue.
      #
      # @param queue_name [String] queue name without the "q_" prefix
      # @return [String] SQL statement (terminated with ";")
      def fillfactor_sql_for_queue(queue_name)
        "ALTER TABLE pgmq.#{queue_table_identifier(queue_name)} SET (fillfactor = #{FILLFACTOR});"
      end

      # Builds a DO block that sets the fillfactor on every queue listed in
      # pgmq.meta.
      #
      # The whole table name is passed through %I. Writing `q_%I` would quote
      # only the queue-name part and yield invalid SQL (q_"name") whenever the
      # name needs quoting.
      # NOTE(review): lower() assumes PGMQ creates its tables lower-cased —
      # confirm against the PGMQ version in use.
      #
      # @return [String] PL/pgSQL DO block
      def fillfactor_sql_for_all_queues
        <<~SQL
          DO $$
          DECLARE
            q RECORD;
          BEGIN
            FOR q IN SELECT queue_name FROM pgmq.meta LOOP
              EXECUTE format('ALTER TABLE pgmq.%I SET (fillfactor = #{FILLFACTOR})', lower('q_' || q.queue_name));
            END LOOP;
          END $$;
        SQL
      end

      # Lists pgmq queue tables whose dead-tuple ratio exceeds the threshold.
      #
      # @param conn [#exec] connection whose #exec returns rows that can be
      #   read with string keys
      # @param threshold [Float] minimum dead / (dead + live) ratio (exclusive)
      # @return [Array<Hash>] one hash per candidate with :table,
      #   :dead_tuples, :live_tuples and :dead_ratio, ordered by dead tuples
      #   descending (the query's order)
      def vacuum_candidates(conn, threshold: BLOAT_THRESHOLD)
        # left(...) = 'q_' is an exact prefix test; in LIKE 'q_%' the
        # underscore is itself a single-character wildcard.
        rows = conn.exec(<<~SQL)
          SELECT schemaname, relname, n_dead_tup, n_live_tup
          FROM pg_stat_user_tables
          WHERE schemaname = 'pgmq'
            AND left(relname, 2) = 'q_'
          ORDER BY n_dead_tup DESC
        SQL

        rows.each_with_object([]) do |row, candidates|
          dead = row["n_dead_tup"].to_i
          live = row["n_live_tup"].to_i
          total = dead + live
          next if total.zero? # empty table: no ratio to compute

          ratio = dead.to_f / total
          next unless ratio > threshold

          candidates << {
            table: "#{row["schemaname"]}.#{row["relname"]}",
            dead_tuples: dead,
            live_tuples: live,
            dead_ratio: ratio.round(4)
          }
        end
      end

      # @param table [String] "schema.relname"
      # @return [String] VACUUM statement with both parts quoted
      def vacuum_sql(table)
        schema, relname = table.split(".", 2)
        "VACUUM #{quote_identifier(schema)}.#{quote_identifier(relname)}"
      end

      # @param table [String] "schema.relname"
      # @return [String] REINDEX TABLE CONCURRENTLY statement with both parts
      #   quoted
      def reindex_sql(table)
        schema, relname = table.split(".", 2)
        "REINDEX TABLE CONCURRENTLY #{quote_identifier(schema)}.#{quote_identifier(relname)}"
      end

      # Vacuums (and optionally reindexes) every candidate table.
      #
      # A failure on one table is logged and does not stop the others.
      # PostgreSQL refuses VACUUM and REINDEX ... CONCURRENTLY inside a
      # transaction block, so conn must not have a transaction open.
      #
      # @param conn [#exec] raw database connection
      # @param threshold [Float] forwarded to #vacuum_candidates
      # @param reindex [Boolean] also run REINDEX TABLE CONCURRENTLY
      # @return [Integer] number of tables for which every step succeeded
      def run_maintenance(conn, threshold: BLOAT_THRESHOLD, reindex: true)
        vacuum_candidates(conn, threshold: threshold).count do |candidate|
          maintain_table(conn, candidate, reindex: reindex)
        end
      end

      private

      # Runs the maintenance statements for one candidate; true on success.
      def maintain_table(conn, candidate, reindex:)
        table = candidate[:table]
        Pgbus.logger.info do
          "[Pgbus::TableMaintenance] Vacuuming #{table} " \
            "(dead_ratio=#{candidate[:dead_ratio]}, dead=#{candidate[:dead_tuples]})"
        end
        conn.exec(vacuum_sql(table))

        if reindex
          Pgbus.logger.info { "[Pgbus::TableMaintenance] Reindexing #{table}" }
          conn.exec(reindex_sql(table))
        end

        true
      rescue StandardError => e
        Pgbus.logger.error { "[Pgbus::TableMaintenance] Failed to maintain #{table}: #{e.message}" }
        false
      end

      # Table identifier for a queue: unquoted when it is a plain identifier
      # (same output as before), otherwise lower-cased and double-quoted.
      def queue_table_identifier(queue_name)
        table = "q_#{queue_name}"
        return table if PLAIN_IDENTIFIER.match?(table)

        quote_identifier(table.downcase)
      end

      # Double-quotes an SQL identifier, doubling any embedded double quote.
      def quote_identifier(name)
        %("#{name.to_s.gsub('"', '""')}")
      end
    end
  end
end
data/lib/pgbus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pgbus
4
- VERSION = "0.7.3"
4
+ VERSION = "0.7.5"
5
5
  end
@@ -707,10 +707,155 @@ module Pgbus
707
707
  []
708
708
  end
709
709
 
710
- # Subscriber registry
710
+ # Pending events — messages sitting in handler queues that haven't been processed.
711
+ # Identifies handler queues via the subscriber registry and queries them
712
+ # for unprocessed messages. Subscriber queue names are logical
713
+ # (e.g. "task_completion_handler"), while `pgmq.meta.queue_name` stores
714
+ # physical names (e.g. "pgbus_task_completion_handler"), so we normalize
715
+ # through `config.queue_name` before intersecting.
716
# @param page [Integer] 1-based page number; values below 1 (or nil) are
#   treated as page 1
# @param per_page [Integer] number of messages per page
# @return [Array] whatever paginated_queue_messages returns for the handler
#   queues that exist in pgmq.meta; [] when there are none or on any error
def pending_events(page: 1, per_page: 25)
  handler_queues = handler_queue_physical_names
  return [] if handler_queues.empty?

  existing = connection.select_values(
    "SELECT queue_name FROM pgmq.meta ORDER BY queue_name", "Pgbus Queue Names"
  )
  # Only query handler queues that actually exist.
  target_queues = handler_queues & existing
  return [] if target_queues.empty?

  # Clamp the page: page <= 0 would produce a negative OFFSET, which
  # PostgreSQL rejects, and the rescue below would turn that into a silent
  # empty list.
  page_number = [page.to_i, 1].max
  offset = (page_number - 1) * per_page
  paginated_queue_messages(target_queues, per_page, offset)
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error fetching pending events: #{e.message}" }
  []
end
732
+
733
+ # Physical queue names for all registered subscribers. Used for both
734
+ # pending_events lookup and server-side validation of target queues
735
+ # in reroute_event.
736
# @return [Array] distinct :physical_queue_name values of the registered
#   subscribers, in first-seen order
def handler_queue_physical_names
  distinct = registered_subscribers.uniq { |subscriber| subscriber[:physical_queue_name] }
  distinct.map { |subscriber| subscriber[:physical_queue_name] }
end
739
+
740
+ # Find the handler class registered for a given physical queue name.
741
+ # Returns nil if no subscriber matches — used to reject forged handler
742
+ # values in mark_event_handled / reroute_event.
743
# @param physical_queue_name [String] queue name to look up
# @return [Object, nil] the :handler_class of the first registered
#   subscriber whose :physical_queue_name equals the argument, or nil when
#   none matches
def handler_class_for_queue(physical_queue_name)
  registered_subscribers.each do |subscriber|
    return subscriber[:handler_class] if subscriber[:physical_queue_name] == physical_queue_name
  end

  nil
end
747
+
748
+ # Discard (archive) an event message from a handler queue.
749
# Releases the message's lock, then archives it (queue name passed with
# prefixed: false).
#
# @param queue_name [String] queue holding the message
# @param msg_id [#to_i] message id
# @return [Boolean] true on success; false (after a debug log) when either
#   step raises
def discard_event(queue_name, msg_id)
  begin
    release_lock_for_message(queue_name, msg_id)
    @client.archive_message(queue_name, msg_id.to_i, prefixed: false)
  rescue StandardError => error
    Pgbus.logger.debug { "[Pgbus::Web] Error discarding event #{msg_id}: #{error.message}" }
    return false
  end

  true
end
757
+
758
+ # Mark an event as handled: archive the queue message and insert a
759
+ # ProcessedEvent record so it won't be reprocessed on replay.
760
+ #
761
+ # The insert is performed BEFORE archive. If the archive step fails
762
+ # afterwards the operator can retry — replay protection is already in
763
+ # place and the idempotency dedup will cause the handler to skip the
764
+ # event even if it is eventually re-read from the queue. Doing it the
765
+ # other way around would risk losing the message without recording the
766
+ # marker.
767
# @param queue_name [String] queue holding the message
# @param msg_id [#to_i] message id
# @param handler_class [Object] value stored in the marker's handler_class
# @return [Boolean] true once the marker is inserted, the lock released and
#   the message archived; false when the message is not found, its payload
#   has no "event_id", or any step raises (logged at debug level)
def mark_event_handled(queue_name, msg_id, handler_class)
  detail = job_detail(queue_name, msg_id)
  event_id = detail && JSON.parse(detail[:message])["event_id"]
  return false unless event_id

  # Marker first, archive last: if archiving fails the marker already
  # exists and the operation can simply be retried.
  marker = { event_id: event_id, handler_class: handler_class, processed_at: Time.now.utc }
  ProcessedEvent.insert(marker, unique_by: %i[event_id handler_class])

  # Release the uniqueness lock while the payload is still at hand, so the
  # lock row does not outlive the archived message.
  release_lock_for_payload(detail[:message])
  @client.archive_message(queue_name, msg_id.to_i, prefixed: false)
  true
rescue StandardError => error
  Pgbus.logger.debug { "[Pgbus::Web] Error marking event #{msg_id} handled: #{error.message}" }
  false
end
790
+
791
+ # Edit the payload of a stuck event: delete old message and re-enqueue
792
+ # with the corrected payload in the same queue. The produce + delete
793
+ # are wrapped in a PGMQ transaction so the message can't be lost if
794
+ # either half fails (same pattern as retry_dlq_message).
795
# @param queue_name [String] queue holding the message
# @param msg_id [#to_i] id of the message to replace
# @param new_payload_json [String] corrected payload as JSON text
# @return [Boolean] true when the replacement was produced and the old
#   message deleted; false when the JSON is invalid, the message is not
#   found, or anything raises (logged at debug level)
def edit_event_payload(queue_name, msg_id, new_payload_json)
  payload =
    begin
      JSON.parse(new_payload_json)
    rescue JSON::ParserError
      return false # invalid JSON: reject without touching the queue
    end

  current = job_detail(queue_name, msg_id)
  return false unless current

  # Produce the replacement and delete the old message in one transaction.
  @client.transaction do |txn|
    txn.produce(queue_name, payload.to_json, headers: current[:headers])
    txn.delete(queue_name, msg_id.to_i)
  end
  true
rescue StandardError => error
  Pgbus.logger.debug { "[Pgbus::Web] Error editing event #{msg_id}: #{error.message}" }
  false
end
814
+
815
+ # Reroute an event from one handler queue to another. Wrapped in a
816
+ # PGMQ transaction so produce on the target and delete on the source
817
+ # are atomic.
818
# @param source_queue [String] queue currently holding the message
# @param msg_id [#to_i] message id
# @param target_queue [String] queue that receives the copy
# @return [Boolean] true when the message was produced on the target and
#   deleted from the source; false when it is not found or anything raises
#   (logged at debug level)
def reroute_event(source_queue, msg_id, target_queue)
  message = job_detail(source_queue, msg_id)

  if message
    # Copy to the target and remove from the source in one transaction.
    @client.transaction do |txn|
      txn.produce(target_queue, message[:message], headers: message[:headers])
      txn.delete(source_queue, msg_id.to_i)
    end
    true
  else
    false
  end
rescue StandardError => error
  Pgbus.logger.debug { "[Pgbus::Web] Error rerouting event #{msg_id}: #{error.message}" }
  false
end
831
+
832
+ # Bulk discard selected events from handler queues.
833
# @param selections [Array<Hash>] entries with :queue_name and :msg_id
# @return [Integer] number of entries for which discard_event returned a
#   truthy value; an entry that raises is logged at debug level and skipped
def discard_selected_events(selections)
  return 0 if selections.empty?

  discarded = 0
  selections.each do |selection|
    discarded += 1 if discard_event(selection[:queue_name], selection[:msg_id])
  rescue StandardError => error
    Pgbus.logger.debug { "[Pgbus::Web] Error in bulk discard for #{selection[:msg_id]}: #{error.message}" }
  end
  discarded
end
845
+
846
+ # Subscriber registry. `queue_name` is the logical name the subscriber
847
+ # registered with; `physical_queue_name` is what the queue is actually
848
+ # called in `pgmq.meta` (e.g. logical "task_completion_handler" ->
849
+ # physical "pgbus_task_completion_handler"). The dashboard needs the
850
+ # physical name to match against pending messages / target queues.
711
851
  def registered_subscribers
712
852
  EventBus::Registry.instance.subscribers.map do |s|
713
- { pattern: s.pattern, handler_class: s.handler_class.name, queue_name: s.queue_name }
853
+ {
854
+ pattern: s.pattern,
855
+ handler_class: s.handler_class.name,
856
+ queue_name: s.queue_name,
857
+ physical_queue_name: @client.config.queue_name(s.queue_name)
858
+ }
714
859
  end
715
860
  rescue StandardError => e
716
861
  Pgbus.logger.debug { "[Pgbus::Web] Error fetching subscribers: #{e.message}" }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgbus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.3
4
+ version: 0.7.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mikael Henriksson
@@ -170,6 +170,7 @@ files:
170
170
  - app/views/pgbus/dead_letter/_messages_table.html.erb
171
171
  - app/views/pgbus/dead_letter/index.html.erb
172
172
  - app/views/pgbus/dead_letter/show.html.erb
173
+ - app/views/pgbus/events/_pending_table.html.erb
173
174
  - app/views/pgbus/events/index.html.erb
174
175
  - app/views/pgbus/events/show.html.erb
175
176
  - app/views/pgbus/insights/show.html.erb
@@ -233,8 +234,10 @@ files:
233
234
  - lib/generators/pgbus/templates/pgbus_binstub.erb
234
235
  - lib/generators/pgbus/templates/recurring.yml.erb
235
236
  - lib/generators/pgbus/templates/tune_autovacuum.rb.erb
237
+ - lib/generators/pgbus/templates/tune_fillfactor.rb.erb
236
238
  - lib/generators/pgbus/templates/upgrade_pgmq.rb.erb
237
239
  - lib/generators/pgbus/tune_autovacuum_generator.rb
240
+ - lib/generators/pgbus/tune_fillfactor_generator.rb
238
241
  - lib/generators/pgbus/update_generator.rb
239
242
  - lib/generators/pgbus/upgrade_pgmq_generator.rb
240
243
  - lib/pgbus.rb
@@ -308,6 +311,7 @@ files:
308
311
  - lib/pgbus/streams/turbo_broadcastable.rb
309
312
  - lib/pgbus/streams/turbo_stream_override.rb
310
313
  - lib/pgbus/streams/watermark_cache_middleware.rb
314
+ - lib/pgbus/table_maintenance.rb
311
315
  - lib/pgbus/testing.rb
312
316
  - lib/pgbus/testing/assertions.rb
313
317
  - lib/pgbus/testing/minitest.rb