pgbus 0.6.6 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -73,7 +73,8 @@ module Pgbus
73
73
  add_queue_states: "pgbus:add_queue_states",
74
74
  add_outbox: "pgbus:add_outbox",
75
75
  add_recurring: "pgbus:add_recurring",
76
- add_failed_events_index: "pgbus:add_failed_events_index"
76
+ add_failed_events_index: "pgbus:add_failed_events_index",
77
+ tune_autovacuum: "pgbus:tune_autovacuum"
77
78
  }.freeze
78
79
 
79
80
  # Human-friendly description of each migration for the generator
@@ -88,7 +89,8 @@ module Pgbus
88
89
  add_queue_states: "queue states table (pause/resume)",
89
90
  add_outbox: "outbox entries table (transactional outbox)",
90
91
  add_recurring: "recurring tasks + executions tables",
91
- add_failed_events_index: "unique index on pgbus_failed_events (queue_name, msg_id)"
92
+ add_failed_events_index: "unique index on pgbus_failed_events (queue_name, msg_id)",
93
+ tune_autovacuum: "autovacuum tuning for PGMQ queue and archive tables"
92
94
  }.freeze
93
95
 
94
96
  def initialize(connection)
@@ -110,7 +112,8 @@ module Pgbus
110
112
  *queue_states_migrations,
111
113
  *outbox_migrations,
112
114
  *recurring_migrations,
113
- *failed_events_index_migrations
115
+ *failed_events_index_migrations,
116
+ *autovacuum_migrations
114
117
  ]
115
118
  end
116
119
 
@@ -193,6 +196,15 @@ module Pgbus
193
196
  [:add_failed_events_index]
194
197
  end
195
198
 
199
+ # Autovacuum tuning: check if any PGMQ queue table already has
200
+ # custom autovacuum settings applied. If not, queue the migration.
201
+ def autovacuum_migrations
202
+ return [] unless pgmq_schema_exists?
203
+ return [] if autovacuum_already_tuned?
204
+
205
+ [:tune_autovacuum]
206
+ end
207
+
196
208
  # --- schema probes -------------------------------------------------
197
209
 
198
210
  def table_exists?(name)
@@ -212,6 +224,29 @@ module Pgbus
212
224
  rescue StandardError
213
225
  false
214
226
  end
227
+
228
+ def pgmq_schema_exists?
229
+ result = connection.select_value("SELECT 1 FROM information_schema.schemata WHERE schema_name = 'pgmq'")
230
+ result.present?
231
+ rescue StandardError
232
+ false
233
+ end
234
+
235
+ def autovacuum_already_tuned?
236
+ queue_name = connection.select_value("SELECT queue_name FROM pgmq.meta ORDER BY queue_name LIMIT 1")
237
+ return true unless queue_name # no queues = nothing to tune, skip
238
+
239
+ result = connection.select_value(<<~SQL)
240
+ SELECT reloptions::text LIKE '%autovacuum_vacuum_scale_factor%'
241
+ FROM pg_class
242
+ WHERE relname = 'q_#{queue_name}'
243
+ AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'pgmq')
244
+ SQL
245
+
246
+ [true, "t"].include?(result)
247
+ rescue StandardError
248
+ true # if we can't tell, assume already tuned (safe default)
249
+ end
215
250
  end
216
251
  end
217
252
  end
@@ -17,7 +17,17 @@ module Pgbus
17
17
  # After this patch, `turbo_stream_from` renders a `<pgbus-stream-source>`
18
18
  # element instead, so both sides use PGMQ/SSE. When `streams_enabled`
19
19
  # is false, the original turbo-rails behavior is preserved via `super`.
20
+ #
21
+ # The `include Pgbus::StreamsHelper` is required because some callers
22
+ # invoke `turbo_stream_from` from a Rack middleware context (e.g.
23
+ # hotwire-livereload's Middleware uses `ActionController::Base.helpers`)
24
+ # where `Turbo::StreamsHelper` is available but `Pgbus::StreamsHelper`
25
+ # is not — the engine's `isolate_namespace` scopes helpers to its own
26
+ # views. Including it here ensures `pgbus_stream_from` is always
27
+ # reachable on the receiver.
20
28
  module TurboStreamOverride
29
+ include Pgbus::StreamsHelper
30
+
21
31
  def turbo_stream_from(*streamables, **attributes)
22
32
  if Pgbus.configuration.streams_enabled
23
33
  pgbus_stream_from(*streamables, **attributes)
data/lib/pgbus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pgbus
4
- VERSION = "0.6.6"
4
+ VERSION = "0.6.8"
5
5
  end
@@ -21,6 +21,8 @@ module Pgbus
21
21
 
22
22
  throughput = compute_throughput(queues)
23
23
 
24
+ health = queue_health_stats
25
+
24
26
  {
25
27
  total_queues: queues.size,
26
28
  total_depth: total_depth,
@@ -29,7 +31,10 @@ module Pgbus
29
31
  failed_count: failed_events_count,
30
32
  dlq_depth: dlq_depth,
31
33
  recurring_count: recurring_tasks_count,
32
- throughput_rate: throughput
34
+ throughput_rate: throughput,
35
+ total_dead_tuples: health[:total_dead_tuples],
36
+ tables_needing_vacuum: health[:tables_needing_vacuum],
37
+ oldest_transaction_age_sec: health[:oldest_transaction_age_sec]
33
38
  }
34
39
  end
35
40
 
@@ -629,6 +634,50 @@ module Pgbus
629
634
  []
630
635
  end
631
636
 
637
+ # Queue health — vacuum stats, dead tuples, bloat, MVCC horizon.
638
+ # Returns aggregate health across all queue and archive tables, plus
639
+ # the oldest open transaction age (MVCC horizon pinning risk).
640
+ def queue_health_stats
641
+ tables = fetch_all_table_stats
642
+
643
+ total_dead = tables.sum { |t| t[:dead_tuples] }
644
+ total_live = tables.sum { |t| t[:live_tuples] }
645
+ worst_bloat = tables.map { |t| t[:bloat_ratio] }.max || 0.0
646
+ needs_vacuum = tables.count { |t| t[:bloat_ratio] > 0.1 }
647
+ oldest_vacuum = tables.filter_map { |t| t[:last_vacuum_ago_sec] }.max
648
+
649
+ {
650
+ total_dead_tuples: total_dead,
651
+ total_live_tuples: total_live,
652
+ worst_bloat_ratio: worst_bloat.round(4),
653
+ tables_needing_vacuum: needs_vacuum,
654
+ oldest_vacuum_ago_sec: oldest_vacuum,
655
+ oldest_transaction_age_sec: oldest_transaction_age,
656
+ tables: tables
657
+ }
658
+ rescue StandardError => e
659
+ Pgbus.logger.debug { "[Pgbus::Web] Error fetching queue health stats: #{e.class}: #{e.message}" }
660
+ {
661
+ total_dead_tuples: 0, total_live_tuples: 0, worst_bloat_ratio: 0.0,
662
+ tables_needing_vacuum: 0, oldest_vacuum_ago_sec: nil,
663
+ oldest_transaction_age_sec: nil, tables: []
664
+ }
665
+ end
666
+
667
+ # Per-queue health stats for the queue detail view.
668
+ def queue_health_detail(queue_name)
669
+ sanitized = sanitize_name(queue_name)
670
+ tables = [
671
+ fetch_table_stats("pgmq", "q_#{sanitized}", "queue"),
672
+ fetch_table_stats("pgmq", "a_#{sanitized}", "archive")
673
+ ].compact
674
+
675
+ { tables: tables, oldest_transaction_age_sec: oldest_transaction_age }
676
+ rescue StandardError => e
677
+ Pgbus.logger.debug { "[Pgbus::Web] Error fetching health detail for #{queue_name}: #{e.message}" }
678
+ { tables: [], oldest_transaction_age_sec: nil }
679
+ end
680
+
632
681
  # Stream stats — only populated when streams_stats_enabled is
633
682
  # true AND the migration has been run. Controllers should gate
634
683
  # rendering on `stream_stats_available?` to avoid showing empty
@@ -674,6 +723,89 @@ module Pgbus
674
723
  Pgbus::BusRecord.connection
675
724
  end
676
725
 
726
+ # Single query to fetch pg_stat_user_tables stats for all queue and
727
+ # archive tables. Avoids 2*N catalog queries on the dashboard.
728
+ def fetch_all_table_stats
729
+ rows = connection.select_all(<<~SQL, "Pgbus All Table Health")
730
+ WITH rels AS (
731
+ SELECT queue_name, 'q_' || queue_name AS relname, 'queue' AS kind FROM pgmq.meta
732
+ UNION ALL
733
+ SELECT queue_name, 'a_' || queue_name AS relname, 'archive' AS kind FROM pgmq.meta
734
+ )
735
+ SELECT
736
+ 'pgmq.' || r.relname AS table_name,
737
+ r.kind,
738
+ s.n_live_tup,
739
+ s.n_dead_tup,
740
+ EXTRACT(epoch FROM (NOW() - COALESCE(s.last_vacuum, s.last_autovacuum)))::int AS last_vacuum_ago_sec,
741
+ s.last_vacuum,
742
+ s.last_autovacuum
743
+ FROM rels r
744
+ LEFT JOIN pg_stat_user_tables s
745
+ ON s.schemaname = 'pgmq' AND s.relname = r.relname
746
+ ORDER BY r.queue_name, r.kind
747
+ SQL
748
+
749
+ rows.to_a.filter_map { |row| build_table_health_row(row) }
750
+ end
751
+
752
+ # Fetch pg_stat_user_tables stats for a single table (used by queue_health_detail).
753
+ def fetch_table_stats(schema, table_name, kind)
754
+ row = connection.select_one(<<~SQL, "Pgbus Table Health", [schema, table_name])
755
+ SELECT
756
+ n_live_tup,
757
+ n_dead_tup,
758
+ EXTRACT(epoch FROM (NOW() - COALESCE(last_vacuum, last_autovacuum)))::int AS last_vacuum_ago_sec,
759
+ last_vacuum,
760
+ last_autovacuum
761
+ FROM pg_stat_user_tables
762
+ WHERE schemaname = $1 AND relname = $2
763
+ SQL
764
+
765
+ return nil unless row
766
+
767
+ build_table_health_row(row.merge("table_name" => "#{schema}.#{table_name}", "kind" => kind))
768
+ end
769
+
770
+ def build_table_health_row(row)
771
+ return nil unless row["n_live_tup"] || row["n_dead_tup"]
772
+
773
+ live = row["n_live_tup"].to_i
774
+ dead = row["n_dead_tup"].to_i
775
+ total = live + dead
776
+ bloat = total.positive? ? (dead.to_f / total) : 0.0
777
+
778
+ {
779
+ table: row["table_name"],
780
+ kind: row["kind"],
781
+ live_tuples: live,
782
+ dead_tuples: dead,
783
+ bloat_ratio: bloat.round(4),
784
+ last_vacuum_ago_sec: row["last_vacuum_ago_sec"]&.to_i,
785
+ last_vacuum: row["last_vacuum"],
786
+ last_autovacuum: row["last_autovacuum"]
787
+ }
788
+ end
789
+
790
+ # Age of the oldest open transaction in seconds — indicates MVCC
791
+ # horizon pinning risk. Returns nil if no active transactions.
792
+ def oldest_transaction_age
793
+ row = connection.select_one(<<~SQL, "Pgbus Oldest Transaction")
794
+ SELECT EXTRACT(epoch FROM (NOW() - xact_start))::int AS age_sec
795
+ FROM pg_stat_activity
796
+ WHERE state != 'idle'
797
+ AND xact_start IS NOT NULL
798
+ AND pid != pg_backend_pid()
799
+ ORDER BY xact_start ASC
800
+ LIMIT 1
801
+ SQL
802
+
803
+ row&.dig("age_sec")&.to_i
804
+ rescue StandardError => e
805
+ Pgbus.logger.debug { "[Pgbus::Web] Error fetching oldest transaction age: #{e.class}: #{e.message}" }
806
+ nil
807
+ end
808
+
677
809
  # name is the full PGMQ queue name (already prefixed)
678
810
  def query_queue_messages(name, limit, offset)
679
811
  query_queue_messages_raw(name, limit, offset).map { |m| m.merge(queue: name) }
@@ -24,6 +24,7 @@ module Pgbus
24
24
  append_process_metrics(lines)
25
25
  append_summary_metrics(lines)
26
26
  append_stream_metrics(lines)
27
+ append_health_metrics(lines)
27
28
  "#{lines.join("\n")}\n"
28
29
  end
29
30
 
@@ -97,9 +98,41 @@ module Pgbus
97
98
  end
98
99
 
99
100
  def append_process_metrics(lines)
100
- count = @data_source.processes.count
101
+ procs = @data_source.processes
101
102
  gauge(lines, "pgbus_active_processes", "Number of active pgbus worker processes") do
102
- [[count]]
103
+ [[procs.count]]
104
+ end
105
+
106
+ workers = procs.select { |p| p[:kind] == "worker" && p[:metadata].is_a?(Hash) }
107
+ unless workers.empty?
108
+ gauge(lines, "pgbus_worker_pool_capacity", "Total thread/async pool capacity per worker") do
109
+ workers.filter_map do |w|
110
+ capacity = w[:metadata]["capacity"]
111
+ next unless capacity
112
+
113
+ [capacity, { pid: w[:pid], hostname: w[:hostname] }]
114
+ end
115
+ end
116
+
117
+ gauge(lines, "pgbus_worker_pool_busy", "Number of busy threads/slots per worker") do
118
+ workers.filter_map do |w|
119
+ busy = w[:metadata]["busy"]
120
+ next unless busy
121
+
122
+ [busy, { pid: w[:pid], hostname: w[:hostname] }]
123
+ end
124
+ end
125
+
126
+ gauge(lines, "pgbus_worker_pool_utilization", "Pool utilization ratio (busy / capacity)") do
127
+ workers.filter_map do |w|
128
+ capacity = w[:metadata]["capacity"].to_i
129
+ busy = w[:metadata]["busy"].to_i
130
+ next unless capacity.positive?
131
+
132
+ ratio = (busy.to_f / capacity).round(4)
133
+ [ratio, { pid: w[:pid], hostname: w[:hostname] }]
134
+ end
135
+ end
103
136
  end
104
137
  rescue StandardError => e
105
138
  Pgbus.logger.debug { "[Pgbus::Metrics] Error serializing process metrics: #{e.message}" }
@@ -141,6 +174,42 @@ module Pgbus
141
174
  Pgbus.logger.debug { "[Pgbus::Metrics] Error serializing stream metrics: #{e.message}" }
142
175
  end
143
176
 
177
+ def append_health_metrics(lines)
178
+ health = @data_source.queue_health_stats
179
+ return if health[:tables].empty? && health[:oldest_transaction_age_sec].nil?
180
+
181
+ tables = health[:tables]
182
+ unless tables.empty?
183
+ gauge(lines, "pgbus_table_dead_tuples", "Number of dead tuples in queue/archive table") do
184
+ tables.map { |t| [t[:dead_tuples], { table: t[:table], kind: t[:kind] }] }
185
+ end
186
+
187
+ gauge(lines, "pgbus_table_live_tuples", "Number of live tuples in queue/archive table") do
188
+ tables.map { |t| [t[:live_tuples], { table: t[:table], kind: t[:kind] }] }
189
+ end
190
+
191
+ gauge(lines, "pgbus_table_bloat_ratio", "Dead tuple ratio (dead / total) per table") do
192
+ tables.map { |t| [t[:bloat_ratio], { table: t[:table], kind: t[:kind] }] }
193
+ end
194
+
195
+ vacuum_tables = tables.select { |t| t[:last_vacuum_ago_sec] }
196
+ unless vacuum_tables.empty?
197
+ gauge(lines, "pgbus_table_last_vacuum_age_seconds", "Seconds since last vacuum") do
198
+ vacuum_tables.map { |t| [t[:last_vacuum_ago_sec], { table: t[:table], kind: t[:kind] }] }
199
+ end
200
+ end
201
+ end
202
+
203
+ if health[:oldest_transaction_age_sec]
204
+ gauge(lines, "pgbus_oldest_transaction_age_seconds",
205
+ "Age of the oldest open transaction (MVCC horizon pin risk)") do
206
+ [[health[:oldest_transaction_age_sec]]]
207
+ end
208
+ end
209
+ rescue StandardError => e
210
+ Pgbus.logger.debug { "[Pgbus::Metrics] Error serializing health metrics: #{e.message}" }
211
+ end
212
+
144
213
  # Emits a Prometheus gauge metric family. The block must return an array
145
214
  # of [value] or [value, { label: "val" }] pairs.
146
215
  def gauge(lines, name, help)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgbus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.6
4
+ version: 0.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mikael Henriksson
@@ -121,6 +121,7 @@ files:
121
121
  - LICENSE.txt
122
122
  - README.md
123
123
  - Rakefile
124
+ - app/assets/javascripts/pgbus/stream_source_element.js
124
125
  - app/controllers/pgbus/api/insights_controller.rb
125
126
  - app/controllers/pgbus/api/metrics_controller.rb
126
127
  - app/controllers/pgbus/api/stats_controller.rb
@@ -145,7 +146,6 @@ files:
145
146
  - app/frontend/pgbus/vendor/turbo.js
146
147
  - app/helpers/pgbus/application_helper.rb
147
148
  - app/helpers/pgbus/streams_helper.rb
148
- - app/javascript/pgbus/stream_source_element.js
149
149
  - app/models/pgbus/application_record.rb
150
150
  - app/models/pgbus/batch_entry.rb
151
151
  - app/models/pgbus/blocked_execution.rb
@@ -162,6 +162,7 @@ files:
162
162
  - app/models/pgbus/uniqueness_key.rb
163
163
  - app/views/layouts/pgbus/application.html.erb
164
164
  - app/views/pgbus/dashboard/_processes_table.html.erb
165
+ - app/views/pgbus/dashboard/_queue_health.html.erb
165
166
  - app/views/pgbus/dashboard/_queues_table.html.erb
166
167
  - app/views/pgbus/dashboard/_recent_failures.html.erb
167
168
  - app/views/pgbus/dashboard/_stats_cards.html.erb
@@ -230,12 +231,15 @@ files:
230
231
  - lib/generators/pgbus/templates/pgbus.yml.erb
231
232
  - lib/generators/pgbus/templates/pgbus_binstub.erb
232
233
  - lib/generators/pgbus/templates/recurring.yml.erb
234
+ - lib/generators/pgbus/templates/tune_autovacuum.rb.erb
233
235
  - lib/generators/pgbus/templates/upgrade_pgmq.rb.erb
236
+ - lib/generators/pgbus/tune_autovacuum_generator.rb
234
237
  - lib/generators/pgbus/update_generator.rb
235
238
  - lib/generators/pgbus/upgrade_pgmq_generator.rb
236
239
  - lib/pgbus.rb
237
240
  - lib/pgbus/active_job/adapter.rb
238
241
  - lib/pgbus/active_job/executor.rb
242
+ - lib/pgbus/autovacuum_tuning.rb
239
243
  - lib/pgbus/batch.rb
240
244
  - lib/pgbus/bus_record.rb
241
245
  - lib/pgbus/circuit_breaker.rb