pgbus 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5027c420c26dac65291e11e07ca0291da540b7d2aef9d963e9fdebc2217e7943
4
- data.tar.gz: 9e66b85229c55528b3b5b2093883935c1333733018508bb6d9f7f7836666363d
3
+ metadata.gz: 562d32e977685559437bdb1d3817fd02feb4fd9485b9101241a64d25d37162b1
4
+ data.tar.gz: eeacf0de199bfdc49c5b68523c5bbae7b1eff7970637742f3ec3e4391e4a3b06
5
5
  SHA512:
6
- metadata.gz: 9c476c7de9c0db40e0d2183a11f7d502704a468e68f49b28934619cfab07f1130f3988d6f0fbfa6e77a57d6450aa183746b301367e0d4af7dcd2779319f96ff7
7
- data.tar.gz: ed62152e6c2c361c5b8f38c61e31ea4e3af5d031cf68f71d0ea95177ab04f538bfbb4fefc84f5d08a2acd8efb23b0024d6433d4a6db2272e7c05521a59e83a3b
6
+ metadata.gz: a06cdb8eeecef32d6fa024f1fadac4f8c22758a11c91b1e9f5b00266869ea92b068c58a239a543c9409375d7155a59a5ec05f17cad59d7a23a49c90ecf74d7cd
7
+ data.tar.gz: e2a941feb09d3462bb9a83cda9b48821cf93649b53e9e7305af4a3b43b95bccca7690f40ebcb42857c4b9118ebeb592bcfacc317302e3a71b39be1c4f78493aa
@@ -117,6 +117,14 @@ module Pgbus
117
117
  :streams_host, :streams_port, :streams_database_url
118
118
  attr_reader :streams_default_broadcast_mode # rubocop:disable Style/AccessorGrouping
119
119
 
120
+ # NOTIFY-gated worker wakeups. When true, each Worker fork owns a
121
+ # dedicated NotifyListener PG connection that LISTENs on its queues'
122
+ # INSERT channels and wakes the loop on a real insert. Defaults to the
123
+ # value of listen_notify. The worker_notify_* overrides mirror
124
+ # streams_* so the LISTEN connection can bypass PgBouncer.
125
+ attr_accessor :worker_notify_wakeup,
126
+ :worker_notify_host, :worker_notify_port, :worker_notify_database_url
127
+
120
128
  # AppSignal integration (auto-loaded when ::Appsignal is defined and this is true).
121
129
  # Set to false to opt out without uninstalling the appsignal gem.
122
130
  attr_accessor :appsignal_enabled, :appsignal_probe_enabled
@@ -171,6 +179,11 @@ module Pgbus
171
179
 
172
180
  @listen_notify = true
173
181
 
182
+ @worker_notify_wakeup = nil
183
+ @worker_notify_host = nil
184
+ @worker_notify_port = nil
185
+ @worker_notify_database_url = nil
186
+
174
187
  @pgmq_schema_mode = :auto
175
188
 
176
189
  @event_consumers = nil
@@ -732,6 +745,41 @@ module Pgbus
732
745
  end
733
746
  end
734
747
 
748
+ # Connection options for the Worker's dedicated NotifyListener connection.
749
+ # Mirrors streams_connection_options: defaults to the base connection_options,
750
+ # overridable via worker_notify_database_url / worker_notify_host /
751
+ # worker_notify_port so the LISTEN connection can bypass PgBouncer.
752
+ def worker_notify_connection_options
753
+ return worker_notify_database_url if worker_notify_database_url
754
+
755
+ base = connection_options
756
+ return base unless worker_notify_host || worker_notify_port
757
+
758
+ case base
759
+ when Hash
760
+ result = base.dup
761
+ result[:host] = worker_notify_host if worker_notify_host
762
+ result[:port] = worker_notify_port if worker_notify_port
763
+ result
764
+ when String
765
+ parts = [base]
766
+ parts << "host=#{worker_notify_host}" if worker_notify_host
767
+ parts << "port=#{worker_notify_port}" if worker_notify_port
768
+ parts.join(" ")
769
+ else
770
+ base
771
+ end
772
+ end
773
+
774
# Effective NOTIFY-wakeup flag. A non-nil value assigned to
# worker_notify_wakeup is returned as-is; while it is still nil the
# value of listen_notify is used instead.
def worker_notify_wakeup?
  return listen_notify if @worker_notify_wakeup.nil?

  @worker_notify_wakeup
end
782
+
735
783
  private
736
784
 
737
785
  # Built-in presence-member extractor used when no custom
@@ -0,0 +1,268 @@
1
+ # frozen_string_literal: true
2
+
3
module Pgbus
  module Process
    # Owns a single dedicated PG::Connection that LISTENs on the INSERT NOTIFY
    # channel of every queue a Worker/Consumer reads, and calls the on_wake
    # callback the moment any of them receives a notification. This converts
    # the worker/consumer loop from "blind-read every polling_interval" into
    # "sleep until a real insert, poll only as a fallback".
    #
    # pgmq-ruby's `wait_for_notify(queue, timeout:)` is single-queue and wraps
    # the wait in `with_connection`, which only watches one channel and holds the
    # pooled connection for the whole wait. Neither fits a worker that reads N
    # queues on a small shared pool. So we own ONE raw PG::Connection and
    # hand-roll per-channel LISTEN on it.
    #
    # A persistent LISTEN connection silently dies under a transaction-pool
    # PgBouncer. Point this connection at a DIRECT port via the
    # `config.worker_notify_*` overrides. The health check that runs whenever
    # a wait times out catches a connection killed out from under us and
    # re-LISTENs everything.
    #
    # NOTIFY channel naming (pgmq trigger): PG_NOTIFY('pgmq.' || table || '.' ||
    # TG_OP). For queue `pgbus_default` the table is `q_pgbus_default`, so the
    # channel is `pgmq.q_pgbus_default.INSERT`.
    #
    # Thread safety: @running, @conn, and @listening_to are guarded by
    # @state_mutex. The listener thread owns @conn during wait_for_notify (a
    # blocking IO call where the mutex MUST NOT be held), so wait_once reads
    # the connection out of the mutex first and operates on a local. Reconnect
    # publishes the new connection + channel set under the mutex.
    # LISTEN/UNLISTEN requests from other threads travel through the @commands
    # queue and are executed on the listener thread.
    class NotifyListener
      CHANNEL_PREFIX = "pgmq.q_"
      CHANNEL_SUFFIX = ".INSERT"

      # Pause between failed reconnect attempts.
      RECONNECT_BACKOFF_SECONDS = 0.5

      # @param physical_queues [Array<String>, String, nil] queue names
      #   (without the "q_" table prefix) to LISTEN on when started
      # @param on_wake [#call] invoked with no arguments for each notification
      # @param connection_options [String, Hash] passed to PG.connect
      # @param health_check_ms [Numeric] wait_for_notify timeout; a timeout
      #   triggers a "SELECT 1" health check
      # @param logger [#error, #warn] block-style logger
      def initialize(physical_queues:, on_wake:, connection_options:,
                     health_check_ms: 1000, logger: Pgbus.logger)
        @physical_queues = Array(physical_queues)
        @on_wake = on_wake
        @connection_options = connection_options
        @health_check_ms = health_check_ms
        @logger = logger
        @state_mutex = Mutex.new
        @listening_to = Set.new
        @commands = Queue.new
        @running = false
        @thread = nil
        @conn = nil
      end

      # @return [Set<String>] snapshot (copy) of the channel names currently
      #   recorded as LISTENed
      def listening_to
        @state_mutex.synchronize { @listening_to.dup }
      end

      # Starts the listener thread. A no-op while already running.
      #
      # @return [self]
      def start
        @state_mutex.synchronize do
          return self if @running

          @running = true
        end
        # A previous #stop enqueues [:stop], but the listener thread normally
        # exits through the running? check without ever popping it. Left in
        # place, that stale command would be the first thing the new thread
        # drains and it would shut itself down before LISTENing on anything.
        discard_stale_stop_commands
        @physical_queues.each { |q| @commands << [:listen, q] }
        @thread = Thread.new { run_loop }
        self
      end

      # Stops the listener: flips @running, closes the socket to interrupt a
      # blocking wait, and waits up to 5 seconds for the thread to finish.
      # A no-op when not running.
      #
      # @return [self]
      def stop
        conn_to_close = nil
        @state_mutex.synchronize do
          return self unless @running

          @running = false
          conn_to_close = @conn
        end
        @commands << [:stop]
        # Interrupt the blocking wait by closing the socket; the rescue in
        # wait_once sees @running == false and exits cleanly.
        begin
          conn_to_close&.close if conn_to_close.respond_to?(:close)
        rescue StandardError
          nil
        end
        @thread&.join(5)
        @thread = nil
        self
      end

      # Asynchronously requests a LISTEN for the queue's INSERT channel.
      def add_queue(physical_queue)
        @commands << [:listen, physical_queue]
      end

      # Asynchronously requests an UNLISTEN for the queue's INSERT channel.
      def remove_queue(physical_queue)
        @commands << [:unlisten, physical_queue]
      end

      private

      def running?
        @state_mutex.synchronize { @running }
      end

      # Drops every queued [:stop] while keeping other pending commands in
      # their original order. Only called from #start, i.e. before the new
      # listener thread exists.
      def discard_stale_stop_commands
        pending = []
        loop { pending << @commands.pop(true) }
      rescue ThreadError
        # Queue drained: put back everything that is not a stop request.
        pending.each { |cmd| @commands << cmd unless cmd[0] == :stop }
      end

      # Listener thread body: connect, apply queued commands, then alternate
      # between draining commands and waiting for notifications until stopped.
      def run_loop
        conn = build_connection
        @state_mutex.synchronize { @conn = conn }
        drain_commands

        loop do
          break unless running?

          drain_commands
          break unless running?

          wait_once
        end
      rescue StandardError => e
        @logger.error { "[Pgbus::NotifyListener] fatal: #{e.class}: #{e.message}" } if running?
      ensure
        # Clear @running so #start can spawn a fresh thread after a fatal exit
        # (e.g. build_connection raising at boot). Without this, the dead
        # thread's @running stays true and #start returns early forever.
        @state_mutex.synchronize { @running = false }
        safe_unlisten_all
        safe_close
      end

      # One wait cycle: block for up to health_check_ms for a notification,
      # calling on_wake if one arrives, otherwise run the health check.
      # Connection errors trigger a reconnect unless we are shutting down.
      def wait_once
        conn = @state_mutex.synchronize { @conn }
        return reconnect! unless conn

        timeout_s = @health_check_ms / 1000.0
        got_notify = conn.wait_for_notify(timeout_s) do |_channel, _pid, _payload|
          @on_wake.call
        end
        run_health_check(conn) unless got_notify
      rescue IOError, PG::Error => e
        return unless running?

        @logger.warn { "[Pgbus::NotifyListener] connection error (#{e.class}: #{e.message}) — reconnecting" }
        reconnect!
      end

      # Applies every queued command without blocking. Returns when the queue
      # is empty (ThreadError from the non-blocking pop) or a :stop is seen.
      def drain_commands
        loop do
          cmd = @commands.pop(true)
          case cmd[0]
          when :listen then do_listen(cmd[1])
          when :unlisten then do_unlisten(cmd[1])
          when :stop
            @state_mutex.synchronize { @running = false }
            return
          end
        rescue ThreadError
          return
        end
      end

      # LISTENs on the queue's channel unless already recorded or no
      # connection is published.
      def do_listen(physical_queue)
        channel = channel_for(physical_queue)
        conn = @state_mutex.synchronize do
          return if @listening_to.include?(channel)

          @conn
        end
        return unless conn

        conn.exec(%(LISTEN "#{channel}"))
        @state_mutex.synchronize { @listening_to.add(channel) }
      end

      # UNLISTENs the queue's channel if it is currently recorded and a
      # connection is published.
      def do_unlisten(physical_queue)
        channel = channel_for(physical_queue)
        conn = @state_mutex.synchronize do
          return unless @listening_to.include?(channel)

          @conn
        end
        return unless conn

        conn.exec(%(UNLISTEN "#{channel}"))
        @state_mutex.synchronize { @listening_to.delete(channel) }
      end

      # Raises (PG::Error) when the connection is no longer usable.
      def run_health_check(conn)
        conn.exec("SELECT 1")
      end

      # Retry reconnect until either we succeed (new conn + every channel
      # re-LISTENed) or @running flips to false. Without the loop, a single
      # PG::Error during build/LISTEN left @conn nil and the listener degraded
      # silently — wait_once would re-enter and fail forever or run with an
      # incomplete subscription set.
      def reconnect!
        channels = @state_mutex.synchronize { @listening_to.to_a }
        loop do
          return unless running?

          safe_close
          new_conn = nil
          begin
            new_conn = build_connection
            channels.each { |channel| new_conn.exec(%(LISTEN "#{channel}")) }
          rescue PG::Error => e
            # build_connection may have succeeded before a later LISTEN raised.
            # Without this close, the partially-built conn is orphaned and the
            # next retry just allocates another one — leaking PG connections on
            # repeated failures.
            close_quietly(new_conn)
            @logger.error { "[Pgbus::NotifyListener] reconnect failed: #{e.class}: #{e.message}" }
            sleep RECONNECT_BACKOFF_SECONDS
            next
          end

          @state_mutex.synchronize do
            @conn = new_conn
            @listening_to = Set.new(channels)
          end
          return
        end
      end

      # Opens a new PG connection from @connection_options.
      #
      # @raise [Pgbus::ConfigurationError] when the options are neither a
      #   String nor a Hash
      def build_connection
        require "pg" unless defined?(::PG::Connection)
        case @connection_options
        when String then ::PG.connect(@connection_options)
        when Hash then ::PG.connect(**@connection_options)
        else
          raise Pgbus::ConfigurationError,
                "NotifyListener cannot build a PG connection from #{@connection_options.class}. " \
                "Set worker_notify_database_url / worker_notify_host / worker_notify_port, " \
                "or a base database_url, so the listener owns a dedicated connection."
        end
      end

      # Best-effort UNLISTEN of every recorded channel, then clears the set.
      def safe_unlisten_all
        channels, conn = @state_mutex.synchronize { [@listening_to.to_a, @conn] }
        channels.each do |channel|
          conn&.exec(%(UNLISTEN "#{channel}"))
        rescue PG::Error
          nil
        end
        @state_mutex.synchronize { @listening_to.clear }
      end

      # Unpublishes @conn under the mutex, then closes it outside the mutex.
      def safe_close
        conn = @state_mutex.synchronize do
          c = @conn
          @conn = nil
          c
        end
        close_quietly(conn)
      end

      # Close an unpublished PG::Connection (one that never made it into @conn,
      # e.g. a half-built reconnect attempt where LISTEN raised). Best-effort.
      def close_quietly(conn)
        conn&.close if conn.respond_to?(:close)
      rescue StandardError
        nil
      end

      # @return [String] NOTIFY channel name for a physical queue
      def channel_for(physical_queue)
        "#{CHANNEL_PREFIX}#{physical_queue}#{CHANNEL_SUFFIX}"
      end
    end
  end
end
@@ -13,6 +13,7 @@ module Pgbus
13
13
  single_active_consumer: false, consumer_priority: 0,
14
14
  execution_mode: :threads, group_mode: nil)
15
15
  @queues = Array(queues)
16
+ @initial_queues = @queues.dup.freeze
16
17
  @wildcard = @queues.include?("*")
17
18
  @threads = threads
18
19
  @config = config
@@ -49,6 +50,7 @@ module Pgbus
49
50
  )
50
51
  @circuit_breaker = Pgbus::CircuitBreaker.new(config: config)
51
52
  @queue_lock = QueueLock.new if @single_active_consumer
53
+ @notify_listener = nil
52
54
  end
53
55
 
54
56
  def stats
@@ -66,14 +68,17 @@ module Pgbus
66
68
  }.merge(@pool.metadata)
67
69
  end
68
70
 
71
+ NOTIFY_FALLBACK_POLL_SECONDS = 15
72
+
69
73
  def run
70
74
  setup_signals
71
75
  start_heartbeat
72
76
  resolve_wildcard_queues
77
+ start_notify_listener
73
78
  @lifecycle.transition_to!(:running)
74
79
  Pgbus.logger.info do
75
80
  "[Pgbus] Worker started: queues=#{queues.join(",")} threads=#{threads} " \
76
- "mode=#{@execution_mode} pid=#{::Process.pid}"
81
+ "mode=#{@execution_mode} notify_wakeup=#{notify_wakeup?} pid=#{::Process.pid}"
77
82
  end
78
83
 
79
84
  loop do
@@ -117,7 +122,7 @@ module Pgbus
117
122
  private
118
123
 
119
124
  def claim_and_execute
120
- poll_interval = effective_polling_interval
125
+ poll_interval = wake_timeout
121
126
 
122
127
  idle = @pool.available_capacity
123
128
  return @wake_signal.wait(timeout: poll_interval) if idle <= 0
@@ -147,9 +152,19 @@ module Pgbus
147
152
  # Returns an array of [queue_name, message] pairs so we always know
148
153
  # which queue each message came from.
149
154
  def fetch_messages(qty)
155
+ restore_evicted_queues if queues.empty? && !@wildcard
156
+
150
157
  active_queues = queues.reject { |q| @circuit_breaker.paused?(q) }
151
158
  active_queues = active_queues.select { |q| @queue_lock.try_lock(q) } if @single_active_consumer
152
- return [] if active_queues.empty?
159
+
160
+ if active_queues.empty?
161
+ Pgbus.logger.debug do
162
+ paused = queues.select { |q| @circuit_breaker.paused?(q) }
163
+ "[Pgbus] Worker fetch: all queues filtered — queues=#{queues.join(",")} " \
164
+ "paused=#{paused.join(",")}"
165
+ end
166
+ return []
167
+ end
153
168
 
154
169
  if priority_enabled?
155
170
  fetch_prioritized(active_queues, qty)
@@ -295,6 +310,7 @@ module Pgbus
295
310
  Pgbus.logger.info { "[Pgbus] Wildcard queue '*' resolved to: #{@queues.join(", ")}" } unless @last_wildcard_resolve
296
311
  end
297
312
  @last_wildcard_resolve = monotonic_now
313
+ sync_notify_listener_queues
298
314
  rescue StandardError => e
299
315
  Pgbus.logger.error { "[Pgbus] Failed to resolve wildcard queues: #{e.message} — falling back to default" }
300
316
  @queues = [config.default_queue] unless @last_wildcard_resolve
@@ -321,6 +337,15 @@ module Pgbus
321
337
  end
322
338
  end
323
339
  Pgbus.logger.error { "[Pgbus] Queue table missing: #{error.message}" }
340
+ restore_evicted_queues if @queues.empty? && !@wildcard
341
+ end
342
+
343
# Resets the working queue list to a copy of the queues captured at
# construction time (@initial_queues) and logs a warning naming them.
def restore_evicted_queues
  restored = @initial_queues.dup
  @queues = restored
  Pgbus.logger.warn { "[Pgbus] Worker queue list was empty after eviction — restoring initial queues: #{@queues.join(", ")}" }
end
325
350
 
326
351
  def detect_zombie(queue_name, message)
@@ -401,6 +426,16 @@ module Pgbus
401
426
  end
402
427
  end
403
428
 
429
# Whether NOTIFY-gated wakeups are enabled. False when the config object
# does not respond to worker_notify_wakeup?; otherwise whatever that
# predicate returns.
def notify_wakeup?
  return false unless config.respond_to?(:worker_notify_wakeup?)

  config.worker_notify_wakeup?
end
432
+
433
# How long the worker may sleep on its wake signal before polling again.
#
# With NOTIFY wakeups active, inserts wake the loop, so polling is only a
# safety net and the timeout is stretched to at least
# NOTIFY_FALLBACK_POLL_SECONDS. In every other case the regular
# effective_polling_interval applies.
def wake_timeout
  return effective_polling_interval unless notify_wakeup? && @notify_listener

  # @notify_listener stays set even if its thread died (its connection is
  # opened inside the thread, so start_notify_listener cannot see that
  # failure) or has not subscribed yet. With no active subscriptions nothing
  # can wake us, so keep polling at the normal rate instead of sleeping for
  # the long fallback.
  return effective_polling_interval if @notify_listener.listening_to.empty?

  [effective_polling_interval, config.polling_interval, NOTIFY_FALLBACK_POLL_SECONDS].max
end
438
+
404
439
  def effective_polling_interval
405
440
  return config.polling_interval if @consumer_priority.zero?
406
441
 
@@ -413,6 +448,43 @@ module Pgbus
413
448
  config.polling_interval
414
449
  end
415
450
 
451
# Builds and starts the NotifyListener for this worker's queues when
# notify wakeups are enabled. The listener's on_wake callback notifies
# @wake_signal; its health-check interval is the polling interval in
# milliseconds, clamped to 250..5000. Any StandardError raised while
# building or starting leaves @notify_listener nil and is logged.
def start_notify_listener
  return unless notify_wakeup?

  listener = NotifyListener.new(
    physical_queues: physical_queue_names,
    on_wake: -> { @wake_signal.notify! },
    connection_options: config.worker_notify_connection_options,
    health_check_ms: (config.polling_interval * 1000).to_i.clamp(250, 5_000),
    logger: Pgbus.logger
  )
  @notify_listener = listener.start
rescue StandardError => e
  @notify_listener = nil
  Pgbus.logger.error { "[Pgbus] NotifyListener failed to start, falling back to polling: #{e.class}: #{e.message}" }
end
467
+
468
# Brings the listener's subscriptions in line with the current queue list:
# queues we read but are not listening on are added, channels for queues we
# no longer read are removed. Does nothing without a listener; any
# StandardError is logged as a warning rather than raised.
def sync_notify_listener_queues
  listener = @notify_listener
  return unless listener

  wanted = physical_queue_names.to_set
  subscribed = listener.listening_to.map { |channel| channel_to_physical(channel) }.to_set

  missing = wanted - subscribed
  stale = subscribed - wanted
  missing.each { |queue| listener.add_queue(queue) }
  stale.each { |queue| listener.remove_queue(queue) }
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] NotifyListener queue sync failed: #{e.class}: #{e.message}" }
end
478
+
479
+ def physical_queue_names
480
+ prefix = "#{config.queue_prefix}_"
481
+ queues.map { |q| "#{prefix}#{q}" }
482
+ end
483
+
484
# Strips NotifyListener's channel prefix and suffix from a channel name,
# leaving the physical queue name.
def channel_to_physical(channel)
  without_prefix = channel.delete_prefix(NotifyListener::CHANNEL_PREFIX)
  without_prefix.delete_suffix(NotifyListener::CHANNEL_SUFFIX)
end
487
+
416
488
  def start_heartbeat
417
489
  @heartbeat = Heartbeat.new(
418
490
  kind: "worker",
@@ -427,6 +499,7 @@ module Pgbus
427
499
 
428
500
  def shutdown
429
501
  Pgbus.logger.info { "[Pgbus] Worker draining thread pool..." }
502
+ @notify_listener&.stop
430
503
  @pool.shutdown
431
504
  @pool.wait_for_termination(30)
432
505
  @stat_buffer&.stop
data/lib/pgbus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pgbus
4
- VERSION = "0.9.2"
4
+ VERSION = "0.9.3"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgbus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ version: 0.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mikael Henriksson
@@ -294,6 +294,7 @@ files:
294
294
  - lib/pgbus/process/dispatcher.rb
295
295
  - lib/pgbus/process/heartbeat.rb
296
296
  - lib/pgbus/process/lifecycle.rb
297
+ - lib/pgbus/process/notify_listener.rb
297
298
  - lib/pgbus/process/queue_lock.rb
298
299
  - lib/pgbus/process/signal_handler.rb
299
300
  - lib/pgbus/process/supervisor.rb