pgbus 0.5.0 → 0.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/README.md +238 -0
- data/Rakefile +8 -1
- data/app/controllers/pgbus/insights_controller.rb +6 -0
- data/app/helpers/pgbus/streams_helper.rb +115 -0
- data/app/javascript/pgbus/stream_source_element.js +212 -0
- data/app/models/pgbus/stream_stat.rb +118 -0
- data/app/views/pgbus/insights/show.html.erb +59 -0
- data/config/locales/en.yml +16 -0
- data/config/routes.rb +11 -0
- data/lib/generators/pgbus/add_presence_generator.rb +55 -0
- data/lib/generators/pgbus/add_stream_stats_generator.rb +54 -0
- data/lib/generators/pgbus/templates/add_presence.rb.erb +26 -0
- data/lib/generators/pgbus/templates/add_stream_stats.rb.erb +18 -0
- data/lib/pgbus/client/ensure_stream_queue.rb +54 -0
- data/lib/pgbus/client/read_after.rb +100 -0
- data/lib/pgbus/client.rb +6 -0
- data/lib/pgbus/configuration/capsule_dsl.rb +6 -20
- data/lib/pgbus/configuration.rb +126 -14
- data/lib/pgbus/engine.rb +31 -0
- data/lib/pgbus/process/dispatcher.rb +62 -4
- data/lib/pgbus/streams/cursor.rb +71 -0
- data/lib/pgbus/streams/envelope.rb +58 -0
- data/lib/pgbus/streams/filters.rb +98 -0
- data/lib/pgbus/streams/presence.rb +216 -0
- data/lib/pgbus/streams/signed_name.rb +69 -0
- data/lib/pgbus/streams/turbo_broadcastable.rb +53 -0
- data/lib/pgbus/streams/watermark_cache_middleware.rb +28 -0
- data/lib/pgbus/streams.rb +151 -0
- data/lib/pgbus/version.rb +1 -1
- data/lib/pgbus/web/data_source.rb +29 -0
- data/lib/pgbus/web/stream_app.rb +179 -0
- data/lib/pgbus/web/streamer/connection.rb +122 -0
- data/lib/pgbus/web/streamer/dispatcher.rb +467 -0
- data/lib/pgbus/web/streamer/heartbeat.rb +105 -0
- data/lib/pgbus/web/streamer/instance.rb +176 -0
- data/lib/pgbus/web/streamer/io_writer.rb +73 -0
- data/lib/pgbus/web/streamer/listener.rb +228 -0
- data/lib/pgbus/web/streamer/registry.rb +103 -0
- data/lib/pgbus/web/streamer.rb +53 -0
- data/lib/pgbus.rb +28 -0
- data/lib/puma/plugin/pgbus_streams.rb +54 -0
- data/lib/tasks/pgbus_streams.rake +52 -0
- metadata +29 -1
data/lib/pgbus/web/streamer/instance.rb
ADDED
@@ -0,0 +1,176 @@
+# frozen_string_literal: true
+
+module Pgbus
+  module Web
+    module Streamer
+      # Composes the Streamer's three background threads (Listener, Dispatcher,
+      # Heartbeat) with the shared Registry and dispatch_queue. One Instance
+      # per Puma worker. Owns the lifecycle of all three threads and the
+      # dedicated PG::Connection for LISTEN.
+      #
+      # Lifecycle:
+      #   Instance.new(...) — allocates wiring, does NOT start threads
+      #   #start            — spawns listener/dispatcher/heartbeat in order
+      #   #register(conn)   — enqueues a ConnectMessage into the dispatch queue
+      #   #shutdown!        — sends shutdown sentinel to every connection and
+      #                       stops all threads in reverse order
+      #
+      # Dependency injection: every collaborator is constructor-injected so
+      # tests can swap in fakes without touching Pgbus.configuration. In
+      # production the module-level Streamer.current(...) builds all of the
+      # defaults from the configuration.
+      class Instance
+        attr_reader :registry, :listener, :dispatcher, :heartbeat, :dispatch_queue
+
+        def initialize(
+          client: Pgbus.client,
+          config: Pgbus.configuration,
+          pg_connection: nil,
+          logger: Pgbus.logger,
+          registry: nil,
+          dispatch_queue: nil
+        )
+          @client = client
+          @config = config
+          @logger = logger
+          @registry = registry || Registry.new
+          @dispatch_queue = dispatch_queue || Queue.new
+
+          @pg_connection = pg_connection || build_pg_connection
+          @listener = Listener.new(
+            pg_connection: @pg_connection,
+            dispatch_queue: @dispatch_queue,
+            health_check_ms: @config.streams_listen_health_check_ms,
+            logger: @logger
+          )
+          @dispatcher = Dispatcher.new(
+            client: @client,
+            registry: @registry,
+            listener: @listener,
+            dispatch_queue: @dispatch_queue,
+            logger: @logger,
+            config: @config
+          )
+          @heartbeat = Heartbeat.new(
+            registry: @registry,
+            dispatch_queue: @dispatch_queue,
+            interval: @config.streams_heartbeat_interval,
+            idle_timeout: @config.streams_idle_timeout,
+            logger: @logger
+          )
+          @started = false
+          @shutdown_mutex = Mutex.new
+        end
+
+        def start
+          return if @started
+
+          @started = true
+          @listener.start
+          @dispatcher.start
+          @heartbeat.start
+          self
+        end
+
+        # Enqueue a new SSE client. The dispatcher picks this up on its next
+        # iteration and runs the 5-step race-free replay sequence. The StreamApp
+        # calls this right after hijacking the socket.
+        #
+        # Guarded against the worker-shutdown race: if the request thread
+        # arrives here after `shutdown!` has flipped @started, we mark the
+        # connection dead and bail out instead of enqueueing a
+        # ConnectMessage. Otherwise the message would land on a dispatch
+        # queue that no one is draining, leaving the socket outside the
+        # registry and outside close_all_connections — the client would
+        # never see the pgbus:shutdown sentinel.
+        def register(connection)
+          return if connection.dead?
+
+          @shutdown_mutex.synchronize do
+            unless @started
+              connection.mark_dead!
+              return
+            end
+
+            @dispatch_queue << Dispatcher::ConnectMessage.new(connection: connection)
+          end
+        end
+
+        # Graceful shutdown for Puma worker restart. Order matters:
+        #   1. Heartbeat first (stop writing comments to connections we're
+        #      about to close)
+        #   2. Listener next (stop accepting new NOTIFYs)
+        #   3. Dispatcher next (drain the queue; it's now finite because
+        #      nothing else writes into it)
+        #   4. Send pgbus:shutdown sentinel to every connection and close
+        #      their sockets. We do this AFTER stopping the dispatcher so
+        #      no one else is writing to these IOs concurrently.
+        #
+        # Bounded by the configured write deadline per connection; a dead
+        # client drops instantly, a slow one stalls for at most write_deadline_ms.
+        def shutdown!
+          @shutdown_mutex.synchronize do
+            return unless @started
+
+            @started = false
+            safely { @heartbeat.stop }
+            safely { @listener.stop }
+            safely { @dispatcher.stop }
+            close_all_connections
+          end
+        end
+
+        private
+
+        def safely
+          yield
+        rescue StandardError => e
+          @logger.warn { "[Pgbus::Streamer::Instance] component stop raised: #{e.class}: #{e.message}" }
+        end
+
+        def close_all_connections
+          sentinel_bytes = Pgbus::Streams::Envelope.message(
+            id: 0,
+            event: "pgbus:shutdown",
+            data: '{"reason":"worker_restart"}'
+          )
+          deadline_ms = @config.streams_write_deadline_ms
+
+          @registry.each_connection do |connection|
+            # IoWriter holds the connection's mutex, so this write is
+            # serialised against any write the dispatcher/heartbeat
+            # might still be performing if their stop hadn't fully
+            # returned yet.
+            safely { IoWriter.write(connection, sentinel_bytes, deadline_ms: deadline_ms) }
+            safely { connection.close }
+          end
+        end
+
+        def build_pg_connection
+          require "pg" unless defined?(::PG::Connection)
+          opts = @config.connection_options
+          case opts
+          when String then ::PG.connect(opts)
+          when Hash then ::PG.connect(**opts)
+          when Proc
+            # The Proc branch in connection_options typically returns
+            # ActiveRecord::Base.connection.raw_connection — a pooled
+            # AR connection. That's fatal for the streamer: LISTEN and
+            # wait_for_notify bind the session to the connection object,
+            # so if AR's pool hands the same raw_connection to another
+            # thread mid-wait, we get concurrent libpq calls → segfault
+            # or result corruption. Bail out with a clear error instead
+            # of silently segfaulting in production.
+            raise Pgbus::ConfigurationError,
+                  "Streamer cannot use a Proc-based connection_options. " \
+                  "Set Pgbus.configuration.database_url or connection_params " \
+                  "so the streamer owns its own dedicated PG::Connection for LISTEN/NOTIFY."
+          else
+            raise Pgbus::ConfigurationError,
+                  "Cannot build streamer PG connection from #{opts.class}"
+          end
+        end
+      end
+    end
+  end
+end
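For illustration, a minimal sketch of the constructor-injection seam the class comment describes, as a test might use it. FakePgConnection is hypothetical; the keyword arguments are the ones Instance#initialize accepts, and Pgbus.configuration is assumed to already be set up for the remaining defaults.

  class FakePgConnection
    def exec(_sql) = nil                          # swallow LISTEN/UNLISTEN and SELECT 1
    def wait_for_notify(timeout) = sleep(timeout) # never reports a NOTIFY
    def close = nil
  end

  instance = Pgbus::Web::Streamer::Instance.new(
    pg_connection: FakePgConnection.new, # bypasses build_pg_connection
    registry: Pgbus::Web::Streamer::Registry.new,
    dispatch_queue: Queue.new
  )
  instance.start     # spawns listener, dispatcher, heartbeat
  instance.shutdown! # reverse-order teardown behind @shutdown_mutex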
data/lib/pgbus/web/streamer/io_writer.rb
ADDED
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module Pgbus
+  module Web
+    module Streamer
+      # Non-blocking IO writer with a per-call deadline, serialised through the
+      # connection's own mutex. This is the bug-fix for puma/puma#576: a naive
+      # `io.write(bytes)` on a dead or slow SSE client deadlocks the dispatcher
+      # thread until the OS closes the socket (which can take minutes under a
+      # TCP keepalive). The message_bus gem hit this in production; we copy the
+      # pattern.
+      #
+      # The write loop uses write_nonblock + IO.select so a slow client at most
+      # stalls *its own* mutex-protected write for `deadline_ms`, never the
+      # dispatcher or heartbeat thread. When the deadline expires with bytes
+      # still pending, we return :blocked; the caller (Connection#enqueue or
+      # Connection#write_comment) translates that into mark_dead!, and the
+      # heartbeat sweep eventually unregisters the connection.
+      #
+      # Returns:
+      #   :ok      — all bytes written
+      #   :closed  — peer gone (EPIPE / ECONNRESET / IOError on closed IO)
+      #   :blocked — deadline hit before all bytes could be written
+      module IoWriter
+        def self.write(connection, bytes, deadline_ms:)
+          connection.mutex.synchronize do
+            write_all(connection.io, bytes, deadline_ms)
+          end
+        end
+
+        def self.write_all(io, bytes, deadline_ms)
+          deadline = monotonic + (deadline_ms / 1000.0)
+          offset = 0
+          remaining = bytes.bytesize
+
+          while remaining.positive?
+            written = attempt_write(io, bytes, offset, remaining, deadline)
+            return written if written.is_a?(Symbol) # :blocked / :closed
+
+            offset += written
+            remaining -= written
+          end
+
+          :ok
+        end
+
+        def self.attempt_write(io, bytes, offset, remaining, deadline)
+          chunk = bytes.byteslice(offset, remaining)
+          io.write_nonblock(chunk)
+        rescue IO::WaitWritable
+          wait = deadline - monotonic
+          return :blocked if wait <= 0
+
+          # io.wait_writable is fiber-scheduler-friendly (Falcon v1.1) and
+          # functionally identical to IO.select on threaded Puma.
+          ready = io.wait_writable(wait)
+          return :blocked if ready.nil?
+
+          retry
+        rescue Errno::EPIPE, Errno::ECONNRESET, IOError
+          :closed
+        end
+
+        def self.monotonic
+          # Qualify ::Process because Pgbus::Process shadows it in this namespace.
+          ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+        end
+
+        private_class_method :write_all, :attempt_write, :monotonic
+      end
+    end
+  end
+end
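A sketch of how a caller might act on the three return values, in the spirit of the Connection#enqueue / Connection#write_comment behaviour the module comment describes; the case statement is illustrative, not the gem's actual caller.

  case IoWriter.write(connection, bytes, deadline_ms: 500)
  when :ok
    # all bytes on the wire
  when :blocked, :closed
    connection.mark_dead! # heartbeat sweep unregisters it later
  end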
data/lib/pgbus/web/streamer/listener.rb
ADDED
@@ -0,0 +1,228 @@
+# frozen_string_literal: true
+
+module Pgbus
+  module Web
+    module Streamer
+      # Owns a single dedicated PG::Connection running LISTEN against every
+      # stream channel currently serving at least one SSE subscriber. On
+      # NOTIFY, posts a WakeMessage into the dispatch queue; the Dispatcher
+      # thread does the actual read_after + fanout.
+      #
+      # Threading:
+      #   - #start spawns ONE listener thread
+      #   - ensure_listening / remove_listening are called from the
+      #     dispatcher thread, which means the listener thread itself is
+      #     only running `wait_for_notify` — all LISTEN/UNLISTEN SQL goes
+      #     through a command queue that the listener thread drains between
+      #     notifies
+      #   - #stop joins the thread cleanly
+      #
+      # Health check: `wait_for_notify(timeout)` returns nil on timeout. When
+      # it does, the listener runs `SELECT 1` as a TCP keepalive. If that
+      # raises, the connection is reset (`conn.reset`) and every channel in
+      # `@listening_to` is re-LISTENed. This is the fix for design doc §11 #1
+      # (silently dropped LISTEN connections from NAT / PG restart / network
+      # blips).
+      #
+      # NOTIFY channel naming (from pgmq_v1.11.0.sql:1634):
+      #   PG_NOTIFY('pgmq.' || TG_TABLE_NAME || '.' || TG_OP, NULL)
+      # For a queue named `pgbus_stream_chat` the trigger table is
+      # `q_pgbus_stream_chat`, so the channel is `pgmq.q_pgbus_stream_chat.INSERT`.
+      class Listener
+        WakeMessage = Data.define(:queue_name)
+
+        CHANNEL_PREFIX = "pgmq.q_"
+        CHANNEL_SUFFIX = ".INSERT"
+
+        attr_reader :listening_to
+
+        def initialize(pg_connection:, dispatch_queue:, health_check_ms:, logger: Pgbus.logger)
+          @conn = pg_connection
+          @dispatch_queue = dispatch_queue
+          @health_check_ms = health_check_ms
+          @logger = logger
+          @listening_to = Set.new
+          @commands = Queue.new
+          @running = false
+          @thread = nil
+        end
+
+        def start
+          return if @running
+
+          @running = true
+          @thread = Thread.new { run_loop }
+          self
+        end
+
+        def stop
+          return unless @running
+
+          @running = false
+          @commands << [:stop]
+          # Interrupt the blocking wait_for_notify by closing the PG
+          # connection. Without this, the listener thread would sit
+          # inside wait_for_notify until a NOTIFY arrived, which may
+          # never happen. Closing the socket raises PG::Error inside
+          # wait_for_notify; our rescue clause sees @running == false
+          # on the next iteration and exits cleanly.
+          begin
+            @conn.close if @conn.respond_to?(:close)
+          rescue StandardError
+            # best effort — connection may already be gone
+          end
+          @thread&.join(5)
+          @thread = nil
+          self
+        end
+
+        # Synchronously enable LISTEN on a queue's NOTIFY channel.
+        # Blocks until the listener thread has actually executed the
+        # LISTEN SQL — this matters because Dispatcher#handle_connect
+        # needs the LISTEN to be active *before* it issues read_after,
+        # otherwise a broadcast committed in the gap would be neither
+        # in the read_after result nor delivered as a WakeMessage.
+        #
+        # The wait is bounded by the next wait_for_notify timeout
+        # (streams_listen_health_check_ms, default 250ms) plus a small
+        # margin so a stuck listener can't hang the dispatcher forever.
+        def ensure_listening(queue_name)
+          ack = Queue.new
+          @commands << [:listen, queue_name, ack]
+          ack.pop(timeout: ack_timeout)
+        end
+
+        # Asynchronous: lazy GC of LISTENs whose subscriber count
+        # has dropped to zero. No correctness path depends on this
+        # completing before the caller proceeds, so we don't block.
+        def remove_listening(queue_name)
+          @commands << [:unlisten, queue_name]
+        end
+
+        private
+
+        def run_loop
+          loop do
+            break unless @running
+
+            drain_commands
+            break unless @running
+
+            timeout_s = @health_check_ms / 1000.0
+            begin
+              @conn.wait_for_notify(timeout_s) do |channel, _pid, _payload|
+                handle_notify(channel)
+              end || run_health_check
+            rescue PG::Error => e
+              break unless @running
+
+              @logger.warn { "[Pgbus::Streamer::Listener] PG error (#{e.class}: #{e.message}) — reconnecting" }
+              reconnect!
+            end
+          end
+        ensure
+          safe_unlisten_all
+        end
+
+        def drain_commands
+          loop do
+            cmd = @commands.pop(true)
+            case cmd[0]
+            when :listen
+              ack = cmd[2]
+              begin
+                do_listen(cmd[1])
+              ensure
+                # Always ack so the caller is never left blocked, even
+                # if do_listen raised. The caller can still detect
+                # failure by checking @listening_to or by other means;
+                # we just promise not to deadlock the dispatcher.
+                ack&.push(:done)
+              end
+            when :unlisten then do_unlisten(cmd[1])
+            when :stop then @running = false
+                            return
+            end
+          rescue ThreadError
+            # empty queue
+            return
+          end
+        end
+
+        # Caller wait budget for ensure_listening's ack. The listener
+        # thread will process the command at most one wait_for_notify
+        # cycle from now (bounded by health_check_ms); add a small
+        # safety margin so the dispatcher fails loud rather than
+        # hanging if the listener thread is dead.
+        def ack_timeout
+          (@health_check_ms / 1000.0) + 1.0
+        end
+
+        def do_listen(queue_name)
+          channel = channel_for(queue_name)
+          return if @listening_to.include?(channel)
+
+          @conn.exec(%(LISTEN "#{channel}"))
+          @listening_to.add(channel)
+        end
+
+        def do_unlisten(queue_name)
+          channel = channel_for(queue_name)
+          return unless @listening_to.include?(channel)
+
+          @conn.exec(%(UNLISTEN "#{channel}"))
+          @listening_to.delete(channel)
+        end
+
+        def handle_notify(channel)
+          queue_name = queue_name_from(channel)
+          return unless queue_name
+
+          @dispatch_queue << WakeMessage.new(queue_name: queue_name)
+        end
+
+        def run_health_check
+          @conn.exec("SELECT 1")
+        end
+
+        def reconnect!
+          @conn.reset
+          # Don't clear @listening_to until the new set is built. If a
+          # mid-loop LISTEN raises, we keep the original set so the
+          # next reconnect cycle still knows which channels need to
+          # come back. The previous version cleared first and lost
+          # any channels not yet retried on a transient error.
+          to_relisten = @listening_to.to_a
+          new_listening = Set.new
+          to_relisten.each do |channel|
+            @conn.exec(%(LISTEN "#{channel}"))
+            new_listening.add(channel)
+          end
+          @listening_to = new_listening
+        rescue PG::Error => e
+          @logger.error { "[Pgbus::Streamer::Listener] reconnect failed: #{e.class}: #{e.message}" }
+          sleep 0.5
+        end
+
+        def safe_unlisten_all
+          @listening_to.each do |channel|
+            @conn.exec(%(UNLISTEN "#{channel}"))
+          rescue PG::Error
+            # connection may be dead; nothing we can do
+          end
+          @listening_to.clear
+        end
+
+        def channel_for(queue_name)
+          "#{CHANNEL_PREFIX}#{queue_name}#{CHANNEL_SUFFIX}"
+        end
+
+        def queue_name_from(channel)
+          return nil unless channel.start_with?(CHANNEL_PREFIX) && channel.end_with?(CHANNEL_SUFFIX)
+
+          channel[CHANNEL_PREFIX.length..-(CHANNEL_SUFFIX.length + 1)]
+        end
+      end
+    end
+  end
+end
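The channel naming documented above round-trips as plain string arithmetic; a quick check with the same prefix/suffix values (the queue name is illustrative):

  prefix = "pgmq.q_"   # Listener::CHANNEL_PREFIX
  suffix = ".INSERT"   # Listener::CHANNEL_SUFFIX

  channel = "#{prefix}pgbus_stream_chat#{suffix}"
  # => "pgmq.q_pgbus_stream_chat.INSERT"

  channel[prefix.length..-(suffix.length + 1)]
  # => "pgbus_stream_chat"  (what queue_name_from recovers)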
data/lib/pgbus/web/streamer/registry.rb
ADDED
@@ -0,0 +1,103 @@
+# frozen_string_literal: true
+
+module Pgbus
+  module Web
+    module Streamer
+      # Worker-local in-memory registry of SSE connections indexed by stream
+      # name and connection id. Thread-safe via a single mutex. Reads return
+      # snapshots so iterators never hold the lock.
+      #
+      # Registry operations are O(1) under contention (mutex-protected hash
+      # lookups), and iteration is O(n) over a snapshot. The data structure is
+      # deliberately boring — the interesting parts of the streamer (LISTEN
+      # multiplexing, write scheduling, replay race handling) live elsewhere.
+      class Registry
+        def initialize
+          @mutex = Mutex.new
+          @by_stream = {}
+          @by_id = {}
+        end
+
+        def register(connection)
+          @mutex.synchronize do
+            existing = @by_id[connection.id]
+            return if existing.equal?(connection)
+
+            # Registration of a new object under an existing id is a
+            # defensive path — SecureRandom.hex(8) collisions are
+            # astronomical, but the Registry's invariant is "@by_stream
+            # only contains objects that are also in @by_id", so we
+            # must scrub the old entry from its stream index before
+            # overwriting. Otherwise connections_for(stream) would
+            # return orphaned objects and writes would go nowhere.
+            evict_from_stream(existing) if existing
+
+            @by_id[connection.id] = connection
+            (@by_stream[connection.stream_name] ||= Set.new).add(connection)
+          end
+        end
+
+        def unregister(connection)
+          @mutex.synchronize do
+            existing = @by_id.delete(connection.id)
+            return unless existing
+
+            set = @by_stream[existing.stream_name]
+            next unless set
+
+            set.delete(existing)
+            @by_stream.delete(existing.stream_name) if set.empty?
+          end
+        end
+
+        def lookup(id)
+          @mutex.synchronize { @by_id[id] }
+        end
+
+        # Returns a snapshot Array of connections on the given stream.
+        # Mutating the result has no effect on the registry.
+        def connections_for(stream_name)
+          @mutex.synchronize do
+            set = @by_stream[stream_name]
+            set ? set.to_a : []
+          end
+        end
+
+        # Snapshot list of stream names with at least one registered connection.
+        def streams
+          @mutex.synchronize { @by_stream.keys.dup }
+        end
+
+        def empty?(stream_name)
+          @mutex.synchronize do
+            set = @by_stream[stream_name]
+            set.nil? || set.empty?
+          end
+        end
+
+        def size
+          @mutex.synchronize { @by_id.size }
+        end
+
+        # Yields every registered connection across all streams. The iteration
+        # walks a snapshot so the block may safely call back into register/
+        # unregister without risk of deadlock or skipped items.
+        def each_connection(&)
+          snapshot = @mutex.synchronize { @by_id.values.dup }
+          snapshot.each(&)
+        end
+
+        private
+
+        # Must be called while holding @mutex.
+        def evict_from_stream(connection)
+          set = @by_stream[connection.stream_name]
+          return unless set
+
+          set.delete(connection)
+          @by_stream.delete(connection.stream_name) if set.empty?
+        end
+      end
+    end
+  end
+end
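Because each_connection walks a snapshot, a sweep can unregister while iterating without deadlocking on the registry mutex. A hedged sketch (conn.dead? is the Connection predicate referenced elsewhere in this diff):

  registry.each_connection do |conn|
    registry.unregister(conn) if conn.dead? # re-entrant call is safe
  end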
data/lib/pgbus/web/streamer.rb
ADDED
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Pgbus
+  module Web
+    # The worker-local coordinator that owns SSE connections, one PG LISTEN
+    # session, and the dispatch/heartbeat threads. Lazily created on the
+    # first SSE connection to a Puma worker (or eagerly in tests). There is
+    # exactly one Instance per Puma worker process; the module-level
+    # accessors memoise it.
+    #
+    # This is NOT a Singleton in the GoF sense — tests are free to construct
+    # throwaway Instances directly and dependency-inject everything. The
+    # `current` / `reset!` helpers exist purely so the Rack StreamApp can
+    # share an instance across requests within a worker without passing it
+    # through every method call.
+    module Streamer
+      # Module-level mutex protecting `@current`. Without this, two
+      # concurrent first-callers inside a multi-threaded Puma worker
+      # can both build and start an Instance, leaking listener,
+      # dispatcher, and heartbeat threads plus a PG connection. Same
+      # gap would let `reset!` overlap teardown with a fresh
+      # replacement instance.
+      @current_mutex = Mutex.new
+
+      class << self
+        # Returns the worker-local instance, creating it on first call.
+        # `factory_opts` are passed to `Instance.new` the first time.
+        def current(**factory_opts)
+          @current_mutex.synchronize do
+            @current ||= Instance.new(**factory_opts).tap(&:start)
+          end
+        end
+
+        # Explicitly set the current instance — used by tests and by the
+        # Puma plugin to inject a pre-built instance.
+        def current=(instance)
+          @current_mutex.synchronize { @current = instance }
+        end
+
+        # Tear down the current instance and clear the slot. Called by the
+        # Puma shutdown hook (Phase 4.4) and by tests between examples.
+        def reset!
+          instance = nil
+          @current_mutex.synchronize do
+            instance = @current
+            @current = nil
+          end
+          instance&.shutdown!
+        end
+      end
+    end
+  end
+end
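The memoisation contract in miniature; a sketch assuming a configured Pgbus inside an SSE-serving worker:

  streamer = Pgbus::Web::Streamer.current       # first call builds + starts
  streamer.equal?(Pgbus::Web::Streamer.current) # => true within this worker

  Pgbus::Web::Streamer.reset!                   # shutdown! then clear the slot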
data/lib/pgbus.rb
CHANGED
@@ -39,6 +39,12 @@ module Pgbus
       )
       loader.ignore("#{__dir__}/generators")
       loader.ignore("#{__dir__}/active_job")
+      # lib/puma/plugin/pgbus_streams.rb is a Puma plugin — it's required
+      # explicitly by the user from config/puma.rb via `plugin :pgbus_streams`.
+      # Without this ignore, Zeitwerk scans lib/puma/ under the pgbus loader
+      # root and tries to autoload Puma::Plugin, which collides with the real
+      # Puma::Plugin class defined by the puma gem itself.
+      loader.ignore("#{__dir__}/puma")
       loader
     end
   end
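The explicit opt-in the comment refers to would look like this in an application's config/puma.rb (a sketch; only the plugin name comes from this diff):

  # config/puma.rb
  plugin :pgbus_streams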
@@ -78,10 +84,32 @@
       @client ||= Client.new(configuration)
     end
 
+    # Entry point for the streams subsystem — `Pgbus.stream(name).broadcast(html)`
+    # or `Pgbus.stream(@order).current_msg_id`. Defined on Pgbus itself rather
+    # than inside lib/pgbus/streams.rb because that file is only Zeitwerk-loaded
+    # when Pgbus::Streams::Stream is first referenced — the chicken-and-egg
+    # problem means `Pgbus.stream(...)` would be undefined on the first call.
+    # Referencing Streams::Stream inside the method body forces Zeitwerk to
+    # load lib/pgbus/streams.rb lazily on first use, which is fine.
+    #
+    # Caches Stream instances by logical name so high-frequency callers
+    # (e.g. Turbo::StreamsChannel.broadcast_stream_to inside an
+    # after_update_commit callback firing 1000x/sec) don't allocate a new
+    # Stream + Mutex per broadcast. The cache is process-local; reset!
+    # clears it. The cache key is the resolved name string, not the raw
+    # streamables, so `Pgbus.stream(@order)` and `Pgbus.stream(@order)`
+    # in the same process return the same instance.
+    def stream(streamables)
+      name = Streams::Stream.name_from(streamables)
+      @stream_cache ||= Concurrent::Map.new
+      @stream_cache.compute_if_absent(name) { Streams::Stream.new(streamables) }
+    end
+
     def reset!
       @client&.close
       @client = nil
       @configuration = nil
+      @stream_cache = nil
     end
 
     # Discard the inherited PGMQ client after fork.