pgbus 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/README.md +238 -0
- data/Rakefile +8 -1
- data/app/controllers/pgbus/insights_controller.rb +6 -0
- data/app/helpers/pgbus/streams_helper.rb +115 -0
- data/app/javascript/pgbus/stream_source_element.js +212 -0
- data/app/models/pgbus/stream_stat.rb +118 -0
- data/app/views/pgbus/insights/show.html.erb +59 -0
- data/config/locales/en.yml +16 -0
- data/config/routes.rb +11 -0
- data/lib/generators/pgbus/add_presence_generator.rb +55 -0
- data/lib/generators/pgbus/add_stream_stats_generator.rb +54 -0
- data/lib/generators/pgbus/templates/add_presence.rb.erb +26 -0
- data/lib/generators/pgbus/templates/add_stream_stats.rb.erb +18 -0
- data/lib/pgbus/client/ensure_stream_queue.rb +54 -0
- data/lib/pgbus/client/read_after.rb +100 -0
- data/lib/pgbus/client.rb +6 -0
- data/lib/pgbus/configuration/capsule_dsl.rb +6 -20
- data/lib/pgbus/configuration.rb +126 -14
- data/lib/pgbus/engine.rb +31 -0
- data/lib/pgbus/process/dispatcher.rb +62 -4
- data/lib/pgbus/streams/cursor.rb +71 -0
- data/lib/pgbus/streams/envelope.rb +58 -0
- data/lib/pgbus/streams/filters.rb +98 -0
- data/lib/pgbus/streams/presence.rb +216 -0
- data/lib/pgbus/streams/signed_name.rb +69 -0
- data/lib/pgbus/streams/turbo_broadcastable.rb +53 -0
- data/lib/pgbus/streams/watermark_cache_middleware.rb +28 -0
- data/lib/pgbus/streams.rb +151 -0
- data/lib/pgbus/version.rb +1 -1
- data/lib/pgbus/web/data_source.rb +29 -0
- data/lib/pgbus/web/stream_app.rb +179 -0
- data/lib/pgbus/web/streamer/connection.rb +122 -0
- data/lib/pgbus/web/streamer/dispatcher.rb +467 -0
- data/lib/pgbus/web/streamer/heartbeat.rb +105 -0
- data/lib/pgbus/web/streamer/instance.rb +176 -0
- data/lib/pgbus/web/streamer/io_writer.rb +73 -0
- data/lib/pgbus/web/streamer/listener.rb +228 -0
- data/lib/pgbus/web/streamer/registry.rb +103 -0
- data/lib/pgbus/web/streamer.rb +53 -0
- data/lib/pgbus.rb +28 -0
- data/lib/puma/plugin/pgbus_streams.rb +54 -0
- data/lib/tasks/pgbus_streams.rake +52 -0
- metadata +29 -1
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pgbus
|
|
4
|
+
module Web
|
|
5
|
+
module Streamer
|
|
6
|
+
# The single-threaded consumer of the shared dispatch_queue. Drains
|
|
7
|
+
# three kinds of messages:
|
|
8
|
+
#
|
|
9
|
+
# - Listener::WakeMessage(queue_name:) — a NOTIFY fired; read_after
|
|
10
|
+
# the minimum cursor and fan out to every connection on the stream
|
|
11
|
+
# (both registered and in-flight connects).
|
|
12
|
+
#
|
|
13
|
+
# - ConnectMessage(connection:) — a new SSE client connected. Runs
|
|
14
|
+
# the 5-step race-free replay sequence from design doc §6.5:
|
|
15
|
+
# 1. ensure_listening on the stream (so future WakeMessages
|
|
16
|
+
# deliver to the in-flight buffer)
|
|
17
|
+
# 2. register an in-flight buffer keyed by connection
|
|
18
|
+
# 3. read_after(connection.since_id) + enqueue to connection
|
|
19
|
+
# 4. drain the in-flight buffer into the connection (dedup is
|
|
20
|
+
# handled by Connection#enqueue's cursor check)
|
|
21
|
+
# 5. move the connection from in-flight to the main Registry
|
|
22
|
+
#
|
|
23
|
+
# - DisconnectMessage(connection:) — unregister and, if the stream
|
|
24
|
+
# now has zero subscribers, eventually unlisten (lazy GC,
|
|
25
|
+
# implemented in the Streamer sweep rather than here).
|
|
26
|
+
#
|
|
27
|
+
# All state ownership lives on this one thread: the registry is
|
|
28
|
+
# thread-safe (Phase 2.1) but the in-flight buffers are local to
|
|
29
|
+
# the Dispatcher and accessed only from this thread, so no locks.
|
|
30
|
+
class Dispatcher
|
|
31
|
+
WakeMessage = Listener::WakeMessage
|
|
32
|
+
ConnectMessage = Data.define(:connection)
|
|
33
|
+
DisconnectMessage = Data.define(:connection)
|
|
34
|
+
|
|
35
|
+
# An unwrapped stream broadcast. Similar shape to
|
|
36
|
+
# Pgbus::Client::ReadAfter::Envelope (msg_id + payload) so
|
|
37
|
+
# Connection#enqueue can consume either type via duck typing,
|
|
38
|
+
# but adds the `visible_to` label carried through from
|
|
39
|
+
# Pgbus::Streams::Stream#broadcast. The Dispatcher uses
|
|
40
|
+
# visible_to to decide per-connection delivery; Connection
|
|
41
|
+
# never sees the field.
|
|
42
|
+
StreamEnvelope = Data.define(:msg_id, :enqueued_at, :payload, :source, :visible_to)
|
|
43
|
+
|
|
44
|
+
DEFAULT_READ_LIMIT = 500
|
|
45
|
+
|
|
46
|
+
def initialize(client:, registry:, listener:, dispatch_queue:,
               logger: Pgbus.logger, read_limit: DEFAULT_READ_LIMIT,
               filters: nil, config: nil)
  # Lifecycle state. @running is only the `while` condition of
  # run_loop; #stop additionally pushes the :__stop__ sentinel onto
  # the queue, and run_loop breaks as soon as it pops that sentinel.
  @thread = nil
  @running = false

  # Injected collaborators.
  @client = client
  @registry = registry
  @listener = listener
  @queue = dispatch_queue
  @logger = logger
  @read_limit = read_limit

  # Audience filters: an explicitly injected instance wins (tests use
  # this to stay isolated); otherwise the process-wide registry.
  @filters = filters || Pgbus::Streams.filters

  # Configuration is consulted for `streams_stats_enabled`. An injected
  # config wins; otherwise the global Pgbus configuration is used.
  @config = config || Pgbus.configuration

  # Per-connection "scanned" cursor: the highest msg_id this Dispatcher
  # has examined for a connection, delivered or not. It lets
  # minimum_cursor move past batches an audience filter dropped
  # entirely. Connection#last_msg_id_sent remains the client-visible
  # cursor.
  @scanned_cursor = {}

  # Full PGMQ table name → logical stream name. Filled in on connect so
  # handle_wake can translate Listener::WakeMessage#queue_name into the
  # name used by the Registry and the in-flight buffers.
  @full_to_logical = {}

  # stream_name → Array<[connection, Array<Envelope>]>. The default
  # block stores a fresh array the first time a stream is looked up.
  @in_flight = Hash.new { |h, k| h[k] = [] }
end
|
|
92
|
+
|
|
93
|
+
# Spawns the dispatcher thread running run_loop. Returns self when the
# thread was started, nil when the dispatcher was already running.
def start
  unless @running
    @running = true
    @thread = Thread.new { run_loop }
    self
  end
end
|
|
100
|
+
|
|
101
|
+
# Asks the dispatcher thread to stop and waits up to 5 seconds for it.
# Returns self when a stop was performed, nil when not running.
def stop
  if @running
    @running = false
    @queue << :__stop__

    # Thread#join(5) returns nil when the limit expires. In that case
    # the thread is deliberately left alone (no Thread#kill): we only
    # log and drop our reference to it.
    timed_out = @thread && @thread.join(5).nil?
    if timed_out
      @logger.warn { "[Pgbus::Streamer::Dispatcher] thread did not terminate within 5s" }
    end

    @thread = nil
    self
  end
end
|
|
118
|
+
|
|
119
|
+
private
|
|
120
|
+
|
|
121
|
+
# Main loop of the dispatcher thread. Blocks on the dispatch queue and
# hands every message to #handle until either @running turns false or
# the :__stop__ sentinel is popped.
#
# Wake coalescing: when a WakeMessage is popped, drain_wakes_for
# opportunistically pulls the WakeMessages queued right behind it and
# folds duplicates per stream, so a burst of broadcasts does not cost
# one read_after roundtrip each. That drain stops at the first
# non-wake message and returns it as `trailing`.
#
# `trailing` can be the :__stop__ sentinel itself (a stop requested
# while wakes were queued). It must terminate the loop exactly like a
# sentinel popped directly; passing it to #handle would only log it as
# an unknown message and leave termination to the @running flag.
#
# Any StandardError escaping the loop is logged and re-raised.
def run_loop
  while @running
    msg = @queue.pop
    break if msg == :__stop__

    unless msg.is_a?(WakeMessage)
      handle(msg)
      next
    end

    wakes, trailing = drain_wakes_for(msg)
    # Wakes were queued ahead of the trailing message: handle them first
    # so the original queue order is preserved.
    wakes.each { |wake| handle(wake) }
    break if trailing == :__stop__

    handle(trailing) if trailing
  end
rescue StandardError => e
  @logger.error { "[Pgbus::Streamer::Dispatcher] crashed: #{e.class}: #{e.message}" }
  raise
end
|
|
147
|
+
|
|
148
|
+
# Folds the run of WakeMessages sitting at the head of the queue into
# at most one wake per stream, starting from `first` (already popped by
# the caller).
#
# Returns [wakes, trailing]:
#   wakes    - `first` plus the first wake seen for every other stream
#   trailing - the first non-WakeMessage popped while draining, or nil
#              when the queue ran empty. It has been removed from the
#              queue, so the caller must process it after the wakes.
def drain_wakes_for(first)
  seen_streams = Set[first.queue_name]
  wakes = [first]

  loop do
    candidate =
      begin
        # Non-blocking pop: raises ThreadError once the queue is empty.
        @queue.pop(true)
      rescue ThreadError
        return [wakes, nil]
      end

    return [wakes, candidate] unless candidate.is_a?(WakeMessage)

    # Set#add? returns nil for a stream we already hold a wake for.
    wakes << candidate if seen_streams.add?(candidate.queue_name)
  end
end
|
|
172
|
+
|
|
173
|
+
# Routes one queue message to its handler by message type; anything
# unrecognised is logged at warn level.
#
# Failures inside a handler are logged and swallowed on purpose, so a
# single bad message cannot take down the dispatcher thread. Errors
# raised outside this method still reach run_loop's own rescue, which
# logs and re-raises.
def handle(msg)
  if msg.is_a?(WakeMessage)
    handle_wake(msg)
  elsif msg.is_a?(ConnectMessage)
    handle_connect(msg)
  elsif msg.is_a?(DisconnectMessage)
    handle_disconnect(msg)
  else
    @logger.warn { "[Pgbus::Streamer::Dispatcher] unknown message: #{msg.class}" }
  end
rescue StandardError => error
  @logger.error { "[Pgbus::Streamer::Dispatcher] handling #{msg.class} raised #{error.class}: #{error.message}" }
end
|
|
189
|
+
|
|
190
|
+
# Handles a NOTIFY wake for one stream: reads everything after the
# lowest cursor among the stream's subscribers and fans the batch out
# to registered connections (direct enqueue) and to in-flight connects
# (appended to their buffers).
#
# msg.queue_name is the full PGMQ table name; subscribers are keyed by
# the logical stream name, so it is translated through
# @full_to_logical first (falling back to the name as given).
def handle_wake(msg)
  started_at = monotonic_ms
  stream = @full_to_logical[msg.queue_name] || msg.queue_name
  registered = @registry.connections_for(stream)
  # Hash#fetch instead of []: @in_flight is created with a default
  # block that STORES a new array on every miss, so a plain lookup here
  # would leave one empty entry behind for every stream that ever
  # wakes (including streams nobody is subscribed to).
  in_flight_pairs = @in_flight.fetch(stream) { [] }
  return if registered.empty? && in_flight_pairs.empty?

  min_seen = minimum_cursor(registered, in_flight_pairs)
  raw_envelopes = @client.read_after(stream, after_id: min_seen, limit: @read_limit)
  return if raw_envelopes.empty?

  envelopes = raw_envelopes.map { |e| unwrap_stream_envelope(e) }
  # Highest msg_id in this batch. Every subscriber's scanned cursor is
  # moved up to it even when the audience filter hides the whole batch;
  # otherwise minimum_cursor would stay pinned and the same window would
  # be read again on every wake.
  max_msg_id = envelopes.map(&:msg_id).max

  # Filtering is per connection because each connection carries its own
  # authorisation context.
  registered.each do |conn|
    safe_enqueue(conn, visible_envelopes_for(envelopes, conn))
    advance_scanned_cursor(conn, max_msg_id)
  end
  in_flight_pairs.each do |(conn, buffer)|
    buffer.concat(visible_envelopes_for(envelopes, conn))
    advance_scanned_cursor(conn, max_msg_id)
  end

  # Connections flagged dead get a DisconnectMessage queued.
  prune_dead(registered)

  # One stat row per wake; fanout counts every subscriber considered
  # (registered + in-flight), before audience filtering.
  record_stat(
    stream_name: stream,
    event_type: "broadcast",
    started_at: started_at,
    fanout: registered.size + in_flight_pairs.size
  )
end
|
|
241
|
+
|
|
242
|
+
# Runs the connect/replay sequence for a new SSE connection:
#   1. LISTEN on the stream's prefixed queue name
#   2. install an in-flight buffer for the connection
#   3. replay everything after the connection's cursor
#   4. flush whatever accumulated in the in-flight buffer
#   5. promote the connection to the Registry (or scrub state if it
#      died during replay)
# A "connect" stat is recorded whether or not the connection survived.
# Any StandardError scrubs the per-stream state, marks the connection
# dead and is logged rather than raised.
def handle_connect(msg)
  started_at = monotonic_ms
  connection = msg.connection
  stream = connection.stream_name

  # Step 1: subscribe first. The Listener is given the prefixed PGMQ
  # queue name while the Registry and the in-flight buffers use the
  # logical name; @full_to_logical is the mapping between the two.
  full_name = notify_queue_name_for(stream)
  @full_to_logical[full_name] = stream
  @listener.ensure_listening(full_name)

  # Step 2: install the in-flight buffer BEFORE any read.
  buffer = []
  @in_flight[stream] << [connection, buffer]

  # Step 3: replay what was published before this connect landed.
  raw_initial = @client.read_after(
    stream,
    after_id: connection.last_msg_id_sent,
    limit: @read_limit
  )
  initial = raw_initial.map { |e| unwrap_stream_envelope(e) }
  safe_enqueue(connection, visible_envelopes_for(initial, connection))
  # Same rule handle_wake applies to its batches: the replay window has
  # been examined for this connection, so move its scanned cursor past
  # it even if the audience filter hid every envelope. Otherwise the
  # next wake would start from the connection's old cursor and read the
  # same window again. A nil max (empty replay) is ignored by
  # advance_scanned_cursor.
  advance_scanned_cursor(connection, initial.map(&:msg_id).max)

  # Step 4: flush the in-flight buffer. Its entries were filtered when
  # handle_wake appended them, so they are not filtered again.
  safe_enqueue(connection, buffer)

  # Step 5: promote to the main registry, or, when the connection was
  # flagged dead during replay, drop its cursor and release the stream
  # state right here.
  remove_in_flight(stream, connection)
  if connection.dead?
    @scanned_cursor.delete(connection)
    cleanup_stream_if_unused(stream)
  else
    @registry.register(connection)
  end

  record_stat(
    stream_name: stream,
    event_type: "connect",
    started_at: started_at
  )
rescue StandardError => e
  # Failure anywhere above: scrub the same state so one failed connect
  # does not leave a map entry or a LISTEN behind.
  remove_in_flight(stream, connection)
  @scanned_cursor.delete(connection)
  cleanup_stream_if_unused(stream)
  connection.mark_dead!
  @logger.error { "[Pgbus::Streamer::Dispatcher] connect failed for #{connection.id}: #{e.class}: #{e.message}" }
end
|
|
315
|
+
|
|
316
|
+
# Tears down a connection: removes it from the registry, forgets its
# scanned cursor, releases per-stream state when it was the last
# subscriber, and records a "disconnect" stat.
def handle_disconnect(msg)
  began_at = monotonic_ms
  conn = msg.connection
  stream_name = conn.stream_name

  @registry.unregister(conn)
  @scanned_cursor.delete(conn)
  # Must run after unregister: it checks whether the registry is empty
  # for this stream.
  cleanup_stream_if_unused(stream_name)

  record_stat(stream_name: stream_name, event_type: "disconnect", started_at: began_at)
end
|
|
330
|
+
|
|
331
|
+
# Releases all per-stream state once a stream has no subscribers left
# (neither registered nor in-flight), so a long-running process does
# not accumulate memory per unique stream name (e.g. GlobalID-keyed
# streams such as `order_42`). Three things are released:
#   1. the stream's @in_flight entry, if an empty one is left over
#   2. the @full_to_logical translation entry
#   3. the Listener's subscription (via remove_listening)
#
# Does nothing while the stream still has subscribers.
def cleanup_stream_if_unused(stream)
  return unless @registry.empty?(stream)

  # Hash#fetch instead of []: @in_flight is created with a default
  # block that stores a new array on every miss, so `@in_flight[stream]`
  # here would re-create an empty entry on every cleanup, which is the
  # per-stream leak this method is meant to prevent.
  pending = @in_flight.fetch(stream, nil)
  return if pending && !pending.empty?

  # Drop a leftover empty entry, if any.
  @in_flight.delete(stream)

  full_name = @full_to_logical.key(stream)
  return unless full_name

  @full_to_logical.delete(full_name)
  @listener.remove_listening(full_name)
end
|
|
348
|
+
|
|
349
|
+
# Lowest effective cursor across every subscriber of a stream, both
# registered connections and in-flight [connection, buffer] pairs. This
# is the after_id for the next read_after. Returns 0 when there are no
# subscribers.
#
# The per-connection value comes from cursor_for, which combines the
# dispatcher's scanned cursor with Connection#last_msg_id_sent.
def minimum_cursor(registered, in_flight_pairs)
  registered_cursors = registered.map { |conn| cursor_for(conn) }
  in_flight_cursors = in_flight_pairs.map { |(conn, _buffer)| cursor_for(conn) }
  (registered_cursors + in_flight_cursors).min || 0
end
|
|
362
|
+
|
|
363
|
+
# Effective read cursor for one connection: the larger of the
# dispatcher's scanned cursor (0 when never scanned) and the
# connection's own last_msg_id_sent.
def cursor_for(connection)
  scanned = @scanned_cursor.fetch(connection, 0)
  sent = connection.last_msg_id_sent
  sent > scanned ? sent : scanned
end
|
|
366
|
+
|
|
367
|
+
# Raises the connection's scanned cursor to msg_id. The cursor never
# moves backwards, and a nil msg_id is ignored.
def advance_scanned_cursor(connection, msg_id)
  return if msg_id.nil?

  high_water = @scanned_cursor[connection] || 0
  return unless msg_id > high_water

  @scanned_cursor[connection] = msg_id
end
|
|
373
|
+
|
|
374
|
+
# Hands envelopes to the connection unless the connection is already
# dead or there is nothing to deliver. Returns whatever
# Connection#enqueue returns, or nil when skipped.
def safe_enqueue(connection, envelopes_or_buffer)
  deliverable = !connection.dead? && !envelopes_or_buffer.empty?
  connection.enqueue(envelopes_or_buffer) if deliverable
end
|
|
380
|
+
|
|
381
|
+
# Queues a DisconnectMessage for every connection in the list that is
# flagged dead; the actual teardown happens when that message is
# handled.
def prune_dead(connections)
  connections.each do |conn|
    next unless conn.dead?

    @queue << DisconnectMessage.new(connection: conn)
  end
end
|
|
386
|
+
|
|
387
|
+
# Removes the connection's in-flight [connection, buffer] pair from the
# stream (matched by object identity) and drops the stream's entry
# entirely once no pairs remain.
def remove_in_flight(stream, connection)
  remaining = @in_flight[stream]
  remaining.delete_if { |pair| pair.first.equal?(connection) }
  @in_flight.delete(stream) if remaining.empty?
end
|
|
392
|
+
|
|
393
|
+
# Maps a logical stream name to the prefixed PGMQ queue name by
# delegating to the client's configuration (`config.queue_name`). Per
# the original note, the prefixed name is the one that appears in the
# NOTIFY channel, so the Listener has to LISTEN on it rather than on
# the logical name.
def notify_queue_name_for(stream_name)
  client_config = @client.config
  client_config.queue_name(stream_name)
end
|
|
401
|
+
|
|
402
|
+
# Converts a raw envelope into a StreamEnvelope whose payload is the
# bare HTML string.
#
# The raw payload is expected to be a JSON object of the form
# {"html": "...", "visible_to": "..."}. When it parses to an object
# with a String "html" value, a new StreamEnvelope is returned carrying
# that HTML plus the optional visible_to label (a String label is
# converted to a Symbol; any other value is passed on unchanged).
#
# Anything else is returned untouched: payloads that are not valid
# JSON, JSON that is not an object, or objects without a String "html".
def unwrap_stream_envelope(envelope)
  decoded = JSON.parse(envelope.payload.to_s)
  return envelope unless decoded.is_a?(Hash) && decoded["html"].is_a?(String)

  audience = decoded["visible_to"]
  audience = audience.to_sym if audience.is_a?(String)

  StreamEnvelope.new(
    msg_id: envelope.msg_id,
    enqueued_at: envelope.enqueued_at,
    payload: decoded["html"],
    source: envelope.source,
    visible_to: audience
  )
rescue JSON::ParserError
  envelope
end
|
|
429
|
+
|
|
430
|
+
# Returns the subset of envelopes this connection may see. Each
# envelope's visible_to label (nil for envelope types that have no
# such field) is checked against the connection's context through the
# Filters registry's `visible?`.
def visible_envelopes_for(envelopes, connection)
  envelopes.select do |envelope|
    audience = envelope.visible_to if envelope.respond_to?(:visible_to)
    @filters.visible?(audience, connection.context)
  end
end
|
|
442
|
+
|
|
443
|
+
# Current reading of the monotonic clock in milliseconds (Float). Used
# for measuring durations, not wall-clock time.
def monotonic_ms
  seconds = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
  seconds * 1000.0
end
|
|
446
|
+
|
|
447
|
+
# Records one stream event through Pgbus::StreamStat.record!, but only
# when `streams_stats_enabled` is set on the injected config; otherwise
# it is a no-op returning nil.
#
# duration_ms is the time elapsed since started_at (a monotonic_ms
# reading), rounded to an integer. fanout is optional and is passed
# through unchanged.
def record_stat(stream_name:, event_type:, started_at:, fanout: nil)
  if @config.streams_stats_enabled
    elapsed_ms = (monotonic_ms - started_at).round
    Pgbus::StreamStat.record!(
      stream_name: stream_name,
      event_type: event_type,
      duration_ms: elapsed_ms,
      fanout: fanout
    )
  end
end
|
|
464
|
+
end
|
|
465
|
+
end
|
|
466
|
+
end
|
|
467
|
+
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pgbus
|
|
4
|
+
module Web
|
|
5
|
+
module Streamer
|
|
6
|
+
# Periodic maintenance loop for SSE connections. Runs three sweeps
|
|
7
|
+
# on every tick:
|
|
8
|
+
#
|
|
9
|
+
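# 1. Write an SSE comment (": heartbeat <seconds>\n\n", where <seconds> is the integer reading of the injected clock — CLOCK_MONOTONIC by default, not a Unix epoch) to each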
|
|
10
|
+
# connection. This keeps proxies and load balancers from timing
|
|
11
|
+
# out idle HTTP responses; most reverse proxies close HTTP
|
|
12
|
+
# responses that sit idle for 30-60s, which would silently drop
|
|
13
|
+
# SSE clients.
|
|
14
|
+
#
|
|
15
|
+
# 2. Mark connections that have been idle longer than the
|
|
16
|
+
# configured idle_timeout as dead. The Dispatcher's next pass
|
|
17
|
+
# picks them up via its disconnect path.
|
|
18
|
+
#
|
|
19
|
+
# 3. Post a DisconnectMessage for any connection already flagged
|
|
20
|
+
# dead (by IoWriter returning :closed / :blocked, or by the
|
|
21
|
+
# idle sweep above).
|
|
22
|
+
#
|
|
23
|
+
# The heartbeat runs on its own dedicated thread because it does
|
|
24
|
+
# blocking writes (via IoWriter with a deadline) and we don't want
|
|
25
|
+
# to delay the dispatcher. Writes are serialised per-connection by
|
|
26
|
+
# the Connection's own mutex, so concurrent dispatcher + heartbeat
|
|
27
|
+
# writes are safe.
|
|
28
|
+
class Heartbeat
|
|
29
|
+
# registry       - source of connections, iterated via each_connection
# dispatch_queue - queue that DisconnectMessages are pushed onto
# interval       - seconds to wait between sweeps
# idle_timeout   - idle_for threshold above which a connection is
#                  marked dead
# logger         - receives tick failures
# clock          - callable returning the current time; defaults to
#                  the process monotonic clock (injectable for tests)
def initialize(registry:, dispatch_queue:, interval:, idle_timeout:, logger: Pgbus.logger, clock: nil)
  # Condition variable + mutex used to sleep between ticks and to be
  # woken early by #stop.
  @wake_mutex = Mutex.new
  @wake = ConditionVariable.new

  @thread = nil
  @running = false

  @clock = clock || -> { ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) }
  @logger = logger
  @idle_timeout = idle_timeout
  @interval = interval
  @queue = dispatch_queue
  @registry = registry
end
|
|
41
|
+
|
|
42
|
+
# Spawns the heartbeat thread running run_loop. Returns self when the
# thread was started, nil when the heartbeat was already running.
def start
  unless @running
    @running = true
    @thread = Thread.new { run_loop }
    self
  end
end
|
|
49
|
+
|
|
50
|
+
# Stops the heartbeat thread: clears @running, wakes the thread out of
# its inter-tick wait, and waits up to 5 seconds for it to finish.
# Returns self when a stop was performed, nil when not running.
def stop
  return unless @running

  @running = false
  # Broadcast under the mutex so a thread waiting between ticks is
  # woken immediately instead of sleeping out the rest of the interval.
  @wake_mutex.synchronize { @wake.broadcast }

  # Thread#join(5) returns nil when the limit expires. The thread is
  # not killed; log the timeout so a heartbeat stuck in a slow write is
  # visible to operators rather than silently abandoned.
  if @thread && @thread.join(5).nil?
    @logger.warn { "[Pgbus::Streamer::Heartbeat] thread did not terminate within 5s" }
  end
  @thread = nil
  self
end
|
|
59
|
+
|
|
60
|
+
# Performs one sweep over every connection in the registry. Public so
# it can be driven synchronously (e.g. from tests); the background
# thread calls it from run_loop.
#
# Per connection, exactly one of:
#   - already dead                 → queue a disconnect
#   - idle longer than the timeout → mark dead, queue a disconnect
#   - otherwise                    → write a "heartbeat <n>" comment and
#                                    queue a disconnect if the write
#                                    left it dead or did not return :ok
def tick
  stamp = @clock.call
  @registry.each_connection do |conn|
    if conn.dead?
      enqueue_disconnect(conn)
    elsif conn.idle_for > @idle_timeout
      conn.mark_dead!
      enqueue_disconnect(conn)
    else
      outcome = conn.write_comment("heartbeat #{stamp.to_i}")
      enqueue_disconnect(conn) if conn.dead? || outcome != :ok
    end
  end
end
|
|
82
|
+
|
|
83
|
+
private
|
|
84
|
+
|
|
85
|
+
# Body of the heartbeat thread: run a tick, then wait up to @interval
# seconds (or until #stop broadcasts on @wake), repeating while
# @running is set. A failing tick is logged and does not end the loop.
def run_loop
  loop do
    break unless @running

    begin
      tick
    rescue StandardError => error
      @logger.error { "[Pgbus::Streamer::Heartbeat] tick raised: #{error.class}: #{error.message}" }
    end

    @wake_mutex.synchronize do
      # Re-check under the mutex so a stop issued during the tick skips
      # the wait instead of sleeping a full interval.
      next unless @running

      @wake.wait(@wake_mutex, @interval)
    end
  end
end
|
|
98
|
+
|
|
99
|
+
# Pushes a Dispatcher::DisconnectMessage for the connection onto the
# shared dispatch queue.
def enqueue_disconnect(connection)
  message = Dispatcher::DisconnectMessage.new(connection: connection)
  @queue << message
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|