pgbus 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/README.md +238 -0
  4. data/Rakefile +8 -1
  5. data/app/controllers/pgbus/insights_controller.rb +6 -0
  6. data/app/helpers/pgbus/streams_helper.rb +115 -0
  7. data/app/javascript/pgbus/stream_source_element.js +212 -0
  8. data/app/models/pgbus/stream_stat.rb +118 -0
  9. data/app/views/pgbus/insights/show.html.erb +59 -0
  10. data/config/locales/en.yml +16 -0
  11. data/config/routes.rb +11 -0
  12. data/lib/generators/pgbus/add_presence_generator.rb +55 -0
  13. data/lib/generators/pgbus/add_stream_stats_generator.rb +54 -0
  14. data/lib/generators/pgbus/templates/add_presence.rb.erb +26 -0
  15. data/lib/generators/pgbus/templates/add_stream_stats.rb.erb +18 -0
  16. data/lib/pgbus/client/ensure_stream_queue.rb +54 -0
  17. data/lib/pgbus/client/read_after.rb +100 -0
  18. data/lib/pgbus/client.rb +6 -0
  19. data/lib/pgbus/configuration/capsule_dsl.rb +6 -20
  20. data/lib/pgbus/configuration.rb +126 -14
  21. data/lib/pgbus/engine.rb +31 -0
  22. data/lib/pgbus/process/dispatcher.rb +62 -4
  23. data/lib/pgbus/streams/cursor.rb +71 -0
  24. data/lib/pgbus/streams/envelope.rb +58 -0
  25. data/lib/pgbus/streams/filters.rb +98 -0
  26. data/lib/pgbus/streams/presence.rb +216 -0
  27. data/lib/pgbus/streams/signed_name.rb +69 -0
  28. data/lib/pgbus/streams/turbo_broadcastable.rb +53 -0
  29. data/lib/pgbus/streams/watermark_cache_middleware.rb +28 -0
  30. data/lib/pgbus/streams.rb +151 -0
  31. data/lib/pgbus/version.rb +1 -1
  32. data/lib/pgbus/web/data_source.rb +29 -0
  33. data/lib/pgbus/web/stream_app.rb +179 -0
  34. data/lib/pgbus/web/streamer/connection.rb +122 -0
  35. data/lib/pgbus/web/streamer/dispatcher.rb +467 -0
  36. data/lib/pgbus/web/streamer/heartbeat.rb +105 -0
  37. data/lib/pgbus/web/streamer/instance.rb +176 -0
  38. data/lib/pgbus/web/streamer/io_writer.rb +73 -0
  39. data/lib/pgbus/web/streamer/listener.rb +228 -0
  40. data/lib/pgbus/web/streamer/registry.rb +103 -0
  41. data/lib/pgbus/web/streamer.rb +53 -0
  42. data/lib/pgbus.rb +28 -0
  43. data/lib/puma/plugin/pgbus_streams.rb +54 -0
  44. data/lib/tasks/pgbus_streams.rake +52 -0
  45. metadata +29 -1
@@ -0,0 +1,467 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pgbus
4
+ module Web
5
+ module Streamer
6
+ # The single-threaded consumer of the shared dispatch_queue. Drains
7
+ # three kinds of messages:
8
+ #
9
+ # - Listener::WakeMessage(queue_name:) — a NOTIFY fired; read_after
10
+ # the minimum cursor and fan out to every connection on the stream
11
+ # (both registered and in-flight connects).
12
+ #
13
+ # - ConnectMessage(connection:) — a new SSE client connected. Runs
14
+ # the 5-step race-free replay sequence from design doc §6.5:
15
+ # 1. ensure_listening on the stream (so future WakeMessages
16
+ # deliver to the in-flight buffer)
17
+ # 2. register an in-flight buffer keyed by connection
18
+ # 3. read_after(connection.since_id) + enqueue to connection
19
+ # 4. drain the in-flight buffer into the connection (dedup is
20
+ # handled by Connection#enqueue's cursor check)
21
+ # 5. move the connection from in-flight to the main Registry
22
+ #
23
+ # - DisconnectMessage(connection:) — unregister and, if the stream
24
+ # now has no subscribers (registered or in-flight), drop its name
25
+ # mapping and tell the Listener to stop listening (done here).
26
+ #
27
+ # All state ownership lives on this one thread: the registry is
28
+ # thread-safe (Phase 2.1) but the in-flight buffers are local to
29
+ # the Dispatcher and accessed only from this thread, so no locks.
30
+ class Dispatcher
31
# Message types drained from the dispatch queue. WakeMessage is the
# Listener's own type, re-exported under the Dispatcher namespace;
# the other two each carry the connection they concern.
WakeMessage = Listener::WakeMessage
ConnectMessage = Data.define(:connection)
DisconnectMessage = Data.define(:connection)

# A stream broadcast after unwrapping. It exposes msg_id and payload
# like Pgbus::Client::ReadAfter::Envelope, so Connection#enqueue can
# take either type, and additionally carries the `visible_to` label
# from Pgbus::Streams::Stream#broadcast. Only the Dispatcher reads
# visible_to (to decide per-connection delivery).
StreamEnvelope = Data.define(
  :msg_id,
  :enqueued_at,
  :payload,
  :source,
  :visible_to
)

# Default `limit:` handed to read_after when no read_limit is given.
DEFAULT_READ_LIMIT = 500
45
+
46
# Builds a dispatcher around its collaborators. No thread is started
# here; #start does that.
#
# @param client         used for read_after and config.queue_name
# @param registry       registry of promoted (fully connected) connections
# @param listener       receives ensure_listening / remove_listening calls
# @param dispatch_queue queue of Wake/Connect/Disconnect messages to drain
# @param logger         defaults to Pgbus.logger
# @param read_limit     `limit:` passed to every read_after call
# @param filters        audience filters; nil falls back to Pgbus::Streams.filters
# @param config         nil falls back to Pgbus.configuration
def initialize(client:, registry:, listener:, dispatch_queue:,
               logger: Pgbus.logger, read_limit: DEFAULT_READ_LIMIT,
               filters: nil, config: nil)
  # Collaborators, stored exactly as given.
  @queue = dispatch_queue
  @client = client
  @listener = listener
  @registry = registry
  @logger = logger
  @read_limit = read_limit

  # Injectable so tests can pass isolated instances; production call
  # sites that pass nothing get the process-wide filter registry and
  # configuration (the latter is read for `streams_stats_enabled`).
  @filters = filters || Pgbus::Streams.filters
  @config = config || Pgbus.configuration

  # Logical stream name → Array of [connection, buffer] pairs for
  # connections that are mid-connect. Note the default block: a plain
  # `@in_flight[name]` lookup stores a fresh empty array under `name`.
  @in_flight = Hash.new { |pairs_by_stream, stream| pairs_by_stream[stream] = [] }

  # Full PGMQ queue name → logical stream name. Filled on connect so
  # a WakeMessage (which carries the full name) can be mapped back to
  # the name used by the Registry and by @in_flight.
  @full_to_logical = {}

  # connection → highest msg_id examined for it, delivered or not.
  # Kept separately from Connection#last_msg_id_sent so a batch that
  # an audience filter drops entirely still moves the read position
  # forward instead of being re-read on every wake.
  @scanned_cursor = {}

  # @running is only a hint for the loop condition; the :__stop__
  # sentinel pushed onto @queue is what ends a blocked run_loop.
  @thread = nil
  @running = false
end
92
+
93
# Spawns the dispatcher thread running run_loop.
#
# @return [self, nil] self when a thread was started; nil when the
#   dispatcher was already flagged as running (nothing is spawned).
def start
  unless @running
    @running = true
    @thread = Thread.new { run_loop }
    self
  end
end
100
+
101
# Asks the dispatcher thread to finish and waits up to 5 seconds.
#
# @return [self, nil] self after a stop attempt; nil when the
#   dispatcher was not running.
def stop
  return unless @running

  @running = false
  @queue << :__stop__

  worker = @thread
  # Thread#join(timeout) returns nil when the timeout elapses first.
  timed_out = worker && worker.join(5).nil?
  if timed_out
    # Deliberately no Thread#kill: the thread is most likely blocked
    # in a client write or a slow query, and killing it there would
    # leave IO state corrupt. We only log and drop our reference; the
    # thread leaves run_loop once the blocking call returns and it
    # sees @running == false.
    @logger.warn { "[Pgbus::Streamer::Dispatcher] thread did not terminate within 5s" }
  end

  @thread = nil
  self
end
118
+
119
+ private
120
+
121
# Thread body: pops messages off @queue until the :__stop__ sentinel
# is seen or @running is false at the top of an iteration.
#
# Wake coalescing: when a WakeMessage is popped, drain_wakes_for
# pulls the consecutive wakes that follow it and folds duplicates
# for the same queue into one, so N rapid broadcasts cost one
# read_after per stream instead of N. The drain stops at the first
# non-wake message, which comes back as `trailing`.
#
# Per-message errors are swallowed inside #handle. Anything that
# escapes to this level is logged and re-raised.
def run_loop
  while @running
    msg = @queue.pop
    break if msg == :__stop__

    unless msg.is_a?(WakeMessage)
      handle(msg)
      next
    end

    wakes, trailing = drain_wakes_for(msg)
    wakes.each { |w| handle(w) }
    # The drain may have popped the stop sentinel itself. Honour it
    # here rather than handing it to #handle, where it would be
    # logged as an unknown message and the loop would then depend on
    # @running alone to terminate.
    break if trailing == :__stop__

    handle(trailing) if trailing
  end
rescue StandardError => e
  @logger.error { "[Pgbus::Streamer::Dispatcher] crashed: #{e.class}: #{e.message}" }
  raise
end
147
+
148
# Pops messages off the queue without blocking for as long as they
# are WakeMessages, keeping only the first wake seen per queue_name.
#
# @param first [WakeMessage] the wake the caller already popped
# @return [Array] a pair [wakes, trailing]: `wakes` starts with
#   `first` and has one entry per distinct queue_name, in arrival
#   order; `trailing` is the first non-wake message popped, or nil
#   when the queue ran dry first. The caller is expected to process
#   the wakes and then the trailing message.
def drain_wakes_for(first)
  streams_seen = Set[first.queue_name]
  wakes = [first]

  loop do
    candidate =
      begin
        # Non-blocking pop: raises ThreadError on an empty queue.
        @queue.pop(true)
      rescue ThreadError
        return [wakes, nil]
      end

    return [wakes, candidate] unless candidate.is_a?(WakeMessage)

    # Set#add? returns nil for a queue_name we already hold a wake for.
    wakes << candidate if streams_seen.add?(candidate.queue_name)
  end
end
172
+
173
# Routes one queue message to its handler by type. Anything that is
# not a Wake/Connect/Disconnect message is logged as a warning.
#
# A StandardError raised by a handler is logged and swallowed on
# purpose, so that a single bad message cannot take down the
# dispatcher thread and strand every connected client. (run_loop's
# own rescue, by contrast, re-raises after logging.)
def handle(msg)
  case msg
  in WakeMessage then handle_wake(msg)
  in ConnectMessage then handle_connect(msg)
  in DisconnectMessage then handle_disconnect(msg)
  else
    @logger.warn { "[Pgbus::Streamer::Dispatcher] unknown message: #{msg.class}" }
  end
rescue StandardError => e
  @logger.error { "[Pgbus::Streamer::Dispatcher] handling #{msg.class} raised #{e.class}: #{e.message}" }
end
189
+
190
# Handles one NOTIFY wake-up for a stream: reads everything after the
# lowest cursor among the stream's subscribers and fans the batch out
# to registered connections (written immediately) and to in-flight
# connections (appended to their replay buffers).
#
# @param msg [WakeMessage] carries the full PGMQ queue name
def handle_wake(msg)
  started_at = monotonic_ms
  # msg.queue_name is the full PGMQ table name, while connections are
  # tracked under the logical stream name. Translate first; a name
  # with no mapping is used as given.
  stream = @full_to_logical[msg.queue_name] || msg.queue_name
  registered = @registry.connections_for(stream)
  # fetch, not []: @in_flight has a default block that stores a new
  # empty array under every key it is asked for. Using [] here would
  # leave one permanent entry behind for each woken stream that has
  # no in-flight subscriber.
  in_flight_pairs = @in_flight.fetch(stream) { [] }
  return if registered.empty? && in_flight_pairs.empty?

  min_seen = minimum_cursor(registered, in_flight_pairs)
  raw_envelopes = @client.read_after(stream, after_id: min_seen, limit: @read_limit)
  return if raw_envelopes.empty?

  envelopes = raw_envelopes.map { |e| unwrap_stream_envelope(e) }
  # Highest msg_id in this batch. Every subscriber's scanned cursor is
  # moved to it even when the audience filter hides the whole batch
  # from that subscriber; otherwise a run of hidden broadcasts would
  # pin minimum_cursor and the same window would be re-read on every
  # wake. This only affects where the Dispatcher reads from next.
  max_msg_id = envelopes.map(&:msg_id).max

  # Filtering is per connection because visibility is evaluated
  # against each connection's own context.
  registered.each do |conn|
    safe_enqueue(conn, visible_envelopes_for(envelopes, conn))
    advance_scanned_cursor(conn, max_msg_id)
  end
  in_flight_pairs.each do |(conn, buffer)|
    buffer.concat(visible_envelopes_for(envelopes, conn))
    advance_scanned_cursor(conn, max_msg_id)
  end

  # Connections that are dead by now get a DisconnectMessage queued.
  prune_dead(registered)

  # One stat row per wake. Fanout counts every subscriber of the
  # stream (registered + in-flight), regardless of what the audience
  # filter let through.
  record_stat(
    stream_name: stream,
    event_type: "broadcast",
    started_at: started_at,
    fanout: registered.size + in_flight_pairs.size
  )
end
241
+
242
# Runs the connect/replay sequence for a new SSE connection:
#   1. make sure the Listener is listening on the stream's queue
#   2. install an in-flight buffer for the connection
#   3. read_after the connection's cursor and write the result
#   4. write whatever landed in the in-flight buffer meanwhile
#   5. drop the in-flight entry and register the connection
#
# Any StandardError raised along the way is handled here: per-stream
# state is released, the connection is marked dead and the failure is
# logged. An error raised by that cleanup itself still propagates to
# the caller.
#
# @param msg [ConnectMessage]
def handle_connect(msg)
  started_at = monotonic_ms
  connection = msg.connection
  stream = connection.stream_name

  # Step 1: subscribe first. The Listener is given the prefixed PGMQ
  # queue name because that is what the NOTIFY channel carries; the
  # Registry and the in-flight buffer use the logical name. The
  # mapping stored here lets handle_wake translate back.
  full_name = notify_queue_name_for(stream)
  @full_to_logical[full_name] = stream
  @listener.ensure_listening(full_name)

  # Step 2: install the in-flight buffer BEFORE any read.
  buffer = []
  @in_flight[stream] << [connection, buffer]

  # Step 3: replay what was published before this connect.
  raw_initial = @client.read_after(
    stream,
    after_id: connection.last_msg_id_sent,
    limit: @read_limit
  )
  initial = raw_initial.map { |e| unwrap_stream_envelope(e) }
  safe_enqueue(connection, visible_envelopes_for(initial, connection))

  # Step 4: drain the in-flight buffer. Its entries were filtered by
  # handle_wake when they were appended, so no re-filter here.
  # Overlap with step 3 is left to Connection#enqueue to dedupe.
  safe_enqueue(connection, buffer)

  # Step 5: promote to the registry. A connection that died during
  # the replay is not registered; since no DisconnectMessage will be
  # posted for it, its cursor and (if now unused) the stream's state
  # are released right here.
  remove_in_flight(stream, connection)
  if connection.dead?
    @scanned_cursor.delete(connection)
    cleanup_stream_if_unused(stream)
  else
    @registry.register(connection)
  end

  # Recorded whether or not the connection survived the replay: a
  # connection that died before registering gets no disconnect stat,
  # so skipping it here would under-count connection attempts.
  record_stat(
    stream_name: stream,
    event_type: "connect",
    started_at: started_at
  )
rescue StandardError => e
  begin
    # Same release as the dead-connection branch above, so a failed
    # connect does not leave a mapping or a LISTEN behind.
    remove_in_flight(stream, connection)
    @scanned_cursor.delete(connection)
    cleanup_stream_if_unused(stream)
  ensure
    # In an ensure so that a failure in the cleanup above (for example
    # the listener raising) cannot leave the connection alive but
    # unregistered, and so the original error is always logged.
    connection.mark_dead!
    @logger.error { "[Pgbus::Streamer::Dispatcher] connect failed for #{connection.id}: #{e.class}: #{e.message}" }
  end
end
315
+
316
# Tears down a connection: removes it from the registry, forgets its
# scanned cursor, releases the stream's state if nothing else is
# subscribed, and records a "disconnect" stat.
#
# @param msg [DisconnectMessage]
def handle_disconnect(msg)
  began = monotonic_ms
  conn = msg.connection
  stream_name = conn.stream_name

  @registry.unregister(conn)
  @scanned_cursor.delete(conn)
  cleanup_stream_if_unused(stream_name)

  record_stat(stream_name: stream_name, event_type: "disconnect", started_at: began)
end
330
+
331
# Releases per-stream state once a stream has no subscribers left,
# registered or in-flight, so a long-running process does not keep
# state for every stream name it has ever seen (which matters for
# apps with many short-lived stream names such as `order_42`).
#
# What is released here:
#   1. the @full_to_logical entry for the stream
#   2. the Listener's subscription, via remove_listening
# The @in_flight entry is deleted by remove_in_flight once its last
# pair is gone; this method only inspects it.
#
# Does nothing when subscribers remain or when the stream has no
# @full_to_logical mapping.
def cleanup_stream_if_unused(stream)
  return unless @registry.empty?(stream)

  # fetch, not []: @in_flight has a default block that stores an empty
  # array under every key it is asked for, so a plain lookup here
  # would re-create the very entry this method is meant to see gone.
  pending = @in_flight.fetch(stream, nil)
  return if pending && !pending.empty?

  full_name = @full_to_logical.key(stream)
  return unless full_name

  @full_to_logical.delete(full_name)
  @listener.remove_listening(full_name)
end
348
+
349
# Lowest read position across all subscribers of a stream; used as
# the `after_id` for the next read_after so that no subscriber misses
# a message.
#
# Each subscriber's position comes from cursor_for, which combines
# the Dispatcher's scanned cursor with Connection#last_msg_id_sent.
# The two differ only when an audience filter dropped envelopes.
#
# @param registered [Enumerable] registered connections
# @param in_flight_pairs [Array] [connection, buffer] pairs
# @return the smallest cursor, or 0 when there are no subscribers
def minimum_cursor(registered, in_flight_pairs)
  registered_cursors = registered.map { |conn| cursor_for(conn) }
  pending_cursors = in_flight_pairs.map { |(conn, _buffer)| cursor_for(conn) }
  (registered_cursors + pending_cursors).min || 0
end
362
+
363
# Read position for one connection: the larger of the Dispatcher's
# scanned cursor (0 when the connection has none yet) and the
# connection's own last_msg_id_sent.
def cursor_for(connection)
  scanned = @scanned_cursor.fetch(connection, 0)
  sent = connection.last_msg_id_sent
  [scanned, sent].max
end
366
+
367
# Raises the connection's scanned cursor to msg_id. The cursor never
# moves backwards, and a nil msg_id is ignored.
#
# @return msg_id when the cursor was raised, nil otherwise
def advance_scanned_cursor(connection, msg_id)
  return if msg_id.nil?
  return unless msg_id > (@scanned_cursor[connection] || 0)

  @scanned_cursor[connection] = msg_id
end
373
+
374
# Hands a batch to the connection unless the connection is already
# dead or the batch is empty.
#
# @return whatever Connection#enqueue returns, or nil when skipped
def safe_enqueue(connection, envelopes_or_buffer)
  deliverable = !connection.dead? && !envelopes_or_buffer.empty?
  connection.enqueue(envelopes_or_buffer) if deliverable
end
380
+
381
# Queues a DisconnectMessage for every connection in the list that
# reports dead?. Nothing is removed here; the messages are handled
# later through the normal dispatch path.
def prune_dead(connections)
  connections.each do |conn|
    next unless conn.dead?

    @queue << DisconnectMessage.new(connection: conn)
  end
end
386
+
387
# Drops the connection's [connection, buffer] pair from the stream's
# in-flight list (matched by object identity) and deletes the
# stream's entry altogether once no pairs remain.
def remove_in_flight(stream, connection)
  remaining = @in_flight[stream]
  remaining.delete_if { |(candidate, _buffer)| candidate.equal?(connection) }
  @in_flight.delete(stream) if remaining.empty?
end
392
+
393
# Maps a logical stream name (e.g. "chat") to the prefixed PGMQ queue
# name (e.g. "pgbus_int_chat") by asking the client's config. The
# prefixed form is what appears in the NOTIFY channel
# `pgmq.q_<prefixed>.INSERT`, so it is the name the Listener needs.
def notify_queue_name_for(stream_name)
  client_config = @client.config
  client_config.queue_name(stream_name)
end
401
+
402
# Turns a raw envelope whose payload is JSON of the form
# {"html": "...", "visible_to": ...} into a StreamEnvelope whose
# payload is the bare HTML string. Pgbus::Streams::Stream#broadcast
# produces that wrapper so the payload fits PGMQ's JSONB column.
#
# The envelope is returned untouched when its payload is not valid
# JSON, is not a JSON object, or has no string under "html". That
# keeps payloads that were not produced by Stream#broadcast flowing
# through unchanged.
#
# A string "visible_to" is converted to a Symbol; any other value is
# carried over as is.
def unwrap_stream_envelope(envelope)
  document = JSON.parse(envelope.payload.to_s)
  return envelope unless document.is_a?(Hash) && document["html"].is_a?(String)

  audience = document["visible_to"]
  audience = audience.to_sym if audience.is_a?(String)

  StreamEnvelope.new(
    msg_id: envelope.msg_id,
    enqueued_at: envelope.enqueued_at,
    payload: document["html"],
    source: envelope.source,
    visible_to: audience
  )
rescue JSON::ParserError
  envelope
end
429
+
430
# Returns the envelopes this connection is allowed to see. Each
# envelope's visible_to label (nil for envelope types that have no
# such field) is passed to the Filters registry together with the
# connection's context, and the registry decides.
def visible_envelopes_for(envelopes, connection)
  envelopes.filter do |candidate|
    audience = candidate.visible_to if candidate.respond_to?(:visible_to)
    @filters.visible?(audience, connection.context)
  end
end
442
+
443
# Current monotonic clock reading in milliseconds, as a Float. Only
# differences between two readings are meaningful.
def monotonic_ms
  ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :float_millisecond)
end
446
+
447
# Records one stream event through Pgbus::StreamStat.record!, but
# only when `config.streams_stats_enabled` is truthy. Recording is
# opt-in because stream volume can far exceed job volume and each
# stat costs a write.
#
# @param stream_name logical stream name
# @param event_type  "broadcast", "connect" or "disconnect"
# @param started_at  monotonic_ms reading taken when handling began
# @param fanout      subscriber count (passed for broadcasts)
def record_stat(stream_name:, event_type:, started_at:, fanout: nil)
  return unless @config.streams_stats_enabled

  elapsed_ms = (monotonic_ms - started_at).round
  Pgbus::StreamStat.record!(stream_name:, event_type:, duration_ms: elapsed_ms, fanout:)
end
464
+ end
465
+ end
466
+ end
467
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pgbus
4
+ module Web
5
+ module Streamer
6
+ # Periodic maintenance loop for SSE connections. Runs three sweeps
7
+ # on every tick:
8
+ #
9
+ # 1. Write an SSE comment (": heartbeat <clock seconds>\n\n") to each
10
+ # connection. This keeps proxies and load balancers from timing
11
+ # out idle HTTP responses; most reverse proxies close HTTP
12
+ # responses that sit idle for 30-60s, which would silently drop
13
+ # SSE clients.
14
+ #
15
+ # 2. Mark connections that have been idle longer than the
16
+ # configured idle_timeout as dead. The Dispatcher's next pass
17
+ # picks them up via its disconnect path.
18
+ #
19
+ # 3. Post a DisconnectMessage for any connection already flagged
20
+ # dead (by IoWriter returning :closed / :blocked, or by the
21
+ # idle sweep above).
22
+ #
23
+ # The heartbeat runs on its own dedicated thread because it does
24
+ # blocking writes (via IoWriter with a deadline) and we don't want
25
+ # to delay the dispatcher. Writes are serialised per-connection by
26
+ # the Connection's own mutex, so concurrent dispatcher + heartbeat
27
+ # writes are safe.
28
+ class Heartbeat
29
# Builds the heartbeat around its collaborators. No thread is started
# here; #start does that.
#
# @param registry       enumerated with each_connection on every tick
# @param dispatch_queue receives DisconnectMessages
# @param interval       wait between ticks, passed to ConditionVariable#wait
# @param idle_timeout   a connection whose idle_for exceeds this is marked dead
# @param logger         defaults to Pgbus.logger
# @param clock          callable giving the value stamped into heartbeat
#   comments; defaults to the monotonic clock in seconds
def initialize(registry:, dispatch_queue:, interval:, idle_timeout:, logger: Pgbus.logger, clock: nil)
  @queue = dispatch_queue
  @registry = registry
  @logger = logger

  @idle_timeout = idle_timeout
  @interval = interval
  @clock = clock || -> { ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) }

  # The condition variable lets #stop cut the inter-tick wait short.
  @wake_mutex = Mutex.new
  @wake = ConditionVariable.new

  @thread = nil
  @running = false
end
41
+
42
# Spawns the heartbeat thread running run_loop.
#
# @return [self, nil] self when a thread was started; nil when the
#   heartbeat was already flagged as running (nothing is spawned).
def start
  unless @running
    @running = true
    @thread = Thread.new { run_loop }
    self
  end
end
49
+
50
# Stops the heartbeat: clears the running flag, wakes the thread out
# of its inter-tick wait, and waits up to 5 seconds for it to finish.
#
# @return [self, nil] self after a stop attempt; nil when the
#   heartbeat was not running.
def stop
  return unless @running

  @running = false
  # Broadcast under the mutex so a thread waiting in run_loop is woken
  # instead of sleeping out the rest of its interval.
  @wake_mutex.synchronize { @wake.broadcast }
  if @thread && @thread.join(5).nil?
    # Thread#join(timeout) returns nil when the timeout elapses first,
    # i.e. the thread is still busy (most likely inside a tick). As in
    # Dispatcher#stop, say so instead of dropping the reference
    # silently; the thread exits once it re-checks @running.
    @logger.warn { "[Pgbus::Streamer::Heartbeat] thread did not terminate within 5s" }
  end
  @thread = nil
  self
end
59
+
60
# Performs one sweep over every connection in the registry. Public so
# tests can drive a sweep synchronously; in production the background
# thread calls it from run_loop.
#
# For each connection, in this order:
#   - already dead            → post a DisconnectMessage
#   - idle_for > idle_timeout → mark dead, post a DisconnectMessage
#   - otherwise               → write a "heartbeat <n>" comment, and
#     post a DisconnectMessage if the write did not return :ok or the
#     connection is dead afterwards
#
# <n> is the integer part of the value returned by the configured
# clock when the sweep started.
def tick
  now = @clock.call
  @registry.each_connection do |connection|
    drop =
      if connection.dead?
        true
      elsif connection.idle_for > @idle_timeout
        connection.mark_dead!
        true
      else
        outcome = connection.write_comment("heartbeat #{now.to_i}")
        connection.dead? || outcome != :ok
      end

    enqueue_disconnect(connection) if drop
  end
end
82
+
83
+ private
84
+
85
# Thread body: run a tick, then wait up to @interval seconds (or
# until #stop broadcasts), for as long as @running is set. A
# StandardError raised by a tick is logged and the loop carries on.
def run_loop
  while @running
    begin
      tick
    rescue StandardError => error
      @logger.error { "[Pgbus::Streamer::Heartbeat] tick raised: #{error.class}: #{error.message}" }
    end

    @wake_mutex.synchronize do
      # Re-check under the mutex so a stop issued during the tick is
      # not followed by a full interval of waiting.
      next unless @running

      @wake.wait(@wake_mutex, @interval)
    end
  end
end
98
+
99
# Posts a DisconnectMessage for the connection onto the dispatch
# queue; the Dispatcher performs the actual teardown.
def enqueue_disconnect(connection)
  message = Dispatcher::DisconnectMessage.new(connection:)
  @queue << message
end
102
+ end
103
+ end
104
+ end
105
+ end