svelte-adapter-uws-extensions 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "svelte-adapter-uws-extensions",
3
- "version": "0.5.3",
3
+ "version": "0.5.5",
4
4
  "publishConfig": {
5
5
  "tag": "latest"
6
6
  },
@@ -154,7 +154,7 @@
154
154
  "node": ">=22.0.0"
155
155
  },
156
156
  "peerDependencies": {
157
- "svelte-adapter-uws": "^0.5.3"
157
+ "svelte-adapter-uws": "^0.5.4"
158
158
  },
159
159
  "dependencies": {
160
160
  "ioredis": "^5.0.0"
package/redis/cursor.d.ts CHANGED
@@ -74,6 +74,18 @@ export interface CursorEntry {
74
74
  data: any;
75
75
  }
76
76
 
77
+ /**
78
+ * Thrown by `attach()` when the websocket closes before `platform.subscribe`
79
+ * can land. Same shape as `presence.WsClosedError`; catch on `err.code ===
80
+ * 'WS_CLOSED'` for cross-feature handling.
81
+ */
82
+ export class WsClosedError extends Error {
83
+ name: 'WsClosedError';
84
+ code: 'WS_CLOSED';
85
+ operation: string;
86
+ topic: string;
87
+ }
88
+
77
89
  export interface RedisCursorTracker {
78
90
  /**
79
91
  * Opt this connection into receiving cursor updates for `topic`.
@@ -84,6 +96,14 @@ export interface RedisCursorTracker {
84
96
  *
85
97
  * Without `attach`, the publishes in `update` fan out to an empty
86
98
  * subscriber set and no client ever sees a cursor frame.
99
+ *
100
+ * @throws {WsClosedError} (`err.code === 'WS_CLOSED'`) if the websocket
101
+ * has already closed by the time `platform.subscribe` runs. No state
102
+ * to roll back (`wsState` is only created on `update`); callers do
103
+ * not need to compensate. The follow-up `snapshot()` call is skipped
104
+ * when this throws. Snapshot-send failures on an already-subscribed
105
+ * connection are NOT thrown - cursor frames are self-recovering via
106
+ * the next bulk tick.
87
107
  */
88
108
  attach(ws: any, topic: string, platform: Platform): Promise<void>;
89
109
 
@@ -127,6 +147,32 @@ export interface RedisCursorTracker {
127
147
  /** Stop the Redis subscriber and clear local timers. */
128
148
  destroy(): void;
129
149
 
150
+ /**
151
+ * Snapshot of scheduler health. Always available, near-zero cost.
152
+ *
153
+ * - `flushes`: total coalesced flushes (leading-edge and tick-driven) since tracker creation.
154
+ * - `driftMeanMs`: mean (actual_fire_time - target_deadline) across
155
+ * all tick-driven flushes. 0 means perfect cadence; values >
156
+ * `topicThrottle` indicate sustained event-loop saturation or CPU
157
+ * contention (consider a dedicated-CPU instance, or raise
158
+ * `topicThrottle`).
159
+ * - `driftMaxMs`: largest single observed late fire. Useful for
160
+ * spotting one-off GC pauses vs. sustained drift.
161
+ * - `dirtyTopicsCurrent`: topics with pending coalesced entries right
162
+ * now. Should hover near zero in healthy operation.
163
+ * - `activeTopicsTotal`: topics with at least one local cursor.
164
+ *
165
+ * Leading-edge synchronous flushes are not counted in drift stats -
166
+ * they fire on the call thread, not via the scheduler.
167
+ */
168
+ stats(): {
169
+ flushes: number;
170
+ driftMeanMs: number;
171
+ driftMaxMs: number;
172
+ dirtyTopicsCurrent: number;
173
+ activeTopicsTotal: number;
174
+ };
175
+
130
176
  /**
131
177
  * Ready-made WebSocket hooks for cursor tracking.
132
178
  *
package/redis/cursor.js CHANGED
@@ -40,6 +40,9 @@ import { stripInternal, createSensitiveWarner } from '../shared/sensitive.js';
40
40
  import { scanAndUnlink } from '../shared/redis-scan.js';
41
41
  import { MAX_CURSOR_WS, MAX_CURSOR_TOPICS } from '../shared/caps.js';
42
42
  import { createBusValidator } from '../shared/bus-validate.js';
43
+ import { WsClosedError } from '../shared/errors.js';
44
+
45
+ export { WsClosedError };
43
46
 
44
47
  /** Wire-protocol event names this module emits. */
45
48
  const EVENTS = Object.freeze({
@@ -91,6 +94,7 @@ const EVENTS = Object.freeze({
91
94
  * @property {(topic: string) => Promise<CursorEntry[]>} list
92
95
  * @property {() => Promise<void>} clear
93
96
  * @property {() => void} destroy - Stop the Redis subscriber
97
+ * @property {() => { flushes: number, driftMeanMs: number, driftMaxMs: number, dirtyTopicsCurrent: number, activeTopicsTotal: number }} stats - Scheduler health snapshot
94
98
  */
95
99
 
96
100
  /**
@@ -139,6 +143,7 @@ export function createCursor(client, options = {}) {
139
143
  const mUpdates = m?.counter('cursor_updates_total', 'Cursor update calls', ['topic']);
140
144
  const mBroadcasts = m?.counter('cursor_broadcasts_total', 'Cursor broadcasts sent', ['topic']);
141
145
  const mThrottled = m?.counter('cursor_throttled_total', 'Cursor updates deferred by throttle', ['topic']);
146
+ const mAttachesAborted = m?.counter('cursor_attaches_aborted_total', 'Cursor attach calls that aborted because the websocket closed before `platform.subscribe` could complete. Symmetric with `presence_joins_aborted_total`; same `WS_CLOSED` cause.', ['topic', 'reason']);
142
147
 
143
148
  const warnSensitive = createSensitiveWarner('redis/cursor');
144
149
 
@@ -198,7 +203,29 @@ export function createCursor(client, options = {}) {
198
203
  const parsed = JSON.parse(message);
199
204
  if (parsed.instanceId === instanceId) return;
200
205
  if (!validator.acceptEnvelope(parsed.topic, parsed.event)) return;
201
- if (activePlatform) {
206
+ if (!activePlatform) return;
207
+
208
+ // Receiver-side coalescing for high-frequency cursor-position
209
+ // events. UPDATE / BULK enqueue into the local topic's
210
+ // inboundDirty map so the NEXT local flush emits one combined
211
+ // frame covering local + peer cursors. Pre-change, peer-
212
+ // relayed frames published immediately on receive, producing
213
+ // tight doublets at subscribers (one frame per worker per
214
+ // cycle, ms apart). Now one frame per subscriber per cycle
215
+ // regardless of worker count.
216
+ //
217
+ // CATALOG / JOIN / REMOVE stay immediate: low-frequency
218
+ // roster events where coalescing would add latency without
219
+ // smoothness benefit.
220
+ if (parsed.event === EVENTS.UPDATE && parsed.payload && typeof parsed.payload.key === 'string') {
221
+ enqueueInbound(parsed.topic, parsed.payload.key, parsed.payload.data, activePlatform);
222
+ } else if (parsed.event === EVENTS.BULK && Array.isArray(parsed.payload)) {
223
+ for (const entry of parsed.payload) {
224
+ if (entry && typeof entry.key === 'string') {
225
+ enqueueInbound(parsed.topic, entry.key, entry.data, activePlatform);
226
+ }
227
+ }
228
+ } else {
202
229
  activePlatform.publish(
203
230
  '__cursor:' + parsed.topic,
204
231
  parsed.event,
@@ -358,11 +385,53 @@ export function createCursor(client, options = {}) {
358
385
  }
359
386
 
360
387
  /**
361
- * Per-topic aggregate throttle state.
362
- * @type {Map<string, { lastFlush: number, timer: any, dirty: Map<string, { user: any, data: any, platform: any }> }>}
388
+ * Per-topic aggregate flush state.
389
+ *
390
+ * - `dirty`: locally-originated cursors. Flushed locally AND relayed.
391
+ * - `inboundDirty`: cursors received from peer instances via Redis pub/sub.
392
+ * Flushed locally ONLY (re-relaying would loop). Kept separate from
393
+ * `dirty` so the relay payload is structurally a subset of the local
394
+ * flush, not a per-entry origin check.
395
+ * - `lastFlush`: target-anchored timestamp of the most recent flush.
396
+ * Advanced by `topicThrottleMs` per cycle (not to actual fire time) so
397
+ * a single late tick does not compound drift on subsequent cycles.
398
+ *
399
+ * @type {Map<string, { dirty: Map<string, { user: any, data: any, platform: any }>, inboundDirty: Map<string, { data: any, platform: any }>, lastFlush: number }>}
363
400
  */
364
401
  const topicFlush = new Map();
365
402
 
403
+ /**
404
+ * Single scheduler-driven set: topics with at least one dirty entry
405
+ * awaiting flush. Bounded by mover count, not topic count, so the
406
+ * per-tick walk does not scan idle topics. Updated synchronously on
407
+ * `broadcast()` / `enqueueInbound()` and on every tick.
408
+ *
409
+ * @type {Set<string>}
410
+ */
411
+ const dirtyTopics = new Set();
412
+
413
+ /**
414
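+ * Single timer for the whole tracker. Armed for a pending topic deadline
415
+ * (null when idle); an already-armed timer is not shortened by later arrivals, and `tick()` re-arms it for the earliest remaining deadline. Replaces the previous per-topic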
416
+ * setTimeout pattern: N pending timers -> 1 pending timer regardless of
417
+ * topic count. Scheduling overhead is O(dirty topics), not O(active
418
+ * topics), and a single late fire affects exactly one cycle (target-
419
+ * anchored, no drift compounding).
420
+ *
421
+ * @type {ReturnType<typeof setTimeout> | null}
422
+ */
423
+ let tickTimer = null;
424
+
425
+ /**
426
+ * Drift accounting for observability. Updated on every flush in `tick()`.
427
+ * Exposed via the `stats()` accessor; optional `metrics` integration
428
+ * (Prometheus histogram) is wired separately.
429
+ */
430
+ let driftSum = 0;
431
+ let driftCount = 0;
432
+ let driftMax = 0;
433
+ let flushCount = 0;
434
+
366
435
  function relay(topic, event, payload) {
367
436
  if (b) { try { b.guard(); } catch { return; } }
368
437
  const msg = JSON.stringify({ instanceId, topic, event, payload });
@@ -388,24 +457,129 @@ export function createCursor(client, options = {}) {
388
457
  }
389
458
 
390
459
  /**
391
- * Flush all coalesced entries for a topic as a single `bulk` event.
392
- * Entries carry `{key, data}` only; `user` lives on the catalog channel.
393
- * Per-entry Redis snapshot writes are coalesced through `queueSnapshot`
394
- * onto the snapshot timer.
460
+ * Flush a topic's `dirty` + `inboundDirty` maps as a single wire frame to
461
+ * local subscribers, then relay the local-origin slice to peers.
462
+ *
463
+ * - Local subscribers see one combined frame per cycle covering this
464
+ * worker's own cursors PLUS any cursors received from peers since the
465
+ * last flush. Pre-change, peer-relayed cursors emitted as a separate
466
+ * frame immediately on receive, producing tight doublets at subscribers.
467
+ * - Peers receive only the local-origin slice (relay payload is built
468
+ * from `dirty`, not from `inboundDirty`). Re-relaying inbound cursors
469
+ * would loop: filtered at the receiver via `instanceId`, but still
470
+ * wastes Redis pub/sub bandwidth.
471
+ * - `queueSnapshot` runs for local-origin only. The originating worker
472
+ * owns the Redis HSET for its cursors; receivers must not re-write
473
+ * what the origin already wrote (would double the HSET storm).
474
+ *
475
+ * Single-entry vs. multi-entry choice mirrors the existing wire shape:
476
+ * one cursor -> `update {key, data}`, many -> `bulk [{key, data}, ...]`.
477
+ * Subscribers handle both as cursor-position frames.
395
478
  */
396
- function flushBulk(topic, dirty) {
479
+ function flushBoth(topic, state) {
397
480
  const entries = [];
398
481
  let flushPlatform = null;
399
- for (const [k, v] of dirty) {
482
+ let localCount = 0;
483
+
484
+ // Local-origin slice first so we can take a prefix for the relay.
485
+ for (const [k, v] of state.dirty) {
400
486
  entries.push({ key: k, data: v.data });
401
487
  flushPlatform = v.platform;
402
488
  queueSnapshot(topic, k, v.user, v.data);
489
+ localCount++;
403
490
  }
491
+ for (const [k, v] of state.inboundDirty) {
492
+ entries.push({ key: k, data: v.data });
493
+ flushPlatform ||= v.platform;
494
+ }
495
+
496
+ state.dirty.clear();
497
+ state.inboundDirty.clear();
498
+
404
499
  if (!flushPlatform || entries.length === 0) return;
405
- flushPlatform.publish('__cursor:' + topic, EVENTS.BULK, entries);
406
- relay(topic, EVENTS.BULK, entries);
500
+
501
+ mBroadcasts?.inc({ topic: mt(topic) });
502
+ flushCount++;
503
+
504
+ // Single local publish covering all entries (local + inbound).
505
+ if (entries.length === 1) {
506
+ flushPlatform.publish('__cursor:' + topic, EVENTS.UPDATE, entries[0]);
507
+ } else {
508
+ flushPlatform.publish('__cursor:' + topic, EVENTS.BULK, entries);
509
+ }
510
+
511
+ // Relay LOCAL-ORIGIN slice only; never re-relay what came from peers.
512
+ if (localCount > 0) {
513
+ if (localCount === 1) {
514
+ relay(topic, EVENTS.UPDATE, entries[0]);
515
+ } else {
516
+ relay(topic, EVENTS.BULK, entries.slice(0, localCount));
517
+ }
518
+ }
407
519
  }
408
520
 
521
+ /**
522
+ * Scheduler tick. Walks `dirtyTopics`, flushes any topic whose deadline
523
+ * (`lastFlush + topicThrottleMs`) has passed, and re-arms `tickTimer`
524
+ * for the next earliest pending deadline. Topics whose deadline has not
525
+ * yet passed stay in `dirtyTopics` for the next tick.
526
+ *
527
+ * Target-anchored advance: on flush, `lastFlush` is set to the deadline
528
+ * (not the actual fire time) so a single late tick does not compound
529
+ * drift on subsequent cycles. If we fell behind by more than one cycle
530
+ * (event loop saturation > `topicThrottleMs`), `lastFlush` resets to
531
+ * `now` to avoid queueing phantom catch-up fires that would all hit the
532
+ * next event loop turn.
533
+ */
534
+ function tick() {
535
+ tickTimer = null;
536
+ const now = Date.now();
537
+ let nextDeadline = Infinity;
538
+
539
+ for (const topic of dirtyTopics) {
540
+ const state = topicFlush.get(topic);
541
+ if (!state) { dirtyTopics.delete(topic); continue; }
542
+ if (state.dirty.size === 0 && state.inboundDirty.size === 0) {
543
+ dirtyTopics.delete(topic);
544
+ continue;
545
+ }
546
+ const deadline = state.lastFlush + topicThrottleMs;
547
+ if (deadline <= now) {
548
+ const drift = now - deadline;
549
+ driftSum += drift;
550
+ driftCount++;
551
+ if (drift > driftMax) driftMax = drift;
552
+
553
+ flushBoth(topic, state);
554
+ dirtyTopics.delete(topic);
555
+
556
+ // Target-anchored: advance lastFlush by the cadence amount.
557
+ // A multi-cycle backlog collapses to `now` so the next leading-
558
+ // edge check `(now - lastFlush) >= topicThrottleMs` works as
559
+ // expected without firing every queued cycle on this turn.
560
+ state.lastFlush = drift < topicThrottleMs ? deadline : now;
561
+ } else if (deadline < nextDeadline) {
562
+ nextDeadline = deadline;
563
+ }
564
+ }
565
+
566
+ if (nextDeadline !== Infinity) {
567
+ tickTimer = setTimeout(tick, Math.max(0, nextDeadline - Date.now()));
568
+ }
569
+ // else: scheduler goes idle until next broadcast() / enqueueInbound().
570
+ }
571
+
572
+ function armTick(delay) {
573
+ if (tickTimer !== null) return;
574
+ tickTimer = setTimeout(tick, delay);
575
+ }
576
+
577
+ /**
578
+ * Schedule a local cursor for the next coalesced flush. The leading-
579
+ * edge check fires synchronously when `topicThrottleMs` has elapsed
580
+ * since the last flush (preserves the contract that the first call on
581
+ * an idle topic publishes immediately, without a setTimeout(0) detour).
582
+ */
409
583
  function broadcast(topic, key, user, data, platform) {
410
584
  if (topicThrottleMs <= 0) {
411
585
  doBroadcast(topic, key, user, data, platform);
@@ -414,42 +588,64 @@ export function createCursor(client, options = {}) {
414
588
 
415
589
  let state = topicFlush.get(topic);
416
590
  if (!state) {
417
- state = { lastFlush: 0, timer: null, dirty: new Map() };
591
+ state = { dirty: new Map(), inboundDirty: new Map(), lastFlush: 0 };
418
592
  topicFlush.set(topic, state);
419
593
  }
420
-
421
594
  state.dirty.set(key, { user, data, platform });
422
595
 
423
596
  const now = Date.now();
424
-
425
597
  if (now - state.lastFlush >= topicThrottleMs) {
426
- if (state.timer) { clearTimeout(state.timer); state.timer = null; }
598
+ // Leading-edge synchronous flush.
427
599
  state.lastFlush = now;
428
- if (state.dirty.size === 1) {
429
- const [k, v] = state.dirty.entries().next().value;
430
- doBroadcast(topic, k, v.user, v.data, v.platform);
431
- } else {
432
- flushBulk(topic, state.dirty);
433
- }
434
- state.dirty.clear();
600
+ flushBoth(topic, state);
601
+ dirtyTopics.delete(topic);
435
602
  return;
436
603
  }
437
604
 
438
- if (!state.timer) {
439
- state.timer = setTimeout(() => {
440
- const s = topicFlush.get(topic);
441
- if (!s) return;
442
- s.timer = null;
443
- s.lastFlush = Date.now();
444
- if (s.dirty.size === 1) {
445
- const [k, v] = s.dirty.entries().next().value;
446
- doBroadcast(topic, k, v.user, v.data, v.platform);
447
- } else {
448
- flushBulk(topic, s.dirty);
449
- }
450
- s.dirty.clear();
451
- }, topicThrottleMs - (now - state.lastFlush));
605
+ // Within window: trailing-edge flush via the scheduler tick.
606
+ dirtyTopics.add(topic);
607
+ armTick(Math.max(0, topicThrottleMs - (now - state.lastFlush)));
608
+ }
609
+
610
+ /**
611
+ * Schedule a peer-relayed cursor for the next coalesced flush. Symmetric
612
+ * to `broadcast()`: same leading/trailing edge semantics, but inbound
613
+ * entries route through `state.inboundDirty` so they are visible to
614
+ * local subscribers on the next flush WITHOUT being re-relayed to peers
615
+ * (which would loop) and WITHOUT being written to Redis (origin owns
616
+ * the HSET).
617
+ *
618
+ * The peer's cross-replica end-to-end latency gains up to one
619
+ * `topicThrottleMs` of coalescing delay on the receiver side. Cursors
620
+ * are already throttled in the 8-16ms range; adding 8-16ms is well
621
+ * below the ~50-100ms human perception threshold for cursor lag. The
622
+ * smoothness win (one frame per subscriber per cycle instead of two)
623
+ * is the structural benefit.
624
+ */
625
+ function enqueueInbound(topic, key, data, platform) {
626
+ if (topicThrottleMs <= 0) {
627
+ // Legacy immediate mode (matches old receiver behavior).
628
+ platform.publish('__cursor:' + topic, EVENTS.UPDATE, { key, data }, { relay: false });
629
+ return;
452
630
  }
631
+
632
+ let state = topicFlush.get(topic);
633
+ if (!state) {
634
+ state = { dirty: new Map(), inboundDirty: new Map(), lastFlush: 0 };
635
+ topicFlush.set(topic, state);
636
+ }
637
+ state.inboundDirty.set(key, { data, platform });
638
+
639
+ const now = Date.now();
640
+ if (now - state.lastFlush >= topicThrottleMs) {
641
+ state.lastFlush = now;
642
+ flushBoth(topic, state);
643
+ dirtyTopics.delete(topic);
644
+ return;
645
+ }
646
+
647
+ dirtyTopics.add(topic);
648
+ armTick(Math.max(0, topicThrottleMs - (now - state.lastFlush)));
453
649
  }
454
650
 
455
651
  async function broadcastRemove(topic, key, platform) {
@@ -482,8 +678,17 @@ export function createCursor(client, options = {}) {
482
678
  try {
483
679
  platform.subscribe(ws, '__cursor:' + topic);
484
680
  } catch {
485
- return;
681
+ // ws closed before subscribe could land. No state to roll back
682
+ // (no wsState entry exists yet; that is only created on update).
683
+ // Throw so the caller can distinguish a no-op-and-rollback from
684
+ // a successful attach; without this the RPC metric reports
685
+ // status=ok for connections that never received cursor frames.
686
+ mAttachesAborted?.inc({ topic: mt(topic), reason: 'ws_closed' });
687
+ throw new WsClosedError('cursor.attach', topic);
486
688
  }
689
+ // snapshot() itself swallows ws-closed during platform.send (the
690
+ // state is already committed; clients recover via the next bulk
691
+ // frame). Intentional asymmetry with subscribe failure above.
487
692
  await tracker.snapshot(ws, topic, platform);
488
693
  },
489
694
 
@@ -580,6 +785,7 @@ export function createCursor(client, options = {}) {
580
785
  topics.delete(topic);
581
786
  activeTopics.delete(topic);
582
787
  topicFlush.delete(topic);
788
+ dirtyTopics.delete(topic);
583
789
  redisPending.delete(topic);
584
790
  stopCleanupTimer();
585
791
  }
@@ -637,6 +843,7 @@ export function createCursor(client, options = {}) {
637
843
  topics.delete(t);
638
844
  activeTopics.delete(t);
639
845
  topicFlush.delete(t);
846
+ dirtyTopics.delete(t);
640
847
  redisPending.delete(t);
641
848
  }
642
849
  }
@@ -717,9 +924,9 @@ export function createCursor(client, options = {}) {
717
924
  if (entry.timer) clearTimeout(entry.timer);
718
925
  }
719
926
  }
720
- for (const [, state] of topicFlush) {
721
- if (state.timer) clearTimeout(state.timer);
722
- }
927
+ // Tracker-level scheduler timer + dirty-topic set.
928
+ if (tickTimer !== null) { clearTimeout(tickTimer); tickTimer = null; }
929
+ dirtyTopics.clear();
723
930
  topics.clear();
724
931
  topicFlush.clear();
725
932
  wsState.clear();
@@ -739,9 +946,8 @@ export function createCursor(client, options = {}) {
739
946
  if (entry.timer) clearTimeout(entry.timer);
740
947
  }
741
948
  }
742
- for (const [, state] of topicFlush) {
743
- if (state.timer) clearTimeout(state.timer);
744
- }
949
+ if (tickTimer !== null) { clearTimeout(tickTimer); tickTimer = null; }
950
+ dirtyTopics.clear();
745
951
  topicFlush.clear();
746
952
  if (subscriber) {
747
953
  subscriber.quit().catch(() => subscriber.disconnect());
@@ -750,6 +956,37 @@ export function createCursor(client, options = {}) {
750
956
  activePlatform = null;
751
957
  },
752
958
 
959
+ /**
960
+ * Snapshot of scheduler health. Always available, near-zero cost.
961
+ *
962
+ * - `flushes`: total coalesced flushes (leading-edge and tick-driven) since tracker creation.
963
+ * - `driftMeanMs`: mean (actual_fire_time - target_deadline) across
964
+ * all tick-driven flushes. 0 means perfect cadence; values >
965
+ * `topicThrottle` indicate sustained event-loop saturation or
966
+ * CPU contention.
967
+ * - `driftMaxMs`: largest single observed late fire. Useful for
968
+ * spotting one-off GC pauses vs. sustained drift.
969
+ * - `dirtyTopicsCurrent`: topics with pending coalesced entries
970
+ * right now. Should hover near zero in healthy operation; growth
971
+ * means tick is falling behind.
972
+ * - `activeTopicsTotal`: topics with at least one local cursor.
973
+ *
974
+ * Leading-edge synchronous flushes (first call on an idle topic)
975
+ * are not counted in drift stats - they fire on the call thread,
976
+ * not via the scheduler.
977
+ *
978
+ * @returns {{ flushes: number, driftMeanMs: number, driftMaxMs: number, dirtyTopicsCurrent: number, activeTopicsTotal: number }}
979
+ */
980
+ stats() {
981
+ return {
982
+ flushes: flushCount,
983
+ driftMeanMs: driftCount > 0 ? driftSum / driftCount : 0,
984
+ driftMaxMs: driftMax,
985
+ dirtyTopicsCurrent: dirtyTopics.size,
986
+ activeTopicsTotal: topics.size
987
+ };
988
+ },
989
+
753
990
  hooks: {
754
991
  subscribe(ws, topic, { platform }) {
755
992
  if (topic.startsWith('__cursor:')) {
@@ -51,10 +51,29 @@ export interface PresenceMetricsSnapshot {
51
51
  staleCleanedTotal: number;
52
52
  }
53
53
 
54
+ /**
55
+ * Thrown by `join()` when the websocket closes during an async gap before
56
+ * the join can commit. Server-side state is fully rolled back before the
57
+ * throw. Catch on `err.code === 'WS_CLOSED'` rather than the class - the
58
+ * same code is shared with `cursor.attach` and any future RPC-shaped
59
+ * operation in this package.
60
+ */
61
+ export class WsClosedError extends Error {
62
+ name: 'WsClosedError';
63
+ code: 'WS_CLOSED';
64
+ operation: string;
65
+ topic: string;
66
+ }
67
+
54
68
  export interface RedisPresenceTracker {
55
69
  /**
56
70
  * Add a connection to a topic's presence.
57
71
  * Ignores `__`-prefixed topics. Idempotent.
72
+ *
73
+ * @throws {WsClosedError} (`err.code === 'WS_CLOSED'`) if the websocket
74
+ * closes during one of the internal async gaps (subscribe, Redis eval,
75
+ * snapshot fetch, ws.subscribe). Server state is rolled back before
76
+ * the throw; callers do not need to compensate.
58
77
  */
59
78
  join(ws: any, topic: string, platform: Platform): Promise<void>;
60
79
 
package/redis/presence.js CHANGED
@@ -47,6 +47,9 @@ import { stripInternal, createSensitiveWarner } from '../shared/sensitive.js';
47
47
  import { scanAndUnlink } from '../shared/redis-scan.js';
48
48
  import { withBreaker } from '../shared/breaker.js';
49
49
  import { MAX_PRESENCE_WS, MAX_PRESENCE_TOPICS } from '../shared/caps.js';
50
+ import { WsClosedError } from '../shared/errors.js';
51
+
52
+ export { WsClosedError };
50
53
 
51
54
  /**
52
55
  * Lua script for atomic JOIN. Sets this instance's field on the per-user
@@ -247,6 +250,7 @@ export function createPresence(client, options = {}) {
247
250
  const m = options.metrics;
248
251
  const mt = m?.mapTopic;
249
252
  const mJoins = m?.counter('presence_joins_total', 'Presence join events', ['topic']);
253
+ const mJoinsAborted = m?.counter('presence_joins_aborted_total', 'Presence join calls that aborted before commit because the websocket closed during an async gap. Server state was rolled back before the throw. Distinct from `presence_joins_total` (commits) and from generic RPC error metrics (which bucket all throws together regardless of cause).', ['topic', 'reason']);
250
254
  const mLeaves = m?.counter('presence_leaves_total', 'Presence leave events', ['topic']);
251
255
  const mHeartbeats = m?.counter('presence_heartbeats_total', 'Heartbeat refresh cycles');
252
256
  const mTotalOnline = m?.gauge('presence_total_online', 'Unique users present per topic on this instance', ['topic']);
@@ -985,6 +989,15 @@ export function createPresence(client, options = {}) {
985
989
  }
986
990
  }
987
991
 
992
+ // Throw helper for "ws closed during async gap" paths inside join(). All
993
+ // five callsites need the same metric label and the same typed error;
994
+ // a shared helper avoids drift between them and keeps each callsite
995
+ // single-line.
996
+ function throwWsClosed(topic) {
997
+ mJoinsAborted?.inc({ topic: mt(topic), reason: 'ws_closed' });
998
+ throw new WsClosedError('presence.join', topic);
999
+ }
1000
+
988
1001
  /** @type {RedisPresenceTracker} */
989
1002
  const tracker = {
990
1003
  async join(ws, topic, platform) {
@@ -1071,11 +1084,15 @@ export function createPresence(client, options = {}) {
1071
1084
  throw err;
1072
1085
  }
1073
1086
 
1074
- if (!wsTopics.has(ws)) return;
1087
+ // ws closed during `await subscribeToTopic`. The close hook already
1088
+ // ran leaveAll, which swept localCounts / wsTopics for this ws;
1089
+ // no compensating undoJoin needed. Throw so the caller sees the
1090
+ // abort instead of a silent success.
1091
+ if (!wsTopics.has(ws)) throwWsClosed(topic);
1075
1092
 
1076
1093
  try { ws.getBufferedAmount(); } catch {
1077
1094
  await undoJoin(ws, topic, key, data, prevCount, prevData, false, false, platform);
1078
- return;
1095
+ throwWsClosed(topic);
1079
1096
  }
1080
1097
 
1081
1098
  let didRedisWrite = false;
@@ -1108,13 +1125,14 @@ export function createPresence(client, options = {}) {
1108
1125
 
1109
1126
  if (!wsTopics.has(ws)) {
1110
1127
  // ws closed during the eval. Roll back our Redis write so
1111
- // the per-user hash entry does not linger past TTL.
1128
+ // the per-user hash entry does not linger past TTL, then
1129
+ // surface the abort to the caller.
1112
1130
  await redis.eval(
1113
1131
  LEAVE_SCRIPT, 2,
1114
1132
  userHashKey(topic, key), topicHashKey(topic),
1115
1133
  instanceId, key
1116
1134
  ).catch(() => {});
1117
- return;
1135
+ throwWsClosed(topic);
1118
1136
  }
1119
1137
  } else if (prevData !== undefined && !deepEqual(prevData, data)) {
1120
1138
  // Same instance, same user, different `select()` output.
@@ -1157,7 +1175,7 @@ export function createPresence(client, options = {}) {
1157
1175
  ws.subscribe('__presence:' + topic);
1158
1176
  } catch {
1159
1177
  await undoJoin(ws, topic, key, data, prevCount, prevData, didRedisWrite, false, platform);
1160
- return;
1178
+ throwWsClosed(topic);
1161
1179
  }
1162
1180
 
1163
1181
  // If ws closed after subscribe, leave() already handled
@@ -1170,7 +1188,7 @@ export function createPresence(client, options = {}) {
1170
1188
  instanceId, key
1171
1189
  ).catch(() => {});
1172
1190
  }
1173
- return;
1191
+ throwWsClosed(topic);
1174
1192
  }
1175
1193
 
1176
1194
  // Commit localData and activeTopics now that the join is
package/shared/errors.js CHANGED
@@ -57,3 +57,41 @@ export class IdempotencyResultTooLargeError extends Error {
57
57
  this.maxBytes = maxBytes;
58
58
  }
59
59
  }
60
+
61
+ /**
62
+ * Thrown by RPC-shaped operations (`presence.join`, `cursor.attach`) when the
63
+ * caller's websocket closes during an async gap before the operation could
64
+ * commit, OR the websocket was already gone by the time the operation
65
+ * resumed from one of its awaits. Server-side state is fully rolled back
66
+ * before the throw so the caller does not need to compensate.
67
+ *
68
+ * Stable contract: `err.code === 'WS_CLOSED'`. Catch on the code, not the
69
+ * class - future RPC-shaped operations that hit the same pattern throw the
70
+ * same code. The `operation` field carries the dotted path (e.g.
71
+ * `'presence.join'`) for operators that want to bucket by feature without
72
+ * parsing the message.
73
+ *
74
+ * Pattern in callers:
75
+ *
76
+ * ```js
77
+ * try {
78
+ * await presence.join(ws, topic, platform);
79
+ * } catch (err) {
80
+ * if (err.code === 'WS_CLOSED') return; // ws already gone, no compensation needed
81
+ * throw err;
82
+ * }
83
+ * ```
84
+ */
85
+ export class WsClosedError extends Error {
86
+ /**
87
+ * @param {string} operation - Dotted operation path, e.g. `'presence.join'`.
88
+ * @param {string} topic
89
+ */
90
+ constructor(operation, topic) {
91
+ super(`${operation}: websocket closed during async gap (topic="${topic}"); rolled back`);
92
+ this.name = 'WsClosedError';
93
+ this.code = 'WS_CLOSED';
94
+ this.operation = operation;
95
+ this.topic = topic;
96
+ }
97
+ }