svelte-adapter-uws-extensions 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/redis/cursor.d.ts +46 -0
- package/redis/cursor.js +280 -43
- package/redis/presence.d.ts +19 -0
- package/redis/presence.js +24 -6
- package/shared/errors.js +38 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "svelte-adapter-uws-extensions",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.5",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"tag": "latest"
|
|
6
6
|
},
|
|
@@ -154,7 +154,7 @@
|
|
|
154
154
|
"node": ">=22.0.0"
|
|
155
155
|
},
|
|
156
156
|
"peerDependencies": {
|
|
157
|
-
"svelte-adapter-uws": "^0.5.
|
|
157
|
+
"svelte-adapter-uws": "^0.5.4"
|
|
158
158
|
},
|
|
159
159
|
"dependencies": {
|
|
160
160
|
"ioredis": "^5.0.0"
|
package/redis/cursor.d.ts
CHANGED
|
@@ -74,6 +74,18 @@ export interface CursorEntry {
|
|
|
74
74
|
data: any;
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
+
/**
|
|
78
|
+
* Thrown by `attach()` when the websocket closes before `platform.subscribe`
|
|
79
|
+
* can land. Same shape as `presence.WsClosedError`; catch on `err.code ===
|
|
80
|
+
* 'WS_CLOSED'` for cross-feature handling.
|
|
81
|
+
*/
|
|
82
|
+
export class WsClosedError extends Error {
|
|
83
|
+
name: 'WsClosedError';
|
|
84
|
+
code: 'WS_CLOSED';
|
|
85
|
+
operation: string;
|
|
86
|
+
topic: string;
|
|
87
|
+
}
|
|
88
|
+
|
|
77
89
|
export interface RedisCursorTracker {
|
|
78
90
|
/**
|
|
79
91
|
* Opt this connection into receiving cursor updates for `topic`.
|
|
@@ -84,6 +96,14 @@ export interface RedisCursorTracker {
|
|
|
84
96
|
*
|
|
85
97
|
* Without `attach`, the publishes in `update` fan out to an empty
|
|
86
98
|
* subscriber set and no client ever sees a cursor frame.
|
|
99
|
+
*
|
|
100
|
+
* @throws {WsClosedError} (`err.code === 'WS_CLOSED'`) if the websocket
|
|
101
|
+
* has already closed by the time `platform.subscribe` runs. No state
|
|
102
|
+
* to roll back (`wsState` is only created on `update`); callers do
|
|
103
|
+
* not need to compensate. The follow-up `snapshot()` call is skipped
|
|
104
|
+
* when this throws. Snapshot-send failures on an already-subscribed
|
|
105
|
+
* connection are NOT thrown - cursor frames are self-recovering via
|
|
106
|
+
* the next bulk tick.
|
|
87
107
|
*/
|
|
88
108
|
attach(ws: any, topic: string, platform: Platform): Promise<void>;
|
|
89
109
|
|
|
@@ -127,6 +147,32 @@ export interface RedisCursorTracker {
|
|
|
127
147
|
/** Stop the Redis subscriber and clear local timers. */
|
|
128
148
|
destroy(): void;
|
|
129
149
|
|
|
150
|
+
/**
|
|
151
|
+
* Snapshot of scheduler health. Always available, near-zero cost.
|
|
152
|
+
*
|
|
153
|
+
* - `flushes`: total flushes (leading-edge and tick-driven) since tracker creation.
|
|
154
|
+
* - `driftMeanMs`: mean lateness (actual_fire_time - target_deadline) across
|
|
155
|
+
* all tick-driven flushes. 0 means perfect cadence; values >
|
|
156
|
+
* `topicThrottle` indicate sustained event-loop saturation or CPU
|
|
157
|
+
* contention (consider a dedicated-CPU instance, or raise
|
|
158
|
+
* `topicThrottle`).
|
|
159
|
+
* - `driftMaxMs`: largest single observed late fire. Useful for
|
|
160
|
+
* spotting one-off GC pauses vs. sustained drift.
|
|
161
|
+
* - `dirtyTopicsCurrent`: topics with pending coalesced entries right
|
|
162
|
+
* now. Should hover near zero in healthy operation.
|
|
163
|
+
* - `activeTopicsTotal`: topics with at least one local cursor.
|
|
164
|
+
*
|
|
165
|
+
* Leading-edge synchronous flushes are not counted in drift stats -
|
|
166
|
+
* they fire on the call thread, not via the scheduler.
|
|
167
|
+
*/
|
|
168
|
+
stats(): {
|
|
169
|
+
flushes: number;
|
|
170
|
+
driftMeanMs: number;
|
|
171
|
+
driftMaxMs: number;
|
|
172
|
+
dirtyTopicsCurrent: number;
|
|
173
|
+
activeTopicsTotal: number;
|
|
174
|
+
};
|
|
175
|
+
|
|
130
176
|
/**
|
|
131
177
|
* Ready-made WebSocket hooks for cursor tracking.
|
|
132
178
|
*
|
package/redis/cursor.js
CHANGED
|
@@ -40,6 +40,9 @@ import { stripInternal, createSensitiveWarner } from '../shared/sensitive.js';
|
|
|
40
40
|
import { scanAndUnlink } from '../shared/redis-scan.js';
|
|
41
41
|
import { MAX_CURSOR_WS, MAX_CURSOR_TOPICS } from '../shared/caps.js';
|
|
42
42
|
import { createBusValidator } from '../shared/bus-validate.js';
|
|
43
|
+
import { WsClosedError } from '../shared/errors.js';
|
|
44
|
+
|
|
45
|
+
export { WsClosedError };
|
|
43
46
|
|
|
44
47
|
/** Wire-protocol event names this module emits. */
|
|
45
48
|
const EVENTS = Object.freeze({
|
|
@@ -91,6 +94,7 @@ const EVENTS = Object.freeze({
|
|
|
91
94
|
* @property {(topic: string) => Promise<CursorEntry[]>} list
|
|
92
95
|
* @property {() => Promise<void>} clear
|
|
93
96
|
* @property {() => void} destroy - Stop the Redis subscriber
|
|
97
|
+
* @property {() => { flushes: number, driftMeanMs: number, driftMaxMs: number, dirtyTopicsCurrent: number, activeTopicsTotal: number }} stats - Scheduler health snapshot
|
|
94
98
|
*/
|
|
95
99
|
|
|
96
100
|
/**
|
|
@@ -139,6 +143,7 @@ export function createCursor(client, options = {}) {
|
|
|
139
143
|
const mUpdates = m?.counter('cursor_updates_total', 'Cursor update calls', ['topic']);
|
|
140
144
|
const mBroadcasts = m?.counter('cursor_broadcasts_total', 'Cursor broadcasts sent', ['topic']);
|
|
141
145
|
const mThrottled = m?.counter('cursor_throttled_total', 'Cursor updates deferred by throttle', ['topic']);
|
|
146
|
+
const mAttachesAborted = m?.counter('cursor_attaches_aborted_total', 'Cursor attach calls that aborted because the websocket closed before `platform.subscribe` could complete. Symmetric with `presence_joins_aborted_total`; same `WS_CLOSED` cause.', ['topic', 'reason']);
|
|
142
147
|
|
|
143
148
|
const warnSensitive = createSensitiveWarner('redis/cursor');
|
|
144
149
|
|
|
@@ -198,7 +203,29 @@ export function createCursor(client, options = {}) {
|
|
|
198
203
|
const parsed = JSON.parse(message);
|
|
199
204
|
if (parsed.instanceId === instanceId) return;
|
|
200
205
|
if (!validator.acceptEnvelope(parsed.topic, parsed.event)) return;
|
|
201
|
-
if (activePlatform)
|
|
206
|
+
if (!activePlatform) return;
|
|
207
|
+
|
|
208
|
+
// Receiver-side coalescing for high-frequency cursor-position
|
|
209
|
+
// events. UPDATE / BULK enqueue into the local topic's
|
|
210
|
+
// inboundDirty map so the NEXT local flush emits one combined
|
|
211
|
+
// frame covering local + peer cursors. Pre-change, peer-
|
|
212
|
+
// relayed frames published immediately on receive, producing
|
|
213
|
+
// tight doublets at subscribers (one frame per worker per
|
|
214
|
+
// cycle, ms apart). Now one frame per subscriber per cycle
|
|
215
|
+
// regardless of worker count.
|
|
216
|
+
//
|
|
217
|
+
// CATALOG / JOIN / REMOVE stay immediate: low-frequency
|
|
218
|
+
// roster events where coalescing would add latency without
|
|
219
|
+
// smoothness benefit.
|
|
220
|
+
if (parsed.event === EVENTS.UPDATE && parsed.payload && typeof parsed.payload.key === 'string') {
|
|
221
|
+
enqueueInbound(parsed.topic, parsed.payload.key, parsed.payload.data, activePlatform);
|
|
222
|
+
} else if (parsed.event === EVENTS.BULK && Array.isArray(parsed.payload)) {
|
|
223
|
+
for (const entry of parsed.payload) {
|
|
224
|
+
if (entry && typeof entry.key === 'string') {
|
|
225
|
+
enqueueInbound(parsed.topic, entry.key, entry.data, activePlatform);
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
} else {
|
|
202
229
|
activePlatform.publish(
|
|
203
230
|
'__cursor:' + parsed.topic,
|
|
204
231
|
parsed.event,
|
|
@@ -358,11 +385,53 @@ export function createCursor(client, options = {}) {
|
|
|
358
385
|
}
|
|
359
386
|
|
|
360
387
|
/**
|
|
361
|
-
* Per-topic aggregate
|
|
362
|
-
*
|
|
388
|
+
* Per-topic aggregate flush state.
|
|
389
|
+
*
|
|
390
|
+
* - `dirty`: locally-originated cursors. Flushed locally AND relayed.
|
|
391
|
+
* - `inboundDirty`: cursors received from peer instances via Redis pub/sub.
|
|
392
|
+
* Flushed locally ONLY (re-relaying would loop). Kept separate from
|
|
393
|
+
* `dirty` so the relay payload is structurally a subset of the local
|
|
394
|
+
* flush, not a per-entry origin check.
|
|
395
|
+
* - `lastFlush`: target-anchored timestamp of the most recent flush.
|
|
396
|
+
* Advanced by `topicThrottleMs` per cycle (not to actual fire time) so
|
|
397
|
+
* a single late tick does not compound drift on subsequent cycles.
|
|
398
|
+
*
|
|
399
|
+
* @type {Map<string, { dirty: Map<string, { user: any, data: any, platform: any }>, inboundDirty: Map<string, { data: any, platform: any }>, lastFlush: number }>}
|
|
363
400
|
*/
|
|
364
401
|
const topicFlush = new Map();
|
|
365
402
|
|
|
403
|
+
/**
|
|
404
|
+
* Single scheduler-driven set: topics with at least one dirty entry
|
|
405
|
+
* awaiting flush. Bounded by mover count, not topic count, so the
|
|
406
|
+
* per-tick walk does not scan idle topics. Updated synchronously on
|
|
407
|
+
* `broadcast()` / `enqueueInbound()` and on every tick.
|
|
408
|
+
*
|
|
409
|
+
* @type {Set<string>}
|
|
410
|
+
*/
|
|
411
|
+
const dirtyTopics = new Set();
|
|
412
|
+
|
|
413
|
+
/**
|
|
414
|
+
* Single timer for the whole tracker. Always points at the next earliest
|
|
415
|
+
* topic deadline (or null when idle). Replaces the previous per-topic
|
|
416
|
+
* setTimeout pattern: N pending timers -> 1 pending timer regardless of
|
|
417
|
+
* topic count. Scheduling overhead is O(dirty topics), not O(active
|
|
418
|
+
* topics), and a single late fire affects exactly one cycle (target-
|
|
419
|
+
* anchored, no drift compounding).
|
|
420
|
+
*
|
|
421
|
+
* @type {ReturnType<typeof setTimeout> | null}
|
|
422
|
+
*/
|
|
423
|
+
let tickTimer = null;
|
|
424
|
+
|
|
425
|
+
/**
|
|
426
|
+
* Drift accounting for observability. Updated on every flush in `tick()`.
|
|
427
|
+
* Exposed via the `stats()` accessor; optional `metrics` integration
|
|
428
|
+
* (Prometheus histogram) is wired separately.
|
|
429
|
+
*/
|
|
430
|
+
let driftSum = 0;
|
|
431
|
+
let driftCount = 0;
|
|
432
|
+
let driftMax = 0;
|
|
433
|
+
let flushCount = 0;
|
|
434
|
+
|
|
366
435
|
function relay(topic, event, payload) {
|
|
367
436
|
if (b) { try { b.guard(); } catch { return; } }
|
|
368
437
|
const msg = JSON.stringify({ instanceId, topic, event, payload });
|
|
@@ -388,24 +457,129 @@ export function createCursor(client, options = {}) {
|
|
|
388
457
|
}
|
|
389
458
|
|
|
390
459
|
/**
|
|
391
|
-
* Flush
|
|
392
|
-
*
|
|
393
|
-
*
|
|
394
|
-
*
|
|
460
|
+
* Flush a topic's `dirty` + `inboundDirty` maps as a single wire frame to
|
|
461
|
+
* local subscribers, then relay the local-origin slice to peers.
|
|
462
|
+
*
|
|
463
|
+
* - Local subscribers see one combined frame per cycle covering this
|
|
464
|
+
* worker's own cursors PLUS any cursors received from peers since the
|
|
465
|
+
* last flush. Pre-change, peer-relayed cursors emitted as a separate
|
|
466
|
+
* frame immediately on receive, producing tight doublets at subscribers.
|
|
467
|
+
* - Peers receive only the local-origin slice (relay payload is built
|
|
468
|
+
* from `dirty`, not from `inboundDirty`). Re-relaying inbound cursors
|
|
469
|
+
* would loop: filtered at the receiver via `instanceId`, but still
|
|
470
|
+
* wastes Redis pub/sub bandwidth.
|
|
471
|
+
* - `queueSnapshot` runs for local-origin only. The originating worker
|
|
472
|
+
* owns the Redis HSET for its cursors; receivers must not re-write
|
|
473
|
+
* what the origin already wrote (would double the HSET storm).
|
|
474
|
+
*
|
|
475
|
+
* Single-entry vs. multi-entry choice mirrors the existing wire shape:
|
|
476
|
+
* one cursor -> `update {key, data}`, many -> `bulk [{key, data}, ...]`.
|
|
477
|
+
* Subscribers handle both as cursor-position frames.
|
|
395
478
|
*/
|
|
396
|
-
function
|
|
479
|
+
function flushBoth(topic, state) {
|
|
397
480
|
const entries = [];
|
|
398
481
|
let flushPlatform = null;
|
|
399
|
-
|
|
482
|
+
let localCount = 0;
|
|
483
|
+
|
|
484
|
+
// Local-origin slice first so we can take a prefix for the relay.
|
|
485
|
+
for (const [k, v] of state.dirty) {
|
|
400
486
|
entries.push({ key: k, data: v.data });
|
|
401
487
|
flushPlatform = v.platform;
|
|
402
488
|
queueSnapshot(topic, k, v.user, v.data);
|
|
489
|
+
localCount++;
|
|
403
490
|
}
|
|
491
|
+
for (const [k, v] of state.inboundDirty) {
|
|
492
|
+
entries.push({ key: k, data: v.data });
|
|
493
|
+
flushPlatform ||= v.platform;
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
state.dirty.clear();
|
|
497
|
+
state.inboundDirty.clear();
|
|
498
|
+
|
|
404
499
|
if (!flushPlatform || entries.length === 0) return;
|
|
405
|
-
|
|
406
|
-
|
|
500
|
+
|
|
501
|
+
mBroadcasts?.inc({ topic: mt(topic) });
|
|
502
|
+
flushCount++;
|
|
503
|
+
|
|
504
|
+
// Single local publish covering all entries (local + inbound).
|
|
505
|
+
if (entries.length === 1) {
|
|
506
|
+
flushPlatform.publish('__cursor:' + topic, EVENTS.UPDATE, entries[0]);
|
|
507
|
+
} else {
|
|
508
|
+
flushPlatform.publish('__cursor:' + topic, EVENTS.BULK, entries);
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
// Relay LOCAL-ORIGIN slice only; never re-relay what came from peers.
|
|
512
|
+
if (localCount > 0) {
|
|
513
|
+
if (localCount === 1) {
|
|
514
|
+
relay(topic, EVENTS.UPDATE, entries[0]);
|
|
515
|
+
} else {
|
|
516
|
+
relay(topic, EVENTS.BULK, entries.slice(0, localCount));
|
|
517
|
+
}
|
|
518
|
+
}
|
|
407
519
|
}
|
|
408
520
|
|
|
521
|
+
/**
|
|
522
|
+
* Scheduler tick. Walks `dirtyTopics`, flushes any topic whose deadline
|
|
523
|
+
* (`lastFlush + topicThrottleMs`) has passed, and re-arms `tickTimer`
|
|
524
|
+
* for the next earliest pending deadline. Topics whose deadline has not
|
|
525
|
+
* yet passed stay in `dirtyTopics` for the next tick.
|
|
526
|
+
*
|
|
527
|
+
* Target-anchored advance: on flush, `lastFlush` is set to the deadline
|
|
528
|
+
* (not the actual fire time) so a single late tick does not compound
|
|
529
|
+
* drift on subsequent cycles. If we fell behind by more than one cycle
|
|
530
|
+
* (event loop saturation > `topicThrottleMs`), `lastFlush` resets to
|
|
531
|
+
* `now` to avoid queueing phantom catch-up fires that would all hit the
|
|
532
|
+
* next event loop turn.
|
|
533
|
+
*/
|
|
534
|
+
function tick() {
|
|
535
|
+
tickTimer = null;
|
|
536
|
+
const now = Date.now();
|
|
537
|
+
let nextDeadline = Infinity;
|
|
538
|
+
|
|
539
|
+
for (const topic of dirtyTopics) {
|
|
540
|
+
const state = topicFlush.get(topic);
|
|
541
|
+
if (!state) { dirtyTopics.delete(topic); continue; }
|
|
542
|
+
if (state.dirty.size === 0 && state.inboundDirty.size === 0) {
|
|
543
|
+
dirtyTopics.delete(topic);
|
|
544
|
+
continue;
|
|
545
|
+
}
|
|
546
|
+
const deadline = state.lastFlush + topicThrottleMs;
|
|
547
|
+
if (deadline <= now) {
|
|
548
|
+
const drift = now - deadline;
|
|
549
|
+
driftSum += drift;
|
|
550
|
+
driftCount++;
|
|
551
|
+
if (drift > driftMax) driftMax = drift;
|
|
552
|
+
|
|
553
|
+
flushBoth(topic, state);
|
|
554
|
+
dirtyTopics.delete(topic);
|
|
555
|
+
|
|
556
|
+
// Target-anchored: advance lastFlush by the cadence amount.
|
|
557
|
+
// A multi-cycle backlog collapses to `now` so the next leading-
|
|
558
|
+
// edge check `(now - lastFlush) >= topicThrottleMs` works as
|
|
559
|
+
// expected without firing every queued cycle on this turn.
|
|
560
|
+
state.lastFlush = drift < topicThrottleMs ? deadline : now;
|
|
561
|
+
} else if (deadline < nextDeadline) {
|
|
562
|
+
nextDeadline = deadline;
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
if (nextDeadline !== Infinity) {
|
|
567
|
+
tickTimer = setTimeout(tick, Math.max(0, nextDeadline - Date.now()));
|
|
568
|
+
}
|
|
569
|
+
// else: scheduler goes idle until next broadcast() / enqueueInbound().
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
function armTick(delay) {
|
|
573
|
+
if (tickTimer !== null) return;
|
|
574
|
+
tickTimer = setTimeout(tick, delay);
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
/**
|
|
578
|
+
* Schedule a local cursor for the next coalesced flush. The leading-
|
|
579
|
+
* edge check fires synchronously when `topicThrottleMs` has elapsed
|
|
580
|
+
* since the last flush (preserves the contract that the first call on
|
|
581
|
+
* an idle topic publishes immediately, without a setTimeout(0) detour).
|
|
582
|
+
*/
|
|
409
583
|
function broadcast(topic, key, user, data, platform) {
|
|
410
584
|
if (topicThrottleMs <= 0) {
|
|
411
585
|
doBroadcast(topic, key, user, data, platform);
|
|
@@ -414,42 +588,64 @@ export function createCursor(client, options = {}) {
|
|
|
414
588
|
|
|
415
589
|
let state = topicFlush.get(topic);
|
|
416
590
|
if (!state) {
|
|
417
|
-
state = {
|
|
591
|
+
state = { dirty: new Map(), inboundDirty: new Map(), lastFlush: 0 };
|
|
418
592
|
topicFlush.set(topic, state);
|
|
419
593
|
}
|
|
420
|
-
|
|
421
594
|
state.dirty.set(key, { user, data, platform });
|
|
422
595
|
|
|
423
596
|
const now = Date.now();
|
|
424
|
-
|
|
425
597
|
if (now - state.lastFlush >= topicThrottleMs) {
|
|
426
|
-
|
|
598
|
+
// Leading-edge synchronous flush.
|
|
427
599
|
state.lastFlush = now;
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
doBroadcast(topic, k, v.user, v.data, v.platform);
|
|
431
|
-
} else {
|
|
432
|
-
flushBulk(topic, state.dirty);
|
|
433
|
-
}
|
|
434
|
-
state.dirty.clear();
|
|
600
|
+
flushBoth(topic, state);
|
|
601
|
+
dirtyTopics.delete(topic);
|
|
435
602
|
return;
|
|
436
603
|
}
|
|
437
604
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
605
|
+
// Within window: trailing-edge flush via the scheduler tick.
|
|
606
|
+
dirtyTopics.add(topic);
|
|
607
|
+
armTick(Math.max(0, topicThrottleMs - (now - state.lastFlush)));
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
/**
|
|
611
|
+
* Schedule a peer-relayed cursor for the next coalesced flush. Symmetric
|
|
612
|
+
* to `broadcast()`: same leading/trailing edge semantics, but inbound
|
|
613
|
+
* entries route through `state.inboundDirty` so they are visible to
|
|
614
|
+
* local subscribers on the next flush WITHOUT being re-relayed to peers
|
|
615
|
+
* (which would loop) and WITHOUT being written to Redis (origin owns
|
|
616
|
+
* the HSET).
|
|
617
|
+
*
|
|
618
|
+
* The peer's cross-replica end-to-end latency gains up to one
|
|
619
|
+
* `topicThrottleMs` of coalescing delay on the receiver side. Cursors
|
|
620
|
+
* are already throttled in the 8-16ms range; adding 8-16ms is well
|
|
621
|
+
* below the ~50-100ms human perception threshold for cursor lag. The
|
|
622
|
+
* smoothness win (one frame per subscriber per cycle instead of two)
|
|
623
|
+
* is the structural benefit.
|
|
624
|
+
*/
|
|
625
|
+
function enqueueInbound(topic, key, data, platform) {
|
|
626
|
+
if (topicThrottleMs <= 0) {
|
|
627
|
+
// Legacy immediate mode (matches old receiver behavior).
|
|
628
|
+
platform.publish('__cursor:' + topic, EVENTS.UPDATE, { key, data }, { relay: false });
|
|
629
|
+
return;
|
|
452
630
|
}
|
|
631
|
+
|
|
632
|
+
let state = topicFlush.get(topic);
|
|
633
|
+
if (!state) {
|
|
634
|
+
state = { dirty: new Map(), inboundDirty: new Map(), lastFlush: 0 };
|
|
635
|
+
topicFlush.set(topic, state);
|
|
636
|
+
}
|
|
637
|
+
state.inboundDirty.set(key, { data, platform });
|
|
638
|
+
|
|
639
|
+
const now = Date.now();
|
|
640
|
+
if (now - state.lastFlush >= topicThrottleMs) {
|
|
641
|
+
state.lastFlush = now;
|
|
642
|
+
flushBoth(topic, state);
|
|
643
|
+
dirtyTopics.delete(topic);
|
|
644
|
+
return;
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
dirtyTopics.add(topic);
|
|
648
|
+
armTick(Math.max(0, topicThrottleMs - (now - state.lastFlush)));
|
|
453
649
|
}
|
|
454
650
|
|
|
455
651
|
async function broadcastRemove(topic, key, platform) {
|
|
@@ -482,8 +678,17 @@ export function createCursor(client, options = {}) {
|
|
|
482
678
|
try {
|
|
483
679
|
platform.subscribe(ws, '__cursor:' + topic);
|
|
484
680
|
} catch {
|
|
485
|
-
|
|
681
|
+
// ws closed before subscribe could land. No state to roll back
|
|
682
|
+
// (no wsState entry exists yet; that is only created on update).
|
|
683
|
+
// Throw so the caller can distinguish an aborted attach from
|
|
684
|
+
// a successful attach; without this the RPC metric reports
|
|
685
|
+
// status=ok for connections that never received cursor frames.
|
|
686
|
+
mAttachesAborted?.inc({ topic: mt(topic), reason: 'ws_closed' });
|
|
687
|
+
throw new WsClosedError('cursor.attach', topic);
|
|
486
688
|
}
|
|
689
|
+
// snapshot() itself swallows ws-closed during platform.send (the
|
|
690
|
+
// state is already committed; clients recover via the next bulk
|
|
691
|
+
// frame). Intentional asymmetry with subscribe failure above.
|
|
487
692
|
await tracker.snapshot(ws, topic, platform);
|
|
488
693
|
},
|
|
489
694
|
|
|
@@ -580,6 +785,7 @@ export function createCursor(client, options = {}) {
|
|
|
580
785
|
topics.delete(topic);
|
|
581
786
|
activeTopics.delete(topic);
|
|
582
787
|
topicFlush.delete(topic);
|
|
788
|
+
dirtyTopics.delete(topic);
|
|
583
789
|
redisPending.delete(topic);
|
|
584
790
|
stopCleanupTimer();
|
|
585
791
|
}
|
|
@@ -637,6 +843,7 @@ export function createCursor(client, options = {}) {
|
|
|
637
843
|
topics.delete(t);
|
|
638
844
|
activeTopics.delete(t);
|
|
639
845
|
topicFlush.delete(t);
|
|
846
|
+
dirtyTopics.delete(t);
|
|
640
847
|
redisPending.delete(t);
|
|
641
848
|
}
|
|
642
849
|
}
|
|
@@ -717,9 +924,9 @@ export function createCursor(client, options = {}) {
|
|
|
717
924
|
if (entry.timer) clearTimeout(entry.timer);
|
|
718
925
|
}
|
|
719
926
|
}
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
927
|
+
// Tracker-level scheduler timer + dirty-topic set.
|
|
928
|
+
if (tickTimer !== null) { clearTimeout(tickTimer); tickTimer = null; }
|
|
929
|
+
dirtyTopics.clear();
|
|
723
930
|
topics.clear();
|
|
724
931
|
topicFlush.clear();
|
|
725
932
|
wsState.clear();
|
|
@@ -739,9 +946,8 @@ export function createCursor(client, options = {}) {
|
|
|
739
946
|
if (entry.timer) clearTimeout(entry.timer);
|
|
740
947
|
}
|
|
741
948
|
}
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
}
|
|
949
|
+
if (tickTimer !== null) { clearTimeout(tickTimer); tickTimer = null; }
|
|
950
|
+
dirtyTopics.clear();
|
|
745
951
|
topicFlush.clear();
|
|
746
952
|
if (subscriber) {
|
|
747
953
|
subscriber.quit().catch(() => subscriber.disconnect());
|
|
@@ -750,6 +956,37 @@ export function createCursor(client, options = {}) {
|
|
|
750
956
|
activePlatform = null;
|
|
751
957
|
},
|
|
752
958
|
|
|
959
|
+
/**
|
|
960
|
+
* Snapshot of scheduler health. Always available, near-zero cost.
|
|
961
|
+
*
|
|
962
|
+
* - `flushes`: total flushes (leading-edge and tick-driven) since tracker creation.
|
|
963
|
+
* - `driftMeanMs`: mean lateness (actual_fire_time - target_deadline) across
|
|
964
|
+
* all tick-driven flushes. 0 means perfect cadence; values >
|
|
965
|
+
* `topicThrottle` indicate sustained event-loop saturation or
|
|
966
|
+
* CPU contention.
|
|
967
|
+
* - `driftMaxMs`: largest single observed late fire. Useful for
|
|
968
|
+
* spotting one-off GC pauses vs. sustained drift.
|
|
969
|
+
* - `dirtyTopicsCurrent`: topics with pending coalesced entries
|
|
970
|
+
* right now. Should hover near zero in healthy operation; growth
|
|
971
|
+
* means tick is falling behind.
|
|
972
|
+
* - `activeTopicsTotal`: topics with at least one local cursor.
|
|
973
|
+
*
|
|
974
|
+
* Leading-edge synchronous flushes (first call on an idle topic)
|
|
975
|
+
* are not counted in drift stats - they fire on the call thread,
|
|
976
|
+
* not via the scheduler.
|
|
977
|
+
*
|
|
978
|
+
* @returns {{ flushes: number, driftMeanMs: number, driftMaxMs: number, dirtyTopicsCurrent: number, activeTopicsTotal: number }}
|
|
979
|
+
*/
|
|
980
|
+
stats() {
|
|
981
|
+
return {
|
|
982
|
+
flushes: flushCount,
|
|
983
|
+
driftMeanMs: driftCount > 0 ? driftSum / driftCount : 0,
|
|
984
|
+
driftMaxMs: driftMax,
|
|
985
|
+
dirtyTopicsCurrent: dirtyTopics.size,
|
|
986
|
+
activeTopicsTotal: topics.size
|
|
987
|
+
};
|
|
988
|
+
},
|
|
989
|
+
|
|
753
990
|
hooks: {
|
|
754
991
|
subscribe(ws, topic, { platform }) {
|
|
755
992
|
if (topic.startsWith('__cursor:')) {
|
package/redis/presence.d.ts
CHANGED
|
@@ -51,10 +51,29 @@ export interface PresenceMetricsSnapshot {
|
|
|
51
51
|
staleCleanedTotal: number;
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
/**
|
|
55
|
+
* Thrown by `join()` when the websocket closes during an async gap before
|
|
56
|
+
* the join can commit. Server-side state is fully rolled back before the
|
|
57
|
+
* throw. Catch on `err.code === 'WS_CLOSED'` rather than the class - the
|
|
58
|
+
* same code is shared with `cursor.attach` and any future RPC-shaped
|
|
59
|
+
* operation in this package.
|
|
60
|
+
*/
|
|
61
|
+
export class WsClosedError extends Error {
|
|
62
|
+
name: 'WsClosedError';
|
|
63
|
+
code: 'WS_CLOSED';
|
|
64
|
+
operation: string;
|
|
65
|
+
topic: string;
|
|
66
|
+
}
|
|
67
|
+
|
|
54
68
|
export interface RedisPresenceTracker {
|
|
55
69
|
/**
|
|
56
70
|
* Add a connection to a topic's presence.
|
|
57
71
|
* Ignores `__`-prefixed topics. Idempotent.
|
|
72
|
+
*
|
|
73
|
+
* @throws {WsClosedError} (`err.code === 'WS_CLOSED'`) if the websocket
|
|
74
|
+
* closes during one of the internal async gaps (subscribe, Redis eval,
|
|
75
|
+
* snapshot fetch, ws.subscribe). Server state is rolled back before
|
|
76
|
+
* the throw; callers do not need to compensate.
|
|
58
77
|
*/
|
|
59
78
|
join(ws: any, topic: string, platform: Platform): Promise<void>;
|
|
60
79
|
|
package/redis/presence.js
CHANGED
|
@@ -47,6 +47,9 @@ import { stripInternal, createSensitiveWarner } from '../shared/sensitive.js';
|
|
|
47
47
|
import { scanAndUnlink } from '../shared/redis-scan.js';
|
|
48
48
|
import { withBreaker } from '../shared/breaker.js';
|
|
49
49
|
import { MAX_PRESENCE_WS, MAX_PRESENCE_TOPICS } from '../shared/caps.js';
|
|
50
|
+
import { WsClosedError } from '../shared/errors.js';
|
|
51
|
+
|
|
52
|
+
export { WsClosedError };
|
|
50
53
|
|
|
51
54
|
/**
|
|
52
55
|
* Lua script for atomic JOIN. Sets this instance's field on the per-user
|
|
@@ -247,6 +250,7 @@ export function createPresence(client, options = {}) {
|
|
|
247
250
|
const m = options.metrics;
|
|
248
251
|
const mt = m?.mapTopic;
|
|
249
252
|
const mJoins = m?.counter('presence_joins_total', 'Presence join events', ['topic']);
|
|
253
|
+
const mJoinsAborted = m?.counter('presence_joins_aborted_total', 'Presence join calls that aborted before commit because the websocket closed during an async gap. Server state was rolled back before the throw. Distinct from `presence_joins_total` (commits) and from generic RPC error metrics (which bucket all throws together regardless of cause).', ['topic', 'reason']);
|
|
250
254
|
const mLeaves = m?.counter('presence_leaves_total', 'Presence leave events', ['topic']);
|
|
251
255
|
const mHeartbeats = m?.counter('presence_heartbeats_total', 'Heartbeat refresh cycles');
|
|
252
256
|
const mTotalOnline = m?.gauge('presence_total_online', 'Unique users present per topic on this instance', ['topic']);
|
|
@@ -985,6 +989,15 @@ export function createPresence(client, options = {}) {
|
|
|
985
989
|
}
|
|
986
990
|
}
|
|
987
991
|
|
|
992
|
+
// Throw helper for "ws closed during async gap" paths inside join(). All
|
|
993
|
+
// five callsites need the same metric label and the same typed error;
|
|
994
|
+
// sharing one helper avoids drift between them and keeps each callsite
|
|
995
|
+
// single-line.
|
|
996
|
+
function throwWsClosed(topic) {
|
|
997
|
+
mJoinsAborted?.inc({ topic: mt(topic), reason: 'ws_closed' });
|
|
998
|
+
throw new WsClosedError('presence.join', topic);
|
|
999
|
+
}
|
|
1000
|
+
|
|
988
1001
|
/** @type {RedisPresenceTracker} */
|
|
989
1002
|
const tracker = {
|
|
990
1003
|
async join(ws, topic, platform) {
|
|
@@ -1071,11 +1084,15 @@ export function createPresence(client, options = {}) {
|
|
|
1071
1084
|
throw err;
|
|
1072
1085
|
}
|
|
1073
1086
|
|
|
1074
|
-
|
|
1087
|
+
// ws closed during `await subscribeToTopic`. The close hook already
|
|
1088
|
+
// ran leaveAll, which swept localCounts / wsTopics for this ws;
|
|
1089
|
+
// no compensating undoJoin needed. Throw so the caller sees the
|
|
1090
|
+
// abort instead of a silent success.
|
|
1091
|
+
if (!wsTopics.has(ws)) throwWsClosed(topic);
|
|
1075
1092
|
|
|
1076
1093
|
try { ws.getBufferedAmount(); } catch {
|
|
1077
1094
|
await undoJoin(ws, topic, key, data, prevCount, prevData, false, false, platform);
|
|
1078
|
-
|
|
1095
|
+
throwWsClosed(topic);
|
|
1079
1096
|
}
|
|
1080
1097
|
|
|
1081
1098
|
let didRedisWrite = false;
|
|
@@ -1108,13 +1125,14 @@ export function createPresence(client, options = {}) {
|
|
|
1108
1125
|
|
|
1109
1126
|
if (!wsTopics.has(ws)) {
|
|
1110
1127
|
// ws closed during the eval. Roll back our Redis write so
|
|
1111
|
-
// the per-user hash entry does not linger past TTL
|
|
1128
|
+
// the per-user hash entry does not linger past TTL, then
|
|
1129
|
+
// surface the abort to the caller.
|
|
1112
1130
|
await redis.eval(
|
|
1113
1131
|
LEAVE_SCRIPT, 2,
|
|
1114
1132
|
userHashKey(topic, key), topicHashKey(topic),
|
|
1115
1133
|
instanceId, key
|
|
1116
1134
|
).catch(() => {});
|
|
1117
|
-
|
|
1135
|
+
throwWsClosed(topic);
|
|
1118
1136
|
}
|
|
1119
1137
|
} else if (prevData !== undefined && !deepEqual(prevData, data)) {
|
|
1120
1138
|
// Same instance, same user, different `select()` output.
|
|
@@ -1157,7 +1175,7 @@ export function createPresence(client, options = {}) {
|
|
|
1157
1175
|
ws.subscribe('__presence:' + topic);
|
|
1158
1176
|
} catch {
|
|
1159
1177
|
await undoJoin(ws, topic, key, data, prevCount, prevData, didRedisWrite, false, platform);
|
|
1160
|
-
|
|
1178
|
+
throwWsClosed(topic);
|
|
1161
1179
|
}
|
|
1162
1180
|
|
|
1163
1181
|
// If ws closed after subscribe, leave() already handled
|
|
@@ -1170,7 +1188,7 @@ export function createPresence(client, options = {}) {
|
|
|
1170
1188
|
instanceId, key
|
|
1171
1189
|
).catch(() => {});
|
|
1172
1190
|
}
|
|
1173
|
-
|
|
1191
|
+
throwWsClosed(topic);
|
|
1174
1192
|
}
|
|
1175
1193
|
|
|
1176
1194
|
// Commit localData and activeTopics now that the join is
|
package/shared/errors.js
CHANGED
|
@@ -57,3 +57,41 @@ export class IdempotencyResultTooLargeError extends Error {
|
|
|
57
57
|
this.maxBytes = maxBytes;
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Thrown by RPC-shaped operations (`presence.join`, `cursor.attach`) when the
|
|
63
|
+
* caller's websocket closes during an async gap before the operation could
|
|
64
|
+
* commit, OR the websocket was already gone by the time the operation
|
|
65
|
+
* resumed from one of its awaits. Server-side state is fully rolled back
|
|
66
|
+
* before the throw so the caller does not need to compensate.
|
|
67
|
+
*
|
|
68
|
+
* Stable contract: `err.code === 'WS_CLOSED'`. Catch on the code, not the
|
|
69
|
+
* class - future RPC-shaped operations that hit the same pattern throw the
|
|
70
|
+
* same code. The `operation` field carries the dotted path (e.g.
|
|
71
|
+
* `'presence.join'`) for operators that want to bucket by feature without
|
|
72
|
+
* parsing the message.
|
|
73
|
+
*
|
|
74
|
+
* Pattern in callers:
|
|
75
|
+
*
|
|
76
|
+
* ```js
|
|
77
|
+
* try {
|
|
78
|
+
* await presence.join(ws, topic, platform);
|
|
79
|
+
* } catch (err) {
|
|
80
|
+
* if (err.code === 'WS_CLOSED') return; // ws already gone, no compensation needed
|
|
81
|
+
* throw err;
|
|
82
|
+
* }
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
export class WsClosedError extends Error {
|
|
86
|
+
/**
|
|
87
|
+
* @param {string} operation - Dotted operation path, e.g. `'presence.join'`.
|
|
88
|
+
* @param {string} topic
|
|
89
|
+
*/
|
|
90
|
+
constructor(operation, topic) {
|
|
91
|
+
super(`${operation}: websocket closed during async gap (topic="${topic}"); rolled back`);
|
|
92
|
+
this.name = 'WsClosedError';
|
|
93
|
+
this.code = 'WS_CLOSED';
|
|
94
|
+
this.operation = operation;
|
|
95
|
+
this.topic = topic;
|
|
96
|
+
}
|
|
97
|
+
}
|