procmesh-js 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/broker.js ADDED
@@ -0,0 +1,676 @@
1
+ 'use strict';
2
+
3
+ const net = require('net');
4
+ const fs = require('fs');
5
+ const EventEmitter = require('events');
+ const crypto = require('crypto');
6
+ const { Peer, TYPES, PROTOCOL_VERSION, matchTopic, isPattern } = require('./protocol');
7
+ const { resolveCodec } = require('./codec');
8
+ const { resolveAddress, isPipe } = require('./transport');
9
+ const Store = require('./store');
10
+ const LockManager = require('./locks');
11
+ const { createPersistence } = require('./persistence');
12
+
13
+ /** Set of every value in TYPES (the valid inbound message tags); the broker checks membership before bumping its per-type ops counter, so an untrusted tag can never add a counter key. */
14
+ const KNOWN_TYPES = new Set(Object.values(TYPES));
15
+
16
+ /** Default extra time, in milliseconds, past a caller's deadline before the broker reaps a hung RPC call (cleanup backstop); used when opts.callTimeoutGrace is null/undefined. */
17
+ const CALL_TIMEOUT_GRACE = 5000;
18
+
19
/**
 * The central broker. Holds the authoritative cache, routes pub/sub and RPC,
 * and manages locks. Because everything runs in this one process on a single
 * event loop, cache/atomic/lock semantics are correct without any coordination.
 *
 * Emits:
 *  - 'connect' (connId) / 'disconnect' (connId) as peers come and go,
 *  - 'drop' ({ channel, connId }) when a pub/sub frame is dropped for a slow
 *    consumer (backpressure),
 *  - 'reap' (connId) when an idle connection is reaped,
 *  - 'stats' (snapshot) every `statsInterval` ms when that option is set,
 *  - 'persist-error' (err) when the persistence layer reports an error.
 */
class Broker extends EventEmitter {
  /**
   * @param {object} [opts]
   * @param {string} [opts.name='default'] Broker name; also used to derive the default address.
   * @param {string} [opts.address] Listen address (defaults to resolveAddress(name)).
   * @param {*} [opts.codec] Passed to resolveCodec().
   * @param {object} [opts.cache] Options forwarded to the Store.
   * @param {number} [opts.fenceSeed=0] Initial value of the fencing-token counter.
   * @param {*} [opts.token] Shared auth token; when unset every connection is implicitly authed.
   * @param {number} [opts.callTimeout=30000] Fallback RPC deadline (ms) when the caller sends none.
   * @param {number} [opts.callTimeoutGrace] Extra ms past the deadline before a hung call is reaped.
   * @param {*} [opts.persist] Passed to createPersistence().
   * @param {number} [opts.sendHighWaterMark] Forwarded to each Peer.
   * @param {number} [opts.sendHardLimit] Forwarded to each Peer.
   * @param {number} [opts.statsInterval=0] ms between 'stats' emits; 0 disables.
   * @param {number} [opts.idleTimeout=0] ms with zero connections before auto-close; 0 disables.
   * @param {number} [opts.heartbeatInterval=30000] ms between heartbeat sweeps; 0 disables.
   */
  constructor(opts = {}) {
    super();
    this.opts = opts;
    this.name = opts.name || 'default';
    this.address = opts.address || resolveAddress(this.name);
    this.codec = resolveCodec(opts.codec);
    this.store = new Store(opts.cache || {});
    // Global monotonic fencing-token counter. Seeded from persisted state (Phase 3) so tokens
    // stay strictly increasing across broker restarts — a stale pre-crash token can never pass.
    this.nextToken = opts.fenceSeed || 0;
    this.locks = new LockManager({ mintToken: () => this._mintToken() });
    this.token = opts.token || null;
    // Fallback deadline for an RPC call whose caller didn't send its own timeout (older clients).
    this.callTimeout = opts.callTimeout || 30000;
    // Grace past a caller's deadline before the broker reaps a hung call (cleanup backstop).
    this.callTimeoutGrace = opts.callTimeoutGrace == null ? CALL_TIMEOUT_GRACE : opts.callTimeoutGrace;

    // Crash-survival persistence (no-op unless opts.persist / PROCMESH_PERSIST_DIR is set).
    this.persist = createPersistence(opts.persist, this.name, this.codec);
    this.persist.onError = (err) => this.emit('persist-error', err);

    this.peerOpts = {
      sendHighWaterMark: opts.sendHighWaterMark,
      sendHardLimit: opts.sendHardLimit,
    };

    this.conns = new Map(); // connId -> conn
    this.channels = new Map(); // exact channel -> Set<connId>
    this.patterns = new Map(); // wildcard pattern -> Set<connId>
    this.procs = new Map(); // procName -> { workers: Set<connId>, inflight: Map<connId, n> }
    this.pending = new Map(); // brokerCallId -> { callerConnId, callerCallId, ownerConnId, name, timer }

    this.nextConnId = 1;
    this.nextCallId = 1;
    this.dropped = 0; // count of pub/sub frames dropped due to backpressure

    // Observability: cheap monotonic counters bumped on the hot path (one integer
    // increment per message), surfaced via the STATS request and optional 'stats' emit.
    this.startedAt = nowMs();
    this.stats = { ops: {}, reaped: 0 };
    this._cpuBase = process.cpuUsage();
    this._cpuBaseAt = nowMs();
    this.statsInterval = opts.statsInterval || 0; // ms; 0 = no periodic emit
    this._statsTimer = null;

    this.idleTimeout = opts.idleTimeout || 0; // ms; 0 = never auto-shutdown
    this._idleTimer = null;
    this.heartbeatInterval = opts.heartbeatInterval == null ? 30000 : opts.heartbeatInterval;
    this._heartbeatTimer = null;
    this.server = null;

    // Shutdown state: `_closed` flips synchronously when close() begins; `_closing` caches the
    // in-flight/finished shutdown promise so repeated close() calls share one shutdown.
    this._closed = false;
    this._closing = null;
  }

  /**
   * Load persisted state, start listening, and start the heartbeat/stats/persistence timers.
   * @returns {Promise<Broker>} this broker, once it is listening.
   */
  async start() {
    this._closed = false;
    this._closing = null;
    // Recover persisted cache/atomics BEFORE listening, so no request is ever served against
    // half-loaded state. Seed the fencing counter past every token issued before the crash.
    await this.persist.load(this.store);
    this.nextToken = Math.max(this.nextToken, this.persist.loadedToken || 0);
    await this._listen();
    this._startHeartbeat();
    this._startStats();
    this.persist.start();
    return this;
  }

  /**
   * Bind the server. On EADDRINUSE, probe the address: if something answers, rethrow;
   * otherwise treat it as a stale socket file, remove it (non-pipe addresses only) and retry once.
   */
  async _listen() {
    this.server = net.createServer((socket) => this._onConnection(socket));
    try {
      await this._tryListen();
    } catch (err) {
      if (err.code !== 'EADDRINUSE') throw err;
      // Address in use: is a live broker already there, or is this a stale socket?
      const alive = await this._probe();
      if (alive) throw err; // genuine — caller should just connect instead
      if (!isPipe(this.address)) {
        try {
          fs.unlinkSync(this.address);
        } catch {
          /* ignore */
        }
      }
      this.server = net.createServer((socket) => this._onConnection(socket));
      await this._tryListen();
    }
  }

  /** Promise wrapper around server.listen(): resolves once listening, rejects on a listen error. */
  _tryListen() {
    return new Promise((resolve, reject) => {
      const onError = (err) => reject(err);
      this.server.once('error', onError);
      this.server.listen(this.address, () => {
        this.server.removeListener('error', onError);
        resolve();
      });
    });
  }

  /**
   * Try to connect to our own address.
   * @returns {Promise<boolean>} true if a connection succeeded; false on error or after 500 ms.
   */
  _probe() {
    return new Promise((resolve) => {
      const socket = net.connect(this.address);
      const done = (alive) => {
        socket.destroy();
        resolve(alive);
      };
      socket.once('connect', () => done(true));
      socket.once('error', () => done(false));
      socket.setTimeout(500, () => done(false));
    });
  }

  /** Wrap an accepted socket in a Peer, register the connection record, and wire its events. */
  _onConnection(socket) {
    // A socket accepted while shutting down would be tracked by nobody and could keep
    // server.close() from ever completing — refuse it immediately.
    if (this._closed) {
      socket.destroy();
      return;
    }
    socket.setNoDelay(true);
    const conn = {
      id: this.nextConnId++,
      peer: new Peer(socket, this.codec, this.peerOpts),
      name: null,
      authed: !this.token, // if no token configured, every conn is implicitly authed
      lastSeen: nowMs(),
      subs: new Set(),
      procs: new Set(),
    };
    this.conns.set(conn.id, conn);
    this.emit('connect', conn.id);
    this._cancelIdle();
    conn.peer.on('message', (msg) => {
      conn.lastSeen = nowMs();
      this._handle(conn, msg);
    });
    conn.peer.on('close', () => this._onClose(conn));
    conn.peer.on('error', () => {
      /* 'close' handles cleanup */
    });
  }

  /** Send an OK reply carrying `value`; a message without an id (null/undefined) gets no reply. */
  _ok(conn, id, value) {
    if (id != null) conn.peer.send({ t: TYPES.OK, id, value });
  }

  /** Send an ERR reply with `message` and `code`; a message without an id gets no reply. */
  _err(conn, id, message, code) {
    if (id != null) conn.peer.send({ t: TYPES.ERR, id, message, code });
  }

  /** Relay a thrown/rejected value to the requester as an ERR (code defaults to 'EBROKER'). */
  _relayError(conn, id, err) {
    const message = err != null && typeof err.message === 'string' ? err.message : String(err);
    const code = (err != null && err.code) || 'EBROKER';
    this._err(conn, id, message, code);
  }

  /**
   * Compare a presented auth token with the configured one. Strings are compared in
   * constant time so response timing does not leak how much of a guess was right; any
   * other type falls back to strict equality (the previous behaviour).
   * @returns {boolean}
   */
  _tokenMatches(presented) {
    const expected = this.token;
    if (typeof expected !== 'string' || typeof presented !== 'string') {
      return presented === expected;
    }
    const a = Buffer.from(expected);
    const b = Buffer.from(presented);
    // timingSafeEqual throws on unequal lengths, so check those first.
    return a.length === b.length && crypto.timingSafeEqual(a, b);
  }

  // Persistence loggers. Each early-returns when persistence is disabled so the hot path
  // allocates no record object in the (default) in-memory configuration.

  /** Log a set effect with absolute expiry (0 = no TTL). */
  _logSet(key, value, ttl) {
    if (!this.persist.enabled) return;
    this.persist.logMutation({ op: 'set', k: key, v: value, e: ttl && ttl > 0 ? Date.now() + ttl : 0 });
  }

  /** Log a delete of `key`. */
  _logDel(key) {
    if (!this.persist.enabled) return;
    this.persist.logMutation({ op: 'del', k: key });
  }

  /** Log a full cache clear. */
  _logClear() {
    if (!this.persist.enabled) return;
    this.persist.logMutation({ op: 'clear' });
  }

  /** A successful CAS is either a set (to `next`) or a delete (when `next` is undefined). */
  _logCasEffect(key, next) {
    if (!this.persist.enabled) return;
    if (next === undefined) this.persist.logMutation({ op: 'del', k: key });
    else this._logSet(key, next, this.store.remainingTTL(key)); // preserve any in-place TTL
  }

  /**
   * Issue the next monotonic fencing token. Single event loop ⇒ no locking needed.
   * @throws {Error} code 'EFENCEEXHAUSTED' once the counter reaches Number.MAX_SAFE_INTEGER.
   */
  _mintToken() {
    if (this.nextToken >= Number.MAX_SAFE_INTEGER) {
      const err = new Error('fencing token space exhausted');
      err.code = 'EFENCEEXHAUSTED';
      throw err;
    }
    this.nextToken += 1;
    if (this.persist) this.persist.noteToken(this.nextToken);
    return this.nextToken;
  }

  /**
   * Gate a fenced mutation: reject (EFENCED) if the presented token is older than the highest
   * ever issued for the governing lock key — i.e. the caller's lock was superseded. Throws so
   * the surrounding _handle try/catch relays it as an ERR. On success, raises the bar.
   *
   * The token arrives off the wire, so it is validated first: anything that is not a safe
   * integer (a string, NaN, an object — all of which make `token < high` false and would
   * otherwise slip through) or that exceeds the highest token this broker ever minted is
   * rejected. Accepting a never-issued, too-large token would raise the bar and lock out the
   * legitimate holder.
   */
  _fence(lockKey, token) {
    const high = this.locks.getFenceHigh(lockKey);
    if (!Number.isSafeInteger(token) || token > this.nextToken) {
      const err = new Error(`fenced: invalid token for "${lockKey}"`);
      err.code = 'EFENCED';
      throw err;
    }
    if (token < high) {
      const err = new Error(`fenced: token ${token} < ${high} for "${lockKey}"`);
      err.code = 'EFENCED';
      throw err;
    }
    this.locks.bumpFence(lockKey, token);
  }

  /**
   * Dispatch one decoded inbound message from `conn`. Synchronous failures in any branch are
   * relayed to the sender as an ERR reply rather than thrown.
   */
  _handle(conn, msg) {
    // A codec can decode `null` or a scalar; destructuring that would throw out of the
    // 'message' listener. Such a frame has no type or id, so there is nothing to answer.
    if (msg === null || typeof msg !== 'object') return;
    const { t, id } = msg;

    // Handshake / auth gate: until a connection says HELLO with a valid token,
    // it may only send HELLO, PING, or PONG.
    if (!conn.authed && t !== TYPES.HELLO && t !== TYPES.PING && t !== TYPES.PONG) {
      this._err(conn, id, 'not authenticated', 'EAUTH');
      return;
    }

    // Count ops only for KNOWN types, and only after the auth gate. `t` is
    // attacker-controlled, so keying the counter by an arbitrary string before
    // these checks would let an (even unauthenticated) peer grow this object
    // without bound — a memory-exhaustion DoS. Unknown types fall through to the
    // `default` branch and never create a counter key.
    if (KNOWN_TYPES.has(t)) {
      this.stats.ops[t] = (this.stats.ops[t] || 0) + 1;
    }

    try {
      switch (t) {
        case TYPES.HELLO:
          if (this.token && !this._tokenMatches(msg.token)) {
            this._err(conn, id, 'invalid auth token', 'EAUTH');
            conn.peer.destroy();
            return;
          }
          conn.authed = true;
          conn.name = msg.name || null;
          conn.peer.send({ t: TYPES.WELCOME, id, version: PROTOCOL_VERSION, broker: this.name });
          break;
        case TYPES.PING:
          conn.peer.send({ t: TYPES.PONG, id });
          break;
        case TYPES.PONG:
          break; // lastSeen already stamped on receipt
        case TYPES.SHUTDOWN:
          this._ok(conn, id, true);
          setImmediate(() => this.close());
          break;
        case TYPES.STATS:
          this._ok(conn, id, this.snapshot());
          break;

        // ---- cache ----
        case TYPES.GET:
          this._ok(conn, id, this.store.get(msg.key));
          break;
        case TYPES.SET:
          this._ok(conn, id, this.store.set(msg.key, msg.value, msg.ttl));
          this._logSet(msg.key, msg.value, msg.ttl);
          break;
        case TYPES.DEL:
          this._ok(conn, id, this.store.del(msg.key));
          this._logDel(msg.key);
          break;
        case TYPES.HAS:
          this._ok(conn, id, this.store.has(msg.key));
          break;
        case TYPES.KEYS:
          this._ok(conn, id, this.store.keys());
          break;
        case TYPES.CLEAR:
          this._ok(conn, id, this.store.clear());
          this._logClear();
          break;
        case TYPES.MGET:
          this._ok(conn, id, this.store.mget(msg.keys || []));
          break;
        case TYPES.MSET: {
          const entries = msg.entries || [];
          this._ok(conn, id, this.store.mset(entries));
          for (const [k, v] of entries) this._logSet(k, v, 0);
          break;
        }

        // ---- atomic ----
        case TYPES.INCR: {
          const next = this.store.incr(msg.key, msg.by == null ? 1 : msg.by);
          this._logSet(msg.key, next, this.store.remainingTTL(msg.key)); // preserve any TTL
          this._ok(conn, id, next);
          break;
        }
        case TYPES.DECR: {
          const next = this.store.incr(msg.key, -(msg.by == null ? 1 : msg.by));
          this._logSet(msg.key, next, this.store.remainingTTL(msg.key)); // preserve any TTL
          this._ok(conn, id, next);
          break;
        }
        case TYPES.CAS: {
          const ok = this.store.cas(msg.key, msg.prev, msg.next);
          if (ok) this._logCasEffect(msg.key, msg.next);
          this._ok(conn, id, ok);
          break;
        }

        // ---- locks ----
        case TYPES.LOCK:
          // acquire() settles later, outside this try/catch, so a rejection needs its own
          // handler; without one it would surface as an unhandled rejection.
          this.locks
            .acquire(msg.key, conn.id, { ttl: msg.ttl, wait: msg.wait })
            .then((res) => this._ok(conn, id, res))
            .catch((err) => this._relayError(conn, id, err));
          break;
        case TYPES.UNLOCK:
          this._ok(conn, id, this.locks.release(msg.key, conn.id));
          break;

        // ---- fenced mutations (gated by a lock's fencing token) ----
        case TYPES.FSET:
          this._fence(msg.key, msg.token);
          this._ok(conn, id, this.store.set(msg.k, msg.value, msg.ttl));
          this._logSet(msg.k, msg.value, msg.ttl);
          break;
        case TYPES.FCAS: {
          this._fence(msg.key, msg.token);
          const ok = this.store.cas(msg.k, msg.prev, msg.next);
          if (ok) this._logCasEffect(msg.k, msg.next);
          this._ok(conn, id, ok);
          break;
        }
        case TYPES.FDEL:
          this._fence(msg.key, msg.token);
          this._ok(conn, id, this.store.del(msg.k));
          this._logDel(msg.k);
          break;

        // ---- pub/sub ----
        case TYPES.SUBSCRIBE:
          this._subscribe(conn, msg.channel);
          this._ok(conn, id, true);
          break;
        case TYPES.UNSUBSCRIBE:
          this._unsubscribe(conn, msg.channel);
          this._ok(conn, id, true);
          break;
        case TYPES.PUBLISH:
          this._ok(conn, id, this._publish(msg.channel, msg.payload));
          break;

        // ---- rpc ----
        case TYPES.REGISTER:
          this._register(conn, msg.name);
          this._ok(conn, id, true);
          break;
        case TYPES.UNREGISTER:
          this._unregister(conn, msg.name);
          this._ok(conn, id, true);
          break;
        case TYPES.CALL:
          this._call(conn, msg);
          break;
        case TYPES.RESULT:
          this._result(msg, conn);
          break;

        default:
          this._err(conn, id, `unknown message type: ${t}`, 'EUNKNOWN');
      }
    } catch (err) {
      this._relayError(conn, id, err);
    }
  }

  // ------------------------------------------------------------------- pub/sub

  /** Subscribe `conn` to an exact channel or a wildcard pattern (chosen by isPattern). */
  _subscribe(conn, channel) {
    conn.subs.add(channel);
    const map = isPattern(channel) ? this.patterns : this.channels;
    let set = map.get(channel);
    if (!set) {
      set = new Set();
      map.set(channel, set);
    }
    set.add(conn.id);
  }

  /** Remove a subscription; the channel/pattern entry is dropped once it has no subscribers. */
  _unsubscribe(conn, channel) {
    conn.subs.delete(channel);
    const map = isPattern(channel) ? this.patterns : this.channels;
    const set = map.get(channel);
    if (set) {
      set.delete(conn.id);
      if (set.size === 0) map.delete(channel);
    }
  }

  /**
   * Fan a payload out to every subscriber of `channel`.
   * @returns {number} how many subscribers the frame was sent to (dropped frames excluded).
   */
  _publish(channel, payload) {
    // Collect target conns: exact subscribers + any matching wildcard patterns.
    // Dedupe so a conn subscribed both exactly and by pattern gets one copy.
    const targets = new Set(this.channels.get(channel) || []);
    if (this.patterns.size) {
      for (const [pattern, set] of this.patterns) {
        if (matchTopic(pattern, channel)) {
          for (const cid of set) targets.add(cid);
        }
      }
    }
    let delivered = 0;
    for (const cid of targets) {
      const c = this.conns.get(cid);
      if (!c) continue;
      const r = c.peer.send({ t: TYPES.MESSAGE, channel, payload }, { droppable: true });
      if (r === 'dropped') {
        this.dropped++;
        this.emit('drop', { channel, connId: cid });
      } else {
        delivered++;
      }
    }
    return delivered;
  }

  // ----------------------------------------------------------------------- rpc

  /** Register `conn` as a worker for proc `name`, starting its in-flight count at 0. */
  _register(conn, name) {
    let entry = this.procs.get(name);
    if (!entry) {
      entry = { workers: new Set(), inflight: new Map() };
      this.procs.set(name, entry);
    }
    entry.workers.add(conn.id);
    if (!entry.inflight.has(conn.id)) entry.inflight.set(conn.id, 0);
    conn.procs.add(name);
  }

  /** Remove `conn` as a worker for proc `name`; the proc entry is dropped with its last worker. */
  _unregister(conn, name) {
    conn.procs.delete(name);
    const entry = this.procs.get(name);
    if (!entry) return;
    entry.workers.delete(conn.id);
    entry.inflight.delete(conn.id);
    if (entry.workers.size === 0) this.procs.delete(name);
  }

  /** Pick the least-busy worker for a proc (fewest in-flight calls). */
  _pickWorker(entry) {
    let best = null;
    let bestLoad = Infinity;
    for (const cid of entry.workers) {
      const load = entry.inflight.get(cid) || 0;
      if (load < bestLoad) {
        bestLoad = load;
        best = cid;
      }
    }
    return best;
  }

  /**
   * Delay (ms) before the broker reaps a call: the caller's timeout when it is a positive
   * finite number, else `callTimeout`, plus `callTimeoutGrace`. `timeout` is peer-supplied,
   * so a string (which `+` would concatenate) is ignored, and the result is capped at the
   * largest delay setTimeout honours — larger values make Node fire the timer after 1 ms.
   */
  _callDeadline(timeout) {
    const MAX_TIMER_DELAY = 2147483647; // 2^31 - 1
    const base = Number.isFinite(timeout) && timeout > 0 ? timeout : this.callTimeout;
    return Math.min(base + this.callTimeoutGrace, MAX_TIMER_DELAY);
  }

  /** Route a CALL to the least-busy worker for `msg.name` and track it in `pending`. */
  _call(conn, msg) {
    const entry = this.procs.get(msg.name);
    if (!entry || entry.workers.size === 0) {
      this._err(conn, msg.id, `no handler registered for "${msg.name}"`, 'ENOHANDLER');
      return;
    }
    const workerId = this._pickWorker(entry);
    const owner = workerId != null ? this.conns.get(workerId) : null;
    if (!owner) {
      // Stale worker entry (conn already gone) — prune it and report unavailable.
      if (workerId != null) {
        entry.workers.delete(workerId);
        entry.inflight.delete(workerId);
        if (entry.workers.size === 0) this.procs.delete(msg.name);
      }
      this._err(conn, msg.id, `handler "${msg.name}" unavailable`, 'ENOHANDLER');
      return;
    }
    const brokerCallId = this.nextCallId++;
    // Backstop timeout so a worker that stays connected but never replies can't leak a `pending`
    // entry forever or pin its `inflight` count (which would permanently skew least-busy dispatch).
    // Fire a grace period AFTER the caller's own deadline so the broker is the cleanup backstop and
    // doesn't race the client-side timeout that the user actually sees.
    const timer = setTimeout(() => this._expireCall(brokerCallId), this._callDeadline(msg.timeout));
    if (timer.unref) timer.unref();
    this.pending.set(brokerCallId, {
      callerConnId: conn.id,
      callerCallId: msg.id,
      ownerConnId: owner.id,
      name: msg.name,
      timer,
    });
    entry.inflight.set(owner.id, (entry.inflight.get(owner.id) || 0) + 1);
    owner.peer.send({ t: TYPES.INVOKE, id: brokerCallId, name: msg.name, args: msg.args || [] });
  }

  /** A pending call exceeded its deadline (worker hung while connected): free state, fail the caller. */
  _expireCall(brokerCallId) {
    const p = this.pending.get(brokerCallId);
    if (!p) return;
    this.pending.delete(brokerCallId);
    this._decInflight(p.name, p.ownerConnId);
    const caller = this.conns.get(p.callerConnId);
    if (caller) this._err(caller, p.callerCallId, `rpc call "${p.name}" timed out`, 'ECALLTIMEOUT');
  }

  /** Decrement (never below 0) the in-flight count of worker `ownerConnId` for proc `name`. */
  _decInflight(name, ownerConnId) {
    const entry = this.procs.get(name);
    if (!entry) return;
    const cur = entry.inflight.get(ownerConnId);
    if (cur != null) entry.inflight.set(ownerConnId, Math.max(0, cur - 1));
  }

  /**
   * Complete a pending RPC call with a worker's RESULT and relay it to the caller.
   *
   * @param {object} msg RESULT message; `msg.id` is the broker call id.
   * @param {object} [conn] Connection the RESULT arrived on. When given, the RESULT is
   *   ignored unless it comes from the worker the call was dispatched to — broker call ids
   *   are sequential, so without this check any peer could forge another worker's reply.
   */
  _result(msg, conn) {
    const p = this.pending.get(msg.id);
    if (!p) return;
    if (conn && p.ownerConnId !== conn.id) return;
    this.pending.delete(msg.id);
    clearTimeout(p.timer);
    this._decInflight(p.name, p.ownerConnId);
    const caller = this.conns.get(p.callerConnId);
    if (!caller) return;
    if (msg.error) {
      this._err(caller, p.callerCallId, msg.error.message, msg.error.code || 'ECALL');
    } else {
      this._ok(caller, p.callerCallId, msg.result);
    }
  }

  // ------------------------------------------------------------------ lifecycle

  /** Tear down everything a closed connection owned: subscriptions, procs, locks, RPC calls. */
  _onClose(conn) {
    this.conns.delete(conn.id);
    this.emit('disconnect', conn.id);
    for (const ch of conn.subs) {
      const map = isPattern(ch) ? this.patterns : this.channels;
      const set = map.get(ch);
      if (set) {
        set.delete(conn.id);
        if (set.size === 0) map.delete(ch);
      }
    }
    for (const name of conn.procs) {
      const entry = this.procs.get(name);
      if (entry) {
        entry.workers.delete(conn.id);
        entry.inflight.delete(conn.id);
        if (entry.workers.size === 0) this.procs.delete(name);
      }
    }
    this.locks.releaseAll(conn.id);
    // Fail in-flight calls owned by this connection; drop calls it originated.
    for (const [bid, p] of this.pending) {
      if (p.ownerConnId === conn.id) {
        clearTimeout(p.timer);
        this.pending.delete(bid);
        this._decInflight(p.name, p.ownerConnId);
        const caller = this.conns.get(p.callerConnId);
        if (caller) this._err(caller, p.callerCallId, 'rpc handler disconnected', 'EHANDLERGONE');
      } else if (p.callerConnId === conn.id) {
        clearTimeout(p.timer);
        this.pending.delete(bid);
        this._decInflight(p.name, p.ownerConnId);
      }
    }
    this._scheduleIdle();
  }

  /** Start the periodic heartbeat sweep (no-op when heartbeatInterval is 0). */
  _startHeartbeat() {
    if (!this.heartbeatInterval) return;
    this._heartbeatTimer = setInterval(() => this._sweepHeartbeat(), this.heartbeatInterval);
    if (this._heartbeatTimer.unref) this._heartbeatTimer.unref();
  }

  /** One heartbeat pass: PING connections silent for over one interval, reap those silent for over three. */
  _sweepHeartbeat() {
    const now = nowMs();
    // Reap only after 3 intervals of silence. The ping window (idle > interval)
    // must be at least 2 sweeps wide so a healthy conn is always pinged — and
    // gets a chance to answer — before it can ever be reaped.
    const reapAfter = this.heartbeatInterval * 3;
    for (const conn of this.conns.values()) {
      const idle = now - conn.lastSeen;
      if (idle > reapAfter) {
        this.stats.reaped += 1;
        this.emit('reap', conn.id);
        conn.peer.destroy();
      } else if (idle > this.heartbeatInterval) {
        conn.peer.send({ t: TYPES.PING });
      }
    }
  }

  /** Start the periodic 'stats' emit (no-op when statsInterval is 0). */
  _startStats() {
    if (!this.statsInterval) return;
    this._statsTimer = setInterval(() => this.emit('stats', this.snapshot()), this.statsInterval);
    if (this._statsTimer.unref) this._statsTimer.unref();
  }

  /**
   * A point-in-time operational snapshot — served on STATS and emitted periodically.
   * `cpuCoreFraction` is CPU time (user + system) consumed since construction divided by
   * the wall-clock time elapsed since construction.
   */
  snapshot() {
    const cpu = process.cpuUsage(this._cpuBase);
    const windowUs = Math.max(1, (nowMs() - this._cpuBaseAt) * 1000);
    const lockStats = this.locks.stats();
    const procs = [];
    for (const [name, entry] of this.procs) procs.push({ name, workers: entry.workers.size });
    return {
      uptimeMs: nowMs() - this.startedAt,
      connections: this.conns.size,
      cacheSize: this.store.size,
      ops: { ...this.stats.ops },
      dropped: this.dropped,
      reaped: this.stats.reaped,
      locks: lockStats.locks,
      lockWaiters: lockStats.waiters,
      pendingCalls: this.pending.size,
      subscriptions: this.channels.size + this.patterns.size,
      procs,
      memory: process.memoryUsage(),
      cpuCoreFraction: (cpu.user + cpu.system) / windowUs,
    };
  }

  /** Arm the auto-shutdown timer when idleTimeout is set and no connections remain. */
  _scheduleIdle() {
    // Once close() has begun, the disconnects it triggers must not re-arm the timer,
    // or the broker would schedule a second close() for itself.
    if (this._closed || !this.idleTimeout || this.conns.size > 0) return;
    this._cancelIdle();
    this._idleTimer = setTimeout(() => this.close(), this.idleTimeout);
    if (this._idleTimer.unref) this._idleTimer.unref();
  }

  /** Disarm the auto-shutdown timer, if armed. */
  _cancelIdle() {
    if (this._idleTimer) {
      clearTimeout(this._idleTimer);
      this._idleTimer = null;
    }
  }

  /**
   * Shut the broker down. Idempotent: the first call starts the shutdown and every later
   * call (SHUTDOWN request, idle timer, embedding code) gets the same promise back, so the
   * persistence flush and server close run exactly once.
   * @returns {Promise<void>}
   */
  close() {
    if (this._closing) return this._closing;
    this._closed = true; // set before any socket is destroyed — see _scheduleIdle/_onConnection
    this._closing = this._shutdown();
    return this._closing;
  }

  /** The actual shutdown sequence behind close(): timers, connections, persistence, server, socket file. */
  async _shutdown() {
    this._cancelIdle();
    if (this._heartbeatTimer) {
      clearInterval(this._heartbeatTimer);
      this._heartbeatTimer = null;
    }
    if (this._statsTimer) {
      clearInterval(this._statsTimer);
      this._statsTimer = null;
    }
    for (const c of this.conns.values()) c.peer.destroy();
    this.conns.clear();
    await this.persist.flushAndClose(); // final snapshot + fsync → planned restart is lossless
    if (this.server) {
      await new Promise((resolve) => this.server.close(() => resolve()));
    }
    if (!isPipe(this.address)) {
      try {
        fs.unlinkSync(this.address);
      } catch {
        /* ignore */
      }
    }
  }
}
670
+
671
/**
 * Current reading of the process high-resolution timer, truncated to whole milliseconds.
 * Used instead of Date on the hot path.
 */
function nowMs() {
  const nanos = process.hrtime.bigint();
  const millis = nanos / 1000000n; // BigInt division truncates toward zero
  return Number(millis);
}
675
+
676
+ module.exports = Broker;