procmesh-js 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +331 -0
- package/package.json +46 -0
- package/src/broker-bin.js +41 -0
- package/src/broker.js +676 -0
- package/src/cli.js +146 -0
- package/src/client.js +512 -0
- package/src/codec.js +54 -0
- package/src/errors.js +43 -0
- package/src/hashring.js +27 -0
- package/src/index.js +49 -0
- package/src/locks.js +129 -0
- package/src/persistence.js +327 -0
- package/src/protocol.js +169 -0
- package/src/sharded-client.js +338 -0
- package/src/store.js +155 -0
- package/src/transport.js +32 -0
package/src/broker.js
ADDED
|
@@ -0,0 +1,676 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const net = require('net');
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const EventEmitter = require('events');
|
|
6
|
+
const { Peer, TYPES, PROTOCOL_VERSION, matchTopic, isPattern } = require('./protocol');
|
|
7
|
+
const { resolveCodec } = require('./codec');
|
|
8
|
+
const { resolveAddress, isPipe } = require('./transport');
|
|
9
|
+
const Store = require('./store');
|
|
10
|
+
const LockManager = require('./locks');
|
|
11
|
+
const { createPersistence } = require('./persistence');
|
|
12
|
+
|
|
13
|
+
/**
 * The set of every message tag declared in TYPES. The broker consults it before bumping the
 * per-type ops counter, so the counter can only ever hold one key per declared tag.
 */
const KNOWN_TYPES = new Set(Object.keys(TYPES).map((tagName) => TYPES[tagName]));
|
|
15
|
+
|
|
16
|
+
/**
 * Default grace period, in milliseconds, added on top of a caller's own RPC deadline before
 * the broker reaps a call that never completed (cleanup backstop).
 */
const CALL_TIMEOUT_GRACE = 5 * 1000;
|
|
18
|
+
|
|
19
|
+
/**
 * The central broker. Holds the authoritative cache, routes pub/sub and RPC,
 * and manages locks. Because everything runs in this one process on a single
 * event loop, cache/atomic/lock semantics are correct without any coordination.
 *
 * Events emitted:
 *  - 'connect' (connId) / 'disconnect' (connId) when a socket is accepted / closed;
 *  - 'drop' ({ channel, connId }) when a pub/sub frame is dropped for a slow consumer;
 *  - 'reap' (connId) when a silent connection is destroyed by the heartbeat sweep;
 *  - 'stats' (snapshot) every `opts.statsInterval` ms when that option is non-zero;
 *  - 'persist-error' (err) when the persistence layer reports an error.
 */
class Broker extends EventEmitter {
  /**
   * @param {object} [opts]
   * @param {string} [opts.name='default'] broker name; also used to derive the default address
   * @param {string} [opts.address] listen address (defaults to resolveAddress(name))
   * @param {*} [opts.codec] passed to resolveCodec
   * @param {object} [opts.cache] options forwarded to the Store
   * @param {number} [opts.fenceSeed=0] initial value of the fencing-token counter
   * @param {string} [opts.token] shared auth token; when unset every connection is authed
   * @param {number} [opts.callTimeout=30000] RPC deadline used when the caller sends none
   * @param {number} [opts.callTimeoutGrace] extra ms past the deadline before a call is reaped
   * @param {*} [opts.persist] passed to createPersistence
   * @param {number} [opts.sendHighWaterMark] forwarded to every Peer
   * @param {number} [opts.sendHardLimit] forwarded to every Peer
   * @param {number} [opts.statsInterval=0] ms between 'stats' emits; 0 disables
   * @param {number} [opts.idleTimeout=0] ms with zero connections before auto-close; 0 disables
   * @param {number} [opts.heartbeatInterval=30000] ms between heartbeat sweeps; 0 disables
   */
  constructor(opts = {}) {
    super();
    this.opts = opts;
    this.name = opts.name || 'default';
    this.address = opts.address || resolveAddress(this.name);
    this.codec = resolveCodec(opts.codec);
    this.store = new Store(opts.cache || {});
    // Global monotonic fencing-token counter. Seeded from persisted state in start() so tokens
    // stay strictly increasing across broker restarts — a stale pre-crash token can never pass.
    this.nextToken = opts.fenceSeed || 0;
    this.locks = new LockManager({ mintToken: () => this._mintToken() });
    this.token = opts.token || null;
    // Fallback deadline for an RPC call whose caller didn't send its own timeout (older clients).
    this.callTimeout = opts.callTimeout || 30000;
    // Grace past a caller's deadline before the broker reaps a hung call (cleanup backstop).
    this.callTimeoutGrace = opts.callTimeoutGrace == null ? CALL_TIMEOUT_GRACE : opts.callTimeoutGrace;

    // Crash-survival persistence; the loggers below check `persist.enabled` before recording.
    this.persist = createPersistence(opts.persist, this.name, this.codec);
    this.persist.onError = (err) => this.emit('persist-error', err);

    this.peerOpts = {
      sendHighWaterMark: opts.sendHighWaterMark,
      sendHardLimit: opts.sendHardLimit,
    };

    this.conns = new Map(); // connId -> conn
    this.channels = new Map(); // exact channel -> Set<connId>
    this.patterns = new Map(); // wildcard pattern -> Set<connId>
    this.procs = new Map(); // procName -> { workers: Set<connId>, inflight: Map<connId, n> }
    this.pending = new Map(); // brokerCallId -> { callerConnId, callerCallId, ownerConnId, name, timer }

    this.nextConnId = 1;
    this.nextCallId = 1;
    this.dropped = 0; // count of pub/sub frames dropped due to backpressure

    // Observability: cheap monotonic counters bumped on the hot path (one integer
    // increment per message), surfaced via the STATS request and optional 'stats' emit.
    this.startedAt = nowMs();
    this.stats = { ops: {}, reaped: 0 };
    this._cpuBase = process.cpuUsage();
    this._cpuBaseAt = nowMs();
    this.statsInterval = opts.statsInterval || 0; // ms; 0 = no periodic emit
    this._statsTimer = null;

    this.idleTimeout = opts.idleTimeout || 0; // ms; 0 = never auto-shutdown
    this._idleTimer = null;
    this.heartbeatInterval = opts.heartbeatInterval == null ? 30000 : opts.heartbeatInterval;
    this._heartbeatTimer = null;
    this.server = null;

    // Shutdown guard: makes close() idempotent and stops teardown from re-arming the idle timer.
    this._closing = false;
    this._closePromise = null;
  }

  /**
   * Load persisted state, start listening, then start the heartbeat, stats and persistence timers.
   * @returns {Promise<Broker>} this broker, once it is listening
   */
  async start() {
    // Clear the shutdown guard so a broker that was closed can be started again.
    this._closing = false;
    this._closePromise = null;
    // Recover persisted cache/atomics BEFORE listening, so no request is ever served against
    // half-loaded state. Seed the fencing counter past every token issued before the crash.
    await this.persist.load(this.store);
    this.nextToken = Math.max(this.nextToken, this.persist.loadedToken || 0);
    await this._listen();
    this._startHeartbeat();
    this._startStats();
    this.persist.start();
    return this;
  }

  /**
   * Bind the server to `this.address`. On EADDRINUSE, probe the address: if something answers,
   * rethrow; otherwise remove the stale socket file (non-pipe addresses only) and retry once.
   */
  async _listen() {
    this.server = net.createServer((socket) => this._onConnection(socket));
    try {
      await this._tryListen();
    } catch (err) {
      if (err.code !== 'EADDRINUSE') throw err;
      // Address in use: is a live broker already there, or is this a stale socket?
      const alive = await this._probe();
      if (alive) throw err; // genuine — caller should just connect instead
      if (!isPipe(this.address)) {
        try {
          fs.unlinkSync(this.address);
        } catch {
          /* best effort: if the unlink failed, the retry below reports the real error */
        }
      }
      this.server = net.createServer((socket) => this._onConnection(socket));
      await this._tryListen();
    }
  }

  /** One listen attempt on `this.server`; rejects with the server's first 'error' event. */
  _tryListen() {
    return new Promise((resolve, reject) => {
      const onError = (err) => reject(err);
      this.server.once('error', onError);
      this.server.listen(this.address, () => {
        this.server.removeListener('error', onError);
        resolve();
      });
    });
  }

  /**
   * Try to connect to `this.address`.
   * @returns {Promise<boolean>} true if the connection succeeded, false on error or after 500 ms
   */
  _probe() {
    return new Promise((resolve) => {
      const socket = net.connect(this.address);
      const done = (alive) => {
        socket.destroy();
        resolve(alive);
      };
      socket.once('connect', () => done(true));
      socket.once('error', () => done(false));
      socket.setTimeout(500, () => done(false));
    });
  }

  /** Wrap an accepted socket in a Peer, register the connection and wire its events. */
  _onConnection(socket) {
    socket.setNoDelay(true);
    const conn = {
      id: this.nextConnId++,
      peer: new Peer(socket, this.codec, this.peerOpts),
      name: null,
      authed: !this.token, // if no token configured, every conn is implicitly authed
      lastSeen: nowMs(),
      subs: new Set(),
      procs: new Set(),
    };
    this.conns.set(conn.id, conn);
    this.emit('connect', conn.id);
    this._cancelIdle();
    conn.peer.on('message', (msg) => {
      conn.lastSeen = nowMs();
      this._handle(conn, msg);
    });
    conn.peer.on('close', () => this._onClose(conn));
    conn.peer.on('error', () => {
      /* 'close' handles cleanup */
    });
  }

  /** Send an OK reply; requests without an id (fire-and-forget) get no reply. */
  _ok(conn, id, value) {
    if (id != null) conn.peer.send({ t: TYPES.OK, id, value });
  }

  /** Send an ERR reply; requests without an id (fire-and-forget) get no reply. */
  _err(conn, id, message, code) {
    if (id != null) conn.peer.send({ t: TYPES.ERR, id, message, code });
  }

  // Persistence loggers. Each early-returns when persistence is disabled so the hot path
  // allocates no record object in that configuration.

  /** Log a set effect with absolute expiry (0 = no TTL). `ttl` is relative, in ms. */
  _logSet(key, value, ttl) {
    if (!this.persist.enabled) return;
    this.persist.logMutation({ op: 'set', k: key, v: value, e: ttl && ttl > 0 ? Date.now() + ttl : 0 });
  }

  /** Log a delete of `key`. */
  _logDel(key) {
    if (!this.persist.enabled) return;
    this.persist.logMutation({ op: 'del', k: key });
  }

  /** Log a full cache clear. */
  _logClear() {
    if (!this.persist.enabled) return;
    this.persist.logMutation({ op: 'clear' });
  }

  /** A successful CAS is either a set (to `next`) or a delete (when `next` is undefined). */
  _logCasEffect(key, next) {
    if (!this.persist.enabled) return;
    if (next === undefined) this.persist.logMutation({ op: 'del', k: key });
    else this._logSet(key, next, this.store.remainingTTL(key)); // preserve any in-place TTL
  }

  /**
   * Issue the next monotonic fencing token. Single event loop ⇒ no locking needed.
   * @returns {number} the new token
   * @throws {Error} code 'EFENCEEXHAUSTED' once the counter reaches Number.MAX_SAFE_INTEGER
   */
  _mintToken() {
    if (this.nextToken >= Number.MAX_SAFE_INTEGER) {
      const err = new Error('fencing token space exhausted');
      err.code = 'EFENCEEXHAUSTED';
      throw err;
    }
    this.nextToken += 1;
    if (this.persist) this.persist.noteToken(this.nextToken);
    return this.nextToken;
  }

  /**
   * Gate a fenced mutation: reject (EFENCED) if the presented token is older than the highest
   * ever seen for the governing lock key — i.e. the caller's lock was superseded. Throws so
   * the surrounding _handle try/catch relays it as an ERR. On success, raises the bar.
   *
   * The token arrives off the wire, so it is validated first. Minted tokens are always safe
   * integers; anything else is rejected. Without this check `token < high` is false for NaN,
   * non-numeric strings and objects, which would let a malformed token through, and Infinity
   * would be handed to bumpFence.
   *
   * @param {string} lockKey key of the lock that guards the mutation
   * @param {number} token fencing token presented by the caller
   * @throws {Error} code 'EFENCED' when the token is missing, malformed or stale
   */
  _fence(lockKey, token) {
    if (token != null && !Number.isSafeInteger(token)) {
      const err = new Error(`fenced: invalid token for "${lockKey}"`);
      err.code = 'EFENCED';
      throw err;
    }
    const high = this.locks.getFenceHigh(lockKey);
    if (token == null || token < high) {
      const err = new Error(`fenced: token ${token} < ${high} for "${lockKey}"`);
      err.code = 'EFENCED';
      throw err;
    }
    this.locks.bumpFence(lockKey, token);
  }

  /**
   * Dispatch one inbound message. Synchronous failures are relayed to the sender as ERR
   * (code from the error, else 'EBROKER'); the asynchronous LOCK path relays its own failures.
   * Mutating cache operations are logged to persistence BEFORE the ack is sent, so a failure
   * while sending the reply can never leave an applied mutation unlogged.
   *
   * @param {object} conn connection record created in _onConnection
   * @param {object} msg decoded frame; `t` is the type tag, `id` the optional request id
   */
  _handle(conn, msg) {
    // A frame that did not decode to an object carries no type tag and no id to answer.
    // Ignore it instead of letting the destructuring throw out of the 'message' listener.
    if (msg === null || typeof msg !== 'object') return;
    const { t, id } = msg;

    // Handshake / auth gate: until a connection says HELLO with a valid token,
    // it may only send HELLO, PING, or PONG.
    if (!conn.authed && t !== TYPES.HELLO && t !== TYPES.PING && t !== TYPES.PONG) {
      this._err(conn, id, 'not authenticated', 'EAUTH');
      return;
    }

    // Count ops only for KNOWN types, and only after the auth gate. `t` is
    // attacker-controlled, so keying the counter by an arbitrary string before
    // these checks would let an (even unauthenticated) peer grow this object
    // without bound — a memory-exhaustion DoS. Unknown types fall through to the
    // `default` branch and never create a counter key.
    if (KNOWN_TYPES.has(t)) {
      this.stats.ops[t] = (this.stats.ops[t] || 0) + 1;
    }

    try {
      switch (t) {
        case TYPES.HELLO:
          // NOTE(review): `!==` is not a constant-time comparison; consider
          // crypto.timingSafeEqual if the broker is reachable by untrusted peers.
          if (this.token && msg.token !== this.token) {
            this._err(conn, id, 'invalid auth token', 'EAUTH');
            conn.peer.destroy();
            return;
          }
          conn.authed = true;
          conn.name = msg.name || null;
          conn.peer.send({ t: TYPES.WELCOME, id, version: PROTOCOL_VERSION, broker: this.name });
          break;
        case TYPES.PING:
          conn.peer.send({ t: TYPES.PONG, id });
          break;
        case TYPES.PONG:
          break; // lastSeen already stamped on receipt
        case TYPES.SHUTDOWN:
          this._ok(conn, id, true);
          setImmediate(() => this.close());
          break;
        case TYPES.STATS:
          this._ok(conn, id, this.snapshot());
          break;

        // ---- cache ----
        case TYPES.GET:
          this._ok(conn, id, this.store.get(msg.key));
          break;
        case TYPES.SET: {
          const stored = this.store.set(msg.key, msg.value, msg.ttl);
          this._logSet(msg.key, msg.value, msg.ttl);
          this._ok(conn, id, stored);
          break;
        }
        case TYPES.DEL: {
          const removed = this.store.del(msg.key);
          this._logDel(msg.key);
          this._ok(conn, id, removed);
          break;
        }
        case TYPES.HAS:
          this._ok(conn, id, this.store.has(msg.key));
          break;
        case TYPES.KEYS:
          this._ok(conn, id, this.store.keys());
          break;
        case TYPES.CLEAR: {
          const cleared = this.store.clear();
          this._logClear();
          this._ok(conn, id, cleared);
          break;
        }
        case TYPES.MGET:
          this._ok(conn, id, this.store.mget(msg.keys || []));
          break;
        case TYPES.MSET: {
          const entries = msg.entries || [];
          const stored = this.store.mset(entries);
          for (const [k, v] of entries) this._logSet(k, v, 0);
          this._ok(conn, id, stored);
          break;
        }

        // ---- atomic ----
        case TYPES.INCR: {
          const next = this.store.incr(msg.key, msg.by == null ? 1 : msg.by);
          this._logSet(msg.key, next, this.store.remainingTTL(msg.key)); // preserve any TTL
          this._ok(conn, id, next);
          break;
        }
        case TYPES.DECR: {
          const next = this.store.incr(msg.key, -(msg.by == null ? 1 : msg.by));
          this._logSet(msg.key, next, this.store.remainingTTL(msg.key)); // preserve any TTL
          this._ok(conn, id, next);
          break;
        }
        case TYPES.CAS: {
          const ok = this.store.cas(msg.key, msg.prev, msg.next);
          if (ok) this._logCasEffect(msg.key, msg.next);
          this._ok(conn, id, ok);
          break;
        }

        // ---- locks ----
        case TYPES.LOCK:
          // acquire() settles later, outside the surrounding try/catch, so a rejection must
          // be relayed here; otherwise it becomes an unhandled rejection and the caller
          // never receives a reply.
          this.locks
            .acquire(msg.key, conn.id, { ttl: msg.ttl, wait: msg.wait })
            .then((res) => this._ok(conn, id, res))
            .catch((err) => {
              const message = err && err.message ? err.message : String(err);
              this._err(conn, id, message, (err && err.code) || 'EBROKER');
            });
          break;
        case TYPES.UNLOCK:
          this._ok(conn, id, this.locks.release(msg.key, conn.id));
          break;

        // ---- fenced mutations (gated by a lock's fencing token) ----
        // `msg.key` is the lock key that governs the write, `msg.k` the cache key written.
        case TYPES.FSET: {
          this._fence(msg.key, msg.token);
          const stored = this.store.set(msg.k, msg.value, msg.ttl);
          this._logSet(msg.k, msg.value, msg.ttl);
          this._ok(conn, id, stored);
          break;
        }
        case TYPES.FCAS: {
          this._fence(msg.key, msg.token);
          const ok = this.store.cas(msg.k, msg.prev, msg.next);
          if (ok) this._logCasEffect(msg.k, msg.next);
          this._ok(conn, id, ok);
          break;
        }
        case TYPES.FDEL: {
          this._fence(msg.key, msg.token);
          const removed = this.store.del(msg.k);
          this._logDel(msg.k);
          this._ok(conn, id, removed);
          break;
        }

        // ---- pub/sub ----
        case TYPES.SUBSCRIBE:
          this._subscribe(conn, msg.channel);
          this._ok(conn, id, true);
          break;
        case TYPES.UNSUBSCRIBE:
          this._unsubscribe(conn, msg.channel);
          this._ok(conn, id, true);
          break;
        case TYPES.PUBLISH:
          this._ok(conn, id, this._publish(msg.channel, msg.payload));
          break;

        // ---- rpc ----
        case TYPES.REGISTER:
          this._register(conn, msg.name);
          this._ok(conn, id, true);
          break;
        case TYPES.UNREGISTER:
          this._unregister(conn, msg.name);
          this._ok(conn, id, true);
          break;
        case TYPES.CALL:
          this._call(conn, msg);
          break;
        case TYPES.RESULT:
          this._result(msg, conn);
          break;

        default:
          this._err(conn, id, `unknown message type: ${t}`, 'EUNKNOWN');
      }
    } catch (err) {
      this._err(conn, id, err.message, err.code || 'EBROKER');
    }
  }

  // ------------------------------------------------------------------- pub/sub

  /** Subscribe `conn` to an exact channel or, when isPattern(channel), a wildcard pattern. */
  _subscribe(conn, channel) {
    conn.subs.add(channel);
    const map = isPattern(channel) ? this.patterns : this.channels;
    let set = map.get(channel);
    if (!set) {
      set = new Set();
      map.set(channel, set);
    }
    set.add(conn.id);
  }

  /** Remove a subscription and drop the channel/pattern entry once it has no subscribers. */
  _unsubscribe(conn, channel) {
    conn.subs.delete(channel);
    const map = isPattern(channel) ? this.patterns : this.channels;
    const set = map.get(channel);
    if (set) {
      set.delete(conn.id);
      if (set.size === 0) map.delete(channel);
    }
  }

  /**
   * Fan a payload out to every subscriber of `channel`.
   * @returns {number} how many connections the frame was sent to without being dropped
   */
  _publish(channel, payload) {
    // Collect target conns: exact subscribers + any matching wildcard patterns.
    // Dedupe so a conn subscribed both exactly and by pattern gets one copy.
    const targets = new Set(this.channels.get(channel) || []);
    if (this.patterns.size) {
      for (const [pattern, set] of this.patterns) {
        if (matchTopic(pattern, channel)) {
          for (const cid of set) targets.add(cid);
        }
      }
    }
    let delivered = 0;
    for (const cid of targets) {
      const c = this.conns.get(cid);
      if (!c) continue;
      const r = c.peer.send({ t: TYPES.MESSAGE, channel, payload }, { droppable: true });
      if (r === 'dropped') {
        this.dropped++;
        this.emit('drop', { channel, connId: cid });
      } else {
        delivered++;
      }
    }
    return delivered;
  }

  // ----------------------------------------------------------------------- rpc

  /** Register `conn` as a worker for procedure `name`, with an in-flight count starting at 0. */
  _register(conn, name) {
    let entry = this.procs.get(name);
    if (!entry) {
      entry = { workers: new Set(), inflight: new Map() };
      this.procs.set(name, entry);
    }
    entry.workers.add(conn.id);
    if (!entry.inflight.has(conn.id)) entry.inflight.set(conn.id, 0);
    conn.procs.add(name);
  }

  /** Remove `conn` as a worker for `name`; drop the procedure once it has no workers. */
  _unregister(conn, name) {
    conn.procs.delete(name);
    const entry = this.procs.get(name);
    if (!entry) return;
    entry.workers.delete(conn.id);
    entry.inflight.delete(conn.id);
    if (entry.workers.size === 0) this.procs.delete(name);
  }

  /**
   * Pick the least-busy worker for a proc (fewest in-flight calls); ties go to the worker
   * that comes first in the set's iteration order.
   * @returns {number|null} the worker's connId, or null when the entry has no workers
   */
  _pickWorker(entry) {
    let best = null;
    let bestLoad = Infinity;
    for (const cid of entry.workers) {
      const load = entry.inflight.get(cid) || 0;
      if (load < bestLoad) {
        bestLoad = load;
        best = cid;
      }
    }
    return best;
  }

  /**
   * Route a CALL to a worker as an INVOKE and remember it in `pending` until a RESULT,
   * a disconnect or the backstop timer resolves it.
   */
  _call(conn, msg) {
    const entry = this.procs.get(msg.name);
    if (!entry || entry.workers.size === 0) {
      this._err(conn, msg.id, `no handler registered for "${msg.name}"`, 'ENOHANDLER');
      return;
    }
    const workerId = this._pickWorker(entry);
    const owner = workerId != null ? this.conns.get(workerId) : null;
    if (!owner) {
      // Stale worker entry (conn already gone) — prune it and report unavailable.
      if (workerId != null) {
        entry.workers.delete(workerId);
        entry.inflight.delete(workerId);
        if (entry.workers.size === 0) this.procs.delete(msg.name);
      }
      this._err(conn, msg.id, `handler "${msg.name}" unavailable`, 'ENOHANDLER');
      return;
    }
    const brokerCallId = this.nextCallId++;
    // Backstop timeout so a worker that stays connected but never replies can't leak a `pending`
    // entry forever or pin its `inflight` count (which would permanently skew least-busy dispatch).
    // Fire a grace period AFTER the caller's own deadline so the broker is the cleanup backstop and
    // doesn't race the client-side timeout that the user actually sees.
    // `msg.timeout` is untrusted: accept only a finite positive number (a numeric string would
    // concatenate with the grace instead of adding to it).
    const callerTimeout = Number.isFinite(msg.timeout) && msg.timeout > 0 ? msg.timeout : this.callTimeout;
    // Node fires a timer after ~1 ms when the delay exceeds 2^31-1, so clamp to that maximum.
    const delay = Math.min(callerTimeout + this.callTimeoutGrace, 2147483647);
    const timer = setTimeout(() => this._expireCall(brokerCallId), delay);
    if (timer.unref) timer.unref();
    this.pending.set(brokerCallId, {
      callerConnId: conn.id,
      callerCallId: msg.id,
      ownerConnId: owner.id,
      name: msg.name,
      timer,
    });
    entry.inflight.set(owner.id, (entry.inflight.get(owner.id) || 0) + 1);
    owner.peer.send({ t: TYPES.INVOKE, id: brokerCallId, name: msg.name, args: msg.args || [] });
  }

  /** A pending call exceeded its deadline (worker hung while connected): free state, fail the caller. */
  _expireCall(brokerCallId) {
    const p = this.pending.get(brokerCallId);
    if (!p) return;
    this.pending.delete(brokerCallId);
    this._decInflight(p.name, p.ownerConnId);
    const caller = this.conns.get(p.callerConnId);
    if (caller) this._err(caller, p.callerCallId, `rpc call "${p.name}" timed out`, 'ECALLTIMEOUT');
  }

  /** Decrement a worker's in-flight count for `name`, never going below zero. */
  _decInflight(name, ownerConnId) {
    const entry = this.procs.get(name);
    if (!entry) return;
    const cur = entry.inflight.get(ownerConnId);
    if (cur != null) entry.inflight.set(ownerConnId, Math.max(0, cur - 1));
  }

  /**
   * Complete a pending call with a worker's RESULT and relay it to the original caller.
   *
   * @param {object} msg RESULT frame; `id` is the broker call id, plus `result` or `error`
   * @param {object} [conn] connection the frame arrived on. When given, the frame is ignored
   *   unless it comes from the worker the call was dispatched to. Broker call ids are
   *   sequential, so without this check any connection could forge another call's result.
   */
  _result(msg, conn) {
    const p = this.pending.get(msg.id);
    if (!p) return;
    if (conn && conn.id !== p.ownerConnId) return;
    this.pending.delete(msg.id);
    clearTimeout(p.timer);
    this._decInflight(p.name, p.ownerConnId);
    const caller = this.conns.get(p.callerConnId);
    if (!caller) return;
    if (msg.error) {
      this._err(caller, p.callerCallId, msg.error.message, msg.error.code || 'ECALL');
    } else {
      this._ok(caller, p.callerCallId, msg.result);
    }
  }

  // ------------------------------------------------------------------ lifecycle

  /** Tear down everything a closed connection owned: subscriptions, procs, locks, pending calls. */
  _onClose(conn) {
    this.conns.delete(conn.id);
    this.emit('disconnect', conn.id);
    for (const ch of conn.subs) {
      const map = isPattern(ch) ? this.patterns : this.channels;
      const set = map.get(ch);
      if (set) {
        set.delete(conn.id);
        if (set.size === 0) map.delete(ch);
      }
    }
    for (const name of conn.procs) {
      const entry = this.procs.get(name);
      if (entry) {
        entry.workers.delete(conn.id);
        entry.inflight.delete(conn.id);
        if (entry.workers.size === 0) this.procs.delete(name);
      }
    }
    this.locks.releaseAll(conn.id);
    // Fail in-flight calls owned by this connection; drop calls it originated.
    for (const [bid, p] of this.pending) {
      if (p.ownerConnId === conn.id) {
        clearTimeout(p.timer);
        this.pending.delete(bid);
        this._decInflight(p.name, p.ownerConnId);
        const caller = this.conns.get(p.callerConnId);
        if (caller) this._err(caller, p.callerCallId, 'rpc handler disconnected', 'EHANDLERGONE');
      } else if (p.callerConnId === conn.id) {
        clearTimeout(p.timer);
        this.pending.delete(bid);
        this._decInflight(p.name, p.ownerConnId);
      }
    }
    this._scheduleIdle();
  }

  /** Start the periodic heartbeat sweep (no-op when heartbeatInterval is 0). */
  _startHeartbeat() {
    if (!this.heartbeatInterval) return;
    this._heartbeatTimer = setInterval(() => this._sweepHeartbeat(), this.heartbeatInterval);
    if (this._heartbeatTimer.unref) this._heartbeatTimer.unref();
  }

  /** Ping connections silent for more than one interval; destroy those silent for more than three. */
  _sweepHeartbeat() {
    const now = nowMs();
    // Reap only after 3 intervals of silence. The ping window (idle > interval)
    // must be at least 2 sweeps wide so a healthy conn is always pinged — and
    // gets a chance to answer — before it can ever be reaped.
    const reapAfter = this.heartbeatInterval * 3;
    for (const conn of this.conns.values()) {
      const idle = now - conn.lastSeen;
      if (idle > reapAfter) {
        this.stats.reaped += 1;
        this.emit('reap', conn.id);
        conn.peer.destroy();
      } else if (idle > this.heartbeatInterval) {
        conn.peer.send({ t: TYPES.PING });
      }
    }
  }

  /** Start the periodic 'stats' emit (no-op when statsInterval is 0). */
  _startStats() {
    if (!this.statsInterval) return;
    this._statsTimer = setInterval(() => this.emit('stats', this.snapshot()), this.statsInterval);
    if (this._statsTimer.unref) this._statsTimer.unref();
  }

  /**
   * A point-in-time operational snapshot — served on STATS and emitted periodically.
   * `cpuCoreFraction` is CPU time (user + system) since construction divided by the
   * elapsed time over the same window.
   */
  snapshot() {
    const cpu = process.cpuUsage(this._cpuBase);
    const windowUs = Math.max(1, (nowMs() - this._cpuBaseAt) * 1000);
    const lockStats = this.locks.stats();
    const procs = [];
    for (const [name, entry] of this.procs) procs.push({ name, workers: entry.workers.size });
    return {
      uptimeMs: nowMs() - this.startedAt,
      connections: this.conns.size,
      cacheSize: this.store.size,
      ops: { ...this.stats.ops },
      dropped: this.dropped,
      reaped: this.stats.reaped,
      locks: lockStats.locks,
      lockWaiters: lockStats.waiters,
      pendingCalls: this.pending.size,
      subscriptions: this.channels.size + this.patterns.size,
      procs,
      memory: process.memoryUsage(),
      cpuCoreFraction: (cpu.user + cpu.system) / windowUs,
    };
  }

  /**
   * Arm the auto-shutdown timer once the last connection is gone. Skipped while the broker
   * is closing: close() destroys every connection, and their close events would otherwise
   * arm a timer that calls close() again later.
   */
  _scheduleIdle() {
    if (this._closing || !this.idleTimeout || this.conns.size > 0) return;
    this._cancelIdle();
    this._idleTimer = setTimeout(() => this.close(), this.idleTimeout);
    if (this._idleTimer.unref) this._idleTimer.unref();
  }

  /** Disarm the auto-shutdown timer, if armed. */
  _cancelIdle() {
    if (this._idleTimer) {
      clearTimeout(this._idleTimer);
      this._idleTimer = null;
    }
  }

  /**
   * Shut the broker down. Idempotent: repeated calls (e.g. a SHUTDOWN request racing the idle
   * timer) share one teardown instead of flushing persistence and closing the server twice.
   * @returns {Promise<void>} settles when teardown has finished
   */
  close() {
    if (!this._closing) {
      this._closing = true; // set before teardown so close events cannot re-arm the idle timer
      this._closePromise = this._teardown();
    }
    return this._closePromise;
  }

  /** Stop timers, destroy connections, flush persistence, close the server, remove the socket file. */
  async _teardown() {
    this._cancelIdle();
    if (this._heartbeatTimer) {
      clearInterval(this._heartbeatTimer);
      this._heartbeatTimer = null;
    }
    if (this._statsTimer) {
      clearInterval(this._statsTimer);
      this._statsTimer = null;
    }
    for (const c of this.conns.values()) c.peer.destroy();
    this.conns.clear();
    await this.persist.flushAndClose(); // final snapshot + fsync → planned restart is lossless
    if (this.server) {
      await new Promise((resolve) => this.server.close(() => resolve()));
    }
    if (!isPipe(this.address)) {
      try {
        fs.unlinkSync(this.address);
      } catch {
        /* best effort: the socket file may already be gone */
      }
    }
  }
}
|
|
670
|
+
|
|
671
|
+
/**
 * Current reading of the process high-resolution timer, truncated to whole milliseconds.
 * Used for interval arithmetic (idle time, uptime) so the hot path needs no Date object.
 * @returns {number} milliseconds as a plain Number
 */
function nowMs() {
  const nanos = process.hrtime.bigint();
  const millis = nanos / 1000000n; // BigInt division truncates toward zero
  return Number(millis);
}
|
|
675
|
+
|
|
676
|
+
module.exports = Broker;
|