@cotal-ai/core 0.3.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/agent-file.d.ts +36 -5
- package/dist/agent-file.d.ts.map +1 -1
- package/dist/agent-file.js +91 -11
- package/dist/agent-file.js.map +1 -1
- package/dist/channels.d.ts +13 -2
- package/dist/channels.d.ts.map +1 -1
- package/dist/channels.js +24 -1
- package/dist/channels.js.map +1 -1
- package/dist/command.d.ts +21 -0
- package/dist/command.d.ts.map +1 -1
- package/dist/connector-config.d.ts +42 -0
- package/dist/connector-config.d.ts.map +1 -0
- package/dist/connector-config.js +103 -0
- package/dist/connector-config.js.map +1 -0
- package/dist/connector.d.ts +11 -0
- package/dist/connector.d.ts.map +1 -1
- package/dist/endpoint.d.ts +331 -40
- package/dist/endpoint.d.ts.map +1 -1
- package/dist/endpoint.js +1280 -246
- package/dist/endpoint.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/members.d.ts +93 -0
- package/dist/members.d.ts.map +1 -0
- package/dist/members.js +193 -0
- package/dist/members.js.map +1 -0
- package/dist/provision.d.ts +49 -11
- package/dist/provision.d.ts.map +1 -1
- package/dist/provision.js +92 -31
- package/dist/provision.js.map +1 -1
- package/dist/resolve.d.ts +53 -0
- package/dist/resolve.d.ts.map +1 -0
- package/dist/resolve.js +61 -0
- package/dist/resolve.js.map +1 -0
- package/dist/streams.d.ts +37 -0
- package/dist/streams.d.ts.map +1 -1
- package/dist/streams.js +91 -4
- package/dist/streams.js.map +1 -1
- package/dist/subjects.d.ts +80 -2
- package/dist/subjects.d.ts.map +1 -1
- package/dist/subjects.js +127 -3
- package/dist/subjects.js.map +1 -1
- package/dist/types.d.ts +111 -5
- package/dist/types.d.ts.map +1 -1
- package/package.json +4 -2
package/dist/endpoint.js
CHANGED
|
@@ -1,23 +1,33 @@
|
|
|
1
1
|
import { EventEmitter } from "node:events";
|
|
2
2
|
import { randomUUID } from "node:crypto";
|
|
3
|
-
import { connect, credsAuthenticator, nanos, AuthorizationError, PermissionViolationError, UserAuthenticationExpiredError, } from "@nats-io/transport-node";
|
|
3
|
+
import { connect, credsAuthenticator, nanos, AuthorizationError, PermissionViolationError, UserAuthenticationExpiredError, NoRespondersError, RequestError, } from "@nats-io/transport-node";
|
|
4
4
|
import { idFromCreds } from "./identity.js";
|
|
5
|
-
import {
|
|
5
|
+
import { assertValidName } from "./resolve.js";
|
|
6
|
+
import { createSpaceStreams, dmDurableConfig, dlvDurableConfig, taskDurableConfig, fanoutDurableConfig, inboxReaderConfig, MAX_MSGS_PER_SUBJECT } from "./streams.js";
|
|
6
7
|
import { jetstream, jetstreamManager, AckPolicy, DeliverPolicy, } from "@nats-io/jetstream";
|
|
7
8
|
import { Kvm } from "@nats-io/kv";
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
9
|
+
import { openMembersRegistry, commitMember, tombstoneMember, activateMember, readMember, listMembers, durableEligible, StaleMembershipWrite, } from "./members.js";
|
|
10
|
+
import { openChannelRegistry, effectiveReplay, effectiveReplayWindowMs, effectiveDeliveryClass, readChannelConfig, readChannelDefaults, } from "./channels.js";
|
|
11
|
+
import { anycastSubject, CHANNEL_DEFAULTS_KEY, chatStream, chatHistDurable, chatSubject, controlServiceSubject, CONTROL_SELF_SERVICE, dmStream, dmDurable, dlvStream, dlvDurable, dlvSubject, dinboxSubject, inboxStream, parseDinboxOwner, FANOUT_DURABLE, INBOX_READER_DURABLE, chatWildcard, channelInAllow, isConcreteChannel, normalizeMentions, parseSubject, presenceBucket, spacePrefix, spaceWildcard, subjectMatches, taskStream, taskDurable, token, unicastSubject, } from "./subjects.js";
|
|
10
12
|
export const DEFAULT_SERVER = "nats://127.0.0.1:4222";
|
|
11
13
|
/** Space joined when none is given on the CLI (the `cotal-<space>` cmux tab, etc.). */
|
|
12
14
|
export const DEFAULT_SPACE = "main";
|
|
13
15
|
/**
|
|
14
|
-
* Events: "message" (CotalMessage), "presence" (PresenceEvent), "roster" (Presence[]), "error" (Error)
|
|
16
|
+
* Events: "message" (CotalMessage), "presence" (PresenceEvent), "roster" (Presence[]), "error" (Error),
|
|
17
|
+
* "connection" ({ connected: boolean }) — true on every successful (re)bind (initial start, manual
|
|
18
|
+
* reconnect, AND background self-heal), false the moment the connection drops (rebuild null window /
|
|
19
|
+
* terminal close). Lets an in-process agent track connectedness off the endpoint's own (re)binds
|
|
20
|
+
* instead of an imperative flag the self-heal path can't reach.
|
|
15
21
|
*
|
|
16
22
|
* Callers MUST attach an "error" listener before `start()`: async faults (incl. NATS
|
|
17
23
|
* permission denials, surfaced via `watchStatus`) are emitted as "error", and Node throws
|
|
18
24
|
* synchronously on an unhandled "error" — a missing listener turns any such fault into a
|
|
19
25
|
* process crash instead of a logged denial.
|
|
20
26
|
*/
|
|
27
|
+
/** Plane-3 trusted-reader redelivery ceiling: a dinbox entry that keeps failing re-auth-defer
|
|
28
|
+
* (unknown owner) or DELIVER transfer is `term()`d + surfaced after this many redeliveries, so one
|
|
29
|
+
* stuck/poison entry can't head-of-line the single shared reader forever. */
|
|
30
|
+
const READER_MAX_REDELIVERIES = 10;
|
|
21
31
|
export class CotalEndpoint extends EventEmitter {
|
|
22
32
|
card;
|
|
23
33
|
space;
|
|
@@ -40,6 +50,11 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
40
50
|
jsm;
|
|
41
51
|
kv;
|
|
42
52
|
channelKv;
|
|
53
|
+
/** Plane-3 durable-membership registry KV — lazily opened by the privileged (manager) endpoint. */
|
|
54
|
+
membersKv;
|
|
55
|
+
/** When set, this endpoint hosts the Plane-3 fan-out writer + trusted reader (the manager). `aclFor`
|
|
56
|
+
* maps an owner id to its current read ACL (`allowSubscribe`) for the reader's re-authorization. */
|
|
57
|
+
plane3;
|
|
43
58
|
/** Live local cache of the channel registry (key = channel token), kept by a KV watch. */
|
|
44
59
|
channelConfigs = new Map();
|
|
45
60
|
channelDefaults = {};
|
|
@@ -48,17 +63,72 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
48
63
|
* a lagging joiner + dedups the backfill overlap). Keyed by the subscription pattern (may be
|
|
49
64
|
* wildcard), so the drop matches every concrete channel the pattern subsumes. */
|
|
50
65
|
joinSeq = new Map();
|
|
66
|
+
/** Serializes history reads ({@link collectHistory}): they share the fixed per-instance
|
|
67
|
+
* `chathist_<id>` consumer, so overlapping reads would delete/recreate it under one another. */
|
|
68
|
+
histLock = Promise.resolve();
|
|
51
69
|
subs = [];
|
|
52
70
|
streamMsgs = [];
|
|
71
|
+
/** Per-channel native core subscriptions (SPEC v0.3) — the manager-free live read path for boot +
|
|
72
|
+
* runtime channels (there is no per-instance chat durable). Keyed by channel so leave unsubscribes
|
|
73
|
+
* just one. */
|
|
74
|
+
chatSubs = new Map();
|
|
75
|
+
/** Channels whose core-sub the broker refused (async sub.allow violation) — read by the
|
|
76
|
+
* broker-confirmed join: a denied subscribe is NOT a successful join (SPEC conformance #13). */
|
|
77
|
+
chatSubDenied = new Set();
|
|
78
|
+
/** Channels this session has a Plane-3 durable backstop for (per-channel join GENERATION, from
|
|
79
|
+
* durableJoin, so leave passes it back for the stale-leave guard). A durable channel's core-sub is
|
|
80
|
+
* NOT coverage-dropped — it stays a live wake-hint, dedup-coalesced with the Plane-3 durable copy by
|
|
81
|
+
* id-dedup. Drives the durable-state surface + routes leave to `durableLeave`. PERSISTS across
|
|
82
|
+
* reconnect (like `this.channels`): the membership record + the `dlv_<id>` durable are persistent so
|
|
83
|
+
* the backstop survives a reconnect on its own; the agent can't re-read the privileged members KV,
|
|
84
|
+
* so this in-memory mirror is kept, not rebuilt. Cleared only on full stop. */
|
|
85
|
+
plane3Channels = new Map();
|
|
86
|
+
/** Channels whose live sub was REFUSED while they held a Plane-3 durable membership, whose §7
|
|
87
|
+
* tombstone has not yet confirmed (channel → join generation). {@link closeRefusedMembership} retries
|
|
88
|
+
* the tombstone until it lands; until then this is a `durable-unclosed` state surfaced via
|
|
89
|
+
* {@link pendingDurableLeaves} (the connector shows it in `cotal_channels`, never as ordinary
|
|
90
|
+
* absence). Persists across reconnect; cleared on tombstone success or full stop. */
|
|
91
|
+
pendingDurableLeave = new Map();
|
|
92
|
+
/** Chat-join subjects currently being broker-confirmed. An out-of-ACL subscribe among these trips an
|
|
93
|
+
* EXPECTED async permission violation that joinChannel turns into a clean throw, so watchStatus
|
|
94
|
+
* suppresses it rather than surfacing a spurious connection error. */
|
|
95
|
+
confirmingChatSubs = new Set();
|
|
96
|
+
/** True until the first successful connect completes its boot backfill — distinguishes first-connect
|
|
97
|
+
* (backfill the boot channels' history) from a reconnect (reopen the core-subs, no re-backfill).
|
|
98
|
+
* Persists across reconnect (NOT connection-scoped). Replaces the legacy chat-durable consumed-cursor
|
|
99
|
+
* signal now that there is no per-instance chat durable. */
|
|
100
|
+
firstConnect = true;
|
|
53
101
|
heartbeatTimer;
|
|
54
102
|
sweepTimer;
|
|
55
103
|
roster = new Map();
|
|
56
104
|
status = "idle";
|
|
57
105
|
activity;
|
|
106
|
+
/** Mirror of the connector's authoritative attention state, published in presence (advisory). The
|
|
107
|
+
* endpoint never reads these back into delivery — they exist only to broadcast. */
|
|
108
|
+
attentionMode;
|
|
109
|
+
channelModes;
|
|
58
110
|
stopped = false;
|
|
111
|
+
/** In-flight rebuild (drain+rebind) — serializes manual reconnect, the supervisor's
|
|
112
|
+
* closed(), and reestablishLoop so only ONE rebuild runs at a time (a second trigger
|
|
113
|
+
* coalesces onto the shared promise, never starts a parallel connectAndBind). */
|
|
114
|
+
rebuildPromise;
|
|
115
|
+
/** True only during the null window of a rebuild (this.nc unset) — user-facing ops then
|
|
116
|
+
* throw a "reconnecting" message instead of the misleading "endpoint not started". */
|
|
117
|
+
reconnecting = false;
|
|
118
|
+
/** One reestablishLoop at a time; concurrent triggers coalesce via rebuild(). */
|
|
119
|
+
reestablishing = false;
|
|
120
|
+
/** Interruptible backoff for reestablishLoop — reconnect()/stop() resolves this to retry
|
|
121
|
+
* now instead of awaiting the full retryMs. */
|
|
122
|
+
backoffResolve;
|
|
123
|
+
backoffTimer;
|
|
124
|
+
retryMs = 3000;
|
|
59
125
|
constructor(opts) {
|
|
60
126
|
super();
|
|
61
127
|
this.space = opts.space;
|
|
128
|
+
// A display name is the client-side handle a peer is addressed by; reject the reserved `/`
|
|
129
|
+
// (the future owner/name separator) and surrounding whitespace at the one identity choke
|
|
130
|
+
// point every join/spawn path flows through.
|
|
131
|
+
assertValidName(opts.card.name);
|
|
62
132
|
// Identity precedence: an explicit card.id, else the creds' identity, else a random
|
|
63
133
|
// uuid. When both an id and creds are given they MUST name the same nkey — otherwise
|
|
64
134
|
// the subject sender token wouldn't match the authenticated user and every publish
|
|
@@ -80,6 +150,9 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
80
150
|
this.doRegister = opts.registerPresence ?? true;
|
|
81
151
|
this.doWatch = opts.watchPresence ?? true;
|
|
82
152
|
this.doConsume = opts.consume ?? true;
|
|
153
|
+
// Seed the presence mirror so file-default channel modes are visible from the first publish
|
|
154
|
+
// (not only after the first runtime toggle). Mirror only — delivery reads the connector's state.
|
|
155
|
+
this.channelModes = opts.channelModes && Object.keys(opts.channelModes).length ? opts.channelModes : undefined;
|
|
83
156
|
this.ackWaitMs = opts.ackWaitMs ?? 60_000;
|
|
84
157
|
this.inactiveThresholdMs = opts.inactiveThresholdMs ?? 600_000;
|
|
85
158
|
}
|
|
@@ -87,6 +160,19 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
87
160
|
return { id: this.card.id, name: this.card.name, role: this.card.role };
|
|
88
161
|
}
|
|
89
162
|
async start() {
|
|
163
|
+
await this.connectAndBind();
|
|
164
|
+
// nats.js auto-reconnects transient drops; when it exhausts its attempts and the
|
|
165
|
+
// connection closes for good, rebuild from scratch so an in-process agent (e.g. the
|
|
166
|
+
// OpenCode plugin) recovers without a host respawn. Armed only after a successful first
|
|
167
|
+
// connect — a first-connect failure throws to the caller's connect-retry loop instead.
|
|
168
|
+
this.superviseConnection();
|
|
169
|
+
}
|
|
170
|
+
/** Open the connection and bind everything that hangs off it: status watch, presence
|
|
171
|
+
* watch + heartbeat, channel registry, and the durable consumers. Re-runnable — a
|
|
172
|
+
* reconnect calls it again after {@link clearConnectionScoped}; every binding is
|
|
173
|
+
* idempotent (durables bind by name, JetStream dedups by msgID, KV opens are idempotent). */
|
|
174
|
+
async connectAndBind() {
|
|
175
|
+
this.clearConnectionScoped();
|
|
90
176
|
this.nc = await connect({
|
|
91
177
|
servers: this.servers,
|
|
92
178
|
name: `cotal:${this.card.name}`,
|
|
@@ -134,11 +220,199 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
134
220
|
await this.ensureStreams();
|
|
135
221
|
await this.startConsumers();
|
|
136
222
|
}
|
|
223
|
+
// Re-arm Plane-3 (manager-hosted fan-out + trusted reader) on every (re)connect — no-op unless this
|
|
224
|
+
// endpoint hosts it. The first arm comes from startPlane3 (after start()); this re-binds the loops
|
|
225
|
+
// a reconnect's clearConnectionScoped() tore down, so a broker blip doesn't silently kill the backstop.
|
|
226
|
+
await this.armPlane3();
|
|
227
|
+
// Bound and live — covers initial start, manual reconnect, AND background self-heal (every
|
|
228
|
+
// path lands here). The single signal an in-process agent's connected flag tracks.
|
|
229
|
+
this.emit("connection", { connected: true });
|
|
230
|
+
}
|
|
231
|
+
/** Tear down everything {@link connectAndBind} (re)creates, so a rebind can't leak a
|
|
232
|
+
* second heartbeat, double-pump a consumer, or keep stale roster ghosts. Caller-owned
|
|
233
|
+
* subs (tap/serve) are left alone — they aren't rebuilt here. */
|
|
234
|
+
clearConnectionScoped() {
|
|
235
|
+
if (this.heartbeatTimer) {
|
|
236
|
+
clearInterval(this.heartbeatTimer);
|
|
237
|
+
this.heartbeatTimer = undefined;
|
|
238
|
+
}
|
|
239
|
+
if (this.sweepTimer) {
|
|
240
|
+
clearInterval(this.sweepTimer);
|
|
241
|
+
this.sweepTimer = undefined;
|
|
242
|
+
}
|
|
243
|
+
for (const msgs of this.streamMsgs) {
|
|
244
|
+
try {
|
|
245
|
+
msgs.stop();
|
|
246
|
+
}
|
|
247
|
+
catch {
|
|
248
|
+
/* already closed with the connection */
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
this.streamMsgs.length = 0;
|
|
252
|
+
for (const sub of this.chatSubs.values()) {
|
|
253
|
+
try {
|
|
254
|
+
sub.unsubscribe();
|
|
255
|
+
}
|
|
256
|
+
catch {
|
|
257
|
+
/* already closed with the connection */
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
this.chatSubs.clear();
|
|
261
|
+
this.chatSubDenied.clear();
|
|
262
|
+
this.confirmingChatSubs.clear();
|
|
263
|
+
this.roster.clear();
|
|
264
|
+
this.joinSeq.clear();
|
|
265
|
+
this.channelConfigs.clear();
|
|
266
|
+
this.channelDefaults = {};
|
|
267
|
+
}
|
|
268
|
+
/** If stop() ran during a rebuild's `await connectAndBind`, the just-bound connection +
|
|
269
|
+
* heartbeat + supervisor would be left live on a stopped endpoint. Tear that fresh
|
|
270
|
+
* connection back down and report it. Reads `this.nc` in its own scope (a bare `this.nc`
|
|
271
|
+
* in doRebuild narrows to `never` via TS inlining connectAndBind's assignment). Returns
|
|
272
|
+
* true iff it tore something down (caller bails out of the rebuild). */
|
|
273
|
+
async tearDownIfStopped() {
|
|
274
|
+
if (!this.stopped)
|
|
275
|
+
return false;
|
|
276
|
+
const nc = this.nc;
|
|
277
|
+
this.clearConnectionScoped();
|
|
278
|
+
try {
|
|
279
|
+
await nc?.drain();
|
|
280
|
+
}
|
|
281
|
+
catch {
|
|
282
|
+
/* already closing */
|
|
283
|
+
}
|
|
284
|
+
this.nc = undefined;
|
|
285
|
+
return true;
|
|
286
|
+
}
|
|
287
|
+
/** Watch for a terminal close (nats.js has exhausted its own reconnect) and rebuild.
|
|
288
|
+
* Our own stop()/drain also resolves closed(), so the `stopped` guard keeps a clean
|
|
289
|
+
* shutdown from re-establishing. The identity guard (`this.nc !== nc`) no-ops a STALE
|
|
290
|
+
* supervisor — one whose connection reconnect()/rebuild already replaced — so only a
|
|
291
|
+
* close of the CURRENT connection triggers a rebuild. The rebuild itself is serialized
|
|
292
|
+
* with the manual path via {@link rebuild}. */
|
|
293
|
+
superviseConnection() {
|
|
294
|
+
const nc = this.nc;
|
|
295
|
+
if (!nc)
|
|
296
|
+
return;
|
|
297
|
+
void nc.closed().then((err) => {
|
|
298
|
+
if (this.stopped)
|
|
299
|
+
return;
|
|
300
|
+
if (this.nc !== nc)
|
|
301
|
+
return; // epoch-stale — a rebuild already swapped this connection
|
|
302
|
+
this.emit("connection", { connected: false }); // dropped — report it before the rebuild kicks in
|
|
303
|
+
this.emit("error", new Error(`mesh connection closed${err ? `: ${err.message}` : ""} — re-establishing`));
|
|
304
|
+
void this.reestablishLoop();
|
|
305
|
+
});
|
|
306
|
+
}
|
|
307
|
+
/** Single serialized rebuild: drain the old connection and rebind via {@link connectAndBind},
|
|
308
|
+
* guarded so concurrent triggers (manual {@link reconnect}, the supervisor's closed(), the
|
|
309
|
+
* retry loop) coalesce onto ONE in-flight rebuild instead of racing two connectAndBinds and
|
|
310
|
+
* leaking a connection. Returns the shared promise; a second caller gets the in-flight one. */
|
|
311
|
+
rebuild() {
|
|
312
|
+
if (this.rebuildPromise)
|
|
313
|
+
return this.rebuildPromise;
|
|
314
|
+
const p = this.doRebuild().finally(() => {
|
|
315
|
+
if (this.rebuildPromise === p)
|
|
316
|
+
this.rebuildPromise = undefined;
|
|
317
|
+
});
|
|
318
|
+
this.rebuildPromise = p;
|
|
319
|
+
return p;
|
|
320
|
+
}
|
|
321
|
+
/** The transition: stop the connection-scoped timers FIRST (so nothing live touches
|
|
322
|
+
* this.nc during the null window), drop the connection refs, drain the old nc, then
|
|
323
|
+
* rebind + re-arm the supervisor on the fresh connection. clearConnectionScoped is
|
|
324
|
+
* idempotent, so connectAndBind's own call here is a noop. */
|
|
325
|
+
async doRebuild() {
|
|
326
|
+
const oldNc = this.nc;
|
|
327
|
+
this.reconnecting = true;
|
|
328
|
+
try {
|
|
329
|
+
this.clearConnectionScoped();
|
|
330
|
+
this.nc = undefined;
|
|
331
|
+
this.js = undefined;
|
|
332
|
+
this.jsm = undefined;
|
|
333
|
+
this.kv = undefined;
|
|
334
|
+
this.channelKv = undefined;
|
|
335
|
+
this.emit("connection", { connected: false }); // null window opened — not live until the rebind below
|
|
336
|
+
try {
|
|
337
|
+
await oldNc?.drain();
|
|
338
|
+
}
|
|
339
|
+
catch {
|
|
340
|
+
/* already closing */
|
|
341
|
+
}
|
|
342
|
+
await this.connectAndBind();
|
|
343
|
+
// stop() may have run during the await — don't leave a live connection + heartbeat +
|
|
344
|
+
// supervisor on a stopped endpoint. (Reads this.nc in its own scope — a bare `this.nc`
|
|
345
|
+
// here in doRebuild narrows to `never` via TS inlining connectAndBind's assignment.)
|
|
346
|
+
if (await this.tearDownIfStopped())
|
|
347
|
+
return;
|
|
348
|
+
this.superviseConnection(); // re-arm on the fresh nc
|
|
349
|
+
}
|
|
350
|
+
finally {
|
|
351
|
+
this.reconnecting = false;
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
/** Rebuild with backoff until it sticks or we're stopped. Interruptible: a manual
|
|
355
|
+
* {@link reconnect} kicks the backoff so the next attempt runs immediately instead of
|
|
356
|
+
* awaiting the full retryMs. One loop at a time ({@link reestablishing}); concurrent
|
|
357
|
+
* triggers coalesce via {@link rebuild}. */
|
|
358
|
+
async reestablishLoop() {
|
|
359
|
+
if (this.reestablishing)
|
|
360
|
+
return;
|
|
361
|
+
this.reestablishing = true;
|
|
362
|
+
try {
|
|
363
|
+
while (!this.stopped) {
|
|
364
|
+
try {
|
|
365
|
+
await this.rebuild();
|
|
366
|
+
return; // success — re-armed; the supervisor re-triggers on the next terminal close
|
|
367
|
+
}
|
|
368
|
+
catch (e) {
|
|
369
|
+
if (!this.stopped)
|
|
370
|
+
this.emit("error", e);
|
|
371
|
+
await new Promise((resolve) => {
|
|
372
|
+
this.backoffResolve = resolve;
|
|
373
|
+
this.backoffTimer = setTimeout(resolve, this.retryMs);
|
|
374
|
+
});
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
finally {
|
|
379
|
+
this.reestablishing = false;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
/** Cut an in-flight reestablish backoff short so the next attempt runs immediately, and
|
|
383
|
+
* clear its timer so it can't fire later on a stopped/restarted loop. */
|
|
384
|
+
kickBackoff() {
|
|
385
|
+
this.backoffResolve?.();
|
|
386
|
+
if (this.backoffTimer) {
|
|
387
|
+
clearTimeout(this.backoffTimer);
|
|
388
|
+
this.backoffTimer = undefined;
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
/** Manual reconnect: tear down the current connection and rebuild, WITHOUT the permanent
|
|
392
|
+
* stop (stopped/stopping stay false). Serialized with the self-heal supervisor via
|
|
393
|
+
* {@link rebuild}, and interruptible — if a backoff is in flight, kick it so the attempt
|
|
394
|
+
* is now, not in retryMs. Throws if stopped. On failure, leaves {@link reestablishLoop}
|
|
395
|
+
* running in the background so the endpoint never stays dead, and rethrows so the caller
|
|
396
|
+
* can report it. */
|
|
397
|
+
async reconnect() {
|
|
398
|
+
if (this.stopped)
|
|
399
|
+
throw new Error("endpoint stopped — cannot reconnect");
|
|
400
|
+
this.kickBackoff();
|
|
401
|
+
try {
|
|
402
|
+
await this.rebuild();
|
|
403
|
+
}
|
|
404
|
+
catch (e) {
|
|
405
|
+
void this.reestablishLoop(); // background retry until success or stop
|
|
406
|
+
throw e;
|
|
407
|
+
}
|
|
137
408
|
}
|
|
138
409
|
async stop() {
|
|
139
410
|
if (this.stopped)
|
|
140
411
|
return;
|
|
141
412
|
this.stopped = true;
|
|
413
|
+
// Wake a reestablishLoop sitting in backoff so it sees `stopped` and exits instead of
|
|
414
|
+
// sleeping out retryMs; also clears the timer so it can't fire later.
|
|
415
|
+
this.kickBackoff();
|
|
142
416
|
if (this.heartbeatTimer)
|
|
143
417
|
clearInterval(this.heartbeatTimer);
|
|
144
418
|
if (this.sweepTimer)
|
|
@@ -286,7 +560,7 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
286
560
|
/** Send a control request to a service and await its reply (client side). */
|
|
287
561
|
async requestControl(service, req, timeoutMs = 5000) {
|
|
288
562
|
if (!this.nc)
|
|
289
|
-
throw new Error(
|
|
563
|
+
throw new Error(this.notLiveMsg());
|
|
290
564
|
const body = { ...req, from: req.from ?? this.ref() };
|
|
291
565
|
const m = await this.nc.request(controlServiceSubject(this.space, service, this.card.id), JSON.stringify(body), { timeout: timeoutMs });
|
|
292
566
|
return m.json();
|
|
@@ -303,6 +577,30 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
303
577
|
this.status = status;
|
|
304
578
|
await this.publishPresence();
|
|
305
579
|
}
|
|
580
|
+
/** Publish the agent's global attention mode into presence (advisory observability). Mirror only —
|
|
581
|
+
* delivery decisions stay in the connector's authoritative state. */
|
|
582
|
+
async setAttention(attention) {
|
|
583
|
+
this.attentionMode = attention;
|
|
584
|
+
await this.publishPresence();
|
|
585
|
+
}
|
|
586
|
+
/** Publish the agent's per-channel attention overrides into presence (advisory). An empty map drops
|
|
587
|
+
* the field. Mirror only — never read back into delivery. */
|
|
588
|
+
async setChannelModes(modes) {
|
|
589
|
+
this.channelModes = Object.keys(modes).length ? modes : undefined;
|
|
590
|
+
await this.publishPresence();
|
|
591
|
+
}
|
|
592
|
+
/** Overlay the host's live model onto the card's display-only `meta.model` and republish presence.
|
|
593
|
+
* For connectors that learn the actual model only *after* launch (e.g. Claude Code's `SessionStart`
|
|
594
|
+
* hook payload) rather than from an operator pin. Display-only discovery metadata; a no-op when the
|
|
595
|
+
* value is empty or already current (no redundant publish). The mutated card is read live by every
|
|
596
|
+
* later publish, so even a pre-connect call surfaces on the first presence write. */
|
|
597
|
+
async setCardModel(model) {
|
|
598
|
+
const m = model.trim();
|
|
599
|
+
if (!m || this.card.meta?.model === m)
|
|
600
|
+
return;
|
|
601
|
+
this.card.meta = { ...(this.card.meta ?? {}), model: m };
|
|
602
|
+
await this.publishPresence();
|
|
603
|
+
}
|
|
306
604
|
// ---- channel discovery ---------------------------------------------------
|
|
307
605
|
/** This channel's registry config from the live local cache (undefined if unset). */
|
|
308
606
|
getChannelConfig(channel) {
|
|
@@ -319,45 +617,102 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
319
617
|
return [...this.channels];
|
|
320
618
|
}
|
|
321
619
|
/**
|
|
322
|
-
* Join a channel mid-session:
|
|
323
|
-
*
|
|
324
|
-
*
|
|
325
|
-
* Idempotent: re-joining
|
|
326
|
-
* the
|
|
620
|
+
* Join a channel mid-session: open a native core subscription (manager-free live read, broker-
|
|
621
|
+
* confirmed against `sub.allow`), capture the stream frontier as the join watermark, backfill its
|
|
622
|
+
* history if replay is on, and — for a `durable`-class channel under a manager — request a Plane-3
|
|
623
|
+
* durable backstop. Idempotent: re-joining is a no-op (no re-backfill). Returns the backfill count +
|
|
624
|
+
* whether the durable backstop is active (+ a `reason` when a durable channel couldn't get one).
|
|
327
625
|
*/
|
|
328
626
|
async joinChannel(channel) {
|
|
329
627
|
if (!this.jsm)
|
|
330
|
-
throw new Error(
|
|
628
|
+
throw new Error(this.notLiveMsg());
|
|
331
629
|
if (this.channels.includes(channel))
|
|
332
|
-
return { joined: false, backfilled: 0 };
|
|
333
|
-
|
|
334
|
-
//
|
|
335
|
-
// channel is then either ≤ frontier → backfill-only or > frontier → tail-only, never both),
|
|
336
|
-
// and filter BEFORE backfill (gap-safe: backfill-first leaves a window in neither stream).
|
|
630
|
+
return { joined: false, backfilled: 0, durable: this.plane3Channels.has(channel) };
|
|
631
|
+
// Arm the watermark BEFORE going live: the backfill reads ≤ frontier and the core-sub only ever
|
|
632
|
+
// delivers post-subscribe live messages (> frontier), so the two never overlap.
|
|
337
633
|
const armed = await this.armJoin([channel]);
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
634
|
+
// Live read (SPEC v0.3): open the native core subscription — MANAGER-FREE, broker-enforced by
|
|
635
|
+
// sub.allow. This is what lets an agent join a channel's live feed on its own. The sub.allow
|
|
636
|
+
// refusal is async — broker-confirm before committing local join state; the subscribe handler
|
|
637
|
+
// ALSO drops a channel on ANY refusal (incl. a late one), so this is not a timing gamble (#13).
|
|
638
|
+
this.subscribeChat(channel);
|
|
639
|
+
try {
|
|
640
|
+
await this.confirmChatSub();
|
|
641
|
+
}
|
|
642
|
+
catch (e) {
|
|
643
|
+
// The confirm boundary (flush) failed — the connection drained/closed mid-join, so we have NO
|
|
644
|
+
// confirmation the subscribe was accepted. Fail closed: undo the half-open join rather than
|
|
645
|
+
// returning as if it were confirmed (a reconnect re-confirms from this.channels, which we never
|
|
646
|
+
// pushed to). unsubscribeChat clears chatSubs + confirmingChatSubs.
|
|
647
|
+
this.unsubscribeChat(channel);
|
|
648
|
+
this.joinSeq.delete(channel);
|
|
649
|
+
throw new Error(`cannot join "${channel}": live subscription could not be confirmed (${e.message})`);
|
|
650
|
+
}
|
|
651
|
+
this.confirmingChatSubs.delete(chatSubject(this.space, "*", channel));
|
|
652
|
+
if (this.chatSubDenied.has(channel)) {
|
|
653
|
+
this.unsubscribeChat(channel);
|
|
654
|
+
this.joinSeq.delete(channel);
|
|
655
|
+
throw new Error(`cannot join "${channel}": not within this agent's read ACL (allowSubscribe)`);
|
|
656
|
+
}
|
|
341
657
|
this.channels.push(channel);
|
|
658
|
+
// Durable backstop. The live core-sub above already delivers (manager-free). For a `durable`-class
|
|
659
|
+
// channel, request a Plane-3 per-member backstop from the manager (durableJoin) so a post reaches a
|
|
660
|
+
// busy/offline turn — the core-sub stays as the live wake-hint, dedup-coalesced with the Plane-3
|
|
661
|
+
// copy by id-dedup. No manager (open dev / manager-less) ⇒ joined LIVE only, surfaced via `reason`
|
|
662
|
+
// (never silent). A `live`-class channel takes no backstop (joined live is the contract).
|
|
663
|
+
let durable = false;
|
|
664
|
+
let reason;
|
|
665
|
+
if (effectiveDeliveryClass(this.channelConfigs.get(channel), this.channelDefaults) === "durable") {
|
|
666
|
+
try {
|
|
667
|
+
const r = await this.durableJoinChannel(channel);
|
|
668
|
+
if (r.durable) {
|
|
669
|
+
this.plane3Channels.set(channel, r.generation ?? 0);
|
|
670
|
+
durable = true;
|
|
671
|
+
}
|
|
672
|
+
else {
|
|
673
|
+
reason = r.reason ?? "durable backstop unavailable";
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
catch (e) {
|
|
677
|
+
// No privileged writer (manager-less) or the write was rejected — joined live, backstop
|
|
678
|
+
// unavailable. NOT a join failure: the live subscription is up and authorized.
|
|
679
|
+
reason = `durable backstop unavailable (${e.message})`;
|
|
680
|
+
}
|
|
681
|
+
}
|
|
342
682
|
const backfilled = await this.backfillArmed(armed);
|
|
343
|
-
return { joined: true, backfilled };
|
|
683
|
+
return { joined: true, backfilled, durable, ...(reason !== undefined ? { reason } : {}) };
|
|
344
684
|
}
|
|
345
|
-
/** Leave a channel mid-session
|
|
346
|
-
*
|
|
347
|
-
*
|
|
685
|
+
/** Leave a channel mid-session — MANAGER-FREE for the live read: close the core subscription. For a
|
|
686
|
+
* Plane-3 durable channel, the membership is tombstoned FIRST at the leave cursor (SPEC §7: leave is
|
|
687
|
+
* a hard read boundary for the backstop — a pre-leave entry stays deliverable, `seq > leaveCursor` is
|
|
688
|
+
* denied). FAIL-CLOSED: if the tombstone can't be confirmed the call throws and the leave is NOT
|
|
689
|
+
* applied (live sub stays up, local mirror intact) so the caller can retry — never close the live
|
|
690
|
+
* read while the backstop keeps delivering. */
|
|
348
691
|
async leaveChannel(channel) {
|
|
349
692
|
if (!this.jsm)
|
|
350
|
-
throw new Error(
|
|
351
|
-
|
|
352
|
-
if (i < 0)
|
|
693
|
+
throw new Error(this.notLiveMsg());
|
|
694
|
+
if (!this.channels.includes(channel))
|
|
353
695
|
return { left: false };
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
696
|
+
// Auth + durable-class ⇒ a Plane-3 membership may exist; tombstone it BEFORE touching local state.
|
|
697
|
+
// The join generation comes from the local mirror, but a BOOT membership whose hydration was missed
|
|
698
|
+
// (transient manager error at connect) is NOT in the mirror — so re-resolve it from the manager on
|
|
699
|
+
// demand. FAIL-CLOSED: fetchMemberships throws on a responder-present error, so a leave whose
|
|
700
|
+
// tombstone can't be confirmed propagates (live sub stays up, mirror intact) for the caller to retry
|
|
701
|
+
// — reporting `left` while the trusted reader keeps transferring to DLV is the fail-open leak. A
|
|
702
|
+
// genuine no-responder (open / manager-less, no Plane-3) means there is no membership to tombstone.
|
|
703
|
+
if (this.creds && effectiveDeliveryClass(this.channelConfigs.get(channel), this.channelDefaults) === "durable") {
|
|
704
|
+
let generation = this.plane3Channels.get(channel);
|
|
705
|
+
if (generation === undefined)
|
|
706
|
+
generation = (await this.fetchMemberships())?.find((m) => m.channel === channel)?.generation;
|
|
707
|
+
if (generation !== undefined) {
|
|
708
|
+
await this.durableLeaveChannel(channel, generation);
|
|
709
|
+
this.plane3Channels.delete(channel);
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
this.unsubscribeChat(channel);
|
|
713
|
+
const i = this.channels.indexOf(channel);
|
|
714
|
+
if (i >= 0)
|
|
715
|
+
this.channels.splice(i, 1);
|
|
361
716
|
this.joinSeq.delete(channel);
|
|
362
717
|
return { left: true };
|
|
363
718
|
}
|
|
@@ -366,7 +721,7 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
366
721
|
* observer endpoints (no consumers needed). */
|
|
367
722
|
async listChannels() {
|
|
368
723
|
if (!this.nc)
|
|
369
|
-
throw new Error(
|
|
724
|
+
throw new Error(this.notLiveMsg());
|
|
370
725
|
const mgr = await jetstreamManager(this.nc);
|
|
371
726
|
// Subjects carry the sender (chat.<sender>.<channel>), so collapse across senders: sum
|
|
372
727
|
// each channel's counts regardless of who published.
|
|
@@ -394,56 +749,32 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
394
749
|
.sort((a, b) => a.channel.localeCompare(b.channel));
|
|
395
750
|
}
|
|
396
751
|
async channelMembers(channel) {
|
|
397
|
-
const
|
|
398
|
-
|
|
399
|
-
// One peer has one chat consumer, so this is a straight per-peer collection; join/leave
|
|
400
|
-
// just mutates that consumer's filter_subjects, which the next call re-reads live.
|
|
401
|
-
const byTok = new Map();
|
|
402
|
-
for await (const ci of mgr.consumers.list(chatStream(this.space))) {
|
|
403
|
-
const tok = chatDurableToken(ci.config.durable_name ?? ci.name);
|
|
404
|
-
if (tok === null)
|
|
405
|
-
continue;
|
|
406
|
-
// The server may report a single filter as `filter_subject` or `filter_subjects` — both
|
|
407
|
-
// are the same datum; read whichever is present. Filters are already collapsed (the
|
|
408
|
-
// effective subscription), so parse the channel straight out of each.
|
|
409
|
-
const filters = ci.config.filter_subjects ?? (ci.config.filter_subject ? [ci.config.filter_subject] : []);
|
|
410
|
-
const set = byTok.get(tok) ?? new Set();
|
|
411
|
-
for (const f of filters) {
|
|
412
|
-
const p = parseSubject(f);
|
|
413
|
-
if (p?.kind === "chat")
|
|
414
|
-
set.add(p.rest);
|
|
415
|
-
}
|
|
416
|
-
byTok.set(tok, set);
|
|
417
|
-
}
|
|
418
|
-
// Join with presence for liveness. token() is lossy, so match forward: index the roster
|
|
419
|
-
// by token(id). A durable with no roster match is a ghost/foreign id — keep its token,
|
|
420
|
-
// never drop it.
|
|
421
|
-
const byToken = new Map();
|
|
752
|
+
const members = (await listMembers(await this.membersRegistry())).filter((r) => r.leaveCursor === undefined && r.activated === true);
|
|
753
|
+
const byId = new Map();
|
|
422
754
|
for (const p of this.roster.values())
|
|
423
|
-
|
|
424
|
-
const
|
|
425
|
-
const p =
|
|
755
|
+
byId.set(p.card.id, p);
|
|
756
|
+
const memberForId = (id) => {
|
|
757
|
+
const p = byId.get(id);
|
|
426
758
|
return p
|
|
427
759
|
? { id: p.card.id, name: p.card.name, role: p.card.role, live: p.status !== "offline" }
|
|
428
|
-
: { id
|
|
760
|
+
: { id, name: id, live: false };
|
|
429
761
|
};
|
|
430
762
|
const byName = (a, b) => a.name.localeCompare(b.name);
|
|
431
|
-
if (channel !== undefined)
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
return out.sort(byName);
|
|
437
|
-
}
|
|
763
|
+
if (channel !== undefined)
|
|
764
|
+
return members
|
|
765
|
+
.filter((r) => subjectMatches(r.channel, channel))
|
|
766
|
+
.map((r) => memberForId(r.owner))
|
|
767
|
+
.sort(byName);
|
|
438
768
|
const map = new Map();
|
|
439
|
-
for (const
|
|
440
|
-
const
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
if (arr)
|
|
769
|
+
for (const r of members) {
|
|
770
|
+
const arr = map.get(r.channel);
|
|
771
|
+
const m = memberForId(r.owner);
|
|
772
|
+
if (arr) {
|
|
773
|
+
if (!arr.some((x) => x.id === m.id))
|
|
444
774
|
arr.push(m);
|
|
445
|
-
|
|
446
|
-
|
|
775
|
+
}
|
|
776
|
+
else {
|
|
777
|
+
map.set(r.channel, [m]);
|
|
447
778
|
}
|
|
448
779
|
}
|
|
449
780
|
for (const arr of map.values())
|
|
@@ -504,17 +835,32 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
504
835
|
return;
|
|
505
836
|
void (async () => {
|
|
506
837
|
for await (const s of this.nc.status()) {
|
|
507
|
-
if (s.type
|
|
508
|
-
|
|
838
|
+
if (s.type !== "error")
|
|
839
|
+
continue;
|
|
840
|
+
// Suppress the EXPECTED permission violation from a manager-free join we're confirming: an
|
|
841
|
+
// out-of-ACL `nc.subscribe` is refused async on its chat subject, which joinChannel catches
|
|
842
|
+
// and turns into a clean throw — it is not a connection error to surface.
|
|
843
|
+
if (s.error instanceof PermissionViolationError && this.confirmingChatSubs.has(s.error.subject))
|
|
844
|
+
continue;
|
|
845
|
+
this.emit("error", describeStatusError(s.error));
|
|
509
846
|
}
|
|
510
847
|
})().catch((e) => {
|
|
511
848
|
if (!this.stopped)
|
|
512
849
|
this.emit("error", e);
|
|
513
850
|
});
|
|
514
851
|
}
|
|
852
|
+
/** The error message for a guard that finds the endpoint unbound: "reconnecting" during a
|
|
853
|
+
* rebuild's null window OR an inter-retry backoff (so a concurrent op reports the real
|
|
854
|
+
* reason, not "not started" — `reestablishing` spans the whole retry loop incl. backoff),
|
|
855
|
+
* else "endpoint not started" (genuine pre-start). */
|
|
856
|
+
notLiveMsg() {
|
|
857
|
+
return this.reconnecting || this.reestablishing
|
|
858
|
+
? "reconnecting — try again shortly"
|
|
859
|
+
: "endpoint not started";
|
|
860
|
+
}
|
|
515
861
|
async publishMsg(subject, msg) {
|
|
516
862
|
if (!this.js)
|
|
517
|
-
throw new Error(
|
|
863
|
+
throw new Error(this.notLiveMsg());
|
|
518
864
|
// msgID = message id → free server-side dedup across JetStream redelivery.
|
|
519
865
|
await this.js.publish(subject, JSON.stringify(msg), { msgID: msg.id });
|
|
520
866
|
}
|
|
@@ -525,6 +871,28 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
525
871
|
throw new Error("endpoint not started");
|
|
526
872
|
await createSpaceStreams(this.jsm, this.space);
|
|
527
873
|
}
|
|
874
|
+
/**
|
|
875
|
+
* Privileged: write an agent's BOOT durable membership — each `durable`-class channel in its boot
|
|
876
|
+
* subscribe set gets a Plane-3 durable-active record (via {@link durableJoinFor}: cursor capture +
|
|
877
|
+
* activation catch-up), so it receives durable backstop copies from boot exactly like a runtime
|
|
878
|
+
* `durableJoin`. `live`-class (and non-concrete) channels are skipped. Idempotent.
|
|
879
|
+
*
|
|
880
|
+
* Writes the durable RECORDS with the caller's privileged creds — it does NOT require this endpoint
|
|
881
|
+
* to host the runtime fan-out/reader loops (a space-level manager service), so EVERY auth launcher
|
|
882
|
+
* provisions identically: the manager AND the short-lived `cotal spawn` provisioner both write boot
|
|
883
|
+
* records, which the space's manager then delivers (no silent no-op — that would hide a boot
|
|
884
|
+
* membership; AGENTS.md "no fallbacks"). A space running no manager is live-only for everyone (the
|
|
885
|
+
* records exist; nothing delivers them until a manager hosts the loops).
|
|
886
|
+
*/
|
|
887
|
+
async provisionMembership(targetId, channels) {
|
|
888
|
+
for (const ch of channels) {
|
|
889
|
+
if (!isConcreteChannel(ch))
|
|
890
|
+
continue; // durable membership is per-concrete-channel
|
|
891
|
+
if ((await this.deliveryClassFresh(ch)) !== "durable")
|
|
892
|
+
continue;
|
|
893
|
+
await this.durableJoinFor(targetId, ch);
|
|
894
|
+
}
|
|
895
|
+
}
|
|
528
896
|
/**
|
|
529
897
|
* Privileged: pre-create an agent's DM inbox durable (auth mode), so the agent can BIND
|
|
530
898
|
* it without holding CONSUMER.CREATE on DM_<space>. The creator sets the filter to
|
|
@@ -536,6 +904,17 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
536
904
|
const jsm = await this.manager();
|
|
537
905
|
await jsm.consumers.add(dmStream(this.space), dmDurableConfig(this.space, targetId));
|
|
538
906
|
}
|
|
907
|
+
/**
|
|
908
|
+
* Privileged: pre-create an agent's bind-only Plane-3 DELIVER durable (`dlv_<id>`, filtered to
|
|
909
|
+
* `dlv.<id>`), so the agent can BIND its per-member durable handoff without holding CONSUMER.CREATE
|
|
910
|
+
* on the DLV stream. Same bind-only model as {@link provisionDmInbox}: the creator sets the filter,
|
|
911
|
+
* the agent never does. The trusted reader transfers re-authorized copies onto `dlv.<id>`; the agent
|
|
912
|
+
* acks them via native JetStream (SPEC §8). Idempotent. The caller must be permissive on DLV.
|
|
913
|
+
*/
|
|
914
|
+
async provisionDlvInbox(targetId) {
|
|
915
|
+
const jsm = await this.manager();
|
|
916
|
+
await jsm.consumers.add(dlvStream(this.space), dlvDurableConfig(this.space, targetId));
|
|
917
|
+
}
|
|
539
918
|
/**
|
|
540
919
|
* Privileged: pre-create a role's shared TASK work-queue durable (auth mode), so agents
|
|
541
920
|
* of that role can BIND it without holding CONSUMER.CREATE on TASK_<space>. The creator
|
|
@@ -546,6 +925,524 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
546
925
|
const jsm = await this.manager();
|
|
547
926
|
await jsm.consumers.add(taskStream(this.space), taskDurableConfig(this.space, role));
|
|
548
927
|
}
|
|
928
|
+
// ---- Plane-3: durable backstop (SPEC §8) — privileged, manager-hosted ----------------------------
|
|
929
|
+
//
|
|
930
|
+
// Two manager loops + two privileged membership ops. The FAN-OUT writer (routing, not auth) reads
|
|
931
|
+
// every chat message and copies it into each eligible owner's MIXED inbox (`dinbox.<owner>`); the
|
|
932
|
+
// TRUSTED READER (the auth gate) re-authorizes each entry against the CURRENT ACL + membership
|
|
933
|
+
// interval and TRANSFERS the authorized copy to the owner's per-member DELIVER store
|
|
934
|
+
// (`dlv.<owner>`), which the agent binds + acks via native JetStream. The agent holds no read on the
|
|
935
|
+
// mixed store. See `.internal/research/stage4-impl-design.md`.
|
|
936
|
+
/** Lazily open the privileged members registry KV (manager / open-mode self). */
|
|
937
|
+
async membersRegistry() {
|
|
938
|
+
if (!this.nc)
|
|
939
|
+
throw new Error("endpoint not started");
|
|
940
|
+
this.membersKv ??= await openMembersRegistry(this.nc, this.space);
|
|
941
|
+
return this.membersKv;
|
|
942
|
+
}
|
|
943
|
+
/** Privileged: one owner's NON-TOMBSTONED durable memberships as `{channel, generation, activated}` —
|
|
944
|
+
* the manager serves this to a connecting agent (via the `listMemberships` self-service op). The agent
|
|
945
|
+
* hydrates its leave mirror from the ACTIVATED ones (the confirmed backstops), but the non-activated
|
|
946
|
+
* ones are returned too so `leaveChannel` can discover + close a record that still routes under the
|
|
947
|
+
* pure-interval predicate (a crash-stuck pending activation) — without reading the privileged KV. */
|
|
948
|
+
async ownerMemberships(owner) {
|
|
949
|
+
const recs = await listMembers(await this.membersRegistry(), { owner });
|
|
950
|
+
return recs
|
|
951
|
+
.filter((r) => r.leaveCursor === undefined)
|
|
952
|
+
.map((r) => ({ channel: r.channel, generation: r.generation, activated: r.activated === true }));
|
|
953
|
+
}
|
|
954
|
+
/** Effective delivery class read AUTHORITATIVELY from the registry KV (not the watch cache) — so a
|
|
955
|
+
* `live`→`durable` flip is seen by fan-out without a cache-propagation gap (red-team MED-3). */
|
|
956
|
+
async deliveryClassFresh(channel) {
|
|
957
|
+
if (!this.channelKv)
|
|
958
|
+
return effectiveDeliveryClass(undefined, undefined);
|
|
959
|
+
const [cfg, defaults] = await Promise.all([
|
|
960
|
+
isConcreteChannel(channel) ? readChannelConfig(this.channelKv, channel) : Promise.resolve(undefined),
|
|
961
|
+
readChannelDefaults(this.channelKv),
|
|
962
|
+
]);
|
|
963
|
+
return effectiveDeliveryClass(cfg, defaults);
|
|
964
|
+
}
|
|
965
|
+
/** Collision-safe `@mention` → owner-id resolution: a name that resolves to exactly one present
|
|
966
|
+
* peer wins; 0 or >1 matches drop (never fan a directed durable copy to an unrelated same-named
|
|
967
|
+
* bystander — red-team LOW; SPEC §4 unique instance id). */
|
|
968
|
+
resolveOwnerByName(name) {
|
|
969
|
+
const matches = [...this.roster.values()].filter((p) => p.card.name.toLowerCase() === name.toLowerCase());
|
|
970
|
+
return matches.length === 1 ? matches[0].card.id : undefined;
|
|
971
|
+
}
|
|
972
|
+
/** Publish one fan-out entry into an owner's mixed inbox, idempotent via `Nats-Msg-Id`
|
|
973
|
+
* (`<msgId>:<owner>:<generation>`) so a catch-up copy and a racing fan-out copy collapse. */
|
|
974
|
+
async publishDinbox(owner, entry) {
|
|
975
|
+
if (!this.js)
|
|
976
|
+
return;
|
|
977
|
+
await this.js.publish(dinboxSubject(this.space, owner), JSON.stringify(entry), {
|
|
978
|
+
msgID: `${entry.msg.id}:${owner}:${entry.generation}`,
|
|
979
|
+
});
|
|
980
|
+
}
|
|
981
|
+
/** The fan-out consumer's delivered stream-seq — the activation-fence upper bound (red-team
|
|
982
|
+
* BLOCKER-1: the shared fan-out cursor advances independently of the stream frontier). */
|
|
983
|
+
async fanoutDeliveredSeq() {
|
|
984
|
+
const info = await this.consumerInfo(chatStream(this.space), FANOUT_DURABLE);
|
|
985
|
+
return info?.delivered?.stream_seq ?? 0;
|
|
986
|
+
}
|
|
987
|
+
/**
|
|
988
|
+
* Privileged durable-JOIN write (the manager calls this after validating channel ⊆ allowSubscribe;
|
|
989
|
+
* {@link provisionMembership} calls it at provision time for boot channels): capture `joinCursor`,
|
|
990
|
+
* commit a `durable-active` record (CAS + generation bump), then ACTIVATION CATCH-UP idempotently
|
|
991
|
+
* copies `(joinCursor, fence]` into the owner inbox where `fence = max(frontier, fanoutDelivered)` —
|
|
992
|
+
* fan-out owns `seq > fence`. Idempotent against a timeout-retry (an already-activated membership
|
|
993
|
+
* no-ops). Returns `{durable:false}` (honest degrade) only if the catch-up window was evicted.
|
|
994
|
+
*
|
|
995
|
+
* This writes durable KV + dinbox state with the caller's privileged creds; it does NOT require THIS
|
|
996
|
+
* endpoint to host the fan-out/reader loops (those are a space-level manager service). So a
|
|
997
|
+
* short-lived provisioner can write a boot membership a separate long-lived manager then delivers.
|
|
998
|
+
*/
|
|
999
|
+
async durableJoinFor(owner, channel) {
|
|
1000
|
+
if (!this.js)
|
|
1001
|
+
throw new Error("endpoint not started");
|
|
1002
|
+
await this.manager(); // ensure jsm — a non-consuming provisioner inits it lazily; catch-up + fence need it
|
|
1003
|
+
const kv = await this.membersRegistry();
|
|
1004
|
+
const existing = await readMember(kv, channel, owner);
|
|
1005
|
+
const open = existing?.record.state === "durable-active" && existing.record.leaveCursor === undefined;
|
|
1006
|
+
if (open && existing.record.activated)
|
|
1007
|
+
return { durable: true, generation: existing.record.generation }; // fully activated — idempotent
|
|
1008
|
+
// Either a NEW join (no record / a tombstone to supersede) → fresh joinCursor + bumped generation,
|
|
1009
|
+
// OR a retry of an INCOMPLETE activation (durable-active but not yet activated, from an earlier
|
|
1010
|
+
// eviction/crash) → re-run catch-up over the SAME join window, no bump. The record is committed
|
|
1011
|
+
// `activated:false` first and routes IN-INTERVAL immediately (fan-out + reader deliver via the
|
|
1012
|
+
// pure-interval durableEligible) so no live message published during catch-up is lost. `activated`
|
|
1013
|
+
// gates only the REPORT — durableJoin returns true / channelMembers lists the owner only after the
|
|
1014
|
+
// catch-up confirms. A join that never completes catch-up still routes live (harmless: the agent is
|
|
1015
|
+
// live-subscribed and DLV is id-deduped) but honestly reports durable:false and stays hidden.
|
|
1016
|
+
const joinCursor = open ? existing.record.joinCursor : await this.chatFrontier();
|
|
1017
|
+
const generation = open ? existing.record.generation : (existing?.record.generation ?? 0) + 1;
|
|
1018
|
+
const base = {
|
|
1019
|
+
channel, owner, state: "durable-active", joinCursor, generation,
|
|
1020
|
+
activated: false, writerIdentity: this.card.id, updatedAt: Date.now(),
|
|
1021
|
+
};
|
|
1022
|
+
if (!open)
|
|
1023
|
+
await commitMember(kv, base);
|
|
1024
|
+
const fence = Math.max(await this.chatFrontier(), await this.fanoutDeliveredSeq());
|
|
1025
|
+
const cu = await this.catchupCopy(owner, channel, joinCursor, fence, generation);
|
|
1026
|
+
if (cu.evicted) {
|
|
1027
|
+
// Catch-up window irreparably evicted (the oldest in-window message aged out) — this join can never
|
|
1028
|
+
// be a complete backstop. TOMBSTONE the just-committed record at `fence` so it does NOT route:
|
|
1029
|
+
// pure-interval durableEligible would otherwise keep delivering to a record the agent was told is
|
|
1030
|
+
// durable:false AND can't discover to leave (critic BLOCKER-1). Pass `generation` as the expected
|
|
1031
|
+
// generation (ux stale-write guard) so this cleanup can't tombstone a concurrent NEWER rejoin — if
|
|
1032
|
+
// one won, StaleMembershipWrite is the correct no-op (the rejoin is the live record). Then degrade
|
|
1033
|
+
// honestly — a retry is a fresh join (no longer `open`, so a current joinCursor is captured).
|
|
1034
|
+
try {
|
|
1035
|
+
await tombstoneMember(kv, channel, owner, fence, this.card.id, generation);
|
|
1036
|
+
}
|
|
1037
|
+
catch (e) {
|
|
1038
|
+
if (!(e instanceof StaleMembershipWrite))
|
|
1039
|
+
throw e;
|
|
1040
|
+
}
|
|
1041
|
+
return { durable: false, reason: "activation catch-up window partially evicted by retention", generation };
|
|
1042
|
+
}
|
|
1043
|
+
// Flip → reported durable, ATOMICALLY: refuse if a concurrent SAME-generation leave (tombstone) or a
|
|
1044
|
+
// rejoin superseded this pending join while catch-up ran. A blind same-gen commit would clobber the
|
|
1045
|
+
// tombstone (clear leaveCursor) and resurrect the membership, reopening §7 (review-general-2 BLOCKER).
|
|
1046
|
+
const activated = await activateMember(kv, channel, owner, generation, joinCursor);
|
|
1047
|
+
if (!activated)
|
|
1048
|
+
return { durable: false, reason: "activation superseded by a concurrent leave or rejoin", generation };
|
|
1049
|
+
return { durable: true, generation };
|
|
1050
|
+
}
|
|
1051
|
+
/** Privileged durable-LEAVE write: tombstone the membership at `leaveCursor = frontier` so the
|
|
1052
|
+
* backstop denies `seq > leaveCursor` while a pre-leave entry stays deliverable (SPEC §7 interval). */
|
|
1053
|
+
async durableLeaveFor(owner, channel, expectedGeneration) {
|
|
1054
|
+
if (!this.plane3)
|
|
1055
|
+
return; // not a Plane-3 host — no membership to tombstone
|
|
1056
|
+
const kv = await this.membersRegistry();
|
|
1057
|
+
// expectedGeneration (captured by the agent at durableJoin) refuses a stale leave from tombstoning
|
|
1058
|
+
// a newer rejoin (StaleMembershipWrite) — a durable-disable primitive otherwise.
|
|
1059
|
+
await tombstoneMember(kv, channel, owner, await this.chatFrontier(), this.card.id, expectedGeneration);
|
|
1060
|
+
}
|
|
1061
|
+
/** Idempotently copy the eligible chat messages in `(fromSeqExcl, toSeqIncl]` for `channel` into the
|
|
1062
|
+
* owner inbox, via a DEDICATED per-(owner,join) ephemeral consumer (NOT the agent-scoped
|
|
1063
|
+
* `chathist_<id>`/`histLock` — red-team HIGH-8). `evicted` ⇒ the oldest eligible seq aged out under
|
|
1064
|
+
* `discard=Old` (the start seq could not be served), a durable shortfall the caller surfaces. */
|
|
1065
|
+
async catchupCopy(owner, channel, fromSeqExcl, toSeqIncl, generation) {
|
|
1066
|
+
if (!this.js || !this.jsm || toSeqIncl <= fromSeqExcl)
|
|
1067
|
+
return { copied: 0, evicted: false };
|
|
1068
|
+
const subject = chatSubject(this.space, "*", channel);
|
|
1069
|
+
// Eviction = a message in `(joinCursor, …]` on THIS channel's subject aged out under discard=Old.
|
|
1070
|
+
// Judged PER-SUBJECT (reuse channelDropped: oldest-retained-for-subject vs the watermark, only at
|
|
1071
|
+
// the per-subject cap), NOT against the stream-global joinCursor+1 — other channels' traffic
|
|
1072
|
+
// inflates the global seq, so a naive "first delivered seq > joinCursor+1" false-positives on any
|
|
1073
|
+
// busy multi-channel space (impl-review HIGH-2). A true eviction → durableJoin reports durable:false.
|
|
1074
|
+
const evicted = await this.channelDropped(subject, fromSeqExcl);
|
|
1075
|
+
const name = `cu_${token(owner)}_${generation}`;
|
|
1076
|
+
try {
|
|
1077
|
+
await this.jsm.consumers.delete(chatStream(this.space), name);
|
|
1078
|
+
}
|
|
1079
|
+
catch { /* none */ }
|
|
1080
|
+
await this.jsm.consumers.add(chatStream(this.space), {
|
|
1081
|
+
name, filter_subject: subject, ack_policy: AckPolicy.None, mem_storage: true,
|
|
1082
|
+
inactive_threshold: nanos(30_000), deliver_policy: DeliverPolicy.StartSequence, opt_start_seq: fromSeqExcl + 1,
|
|
1083
|
+
});
|
|
1084
|
+
let copied = 0;
|
|
1085
|
+
try {
|
|
1086
|
+
const consumer = await this.js.consumers.get(chatStream(this.space), name);
|
|
1087
|
+
let pending = (await consumer.info()).num_pending;
|
|
1088
|
+
while (pending > 0) {
|
|
1089
|
+
const want = Math.min(pending, 256);
|
|
1090
|
+
const iter = await consumer.fetch({ max_messages: want, expires: 5_000 });
|
|
1091
|
+
let got = 0;
|
|
1092
|
+
for await (const m of iter) {
|
|
1093
|
+
got++;
|
|
1094
|
+
if (m.seq > toSeqIncl)
|
|
1095
|
+
return { copied, evicted };
|
|
1096
|
+
let msg;
|
|
1097
|
+
try {
|
|
1098
|
+
msg = m.json();
|
|
1099
|
+
}
|
|
1100
|
+
catch {
|
|
1101
|
+
continue;
|
|
1102
|
+
}
|
|
1103
|
+
const parsed = parseSubject(m.subject);
|
|
1104
|
+
if (!parsed || msg.from?.id !== parsed.sender || msg.from.id === owner)
|
|
1105
|
+
continue;
|
|
1106
|
+
await this.publishDinbox(owner, { msg, channel, seq: m.seq, reason: "durable-channel", generation });
|
|
1107
|
+
copied++;
|
|
1108
|
+
}
|
|
1109
|
+
if (got < want)
|
|
1110
|
+
break;
|
|
1111
|
+
pending -= got;
|
|
1112
|
+
}
|
|
1113
|
+
}
|
|
1114
|
+
finally {
|
|
1115
|
+
try {
|
|
1116
|
+
await this.jsm.consumers.delete(chatStream(this.space), name);
|
|
1117
|
+
}
|
|
1118
|
+
catch { /* gone */ }
|
|
1119
|
+
}
|
|
1120
|
+
return { copied, evicted };
|
|
1121
|
+
}
|
|
1122
|
+
/** Start the Plane-3 fan-out writer + trusted reader on THIS (privileged) endpoint. `aclFor` maps an
|
|
1123
|
+
* owner id to its current read ACL for the reader's re-authorization (the manager passes its managed
|
|
1124
|
+
* set). Call once after connect; idempotent durable creation lets it resume on a manager restart. */
|
|
1125
|
+
async startPlane3(aclFor) {
|
|
1126
|
+
if (!this.js)
|
|
1127
|
+
throw new Error("endpoint not started");
|
|
1128
|
+
this.plane3 = { aclFor };
|
|
1129
|
+
await this.armPlane3();
|
|
1130
|
+
}
|
|
1131
|
+
/** (Re)bind the Plane-3 fan-out writer + trusted reader. Idempotent — the durables resume from their
|
|
1132
|
+
* cursor. Called by {@link startPlane3} once AND by {@link connectAndBind} on every (re)connect, so
|
|
1133
|
+
* a manager-endpoint reconnect RE-ARMS the backstop. Without this, a broker blip would silently kill
|
|
1134
|
+
* the loops while `durableJoinFor` kept reporting `durable:true` (the impl-review's BLOCKER-1). No-op
|
|
1135
|
+
* unless this endpoint hosts Plane-3 (`this.plane3` set). */
|
|
1136
|
+
async armPlane3() {
|
|
1137
|
+
if (!this.plane3 || !this.js)
|
|
1138
|
+
return;
|
|
1139
|
+
await this.manager(); // the manager runs consume:false, so this.jsm is lazy — ensure it
|
|
1140
|
+
await this.runFanout();
|
|
1141
|
+
await this.runReader();
|
|
1142
|
+
}
|
|
1143
|
+
/** Fan-out loop: bind the privileged `fanout` durable on CHAT and route each message (routing only —
|
|
1144
|
+
* the trusted reader is the auth gate). */
|
|
1145
|
+
async runFanout() {
|
|
1146
|
+
if (!this.js || !this.jsm)
|
|
1147
|
+
return;
|
|
1148
|
+
try {
|
|
1149
|
+
await this.jsm.consumers.add(chatStream(this.space), fanoutDurableConfig(this.space, { ackWaitMs: this.ackWaitMs }));
|
|
1150
|
+
}
|
|
1151
|
+
catch { /* exists */ }
|
|
1152
|
+
const consumer = await this.js.consumers.get(chatStream(this.space), FANOUT_DURABLE);
|
|
1153
|
+
const msgs = await consumer.consume();
|
|
1154
|
+
this.streamMsgs.push(msgs);
|
|
1155
|
+
void (async () => {
|
|
1156
|
+
for await (const m of msgs) {
|
|
1157
|
+
try {
|
|
1158
|
+
await this.fanOutMessage(m);
|
|
1159
|
+
}
|
|
1160
|
+
catch (e) {
|
|
1161
|
+
if (!this.stopped)
|
|
1162
|
+
this.emit("error", e);
|
|
1163
|
+
try {
|
|
1164
|
+
m.nak();
|
|
1165
|
+
}
|
|
1166
|
+
catch { /* draining */ }
|
|
1167
|
+
}
|
|
1168
|
+
}
|
|
1169
|
+
})().catch((e) => { if (!this.stopped)
|
|
1170
|
+
this.emit("error", e); });
|
|
1171
|
+
}
|
|
1172
|
+
/** Route ONE chat message to eligible owners' mixed inboxes. `durable` channel → its `durable-active`
|
|
1173
|
+
* members within interval; `live` channel → `@mention` targets authorized to read it (ACL only).
|
|
1174
|
+
* Members KV is scanned FRESH per message (no cache — red-team BLOCKER-1 catch-up correctness). */
|
|
1175
|
+
async fanOutMessage(m) {
|
|
1176
|
+
const parsed = parseSubject(m.subject);
|
|
1177
|
+
if (!parsed || parsed.kind !== "chat") {
|
|
1178
|
+
m.ack();
|
|
1179
|
+
return;
|
|
1180
|
+
}
|
|
1181
|
+
const channel = parsed.rest;
|
|
1182
|
+
let msg;
|
|
1183
|
+
try {
|
|
1184
|
+
msg = m.json();
|
|
1185
|
+
}
|
|
1186
|
+
catch {
|
|
1187
|
+
m.ack();
|
|
1188
|
+
return;
|
|
1189
|
+
}
|
|
1190
|
+
if (!msg.from || msg.from.id !== parsed.sender) {
|
|
1191
|
+
m.ack();
|
|
1192
|
+
return;
|
|
1193
|
+
} // authenticity
|
|
1194
|
+
const seq = m.seq;
|
|
1195
|
+
if ((await this.deliveryClassFresh(channel)) === "durable") {
|
|
1196
|
+
for (const rec of await listMembers(await this.membersRegistry(), { channel })) {
|
|
1197
|
+
if (rec.owner === msg.from.id)
|
|
1198
|
+
continue; // never backstop the sender's own post
|
|
1199
|
+
if (!durableEligible(rec, seq))
|
|
1200
|
+
continue; // routing fast-filter (reader re-checks)
|
|
1201
|
+
await this.publishDinbox(rec.owner, { msg, channel, seq, reason: "durable-channel", generation: rec.generation });
|
|
1202
|
+
}
|
|
1203
|
+
}
|
|
1204
|
+
else {
|
|
1205
|
+
for (const name of msg.mentions ?? []) {
|
|
1206
|
+
const owner = this.resolveOwnerByName(name);
|
|
1207
|
+
if (!owner || owner === msg.from.id)
|
|
1208
|
+
continue;
|
|
1209
|
+
const acl = this.plane3?.aclFor(owner);
|
|
1210
|
+
if (!acl || !channelInAllow(acl, channel))
|
|
1211
|
+
continue; // @mention can't bypass the read ACL
|
|
1212
|
+
await this.publishDinbox(owner, { msg, channel, seq, reason: "live-mention", generation: 0 });
|
|
1213
|
+
}
|
|
1214
|
+
}
|
|
1215
|
+
m.ack();
|
|
1216
|
+
}
|
|
1217
|
+
/** Trusted-reader loop: bind the single privileged `reader` durable over `dinbox.>` and re-authorize
|
|
1218
|
+
* + transfer each entry. */
|
|
1219
|
+
async runReader() {
|
|
1220
|
+
if (!this.js || !this.jsm)
|
|
1221
|
+
return;
|
|
1222
|
+
try {
|
|
1223
|
+
await this.jsm.consumers.add(inboxStream(this.space), inboxReaderConfig(this.space, { ackWaitMs: this.ackWaitMs }));
|
|
1224
|
+
}
|
|
1225
|
+
catch { /* exists */ }
|
|
1226
|
+
const consumer = await this.js.consumers.get(inboxStream(this.space), INBOX_READER_DURABLE);
|
|
1227
|
+
const msgs = await consumer.consume();
|
|
1228
|
+
this.streamMsgs.push(msgs);
|
|
1229
|
+
void (async () => {
|
|
1230
|
+
for await (const m of msgs) {
|
|
1231
|
+
try {
|
|
1232
|
+
await this.readerHandle(m);
|
|
1233
|
+
}
|
|
1234
|
+
catch (e) {
|
|
1235
|
+
if (!this.stopped)
|
|
1236
|
+
this.emit("error", e);
|
|
1237
|
+
try {
|
|
1238
|
+
m.nak();
|
|
1239
|
+
}
|
|
1240
|
+
catch { /* draining */ }
|
|
1241
|
+
}
|
|
1242
|
+
}
|
|
1243
|
+
})().catch((e) => { if (!this.stopped)
|
|
1244
|
+
this.emit("error", e); });
|
|
1245
|
+
}
|
|
1246
|
+
/** Re-authorize ONE mixed-inbox entry and transfer it to the owner's DELIVER store. Deny (drop) on a
|
|
1247
|
+
* revoked/narrowed ACL or out-of-interval seq; on transfer success, ack the mixed entry (durability
|
|
1248
|
+
* has moved to DLV — an §8 equivalent per-member at-least-once mechanism). The agent acks DLV. */
|
|
1249
|
+
async readerHandle(m) {
|
|
1250
|
+
const owner = parseDinboxOwner(m.subject);
|
|
1251
|
+
if (!owner) {
|
|
1252
|
+
m.ack();
|
|
1253
|
+
return;
|
|
1254
|
+
} // unparseable subject — not a real entry
|
|
1255
|
+
let entry;
|
|
1256
|
+
try {
|
|
1257
|
+
entry = m.json();
|
|
1258
|
+
}
|
|
1259
|
+
catch {
|
|
1260
|
+
m.ack();
|
|
1261
|
+
return;
|
|
1262
|
+
} // undecodable — drop
|
|
1263
|
+
const redeliveries = m.info?.deliveryCount ?? 1; // JsMsg delivery attempts (1 on first delivery)
|
|
1264
|
+
const acl = this.plane3?.aclFor(owner);
|
|
1265
|
+
if (acl === undefined) {
|
|
1266
|
+
// UNKNOWN owner — the manager has not (re)hydrated this owner's ACL yet (e.g. right after a
|
|
1267
|
+
// manager PROCESS restart). This is NOT a revocation: DEFER (redeliver), never drop — an ack here
|
|
1268
|
+
// would lose at-least-once on restart (impl-review BLOCKER-2). A delayed nak + a redelivery
|
|
1269
|
+
// ceiling stops one perma-unknown owner from head-of-lining the shared reader.
|
|
1270
|
+
// (Follow-up: the manager does not yet rehydrate its managed set across a process restart — until
|
|
1271
|
+
// it does, a long-unknown owner's entries term after the ceiling; tracked, not a silent ack-drop.)
|
|
1272
|
+
if (redeliveries >= READER_MAX_REDELIVERIES) {
|
|
1273
|
+
m.term();
|
|
1274
|
+
this.emit("error", new Error(`plane-3 reader: gave up on entry for unknown owner ${owner} after ${redeliveries} redeliveries`));
|
|
1275
|
+
return;
|
|
1276
|
+
}
|
|
1277
|
+
m.nak(2000);
|
|
1278
|
+
return;
|
|
1279
|
+
}
|
|
1280
|
+
// KNOWN owner whose CURRENT ACL no longer covers the channel — a revocation/narrowing. Drop: the
|
|
1281
|
+
// entry is no longer authorized (SPEC §7 current-ACL gate before surfacing).
|
|
1282
|
+
if (!channelInAllow(acl, entry.channel)) {
|
|
1283
|
+
m.ack();
|
|
1284
|
+
return;
|
|
1285
|
+
}
|
|
1286
|
+
if (entry.reason === "durable-channel") {
|
|
1287
|
+
const rec = await readMember(await this.membersRegistry(), entry.channel, owner);
|
|
1288
|
+
// INTERVAL re-auth (not a current-member boolean): a pre-leave entry (seq ≤ leaveCursor) stays
|
|
1289
|
+
// deliverable; seq > leaveCursor (or after a rejoin's newer joinCursor) is the hard cut.
|
|
1290
|
+
if (!rec || !durableEligible(rec.record, entry.seq)) {
|
|
1291
|
+
m.ack();
|
|
1292
|
+
return;
|
|
1293
|
+
}
|
|
1294
|
+
}
|
|
1295
|
+
try {
|
|
1296
|
+
await this.js.publish(dlvSubject(this.space, owner), JSON.stringify(entry.msg), {
|
|
1297
|
+
msgID: `${entry.msg.id}:${owner}:${entry.generation}`,
|
|
1298
|
+
});
|
|
1299
|
+
}
|
|
1300
|
+
catch {
|
|
1301
|
+
// Transfer failed — keep the entry pending (redeliver), bounded by the same ceiling so a poison
|
|
1302
|
+
// entry can't head-of-line the shared reader forever.
|
|
1303
|
+
if (redeliveries >= READER_MAX_REDELIVERIES) {
|
|
1304
|
+
m.term();
|
|
1305
|
+
this.emit("error", new Error(`plane-3 reader: gave up transferring ${entry.msg.id} for ${owner} after ${redeliveries} redeliveries`));
|
|
1306
|
+
return;
|
|
1307
|
+
}
|
|
1308
|
+
m.nak(2000);
|
|
1309
|
+
return;
|
|
1310
|
+
}
|
|
1311
|
+
m.ack();
|
|
1312
|
+
}
|
|
1313
|
+
/** Agent-side: bind + pump our pre-created Plane-3 DELIVER durable (`dlv_<id>`). Every message here is
|
|
1314
|
+
* manager-written (DLV is manager-write-only, broker-enforced) and is a CHANNEL message by contract
|
|
1315
|
+
* (the backstop never carries DMs), so `kind=channel` is path-derived (SPEC §4) and the body is
|
|
1316
|
+
* trusted (no spoof-guard). `durable:true` — real JetStream ack, coalesced with the core-sub live
|
|
1317
|
+
* copy by `MeshAgent.ingest`. No-op when the durable isn't present (open mode / not provisioned). */
|
|
1318
|
+
async pumpDlv() {
|
|
1319
|
+
if (!this.js)
|
|
1320
|
+
return;
|
|
1321
|
+
let consumer;
|
|
1322
|
+
try {
|
|
1323
|
+
consumer = await this.js.consumers.get(dlvStream(this.space), dlvDurable(this.card.id));
|
|
1324
|
+
}
|
|
1325
|
+
catch {
|
|
1326
|
+
return;
|
|
1327
|
+
} // no DLV durable — Plane-3 not active for us
|
|
1328
|
+
const msgs = await consumer.consume();
|
|
1329
|
+
this.streamMsgs.push(msgs);
|
|
1330
|
+
void (async () => {
|
|
1331
|
+
for await (const m of msgs) {
|
|
1332
|
+
let msg;
|
|
1333
|
+
try {
|
|
1334
|
+
msg = m.json();
|
|
1335
|
+
}
|
|
1336
|
+
catch (e) {
|
|
1337
|
+
this.emit("error", e);
|
|
1338
|
+
try {
|
|
1339
|
+
m.term();
|
|
1340
|
+
}
|
|
1341
|
+
catch { /* draining */ }
|
|
1342
|
+
continue;
|
|
1343
|
+
}
|
|
1344
|
+
if (msg.from?.id === this.card.id) {
|
|
1345
|
+
m.ack();
|
|
1346
|
+
continue;
|
|
1347
|
+
} // own echo (defensive)
|
|
1348
|
+
const delivery = { ack: () => m.ack(), nak: () => m.nak(), durable: true };
|
|
1349
|
+
this.emit("message", msg, delivery, { historical: false, kind: "channel" });
|
|
1350
|
+
}
|
|
1351
|
+
})().catch((e) => { if (!this.stopped)
|
|
1352
|
+
this.emit("error", e); });
|
|
1353
|
+
}
|
|
1354
|
+
/** Agent-side: request a Plane-3 durable backstop for a channel via the manager (ctl.self). Throws
|
|
1355
|
+
* when no privileged writer is present (open / manager-less). 30s timeout — activation catch-up may
|
|
1356
|
+
* run before the reply (the window is small, but a busy channel can take more than the 5s default). */
|
|
1357
|
+
async durableJoinChannel(channel) {
|
|
1358
|
+
const reply = await this.requestControl(CONTROL_SELF_SERVICE, { op: "durableJoin", args: { channel } }, 30_000);
|
|
1359
|
+
if (!reply.ok)
|
|
1360
|
+
throw new Error(reply.error ?? "durable join rejected");
|
|
1361
|
+
return reply.data ?? { durable: false };
|
|
1362
|
+
}
|
|
1363
|
+
/** Agent-side: release a Plane-3 durable backstop (tombstone membership at the leave cursor). Passes
|
|
1364
|
+
* the join generation so a stale leave can't tombstone a newer rejoin (the manager validates it). */
|
|
1365
|
+
async durableLeaveChannel(channel, generation) {
|
|
1366
|
+
const reply = await this.requestControl(CONTROL_SELF_SERVICE, { op: "durableLeave", args: { channel, generation } });
|
|
1367
|
+
if (!reply.ok)
|
|
1368
|
+
throw new Error(reply.error ?? "durable leave rejected");
|
|
1369
|
+
}
|
|
1370
|
+
/** Fail-closed async cleanup for a channel forced out by a LATE sub.allow refusal (the broker revoked
|
|
1371
|
+
* the live read). The sync sub callback can't await, so this RETRIES the Plane-3 tombstone with capped
|
|
1372
|
+
* backoff UNTIL IT SUCCEEDS (or the endpoint stops) — the §7 boundary always closes once the manager
|
|
1373
|
+
* is reachable, never a silent give-up. While pending, the channel is tracked in
|
|
1374
|
+
* {@link pendingDurableLeave} and surfaced via {@link pendingDurableLeaves} (the connector shows it in
|
|
1375
|
+
* `cotal_channels` as `durable-unclosed`, never ordinary absence). The generation is kept the whole
|
|
1376
|
+
* time. Authoritative closure of a revoked membership is also the manager's job (revocation). */
|
|
1377
|
+
async closeRefusedMembership(channel, generation) {
|
|
1378
|
+
this.pendingDurableLeave.set(channel, generation);
|
|
1379
|
+
for (let attempt = 0;; attempt++) {
|
|
1380
|
+
if (this.stopped)
|
|
1381
|
+
return;
|
|
1382
|
+
try {
|
|
1383
|
+
await this.durableLeaveChannel(channel, generation);
|
|
1384
|
+
this.plane3Channels.delete(channel);
|
|
1385
|
+
this.pendingDurableLeave.delete(channel);
|
|
1386
|
+
return;
|
|
1387
|
+
}
|
|
1388
|
+
catch (e) {
|
|
1389
|
+
if (attempt === 0)
|
|
1390
|
+
this.emit("error", new Error(`channel "${channel}": Plane-3 durable membership (generation ${generation}) not yet tombstoned after a refused live sub — retrying; §7 boundary may be open until it succeeds (${e.message})`));
|
|
1391
|
+
await new Promise((r) => setTimeout(r, Math.min(30_000, 1000 * 2 ** attempt)));
|
|
1392
|
+
}
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
/** Channels with a Plane-3 durable membership whose §7 tombstone is still pending after a refused live
|
|
1396
|
+
* sub (see {@link closeRefusedMembership}) — surfaced by the connector as a `durable-unclosed` state so
|
|
1397
|
+
* it is never presented as ordinary "not subscribed". */
|
|
1398
|
+
pendingDurableLeaves() {
|
|
1399
|
+
return [...this.pendingDurableLeave.keys()];
|
|
1400
|
+
}
|
|
1401
|
+
/** A control request that found NO responder — open / manager-less (no privileged control plane),
|
|
1402
|
+
* distinct from a responder that errored. nats.js surfaces it as NoRespondersError, or a RequestError
|
|
1403
|
+
* whose `isNoResponders()` is true. */
|
|
1404
|
+
isNoResponders(e) {
|
|
1405
|
+
return e instanceof NoRespondersError || (e instanceof RequestError && e.isNoResponders());
|
|
1406
|
+
}
|
|
1407
|
+
/** Agent-side: this session's CURRENT durable memberships (channel + join generation) from the
|
|
1408
|
+
* manager — the agent holds no read on the privileged members KV. `undefined` ⇒ NO control responder
|
|
1409
|
+
* (open / manager-less, so there is no Plane-3 and no memberships). THROWS on a responder-present RPC
|
|
1410
|
+
* failure, so a caller can FAIL-CLOSED rather than mistaking a transient error for "no membership". */
|
|
1411
|
+
async fetchMemberships() {
|
|
1412
|
+
let reply;
|
|
1413
|
+
try {
|
|
1414
|
+
reply = await this.requestControl(CONTROL_SELF_SERVICE, { op: "listMemberships", args: {} }, 5_000);
|
|
1415
|
+
}
|
|
1416
|
+
catch (e) {
|
|
1417
|
+
if (this.isNoResponders(e))
|
|
1418
|
+
return undefined; // no manager — open / manager-less, no Plane-3
|
|
1419
|
+
throw e; // responder present but errored — surface it (leaveChannel fails closed)
|
|
1420
|
+
}
|
|
1421
|
+
if (!reply.ok)
|
|
1422
|
+
throw new Error(reply.error ?? "listMemberships failed");
|
|
1423
|
+
return reply.data?.memberships ?? [];
|
|
1424
|
+
}
|
|
1425
|
+
/** Agent-side: seed `plane3Channels` with this session's boot durable memberships + generations on
|
|
1426
|
+
* first connect (the agent holds no read on the privileged members KV). A best-effort OPTIMIZATION: it
|
|
1427
|
+
* pre-fills the leave-generation mirror + the durable-state surface. If it can't (a transient manager
|
|
1428
|
+
* error), {@link leaveChannel} re-resolves the generation on demand and fails closed there — so a
|
|
1429
|
+
* missed hydration never silently leaves a boot durable channel untombstonable. */
|
|
1430
|
+
async hydrateMemberships() {
|
|
1431
|
+
let memberships;
|
|
1432
|
+
try {
|
|
1433
|
+
memberships = await this.fetchMemberships();
|
|
1434
|
+
}
|
|
1435
|
+
catch {
|
|
1436
|
+
return; // transient manager error at boot — leaveChannel re-resolves on demand (fail-closed there)
|
|
1437
|
+
}
|
|
1438
|
+
if (!memberships)
|
|
1439
|
+
return; // no manager — live-only
|
|
1440
|
+
// Seed the mirror (+ durable-state surface) with CONFIRMED backstops only; leaveChannel re-resolves a
|
|
1441
|
+
// non-activated record on demand if it ever needs to close one.
|
|
1442
|
+
for (const m of memberships)
|
|
1443
|
+
if (m.activated && this.channels.includes(m.channel))
|
|
1444
|
+
this.plane3Channels.set(m.channel, m.generation);
|
|
1445
|
+
}
|
|
549
1446
|
/** Lazily obtain a JetStream manager — so a non-consuming endpoint (e.g. the supervisor,
|
|
550
1447
|
* consume:false) can still pre-create others' durables. */
|
|
551
1448
|
async manager() {
|
|
@@ -559,8 +1456,6 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
559
1456
|
if (!this.jsm)
|
|
560
1457
|
throw new Error("endpoint not started");
|
|
561
1458
|
const id = this.card.id;
|
|
562
|
-
const ack_wait = nanos(this.ackWaitMs);
|
|
563
|
-
const inactive_threshold = nanos(this.inactiveThresholdMs);
|
|
564
1459
|
// Unicast: this instance's private DM inbox. Open mode self-creates; auth mode BINDS a
|
|
565
1460
|
// durable the provisioner pre-created (agents are denied CONSUMER.CREATE on DM_<space>,
|
|
566
1461
|
// since the create-time filter_subject is the attack surface — see provisionDmInbox).
|
|
@@ -571,48 +1466,36 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
571
1466
|
}));
|
|
572
1467
|
}
|
|
573
1468
|
await this.pump(dmStream(this.space), dmDurable(id));
|
|
574
|
-
//
|
|
575
|
-
//
|
|
576
|
-
//
|
|
1469
|
+
// Plane-3 (SPEC §8): bind + pump our per-member DELIVER durable (`dlv_<id>`) — the re-authorized
|
|
1470
|
+
// durable-backstop channel copies the trusted reader transfers to us. No-op when it isn't present
|
|
1471
|
+
// (open mode / un-provisioned). Auth-only feature; the pump self-guards on the durable's existence.
|
|
1472
|
+
await this.pumpDlv();
|
|
1473
|
+
// Multicast: open a native CORE subscription for each channel (live, manager-free, broker-enforced
|
|
1474
|
+
// by sub.allow) — boot + runtime joins use the SAME path; there is no per-instance chat durable.
|
|
1475
|
+
// The durable backstop (a busy/offline turn) is Plane-3 (auth: membership written at provision, the
|
|
1476
|
+
// manager's fan-out writer + trusted reader deliver via the `dlv_<id>` pump above; open dev mode is
|
|
1477
|
+
// live-only — the durable plane needs the manager's trusted reader, the security boundary). Per-
|
|
1478
|
+
// channel history is the explicit replay-gated backfill, on FIRST connect only; a reconnect reopens
|
|
1479
|
+
// the subs without re-backfilling (the durable backstop redelivers any missed window via dlv).
|
|
577
1480
|
if (this.channels.length) {
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
ack_wait,
|
|
589
|
-
deliver_policy: DeliverPolicy.New,
|
|
590
|
-
inactive_threshold,
|
|
591
|
-
});
|
|
592
|
-
// Arm the tail-drop watermarks BEFORE pump starts, so the tail can never deliver a
|
|
593
|
-
// just-created channel's message un-watermarked (which would double-emit: live + backfill).
|
|
594
|
-
const armed = await this.armJoin(this.channels);
|
|
595
|
-
await this.pump(chatStream(this.space), durable);
|
|
1481
|
+
// Arm the per-channel join watermarks BEFORE opening the subs: the backfill reads <= frontier and
|
|
1482
|
+
// the core-sub delivers > frontier, so they never overlap (first connect). On reconnect we reopen
|
|
1483
|
+
// without arming/backfilling.
|
|
1484
|
+
const armed = this.firstConnect ? await this.armJoin(this.channels) : undefined;
|
|
1485
|
+
for (const ch of this.channels)
|
|
1486
|
+
this.subscribeChat(ch);
|
|
1487
|
+
await this.confirmChatSub();
|
|
1488
|
+
for (const ch of this.channels)
|
|
1489
|
+
this.confirmingChatSubs.delete(chatSubject(this.space, "*", ch));
|
|
1490
|
+
if (armed)
|
|
596
1491
|
await this.backfillArmed(armed);
|
|
597
|
-
}
|
|
598
|
-
else {
|
|
599
|
-
// Rebind: reconcile the durable's filter to the CURRENT config (a config that changed
|
|
600
|
-
// between restarts is honored). Channels the config GAINED are backfilled like a fresh
|
|
601
|
-
// join; channels it LOST are dropped from the filter. An unchanged config = pure resume,
|
|
602
|
-
// empty diff, no re-replay.
|
|
603
|
-
await this.pump(chatStream(this.space), durable);
|
|
604
|
-
const haveFilters = info.config.filter_subjects ?? (info.config.filter_subject ? [info.config.filter_subject] : []);
|
|
605
|
-
// Channels the config gained = those not already covered by the durable's filters (a
|
|
606
|
-
// wildcard already covers its sub-channels). Backfill only those.
|
|
607
|
-
const gained = this.channels.filter((c) => !haveFilters.some((f) => subjectMatches(f, chatSubject(this.space, "*", c))));
|
|
608
|
-
// Arm watermarks for the gained channels BEFORE the filter reconcile flips them on.
|
|
609
|
-
const armed = gained.length ? await this.armJoin(gained) : undefined;
|
|
610
|
-
if (!sameSet(haveFilters, want))
|
|
611
|
-
await this.jsm.consumers.update(chatStream(this.space), durable, { filter_subjects: want });
|
|
612
|
-
if (armed)
|
|
613
|
-
await this.backfillArmed(armed);
|
|
614
|
-
}
|
|
615
1492
|
}
|
|
1493
|
+
// First connect, auth mode: hydrate the local generation mirror for BOOT durable memberships (the
|
|
1494
|
+
// manager provisioned them server-side, so they are not in plane3Channels yet) — without it,
|
|
1495
|
+
// leaving a boot durable channel could not tombstone its §7 boundary. Open mode has no Plane-3.
|
|
1496
|
+
if (this.firstConnect && this.creds && this.channels.length)
|
|
1497
|
+
await this.hydrateMemberships();
|
|
1498
|
+
this.firstConnect = false;
|
|
616
1499
|
// Anycast: a shared work-queue consumer for our role — one instance grabs each task.
|
|
617
1500
|
// Open mode self-creates; auth mode BINDS the provisioner-pre-created svc_<role>
|
|
618
1501
|
// durable (agents are denied CONSUMER.CREATE on TASK_<space>, since the create-time
|
|
@@ -667,8 +1550,14 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
667
1550
|
m.ack();
|
|
668
1551
|
continue;
|
|
669
1552
|
}
|
|
1553
|
+
// No pre-commit dedup here: the durable is the at-least-once path, so it must NEVER ack a copy
|
|
1554
|
+
// just because an id was "seen" — that would drop an unhandled message (the security/critic
|
|
1555
|
+
// HIGH). Steady state is single-path (coverage-partition: the core-sub drops durable-covered
|
|
1556
|
+
// channels). The only overlap is the brief live-first transition window, and a duplicate there
|
|
1557
|
+
// is coalesced downstream by the receiver's commit-aware id-dedup (MeshAgent.ingest keeps ONE
|
|
1558
|
+
// entry and takes THIS durable ack handle) — so the durable copy is acked only once handled.
|
|
670
1559
|
}
|
|
671
|
-
const delivery = { ack: () => m.ack(), nak: () => m.nak() };
|
|
1560
|
+
const delivery = { ack: () => m.ack(), nak: () => m.nak(), durable: true };
|
|
672
1561
|
this.emit("message", msg, delivery, {
|
|
673
1562
|
historical: false,
|
|
674
1563
|
kind: kindFromParsed(parsed.kind),
|
|
@@ -679,6 +1568,98 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
679
1568
|
this.emit("error", e);
|
|
680
1569
|
});
|
|
681
1570
|
}
|
|
1571
|
+
/** Open a native core subscription to a channel's live feed (the manager-free live read path,
|
|
1572
|
+
* broker-enforced by `sub.allow`). At-most-once — no replay, no ack; it is the live delivery for
|
|
1573
|
+
* every channel (boot + runtime). For a `durable` channel it is also the low-latency wake-hint
|
|
1574
|
+
* alongside the Plane-3 durable copy, coalesced by the receiver's id-dedup. Drops our own echo +
|
|
1575
|
+
* spoofed senders. */
|
|
1576
|
+
subscribeChat(channel) {
|
|
1577
|
+
if (!this.nc || this.chatSubs.has(channel))
|
|
1578
|
+
return;
|
|
1579
|
+
this.chatSubDenied.delete(channel);
|
|
1580
|
+
const subject = chatSubject(this.space, "*", channel);
|
|
1581
|
+
this.confirmingChatSubs.add(subject);
|
|
1582
|
+
const sub = this.nc.subscribe(subject, {
|
|
1583
|
+
callback: (err, m) => {
|
|
1584
|
+
if (err) {
|
|
1585
|
+
// async sub.allow refusal (or sub error): the live feed for this channel is dead — never a
|
|
1586
|
+
// leak (the broker refused it). Drop the channel from local joined state even if it was
|
|
1587
|
+
// already treated as joined — a LATE refusal beyond the confirm window: conformance #13
|
|
1588
|
+
// "drop on late refusal". (During the join's own confirm the channel isn't pushed yet, so
|
|
1589
|
+
// this fires nothing then; joinChannel reads `chatSubDenied` and throws cleanly.)
|
|
1590
|
+
this.chatSubDenied.add(channel);
|
|
1591
|
+
this.chatSubs.delete(channel);
|
|
1592
|
+
// NOTE: do NOT remove `subject` from confirmingChatSubs here — that set gates watchStatus's
|
|
1593
|
+
// suppression of this expected violation, and is cleared by joinChannel after confirm (or by
|
|
1594
|
+
// unsubscribeChat). Removing it in the callback races the watcher and leaks a spurious error.
|
|
1595
|
+
const i = this.channels.indexOf(channel);
|
|
1596
|
+
if (i >= 0) {
|
|
1597
|
+
this.channels.splice(i, 1);
|
|
1598
|
+
this.joinSeq.delete(channel);
|
|
1599
|
+
// A late sub.allow refusal forces this agent out of the channel (the broker revoked its live
|
|
1600
|
+
// read). If it held a Plane-3 durable membership, the §7 boundary must close too. This sub
|
|
1601
|
+
// callback can't await, so a fail-closed async helper RETRIES the tombstone (backoff) UNTIL it
|
|
1602
|
+
// succeeds, clearing the mirror only then; while pending it is surfaced via cotal_channels —
|
|
1603
|
+
// never a silent drop, never lost retry state.
|
|
1604
|
+
const gen = this.plane3Channels.get(channel);
|
|
1605
|
+
if (gen !== undefined)
|
|
1606
|
+
void this.closeRefusedMembership(channel, gen);
|
|
1607
|
+
this.emit("error", new Error(`left channel "${channel}": its live subscription was refused by the broker`));
|
|
1608
|
+
}
|
|
1609
|
+
return;
|
|
1610
|
+
}
|
|
1611
|
+
const parsed = parseSubject(m.subject);
|
|
1612
|
+
if (!parsed || parsed.kind !== "chat")
|
|
1613
|
+
return;
|
|
1614
|
+
let msg;
|
|
1615
|
+
try {
|
|
1616
|
+
msg = m.json();
|
|
1617
|
+
}
|
|
1618
|
+
catch (e) {
|
|
1619
|
+
this.emit("error", e);
|
|
1620
|
+
return;
|
|
1621
|
+
}
|
|
1622
|
+
if (!msg.from || msg.from.id !== parsed.sender)
|
|
1623
|
+
return; // spoof/malformed — drop (at-most-once)
|
|
1624
|
+
if (msg.from.id === this.card.id)
|
|
1625
|
+
return; // our own echo
|
|
1626
|
+
const delivery = { ack: () => { }, nak: () => { }, durable: false }; // live = at-most-once, not acked
|
|
1627
|
+
this.emit("message", msg, delivery, {
|
|
1628
|
+
historical: false,
|
|
1629
|
+
kind: kindFromParsed(parsed.kind),
|
|
1630
|
+
});
|
|
1631
|
+
},
|
|
1632
|
+
});
|
|
1633
|
+
this.chatSubs.set(channel, sub);
|
|
1634
|
+
}
|
|
1635
|
+
/** Close a channel's core subscription (manager-free leave). */
|
|
1636
|
+
unsubscribeChat(channel) {
|
|
1637
|
+
this.confirmingChatSubs.delete(chatSubject(this.space, "*", channel));
|
|
1638
|
+
const sub = this.chatSubs.get(channel);
|
|
1639
|
+
if (sub) {
|
|
1640
|
+
try {
|
|
1641
|
+
sub.unsubscribe();
|
|
1642
|
+
}
|
|
1643
|
+
catch {
|
|
1644
|
+
/* closing with the connection */
|
|
1645
|
+
}
|
|
1646
|
+
this.chatSubs.delete(channel);
|
|
1647
|
+
}
|
|
1648
|
+
this.chatSubDenied.delete(channel);
|
|
1649
|
+
}
|
|
1650
|
+
/** Confirm a just-opened core subscription was accepted by the broker. A `sub.allow` violation is
|
|
1651
|
+
* async in NATS, so flush (round-trips the SUB) then settle briefly to let the refusal land — a
|
|
1652
|
+
* denied subscribe must not read as a successful join (SPEC conformance #13). */
|
|
1653
|
+
async confirmChatSub() {
|
|
1654
|
+
if (!this.nc)
|
|
1655
|
+
throw new Error("connection not established");
|
|
1656
|
+
// flush() is the deterministic boundary: the broker's -ERR for an out-of-ACL SUB arrives BEFORE the
|
|
1657
|
+
// PONG, so once flush resolves the subscribe callback has already recorded any denial. A flush
|
|
1658
|
+
// FAILURE means the connection drained/closed mid-join — we have no confirmation, so let it throw
|
|
1659
|
+
// (joinChannel fails closed) instead of swallowing it and continuing as if confirmed.
|
|
1660
|
+
await this.nc.flush();
|
|
1661
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
1662
|
+
}
|
|
682
1663
|
/** The highest join watermark among the joined subscriptions that cover `concreteChannel`
|
|
683
1664
|
* (a wildcard sub like `team.>` covers `team.backend`), or undefined if none — the tail
|
|
684
1665
|
* drops a chat message with `seq <= ` this. */
|
|
@@ -709,8 +1690,8 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
709
1690
|
return (await this.jsm.streams.info(chatStream(this.space))).state.last_seq;
|
|
710
1691
|
}
|
|
711
1692
|
/** Phase 1 of a join — arm each channel's tail-drop watermark at the current frontier. MUST run
|
|
712
|
-
* BEFORE the
|
|
713
|
-
*
|
|
1693
|
+
* BEFORE opening the core subscription so the live tail can never carry a just-joined message
|
|
1694
|
+
* un-watermarked — which would double-emit it (live + backfill).
|
|
714
1695
|
* Returns the per-channel frontiers for {@link backfillArmed}. */
|
|
715
1696
|
async armJoin(channels) {
|
|
716
1697
|
const frontiers = new Map();
|
|
@@ -738,73 +1719,122 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
738
1719
|
async joinPolicyFresh(channel) {
|
|
739
1720
|
if (!this.channelKv)
|
|
740
1721
|
return { replay: effectiveReplay(undefined, undefined) };
|
|
1722
|
+
// A wildcard subscription (`review.>`) has no single registry entry — and `>`/`*` are illegal
|
|
1723
|
+
// KV keys, so a per-channel get would throw. Read only the space defaults for it; concrete
|
|
1724
|
+
// channels still get their per-channel override.
|
|
741
1725
|
const [cfg, defaults] = await Promise.all([
|
|
742
|
-
readChannelConfig(this.channelKv, channel),
|
|
1726
|
+
isConcreteChannel(channel) ? readChannelConfig(this.channelKv, channel) : Promise.resolve(undefined),
|
|
743
1727
|
readChannelDefaults(this.channelKv),
|
|
744
1728
|
]);
|
|
745
1729
|
return { replay: effectiveReplay(cfg, defaults), windowMs: effectiveReplayWindowMs(cfg, defaults) };
|
|
746
1730
|
}
|
|
747
|
-
/**
|
|
748
|
-
*
|
|
749
|
-
*
|
|
750
|
-
*
|
|
751
|
-
*
|
|
752
|
-
|
|
753
|
-
|
|
1731
|
+
/**
|
|
1732
|
+
* Read retained chat history on ONE channel subject through a name-scoped, single-filter
|
|
1733
|
+
* EPHEMERAL pull consumer — the broker-contained replacement for the removed Direct Get. The
|
|
1734
|
+
* create rides `$JS.API.CONSUMER.CREATE.<CHAT>.<chathist_id>.<subject>`, whose trailing filter
|
|
1735
|
+
* token nats-server pins to the request body (JSConsumerCreateFilterSubjectMismatchErr, code
|
|
1736
|
+
* 10131) — so an agent can only ever replay a channel its `allowSubscribe` grants. Single filter
|
|
1737
|
+
* only (plural isn't ACL-constrainable); `AckPolicy.None` + `mem_storage` so it leaves no durable
|
|
1738
|
+
* state, and it is deleted right after. Returns raw messages in stream order from `start`,
|
|
1739
|
+
* stopping once past `untilSeq` (exclusive of it) or after `limit`. The per-instance name means
|
|
1740
|
+
* calls must be serial — every reader here awaits to completion, so they are.
|
|
1741
|
+
*/
|
|
1742
|
+
async collectHistory(subject, start, opts = {}) {
|
|
1743
|
+
// Serialize on the per-instance lock: the fixed `chathist_<id>` name means two concurrent reads
|
|
1744
|
+
// (recall + join-backfill + drop-marker can race in-process) would delete/recreate the consumer
|
|
1745
|
+
// under each other and cross-feed results. The chain makes the "serial callers" assumption true.
|
|
1746
|
+
const run = this.histLock.then(() => this.collectHistoryInner(subject, start, opts));
|
|
1747
|
+
this.histLock = run.catch(() => { }); // keep the chain alive on error
|
|
1748
|
+
return run;
|
|
1749
|
+
}
|
|
1750
|
+
async collectHistoryInner(subject, start, opts = {}) {
|
|
1751
|
+
if (!this.jsm || !this.js)
|
|
754
1752
|
throw new Error("endpoint not started");
|
|
755
|
-
const
|
|
756
|
-
const
|
|
757
|
-
|
|
758
|
-
//
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
1753
|
+
const stream = chatStream(this.space);
|
|
1754
|
+
const name = chatHistDurable(this.card.id);
|
|
1755
|
+
const out = [];
|
|
1756
|
+
// Clear any consumer leaked by a crashed prior read before re-creating it with THIS read's
|
|
1757
|
+
// single filter (the read ACL is enforced at create — see the doc above).
|
|
1758
|
+
try {
|
|
1759
|
+
await this.jsm.consumers.delete(stream, name);
|
|
1760
|
+
}
|
|
1761
|
+
catch { /* none — fine */ }
|
|
1762
|
+
await this.jsm.consumers.add(stream, {
|
|
1763
|
+
name,
|
|
1764
|
+
filter_subject: subject,
|
|
1765
|
+
ack_policy: AckPolicy.None,
|
|
1766
|
+
mem_storage: true,
|
|
1767
|
+
inactive_threshold: nanos(30_000),
|
|
1768
|
+
...("time" in start
|
|
1769
|
+
? { deliver_policy: DeliverPolicy.StartTime, opt_start_time: start.time.toISOString() }
|
|
1770
|
+
: { deliver_policy: DeliverPolicy.StartSequence, opt_start_seq: start.seq }),
|
|
1771
|
+
});
|
|
1772
|
+
try {
|
|
1773
|
+
const consumer = await this.js.consumers.get(stream, name);
|
|
1774
|
+
let pending = (await consumer.info()).num_pending;
|
|
1775
|
+
while (pending > 0) {
|
|
1776
|
+
const want = Math.min(pending, 256);
|
|
1777
|
+
const iter = await consumer.fetch({ max_messages: want, expires: 5_000 });
|
|
1778
|
+
let got = 0;
|
|
1779
|
+
for await (const m of iter) {
|
|
772
1780
|
got++;
|
|
773
|
-
if (
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
msg = sm.json();
|
|
779
|
-
}
|
|
780
|
-
catch {
|
|
781
|
-
continue; // skip undecodable
|
|
782
|
-
}
|
|
783
|
-
// Same authenticity guard as the tail; skip our own echoes in history.
|
|
784
|
-
const parsed = parseSubject(sm.subject);
|
|
785
|
-
if (!parsed || msg.from?.id !== parsed.sender || msg.from.id === this.card.id)
|
|
1781
|
+
if (opts.untilSeq !== undefined && m.seq > opts.untilSeq)
|
|
1782
|
+
return out; // crossed the frontier
|
|
1783
|
+
// Belt-and-suspenders over the lock: only keep messages on the requested channel subject
|
|
1784
|
+
// (the consumer's filter already bounds this; guards against any stale-consumer edge).
|
|
1785
|
+
if (!subjectMatches(subject, m.subject))
|
|
786
1786
|
continue;
|
|
787
|
-
|
|
1787
|
+
out.push(m);
|
|
1788
|
+
if (opts.limit !== undefined && out.length >= opts.limit)
|
|
1789
|
+
return out;
|
|
788
1790
|
}
|
|
1791
|
+
if (got < want)
|
|
1792
|
+
break; // drained early
|
|
1793
|
+
pending -= got;
|
|
789
1794
|
}
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
if (e.code === 404)
|
|
795
|
-
break;
|
|
796
|
-
this.emit("error", e);
|
|
797
|
-
break;
|
|
1795
|
+
}
|
|
1796
|
+
finally {
|
|
1797
|
+
try {
|
|
1798
|
+
await this.jsm.consumers.delete(stream, name);
|
|
798
1799
|
}
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
1800
|
+
catch { /* already gone */ }
|
|
1801
|
+
}
|
|
1802
|
+
return out;
|
|
1803
|
+
}
|
|
1804
|
+
/** Read a channel's retained history up to `upToSeq` (the join frontier) and emit each message
|
|
1805
|
+
* as a `historical` "message" event. `sinceMs` bounds how far back via a native consumer
|
|
1806
|
+
* `start_time` (now − window); unset ⇒ the full retained window. New messages (`seq > upToSeq`)
|
|
1807
|
+
* are skipped — the live tail owns them. Reads through the contained {@link collectHistory}. */
|
|
1808
|
+
async backfillChannel(channel, upToSeq, sinceMs) {
|
|
1809
|
+
const subject = chatSubject(this.space, "*", channel);
|
|
1810
|
+
const start = sinceMs === undefined ? { seq: 1 } : { time: new Date(Date.now() - sinceMs) };
|
|
1811
|
+
let msgs;
|
|
1812
|
+
try {
|
|
1813
|
+
msgs = await this.collectHistory(subject, start, { untilSeq: upToSeq });
|
|
802
1814
|
}
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
1815
|
+
catch (e) {
|
|
1816
|
+
this.emit("error", e);
|
|
1817
|
+
return 0;
|
|
1818
|
+
}
|
|
1819
|
+
const noop = { ack: () => { }, nak: () => { }, durable: false };
|
|
1820
|
+
let n = 0;
|
|
1821
|
+
for (const sm of msgs) {
|
|
1822
|
+
let msg;
|
|
1823
|
+
try {
|
|
1824
|
+
msg = sm.json();
|
|
1825
|
+
}
|
|
1826
|
+
catch {
|
|
1827
|
+
continue; // skip undecodable
|
|
1828
|
+
}
|
|
1829
|
+
// Same authenticity guard as the tail; skip our own echoes in history.
|
|
1830
|
+
const parsed = parseSubject(sm.subject);
|
|
1831
|
+
if (!parsed || msg.from?.id !== parsed.sender || msg.from.id === this.card.id)
|
|
1832
|
+
continue;
|
|
1833
|
+
// Backfill only ever reads the chat stream, so the authenticated class is always "channel".
|
|
806
1834
|
this.emit("message", msg, noop, { historical: true, kind: "channel" });
|
|
807
|
-
|
|
1835
|
+
n++;
|
|
1836
|
+
}
|
|
1837
|
+
return n;
|
|
808
1838
|
}
|
|
809
1839
|
/**
|
|
810
1840
|
* Replay-gated pull of a channel's retained ambient from `sinceSeq` (exclusive) forward — the
|
|
@@ -815,55 +1845,40 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
815
1845
|
*
|
|
816
1846
|
* Honors the **same** per-channel replay gate as join-backfill ({@link joinPolicyFresh}): a
|
|
817
1847
|
* `replay=off` channel returns nothing, so `focus` can't become a history bypass for a channel
|
|
818
|
-
* that denies replay to everyone else (
|
|
819
|
-
* app gate
|
|
1848
|
+
* that denies replay to everyone else (the read ACL bounds *which* channels recall can touch; this
|
|
1849
|
+
* app gate bounds *whether* a permitted channel replays).
|
|
820
1850
|
*/
|
|
821
1851
|
async recallChannel(channel, sinceSeq) {
|
|
822
1852
|
if (!this.jsm)
|
|
823
|
-
throw new Error(
|
|
1853
|
+
throw new Error(this.notLiveMsg());
|
|
824
1854
|
if (!isConcreteChannel(channel))
|
|
825
1855
|
return { messages: [], dropped: false };
|
|
826
1856
|
const policy = await this.joinPolicyFresh(channel);
|
|
827
1857
|
if (!policy.replay)
|
|
828
1858
|
return { messages: [], dropped: false };
|
|
829
1859
|
const subject = chatSubject(this.space, "*", channel);
|
|
1860
|
+
let raw;
|
|
1861
|
+
try {
|
|
1862
|
+
raw = await this.collectHistory(subject, { seq: sinceSeq + 1 });
|
|
1863
|
+
}
|
|
1864
|
+
catch (e) {
|
|
1865
|
+
this.emit("error", e);
|
|
1866
|
+
raw = [];
|
|
1867
|
+
}
|
|
830
1868
|
const collected = [];
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
let last = 0;
|
|
834
|
-
let got = 0;
|
|
1869
|
+
for (const sm of raw) {
|
|
1870
|
+
let msg;
|
|
835
1871
|
try {
|
|
836
|
-
|
|
837
|
-
seq: startSeq,
|
|
838
|
-
next_by_subj: subject,
|
|
839
|
-
batch: 256,
|
|
840
|
-
});
|
|
841
|
-
for await (const sm of iter) {
|
|
842
|
-
got++;
|
|
843
|
-
last = sm.seq;
|
|
844
|
-
let msg;
|
|
845
|
-
try {
|
|
846
|
-
msg = sm.json();
|
|
847
|
-
}
|
|
848
|
-
catch {
|
|
849
|
-
continue; // skip undecodable
|
|
850
|
-
}
|
|
851
|
-
// Same authenticity guard as the tail/backfill; skip our own echoes.
|
|
852
|
-
const parsed = parseSubject(sm.subject);
|
|
853
|
-
if (!parsed || msg.from?.id !== parsed.sender || msg.from.id === this.card.id)
|
|
854
|
-
continue;
|
|
855
|
-
collected.push(msg);
|
|
856
|
-
}
|
|
1872
|
+
msg = sm.json();
|
|
857
1873
|
}
|
|
858
|
-
catch
|
|
859
|
-
|
|
860
|
-
break; // no more history (empty or last page)
|
|
861
|
-
this.emit("error", e);
|
|
862
|
-
break;
|
|
1874
|
+
catch {
|
|
1875
|
+
continue; // skip undecodable
|
|
863
1876
|
}
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
1877
|
+
// Same authenticity guard as the tail/backfill; skip our own echoes.
|
|
1878
|
+
const parsed = parseSubject(sm.subject);
|
|
1879
|
+
if (!parsed || msg.from?.id !== parsed.sender || msg.from.id === this.card.id)
|
|
1880
|
+
continue;
|
|
1881
|
+
collected.push(msg);
|
|
867
1882
|
}
|
|
868
1883
|
const dropped = await this.channelDropped(subject, sinceSeq);
|
|
869
1884
|
return { messages: collected, dropped };
|
|
@@ -895,24 +1910,18 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
895
1910
|
return oldest !== undefined && oldest > sinceSeq + 1;
|
|
896
1911
|
}
|
|
897
1912
|
/** Sequence of the earliest message still retained on a channel subject (any sender), or
|
|
898
|
-
* undefined if nothing is retained. One
|
|
1913
|
+
* undefined if nothing is retained. One message through the contained {@link collectHistory} —
|
|
1914
|
+
* used for the recall drop marker. */
|
|
899
1915
|
async channelOldestSeq(subject) {
|
|
900
1916
|
if (!this.jsm)
|
|
901
1917
|
return undefined;
|
|
902
1918
|
try {
|
|
903
|
-
const
|
|
904
|
-
|
|
905
|
-
next_by_subj: subject,
|
|
906
|
-
batch: 1,
|
|
907
|
-
});
|
|
908
|
-
for await (const sm of iter)
|
|
909
|
-
return sm.seq;
|
|
910
|
-
return undefined;
|
|
1919
|
+
const [first] = await this.collectHistory(subject, { seq: 1 }, { limit: 1 });
|
|
1920
|
+
return first?.seq;
|
|
911
1921
|
}
|
|
912
1922
|
catch (e) {
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
return undefined; // 404 = nothing retained on this subject (normal)
|
|
1923
|
+
this.emit("error", e);
|
|
1924
|
+
return undefined;
|
|
916
1925
|
}
|
|
917
1926
|
}
|
|
918
1927
|
async publishPresence() {
|
|
@@ -922,9 +1931,15 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
922
1931
|
card: this.card,
|
|
923
1932
|
status: this.status,
|
|
924
1933
|
activity: this.activity,
|
|
1934
|
+
attention: this.attentionMode,
|
|
1935
|
+
channelModes: this.channelModes,
|
|
925
1936
|
ts: Date.now(),
|
|
926
1937
|
};
|
|
927
|
-
|
|
1938
|
+
// Wire contract (SPEC §6): an OFFLINE record must not carry the advisory attention fields. Scrub at
|
|
1939
|
+
// the publisher — this covers stop(), setStatus("offline"), and any future offline publish site, so
|
|
1940
|
+
// the raw KV record is compliant, not only the observer-side roster materialization.
|
|
1941
|
+
const record = this.status === "offline" ? this.toOffline(p) : p;
|
|
1942
|
+
await this.kv.put(this.card.id, JSON.stringify(record));
|
|
928
1943
|
}
|
|
929
1944
|
async startPresenceWatch() {
|
|
930
1945
|
if (!this.kv)
|
|
@@ -989,7 +2004,9 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
989
2004
|
applyPresence(id, raw) {
|
|
990
2005
|
const prev = this.roster.get(id);
|
|
991
2006
|
const stale = Date.now() - raw.ts > this.ttlMs;
|
|
992
|
-
|
|
2007
|
+
// Any offline materialization (a stale snapshot OR a graceful-leave record) drops the advisory
|
|
2008
|
+
// attention fields — an offline peer must not carry a stale `[focus]`/`locally muted` hint.
|
|
2009
|
+
const p = stale || raw.status === "offline" ? this.toOffline(raw) : raw;
|
|
993
2010
|
// First time we hear about an already-offline peer (stale snapshot): record quietly.
|
|
994
2011
|
if (!prev && p.status === "offline") {
|
|
995
2012
|
this.roster.set(id, p);
|
|
@@ -1002,7 +2019,9 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
1002
2019
|
prev.status !== "offline" &&
|
|
1003
2020
|
p.status !== "offline" &&
|
|
1004
2021
|
prev.status === p.status &&
|
|
1005
|
-
prev.activity === p.activity
|
|
2022
|
+
prev.activity === p.activity &&
|
|
2023
|
+
prev.attention === p.attention &&
|
|
2024
|
+
sameChannelModes(prev.channelModes, p.channelModes)) {
|
|
1006
2025
|
this.roster.set(id, p);
|
|
1007
2026
|
return;
|
|
1008
2027
|
}
|
|
@@ -1015,12 +2034,18 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
1015
2034
|
this.emit("presence", { type, presence: p });
|
|
1016
2035
|
this.emit("roster", this.getRoster());
|
|
1017
2036
|
}
|
|
2037
|
+
/** Materialize an OFFLINE presence record: drop the advisory attention fields. An offline peer must
|
|
2038
|
+
* not show a stale `[focus]` or "locally muted #x" hint — SPEC: attention removed on offline sweep,
|
|
2039
|
+
* channel modes reset on restart. card/activity/ts are kept. */
|
|
2040
|
+
toOffline(p) {
|
|
2041
|
+
return { ...p, status: "offline", attention: undefined, channelModes: undefined };
|
|
2042
|
+
}
|
|
1018
2043
|
/** Mark a known peer offline (on KV delete/purge), keeping it in the roster. */
|
|
1019
2044
|
markOffline(id) {
|
|
1020
2045
|
const prev = this.roster.get(id);
|
|
1021
2046
|
if (!prev || prev.status === "offline")
|
|
1022
2047
|
return;
|
|
1023
|
-
const offline =
|
|
2048
|
+
const offline = this.toOffline(prev);
|
|
1024
2049
|
this.roster.set(id, offline);
|
|
1025
2050
|
this.emit("presence", { type: "offline", presence: offline });
|
|
1026
2051
|
this.emit("roster", this.getRoster());
|
|
@@ -1028,10 +2053,11 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
1028
2053
|
sweep() {
|
|
1029
2054
|
const now = Date.now();
|
|
1030
2055
|
let changed = false;
|
|
1031
|
-
for (const [, p] of this.roster) {
|
|
2056
|
+
for (const [id, p] of this.roster) {
|
|
1032
2057
|
if (p.status !== "offline" && now - p.ts > this.ttlMs) {
|
|
1033
|
-
|
|
1034
|
-
this.
|
|
2058
|
+
const offline = this.toOffline(p);
|
|
2059
|
+
this.roster.set(id, offline);
|
|
2060
|
+
this.emit("presence", { type: "offline", presence: offline });
|
|
1035
2061
|
changed = true;
|
|
1036
2062
|
}
|
|
1037
2063
|
}
|
|
@@ -1039,13 +2065,6 @@ export class CotalEndpoint extends EventEmitter {
|
|
|
1039
2065
|
this.emit("roster", this.getRoster());
|
|
1040
2066
|
}
|
|
1041
2067
|
}
|
|
1042
|
-
/** The id token of a chat-stream durable, or null if it isn't one — the inverse of
|
|
1043
|
-
* `chatDurable` (`chat_<token(id)>`). token() is lossy, so this returns the token, not the
|
|
1044
|
-
* original id; callers match it forward against `token(card.id)`. */
|
|
1045
|
-
function chatDurableToken(durable) {
|
|
1046
|
-
const prefix = "chat_";
|
|
1047
|
-
return durable.startsWith(prefix) ? durable.slice(prefix.length) : null;
|
|
1048
|
-
}
|
|
1049
2068
|
/** Map an authenticated parsed-subject kind to the message class surfaced to "message" listeners.
|
|
1050
2069
|
* Throws on `ctl` (control-plane is request/reply, never a "message") — per repo convention, no
|
|
1051
2070
|
* silent default: an unexpected delivering kind is a bug, not something to swallow. */
|
|
@@ -1061,12 +2080,14 @@ function kindFromParsed(kind) {
|
|
|
1061
2080
|
throw new Error(`cannot derive a message kind from subject kind "${kind}"`);
|
|
1062
2081
|
}
|
|
1063
2082
|
}
|
|
1064
|
-
/**
|
|
1065
|
-
|
|
1066
|
-
|
|
2083
|
+
/** Shallow-equal two per-channel-mode maps (presence dedup): a change must re-emit, so an attention
|
|
2084
|
+
* toggle isn't swallowed as a quiet heartbeat. Absent and empty compare equal. */
|
|
2085
|
+
function sameChannelModes(a, b) {
|
|
2086
|
+
const ak = a ? Object.keys(a) : [];
|
|
2087
|
+
const bk = b ? Object.keys(b) : [];
|
|
2088
|
+
if (ak.length !== bk.length)
|
|
1067
2089
|
return false;
|
|
1068
|
-
|
|
1069
|
-
return b.every((x) => s.has(x));
|
|
2090
|
+
return ak.every((k) => a[k] === b?.[k]);
|
|
1070
2091
|
}
|
|
1071
2092
|
function authOpts(a) {
|
|
1072
2093
|
const tls = a.tls ? {} : undefined;
|
|
@@ -1088,6 +2109,19 @@ function describeStatusError(err) {
|
|
|
1088
2109
|
}
|
|
1089
2110
|
return err;
|
|
1090
2111
|
}
|
|
2112
|
+
/** True when a failure is a NATS *permission denial* — the subject is forbidden to this
|
|
2113
|
+
* endpoint's creds — rather than a missing responder or a timeout. The two need opposite
|
|
2114
|
+
* fixes (grant the capability vs. start/await the service), so callers (e.g. a control
|
|
2115
|
+
* request that can't reach the manager) must tell them apart instead of defaulting to
|
|
2116
|
+
* "service down". Unwraps a wrapped `cause` and falls back to the server's error text, since
|
|
2117
|
+
* a denied publish can surface either as the typed error or inside a request rejection. */
|
|
2118
|
+
export function isPermissionDenied(e) {
|
|
2119
|
+
if (e instanceof PermissionViolationError)
|
|
2120
|
+
return true;
|
|
2121
|
+
if (e?.cause instanceof PermissionViolationError)
|
|
2122
|
+
return true;
|
|
2123
|
+
return /permissions?\s+violation/i.test(String(e?.message ?? ""));
|
|
2124
|
+
}
|
|
1091
2125
|
/** Whether a NATS server is *running* at `servers`. True on a successful connect AND on an
|
|
1092
2126
|
* auth rejection — an auth error means a server is there, just refusing these creds (so the
|
|
1093
2127
|
* caller should surface the real auth failure, not a misleading "server down", and `up`
|