@cello-protocol/daemon 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-loader.d.ts +41 -0
- package/dist/agent-loader.d.ts.map +1 -0
- package/dist/agent-loader.js +94 -0
- package/dist/agent-loader.js.map +1 -0
- package/dist/bin/cello-daemon.d.ts +13 -0
- package/dist/bin/cello-daemon.d.ts.map +1 -0
- package/dist/bin/cello-daemon.js +170 -0
- package/dist/bin/cello-daemon.js.map +1 -0
- package/dist/cello-node-transport-dialer.d.ts +59 -0
- package/dist/cello-node-transport-dialer.d.ts.map +1 -0
- package/dist/cello-node-transport-dialer.js +108 -0
- package/dist/cello-node-transport-dialer.js.map +1 -0
- package/dist/challenge-verifier.d.ts +12 -0
- package/dist/challenge-verifier.d.ts.map +1 -0
- package/dist/challenge-verifier.js +11 -0
- package/dist/challenge-verifier.js.map +1 -0
- package/dist/connect-or-start.d.ts +25 -0
- package/dist/connect-or-start.d.ts.map +1 -0
- package/dist/connect-or-start.js +117 -0
- package/dist/connect-or-start.js.map +1 -0
- package/dist/content-park-client.d.ts +49 -0
- package/dist/content-park-client.d.ts.map +1 -0
- package/dist/content-park-client.js +196 -0
- package/dist/content-park-client.js.map +1 -0
- package/dist/daemon.d.ts +65 -0
- package/dist/daemon.d.ts.map +1 -0
- package/dist/daemon.js +3202 -0
- package/dist/daemon.js.map +1 -0
- package/dist/directory-bootstrap.d.ts +55 -0
- package/dist/directory-bootstrap.d.ts.map +1 -0
- package/dist/directory-bootstrap.js +102 -0
- package/dist/directory-bootstrap.js.map +1 -0
- package/dist/file-manifest-provider.d.ts +18 -0
- package/dist/file-manifest-provider.d.ts.map +1 -0
- package/dist/file-manifest-provider.js +72 -0
- package/dist/file-manifest-provider.js.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +18 -0
- package/dist/index.js.map +1 -0
- package/dist/ipc-client.d.ts +31 -0
- package/dist/ipc-client.d.ts.map +1 -0
- package/dist/ipc-client.js +112 -0
- package/dist/ipc-client.js.map +1 -0
- package/dist/ipc-server.d.ts +49 -0
- package/dist/ipc-server.d.ts.map +1 -0
- package/dist/ipc-server.js +268 -0
- package/dist/ipc-server.js.map +1 -0
- package/dist/lock-file.d.ts +27 -0
- package/dist/lock-file.d.ts.map +1 -0
- package/dist/lock-file.js +84 -0
- package/dist/lock-file.js.map +1 -0
- package/dist/manifest-loader.d.ts +33 -0
- package/dist/manifest-loader.d.ts.map +1 -0
- package/dist/manifest-loader.js +70 -0
- package/dist/manifest-loader.js.map +1 -0
- package/dist/manifest-poll-scheduler.d.ts +31 -0
- package/dist/manifest-poll-scheduler.d.ts.map +1 -0
- package/dist/manifest-poll-scheduler.js +59 -0
- package/dist/manifest-poll-scheduler.js.map +1 -0
- package/dist/manifest-version-store-file.d.ts +18 -0
- package/dist/manifest-version-store-file.d.ts.map +1 -0
- package/dist/manifest-version-store-file.js +40 -0
- package/dist/manifest-version-store-file.js.map +1 -0
- package/dist/manifest-version-store.d.ts +14 -0
- package/dist/manifest-version-store.d.ts.map +1 -0
- package/dist/manifest-version-store.js +13 -0
- package/dist/manifest-version-store.js.map +1 -0
- package/dist/network-directory-node.d.ts +94 -0
- package/dist/network-directory-node.d.ts.map +1 -0
- package/dist/network-directory-node.js +626 -0
- package/dist/network-directory-node.js.map +1 -0
- package/dist/nonce-dedup.d.ts +68 -0
- package/dist/nonce-dedup.d.ts.map +1 -0
- package/dist/nonce-dedup.js +204 -0
- package/dist/nonce-dedup.js.map +1 -0
- package/dist/notification-dispatcher.d.ts +65 -0
- package/dist/notification-dispatcher.d.ts.map +1 -0
- package/dist/notification-dispatcher.js +138 -0
- package/dist/notification-dispatcher.js.map +1 -0
- package/dist/registration-context.d.ts +69 -0
- package/dist/registration-context.d.ts.map +1 -0
- package/dist/registration-context.js +118 -0
- package/dist/registration-context.js.map +1 -0
- package/dist/registration-manager.d.ts +72 -0
- package/dist/registration-manager.d.ts.map +1 -0
- package/dist/registration-manager.js +267 -0
- package/dist/registration-manager.js.map +1 -0
- package/dist/registration-persistence.d.ts +131 -0
- package/dist/registration-persistence.d.ts.map +1 -0
- package/dist/registration-persistence.js +233 -0
- package/dist/registration-persistence.js.map +1 -0
- package/dist/retry-queue.d.ts +144 -0
- package/dist/retry-queue.d.ts.map +1 -0
- package/dist/retry-queue.js +444 -0
- package/dist/retry-queue.js.map +1 -0
- package/dist/seal-frontier-verify.d.ts +58 -0
- package/dist/seal-frontier-verify.d.ts.map +1 -0
- package/dist/seal-frontier-verify.js +87 -0
- package/dist/seal-frontier-verify.js.map +1 -0
- package/dist/seal-legibility-tbs.d.ts +25 -0
- package/dist/seal-legibility-tbs.d.ts.map +1 -0
- package/dist/seal-legibility-tbs.js +78 -0
- package/dist/seal-legibility-tbs.js.map +1 -0
- package/dist/seal-upgrade.d.ts +90 -0
- package/dist/seal-upgrade.d.ts.map +1 -0
- package/dist/seal-upgrade.js +178 -0
- package/dist/seal-upgrade.js.map +1 -0
- package/dist/session-assignment-parser.d.ts +22 -0
- package/dist/session-assignment-parser.d.ts.map +1 -0
- package/dist/session-assignment-parser.js +139 -0
- package/dist/session-assignment-parser.js.map +1 -0
- package/dist/session-ceremony.d.ts +156 -0
- package/dist/session-ceremony.d.ts.map +1 -0
- package/dist/session-ceremony.js +447 -0
- package/dist/session-ceremony.js.map +1 -0
- package/dist/session-connection-gater.d.ts +91 -0
- package/dist/session-connection-gater.d.ts.map +1 -0
- package/dist/session-connection-gater.js +146 -0
- package/dist/session-connection-gater.js.map +1 -0
- package/dist/session-node-manager.d.ts +585 -0
- package/dist/session-node-manager.d.ts.map +1 -0
- package/dist/session-node-manager.js +2609 -0
- package/dist/session-node-manager.js.map +1 -0
- package/dist/session-relay-client.d.ts +101 -0
- package/dist/session-relay-client.d.ts.map +1 -0
- package/dist/session-relay-client.js +520 -0
- package/dist/session-relay-client.js.map +1 -0
- package/dist/session-tree.d.ts +80 -0
- package/dist/session-tree.d.ts.map +1 -0
- package/dist/session-tree.js +123 -0
- package/dist/session-tree.js.map +1 -0
- package/dist/signaling-connect.d.ts +83 -0
- package/dist/signaling-connect.d.ts.map +1 -0
- package/dist/signaling-connect.js +266 -0
- package/dist/signaling-connect.js.map +1 -0
- package/dist/transcript-cipher.d.ts +31 -0
- package/dist/transcript-cipher.d.ts.map +1 -0
- package/dist/transcript-cipher.js +74 -0
- package/dist/transcript-cipher.js.map +1 -0
- package/dist/transport-composition.d.ts +31 -0
- package/dist/transport-composition.d.ts.map +1 -0
- package/dist/transport-composition.js +55 -0
- package/dist/transport-composition.js.map +1 -0
- package/dist/transport-selector.d.ts +189 -0
- package/dist/transport-selector.d.ts.map +1 -0
- package/dist/transport-selector.js +195 -0
- package/dist/transport-selector.js.map +1 -0
- package/dist/types.d.ts +265 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +33 -0
- package/dist/types.js.map +1 -0
- package/package.json +4 -4
package/dist/daemon.js
ADDED
|
@@ -0,0 +1,3202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CELLO Daemon process — the long-running background service.
|
|
3
|
+
*
|
|
4
|
+
* Pseudocode:
|
|
5
|
+
* 1. startDaemon(config):
|
|
6
|
+
* a. M7-MANIFEST-002: Load and verify consortium manifest (BEFORE any directory connection)
|
|
7
|
+
* - On signature failure: log error, skip connection
|
|
8
|
+
* - On expiry: log directory.auth.manifest.expired at ERROR, skip connection
|
|
9
|
+
* - On version rollback: log directory.auth.manifest.version.rollback at ERROR
|
|
10
|
+
* - On success: log directory.auth.manifest.verified at INFO
|
|
11
|
+
* b. Load agents from ~/.cello/agents/ (or legacy ~/.cello/key)
|
|
12
|
+
* c. Acquire lock file atomically
|
|
13
|
+
* d. Initialize SessionNodeManager (creates standing receiver, detects interrupted sessions)
|
|
14
|
+
* e. Start IPC server on Unix domain socket
|
|
15
|
+
* f. Register method handlers (status, shutdown)
|
|
16
|
+
* g. Log daemon.started event (with manifestVerified field)
|
|
17
|
+
* h. Set up SIGTERM/SIGINT handlers for graceful shutdown
|
|
18
|
+
* i. Start background manifest polling (if pollScheduler provided and manifest verified)
|
|
19
|
+
*
|
|
20
|
+
* 2. shutdown(reason):
|
|
21
|
+
* a. Cancel manifest poll scheduler
|
|
22
|
+
* b. Log daemon.stopped event
|
|
23
|
+
* c. Call SessionNodeManager.gracefulShutdown() (marks sessions interrupted)
|
|
24
|
+
* d. Stop IPC server (finishes in-flight, sends shutdown frame)
|
|
25
|
+
* e. Remove lock file
|
|
26
|
+
* f. Exit 0
|
|
27
|
+
*/
|
|
28
|
+
import { mkdir } from "node:fs/promises";
|
|
29
|
+
import { randomUUID, createHash } from "node:crypto";
|
|
30
|
+
import { dirname, join } from "node:path";
|
|
31
|
+
import { loadAgents } from "./agent-loader.js";
|
|
32
|
+
import { acquireLock, removeLock } from "./lock-file.js";
|
|
33
|
+
import { createIpcServer } from "./ipc-server.js";
|
|
34
|
+
import { SessionNodeManager } from "./session-node-manager.js";
|
|
35
|
+
import { RetryQueue } from "./retry-queue.js";
|
|
36
|
+
import { NonceDedupStore } from "./nonce-dedup.js";
|
|
37
|
+
import { ContentParkClient } from "./content-park-client.js";
|
|
38
|
+
import { NotificationDispatcher } from "./notification-dispatcher.js";
|
|
39
|
+
import { createNode, SignalingManager } from "@cello-protocol/transport";
|
|
40
|
+
import { createSignalingConnect } from "./signaling-connect.js";
|
|
41
|
+
import { RegistrationManager } from "./registration-manager.js";
|
|
42
|
+
import { DaemonRegistrationContext } from "./registration-context.js";
|
|
43
|
+
import { FileRegistrationPersistence } from "./registration-persistence.js";
|
|
44
|
+
import { verify as ed25519Verify, sealToRecipient } from "@cello-protocol/crypto";
|
|
45
|
+
import { attemptSealUpgrade as attemptSealUpgradeImpl, verifyUpgradeConfirmedCert } from "./seal-upgrade.js";
|
|
46
|
+
// CELLO-M7-MSG-001 (AC-013/AC-018): the single application content-size cap, enforced
|
|
47
|
+
// at the send point here (the receive point lives in the transport content decode).
|
|
48
|
+
import { MAX_CONTENT_BYTES, computeGenesisPrevRoot } from "@cello-protocol/protocol-types";
|
|
49
|
+
import { resolveCelloEnv, createTransportSelector, isProductionVariant, } from "./transport-composition.js";
|
|
50
|
+
import { selectAdvertisedAddress } from "./transport-selector.js";
|
|
51
|
+
import { parseSessionAssignment, sessionRequestErrorReason } from "./session-assignment-parser.js";
|
|
52
|
+
import { wireSessionCeremonyHandler, wireSessionOfferHandler, wireSealCeremonyHandler, verifyUnilateralCertificate, verifyBilateralSealCertificate } from "./session-ceremony.js";
|
|
53
|
+
import { reDeriveFrontiers, findInflatedFrontier } from "./seal-frontier-verify.js";
|
|
54
|
+
import { LocalAutoNatStub } from "@cello-protocol/transport";
|
|
55
|
+
/**
 * M7-SESSION-001 (H-1): canonical byte encoding of a SEAL-INTERRUPTED leaf for
 * Ed25519 signing/verification.
 *
 * Exactly six fields are picked from the leaf, serialized with JSON.stringify in a
 * fixed key order, and UTF-8 encoded. The initiator, the responder and the verifier
 * MUST all use this encoding — any drift causes silent signature-verification failure.
 *
 * @param {object} leaf - Leaf to encode. Properties other than the six canonical
 *   fields (for example `signature`) are ignored.
 * @returns {Uint8Array} UTF-8 bytes of the canonical JSON form.
 */
function canonicalSealInterruptedLeafBytes(leaf) {
    const { type, sessionId, leafCount, merkleRootAtInterruption, timestamp, signerPubkey } = leaf;
    // The key order below is part of the signed format — do not reorder.
    const canonicalJson = JSON.stringify({
        type,
        sessionId,
        leafCount,
        merkleRootAtInterruption,
        timestamp,
        signerPubkey,
    });
    return new TextEncoder().encode(canonicalJson);
}
|
|
71
|
+
/**
 * M7-SESSION-001 (H-1): construct and K_local-sign a SEAL-INTERRUPTED leaf.
 *
 * The leaf is stamped with the current time (Date.now()), encoded with
 * canonicalSealInterruptedLeafBytes and handed to keyProvider.sign. Only the
 * resulting signature is read back from the keyProvider.
 *
 * @param {object} keyProvider - Object exposing an async `sign(bytes)` method.
 * @param {object} opts - Supplies `sessionId`, `leafCount`,
 *   `merkleRootAtInterruption` and `signerPubkeyHex`.
 * @returns {Promise<object>} The leaf fields plus `signature` as a hex string.
 */
async function buildSignedSealInterruptedLeaf(keyProvider, opts) {
    const { sessionId, leafCount, merkleRootAtInterruption, signerPubkeyHex } = opts;
    const unsignedLeaf = {
        type: "SEAL_INTERRUPTED",
        sessionId,
        leafCount,
        merkleRootAtInterruption,
        timestamp: Date.now(),
        signerPubkey: signerPubkeyHex,
    };
    const signatureBytes = await keyProvider.sign(canonicalSealInterruptedLeafBytes(unsignedLeaf));
    const signature = Buffer.from(signatureBytes).toString("hex");
    return { ...unsignedLeaf, signature };
}
|
|
87
|
+
/**
 * Validate a counterparty's SEAL-INTERRUPTED leaf against our own view of the session.
 *
 * Checks run in order and the first failure wins:
 *   1. the ack nonce must equal the nonce we sent;
 *   2. the leaf's numeric `leafCount` must equal `ownLeafCount`;
 *   3. the leaf must carry `signerPubkey` and `signature` strings, the signer must be
 *      the expected counterparty, both values must be well-formed hex, and the
 *      Ed25519 signature must verify over the canonical leaf encoding.
 *
 * Never throws: any error raised during step 3 is reported as `leaf_signature_invalid`.
 *
 * @param {object} opts
 * @param {object} opts.leaf - The counterparty's leaf as received.
 * @param {*} opts.sentNonce - Nonce we sent with the request.
 * @param {*} opts.ackNonce - Nonce echoed in the counterparty's ack.
 * @param {number} opts.ownLeafCount - Our independently tracked leaf count.
 * @param {string} opts.expectedCounterpartyPubkey - Hex pubkey the leaf must be signed by.
 * @returns {{ok: true} | {ok: false, reason: string, error: string}}
 */
function verifyCounterpartySealLeaf(opts) {
    const { leaf, sentNonce, ackNonce, ownLeafCount, expectedCounterpartyPubkey } = opts;
    // One or more complete hex byte pairs. Buffer.from(x, "hex") silently stops at the
    // first invalid character or dangling nibble, so malformed input must be rejected
    // up front instead of being verified in truncated form.
    const HEX_BYTES = /^(?:[0-9a-fA-F]{2})+$/;
    // 1. L-2: the counterparty MUST echo the exact nonce we sent.
    if (ackNonce !== sentNonce) {
        return { ok: false, reason: "nonce_mismatch", error: "ack nonce did not match the request nonce" };
    }
    // 2. leafCount agreement against our own independent count.
    const cpLeafCount = typeof leaf["leafCount"] === "number" ? leaf["leafCount"] : null;
    if (cpLeafCount !== ownLeafCount) {
        return {
            ok: false,
            reason: "leaf_count_mismatch",
            error: `counterparty leafCount ${String(cpLeafCount)} != own leafCount ${ownLeafCount}`,
        };
    }
    // 3. SI-002/SI-003: verify the counterparty's Ed25519 signature on its OWN leaf.
    try {
        const signerPubkeyHex = typeof leaf["signerPubkey"] === "string" ? leaf["signerPubkey"] : null;
        const signatureHex = typeof leaf["signature"] === "string" ? leaf["signature"] : null;
        if (!signerPubkeyHex || !signatureHex) {
            throw new Error("leaf missing signerPubkey or signature");
        }
        if (signerPubkeyHex !== expectedCounterpartyPubkey) {
            throw new Error(`leaf signerPubkey ${signerPubkeyHex.slice(0, 16)} does not match counterparty ${expectedCounterpartyPubkey.slice(0, 16)}`);
        }
        if (!HEX_BYTES.test(signerPubkeyHex) || !HEX_BYTES.test(signatureHex)) {
            throw new Error("leaf signerPubkey or signature is not well-formed hex");
        }
        // Use the shared canonical encoder so signer and verifier cannot drift apart.
        const leafBytes = canonicalSealInterruptedLeafBytes(leaf);
        const pubkeyBytes = new Uint8Array(Buffer.from(signerPubkeyHex, "hex"));
        const sigBytes = new Uint8Array(Buffer.from(signatureHex, "hex"));
        if (!ed25519Verify(pubkeyBytes, leafBytes, sigBytes)) {
            return { ok: false, reason: "leaf_signature_invalid", error: "Ed25519 signature verification failed on SEAL-INTERRUPTED leaf" };
        }
        return { ok: true };
    }
    catch (verifyErr) {
        // Malformed leaves and verifier exceptions are all treated as an invalid signature.
        return {
            ok: false,
            reason: "leaf_signature_invalid",
            error: verifyErr instanceof Error ? verifyErr.message : String(verifyErr),
        };
    }
}
|
|
136
|
+
// Placeholder KeyProvider handed to session nodes.
// createNode requires a KeyProvider, but session nodes have no use for signing
// keys — libp2p generates its own fresh transport keypair internally — so these
// methods are never called on session nodes. Both resolve to zero-filled buffers:
// 32 bytes for the public key, 64 bytes for a signature.
const SESSION_NODE_KEY_STUB = {
    async getPublicKey() {
        return new Uint8Array(32);
    },
    async sign(_data) {
        return new Uint8Array(64);
    },
};
|
|
144
|
+
// Production session node factory — a thin adapter over createNode from
// @cello-protocol/transport.
class ProductionSessionNodeFactory {
    /**
     * Create a session node listening on an ephemeral loopback TCP port.
     *
     * @param {object} config - Supplies `connectionGater` and `nodeType`, both
     *   forwarded unchanged to the transport's createNode.
     * @returns {Promise<*>} Whatever the transport's createNode resolves to.
     */
    async createNode(config) {
        const { connectionGater, nodeType } = config;
        const nodeOptions = {
            keyProvider: SESSION_NODE_KEY_STUB,
            listenAddresses: ["/ip4/127.0.0.1/tcp/0"],
            connectionGater,
            // CELLO-M7-TRANSPORT-001: forward the role so AutoNAT/dcutr are configured
            // correctly (session nodes get dcutr; standing receivers do not).
            nodeType,
        };
        // Resolves to the module-level createNode import, not this method.
        return createNode(nodeOptions);
    }
}
|
|
157
|
+
export async function startDaemon(config) {
|
|
158
|
+
const { celloDir, socketPath, lockFilePath, maxConnections, version, logger, manifestProvider, manifestRootKeys, manifestThreshold, manifestVersionStore, manifestPollScheduler, signalingConnect, challengeVerifier, directoryEndpointResolver, sessionNodeFactory, sessionNegotiator, getRelayCircuitAddress, } = config;
|
|
159
|
+
// CELLO-M7-TRANSPORT-001: composition-root selection of the transport selector.
|
|
160
|
+
// Driven by CELLO_ENV; fails fast at startup (here, not at first session) when a
|
|
161
|
+
// production environment is missing the required transport dialer (AC-010).
|
|
162
|
+
const celloEnv = resolveCelloEnv(process.env["CELLO_ENV"]);
|
|
163
|
+
const transportSelector = createTransportSelector({
|
|
164
|
+
env: celloEnv,
|
|
165
|
+
logger,
|
|
166
|
+
transportDialer: config.transportDialer,
|
|
167
|
+
});
|
|
168
|
+
logger.info("transport.adapters.wired", {
|
|
169
|
+
env: celloEnv,
|
|
170
|
+
selector: isProductionVariant(celloEnv) ? "real" : "stub",
|
|
171
|
+
});
|
|
172
|
+
// M7-MANIFEST-002: Load and verify consortium manifest BEFORE any directory connection.
|
|
173
|
+
//
|
|
174
|
+
// Pseudocode for manifest loading:
|
|
175
|
+
// 1. If manifestProvider is configured:
|
|
176
|
+
// a. Call manifestProvider.loadAndVerify(rootKeys, threshold).
|
|
177
|
+
// b. Check validity window: not_before <= now < expires.
|
|
178
|
+
// c. Check version monotonicity (if manifestVersionStore is provided).
|
|
179
|
+
// d. On success: log directory.auth.manifest.verified.
|
|
180
|
+
// e. On failure: log error event and leave manifestVerified false; the ADV-002 check below then aborts startup.
|
|
181
|
+
// 2. If manifestProvider is absent: skip (backward compat for DAEMON-001 tests).
|
|
182
|
+
let manifestVerified = false;
|
|
183
|
+
// M7 Keystone: the version of the verified manifest, surfaced in ConnectResult.
|
|
184
|
+
// Stays 0 when no manifestProvider is configured (the M6 backward-compat path).
|
|
185
|
+
let verifiedManifestVersion = 0;
|
|
186
|
+
// ADV-006 + ADV-008: If manifestProvider is set, manifestRootKeys and a positive
|
|
187
|
+
// manifestThreshold are required. Fail loudly on misconfiguration rather than
|
|
188
|
+
// silently proceeding unverified.
|
|
189
|
+
if (manifestProvider && (!manifestRootKeys || !manifestThreshold || manifestThreshold <= 0)) {
|
|
190
|
+
throw new Error("DaemonConfig: manifestProvider requires manifestRootKeys (non-empty) and manifestThreshold (positive integer >= 1)");
|
|
191
|
+
}
|
|
192
|
+
if (manifestProvider && manifestRootKeys && manifestThreshold !== undefined) {
|
|
193
|
+
try {
|
|
194
|
+
const manifest = await manifestProvider.loadAndVerify(manifestRootKeys, manifestThreshold);
|
|
195
|
+
// Check validity window: not_before <= now < expires
|
|
196
|
+
const now = new Date();
|
|
197
|
+
const notBefore = new Date(manifest.not_before);
|
|
198
|
+
const expiresAt = new Date(manifest.expires);
|
|
199
|
+
if (now < notBefore) {
|
|
200
|
+
logger.error("directory.auth.manifest.not.yet.valid", {
|
|
201
|
+
manifestVersion: manifest.version,
|
|
202
|
+
notBefore: manifest.not_before,
|
|
203
|
+
});
|
|
204
|
+
}
|
|
205
|
+
else if (expiresAt <= now) {
|
|
206
|
+
logger.error("directory.auth.manifest.expired", {
|
|
207
|
+
manifestVersion: manifest.version,
|
|
208
|
+
expiresAt: manifest.expires,
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
else {
|
|
212
|
+
// Check version monotonicity if version store is provided
|
|
213
|
+
if (manifestVersionStore) {
|
|
214
|
+
const lastSeen = await manifestVersionStore.getLastSeenVersion();
|
|
215
|
+
if (lastSeen !== null && manifest.version < lastSeen) {
|
|
216
|
+
logger.error("directory.auth.manifest.version.rollback", {
|
|
217
|
+
manifestVersion: manifest.version,
|
|
218
|
+
lastSeenVersion: lastSeen,
|
|
219
|
+
});
|
|
220
|
+
}
|
|
221
|
+
else {
|
|
222
|
+
await manifestVersionStore.persistVersion(manifest.version);
|
|
223
|
+
manifestVerified = true;
|
|
224
|
+
verifiedManifestVersion = manifest.version;
|
|
225
|
+
logger.info("directory.auth.manifest.verified", {
|
|
226
|
+
manifestVersion: manifest.version,
|
|
227
|
+
signerCount: manifest.signatures.length,
|
|
228
|
+
});
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
else {
|
|
232
|
+
manifestVerified = true;
|
|
233
|
+
verifiedManifestVersion = manifest.version;
|
|
234
|
+
logger.info("directory.auth.manifest.verified", {
|
|
235
|
+
manifestVersion: manifest.version,
|
|
236
|
+
signerCount: manifest.signatures.length,
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
catch (err) {
|
|
242
|
+
logger.error("directory.auth.manifest.load.failed", {
|
|
243
|
+
error: err instanceof Error ? err.message : String(err),
|
|
244
|
+
});
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
// ADV-002: When manifestProvider is configured (opt-in mode) and verification
|
|
248
|
+
// failed, the daemon must refuse to proceed. Operators who configure
|
|
249
|
+
// manifestProvider have opted into manifest enforcement.
|
|
250
|
+
if (manifestProvider && !manifestVerified) {
|
|
251
|
+
throw new Error("Manifest verification failed. The daemon cannot start with an unverified manifest when manifestProvider is configured. " +
|
|
252
|
+
"Check the logs for the specific failure reason (manifest_signature_invalid, manifest_expired, or manifest_version_rollback).");
|
|
253
|
+
}
|
|
254
|
+
// Ensure the cello directory exists
|
|
255
|
+
await mkdir(celloDir, { recursive: true });
|
|
256
|
+
// Ensure the socket parent directory exists
|
|
257
|
+
await mkdir(dirname(socketPath), { recursive: true });
|
|
258
|
+
// Load agent identities
|
|
259
|
+
const { loaded: loadedAgents, failed: failedAgents } = await loadAgents(celloDir, logger);
|
|
260
|
+
// Acquire lock file
|
|
261
|
+
await acquireLock(lockFilePath, {
|
|
262
|
+
pid: process.pid,
|
|
263
|
+
socketPath,
|
|
264
|
+
version,
|
|
265
|
+
});
|
|
266
|
+
// Build agent state (all start in 'registered' state — no auto-start)
|
|
267
|
+
const agents = [
|
|
268
|
+
...loadedAgents.map((a) => ({
|
|
269
|
+
name: a.name,
|
|
270
|
+
state: "registered",
|
|
271
|
+
pubkey: a.pubkey,
|
|
272
|
+
})),
|
|
273
|
+
...failedAgents.map((a) => ({
|
|
274
|
+
name: a.name,
|
|
275
|
+
state: "load_failed",
|
|
276
|
+
error: a.error,
|
|
277
|
+
})),
|
|
278
|
+
];
|
|
279
|
+
// M7-SESSION-001 (H-1): retain each agent's K_local signing key so the daemon
|
|
280
|
+
// can produce K_local-signed SEAL-INTERRUPTED leaves (both as initiator and as
|
|
281
|
+
// the bilateral responder). The KeyProvider keeps the private scalar internal —
|
|
282
|
+
// only signatures leave it.
|
|
283
|
+
const keyProviders = new Map();
|
|
284
|
+
for (const a of loadedAgents) {
|
|
285
|
+
keyProviders.set(a.name, a.keyProvider);
|
|
286
|
+
}
|
|
287
|
+
// M7 DOD-SPINE-6 / MSG-001-3b: assemble the relay-witness connect params for a
|
|
288
|
+
// session node from the FROST-signed assignment (relay endpoint + 16-byte session id)
|
|
289
|
+
// and the acting agent's K_local. Returns undefined when the agent key or relay
|
|
290
|
+
// endpoint is missing — the session then runs on the direct content path without a
|
|
291
|
+
// relay witness (degraded, never blocked).
|
|
292
|
+
const buildRelayConnectParams = async (agentName, assignment) => {
|
|
293
|
+
const kp = keyProviders.get(agentName);
|
|
294
|
+
const endpoint = assignment.relay_endpoint;
|
|
295
|
+
if (!kp || !endpoint || !endpoint.peer_id || !endpoint.multiaddrs || endpoint.multiaddrs.length === 0) {
|
|
296
|
+
return undefined;
|
|
297
|
+
}
|
|
298
|
+
return {
|
|
299
|
+
relayPeerId: endpoint.peer_id,
|
|
300
|
+
relayAddrs: endpoint.multiaddrs,
|
|
301
|
+
keyProvider: kp,
|
|
302
|
+
senderPubkey: await kp.getPublicKey(),
|
|
303
|
+
sessionIdBytes: assignment.session_id,
|
|
304
|
+
};
|
|
305
|
+
};
|
|
306
|
+
// Stub: all connections marked as 'unverified' until connection validation is wired
|
|
307
|
+
const connections = [];
|
|
308
|
+
// M7-SIGNAL-001: Instantiate SignalingManager — owns directory signaling stream lifecycle.
|
|
309
|
+
const defaultConnect = async () => {
|
|
310
|
+
throw new Error("directory_signaling_not_configured");
|
|
311
|
+
};
|
|
312
|
+
// M7 Keystone (Part 1): resolve the agent identity that authenticates the
|
|
313
|
+
// directory signaling stream. The daemon's directory-facing node is one per
|
|
314
|
+
// daemon, so the keystone authenticates as the PRIMARY agent (first successfully
|
|
315
|
+
// loaded). Returns null when no agent is registered yet → connect() throws
|
|
316
|
+
// no_agent_identity and the SignalingManager stays reconnecting until one exists
|
|
317
|
+
// (registration, Action 2, brings the first identity).
|
|
318
|
+
//
|
|
319
|
+
// NOTE (multi-agent, Action 2+): per-agent directory operations under distinct
|
|
320
|
+
// identities are out of keystone scope. This establishes the directory door.
|
|
321
|
+
// L4: sort by name so the "primary" agent is STABLE across restarts — readdir
|
|
322
|
+
// order (agent-loader) is platform-dependent and unsorted, which would otherwise
|
|
323
|
+
// let the authenticating identity change between daemon restarts.
|
|
324
|
+
const primaryAgent = [...loadedAgents].sort((a, b) => a.name.localeCompare(b.name))[0];
|
|
325
|
+
const getAuthIdentity = () => {
|
|
326
|
+
if (!primaryAgent)
|
|
327
|
+
return null;
|
|
328
|
+
return { keyProvider: primaryAgent.keyProvider, pubkeyHex: primaryAgent.pubkey };
|
|
329
|
+
};
|
|
330
|
+
// Production builds signalingConnect from the bootstrap resolver + agent identity.
|
|
331
|
+
// Tests inject signalingConnect directly (takes precedence). Neither → defaultConnect
|
|
332
|
+
// (DAEMON-001 backward-compat). challengeVerifier is left to the caller: when absent,
|
|
333
|
+
// step-6 directory verification is skipped — the M6 path that connected and ran the
|
|
334
|
+
// full DKG/seal pipeline.
|
|
335
|
+
// M7 Action 2: the daemon holds a reference to the live directory-facing node so
|
|
336
|
+
// registration's FROST DKG (NetworkDirectoryNode) — and future ceremonies/seal — can
|
|
337
|
+
// open streams to the directory on the SAME node. createSignalingConnect sets it via
|
|
338
|
+
// publishNode on a successful connect and clears it (null) when the stream closes.
|
|
339
|
+
// Consumers MUST gate use on signalingManager.status === "connected".
|
|
340
|
+
let directoryNode = null;
|
|
341
|
+
const getDirectoryNode = () => directoryNode;
|
|
342
|
+
const resolvedConnect = signalingConnect ??
|
|
343
|
+
(directoryEndpointResolver
|
|
344
|
+
? createSignalingConnect({
|
|
345
|
+
getDirectoryEndpoint: directoryEndpointResolver,
|
|
346
|
+
getAuthIdentity,
|
|
347
|
+
logger,
|
|
348
|
+
challengeVerifier,
|
|
349
|
+
getManifestVersion: () => verifiedManifestVersion,
|
|
350
|
+
publishNode: (n) => {
|
|
351
|
+
directoryNode = n;
|
|
352
|
+
},
|
|
353
|
+
})
|
|
354
|
+
: defaultConnect);
|
|
355
|
+
// H1: a long-running daemon must ride out directory outages — notably the
|
|
356
|
+
// 25-30 min multi-region directory deploy. The transport default of 10 reconnect
|
|
357
|
+
// attempts (~5 min with default backoff) transitions the manager to terminal
|
|
358
|
+
// "lost" mid-deploy, with no public way to re-enter the loop — the daemon would
|
|
359
|
+
// never recover without a cello logout/login. Use an effectively-unbounded attempt
|
|
360
|
+
// budget with a capped backoff so it keeps retrying and reconnects within
|
|
361
|
+
// ~maxBackoffMs of the directory returning. (Availability is a first-class invariant.)
|
|
362
|
+
//
|
|
363
|
+
// L3: challengeVerifier is NOT passed here — the dialer (createSignalingConnect)
|
|
364
|
+
// performs step-6 verification itself, matching #doOpen. The manager's copy would
|
|
365
|
+
// be dead (processStep5Frame is only invoked inside connect()).
|
|
366
|
+
const signalingManager = new SignalingManager({
|
|
367
|
+
connect: resolvedConnect,
|
|
368
|
+
logger,
|
|
369
|
+
maxReconnectAttempts: Number.MAX_SAFE_INTEGER,
|
|
370
|
+
maxBackoffMs: 30_000,
|
|
371
|
+
// DOD-AUTH-2: the keystone manager (primary agent's directory door) carries the
|
|
372
|
+
// manifest-poll deps so it re-polls the directory on its live stream and adopts a
|
|
373
|
+
// newer signed manifest. The SAME shared manifestProvider instance the startup load
|
|
374
|
+
// + challengeVerifier use, so an adopted manifest updates the cache step-6 reads from.
|
|
375
|
+
// All optional — undefined on the M6 backward-compat path → polling is simply off.
|
|
376
|
+
pollScheduler: manifestPollScheduler,
|
|
377
|
+
manifestProvider,
|
|
378
|
+
manifestVersionStore,
|
|
379
|
+
rootKeys: manifestRootKeys,
|
|
380
|
+
threshold: manifestThreshold,
|
|
381
|
+
});
|
|
382
|
+
const perAgentSignaling = new Map();
|
|
383
|
+
/**
|
|
384
|
+
* Return the directory signaling stream for `agentName`, authenticated as that
|
|
385
|
+
* agent. The primary agent reuses the keystone manager + its published node; any
|
|
386
|
+
* other agent gets (and caches) a dedicated manager. Falls back to the keystone
|
|
387
|
+
* manager when no production bootstrap resolver is configured (in-process tests
|
|
388
|
+
* inject a single `signalingConnect` and never exercise the per-agent path).
|
|
389
|
+
*
|
|
390
|
+
* SCOPE (SPINE-5 follow-on): this wires per-agent signaling for REGISTRATION (the
|
|
391
|
+
* registration reply frames are routed via the per-agent DaemonRegistrationContext's
|
|
392
|
+
* own inbound handler). The daemon's INBOUND SESSION handlers (session_assignment /
|
|
393
|
+
* session_request, registerInboundHandler below) are still attached to the keystone
|
|
394
|
+
* `signalingManager` only — so a NON-primary agent can register but cannot yet RECEIVE
|
|
395
|
+
* inbound sessions on its dedicated stream (frames there are unhandled). Not a
|
|
396
|
+
* regression (before this, a non-primary agent could not register at all); closing it
|
|
397
|
+
* is SPINE-5, which attaches the session inbound handlers per-agent. Tracked in the
|
|
398
|
+
* M7 build journal + DoD SPINE-5 scope note.
|
|
399
|
+
*/
|
|
400
|
+
function getAgentSignaling(agentName, agentKeyProvider, agentPubkeyHex) {
    // The keystone pair serves the primary agent, and is also the fallback when no
    // directory endpoint resolver is configured. A fresh object is built per call.
    const keystonePair = () => ({ signaling: signalingManager, getNode: getDirectoryNode });
    if (primaryAgent && agentName === primaryAgent.name) {
        return keystonePair();
    }
    // Dedicated managers are created lazily and cached by agent name.
    const cached = perAgentSignaling.get(agentName);
    if (cached) {
        return cached;
    }
    if (!directoryEndpointResolver) {
        return keystonePair();
    }
    // Filled in by the connect function (publishNode) and read back through entry.getNode.
    let publishedNode = null;
    const mgr = new SignalingManager({
        connect: createSignalingConnect({
            getDirectoryEndpoint: directoryEndpointResolver,
            getAuthIdentity: () => ({ keyProvider: agentKeyProvider, pubkeyHex: agentPubkeyHex }),
            logger,
            challengeVerifier,
            getManifestVersion: () => verifiedManifestVersion,
            publishNode: (node) => {
                publishedNode = node;
            },
        }),
        logger,
        maxReconnectAttempts: Number.MAX_SAFE_INTEGER,
        maxBackoffMs: 30_000,
    });
    const entry = { signaling: mgr, getNode: () => publishedNode };
    perAgentSignaling.set(agentName, entry);
    logger.info("agent.signaling.created", { agentName, agentPubkey: agentPubkeyHex });
    // Both FROST ceremony handlers take the same option shape; each call gets its own
    // freshly built object (and its own endpoint closure).
    const ceremonyOptions = () => ({
        agentName,
        agentDir: join(celloDir, "agents", agentName),
        agentPubkeyHex,
        getNode: entry.getNode,
        getDirectoryEndpoint: async () => {
            if (!directoryEndpointResolver) {
                return null;
            }
            return (await directoryEndpointResolver()) ?? null;
        },
        signaling: mgr,
        logger,
    });
    // DOD-SPINE-5: answer the directory's delegated-signing `ceremony_request` on THIS
    // agent's stream (the session FROST ceremony).
    wireSessionCeremonyHandler(ceremonyOptions());
    // DOD-SPINE-7: coordinate the SEAL FROST ceremony on this agent's stream too.
    wireSealCeremonyHandler(ceremonyOptions());
    // DOD-SPINE-7: resolve session_sealed for this agent's sessions on its own stream.
    registerSessionSealedListener(mgr, agentName, agentPubkeyHex);
    // SESSION-002: resolve seal_unilateral_confirmed (verify the cert) on this agent's stream.
    registerUnilateralConfirmedListener(mgr, agentName, agentPubkeyHex);
    // DOD-UP-1: react to seal_unilateral_notification and the seal_upgrade_confirmed /
    // seal_upgrade_rejected responses on this agent's stream.
    registerUnilateralUpgradeListener(mgr, agentName, agentPubkeyHex);
    // WIRE-002: answer the directory's session_offer on this agent's stream, advertising
    // the standing-receiver session endpoint.
    wireSessionOfferHandler({
        agentName,
        getStandingReceiverEndpoint: () => sessionNodeManager.getStandingReceiverInfo(agentName),
        signaling: mgr,
        logger,
    });
    return entry;
}
|
|
470
|
+
/**
 * Poll `mgr.status` until it reads "connected" or `timeoutMs` has elapsed.
 *
 * The status is re-checked at most every 100 ms. Each sleep is clamped to the time
 * remaining before the deadline, so a timeout shorter than the poll interval is
 * honoured instead of being rounded up to a full interval.
 *
 * @param {{ status: string }} mgr - object exposing a `status` property to poll.
 * @param {number} timeoutMs - maximum time to wait, in milliseconds.
 * @returns {Promise<boolean>} true if `mgr.status` is "connected" when polling stops,
 *   false otherwise.
 */
async function waitForSignalingConnected(mgr, timeoutMs) {
    const pollIntervalMs = 100;
    const deadline = Date.now() + timeoutMs;
    while (mgr.status !== "connected") {
        const remainingMs = deadline - Date.now();
        // `!(x > 0)` also stops on NaN (a non-numeric timeout), matching the original
        // `Date.now() < deadline` loop condition.
        if (!(remainingMs > 0)) {
            break;
        }
        await new Promise((resolve) => setTimeout(resolve, Math.min(pollIntervalMs, remainingMs)));
    }
    return mgr.status === "connected";
}
|
|
478
|
+
/**
 * Tear down the dedicated signaling manager cached for `agentName`, if there is one.
 *
 * Used when an agent's registration fails terminally, so the lazily-created manager
 * (with its effectively-unbounded reconnect budget) does not keep reconnecting for an
 * agent that is not registered/online. Does nothing when no entry is cached under that
 * name — which includes the primary agent, whose keystone manager is never stored in
 * `perAgentSignaling`. A later getAgentSignaling call re-creates the manager.
 *
 * The cache entry is removed BEFORE the manager is stopped.
 */
async function dropAgentSignaling(agentName) {
    const dedicated = perAgentSignaling.get(agentName);
    if (dedicated) {
        perAgentSignaling.delete(agentName);
        await dedicated.signaling.stop();
        logger.info("agent.signaling.dropped", { agentName });
    }
}
|
|
494
|
+
// DOD-SPINE-5: the PRIMARY agent registers + initiates over the keystone signaling
|
|
495
|
+
// stream (not a per-agent one), so wire its ceremony_request handler on the keystone
|
|
496
|
+
// manager too — otherwise a primary-agent initiator's session ceremony would time out.
|
|
497
|
+
if (primaryAgent) {
    // The session and seal ceremony handlers take the same options for the primary
    // agent; build a fresh object (and endpoint closure) for each call.
    const keystoneCeremonyOptions = () => ({
        agentName: primaryAgent.name,
        agentDir: join(celloDir, "agents", primaryAgent.name),
        agentPubkeyHex: primaryAgent.pubkey,
        getNode: getDirectoryNode,
        getDirectoryEndpoint: async () => {
            if (!directoryEndpointResolver) {
                return null;
            }
            return (await directoryEndpointResolver()) ?? null;
        },
        signaling: signalingManager,
        logger,
    });
    // DOD-SPINE-5: session ceremony_request handler on the keystone manager.
    wireSessionCeremonyHandler(keystoneCeremonyOptions());
    // DOD-SPINE-7: the primary agent also coordinates the SEAL FROST ceremony on the keystone.
    wireSealCeremonyHandler(keystoneCeremonyOptions());
    // Answer session_offer on the keystone stream with the primary agent's standing-receiver info.
    wireSessionOfferHandler({
        agentName: primaryAgent.name,
        getStandingReceiverEndpoint: () => sessionNodeManager.getStandingReceiverInfo(primaryAgent.name),
        signaling: signalingManager,
        logger,
    });
}
|
|
524
|
+
// Per-connection state: tracks which agent is "current" for each IPC connection.
// Key = connectionId (assigned by IPC server). Value = a state object; the ipc.connect
// handler stores `{ currentAgent, clientType }`, with currentAgent starting as null.
const perConnectionState = new Map();
// Names of agents currently in the "online" state (added by cello_start_agent,
// removed by cello_stop_agent).
const onlineAgents = new Set();
// Initialize SessionNodeManager (DAEMON-002: composition root — AC-011).
// This runs before the IPC socket opens so:
// 1. The standing receiver is ready before any cello_await_session call.
// 2. Interrupted session detection runs before any tool call can race.
const sessionNodeManager = new SessionNodeManager({
    // An injected factory wins; otherwise the production factory is used.
    factory: sessionNodeFactory ?? new ProductionSessionNodeFactory(),
    logger,
    dbPath: join(celloDir, "sessions.db"),
    contentTtfMs: config.contentTtfMs,
    // CELLO-M7-TRANSPORT-001: directory-node AutoNAT probers (SI-002). The
    // directory connection (SIGNAL-001) is not yet wired into the daemon, so the
    // prober set is empty — AutoNAT cannot run and the standing receiver reports
    // the conservative default + transport.autonat.unavailable (AC-004/DB-001).
    // NOTE(review): this file does wire directory signaling elsewhere; confirm whether
    // the "not yet wired" statement above is still accurate.
    autoNatProbers: () => [],
});
// Awaited so everything below runs only after initialization has completed.
await sessionNodeManager.initialize();
// CELLO-M7-TRANSPORT-001: the daemon's runtime AutoNAT service is the one
// wrapping the standing receiver node (it emits transport.autonat.result /
// transport.autonat.unavailable and its dialability drives the SessionAssignment
// advertised address — AC-004/AC-019). config.autoNatService is an explicit
// override (tests); otherwise we use the standing receiver's, falling back to a
// stub only if the standing receiver failed to come up.
// Each `??` only falls through on null/undefined.
const autoNatService = config.autoNatService ??
    sessionNodeManager.getStandingReceiverAutoNat() ??
    new LocalAutoNatStub();
|
|
554
|
+
// ─── DOD-SPINE-5: real client-side session negotiator (built internally) ──────
|
|
555
|
+
// The directory already brokers `session_request` → FROST-signed `session_assignment`
|
|
556
|
+
// live; the missing half was the CLIENT driver. This negotiator sends `session_request`
|
|
557
|
+
// over the CURRENT agent's OWN signaling stream (so the directory routes the signed
|
|
558
|
+
// assignment back to that agent — same per-agent routing SPINE-4 established), advertising
|
|
559
|
+
// the standing receiver's session endpoint (WIRE-001: the directory rejects a request
|
|
560
|
+
// with no initiator session Peer ID), then parses the returned assignment. Ported from
|
|
561
|
+
// core/client `initiateSession` (NOT imported — that stack is dead). Tests still inject
|
|
562
|
+
// their own `sessionNegotiator`; the binary now gets a real one instead of
|
|
563
|
+
// directory_signaling_not_configured.
|
|
564
|
+
// M3: a `session_assignment` frame carries no echoed request id, so two overlapping
|
|
565
|
+
// initiations on ONE agent's stream would race to resolve on whichever assignment
|
|
566
|
+
// arrives first — request A could complete with B's assignment. Guard with a per-agent
|
|
567
|
+
// single-flight slot (genuine single-slot, as the prior comment falsely claimed). Cross-
|
|
568
|
+
// agent concurrency is unaffected (separate streams). A directory-side echoed request id
|
|
569
|
+
// would allow true concurrency later; this is the correct minimum.
|
|
570
|
+
const negotiationInProgress = new Set();
|
|
571
|
+
/**
 * Client-side session negotiator. An injected `sessionNegotiator` wins; otherwise this
 * built-in implementation sends `session_request` over the agent's own signaling stream
 * and waits for the directory's `session_assignment` / `session_request_error` reply.
 *
 * negotiate(ctx) reads `ctx.agentName`, `ctx.correlationId` and `ctx.params`
 * (`target_pubkey`, or `counterparty_pubkey` as a fallback) and resolves to either
 * `{ ok: true, assignment }` or `{ ok: false, reason, guidance }`.
 *
 * Single-flight: the per-agent slot in `negotiationInProgress` is claimed
 * synchronously, BEFORE the first `await`. Previously the slot was only claimed after
 * waiting (up to 10s) for the signaling stream, so two overlapping calls for one agent
 * could both pass the guard, both register a reply handler, and the first to finish
 * would release the slot the other still relied on. Every exit path after the claim
 * releases the slot (and any registered handler / pending timer) in `finally`.
 */
const resolvedSessionNegotiator = sessionNegotiator ?? {
    negotiate: async (ctx) => {
        const kp = keyProviders.get(ctx.agentName);
        const agentRec = loadedAgents.find((a) => a.name === ctx.agentName);
        if (!kp || !agentRec) {
            return { ok: false, reason: "agent_not_found", guidance: `Agent '${ctx.agentName}' is not loaded on this daemon.` };
        }
        if (negotiationInProgress.has(ctx.agentName)) {
            return {
                ok: false,
                reason: "session_negotiation_in_progress",
                guidance: `Another session initiation is already in progress for agent '${ctx.agentName}'. Wait for it to finish, then retry.`,
            };
        }
        const targetHex = typeof ctx.params["target_pubkey"] === "string"
            ? ctx.params["target_pubkey"]
            : typeof ctx.params["counterparty_pubkey"] === "string"
                ? ctx.params["counterparty_pubkey"]
                : "";
        // Exactly 64 hex characters (a 32-byte key).
        if (!/^[0-9a-fA-F]{64}$/.test(targetHex)) {
            return {
                ok: false,
                reason: "invalid_target_pubkey",
                guidance: "cello_initiate_session requires 'target_pubkey' as the counterparty's 32-byte hex K_local public key.",
            };
        }
        const sr = sessionNodeManager.getStandingReceiverInfo(ctx.agentName);
        if (!sr) {
            return {
                ok: false,
                reason: "standing_receiver_unavailable",
                guidance: "The standing receiver is not ready, so no initiator session endpoint can be advertised. Retry once the daemon has finished starting.",
            };
        }
        // Claim the single-flight slot now: no `await` has happened since the check above,
        // so no other negotiate() call for this agent can slip in between check and claim.
        negotiationInProgress.add(ctx.agentName);
        let unregister = null;
        let timer;
        try {
            const { signaling } = getAgentSignaling(ctx.agentName, kp, agentRec.pubkey);
            if (!(await waitForSignalingConnected(signaling, 10_000))) {
                return {
                    ok: false,
                    reason: "directory_signaling_timeout",
                    guidance: `Agent '${ctx.agentName}' could not establish its directory signaling stream within 10s. Check CELLO_DIRECTORY_URL and that the directory is reachable, then retry.`,
                };
            }
            // Register the reply handler before sending so a fast reply cannot be missed.
            let resolveFrame;
            const pending = new Promise((r) => {
                resolveFrame = r;
            });
            unregister = signaling.registerInboundHandler((frame) => {
                const t = frame["type"];
                if (t === "session_assignment" || t === "session_request_error")
                    resolveFrame(frame);
            });
            const sent = await signaling.sendRaw({
                type: "session_request",
                target_pubkey: new Uint8Array(Buffer.from(targetHex, "hex")),
                initiator_session_peer_id: sr.peerId,
                initiator_session_addrs: sr.addrs,
                // WIRE-002 opt-in: ask the directory to run the session_offer→accept round-trip so
                // the assignment carries the counterparty's reachable session endpoint.
                wants_session_offer: true,
            });
            if (!sent.ok) {
                return {
                    ok: false,
                    reason: sent.reason ?? "directory_unreachable",
                    guidance: sent.guidance ?? "Could not send session_request over the directory signaling stream.",
                };
            }
            // Race the reply against a 30s timeout, signalled by a sentinel frame type.
            const timeoutP = new Promise((r) => {
                timer = setTimeout(() => r({ type: "__timeout__" }), 30_000);
            });
            const frame = await Promise.race([pending, timeoutP]);
            if (frame["type"] === "__timeout__") {
                return { ok: false, reason: "timeout", guidance: "The directory did not return a session assignment within 30s. Retry once cello status shows directory_signaling connected." };
            }
            if (frame["type"] === "session_request_error") {
                const reason = sessionRequestErrorReason(frame);
                return { ok: false, reason, guidance: `The directory refused the session request (${reason}). Ensure the counterparty is registered and online.` };
            }
            const raw = frame["assignment"];
            const assignment = raw ? parseSessionAssignment(raw) : null;
            if (!assignment) {
                return { ok: false, reason: "assignment_parse_failed", guidance: "The directory's session_assignment was missing or malformed." };
            }
            logger.info("session.negotiate.assignment.received", {
                agentName: ctx.agentName,
                correlationId: ctx.correlationId,
                signatureType: assignment.signature_type,
            });
            return { ok: true, assignment };
        }
        finally {
            // `timer` and `unregister` are still unset when an early exit happened before
            // they were created.
            if (timer !== undefined) {
                clearTimeout(timer);
            }
            unregister?.();
            negotiationInProgress.delete(ctx.agentName);
        }
    },
};
|
|
673
|
+
// DAEMON-003: Initialize RetryQueue and NonceDedupStore (AC-008).
|
|
674
|
+
// Both use the same SQLite DB as the SessionNodeManager (daemon.db equivalent).
|
|
675
|
+
// loadFromDb() must complete BEFORE IPC socket opens (AC-007).
|
|
676
|
+
// The retry queue is built on the session manager's DB handle and transcript cipher,
// then loaded from the DB straight away.
const retryQueue = new RetryQueue(
    sessionNodeManager.getDb(),
    logger,
    sessionNodeManager.getTranscriptCipher(),
);
retryQueue.loadFromDb();
// CELLO-M7-MSG-001 (AC-001/AC-003/AC-019): connect the awaiting-ACK lifecycle to the
// retry queue. `onPersisted` marks the content as acked; `onTtf` enqueues the still
// un-acked content. The hooks receive the content hash as hex and hand the queue the
// decoded bytes.
sessionNodeManager.setAwaitingAckHooks({
    onPersisted: (agentName, sessionId, contentHashHex) => {
        const contentHash = Buffer.from(contentHashHex, "hex");
        retryQueue.markContentAcked(agentName, sessionId, contentHash);
    },
    onTtf: (agentName, sessionId, contentHashHex, content) => {
        const contentHash = Buffer.from(contentHashHex, "hex");
        retryQueue.enqueueAwaitingContent(agentName, sessionId, contentHash, content);
    },
});
|
|
691
|
+
// MSG-001-3b (2b): the LIVE content-park deposit. On a not-confirmed send (direct delivery
|
|
692
|
+
// failed, or TTF with no `persisted` ACK) the session manager calls this with the recipient +
|
|
693
|
+
// the session's relay endpoint; we seal the content to the recipient (E2E — the relay never sees
|
|
694
|
+
// plaintext, INV-3) and deposit it to that relay's store-and-forward mailbox via the standing
|
|
695
|
+
// receiver node. The recipient pulls + recovers it at the witnessed sequence (R1) on next online.
|
|
696
|
+
// Live content-park deposit hook: seals the park envelope (content plus the two
// structure CBOR blobs) to the recipient and deposits the ciphertext at the given
// relay through the standing receiver node. Every outcome is logged; nothing is returned.
sessionNodeManager.setContentParkHook(async (request) => {
    const { sessionId, recipientPubkeyHex, relayPeerId, relayAddrs, contentHashHex, content, structure1Cbor, structure2Cbor } = request;
    const node = sessionNodeManager.getStandingReceiverNode();
    if (!node) {
        logger.warn("content.park.deposit.failed", { sessionId, contentHash: contentHashHex, reason: "standing_receiver_unavailable" });
        return;
    }
    const recipientPubkey = Buffer.from(recipientPubkeyHex, "hex");
    // DOD-MSG-4 (2b): the sealed payload is the ordering envelope, not the bare content.
    const envelope = sessionNodeManager.encodeParkEnvelope(content, structure1Cbor, structure2Cbor);
    const ciphertext = sealToRecipient(recipientPubkey, envelope);
    const parkClient = new ContentParkClient({ relayPeerId, relayAddrs: [...relayAddrs], logger });
    const outcome = await parkClient.deposit(node, {
        recipientPubkey,
        contentHash: Buffer.from(contentHashHex, "hex"),
        sessionId: Buffer.from(sessionId, "hex"),
        ciphertext,
    });
    if (!outcome.ok) {
        logger.warn("content.park.deposit.failed", { sessionId, contentHash: contentHashHex, reason: outcome.reason });
        return;
    }
    // Only a 16-character prefix of the recipient key is logged.
    logger.info("content.park.deposited", { sessionId, contentHash: contentHashHex, recipientPubkey: recipientPubkeyHex.slice(0, 16) });
});
|
|
720
|
+
// CELLO-M7-MSG-001 (AC-004/AC-005, D-d): startup flush of locally-persisted un-acked
|
|
721
|
+
// content (the crash backstop). Runs HERE — before the IPC socket opens, consistent
|
|
722
|
+
// with DAEMON-003 startup loading (AC-007) — so a sender that crashed before its TTF
|
|
723
|
+
// park confirmed re-parks its un-acked content to the relay store-and-forward queue on
|
|
724
|
+
// restart. Best-effort: a failed park stays queued (drainAwaitingToPark does not evict
|
|
725
|
+
// on failure), to be retried at the next startup flush or reconnect.
|
|
726
|
+
//
|
|
727
|
+
// Re-home note (Option A): the park target (config.contentParkFn) is supplied natively
|
|
728
|
+
// by the daemon's own send path — NOT by a hosted CelloClient. When it is absent (e.g.
|
|
729
|
+
// a daemon started without the content send path wired, or unit tests), the flush is a
|
|
730
|
+
// documented no-op (content.park.flush.deferred at WARN) and the durable awaiting
|
|
731
|
+
// entries simply remain queued for the next startup that has a park target.
|
|
732
|
+
// MSG-2 startup-flush park target: seal + deposit an un-acked awaiting entry sourced from
|
|
733
|
+
// PERSISTED session state (the in-memory entry is gone after a restart). Same seal + deposit
|
|
734
|
+
// as the live hook above; the endpoint + recipient come from the sessions row.
|
|
735
|
+
/**
 * Park target for the awaiting-content flush: seal one un-acked entry to the session's
 * counterparty and deposit it at the session's persisted relay endpoint.
 *
 * Reads `entry.agentName`, `entry.sessionId`, `entry.contentHashHex` and
 * `entry.contentBlob`. Resolves to `{ parked: true }` on a successful deposit, otherwise
 * `{ parked: false, error }` where `error` names the missing prerequisite or carries the
 * deposit's failure reason.
 */
const startupParkFn = async (entry) => {
    const { agentName, sessionId, contentHashHex, contentBlob } = entry;
    const relayEndpoint = sessionNodeManager.getPersistedRelayEndpoint(agentName, sessionId);
    const sessionRow = sessionNodeManager.getSessionRecord(agentName, sessionId);
    if (!relayEndpoint) {
        return { parked: false, error: "no_persisted_relay_endpoint" };
    }
    if (!sessionRow?.counterparty_pubkey) {
        return { parked: false, error: "no_counterparty" };
    }
    // DOD-LOOP-1: deposit through the standing-receiver node of the agent recorded on
    // the session row.
    const node = sessionNodeManager.getStandingReceiverNode(sessionRow.agent_name);
    if (!node) {
        return { parked: false, error: "standing_receiver_unavailable" };
    }
    const recipientPubkey = Buffer.from(sessionRow.counterparty_pubkey, "hex");
    // DOD-MSG-4 (2b): same envelope encoder as the live park hook, but with the content only.
    const envelope = sessionNodeManager.encodeParkEnvelope(contentBlob);
    const ciphertext = sealToRecipient(recipientPubkey, envelope);
    const parkClient = new ContentParkClient({
        relayPeerId: relayEndpoint.relayPeerId,
        relayAddrs: [...relayEndpoint.relayAddrs],
        logger,
    });
    const outcome = await parkClient.deposit(node, {
        recipientPubkey,
        contentHash: Buffer.from(contentHashHex, "hex"),
        sessionId: Buffer.from(sessionId, "hex"),
        ciphertext,
    });
    if (!outcome.ok) {
        return { parked: false, error: outcome.reason ?? "deposit_failed" };
    }
    logger.info("content.park.deposited", { sessionId, contentHash: contentHashHex, source: "startup_flush" });
    return { parked: true };
};
|
|
768
|
+
// Re-park un-acked awaiting content to the relay store-and-forward queue. Runs once pre-IPC
|
|
769
|
+
// (the crash backstop) and again per-agent when an agent comes online — because post-DOD-LOOP-1
|
|
770
|
+
// the native `startupParkFn` needs the OWNING agent's standing receiver, which exists only once
|
|
771
|
+
// that agent is online. `filterAgent` scopes the drain to one agent's sessions on the agent-
|
|
772
|
+
// online re-run; with no filter it attempts all (the pre-IPC pass / injected-target test path).
|
|
773
|
+
/**
 * Re-park un-acked awaiting content through the configured park target.
 *
 * @param {string} [filterAgent] - when given, only that agent's awaiting sessions are
 *   drained; when omitted (undefined), every awaiting session is attempted.
 * @returns {Promise<void>} resolves after all selected sessions were attempted. A
 *   failure while draining one session is logged and does not stop the others.
 */
async function flushAwaitingContent(filterAgent) {
    const isScoped = filterAgent !== undefined;
    const awaiting = retryQueue.getAwaitingSessions();
    const sessions = isScoped
        ? awaiting.filter((session) => session.agentName === filterAgent)
        : awaiting;
    if (sessions.length === 0) {
        return;
    }
    // config.contentParkFn overrides the native park target when it is not null/undefined.
    const parkFn = config.contentParkFn ?? startupParkFn;
    if (!parkFn) {
        // Only reachable when the resolved target is falsy; nothing is drained, the
        // pending depth is just reported.
        let pendingCount = 0;
        for (const session of sessions) {
            pendingCount += retryQueue.getAwaitingDepth(session.agentName, session.sessionId);
        }
        logger.warn("content.park.flush.deferred", {
            sessionCount: sessions.length,
            pendingCount,
            reason: "no_content_park_target",
        });
        return;
    }
    let parkedTotal = 0;
    // Sessions are drained strictly one after another.
    for (const { agentName, sessionId } of sessions) {
        try {
            const parked = await retryQueue.drainAwaitingToPark(agentName, sessionId, parkFn);
            parkedTotal += parked;
        }
        catch (err) {
            logger.error("content.park.flush.failed", {
                sessionId,
                error: err instanceof Error ? err.message : String(err),
            });
        }
    }
    const summary = { sessionCount: sessions.length, parkedCount: parkedTotal };
    if (isScoped) {
        summary.agentName = filterAgent;
    }
    logger.info("content.park.flush.completed", summary);
}
|
|
808
|
+
// Unfiltered startup pass: attempt to re-park every awaiting session. Awaited, so the
// statements below run only after the flush has finished.
await flushAwaitingContent();
// The nonce dedup store is built on the session manager's DB handle and loaded
// from it immediately.
const nonceDedupStore = new NonceDedupStore(sessionNodeManager.getDb(), logger);
nonceDedupStore.loadFromDb();
|
|
811
|
+
/**
 * Build the agent list as seen from one IPC connection.
 *
 * Agents whose state is "load_failed" are left out. Each remaining agent is reported
 * as "current" (online AND this connection's current agent), "online", or
 * "registered" (not online).
 *
 * @param connectionId - key into perConnectionState; an unknown id has no current agent.
 * @returns {Array<{name, state, pubkey}>} one entry per listed agent, in `agents` order.
 */
function getAgentsForConnection(connectionId) {
    const currentAgent = perConnectionState.get(connectionId)?.currentAgent ?? null;
    const listed = [];
    for (const agent of agents) {
        if (agent.state === "load_failed") {
            continue;
        }
        const isOnline = onlineAgents.has(agent.name);
        let state = "registered";
        if (isOnline) {
            state = agent.name === currentAgent ? "current" : "online";
        }
        listed.push({ name: agent.name, state, pubkey: agent.pubkey });
    }
    return listed;
}
|
|
831
|
+
// M7-SESSION-001 AC-006/AC-007 (and M-1 PULL): build the interrupted_sessions
|
|
832
|
+
// array from SQLite. Shared by both getStatus() (daemon-wide) and the
|
|
833
|
+
// cello_status MCP handler (per-connection) so live MCP clients see the same
|
|
834
|
+
// interrupted sessions a CLI `cello status` would.
|
|
835
|
+
/**
 * Map the session rows with status "interrupted" to the camelCase shape used in
 * status responses. `messageCount` defaults to 0, and `interruptedAt` falls back to
 * the row's `updated_at` rendered as an ISO-8601 string when `interrupted_at` is
 * null/undefined.
 */
function buildInterruptedSessions() {
    const summaries = [];
    for (const row of sessionNodeManager.getSessionsByStatus("interrupted")) {
        // The fallback timestamp is only computed when it is actually needed.
        const interruptedAt = row.interrupted_at ?? new Date(row.updated_at).toISOString();
        summaries.push({
            sessionId: row.session_id,
            agentName: row.agent_name,
            counterpartyPubkey: row.counterparty_pubkey,
            messageCount: row.message_count ?? 0,
            interruptedAt,
        });
    }
    return summaries;
}
|
|
845
|
+
// Build status response factory
|
|
846
|
+
/**
 * Assemble the daemon-wide status response: keystone signaling status, the agent and
 * connection lists, standing-receiver readiness, total retry-queue depth, and the
 * interrupted sessions (M7-SESSION-001 AC-006/AC-007).
 */
function getStatus() {
    const interruptedSessions = buildInterruptedSessions();
    const directorySignaling = signalingManager.status;
    const standingReceiverReady = sessionNodeManager.getStandingReceiverReady();
    const queueDepth = retryQueue.getTotalDepth();
    return {
        daemon: "running",
        directory_signaling: directorySignaling,
        agents,
        connections,
        standing_receiver_ready: standingReceiverReady,
        retryQueueDepth: queueDepth,
        interrupted_sessions: interruptedSessions,
    };
}
|
|
859
|
+
// Register IPC handlers
|
|
860
|
+
// IPC handler table, keyed by method name. Each handler is called with
// (params, connectionId).
const handlers = new Map();
// "status": returns the daemon-wide status snapshot; both arguments are ignored.
handlers.set("status", async (_params, _connectionId) => getStatus());
|
|
864
|
+
// ─── MCP-001: ipc.connect handler ───
|
|
865
|
+
// Registers the connection's clientType and returns the connectionId.
|
|
866
|
+
// "ipc.connect": record the connection's client type (defaulting to "cli"), register
// it with the notification dispatcher, and echo the connectionId back.
handlers.set("ipc.connect", async (params, connectionId) => {
    const declaredType = params?.clientType;
    const clientType = declaredType ?? "cli";
    // A connection starts with no current agent.
    const initialState = { currentAgent: null, clientType };
    perConnectionState.set(connectionId, initialState);
    // MCP-002: Register connection with notification dispatcher
    notificationDispatcher.registerConnection(connectionId);
    // Re-log with correct clientType (overrides the default "cli" from handleConnection)
    logger.info("daemon.ipc.connected", { connectionId, clientType });
    return { connectionId };
});
|
|
875
|
+
// ─── MCP-001: cello_start_agent handler ───
|
|
876
|
+
// "cello_start_agent": mark a loaded agent as online.
// Returns { ok: true } on success (also when the agent is already online), or
// { ok: false, reason, guidance } when `name` is missing or names no usable agent.
// The standing-receiver / re-park / auto-recover work is started in the background
// and is NOT awaited before the handler returns.
handlers.set("cello_start_agent", async (params, _connectionId) => {
    const name = params?.name;
    if (!name) {
        return { ok: false, reason: "missing_params", guidance: "Provide 'name' parameter with the agent name to start." };
    }
    const agent = agents.find((a) => a.name === name);
    // An agent that failed to load is reported exactly like an unknown one.
    if (!agent || agent.state === "load_failed") {
        return { ok: false, reason: "agent_not_found", guidance: `Agent '${name}' does not exist. Run 'cello login' to register agents, or check agent names with cello_list_agents.` };
    }
    if (onlineAgents.has(name)) {
        // Idempotent — already online, no event
        return { ok: true };
    }
    // Marked online synchronously, before any of the background work below has run.
    onlineAgents.add(name);
    // DOD-LOOP-1: each online agent gets its OWN standing receiver, so two agents on one daemon
    // (loopback) never contend for a single one. Fire-and-forget (initiate/accept also ensure on
    // demand); never let it throw out of the handler. Once the SR is up, re-park any of THIS
    // agent's un-acked awaiting content (the crash backstop — its node was unavailable at the
    // pre-IPC startup flush because no agent was online yet).
    // The standing-receiver ensure + sender re-park; a rejection here is a standing-receiver failure.
    // Chain shape: the first .catch handles a rejection from ensure OR from the re-park
    // and resolves the chain, so the auto-recover stage below runs in either case.
    void sessionNodeManager.ensureStandingReceiverForAgent(name)
        .then(() => flushAwaitingContent(name))
        .catch((err) => {
            logger.warn("session.standing_receiver.ensure.failed", { agentName: name, reason: err instanceof Error ? err.message : String(err) });
        })
        // DOD-MSG-4 (auto-recover-on-reconnect): RECEIVER drains its parked mailbox from every relay it
        // has sessions on (symmetric to the sender re-park). Its own stage so a failure is labelled
        // correctly (review #4), not as a standing-receiver error. autoRecoverForAgent catches per-relay
        // errors internally, so this .catch is a backstop only.
        .then(() => autoRecoverForAgent(name))
        .catch((err) => {
            logger.warn("content.recover.auto.failed", { agentName: name, reason: err instanceof Error ? err.message : String(err) });
        });
    // A missing pubkey is logged as an empty string.
    logger.info("agent.online", { agentName: name, agentPubkey: agent.pubkey ?? "" });
    // MCP-002: Broadcast agent_state_changed to ALL connections
    notificationDispatcher.dispatchAgentStateChanged(name, "online", "started");
    return { ok: true };
});
|
|
914
|
+
// ─── MCP-001: cello_stop_agent handler ───
|
|
915
|
+
// "cello_stop_agent": take an online agent offline.
// Returns { ok: true } on success (also when the agent is already offline), or
// { ok: false, reason, guidance } when `name` is missing or names no usable agent.
// Every connection that had this agent as its current agent is reset to "no current
// agent" and notified.
handlers.set("cello_stop_agent", async (params, _connectionId) => {
    const name = params?.name;
    if (!name) {
        return { ok: false, reason: "missing_params", guidance: "Provide 'name' parameter with the agent name to stop." };
    }
    const agent = agents.find((a) => a.name === name);
    // An agent that failed to load is reported exactly like an unknown one.
    if (!agent || agent.state === "load_failed") {
        return { ok: false, reason: "agent_not_found", guidance: `Agent '${name}' does not exist. Check agent names with cello_list_agents.` };
    }
    if (!onlineAgents.has(name)) {
        // Idempotent — already registered/offline, no event
        return { ok: true };
    }
    onlineAgents.delete(name);
    // DOD-LOOP-1: tear down this agent's standing receiver. Fire-and-forget and still
    // best-effort (a failure never rejects out of the handler), but the failure is now
    // logged instead of being silently discarded, mirroring the
    // "session.standing_receiver.ensure.failed" warning on the start path.
    void sessionNodeManager.removeStandingReceiverForAgent(name).catch((err) => {
        logger.warn("session.standing_receiver.remove.failed", { agentName: name, reason: err instanceof Error ? err.message : String(err) });
    });
    logger.info("agent.offline", { agentName: name, reason: "stopped" });
    // MCP-002: Broadcast agent_state_changed to ALL connections
    notificationDispatcher.dispatchAgentStateChanged(name, "offline", "stopped");
    // Clear current agent for all connections that had this agent as current
    for (const [connId, state] of perConnectionState) {
        if (state.currentAgent === name) {
            state.currentAgent = null;
            notificationDispatcher.setCurrentAgent(connId, null);
            notificationDispatcher.dispatchAgentCurrentChanged(connId, name, null);
            logger.info("agent.current.switched", { connectionId: connId, fromAgent: name, toAgent: null });
        }
    }
    return { ok: true };
});
|
|
945
|
+
// ─── MCP-001: cello_use_agent handler ───
|
|
946
|
+
// "cello_use_agent": make an ONLINE agent the current agent of the calling connection.
// Returns { ok: true } after switching, or { ok: false, reason, guidance } when the
// name is missing, the agent is unknown / failed to load / not online, the connection
// never sent ipc.connect, or the agent is already current for this connection.
handlers.set("cello_use_agent", async (params, connectionId) => {
    const refuse = (reason, guidance) => ({ ok: false, reason, guidance });
    const name = params?.name;
    if (!name) {
        return refuse("missing_params", "Provide 'name' parameter with the agent name to use.");
    }
    const agent = agents.find((candidate) => candidate.name === name);
    const usable = Boolean(agent) && agent.state !== "load_failed";
    if (!usable) {
        return refuse("agent_not_found", `Agent '${name}' does not exist. Check agent names with cello_list_agents.`);
    }
    if (!onlineAgents.has(name)) {
        return refuse("agent_not_online", `Agent '${name}' exists but is not online. Call cello_start_agent('${name}') first to bring it online, then retry cello_use_agent.`);
    }
    const connState = perConnectionState.get(connectionId);
    if (!connState) {
        return refuse("connection_not_registered", "Send ipc.connect frame before calling agent tools.");
    }
    const fromAgent = connState.currentAgent;
    if (fromAgent === name) {
        return refuse("agent_already_current", `Agent '${name}' is already the current agent for this connection. No action needed — you can proceed with session operations.`);
    }
    connState.currentAgent = name;
    // MCP-002: Update dispatcher's routing table and send notification to this connection only
    notificationDispatcher.setCurrentAgent(connectionId, name);
    notificationDispatcher.dispatchAgentCurrentChanged(connectionId, fromAgent, name);
    logger.info("agent.current.switched", { connectionId, fromAgent, toAgent: name });
    return { ok: true };
});
|
|
973
|
+
// ─── MCP-001: cello_list_agents handler ───
|
|
974
|
+
// cello_list_agents: report the agent list as seen from the calling connection.
// The per-connection view is produced entirely by getAgentsForConnection.
handlers.set("cello_list_agents", async (_params, connectionId) => {
    const visibleAgents = getAgentsForConnection(connectionId);
    return { agents: visibleAgents };
});
|
|
977
|
+
// ─── M7-REGISTRATION (Action 2): cello_register handler ───
|
|
978
|
+
// Registers a LOADED agent (one with a K_local `key` under ~/.cello/agents/<name>/)
|
|
979
|
+
// with the directory: ML-DSA keygen → register_request → FROST DKG → register_success,
|
|
980
|
+
// persisting the ML-DSA keypair, FROST share, registration state, and agent→user link.
|
|
981
|
+
// Always invoked with a pre-authorization ticket from the CELLO Operations Agent.
|
|
982
|
+
// Single-flight guard (M1): the directory's registration reply frames
|
|
983
|
+
// (dkg_ready / register_success / register_error) carry NO agent identifier, so
|
|
984
|
+
// two concurrent registrations over the one shared directory signaling stream
|
|
985
|
+
// would each arm a resolver and both receive the same reply — cross-wiring the
|
|
986
|
+
// ceremonies. Serialize registration daemon-wide (it is a rare, once-per-agent,
|
|
987
|
+
// human-initiated operation). This is the registration analogue of the
|
|
988
|
+
// sealInterruptedInProgress guard, but global rather than per-key because the
|
|
989
|
+
// frames are not agent-tagged.
|
|
990
|
+
// Daemon-wide single-flight flag for cello_register: set to true when a call claims the slot
// and reset to false in that call's outer `finally`, whatever the outcome.
let registrationInProgress = false;
|
|
991
|
+
/**
 * Translate a registration failure reason code into operator-facing guidance text.
 *
 * @param {*} reason - Failure code reported for the registration attempt.
 * @returns {string} The dedicated guidance for a known code, otherwise a generic message
 *   that embeds the code and points at the daemon logs.
 */
const registrationGuidance = (() => {
    // A Map (not a plain object) so codes such as "constructor" fall through to the generic text.
    const guidanceByReason = new Map([
        ["already_registered", "This agent is already registered with the directory. No action needed."],
        ["directory_unreachable", "The directory signaling stream is not connected (or its bootstrap endpoint could not be resolved). Wait for directory_signaling to show connected in cello status, then retry."],
        ["dkg_failed", "The FROST DKG ceremony with the directory failed. This usually means the directory rejected the pre-authorization token or a node was unavailable mid-ceremony. Verify the preAuthToken is valid/unused and retry."],
        ["timeout", "The directory did not respond within the registration timeout. Retry once directory_signaling is connected."],
    ]);
    return (reason) => {
        const specific = guidanceByReason.get(reason);
        if (specific !== undefined) {
            return specific;
        }
        return `Registration failed: ${reason}. Check the daemon logs (registration.* events) and that the preAuthToken is valid.`;
    };
})();
|
|
1005
|
+
// cello_register: register a loaded agent with the directory.
// Params: { agent, preAuthToken, phoneStub? } — phoneStub defaults to "".
// Returns { ok: true, agent_id, primary_pubkey[, warning] } on success, otherwise
// { ok: false, reason, guidance }.
// Ordering matters: all cheap refusals run first, then the single-flight slot is claimed with no
// await between the check and the set, and the slot is always released by the outer `finally`.
handlers.set("cello_register", async (params, _connectionId) => {
    const SIGNALING_CONNECT_TIMEOUT_MS = 10_000;
    const refuse = (reason, guidance) => ({ ok: false, reason, guidance });
    const name = params?.agent;
    const preAuthToken = params?.preAuthToken;
    const phoneStub = params?.phoneStub ?? "";
    if (!name) {
        return refuse("missing_params", "Provide 'agent' (the agent name to register) and 'preAuthToken' (the pre-authorization ticket from the CELLO Operations Agent).");
    }
    if (!preAuthToken) {
        return refuse("missing_preauth_token", "Registration requires a 'preAuthToken' issued by the CELLO Operations Agent (Telegram). Obtain one, then retry cello_register.");
    }
    const keyProvider = keyProviders.get(name);
    if (!keyProvider) {
        return refuse("agent_not_found", `Agent '${name}' has no local K_local key loaded. Its key must exist at ~/.cello/agents/${name}/key before registration — create it and restart the daemon, then retry cello_register.`);
    }
    if (!directoryEndpointResolver) {
        return refuse("directory_unreachable", "The daemon has no directory endpoint resolver configured, so it cannot reach the directory to register.");
    }
    // M1: claim the single-flight slot synchronously so two concurrent calls cannot both proceed.
    if (registrationInProgress) {
        return refuse("registration_already_in_progress", "Another agent registration is already in progress on this daemon. Registration runs one at a time because the directory's reply frames are not agent-tagged. Wait for it to finish (check the daemon logs for registration.succeeded/failed), then retry.");
    }
    registrationInProgress = true;
    try {
        // Resolve the directory endpoint once; the registration context reads it synchronously.
        const resolved = await directoryEndpointResolver();
        if (!resolved || !resolved.multiaddr) {
            // Registration has to open streams, so a dialable multiaddr is mandatory here.
            return refuse("directory_unreachable", "Could not resolve a dialable directory bootstrap endpoint (GET /bootstrap). Check CELLO_DIRECTORY_URL and network connectivity, then retry.");
        }
        const directoryEndpoint = { peer_id: resolved.peerId, multiaddrs: [resolved.multiaddr] };
        // Prefer the pubkey recorded for the loaded agent; otherwise derive it from the key provider.
        let agentPubkeyHex = loadedAgents.find((candidate) => candidate.name === name)?.pubkey;
        if (agentPubkeyHex == null) {
            agentPubkeyHex = Buffer.from(await keyProvider.getPublicKey()).toString("hex");
        }
        // Register over THIS agent's own directory signaling stream.
        const { signaling: agentStream, getNode: resolveAgentNode } = getAgentSignaling(name, keyProvider, agentPubkeyHex);
        const streamUp = await waitForSignalingConnected(agentStream, SIGNALING_CONNECT_TIMEOUT_MS);
        if (!streamUp) {
            // Distinct cause → distinct code. Drop the manager so it does not keep reconnecting
            // for an unregistered agent.
            await dropAgentSignaling(name);
            return refuse("directory_signaling_timeout", `Agent '${name}' could not establish its directory signaling stream within 10s. Check CELLO_DIRECTORY_URL and that the directory is reachable, then retry cello_register.`);
        }
        const persistence = new FileRegistrationPersistence({ agentDir: join(celloDir, "agents", name), logger });
        const ctx = new DaemonRegistrationContext({
            signaling: agentStream,
            getDirectoryNode: resolveAgentNode,
            getDirectoryEndpoint: () => directoryEndpoint,
            keyProvider,
            persistence,
            logger,
        });
        try {
            const outcome = await new RegistrationManager(ctx).register(phoneStub, preAuthToken);
            if ("error" in outcome) {
                logger.warn("registration.failed", { agentName: name, reason: outcome.error });
                // Terminal failure for THIS agent — drop its signaling manager (re-created on retry).
                await dropAgentSignaling(name);
                return refuse(outcome.error, registrationGuidance(outcome.error));
            }
            // Logs success and builds the success response; `extra` carries an optional warning.
            const succeed = (extra) => {
                logger.info("registration.succeeded", { agentName: name, agentId: outcome.agent_id, primaryPubkey: outcome.primary_pubkey });
                return { ok: true, agent_id: outcome.agent_id, primary_pubkey: outcome.primary_pubkey, ...extra };
            };
            // L1: the agent is already registered at this point, so a link-write failure is
            // surfaced as a non-fatal warning rather than as a registration failure.
            try {
                await persistence.persistAgentUserLink({ agentId: outcome.agent_id, preAuthToken, linkedAt: Date.now() });
                return succeed({});
            }
            catch (linkErr) {
                logger.warn("registration.user_link.capture_failed", {
                    agentName: name,
                    agentId: outcome.agent_id,
                    error: linkErr instanceof Error ? linkErr.message : String(linkErr),
                });
                return succeed({ warning: "agent_user_link_not_captured" });
            }
        }
        finally {
            ctx.dispose();
        }
    }
    finally {
        registrationInProgress = false;
    }
});
|
|
1114
|
+
// DOD-LOOP-1: daemon-level seal bookkeeping is keyed by (agentName, sessionId), NOT sessionId
|
|
1115
|
+
// alone — two of the operator's agents can hold both ends of the same session_id on one daemon
|
|
1116
|
+
// (loopback), and each end seals independently. Keying by session_id alone would let A's close
|
|
1117
|
+
// block B's (false seal_interrupted_in_progress) and make their seal waiters collide.
|
|
1118
|
+
/**
 * Build the composite bookkeeping key for one agent's end of a session.
 *
 * @param {string} agentName - Local agent owning this end of the session.
 * @param {string} sessionId - Session identifier (callers in this file pass the hex form).
 * @returns {string} `agentName`, the U+001F unit separator, then `sessionId`.
 */
const sealKey = (agentName, sessionId) => {
    const UNIT_SEPARATOR = "\x1f";
    return "".concat(agentName, UNIT_SEPARATOR, sessionId);
};
|
|
1119
|
+
// M7-SESSION-001: tracks seal-interrupted flows currently in progress.
|
|
1120
|
+
// Prevents duplicate concurrent seal-interrupted attempts for the same (agent, session) (AC-011).
|
|
1121
|
+
// NOTE(review): no use site is visible in this part of the file — presumably holds
// sealKey(agentName, sessionId) strings for flows that are currently running; confirm at the callers.
const sealInterruptedInProgress = new Set();
|
|
1122
|
+
// sealKey(agentName, sessionIdHex) → callback awaiting that end's session_sealed frame.
// registerSessionSealedListener removes the entry and then invokes it with { rootHex, legibility }.
const pendingSealWaiters = new Map();
|
|
1123
|
+
// M7 DOD-SPINE-7: register the session_sealed completion handler on a signaling seam (keystone
|
|
1124
|
+
// for the primary agent, per-agent for the rest — the directory routes session_sealed to the
|
|
1125
|
+
// session-owning agent's authenticated stream). Function declaration so getAgentSignaling
|
|
1126
|
+
// (defined earlier, called at runtime) can wire it per-agent.
|
|
1127
|
+
/**
 * Register the inbound handler that completes a bilateral seal when a `session_sealed` frame
 * arrives on `signaling`.
 *
 * Processing is fail-closed: a frame is only recorded, its waiter resolved and the session node
 * destroyed after every check below passes. Any rejected check — or any unexpected exception —
 * logs an error and leaves the session unsealed.
 *
 * Checks, in order:
 *   1. FROST frames: all certificate fields present, then verifyBilateralSealCertificate.
 *   2. Legibility participants: must be an array of objects with a string `pubkey` and a
 *      numeric `content_frontier_seq`.
 *   3. Any claimed frontier (> 0) must be backed by `frontier_leaves`, and no published
 *      frontier may exceed what the leaves re-derive to.
 *
 * @param {object} signaling - Signaling seam exposing registerInboundHandler(fn).
 * @param {string} agentName - Local agent that owns this stream.
 * @param {string} agentPubkeyHex - That agent's public key, hex-encoded.
 * @returns {*} Whatever signaling.registerInboundHandler returns.
 */
function registerSessionSealedListener(signaling, agentName, agentPubkeyHex) {
    // Strict coercion for certificate fields: anything that is not already bytes counts as missing.
    const bytesOrNull = (v) => v instanceof Uint8Array ? v : Buffer.isBuffer(v) ? new Uint8Array(v) : null;
    // Lenient coercion used for the frontier leaves and their session binding.
    const coerceBytes = (v) => (v instanceof Uint8Array ? v : new Uint8Array(v));
    const rejectFrontier = (sidHex, details) => {
        logger.error("seal.certificate.frontier.unverifiable", { sessionId: sidHex, ...details });
    };
    return signaling.registerInboundHandler((frame) => {
        if (frame["type"] !== "session_sealed")
            return;
        const sidHex = frameValueToHex(frame["session_id"]);
        const rootHex = frameValueToHex(frame["sealed_root"]);
        if (!sidHex || !rootHex)
            return;
        // Fire-and-forget, but never floating: the trailing .catch turns any unexpected throw
        // into a logged rejection of the seal instead of an unhandled promise rejection.
        void (async () => {
            // M7 legibility-TBS-binding: verify the FROST signature over the legibility-bound TBS.
            // A failed verdict REJECTS the seal.
            if (frame["signature_type"] === "frost") {
                const sessionIdBytes = bytesOrNull(frame["session_id"]);
                const sealedRootBytes = bytesOrNull(frame["sealed_root"]);
                const frostSig = bytesOrNull(frame["frost_signature"]);
                const signerPubkey = bytesOrNull(frame["signer_pubkey"]);
                const leafCount = typeof frame["leaf_count"] === "number" ? frame["leaf_count"] : null;
                const ctRaw = frame["close_timestamp"];
                const closeTs = typeof ctRaw === "number" ? ctRaw : typeof ctRaw === "bigint" ? Number(ctRaw) : null;
                if (!sessionIdBytes || !sealedRootBytes || !frostSig || !signerPubkey || leafCount === null || closeTs === null) {
                    logger.error("session.sealed.signature.invalid", { sessionId: sidHex, reason: "missing_certificate_fields" });
                    return;
                }
                const record = sessionNodeManager.getSessionRecord(agentName, sidHex);
                const verdict = await verifyBilateralSealCertificate({ agentDir: join(celloDir, "agents", agentName), agentPubkeyHex, logger, counterpartyPrimaryHex: record?.counterparty_primary_pubkey ?? null }, {
                    sessionId: sessionIdBytes,
                    sealedRoot: sealedRootBytes,
                    leafCount,
                    closeTimestamp: closeTs,
                    frostSignature: frostSig,
                    signerPubkey,
                    signatureType: "frost",
                    legibility: frame["legibility"] && typeof frame["legibility"] === "object"
                        ? frame["legibility"]
                        : null,
                });
                if (!verdict.ok) {
                    // tamper-evidence: do NOT mark sealed, do NOT resolve the waiter as success.
                    logger.error("session.sealed.signature.invalid", { sessionId: sidHex, reason: verdict.reason });
                    return;
                }
                logger.info("session.sealed.signature.checked", { sessionId: sidHex, verified: verdict.verified });
            }
            // M7-SESSION-004 (AC-005): normalise the wire legibility into the certificate that is
            // persisted with the sealed record.
            const legibility = normalizeLegibility(frame["legibility"]);
            logger.info("session.sealed.received", {
                sessionId: sidHex,
                sealedRoot: rootHex,
                hasLegibility: legibility !== undefined,
                finalMessageAnswered: legibility && typeof legibility === "object" && "final_message" in legibility
                    ? legibility.final_message?.answered
                    : undefined,
            });
            // DOD-LEG-2 (SI-002): re-derive each party's content_frontier_seq from the signed leaves
            // and reject the certificate if any published frontier is inflated.
            const frontierLeavesRaw = frame["frontier_leaves"];
            if (legibility !== undefined) {
                const rawParticipants = legibility.participants ?? [];
                // Robustness: validate the container and every entry BEFORE reading any field, so a
                // malformed legibility (non-array, null entry) is rejected rather than thrown on.
                if (!Array.isArray(rawParticipants) || rawParticipants.some((p) => p === null || typeof p !== "object")) {
                    rejectFrontier(sidHex, { reason: "participant_malformed" });
                    return;
                }
                // Any party claiming to have received content (frontier > 0) MUST be backed by signed leaves.
                const anyClaimedFrontier = rawParticipants.some((p) => typeof p.content_frontier_seq === "number" && p.content_frontier_seq > 0);
                const haveLeaves = Array.isArray(frontierLeavesRaw) && frontierLeavesRaw.length > 0;
                // Fail-closed: omitting the leaves while still publishing a frontier is a rejection.
                if (anyClaimedFrontier && !haveLeaves) {
                    rejectFrontier(sidHex, { reason: "frontier_leaves_missing" });
                    return;
                }
                for (const p of rawParticipants) {
                    if (typeof p.pubkey !== "string" || typeof p.content_frontier_seq !== "number") {
                        rejectFrontier(sidHex, { reason: "participant_malformed" });
                        return;
                    }
                }
                if (haveLeaves) {
                    const leaves = frontierLeavesRaw.map((leaf) => ({
                        structure1_cbor: coerceBytes(leaf["structure1_cbor"]),
                        sender_pubkey: coerceBytes(leaf["sender_pubkey"]),
                        sender_signature: coerceBytes(leaf["sender_signature"]),
                    }));
                    // Session-bound re-derivation: the leaves are checked against THIS session's id.
                    const rederived = reDeriveFrontiers(leaves, coerceBytes(frame["session_id"]));
                    if (!rederived.ok) {
                        rejectFrontier(sidHex, { reason: rederived.reason });
                        return;
                    }
                    const inflated = findInflatedFrontier(rawParticipants, rederived.frontiers);
                    if (inflated) {
                        // Published frontier exceeds what the signed leaves support — refuse the seal
                        // (do NOT persist, do NOT resolve the close as success).
                        rejectFrontier(sidHex, {
                            party: inflated.party,
                            publishedFrontier: inflated.publishedFrontier,
                            derivedFrontier: inflated.derivedFrontier,
                        });
                        return;
                    }
                    logger.info("seal.certificate.frontier.verified", {
                        sessionId: sidHex,
                        parties: rawParticipants.length,
                    });
                }
                // Persistence failure is non-fatal: the seal still completes below.
                try {
                    sessionNodeManager.recordSealCertificate(agentName, sidHex, rootHex, JSON.stringify(legibility));
                }
                catch (error) {
                    logger.warn("seal.certificate.persist.failed", {
                        sessionId: sidHex,
                        reason: error instanceof Error ? error.message : String(error),
                    });
                }
            }
            const waiterKey = sealKey(agentName, sidHex);
            const waiter = pendingSealWaiters.get(waiterKey);
            if (waiter) {
                // Remove before invoking so the waiter can never be resolved twice.
                pendingSealWaiters.delete(waiterKey);
                waiter({ rootHex, legibility });
            }
            // Mark the session sealed + tear the node down (idempotent — safe if already gone).
            void sessionNodeManager.destroySessionNode(agentName, sidHex, "sealed");
        })().catch((error) => {
            // Unexpected failure while processing the frame: treat it like any other rejection.
            logger.error("session.sealed.processing.failed", {
                sessionId: sidHex,
                agentName,
                reason: error instanceof Error ? error.message : String(error),
            });
        });
    });
}
|
|
1271
|
+
// sessionIdHex → callback awaiting the directory's answer to a unilateral close.
// registerUnilateralConfirmedListener invokes it with { ok: true, sealedRootHex } or { ok: false, reason }.
// NOTE(review): keyed by session id alone, whereas pendingSealWaiters uses sealKey(agentName, sessionId) —
// confirm two local agents can never wait on the same session id at once.
const pendingUnilateralWaiters = new Map();
|
|
1272
|
+
// SESSION-002: per-agent listener for the unilateral certificate. Verifies the FROST
|
|
1273
|
+
// signature over the rebuilt TBS against the agent's own primary_pubkey BEFORE resolving
|
|
1274
|
+
// the close as sealed (SI-003: a channel-swapped sealed_root fails the signature check).
|
|
1275
|
+
/**
 * Register the inbound handler for the directory's answer to a unilateral close:
 * `seal_unilateral_confirmed` or `seal_unilateral_too_early`.
 *
 * Frames with no pending waiter for their session id are ignored. Otherwise the waiter is
 * removed from pendingUnilateralWaiters and resolved exactly once with:
 *   - { ok: false, reason: "seal_unilateral_too_early" } for a too-early frame
 *   - { ok: false, reason: "malformed_certificate" } when a certificate field is missing/mistyped
 *   - { ok: false, reason: "certificate_invalid:<reason>" } when verification fails or throws
 *   - { ok: true, sealedRootHex } after verifyUnilateralCertificate accepts (the session node
 *     is destroyed as "sealed" in that case only — SI-003)
 *
 * @param {object} signaling - Signaling seam exposing registerInboundHandler(fn).
 * @param {string} agentName - Local agent that owns this stream.
 * @param {string} agentPubkeyHex - That agent's public key, hex-encoded.
 * @returns {*} Whatever signaling.registerInboundHandler returns.
 */
function registerUnilateralConfirmedListener(signaling, agentName, agentPubkeyHex) {
    const toU8 = (v) => v instanceof Uint8Array ? v : Buffer.isBuffer(v) ? new Uint8Array(v) : null;
    return signaling.registerInboundHandler((frame) => {
        const ftype = frame["type"];
        if (ftype !== "seal_unilateral_confirmed" && ftype !== "seal_unilateral_too_early")
            return;
        const sidHex = frameValueToHex(frame["session_id"]);
        if (!sidHex)
            return;
        const waiter = pendingUnilateralWaiters.get(sidHex);
        if (!waiter)
            return;
        if (ftype === "seal_unilateral_too_early") {
            pendingUnilateralWaiters.delete(sidHex);
            waiter({ ok: false, reason: "seal_unilateral_too_early" });
            return;
        }
        // Fire-and-forget, but never floating: see the .catch below.
        void (async () => {
            const sessionId = toU8(frame["session_id"]);
            const sealedRoot = toU8(frame["sealed_root"]);
            const frostSig = toU8(frame["frost_signature"]);
            const leafCount = typeof frame["leaf_count"] === "number" ? frame["leaf_count"] : null;
            const tsRaw = frame["close_timestamp"];
            const closeTs = typeof tsRaw === "number" ? tsRaw : typeof tsRaw === "bigint" ? Number(tsRaw) : null;
            const sigType = frame["signature_type"];
            if (!sessionId || !sealedRoot || !frostSig || leafCount === null || closeTs === null ||
                (sigType !== "frost" && sigType !== "single")) {
                logger.warn("session.unilateral.certificate.invalid", { sessionId: sidHex, reason: "malformed_certificate" });
                pendingUnilateralWaiters.delete(sidHex);
                waiter({ ok: false, reason: "malformed_certificate" });
                return;
            }
            const result = await verifyUnilateralCertificate({ agentDir: join(celloDir, "agents", agentName), agentPubkeyHex, logger }, { sessionId, sealedRoot, leafCount, closeTimestamp: closeTs, frostSignature: frostSig, signatureType: sigType });
            pendingUnilateralWaiters.delete(sidHex);
            if (!result.ok) {
                // SI-003: do NOT mark sealed when the certificate signature does not verify.
                logger.warn("session.unilateral.certificate.invalid", { sessionId: sidHex, reason: result.reason, signatureType: sigType });
                waiter({ ok: false, reason: `certificate_invalid:${result.reason}` });
                return;
            }
            logger.info("session.unilateral.certificate.verified", { sessionId: sidHex, signatureType: sigType, party: "present" });
            void sessionNodeManager.destroySessionNode(agentName, sidHex, "sealed");
            waiter({ ok: true, sealedRootHex: Buffer.from(sealedRoot).toString("hex") });
        })().catch((error) => {
            // Verification itself blew up. Without this the rejection would be unhandled and the
            // close would hang on a waiter nobody resolves.
            logger.warn("session.unilateral.certificate.invalid", {
                sessionId: sidHex,
                reason: "verification_error",
                error: error instanceof Error ? error.message : String(error),
            });
            // The body always removes the waiter before invoking it, so a waiter that is still
            // registered has not been resolved yet — resolve it as a failure exactly once.
            if (pendingUnilateralWaiters.get(sidHex) === waiter) {
                pendingUnilateralWaiters.delete(sidHex);
                waiter({ ok: false, reason: "certificate_invalid:verification_error" });
            }
        });
    });
}
|
|
1321
|
+
// ─── DOD-UP-1: returning-absent-party seal upgrade (unilateral → bilateral) ───
|
|
1322
|
+
// Per-session idempotency guard so a notification burst (reconnect re-delivery) cannot launch
|
|
1323
|
+
// concurrent upgrade attempts. Keyed `${agentName}:${sessionIdHex}`; cleared after each attempt.
|
|
1324
|
+
// attemptSealUpgrade adds a key before starting an attempt and removes it in a `finally` block.
const sealUpgradeInFlight = new Set();
|
|
1325
|
+
/**
|
|
1326
|
+
* DOD-UP-1: B (the absent party) ratifies a unilateral seal it learns about on reconnect.
|
|
1327
|
+
*
|
|
1328
|
+
* THE KERNEL: B signs the ratification ONLY after it has recovered + integrity-verified the
|
|
1329
|
+
* content behind the sealed root. We (0) verify the unilateral cert so R1 is provably authentic
|
|
1330
|
+
* (SI-003 — a channel-swapped root fails); (1) recover any parked content from the relay; (2) gate
|
|
1331
|
+
* on getSealUpgradeReadiness — refuse content_unrecoverable (session unknown) or content_tamper
|
|
1332
|
+
* (cross-check mismatch, AC-003); only then (3) sign the ack over R1 with B's OWN K_local and send
|
|
1333
|
+
* seal_upgrade_request. B never co-signs content it could not verify.
|
|
1334
|
+
*/
|
|
1335
|
+
// Thin wrapper over the extracted, unit-tested seal-upgrade.ts module (the KERNEL + AC-008 + H1/M1
|
|
1336
|
+
// hardening live there so the refusal/reject bodies run under adversarial tests). This wrapper owns
|
|
1337
|
+
// only the per-session in-flight guard and the real-dep injection.
|
|
1338
|
+
/**
 * Run one seal-upgrade attempt for (agentName, sidHex), unless one is already in flight.
 *
 * Owns only the per-session in-flight guard and the dependency wiring; the attempt itself is
 * delegated to attemptSealUpgradeImpl. The guard is released in `finally`, so it is cleared
 * whether the attempt resolves or throws.
 *
 * @param {object} signaling - Stream used to send raw frames (signaling.sendRaw).
 * @param {string} agentName - Local agent attempting the upgrade.
 * @param {string} agentPubkeyHex - That agent's public key, hex-encoded.
 * @param {string} sidHex - Session id, hex-encoded.
 * @param {object} frame - The inbound frame that triggered the attempt.
 * @returns {Promise<void>}
 */
async function attemptSealUpgrade(signaling, agentName, agentPubkeyHex, sidHex, frame) {
    const guardKey = `${agentName}:${sidHex}`;
    const alreadyRunning = sealUpgradeInFlight.has(guardKey);
    if (alreadyRunning) {
        return;
    }
    sealUpgradeInFlight.add(guardKey);
    try {
        const deps = {
            logger,
            agentName,
            agentPubkeyHex,
            getReadiness: (agent, session) => sessionNodeManager.getSealUpgradeReadiness(agent, session),
            getContentLeafCount: (agent, session) => sessionNodeManager.getSessionTree(agent, session).size(),
            recoverContent: (agent) => autoRecoverForAgent(agent),
            getKeyProvider: (agent) => keyProviders.get(agent),
            sendRaw: (outbound) => signaling.sendRaw(outbound),
        };
        await attemptSealUpgradeImpl(deps, sidHex, frame);
    }
    finally {
        // Release the guard so a later reconnect can retry.
        sealUpgradeInFlight.delete(guardKey);
    }
}
|
|
1359
|
+
// Thin wrapper: verify the dual-attestation cert (module, AC-008 + H1), then APPLY — mark bilateral
|
|
1360
|
+
// ONLY on ok. Never trust the directory's "bilateral" claim.
|
|
1361
|
+
/**
 * Verify a `seal_upgrade_confirmed` frame and, only when verification succeeds, apply it:
 * log `session.seal.upgraded` and destroy the session node as "sealed".
 *
 * A failed verification returns silently (this function logs nothing in that case) and
 * leaves the session untouched.
 *
 * @param {string} agentName - Local agent the frame was delivered to.
 * @param {string} agentPubkeyHex - That agent's public key, hex-encoded.
 * @param {string} sidHex - Session id, hex-encoded.
 * @param {object} frame - The seal_upgrade_confirmed frame.
 * @returns {Promise<void>}
 */
async function verifyAndApplyUpgradeConfirmed(agentName, agentPubkeyHex, sidHex, frame) {
    // Counterparty key lookup handed to the verifier; null when the session record or key is absent.
    const lookupCounterpartyHex = (agent, session) => {
        const record = sessionNodeManager.getSessionRecord(agent, session);
        return record?.counterparty_pubkey ?? null;
    };
    const verdict = await verifyUpgradeConfirmedCert({
        logger,
        agentName,
        agentPubkeyHex,
        celloDir,
        getCounterpartyHex: lookupCounterpartyHex,
    }, sidHex, frame);
    if (verdict.ok) {
        logger.info("session.seal.upgraded", { sessionId: sidHex, agentName, party: verdict.party });
        void sessionNodeManager.destroySessionNode(agentName, sidHex, "sealed");
    }
}
|
|
1371
|
+
/**
|
|
1372
|
+
* DOD-UP-1: per-agent listener for the absent-party seal upgrade. On reconnect the directory
|
|
1373
|
+
* delivers a queued seal_unilateral_notification to B (the absent party) — that triggers the
|
|
1374
|
+
* ratification attempt. The directory's seal_upgrade_confirmed / seal_upgrade_rejected responses
|
|
1375
|
+
* are observed here too (B marks the session bilaterally sealed / logs the refusal).
|
|
1376
|
+
*/
|
|
1377
|
+
/**
 * Register the inbound handler for the seal-upgrade frames delivered on `signaling`:
 *   - `seal_upgrade_confirmed`      → verifyAndApplyUpgradeConfirmed (AC-008: the dual
 *                                     attestation is verified before anything is applied)
 *   - `seal_upgrade_rejected`       → logged as a warning with the frame's `reason`
 *   - `seal_unilateral_notification`→ attemptSealUpgrade
 * Any other frame type, or a frame without a usable session id, is ignored.
 *
 * Both asynchronous reactions are fire-and-forget, but each carries a rejection handler so a
 * failure is logged instead of surfacing as an unhandled promise rejection.
 *
 * @param {object} signaling - Signaling seam exposing registerInboundHandler(fn).
 * @param {string} agentName - Local agent that owns this stream.
 * @param {string} agentPubkeyHex - That agent's public key, hex-encoded.
 * @returns {*} Whatever signaling.registerInboundHandler returns.
 */
function registerUnilateralUpgradeListener(signaling, agentName, agentPubkeyHex) {
    // Builds the rejection handler for one background task.
    const logBackgroundFailure = (event, sidHex) => (error) => {
        logger.warn(event, {
            sessionId: sidHex,
            agentName,
            reason: error instanceof Error ? error.message : String(error),
        });
    };
    return signaling.registerInboundHandler((frame) => {
        const ftype = frame["type"];
        if (ftype !== "seal_upgrade_confirmed" && ftype !== "seal_upgrade_rejected" && ftype !== "seal_unilateral_notification")
            return;
        const sidHex = frameValueToHex(frame["session_id"]);
        if (!sidHex)
            return;
        if (ftype === "seal_upgrade_confirmed") {
            // AC-008: do NOT accept "bilateral" on the directory's word — verify the dual attestation.
            void verifyAndApplyUpgradeConfirmed(agentName, agentPubkeyHex, sidHex, frame)
                .catch(logBackgroundFailure("session.seal.upgrade.verify.failed", sidHex));
            return;
        }
        if (ftype === "seal_upgrade_rejected") {
            logger.warn("session.seal.upgrade.rejected", { sessionId: sidHex, agentName, reason: frame["reason"] });
            return;
        }
        // seal_unilateral_notification: react by attempting the ratification.
        void attemptSealUpgrade(signaling, agentName, agentPubkeyHex, sidHex, frame)
            .catch(logBackgroundFailure("session.seal.upgrade.attempt.failed", sidHex));
    });
}
|
|
1404
|
+
// ─── MCP-001: cello_status (per-connection perspective) ───
|
|
1405
|
+
// cello_status: daemon status as seen from the calling connection.
// The agent list is per-connection; the remaining fields are daemon-wide values.
handlers.set("cello_status", async (_params, connectionId) => {
    const directorySignaling = signalingManager.status;
    const agentsForCaller = getAgentsForConnection(connectionId);
    const connectionSnapshot = connections;
    // M-1 PULL: interrupted sessions are included in this per-connection view as well.
    const interruptedSessions = buildInterruptedSessions();
    return {
        daemon: "running",
        directory_signaling: directorySignaling,
        agents: agentsForCaller,
        connections: connectionSnapshot,
        interrupted_sessions: interruptedSessions,
    };
});
|
|
1416
|
+
// ─── MCP-001: no_current_agent guard for session tools ───
|
|
1417
|
+
// cello_send / cello_receive are NOT in this stub list — DAEMON-004 registers
|
|
1418
|
+
// real handlers for them below (each enforces the no_current_agent guard inline).
|
|
1419
|
+
// NOTE: cello_await_session is NOT in this stub list — Seam 2 registers a real
|
|
1420
|
+
// handler for it below (inbound session establishment), with its own inline
|
|
1421
|
+
// no_current_agent guard.
|
|
1422
|
+
// Session tools that are still stubs: each gets a handler that enforces the no_current_agent
// guard and otherwise reports not_implemented.
const SESSION_TOOLS_REQUIRING_AGENT = ["cello_receive_session", "cello_list_sessions"];
// Shared refusal returned whenever the calling connection has no current agent.
// The same object instance is handed to every caller.
const NO_CURRENT_AGENT_RESPONSE = {
    ok: false,
    reason: "no_current_agent",
    guidance: "No current agent is set for this connection. Call cello_start_agent to bring an agent online, then call cello_use_agent to set it as the current agent for this connection.",
};
SESSION_TOOLS_REQUIRING_AGENT.forEach((tool) => {
    handlers.set(tool, async (_params, connectionId) => {
        // Covers both an unregistered connection and a registered one with no agent selected.
        const currentAgent = perConnectionState.get(connectionId)?.currentAgent;
        if (!currentAgent) {
            return NO_CURRENT_AGENT_RESPONSE;
        }
        // Stub: actual session tool routing will be implemented in DAEMON-002/SIGNAL-001
        return {
            ok: false,
            reason: "not_implemented",
            guidance: `Session tool '${tool}' routing is not yet implemented in the daemon. This will be available after the session node manager is wired to the IPC layer.`,
        };
    });
});
|
|
1441
|
+
// ─── CELLO-M7-TRANSPORT-001: cello_initiate_session ─────────────────────────
// Initiator-side session establishment (AC-005/AC-006/AC-008/AC-010c).
// Steps, in execution order:
//   1. Reject the call unless the connection has a current agent.
//   2. Mint one correlationId that tags every step of this flow.
//   3. Choose the advertised address from the AutoNAT dialability reading and
//      the relay circuit address (AC-004/AC-019).
//   4. Negotiate the session assignment via resolvedSessionNegotiator; a failed
//      negotiation is returned to the caller unchanged (reason + guidance).
//   5. Dial through transportSelector with the assignment (SI-001); a failed
//      dial is returned to the caller unchanged (AC-008).
//   6. Create the session node via sessionNodeManager, then attempt to connect
//      it to the counterparty's advertised session addresses. A failed connect
//      is logged and does NOT fail the call.
// Success reply: { ok: true, sessionId, transportMode, correlationId }.
handlers.set("cello_initiate_session", async (params, connectionId) => {
    const agentName = perConnectionState.get(connectionId)?.currentAgent;
    if (!agentName) {
        return NO_CURRENT_AGENT_RESPONSE;
    }
    const correlationId = randomUUID();
    // AC-004/AC-019: the dialability reading and the relay circuit address (empty
    // string when no getter was provided) feed the advertised-address choice.
    const advertisedAddress = selectAdvertisedAddress(
        autoNatService.getDialability(),
        getRelayCircuitAddress ? getRelayCircuitAddress() : "",
    );
    // The negotiator receives the raw tool params (or an empty object) alongside
    // the identifiers minted above.
    const negotiation = await resolvedSessionNegotiator.negotiate({
        agentName,
        correlationId,
        advertisedAddress,
        params: params ?? {},
    });
    if (!negotiation.ok) {
        const { reason, guidance } = negotiation;
        return { ok: false, reason, guidance };
    }
    // SI-001: the assignment is handed to the selector as-is.
    const { assignment } = negotiation;
    const dialOutcome = await transportSelector.dial(assignment, { correlationId });
    // The hex session id is derived before the dial outcome is inspected.
    const sessionId = Buffer.from(assignment.session_id).toString("hex");
    if (!dialOutcome.ok) {
        // AC-008: terminal dial failure — pass the selector's error through.
        return { ok: false, reason: dialOutcome.reason, guidance: dialOutcome.guidance };
    }
    // DOD-INT-1: record who the session is with. `target_pubkey` is the public
    // tool param; `counterparty_pubkey` is the legacy fallback; otherwise empty.
    let counterpartyPubkey = "";
    if (typeof params?.target_pubkey === "string") {
        counterpartyPubkey = params.target_pubkey;
    }
    else if (typeof params?.counterparty_pubkey === "string") {
        counterpartyPubkey = params.counterparty_pubkey;
    }
    const counterpartyPeerId = assignment.counterparty_session_peer_id ?? "";
    // M7 DOD-SPINE-6 / MSG-001-3b: relay connect params are built from the agent
    // name and the assignment before the session node is created.
    const relayParams = await buildRelayConnectParams(agentName, assignment);
    // The literal `true` is the sixth positional argument; the original notes
    // describe it as "reuse the standing receiver as N_A" (WIRE-002).
    const reuseStandingReceiver = true;
    const created = await sessionNodeManager.createSessionNode(
        sessionId,
        agentName,
        counterpartyPubkey,
        counterpartyPeerId,
        correlationId,
        reuseStandingReceiver,
        relayParams,
    );
    if (!created.ok) {
        return { ok: false, reason: created.reason, guidance: created.guidance };
    }
    // SEAM 1b: dial the counterparty through the session node whenever the
    // assignment carries session addresses, whatever transport_mode says.
    // A failure here is only logged — the session has already been created.
    const counterpartyAddrs = assignment.counterparty_session_addrs ?? [];
    if (counterpartyAddrs.length > 0) {
        const connected = await sessionNodeManager.connectToCounterparty(agentName, sessionId, counterpartyAddrs);
        if (!connected.ok) {
            logger.warn("session.initiate.connect.failed", {
                sessionId,
                reason: connected.reason,
                error: connected.error,
                transportMode: assignment.transport_mode,
                correlationId,
            });
        }
    }
    // AC-007: reply immediately; nothing further is awaited here.
    return { ok: true, sessionId, transportMode: dialOutcome.mode, correlationId };
});
|
|
1547
|
+
// ─── M7-SESSION-001: cello_close_session ────────────────────────────────────
// Closes (seals) a session owned by the connection's current agent.
// M7 error discipline: each distinct failure cause produces a distinct error code.
//   AC-010: session_already_sealed
//   AC-011: seal_interrupted_in_progress
//   AC-012: seal_interrupted_counterparty_unavailable
//   AC-013: seal_interrupted_rejected_by_counterparty
//   DB-001: signaling_reconnecting
//   SI-001: no auto-seal on session_interrupted receipt; operator must call explicitly
// Params: { session_id: string (hex), merkleRootAtInterruption?: string }.
// Returns a result object; success carries `sealed_root` (plus `legibility` for a
// bilateral seal or `seal_type: "unilateral"` for a unilateral one), failure carries
// `ok: false`, `reason` and `guidance`. An `interrupted` session returns whatever
// handleSealInterruptedFlow resolves to.
handlers.set("cello_close_session", async (params, connectionId) => {
    const connState = perConnectionState.get(connectionId);
    if (!connState || !connState.currentAgent) {
        return NO_CURRENT_AGENT_RESPONSE;
    }
    // The public IPC contract field is snake_case `session_id` (what cello-mcp
    // forwards through the proxy). It must be a non-empty string — the same check
    // the sibling cello_get_sealed_receipt / cello_get_transcript handlers apply —
    // because it is later hex-decoded with Buffer.from(sessionId, "hex").
    const sessionId = params?.session_id;
    if (!sessionId || typeof sessionId !== "string") {
        return {
            ok: false,
            reason: "missing_params",
            guidance: "Provide 'session_id' parameter with the hex session ID to close.",
        };
    }
    // DOD-LOOP-1: the lookup is scoped to the current agent — the composite
    // (agent, session_id) key is the ownership scope, so a session owned only by a
    // different agent is reported as session_not_found.
    const record = sessionNodeManager.getSessionRecord(connState.currentAgent, sessionId);
    if (!record) {
        return {
            ok: false,
            reason: "session_not_found",
            guidance: "No session found with this ID. Check cello_list_sessions for active and interrupted sessions.",
        };
    }
    // Ownership: redundant with the agent-scoped lookup, kept as a defensive invariant.
    if (record.agent_name !== connState.currentAgent) {
        return {
            ok: false,
            reason: "session_not_owned",
            guidance: "This session belongs to a different agent. Call cello_use_agent to switch to the agent that owns it (see cello_list_sessions), then retry.",
        };
    }
    // AC-010: already sealed
    if (record.status === "sealed") {
        return {
            ok: false,
            reason: "session_already_sealed",
            guidance: "This session is already sealed. No further action is needed — check cello_list_sessions to view its sealed record and the FROST notarization.",
        };
    }
    // One key identifies this (agent, session) pair in both the concurrency guard
    // and the bilateral seal-waiter registry.
    const guardKey = sealKey(record.agent_name, sessionId);
    // AC-011: a close for this session is already running
    if (sealInterruptedInProgress.has(guardKey)) {
        return {
            ok: false,
            reason: "seal_interrupted_in_progress",
            guidance: "A seal-interrupted attempt is already in progress for this session. Wait for session.interrupted.sealed to appear in the daemon logs before retrying. Do not call cello_close_session again until the current attempt completes or times out.",
        };
    }
    // DB-001: signaling stream reconnecting
    if (record.status === "interrupted" && signalingManager.status === "reconnecting") {
        return {
            ok: false,
            reason: "signaling_reconnecting",
            guidance: "The directory signaling stream is reconnecting. Wait for directory_signaling to show connected in cello status before initiating seal-interrupted. The daemon reconnects automatically — no manual intervention required.",
        };
    }
    // AC-012 / AC-013: seal-interrupted bilateral flow for interrupted sessions.
    // The flow is awaited so the caller receives its real result; the guard set
    // rejects concurrent calls (AC-011) and is always released in `finally`.
    if (record.status === "interrupted") {
        // H-1: the client may supply the Merkle root at interruption; absent or
        // non-string → empty string.
        // NOTE(review): this is the only camelCase param this handler reads while
        // `session_id` is snake_case — confirm the producer really sends this name.
        const merkleRootAtInterruption = typeof params?.merkleRootAtInterruption === "string" ? params.merkleRootAtInterruption : "";
        sealInterruptedInProgress.add(guardKey);
        try {
            const correlationId = randomUUID();
            return await handleSealInterruptedFlow(sessionId, record, correlationId, merkleRootAtInterruption);
        }
        finally {
            sealInterruptedInProgress.delete(guardKey);
        }
    }
    // CELLO-M7-DAEMON-004 (AC-003): ACTIVE session. Takes the SAME concurrency guard
    // as the interrupted path so two concurrent active closes cannot both run.
    if (record.status === "active") {
        sealInterruptedInProgress.add(guardKey);
        // Resolvers are declared outside `try` so `finally` can remove exactly the
        // waiters this call registered if anything below throws.
        let resolveSeal;
        let resolveUni;
        try {
            const correlationId = randomUUID();
            // M7 DOD-SPINE-7: register the session_sealed waiter BEFORE submitting the
            // SEAL leaf so the notification can never race ahead of us.
            const sealedP = new Promise((r) => { resolveSeal = r; });
            pendingSealWaiters.set(guardKey, resolveSeal);
            const submit = await sessionNodeManager.submitSealLeaf(record.agent_name, sessionId, correlationId);
            // M7-UPGRADE-002: "responder_seal_already_submitted" means the auto-acknowledge
            // path already submitted this party's leaf — treated as success: keep the
            // waiter and fall through to await session_sealed.
            if (!submit.ok && submit.reason !== "responder_seal_already_submitted") {
                pendingSealWaiters.delete(guardKey);
                if (submit.reason === "relay_unavailable") {
                    // No relay witness for this session — fall back to the
                    // directory-mediated bilateral-ack seal.
                    return await handleActiveSealFlow(sessionId, record, correlationId);
                }
                return {
                    ok: false,
                    reason: submit.reason,
                    guidance: "The SEAL leaf could not be submitted to the relay witness. Retry once the relay is reachable (cello status).",
                };
            }
            // Wait for session_sealed, bounded by CELLO_SEAL_BILATERAL_TIMEOUT_MS
            // (milliseconds). Anything that is not a finite positive number falls back
            // to 30 s, so a bad value cannot collapse the wait to an immediate timeout.
            const configuredTimeoutMs = Number(process.env["CELLO_SEAL_BILATERAL_TIMEOUT_MS"]);
            const bilateralTimeoutMs = Number.isFinite(configuredTimeoutMs) && configuredTimeoutMs > 0
                ? configuredTimeoutMs
                : 30_000;
            let timer;
            const timeoutP = new Promise((r) => { timer = setTimeout(() => r(null), bilateralTimeoutMs); });
            const sealedCompletion = await Promise.race([sealedP, timeoutP]);
            clearTimeout(timer);
            pendingSealWaiters.delete(guardKey);
            if (sealedCompletion !== null) {
                logger.info("session.seal.completed", { sessionId, sealedRoot: sealedCompletion.rootHex, role: "bilateral", correlationId });
                // M7-SESSION-004 (AC-006): return the legibility certificate on the same
                // surface that reports the seal.
                return { ok: true, sealed_root: sealedCompletion.rootHex, legibility: sealedCompletion.legibility };
            }
            // M7-UPGRADE-002: timed out after the "already submitted" fall-through. There
            // is no local reported root to escalate with, so report pending instead.
            if (!submit.ok) {
                return {
                    ok: false,
                    reason: "seal_pending_bilateral",
                    guidance: "Your SEAL leaf is recorded (auto-acknowledged) and the bilateral seal is completing, but it did not finalize within the wait window. Check cello status and the daemon logs; retry cello_close_session if the session remains unsealed.",
                };
            }
            // SESSION-002 (DOD-SEAL): the counterparty did not co-close. Escalate to a
            // UNILATERAL seal carrying the root and sequence number from our submission.
            // NOTE(review): this registry is keyed by sessionId alone, unlike the
            // (agent, session) key used above — confirm that is safe for loopback.
            const uniP = new Promise((r) => { resolveUni = r; });
            pendingUnilateralWaiters.set(sessionId, resolveUni);
            const sent = await signalingManager.sendRaw({
                type: "seal_unilateral",
                session_id: new Uint8Array(Buffer.from(sessionId, "hex")),
                reported_root: new Uint8Array(Buffer.from(submit.reportedRootHex, "hex")),
                reported_seq: submit.sequenceNumber,
            });
            if (!sent.ok) {
                pendingUnilateralWaiters.delete(sessionId);
                return {
                    ok: false,
                    reason: "seal_unilateral_send_failed",
                    guidance: "The unilateral seal request could not be sent to the directory. Check the directory connection (cello status) and retry cello_close_session.",
                };
            }
            let uniTimer;
            const uniTimeoutP = new Promise((r) => {
                uniTimer = setTimeout(() => r({ ok: false, reason: "seal_unilateral_timeout" }), 30_000);
            });
            const uniResult = await Promise.race([uniP, uniTimeoutP]);
            clearTimeout(uniTimer);
            pendingUnilateralWaiters.delete(sessionId);
            if (uniResult.ok) {
                logger.info("session.seal.completed", { sessionId, sealedRoot: uniResult.sealedRootHex, role: "unilateral", correlationId });
                return { ok: true, sealed_root: uniResult.sealedRootHex, seal_type: "unilateral" };
            }
            if (uniResult.reason === "seal_unilateral_too_early") {
                return {
                    ok: false,
                    reason: "seal_counterparty_pending",
                    guidance: "Your SEAL leaf is recorded, but the counterparty has not closed and the directory's delivery-grace window has not yet elapsed, so a unilateral seal is not yet allowed. Retry cello_close_session after the grace period, or once the counterparty closes.",
                };
            }
            return {
                ok: false,
                reason: uniResult.reason,
                guidance: "The unilateral seal did not complete (the directory could not verify the reported root, or the certificate failed verification). Confirm your messages reached the relay (cello_list_sessions) before retrying cello_close_session.",
            };
        }
        finally {
            // Exception path: drop any waiter this call registered that is still in
            // place, so a throwing submitSealLeaf/sendRaw cannot leave a stale resolver
            // behind. On normal paths the entries were already removed above; the
            // identity check avoids touching an entry some other flow registered.
            if (resolveSeal !== undefined && pendingSealWaiters.get(guardKey) === resolveSeal) {
                pendingSealWaiters.delete(guardKey);
            }
            if (resolveUni !== undefined && pendingUnilateralWaiters.get(sessionId) === resolveUni) {
                pendingUnilateralWaiters.delete(sessionId);
            }
            sealInterruptedInProgress.delete(guardKey);
        }
    }
    // Any other status (e.g. seal_interrupted_pending) — nothing to do.
    return {
        ok: false,
        reason: "session_not_closeable",
        guidance: `Session is in status '${record.status}', which cannot be closed via cello_close_session. Check cello_list_sessions; a seal_interrupted_pending session is awaiting FROST notarization.`,
    };
});
|
|
1760
|
+
// ─── MCP-001: stubs for tools registered in cello-mcp.ts but not yet implemented ───
// Each listed tool answers with the same not_implemented shape the session-tool
// stubs use, so callers get consistent guidance. No agent guard is applied here.
["cello_backup", "cello_restore", "cello_get_inclusion_proof"].forEach((tool) => {
    handlers.set(tool, async (_params, _connectionId) => ({
        ok: false,
        reason: "not_implemented",
        guidance: `'${tool}' is not yet implemented in the daemon. This feature will be available in a future milestone.`,
    }));
});
|
|
1767
|
+
// ─── M7-SESSION-004 (AC-005/AC-006): read the sealed certificate's legibility ───
// Cert-read surface: looks up the seal certificate stored for
// (current agent, session_id) and returns its sealed_root and legibility.
// Checks run in this order: session_id must be a non-empty string
// (missing_session_id), the connection must have a current agent
// (no_current_agent), and a certificate must exist (sealed_receipt_not_found).
// Per the design notes, the legibility payload is a receipt-not-assent
// certificate (per-party frontiers, attestation modes, final_message.answered).
handlers.set("cello_get_sealed_receipt", async (params, connectionId) => {
    // cello-mcp forwards the id as snake_case `session_id`, like the other session tools.
    const sessionId = params?.["session_id"];
    const hasSessionId = typeof sessionId === "string" && sessionId !== "";
    if (!hasSessionId) {
        return {
            ok: false,
            reason: "missing_session_id",
            guidance: "Provide the session_id (hex) of the sealed session. Check cello_list_sessions for sealed sessions.",
        };
    }
    // DOD-LOOP-1: certificates are keyed by (agent, session_id) — read the current agent's.
    const currentAgent = perConnectionState.get(connectionId)?.currentAgent;
    if (!currentAgent) {
        return NO_CURRENT_AGENT_RESPONSE;
    }
    const cert = sessionNodeManager.getSealCertificate(currentAgent, sessionId);
    if (cert) {
        return { ok: true, session_id: sessionId, sealed_root: cert.sealed_root, legibility: cert.legibility };
    }
    return {
        ok: false,
        reason: "sealed_receipt_not_found",
        guidance: "No sealed certificate is recorded for this session. It may not be sealed yet, or the session_id is wrong — close it with cello_close_session and confirm it reports sealed, then retry.",
    };
});
|
|
1796
|
+
// DOD-LOG-1 (PERSIST-LOG-001): read a session's conversation transcript for the
// connection's current agent. The handler validates session_id (non-empty string),
// requires a current agent, then returns whatever sessionNodeManager.readTranscript
// yields: the message list plus an `undecryptable` count. Per the design notes the
// plaintext is decrypted inside the daemon from its encrypted-at-rest store (INV-3).
handlers.set("cello_get_transcript", async (params, connectionId) => {
    const sessionId = params?.["session_id"];
    const hasSessionId = typeof sessionId === "string" && sessionId !== "";
    if (!hasSessionId) {
        return {
            ok: false,
            reason: "missing_session_id",
            guidance: "Provide the session_id (hex) whose transcript to read. See cello_list_sessions.",
        };
    }
    const currentAgent = perConnectionState.get(connectionId)?.currentAgent;
    if (!currentAgent) {
        return NO_CURRENT_AGENT_RESPONSE;
    }
    const transcript = sessionNodeManager.readTranscript(currentAgent, sessionId);
    // `undecryptable` is surfaced rather than hidden so a reader can tell a real
    // gap (rows that failed to decrypt) from an empty transcript.
    return {
        ok: true,
        session_id: sessionId,
        messages: transcript.messages,
        undecryptable: transcript.undecryptable,
    };
});
|
|
1814
|
+
// DAEMON-003 IPC handler (AC-010): queue_failed_send.
// Takes { sessionId, nonce (hex), content (hex) }, hex-decodes nonce and content,
// appends the entry to the retry queue and reports the session's queue depth.
// Replies { error: "missing_params", guidance } when any field is absent/empty.
handlers.set("queue_failed_send", async (params, _connectionId) => {
    const { sessionId, nonce: nonceHex, content: contentHex } = params ?? {};
    const complete = Boolean(sessionId) && Boolean(nonceHex) && Boolean(contentHex);
    if (!complete) {
        return { error: "missing_params", guidance: "Provide sessionId, nonce (hex), and content (hex)." };
    }
    const nonceBytes = Buffer.from(nonceHex, "hex");
    const contentBytes = Buffer.from(contentHex, "hex");
    retryQueue.enqueue(sessionId, nonceBytes, contentBytes);
    const queueDepth = retryQueue.getSessionDepth(sessionId);
    return { queued: true, queueDepth };
});
|
|
1827
|
+
// CELLO-M7-MSG-001 (AC-004/AC-005): enqueue_awaiting_content.
// Records content that has not been acknowledged yet in the retry queue's
// awaiting-ACK set; the original notes describe this as crash recovery for the
// send path. Takes { sessionId, contentHash (hex), content (hex), agentName? }
// and replies with the awaiting depth for (agentName, sessionId).
handlers.set("enqueue_awaiting_content", async (params, connectionId) => {
    const { sessionId, contentHash: contentHashHex, content: contentHex } = params ?? {};
    const complete = Boolean(sessionId) && Boolean(contentHashHex) && Boolean(contentHex);
    if (!complete) {
        return { error: "missing_params", guidance: "Provide sessionId, contentHash (hex), and content (hex)." };
    }
    // DOD-LOOP-1: entries are keyed by the owning agent. An explicit agentName
    // param wins; otherwise the connection's current agent; otherwise "".
    let agentName = params?.agentName;
    agentName ??= perConnectionState.get(connectionId)?.currentAgent ?? "";
    retryQueue.enqueueAwaitingContent(
        agentName,
        sessionId,
        Buffer.from(contentHashHex, "hex"),
        Buffer.from(contentHex, "hex"),
    );
    const awaitingDepth = retryQueue.getAwaitingDepth(agentName, sessionId);
    return { queued: true, awaitingDepth };
});
|
|
1844
|
+
// CELLO-M7-MSG-001: mark_content_acked.
// Clears the awaiting-ACK entry for (agentName, sessionId, contentHash) in the
// retry queue; per the original notes this keeps a later flush from re-parking
// content that was already delivered. Takes { sessionId, contentHash (hex),
// agentName? } and replies with the remaining awaiting depth.
handlers.set("mark_content_acked", async (params, connectionId) => {
    const { sessionId, contentHash: contentHashHex } = params ?? {};
    const complete = Boolean(sessionId) && Boolean(contentHashHex);
    if (!complete) {
        return { error: "missing_params", guidance: "Provide sessionId and contentHash (hex)." };
    }
    // Explicit agentName param first, then the connection's current agent, then "".
    let agentName = params?.agentName;
    agentName ??= perConnectionState.get(connectionId)?.currentAgent ?? "";
    retryQueue.markContentAcked(agentName, sessionId, Buffer.from(contentHashHex, "hex"));
    const awaitingDepth = retryQueue.getAwaitingDepth(agentName, sessionId);
    return { acked: true, awaitingDepth };
});
|
|
1857
|
+
// check_nonce: asks the nonce dedup store whether (sessionId, nonce, senderPubkey)
// is a duplicate. The store call is named checkAndAdd, so it presumably records the
// nonce as a side effect. Takes { sessionId, nonce (hex), senderPubkey (hex) };
// replies { duplicate } or { error: "missing_params", guidance }.
handlers.set("check_nonce", async (params, _connectionId) => {
    const { sessionId, nonce: nonceHex, senderPubkey: senderPubkeyHex } = params ?? {};
    const complete = Boolean(sessionId) && Boolean(nonceHex) && Boolean(senderPubkeyHex);
    if (!complete) {
        return { error: "missing_params", guidance: "Provide sessionId, nonce (hex), and senderPubkey (hex)." };
    }
    const nonceBytes = Buffer.from(nonceHex, "hex");
    const senderPubkeyBytes = Buffer.from(senderPubkeyHex, "hex");
    return { duplicate: nonceDedupStore.checkAndAdd(sessionId, nonceBytes, senderPubkeyBytes) };
});
|
|
1869
|
+
// DAEMON-003: drain_session IPC handler.
// Reports what is pending in the retry queue for a session: the depth and the
// hex nonce of each entry. Only nonces are returned, never content (SI-002).
// This handler does not deliver or remove anything from the queue.
handlers.set("drain_session", async (params, _connectionId) => {
    const { sessionId } = params ?? {};
    if (!sessionId) {
        return { error: "missing_params", guidance: "Provide sessionId." };
    }
    const pendingCount = retryQueue.getSessionDepth(sessionId);
    const nonces = retryQueue.getSessionEntries(sessionId).map((entry) => entry.nonceHex);
    return { pendingCount, nonces };
});
|
|
1881
|
+
// MSG-001-3b: content-park deposit/pull IPC handlers. These drive the daemon's
|
|
1882
|
+
// ContentParkClient directly so the daemon↔relay store-and-forward transport can be
|
|
1883
|
+
// proven (J-CONTENT increment 1) before the send/receive-path integration. The relay
|
|
1884
|
+
// multiaddr (with /p2p/<peerId>) comes from the session assignment's relay endpoint;
|
|
1885
|
+
// dials run from the standing receiver (open-gater) node.
|
|
1886
|
+
/**
 * Extracts the relay peer id from a relay multiaddr string.
 *
 * The peer id is the single path component that follows the first "/p2p/"
 * marker. Anything after that component (for example "/p2p-circuit/...") is
 * not part of the peer id and is ignored; `addr` always carries the full,
 * unmodified input.
 *
 * @param {unknown} multiaddr - Relay multiaddr, expected to contain "/p2p/<peerId>".
 *   Comes from IPC params, so it may be missing or not a string.
 * @returns {{ peerId: string, addr: string } | null} The parsed pair, or null when
 *   the input is not a non-empty string or carries no peer id after "/p2p/".
 */
const parseRelayPeer = (multiaddr) => {
    // Non-string input is rejected instead of throwing on `.split`.
    if (typeof multiaddr !== "string" || multiaddr === "") {
        return null;
    }
    const marker = "/p2p/";
    const markerAt = multiaddr.indexOf(marker);
    if (markerAt === -1) {
        return null;
    }
    // A peer id never contains "/", so stop at the next path separator.
    const [peerId] = multiaddr.slice(markerAt + marker.length).split("/");
    return peerId ? { peerId, addr: multiaddr } : null;
};
|
|
1892
|
+
// content_park_deposit: deposits one ciphertext entry with a relay via a
// ContentParkClient. Takes { relayMultiaddr, recipientPubkey, contentHash,
// sessionId, ciphertext }; the last four are hex-decoded before being handed to
// the client. The deposit runs from the standing receiver node. Replies with
// whatever client.deposit resolves to, or an { ok: false, reason, guidance } error.
handlers.set("content_park_deposit", async (params, _connectionId) => {
    const relay = parseRelayPeer(params?.relayMultiaddr);
    const { recipientPubkey, contentHash, sessionId, ciphertext } = params ?? {};
    const anyMissing = [relay, recipientPubkey, contentHash, sessionId, ciphertext].some((value) => !value);
    if (anyMissing) {
        return {
            ok: false,
            reason: "missing_params",
            guidance: "Provide relayMultiaddr (with /p2p/<peerId>), recipientPubkey, contentHash, sessionId, ciphertext — all hex.",
        };
    }
    const standingReceiver = sessionNodeManager.getStandingReceiverNode();
    if (!standingReceiver) {
        return {
            ok: false,
            reason: "standing_receiver_unavailable",
            guidance: "The daemon's standing receiver is not ready yet; retry after startup.",
        };
    }
    const parkClient = new ContentParkClient({ relayPeerId: relay.peerId, relayAddrs: [relay.addr], logger });
    const fromHex = (hex) => Buffer.from(hex, "hex");
    return await parkClient.deposit(standingReceiver, {
        recipientPubkey: fromHex(recipientPubkey),
        contentHash: fromHex(contentHash),
        sessionId: fromHex(sessionId),
        ciphertext: fromHex(ciphertext),
    });
});
|
|
1912
|
+
// content_park_pull: pulls the entries a relay holds for a recipient via a
// ContentParkClient. Takes { relayMultiaddr, recipientPubkey (hex) }. The recipient
// must match a local agent's pubkey, and that agent's key provider is passed to
// the pull. Replies { ok: true, entries } with each ciphertext hex-encoded, or an
// { ok: false, reason, guidance } error.
handlers.set("content_park_pull", async (params, _connectionId) => {
    const relay = parseRelayPeer(params?.relayMultiaddr);
    const { recipientPubkey } = params ?? {};
    if (!(relay && recipientPubkey)) {
        return {
            ok: false,
            reason: "missing_params",
            guidance: "Provide relayMultiaddr (with /p2p/<peerId>) and recipientPubkey (hex).",
        };
    }
    // Only a local agent can pull: its key provider is handed to client.pull
    // (per the original notes, to sign the relay's auth challenge).
    const recipientAgent = agents.find((agent) => agent.pubkey === recipientPubkey);
    if (!recipientAgent) {
        return {
            ok: false,
            reason: "agent_not_found",
            guidance: "No local agent matches recipientPubkey; only the recipient can pull its own parked content.",
        };
    }
    const keyProvider = keyProviders.get(recipientAgent.name);
    if (!keyProvider) {
        return {
            ok: false,
            reason: "signing_key_unavailable",
            guidance: `Signing key for '${recipientAgent.name}' is not loaded.`,
        };
    }
    const standingReceiver = sessionNodeManager.getStandingReceiverNode();
    if (!standingReceiver) {
        return {
            ok: false,
            reason: "standing_receiver_unavailable",
            guidance: "The daemon's standing receiver is not ready yet; retry after startup.",
        };
    }
    const parkClient = new ContentParkClient({ relayPeerId: relay.peerId, relayAddrs: [relay.addr], logger });
    const pulled = await parkClient.pull(standingReceiver, Buffer.from(recipientPubkey, "hex"), keyProvider);
    const entries = pulled.map((entry) => ({
        contentHash: entry.contentHashHex,
        sessionId: entry.sessionIdHex,
        ciphertext: Buffer.from(entry.ciphertext).toString("hex"),
    }));
    return { ok: true, entries };
});
|
|
1935
|
+
// MSG-001-3b (increment 3): RECOVER parked content. Pulls the recipient's parked entries,
|
|
1936
|
+
// decrypts each IN-DAEMON (openContentSeal — the relay never sees plaintext), and routes the
|
|
1937
|
+
// plaintext through ingestReceivedContent — the SAME inbound chokepoint as a direct receive
|
|
1938
|
+
// (M9 single-funnel AC). The content completes the recipient's transcript view of an already-
|
|
1939
|
+
// witnessed message so it can be read (cello_receive) and the session bilaterally sealed
|
|
1940
|
+
// (DOD-INT-2). This is content-completion, NOT a resumption — the session stays interrupted.
|
|
1941
|
+
// DOD-MSG-4: pull a recipient agent's parked mailbox from ONE relay and recover each entry through
|
|
1942
|
+
// the inbound funnel (decode envelope → verify+order the signed Structure2 → ingest). Shared by the
|
|
1943
|
+
// explicit IPC handler and the auto-recover-on-reconnect trigger below.
|
|
1944
|
+
// Pull one agent's parked entries from ONE relay and push each through the inbound funnel:
// unseal -> decode the park envelope -> (optionally) record the ordering record -> ingest ->
// confirm-delete on the relay.
//
// @param recipientAgent  Local agent record; `name` selects the key provider, `pubkey` (hex)
//                        identifies the mailbox.
// @param relayPeerId     Peer id of the relay to pull from.
// @param relayAddrs      Multiaddrs to dial that relay on.
// @returns `{ ok: false, reason }` when a precondition is missing (signing_key_unavailable /
//          cannot_unseal / standing_receiver_unavailable), else
//          `{ ok: true, recovered, pulled }` where `recovered` counts leaves reported as appended
//          and `pulled` is the number of entries the relay returned.
// A failure of the pull itself still rejects; failures of individual entries do not.
async function recoverParkedFromRelay(recipientAgent, relayPeerId, relayAddrs) {
    const kp = keyProviders.get(recipientAgent.name);
    if (!kp)
        return { ok: false, reason: "signing_key_unavailable" };
    if (!kp.openContentSeal)
        return { ok: false, reason: "cannot_unseal" };
    const node = sessionNodeManager.getStandingReceiverNode();
    if (!node)
        return { ok: false, reason: "standing_receiver_unavailable" };
    const recipientPubkey = recipientAgent.pubkey ?? "";
    const client = new ContentParkClient({ relayPeerId, relayAddrs, logger });
    const entries = await client.pull(node, Buffer.from(recipientPubkey, "hex"), kp);
    let recovered = 0;
    for (const e of entries) {
        let contentHashBytes;
        let ingest;
        // Isolate each entry: an exception while unsealing/decoding/ingesting one entry must not
        // abort the drain. An unconfirmed entry is returned again on the next pull, so letting it
        // throw would block every entry behind it on every attempt. Treated like the existing
        // unseal_failed / ingest_failed outcomes: log, skip, leave it unconfirmed.
        try {
            const unsealed = await kp.openContentSeal(e.ciphertext);
            if (!unsealed) {
                logger.warn("content.recover.unseal_failed", { sessionId: e.sessionIdHex, contentHash: e.contentHashHex });
                continue;
            }
            // The unsealed blob is the park envelope. When it carries both structures, record the
            // ordering record BEFORE ingest; otherwise only env.content is used.
            const env = sessionNodeManager.decodeParkEnvelope(unsealed);
            contentHashBytes = Buffer.from(e.contentHashHex, "hex");
            if (env.structure1Cbor && env.structure2Cbor) {
                sessionNodeManager.recordOrderingRecord(recipientAgent.name, e.sessionIdHex, env.structure1Cbor, env.structure2Cbor, contentHashBytes);
            }
            ingest = sessionNodeManager.ingestReceivedContent(recipientAgent.name, e.sessionIdHex, env.content, contentHashBytes);
        }
        catch (err) {
            logger.warn("content.recover.entry_failed", { sessionId: e.sessionIdHex, contentHash: e.contentHashHex, error: err instanceof Error ? err.message : String(err) });
            continue;
        }
        if (ingest.ok && ingest.held) {
            // Held entries are neither counted nor confirmed; they are only logged.
            logger.info("content.recover.held", { sessionId: e.sessionIdHex, contentHash: e.contentHashHex, canonicalSeq: ingest.sequenceNumber });
        }
        else if (ingest.ok) {
            // appendedCount (when reported) is used as the tally increment; default is 1.
            recovered += ingest.appendedCount ?? 1;
            logger.info("content.recovered", { sessionId: e.sessionIdHex, contentHash: e.contentHashHex, sequenceNumber: ingest.sequenceNumber });
            // Best-effort confirm: a failure is logged and the entry is left on the relay.
            try {
                await client.confirm(node, Buffer.from(recipientPubkey, "hex"), contentHashBytes, kp);
            }
            catch (err) {
                logger.warn("content.recover.confirm.failed", { sessionId: e.sessionIdHex, contentHash: e.contentHashHex, error: err instanceof Error ? err.message : String(err) });
            }
        }
        else {
            logger.warn("content.recover.ingest_failed", { sessionId: e.sessionIdHex, contentHash: e.contentHashHex, reason: ingest.reason });
        }
    }
    return { ok: true, recovered, pulled: entries.length };
}
|
|
2002
|
+
// DOD-MSG-4 (auto-recover-on-reconnect): when an agent comes online, drain its parked mailbox from
|
|
2003
|
+
// every relay it has sessions on — symmetric to the SENDER's flushAwaitingContent. Without this,
|
|
2004
|
+
// nothing in production pulls a recipient's store-and-forward mailbox and parked content is never
|
|
2005
|
+
// delivered. Best-effort; a relay miss is retried on the next agent-online.
|
|
2006
|
+
// Drain an agent's parked mailbox from each relay endpoint reported for it, one relay at a time.
// Does nothing (and logs nothing) when the agent is unknown, has no pubkey, or has no relay
// endpoints. Per-relay failures are logged and counted; a single summary event is always emitted
// once the loop has run.
//
// @param agentName  Name of the local agent to recover for.
// @returns          Nothing; outcomes are reported through the logger only.
async function autoRecoverForAgent(agentName) {
    const agent = agents.find((candidate) => candidate.name === agentName);
    if (!agent?.pubkey) {
        return;
    }
    const relays = sessionNodeManager.getAgentRelayEndpoints(agentName);
    if (relays.length === 0) {
        return;
    }

    let recoveredTotal = 0;
    let failedRelays = 0;
    for (const endpoint of relays) {
        let outcome;
        try {
            outcome = await recoverParkedFromRelay(agent, endpoint.relayPeerId, endpoint.relayAddrs);
        }
        catch (err) {
            // The recovery attempt itself threw: count the relay as failed and move on.
            failedRelays += 1;
            const error = err instanceof Error ? err.message : String(err);
            logger.warn("content.recover.auto.failed", { agentName, relayPeerId: endpoint.relayPeerId, error });
            continue;
        }
        if (outcome.ok) {
            recoveredTotal += outcome.recovered;
            continue;
        }
        // A non-ok result carries a reason; log it so an all-failed run is distinguishable
        // from a run where nothing was parked.
        failedRelays += 1;
        logger.warn("content.recover.auto.relay_failed", { agentName, relayPeerId: endpoint.relayPeerId, reason: outcome.reason });
    }

    // Emitted unconditionally, including when nothing was recovered.
    logger.info("content.recover.auto.completed", {
        agentName,
        recovered: recoveredTotal,
        relayCount: relays.length,
        failedRelays,
    });
}
|
|
2038
|
+
// IPC handler `content_park_recover`: run recoverParkedFromRelay for the local agent whose
// pubkey equals `recipientPubkey`, against the single relay named by `relayMultiaddr`.
//
// Returns `{ ok: true, recovered, pulled }` on success, otherwise `{ ok: false, reason, guidance }`
// where `guidance` is a human-readable hint chosen from the failure reason.
handlers.set("content_park_recover", async (params, _connectionId) => {
    const relay = parseRelayPeer(params?.relayMultiaddr);
    const recipientPubkey = params?.recipientPubkey;
    if (!(relay && recipientPubkey)) {
        return { ok: false, reason: "missing_params", guidance: "Provide relayMultiaddr (with /p2p/<peerId>) and recipientPubkey (hex)." };
    }

    const recipientAgent = agents.find((candidate) => candidate.pubkey === recipientPubkey);
    if (!recipientAgent) {
        return { ok: false, reason: "agent_not_found", guidance: "No local agent matches recipientPubkey." };
    }

    const outcome = await recoverParkedFromRelay(recipientAgent, relay.peerId, [relay.addr]);
    if (outcome.ok) {
        const { recovered, pulled } = outcome;
        return { ok: true, recovered, pulled };
    }

    // Translate the machine-readable reason into caller guidance; unknown reasons get a
    // generic message.
    const guidanceByReason = {
        signing_key_unavailable: `Signing key for '${recipientAgent.name}' is not loaded.`,
        cannot_unseal: `Agent '${recipientAgent.name}' key provider cannot open content seals.`,
        standing_receiver_unavailable: "The daemon's standing receiver is not ready yet; retry after startup.",
    };
    const guidance = guidanceByReason[outcome.reason] ?? "Recover failed.";
    return { ok: false, reason: outcome.reason, guidance };
});
|
|
2058
|
+
// MCP-002: Test-only handler to emit session lifecycle events.
|
|
2059
|
+
// Guarded by CELLO_ENV=test — never available in production.
|
|
2060
|
+
// Registers `__test_emit_session_event` only when CELLO_ENV is exactly "test". The handler logs
// a session lifecycle event and dispatches the matching session-state-changed notification.
//
// Params: type ("created" | "destroyed"), sessionId, agentName (all required);
// counterpartyPubkey (optional, defaults to null); for "created": sessionPeerId, correlationId
// (default ""); for "destroyed": state (default "interrupted"), reason (defaults to state).
// Any other `type` is accepted and does nothing.
if (process.env["CELLO_ENV"] === "test") {
    handlers.set("__test_emit_session_event", async (params, _connectionId) => {
        const type = params?.type;
        const sessionId = params?.sessionId;
        const agentName = params?.agentName;
        const counterpartyPubkey = params?.counterpartyPubkey ?? null;
        if (!(type && sessionId && agentName)) {
            return { error: "missing_params", guidance: "Provide type, sessionId, and agentName." };
        }

        switch (type) {
            case "created": {
                const sessionPeerId = params?.sessionPeerId ?? "";
                const correlationId = params?.correlationId ?? "";
                logger.info("session.node.created", { sessionId, agentName, sessionPeerId, correlationId });
                notificationDispatcher.dispatchSessionStateChanged(agentName, sessionId, "created", counterpartyPubkey);
                break;
            }
            case "destroyed": {
                const state = params?.state ?? "interrupted";
                const reason = params?.reason ?? state;
                logger.info("session.node.destroyed", { sessionId, agentName, reason });
                notificationDispatcher.dispatchSessionStateChanged(agentName, sessionId, state, counterpartyPubkey);
                break;
            }
            default:
                // Unrecognised event types fall through to the ok response untouched.
                break;
        }
        return { ok: true };
    });
} // end CELLO_ENV=test guard
|
|
2084
|
+
// ─── M7-SESSION-001 (H-1): seal-interrupted bilateral RESPONDER ────────────
|
|
2085
|
+
//
|
|
2086
|
+
// A PERSISTENT inbound handler (registered once, below) that reacts to inbound
|
|
2087
|
+
// `seal_interrupted_request` frames from a counterparty. It validates local
|
|
2088
|
+
// state, K_local-signs this node's SEAL-INTERRUPTED leaf (co-signing the same
|
|
2089
|
+
// Merkle root the initiator sent), echoes the nonce, includes initiatorPubkey
|
|
2090
|
+
// for directory routing, persists the responder side of the commitment, moves
|
|
2091
|
+
// the session to 'seal_interrupted_pending', and returns a seal_interrupted_ack.
|
|
2092
|
+
// On any inconsistent local state it returns a seal_interrupted_rejection.
|
|
2093
|
+
// Responder for an inbound `seal_interrupted_request` frame.
//
// Reads sessionId / initiatorPubkey / counterpartyPubkey / leafCountAtInterruption /
// merkleRootAtInterruption / nonce from the frame (wrong-typed fields count as absent), validates
// them against local session state, signs this node's seal-interrupted leaf, persists the
// responder side of the commitment and sends a `seal_interrupted_ack`. Every validation failure
// after the malformed-frame check sends a `seal_interrupted_rejection` with a reason instead.
//
// @param frame  Decoded inbound frame (plain object).
// @returns      Nothing; results are sent via signalingManager.sendRaw and logged.
async function handleInboundSealInterruptedRequest(frame) {
    const correlationId = randomUUID();
    const readString = (key, fallback = null) => (typeof frame[key] === "string" ? frame[key] : fallback);
    const readNumber = (key) => (typeof frame[key] === "number" ? frame[key] : null);

    const sessionId = readString("sessionId");
    const initiatorPubkey = readString("initiatorPubkey");
    const counterpartyPubkey = readString("counterpartyPubkey");
    const leafCountReq = readNumber("leafCountAtInterruption");
    const merkleRootReq = readString("merkleRootAtInterruption", "");
    const nonce = readString("nonce");

    // Without these fields no rejection is sent at all; the frame is only logged.
    // Note: an empty-string nonce passes (only a missing/non-string nonce is refused).
    const wellFormed = Boolean(sessionId) && Boolean(initiatorPubkey) && Boolean(counterpartyPubkey)
        && nonce !== null && leafCountReq !== null;
    if (!wellFormed) {
        logger.warn("session.interrupted.request.malformed", {
            correlationId,
            hasSessionId: sessionId !== null,
            hasInitiatorPubkey: initiatorPubkey !== null,
        });
        return;
    }

    // Send a rejection frame for this request, then log it.
    const refuse = async (reason) => {
        await signalingManager.sendRaw({
            type: "seal_interrupted_rejection",
            sessionId,
            initiatorPubkey,
            reason,
        });
        logger.warn("session.interrupted.request.rejected", { sessionId, reason, correlationId });
    };

    // The request's counterpartyPubkey must be the pubkey of one of OUR agents; the agent name
    // is needed for every per-session lookup below.
    const localAgent = agents.find((candidate) => candidate.pubkey === counterpartyPubkey);
    if (!localAgent) {
        return refuse("unknown_counterparty");
    }

    const localRecord = sessionNodeManager.getSessionRecord(localAgent.name, sessionId);
    if (!localRecord) {
        return refuse("session_not_found");
    }

    // Only 'interrupted' and 'active' sessions are eligible; any other status is refused.
    if (!["interrupted", "active"].includes(localRecord.status)) {
        return refuse("session_not_interrupted");
    }

    // From our side, the initiator must be the session's recorded counterparty.
    if (localRecord.counterparty_pubkey !== initiatorPubkey) {
        return refuse("initiator_mismatch");
    }

    // Choose what we sign over. For an active session, or whenever our own tree has leaves, use
    // our own tree's size and root. Only a non-active session with an empty tree falls back to
    // the record's message_count and the initiator-supplied root.
    const ownTree = sessionNodeManager.getSessionTree(localAgent.name, sessionId);
    let ownLeafCount;
    let ownRoot;
    if (localRecord.status === "active" || ownTree.size() > 0) {
        ownLeafCount = ownTree.size();
        ownRoot = sessionNodeManager.getSessionTreeRootHex(localAgent.name, sessionId);
    }
    else {
        ownLeafCount = localRecord.message_count ?? 0;
        ownRoot = merkleRootReq;
    }

    // Leaf counts must agree with our own state.
    if (ownLeafCount !== leafCountReq) {
        return refuse("leaf_count_mismatch");
    }

    const keyProvider = keyProviders.get(localAgent.name);
    if (!keyProvider) {
        return refuse("signing_key_unavailable");
    }

    // Sign our own seal-interrupted leaf; the signer is the addressed local agent's pubkey.
    const ownLeaf = await buildSignedSealInterruptedLeaf(keyProvider, {
        sessionId,
        leafCount: ownLeafCount,
        merkleRootAtInterruption: ownRoot,
        signerPubkeyHex: counterpartyPubkey,
    });

    // Persist the responder side BEFORE sending the ack. No counterparty leaf is available in
    // this exchange, so it is stored as null.
    sessionNodeManager.persistSealInterruptedCommitment({
        agentName: localAgent.name,
        sessionId,
        role: "responder",
        ownLeaf,
        counterpartyLeaf: null,
        merkleRoot: ownRoot,
        nonce,
    });

    const sendResult = await signalingManager.sendRaw({
        type: "seal_interrupted_ack",
        sessionId,
        initiatorPubkey,
        nonce,
        sealInterruptedLeaf: ownLeaf,
    });
    if (!sendResult.ok) {
        logger.error("session.interrupted.ack.send.failed", {
            sessionId,
            agentName: localAgent.name,
            reason: sendResult.reason,
            correlationId,
        });
        return;
    }
    logger.info("session.interrupted.responder.acked", {
        sessionId,
        agentName: localAgent.name,
        leafCount: ownLeafCount,
        correlationId,
    });
}
|
|
2213
|
+
// Register the persistent responder. This is a REAL registered handler (not a
|
|
2214
|
+
// test-only path): it fires for every inbound seal_interrupted_request.
|
|
2215
|
+
// Route inbound `seal_interrupted_request` frames to the responder; every other frame type is
// ignored by this handler.
signalingManager.registerInboundHandler((frame) => {
    if (frame["type"] !== "seal_interrupted_request")
        return;
    // Fire-and-forget, but the promise must not float: a throw anywhere in the responder would
    // otherwise surface as an unhandled rejection, which terminates the process under Node's
    // default settings. Log it and keep the daemon running.
    handleInboundSealInterruptedRequest(frame).catch((err) => {
        logger.error("session.interrupted.request.failed", {
            error: err instanceof Error ? err.message : String(err),
        });
    });
});
|
|
2220
|
+
// Per-agent FIFO queue (events accepted while no cello_await_session is blocked) and
|
|
2221
|
+
// the per-agent list of blocked waiters. Inbound sessions are addressed to a specific
|
|
2222
|
+
// local agent (participant_b), so both are keyed by agent name.
|
|
2223
|
+
// agentName -> array of inbound-session events appended in arrival order while that agent has
// no blocked waiter (see enqueueInboundSession).
const inboundSessionQueues = new Map();
|
|
2224
|
+
// agentName -> array of blocked waiters, oldest first; each waiter exposes deliver(event).
const inboundSessionWaiters = new Map();
|
|
2225
|
+
// Session ids whose acceptInboundAssignment is in flight (the accept step is async
|
|
2226
|
+
// because it may wait for the standing receiver to rebuild). Guards against two
|
|
2227
|
+
// simultaneous frames for the SAME session both passing the getSessionRecord check
|
|
2228
|
+
// before either has inserted the row (review M1, race half).
|
|
2229
|
+
// Hex session ids with an accept in progress; acceptInboundAssignment removes its id in a
// `finally`. NOTE(review): the code that adds ids is outside this section — confirm.
const inboundInFlight = new Set();
|
|
2230
|
+
// Inbound accepts are SERIALIZED through this chain. acceptSession synchronously
|
|
2231
|
+
// consumes the single standing receiver and rebuilds a replacement asynchronously;
|
|
2232
|
+
// running two accepts concurrently would let the second pass its readiness check on
|
|
2233
|
+
// the receiver the first is about to consume, then fail on the consumed receiver
|
|
2234
|
+
// (review M2, race). Serializing makes the second accept wait for the first's rebuild.
|
|
2235
|
+
// Tail of the serialized accept chain; starts already resolved so the first accept runs at once.
// NOTE(review): the code that appends to this chain is outside this section — confirm.
let inboundAcceptChain = Promise.resolve();
|
|
2236
|
+
// Deliver an inbound-session event for `agentName`: hand it to the oldest blocked waiter when one
// exists, otherwise append it to that agent's queue (creating the queue on first use).
//
// @param agentName  Local agent the session is addressed to.
// @param event      Event object passed through unchanged.
function enqueueInboundSession(agentName, event) {
    const blocked = inboundSessionWaiters.get(agentName);
    if (!blocked || !(blocked.length > 0)) {
        // Nobody is waiting: park the event for a later consumer.
        const pending = inboundSessionQueues.get(agentName) ?? [];
        pending.push(event);
        inboundSessionQueues.set(agentName, pending);
        return;
    }
    // First-in waiter gets the event and is removed from the waiter list.
    const oldest = blocked.shift();
    oldest.deliver(event);
}
|
|
2248
|
+
// CBOR-decoded byte fields arrive as Uint8Array or Buffer; a field may also already be
|
|
2249
|
+
// a hex string. Hex strings are lowercased so the case-sensitive agent-pubkey match
|
|
2250
|
+
// (agents store lowercase hex) cannot silently miss (review L2).
|
|
2251
|
+
// Normalise a frame field to hex text.
//
// @param v  Uint8Array/Buffer (hex-encoded), string (returned lowercased, not validated as hex),
//           or anything else.
// @returns  The hex/lowercased string, or null for any other input type.
function frameValueToHex(v) {
    const isBytes = v instanceof Uint8Array || Buffer.isBuffer(v);
    if (isBytes) {
        return Buffer.from(v).toString("hex");
    }
    return typeof v === "string" ? v.toLowerCase() : null;
}
|
|
2260
|
+
// M7-SESSION-004 (AC-005): normalise the wire `legibility` object — CBOR-decoded, so pubkeys
|
|
2261
|
+
// arrive as Uint8Array/Buffer — into a JSON-safe certificate with hex-encoded pubkeys. Returns
|
|
2262
|
+
// undefined for an absent or structurally-implausible object (pre-M7 frame, or a malformed
|
|
2263
|
+
// field), in which case nothing is persisted and the seal still completes. The receipt-not-
|
|
2264
|
+
// assent constants (attests/implies_assent/disclaimer) and the integers/booleans are carried
|
|
2265
|
+
// verbatim; only the byte fields are re-encoded. The daemon never invents or alters the
|
|
2266
|
+
// certificate's meaning — it is the directory's derivation, surfaced.
|
|
2267
|
+
// Convert a wire `legibility` object into a JSON-safe certificate with hex-encoded pubkeys.
//
// @param raw  Decoded `legibility` value from a frame (may be absent or malformed).
// @returns    `{ attests: "receipt", implies_assent: false, disclaimer, participants,
//             final_message }`, or `undefined` when the input is absent or implausible:
//             not an object, `attests` is not "receipt", `participants` is not an array,
//             `final_message` is not an object, `disclaimer` is not a non-empty string, or any
//             participant is not an object or has an out-of-enum `attestation_mode`.
// `attests` and `implies_assent` are written as constants, not copied from the input.
// Non-number sequence fields become null; `answered` is true only for a literal `true`.
function normalizeLegibility(raw) {
    if (!raw || typeof raw !== "object")
        return undefined;
    const o = raw;
    if (o["attests"] !== "receipt")
        return undefined;
    const participantsRaw = o["participants"];
    const finalRaw = o["final_message"];
    if (!Array.isArray(participantsRaw) || !finalRaw || typeof finalRaw !== "object")
        return undefined;
    // A missing/empty/non-string disclaimer rejects the whole certificate.
    if (typeof o["disclaimer"] !== "string" || o["disclaimer"].length === 0)
        return undefined;
    // Closed enum for attestation_mode; anything else rejects the whole certificate.
    const VALID_MODES = new Set(["live", "recovered", "absent"]);
    const participants = [];
    for (const p of participantsRaw) {
        // A null or primitive array element is a malformed frame. Reject it here: indexing into
        // null/undefined below would throw instead of honouring the "returns undefined" contract.
        if (!p || typeof p !== "object")
            return undefined;
        const mode = p["attestation_mode"];
        if (typeof mode !== "string" || !VALID_MODES.has(mode))
            return undefined;
        participants.push({
            pubkey: frameValueToHex(p["pubkey"]),
            content_frontier_seq: typeof p["content_frontier_seq"] === "number" ? p["content_frontier_seq"] : null,
            last_authored_seq: typeof p["last_authored_seq"] === "number" ? p["last_authored_seq"] : null,
            attestation_mode: mode,
        });
    }
    const fm = finalRaw;
    const final_message = {
        sender_pubkey: frameValueToHex(fm["sender_pubkey"]),
        seq: typeof fm["seq"] === "number" ? fm["seq"] : null,
        answered: fm["answered"] === true,
    };
    return {
        attests: "receipt",
        implies_assent: false,
        disclaimer: o["disclaimer"],
        participants,
        final_message,
    };
}
|
|
2314
|
+
// Pull the fields out of a pushed session_assignment frame. Returns null when the frame
|
|
2315
|
+
// carries no usable assignment object / is missing the essential ids (the handler then
|
|
2316
|
+
// ignores it rather than throwing). Field-level validity (peer id, signature type) is
|
|
2317
|
+
// checked in the handler so it can emit a distinct, diagnosable event per failure.
|
|
2318
|
+
// Extract the fields of a pushed session_assignment frame into a flat, hex-normalised record.
//
// @param frame  Decoded frame; the assignment is read from `frame["assignment"]`.
// @returns      null when there is no assignment object or when session_id / either participant
//               pubkey cannot be converted to hex. Otherwise an object with sessionIdHex,
//               participantAPubkeyHex, participantBPubkeyHex, initiatorPeerId ("" if absent),
//               sessionTimestamp (0 if absent), signatureType (null if absent), signerPubkeyHex
//               (null if absent), relayPeerId ("" if absent) and relayAddrs (string entries only).
//               Field-level validity is not checked here.
function extractInboundSessionAssignment(frame) {
    const assignment = frame["assignment"];
    if (!assignment || typeof assignment !== "object") {
        return null;
    }

    const pubkeyHexOf = (participant) => (participant ? frameValueToHex(participant["pubkey"]) : null);
    const sessionIdHex = frameValueToHex(assignment["session_id"]);
    const participantAPubkeyHex = pubkeyHexOf(assignment["participant_a"]);
    const participantBPubkeyHex = pubkeyHexOf(assignment["participant_b"]);
    if (!(sessionIdHex && participantAPubkeyHex && participantBPubkeyHex)) {
        return null;
    }

    // Relay endpoint is optional; wrong-typed parts degrade to "" / [] rather than failing.
    const relayEndpoint = assignment["relay_endpoint"];
    let relayPeerId = "";
    let relayAddrs = [];
    if (relayEndpoint) {
        if (typeof relayEndpoint["peer_id"] === "string") {
            relayPeerId = relayEndpoint["peer_id"];
        }
        if (Array.isArray(relayEndpoint["multiaddrs"])) {
            relayAddrs = relayEndpoint["multiaddrs"].filter((addr) => typeof addr === "string");
        }
    }

    const stringField = (key, fallback) => {
        const value = assignment[key];
        return typeof value === "string" ? value : fallback;
    };
    const timestamp = assignment["session_timestamp"];
    const signer = assignment["signer_pubkey"];

    return {
        sessionIdHex,
        participantAPubkeyHex,
        participantBPubkeyHex,
        initiatorPeerId: stringField("initiator_session_peer_id", ""),
        sessionTimestamp: typeof timestamp === "number" ? timestamp : 0,
        signatureType: stringField("signature_type", null),
        // signer_pubkey is converted only when present; an unconvertible value yields null.
        signerPubkeyHex: signer !== undefined ? frameValueToHex(signer) : null,
        relayPeerId,
        relayAddrs,
    };
}
|
|
2353
|
+
// Wait (bounded) for THIS AGENT's standing receiver to be ready. acceptSession consumes the
|
|
2354
|
+
// agent's standing receiver and rebuilds a replacement asynchronously, so a burst of inbound
|
|
2355
|
+
// assignments for that agent would otherwise drop all but the first (review M2). Polling the
|
|
2356
|
+
// per-agent readiness lets each accept proceed once the prior rebuild completes. Must check the
|
|
2357
|
+
// OWNING agent — `getStandingReceiverReady()` with no arg returns true if ANY agent has one,
|
|
2358
|
+
// which in the loopback case (alice + bob on one daemon) would falsely pass while bob's own SR
|
|
2359
|
+
// is still mid-rebuild and drop bob's session (DOD-LOOP-1).
|
|
2360
|
+
// Poll until `agentName`'s standing receiver reports ready, or the wait budget is spent.
//
// @param agentName  Agent whose readiness is checked (passed to getStandingReceiverReady).
// @param maxWaitMs  Wait budget in milliseconds (default 3000).
// @param stepMs     Delay between polls in milliseconds (default 25).
// @returns          true as soon as a poll reports ready; otherwise the result of one final
//                   readiness check made after the budget has elapsed.
async function waitForStandingReceiver(agentName, maxWaitMs = 3_000, stepMs = 25) {
    const isReady = () => sessionNodeManager.getStandingReceiverReady(agentName);
    const pause = () => new Promise((resolve) => setTimeout(resolve, stepMs));

    // Fast path: no timers at all when the receiver is already up.
    if (isReady()) {
        return true;
    }
    for (const deadline = Date.now() + maxWaitMs; Date.now() < deadline;) {
        await pause();
        if (isReady()) {
            return true;
        }
    }
    return isReady();
}
|
|
2371
|
+
// Accept one parsed inbound session assignment on behalf of `agentName`.
//
// Steps: wait for the agent's standing receiver -> build optional relay params -> acceptSession
// -> record the counterparty's signer pubkey (when present) -> compute the genesis prev-root ->
// log, enqueue the session event and dispatch the "created" notification. A failed wait or a
// non-ok accept is logged as `session.inbound.accept.failed` and ends the flow.
//
// @param parsed         Result of extractInboundSessionAssignment.
// @param agentName      Local agent the assignment is addressed to.
// @param correlationId  Id threaded through every log event of this flow.
// @returns              Nothing. The session id is always removed from inboundInFlight on exit,
//                       including when an error propagates.
async function acceptInboundAssignment(parsed, agentName, correlationId) {
    const { sessionIdHex } = parsed;
    const warnAcceptFailed = (reason) => {
        logger.warn("session.inbound.accept.failed", {
            sessionId: sessionIdHex,
            agentName,
            reason,
            correlationId,
        });
    };
    try {
        // Bounded wait for this agent's standing receiver before attempting the accept.
        if (!(await waitForStandingReceiver(agentName))) {
            warnAcceptFailed("standing_receiver_unavailable");
            return;
        }

        // Relay params are built only when a key provider AND a complete relay endpoint exist;
        // otherwise acceptSession receives undefined for them.
        const keyProvider = keyProviders.get(agentName);
        let relayParams;
        if (keyProvider && parsed.relayPeerId && parsed.relayAddrs.length > 0) {
            const senderPubkey = await keyProvider.getPublicKey();
            relayParams = {
                relayPeerId: parsed.relayPeerId,
                relayAddrs: parsed.relayAddrs,
                keyProvider,
                senderPubkey,
                sessionIdBytes: new Uint8Array(Buffer.from(sessionIdHex, "hex")),
            };
        }

        // participant A (the initiator) is passed as OUR counterparty.
        const accepted = await sessionNodeManager.acceptSession(
            sessionIdHex,
            agentName,
            parsed.participantAPubkeyHex,
            parsed.initiatorPeerId,
            correlationId,
            relayParams,
        );
        if (!accepted.ok) {
            warnAcceptFailed(accepted.reason);
            return;
        }

        // Store the assignment's signer pubkey (when carried) against this session.
        if (parsed.signerPubkeyHex) {
            sessionNodeManager.recordCounterpartyPrimary(agentName, sessionIdHex, parsed.signerPubkeyHex);
        }

        // Genesis prev-root is derived from both pubkeys, the session id and the session
        // timestamp, passed in natural (A, B) order.
        const genesisBytes = computeGenesisPrevRoot(
            Buffer.from(parsed.participantAPubkeyHex, "hex"),
            Buffer.from(parsed.participantBPubkeyHex, "hex"),
            Buffer.from(sessionIdHex, "hex"),
            parsed.sessionTimestamp,
        );
        const genesisPrevRootHex = Buffer.from(genesisBytes).toString("hex");

        logger.info("session.inbound.accepted", {
            sessionId: sessionIdHex,
            agentName,
            sessionPeerId: accepted.peerId,
            correlationId,
        });
        enqueueInboundSession(agentName, {
            sessionIdHex,
            counterpartyPubkeyHex: parsed.participantAPubkeyHex,
            genesisPrevRootHex,
        });
        notificationDispatcher.dispatchSessionStateChanged(agentName, sessionIdHex, "created", parsed.participantAPubkeyHex);
    }
    finally {
        inboundInFlight.delete(parsed.sessionIdHex);
    }
}
|
|
2436
|
+
/**
 * Entry point for a directory-pushed `session_assignment` frame.
 *
 * Validates the frame, resolves which local agent is participant B, ignores
 * retransmissions, and finally appends the accept to `inboundAcceptChain` so
 * that accepts run strictly one after another. The function returns nothing;
 * every outcome is reported through the logger, and every event of one flow
 * carries the same correlation id.
 *
 * @param {object} frame - Raw inbound signaling frame.
 * @returns {void}
 */
function handleInboundSessionAssignment(frame) {
    // M4: mint one correlation id per inbound flow and attach it to every event below.
    const flowId = randomUUID();
    const assignment = extractInboundSessionAssignment(frame);
    if (!assignment) {
        logger.warn("session.inbound.assignment.malformed", {
            reason: "missing_assignment_or_ids",
            correlationId: flowId,
        });
        return;
    }
    const sessionId = assignment.sessionIdHex;
    // L1: downgrade guard. M1 single-key assignments are refused outright. This is a
    // separate concern from the deferred FROST verification further down.
    if (assignment.signatureType === "single") {
        logger.warn("session.inbound.assignment.refused", {
            sessionId,
            reason: "unsupported_signature_type",
            correlationId: flowId,
        });
        return;
    }
    // M3: the initiator's session peer id is what the accept step uses as the hand-off
    // gate. An empty value would gate the receiver to "" and defeat "only the initiator
    // may connect", so it is treated as a malformed assignment.
    if (!assignment.initiatorPeerId) {
        logger.warn("session.inbound.assignment.malformed", {
            sessionId,
            reason: "missing_initiator_peer_id",
            correlationId: flowId,
        });
        return;
    }
    // Participant pubkeys are the agents' K_local identity pubkeys. If no local agent
    // is participant B, the assignment is not addressed to this daemon.
    const owner = agents.find((candidate) => candidate.pubkey === assignment.participantBPubkeyHex);
    if (!owner) {
        logger.debug("session.inbound.not_local", {
            sessionId,
            counterpartyPubkey: assignment.participantBPubkeyHex,
            correlationId: flowId,
        });
        return;
    }
    const agentName = owner.name;
    // M1: idempotency. A session that is already in flight or already has a record
    // must not be accepted or enqueued a second time.
    const alreadyKnown = inboundInFlight.has(sessionId)
        || sessionNodeManager.getSessionRecord(agentName, sessionId);
    if (alreadyKnown) {
        logger.info("session.inbound.duplicate.ignored", {
            sessionId,
            agentName,
            correlationId: flowId,
        });
        return;
    }
    // SECURITY — DEFERRED (SESSION-004 re-home): the directory's FROST threshold
    // signature on the assignment is NOT verified on this path yet. Assignments are
    // accepted on trust until that check is re-homed here; the gap is logged on every
    // accept so it is never silent.
    logger.warn("session.inbound.assignment.unverified", {
        sessionId,
        agentName,
        note: "FROST assignment signature verification deferred to SESSION-004 re-home",
        correlationId: flowId,
    });
    inboundInFlight.add(sessionId);
    // Serialize accepts: the next one starts only after this one settles. The trailing
    // catch keeps a failure in one accept from poisoning the chain for later ones.
    const runAccept = () => acceptInboundAssignment(assignment, agentName, flowId);
    const onAcceptError = (err) => {
        inboundInFlight.delete(assignment.sessionIdHex);
        logger.error("session.inbound.accept.error", {
            sessionId: assignment.sessionIdHex,
            agentName,
            error: err instanceof Error ? err.message : String(err),
            correlationId: flowId,
        });
    };
    inboundAcceptChain = inboundAcceptChain.then(runAccept).catch(onAcceptError);
}
|
|
2519
|
+
// Route directory-pushed session assignments into the inbound accept flow; every
// other frame type is ignored by this handler.
signalingManager.registerInboundHandler((frame) => {
    if (frame["type"] === "session_assignment") {
        handleInboundSessionAssignment(frame);
    }
});
// Keystone (primary agent) listeners. Each one needs the primary agent's name and
// pubkey, so nothing is registered when there is no primary agent.
if (primaryAgent) {
    const { name: keystoneName, pubkey: keystonePubkey } = primaryAgent;
    // M7 DOD-SPINE-7: session_sealed arrives on the session-owning agent's signaling
    // stream once the bilateral seal notarizes; this is the keystone-stream listener.
    registerSessionSealedListener(signalingManager, keystoneName, keystonePubkey);
    // SESSION-002: keystone counterpart for seal_unilateral_confirmed.
    registerUnilateralConfirmedListener(signalingManager, keystoneName, keystonePubkey);
    // DOD-UP-1: keystone counterpart for the absent-party upgrade notification. It is
    // registered here at startup because the directory pushes the queued notification
    // during the keystone's auth/reconnect drain, before any agent start runs.
    registerUnilateralUpgradeListener(signalingManager, keystoneName, keystonePubkey);
}
|
|
2544
|
+
// cello_await_session — the counterparty's blocking pull for the next inbound session.
// Returns immediately if one is already queued for the current agent (FIFO), otherwise
// blocks until one arrives or timeout_ms elapses.
//
// params.timeout_ms (optional number): how long to block, in milliseconds. Defaults to
// 30 000. The value is clamped to the range Node's timers accept (0 … 2^31-1): outside
// that range, or for NaN, setTimeout silently uses a 1 ms delay, which would turn a
// "wait very long" request into an immediate timeout.
//
// Resolves to { type: "new_session", session_id, counterparty_pubkey, genesis_prev_root },
// to { type: "timeout" }, or to NO_CURRENT_AGENT_RESPONSE when the connection has no
// current agent.
handlers.set("cello_await_session", async (params, connectionId) => {
    const connState = perConnectionState.get(connectionId);
    if (!connState || !connState.currentAgent) {
        return NO_CURRENT_AGENT_RESPONSE;
    }
    const agentName = connState.currentAgent;
    // Largest delay setTimeout honours; anything above it is coerced to 1 ms by Node.
    const MAX_TIMER_DELAY_MS = 2_147_483_647;
    const requestedTimeout = params?.["timeout_ms"];
    const timeoutMs = typeof requestedTimeout === "number" && !Number.isNaN(requestedTimeout)
        ? Math.min(Math.max(requestedTimeout, 0), MAX_TIMER_DELAY_MS)
        : 30_000;
    const toResponse = (e) => ({
        type: "new_session",
        session_id: e.sessionIdHex,
        counterparty_pubkey: e.counterpartyPubkeyHex,
        genesis_prev_root: e.genesisPrevRootHex,
    });
    // Fast path: hand out the oldest queued session without blocking.
    const queued = inboundSessionQueues.get(agentName);
    if (queued && queued.length > 0) {
        return toResponse(queued.shift());
    }
    // Slow path: park a waiter for this agent. It resolves with the delivered session
    // event, or with null when the timer fires first.
    const event = await new Promise((resolve) => {
        const waiters = inboundSessionWaiters.get(agentName) ?? [];
        const waiter = {
            connectionId,
            deliver: (e) => {
                clearTimeout(timer);
                resolve(e);
            },
        };
        waiters.push(waiter);
        inboundSessionWaiters.set(agentName, waiters);
        const timer = setTimeout(() => {
            // Remove this waiter so a later session is not delivered to a caller that
            // has already been told "timeout".
            const list = inboundSessionWaiters.get(agentName);
            if (list) {
                const idx = list.indexOf(waiter);
                if (idx !== -1)
                    list.splice(idx, 1);
            }
            resolve(null);
        }, timeoutMs);
    });
    if (event === null)
        return { type: "timeout" };
    return toResponse(event);
});
|
|
2590
|
+
// M7-SESSION-001 / DAEMON-004: shared bilateral ack-await machinery. Both the
// interrupted-seal flow and the active-session seal flow send a
// `seal_interrupted_request` and then wait on the directory signaling stream for the
// counterparty's `seal_interrupted_ack` / `seal_interrupted_rejection`, or time out.
// Keeping the wait in one place makes the two flows wait identically.
const SEAL_INTERRUPTED_TIMEOUT_MS = 30_000;
/**
 * Waits for the counterparty's reply to a seal request for one session.
 *
 * The returned promise never rejects. It resolves to one of:
 *   - { type: "timeout" } after SEAL_INTERRUPTED_TIMEOUT_MS without a matching reply;
 *   - { type: "seal_interrupted_ack", sealInterruptedLeaf, nonce } (nonce is null when
 *     the frame carries no string nonce; the leaf defaults to {});
 *   - { type: "seal_interrupted_rejection", reason } (reason defaults to "unknown").
 *
 * @param {string} sessionId - Session whose reply is awaited; frames for other sessions are ignored.
 * @returns {Promise<object>}
 */
function awaitSealAck(sessionId) {
    return new Promise((resolve) => {
        const timeoutHandle = setTimeout(() => {
            unregister();
            resolve({ type: "timeout" });
        }, SEAL_INTERRUPTED_TIMEOUT_MS);
        const unregister = signalingManager.registerInboundHandler((frame) => {
            const isAck = frame.type === "seal_interrupted_ack";
            const isRejection = frame.type === "seal_interrupted_rejection";
            if (!isAck && !isRejection) {
                return;
            }
            const forThisSession = typeof frame.sessionId === "string" && frame.sessionId === sessionId;
            if (!forThisSession) {
                return;
            }
            // First matching reply wins: stop the timer and stop listening before resolving.
            clearTimeout(timeoutHandle);
            unregister();
            const outcome = isAck
                ? {
                    type: "seal_interrupted_ack",
                    sealInterruptedLeaf: frame.sealInterruptedLeaf ?? {},
                    nonce: typeof frame.nonce === "string" ? frame.nonce : null,
                }
                : {
                    type: "seal_interrupted_rejection",
                    reason: typeof frame.reason === "string" ? frame.reason : "unknown",
                };
            resolve(outcome);
        });
    });
}
|
|
2627
|
+
/**
 * Initiator side of the seal-interrupted exchange for a session in the
 * 'interrupted' state.
 *
 * Steps: pick the transcript root/size to bind over, K_local-sign our own
 * SEAL-INTERRUPTED leaf, send a `seal_interrupted_request` over directory
 * signaling, wait for the counterparty's reply, verify their leaf, and persist
 * the bilateral commitment. On success the session is advanced to
 * 'seal_interrupted_pending' (not 'sealed').
 *
 * Every failure is logged as `session.interrupted.seal.failed` and returned as
 * `{ ok: false, reason, guidance }`; signing errors are caught and reported the
 * same way rather than thrown.
 *
 * @param {string} sessionId - Session being sealed.
 * @param {object} record - Session row; reads agent_name, counterparty_pubkey, message_count.
 * @param {string} correlationId - Correlation id attached to every log event.
 * @param {string} merkleRootAtInterruption - Caller-supplied root, used ONLY when the
 *   daemon has no tree of its own for this session.
 * @returns {Promise<object>} `{ ok: true, sessionId, status: "seal_interrupted_pending" }`
 *   or `{ ok: false, reason, guidance }`.
 */
async function handleSealInterruptedFlow(sessionId, record, correlationId, merkleRootAtInterruption) {
    const agentName = record.agent_name;
    // All failure paths log the same event shape and return the same result shape.
    const fail = (reason, error, guidance) => {
        logger.error("session.interrupted.seal.failed", {
            sessionId,
            agentName,
            reason,
            error,
            correlationId,
        });
        return { ok: false, reason, guidance };
    };
    const nonce = randomUUID();
    // The agent_name on the session record identifies which local agent owned the session.
    const agent = agents.find((a) => a.name === agentName);
    const myPubkeyHex = agent?.pubkey ?? "";
    const counterpartyPubkey = record.counterparty_pubkey;
    // DAEMON-004 (AC-007 / SI-001): prefer the daemon-owned tree. When it is non-empty
    // it is authoritative and the caller-supplied root is ignored. Only when there is
    // no tree do we fall back to the caller's root and the message_count column.
    const reloadedTree = sessionNodeManager.getSessionTree(agentName, sessionId);
    const hasOwnTree = reloadedTree.size() > 0;
    const ownLeafCount = hasOwnTree ? reloadedTree.size() : (record.message_count ?? 0);
    const effectiveRoot = hasOwnTree
        ? sessionNodeManager.getSessionTreeRootHex(agentName, sessionId)
        : merkleRootAtInterruption;
    // DB-001: check signaling status before attempting to send.
    if (signalingManager.status === "reconnecting") {
        return fail("signaling_reconnecting", "directory_signaling_reconnecting", "The directory signaling stream is reconnecting. Wait for directory_signaling to show connected in cello status before initiating seal-interrupted. The daemon reconnects automatically — no manual intervention required.");
    }
    // H-1: we must be able to sign our OWN leaf. Like the active seal flow, refuse
    // when either the key provider or the agent's pubkey is missing, so that a leaf
    // with an empty signer pubkey is never signed or sent.
    const myKeyProvider = keyProviders.get(agentName);
    if (!myKeyProvider || !myPubkeyHex) {
        return fail("signing_key_unavailable", myKeyProvider ? "no_pubkey_for_agent" : "no_key_provider_for_agent", "The signing key for the agent that owned this session could not be loaded. Confirm the agent's key file exists under ~/.cello/agents and restart the daemon.");
    }
    let ownLeaf;
    try {
        ownLeaf = await buildSignedSealInterruptedLeaf(myKeyProvider, {
            sessionId,
            leafCount: ownLeafCount,
            merkleRootAtInterruption: effectiveRoot,
            signerPubkeyHex: myPubkeyHex,
        });
    }
    catch (err) {
        // A signing failure is reported as a structured result, not an unhandled throw.
        return fail("seal_interrupted_leaf_signing_failed", err instanceof Error ? err.message : String(err), "The SEAL-INTERRUPTED leaf could not be signed. Check the daemon logs for the signing error and confirm the agent key is intact. The session remains interrupted.");
    }
    // Send the request via directory signaling.
    const request = {
        type: "seal_interrupted_request",
        sessionId,
        initiatorPubkey: myPubkeyHex,
        counterpartyPubkey,
        leafCountAtInterruption: ownLeafCount,
        merkleRootAtInterruption: effectiveRoot,
        nonce,
    };
    const sendResult = await signalingManager.sendRaw(request);
    if (!sendResult.ok) {
        return fail("seal_interrupted_counterparty_unavailable", sendResult.reason, "The counterparty is not currently reachable to complete the seal-interrupted flow. Retry when the counterparty is online — check their connection status via cello_list_connections.");
    }
    // Wait for the counterparty's ack / rejection (or a timeout).
    const ackResult = await awaitSealAck(sessionId);
    if (ackResult.type === "timeout") {
        return fail("seal_interrupted_counterparty_unavailable", "seal_interrupted_response_timeout", "The counterparty is not currently reachable to complete the seal-interrupted flow. Retry when the counterparty is online — check their connection status via cello_list_connections.");
    }
    if (ackResult.type === "seal_interrupted_rejection") {
        return fail("seal_interrupted_rejected_by_counterparty", ackResult.reason, "The counterparty rejected the seal-interrupted request. This may indicate their session state is inconsistent. Ask the counterparty to check their interrupted sessions via cello status on their end.");
    }
    // ackResult.type === "seal_interrupted_ack"
    const leaf = ackResult.sealInterruptedLeaf;
    // C-1 / SI-002 / SI-003: the shared helper checks the echoed nonce, leaf-count
    // agreement against OUR own count, and the counterparty's signature.
    const verified = verifyCounterpartySealLeaf({
        leaf,
        sentNonce: nonce,
        ackNonce: ackResult.nonce,
        ownLeafCount,
        expectedCounterpartyPubkey: record.counterparty_pubkey,
    });
    if (!verified.ok) {
        const reasonMap = {
            nonce_mismatch: "seal_interrupted_nonce_mismatch",
            leaf_count_mismatch: "seal_interrupted_leaf_count_mismatch",
            leaf_signature_invalid: "seal_interrupted_leaf_signature_invalid",
        };
        const guidanceMap = {
            nonce_mismatch: "The counterparty's acknowledgement did not echo the expected nonce. This indicates a stale or replayed response. The session remains interrupted — retry cello_close_session.",
            leaf_count_mismatch: "The counterparty's recorded message count at interruption does not match ours. The two sides have divergent session histories and cannot form a bilateral commitment. Compare cello status on both ends before retrying.",
            leaf_signature_invalid: "The counterparty's SEAL-INTERRUPTED leaf signature did not verify. The seal flow has been aborted. The session remains interrupted — retry cello_close_session after confirming the counterparty is using a compatible version.",
        };
        return fail(reasonMap[verified.reason], verified.error, guidanceMap[verified.reason]);
    }
    // H-1: verification passed. Persist BOTH leaves and advance to the non-terminal
    // 'seal_interrupted_pending' state; 'sealed' is not written here.
    const advanced = sessionNodeManager.persistSealInterruptedCommitment({
        agentName,
        sessionId,
        role: "initiator",
        ownLeaf,
        counterpartyLeaf: leaf,
        merkleRoot: effectiveRoot,
        nonce,
    });
    if (!advanced) {
        return fail("seal_interrupted_persist_failed", "session row was not in 'interrupted' state at commit time", "The bilateral commitment could not be persisted because the session was no longer in the interrupted state. Re-check cello status — it may already be pending or sealed.");
    }
    logger.info("session.interrupted.pending", {
        sessionId,
        agentName,
        leafCount: ownLeafCount,
        correlationId,
    });
    return { ok: true, sessionId, status: "seal_interrupted_pending" };
}
|
|
2814
|
+
/**
 * Initiator side of the bilateral seal for a session that is still active.
 *
 * The seal binds over the daemon's OWN tree root and leaf count (SI-001); no
 * caller-supplied root is read. The exchange reuses the `seal_interrupted_request`
 * frame and leaf shape. On success both signed leaves are persisted, the session
 * is left at 'seal_interrupted_pending', and its live session node is retired.
 *
 * @param {string} sessionId - Session being closed.
 * @param {object} record - Session row; reads agent_name and counterparty_pubkey.
 * @param {string} correlationId - Correlation id attached to every log event.
 * @returns {Promise<object>} `{ ok: true, sessionId, status: "seal_interrupted_pending", rootHex }`
 *   or `{ ok: false, reason, guidance }`.
 */
async function handleActiveSealFlow(sessionId, record, correlationId) {
    // Logs a `session.seal.initiate.failed` event and builds the matching failure result.
    const fail = (reason, errorMessage, guidance) => {
        logger.error("session.seal.initiate.failed", {
            sessionId,
            reason,
            errorMessage,
            correlationId,
        });
        return { ok: false, reason, guidance };
    };
    const owner = agents.find((a) => a.name === record.agent_name);
    const ownPubkeyHex = owner?.pubkey ?? "";
    const signer = keyProviders.get(record.agent_name);
    // DB-002: never start a partial seal while signaling is reconnecting. This path
    // returns without logging.
    if (signalingManager.status === "reconnecting") {
        return {
            ok: false,
            reason: "signaling_reconnecting",
            guidance: "The directory signaling stream is reconnecting. Wait for directory_signaling to show connected in cello status before retrying the close. The daemon reconnects automatically.",
        };
    }
    if (!signer || !ownPubkeyHex) {
        return fail("signing_key_unavailable", "no key provider or pubkey for the agent that owns this session", "The signing key for the agent that owns this session could not be loaded. Confirm the agent's key file exists under ~/.cello/agents and restart the daemon.");
    }
    // SI-001: root and size come from the daemon's own tree for this session.
    const rootHex = sessionNodeManager.getSessionTreeRootHex(record.agent_name, sessionId);
    const treeSize = sessionNodeManager.getSessionTree(record.agent_name, sessionId).size();
    const sealNonce = randomUUID();
    // SI-003: K_local-sign our own leaf over our own root, using the SEAL-INTERRUPTED
    // leaf shape so the counterparty co-signs the same canonical form.
    let signedLeaf;
    try {
        signedLeaf = await buildSignedSealInterruptedLeaf(signer, {
            sessionId,
            leafCount: treeSize,
            merkleRootAtInterruption: rootHex,
            signerPubkeyHex: ownPubkeyHex,
        });
    }
    catch (err) {
        return fail("seal_leaf_signing_failed", err instanceof Error ? err.message : String(err), "The SEAL leaf could not be signed. Check the daemon logs for the signing error and confirm the agent key is intact.");
    }
    // AC-003: the logged rootHex is the daemon's own root.
    logger.info("session.seal.initiated", {
        sessionId,
        rootHex,
        role: "initiator",
        correlationId,
    });
    // Submit over the directory signaling stream and wait for the counterparty's
    // reply; success is never reported on a fire-and-forget send.
    const submission = await signalingManager.sendRaw({
        type: "seal_interrupted_request",
        sessionId,
        initiatorPubkey: ownPubkeyHex,
        counterpartyPubkey: record.counterparty_pubkey,
        leafCountAtInterruption: treeSize,
        merkleRootAtInterruption: rootHex,
        nonce: sealNonce,
    });
    if (!submission.ok) {
        const submitGuidance = "guidance" in submission && typeof submission.guidance === "string"
            ? submission.guidance
            : "The seal could not be submitted to the directory. Retry once cello status shows directory_signaling connected.";
        return fail(submission.reason, submission.reason, submitGuidance);
    }
    const reply = await awaitSealAck(sessionId);
    if (reply.type === "timeout") {
        return fail("seal_counterparty_unavailable", "seal_response_timeout", "The counterparty did not acknowledge the seal in time. Retry when they are online — check cello_list_connections. The session remains active and usable.");
    }
    if (reply.type === "seal_interrupted_rejection") {
        return fail("seal_rejected_by_counterparty", reply.reason, "The counterparty rejected the seal request. Their session state may be inconsistent. Ask them to check cello status before retrying.");
    }
    // SI-002 / SI-003: verify the counterparty's own-signed ack leaf.
    const check = verifyCounterpartySealLeaf({
        leaf: reply.sealInterruptedLeaf,
        sentNonce: sealNonce,
        ackNonce: reply.nonce,
        ownLeafCount: treeSize,
        expectedCounterpartyPubkey: record.counterparty_pubkey,
    });
    if (!check.ok) {
        return fail(`seal_${check.reason}`, check.error, "The counterparty's seal acknowledgement failed verification (nonce, leaf count, or signature). The session remains active — retry cello_close_session once both sides agree on the transcript.");
    }
    // Verified bilateral commitment over our own root: persist both signed leaves.
    // The session stops at 'seal_interrupted_pending'; finalizing 'sealed' is a
    // separate, later directory step.
    const persisted = sessionNodeManager.persistSealInterruptedCommitment({
        agentName: record.agent_name,
        sessionId,
        role: "initiator",
        ownLeaf: signedLeaf,
        counterpartyLeaf: reply.sealInterruptedLeaf,
        merkleRoot: rootHex,
        nonce: sealNonce,
    });
    if (!persisted) {
        return fail("seal_persist_failed", "session row was not in an active/interrupted state at commit time", "The bilateral seal commitment could not be persisted because the session changed state. Re-check cello status — it may already be pending or sealed.");
    }
    // round-2 finding #5: the session is now frozen, so retire its live node to stop
    // further inbound content and avoid leaking a node per active close.
    await sessionNodeManager.retireSessionNode(record.agent_name, sessionId);
    return { ok: true, sessionId, status: "seal_interrupted_pending", rootHex };
}
|
|
2978
|
+
// ─── CELLO-M7-DAEMON-004: cello_send (live send + daemon-owned tree append) ──
|
|
2979
|
+
/**
 * IPC handler for `cello_send`: live send plus daemon-owned tree append.
 *
 * Params (snake_case public fields): `session_id` and `content` (string).
 * Flow: resolve the connection's current agent -> look up the session scoped to
 * that agent -> require status "active" -> enforce MAX_CONTENT_BYTES -> send ->
 * on delivery append the "msg" leaf and persist the sent plaintext.
 *
 * Returns `{ ok: true, sequence_number }` on delivery, otherwise
 * `{ ok: false, reason, guidance }`. On a delivery failure the content is offered
 * to the durable retry queue; the guidance reports truthfully whether that
 * enqueue succeeded.
 */
handlers.set("cello_send", async (params, connectionId) => {
    const connState = perConnectionState.get(connectionId);
    if (!connState || !connState.currentAgent) {
        return NO_CURRENT_AGENT_RESPONSE;
    }
    // round-2 BLOCKING: read the snake_case public field cello-mcp.ts actually sends.
    const sessionId = params?.session_id;
    const contentStr = typeof params?.content === "string" ? params.content : undefined;
    if (!sessionId || contentStr === undefined) {
        return { ok: false, reason: "missing_params", guidance: "Provide 'session_id' (hex) and 'content' (string) parameters." };
    }
    // DOD-LOOP-1: the (agent, session_id) lookup is itself the ownership scope.
    const record = sessionNodeManager.getSessionRecord(connState.currentAgent, sessionId);
    if (!record) {
        return { ok: false, reason: "session_not_found", guidance: "No session found with this ID. Check cello_list_sessions for active sessions." };
    }
    if (record.agent_name !== connState.currentAgent) {
        return { ok: false, reason: "session_not_owned", guidance: "This session belongs to a different agent. Call cello_use_agent to switch to the agent that owns it, then retry." };
    }
    if (record.status !== "active") {
        return { ok: false, reason: "session_not_active", guidance: `Session is '${record.status}', not active. Content can only be sent on an active session. If it is interrupted, call cello_close_session to seal it.` };
    }
    const correlationId = randomUUID();
    const encoder = new TextEncoder();
    const contentBytes = encoder.encode(contentStr);
    // CELLO-M7-MSG-001 (AC-013/AC-018/AC-021): enforce the 1 MB application content cap
    // BEFORE any transmission or hash/leaf production, so an oversize message is rejected
    // with a distinct reason; no frame is transmitted and no leaf is appended.
    if (contentBytes.length > MAX_CONTENT_BYTES) {
        logger.warn("content.rejected.too_large", {
            sessionId,
            contentSize: contentBytes.length,
            cap: MAX_CONTENT_BYTES,
            correlationId,
        });
        return {
            ok: false,
            reason: "content_too_large",
            guidance: `This message is ${contentBytes.length} bytes, over the ${MAX_CONTENT_BYTES}-byte (1 MB) per-message content cap. Split it into multiple messages each under the cap, or use the large-object/file transfer path for large payloads (not cello_send). Nothing was sent and the session is still active — retry with smaller content.`,
        };
    }
    // Content hash is SHA-256 over a single 0x00 prefix byte followed by the content bytes.
    const contentHash = createHash("sha256").update(new Uint8Array([0x00])).update(contentBytes).digest();
    const contentHashHex = Buffer.from(contentHash).toString("hex");
    const recipientPubkey = record.counterparty_pubkey;
    const sendResult = await sessionNodeManager.sendContent(record.agent_name, sessionId, contentBytes, new Uint8Array(contentHash), correlationId);
    if (!sendResult.ok) {
        // DB-001 / dead-channel contract: never silently drop, never desync. Try to
        // preserve the content in the durable retry_queue, and remember whether that
        // actually worked so the caller is not told it was queued when it was not.
        const nonce = randomUUID();
        let queued = false;
        try {
            retryQueue.enqueue(sessionId, encoder.encode(nonce), contentBytes);
            queued = true;
        }
        catch (err) {
            logger.error("session.content.queue.failed", {
                sessionId,
                error: err instanceof Error ? err.message : String(err),
                correlationId,
            });
        }
        logger.warn("session.content.send.failed", {
            sessionId,
            recipientPubkey,
            reason: sendResult.reason,
            errorMessage: sendResult.error,
            correlationId,
        });
        return {
            ok: false,
            reason: sendResult.reason,
            guidance: queued
                ? "The content could not be delivered over the session stream right now. It has been queued in the durable retry queue and will be retried when the counterparty reconnects. The session remains usable — check cello_list_connections for the counterparty's status."
                : "The content could not be delivered over the session stream right now, and it could NOT be saved to the durable retry queue, so it will not be retried automatically. The session remains usable — check cello_list_connections for the counterparty's status, then call cello_send again with the same content.",
        };
    }
    // Delivered — append the message leaf to the daemon-owned tree (advances root).
    const { leafIndex } = sessionNodeManager.appendSessionLeaf(record.agent_name, sessionId, "msg", contentHashHex, correlationId);
    // DOD-LOG-1: persist the readable SENT plaintext to the durable transcript, keyed by the
    // canonical leaf sequence so it joins the committed hash chain (survives restart).
    sessionNodeManager.recordTranscriptMessage(record.agent_name, sessionId, leafIndex, "sent", contentBytes, correlationId);
    logger.info("session.content.sent", {
        sessionId,
        recipientPubkey,
        contentHashHex,
        sequenceNumber: leafIndex,
        correlationId,
    });
    return { ok: true, sequence_number: leafIndex };
});
|
|
3067
|
+
// ─── CELLO-M7-DAEMON-004: cello_receive (returns from the daemon's own buffer) ──
|
|
3068
|
+
/**
 * IPC handler for `cello_receive`: hands back one buffered entry for a session
 * owned by the connection's current agent.
 *
 * Params: `session_id` (snake_case public field).
 * Returns `{ ok: true, content: null, guidance }` when nothing is buffered,
 * `{ ok: true, content, sessionId, sequence_number, senderPubkey }` when an entry
 * is available, or `{ ok: false, reason, guidance }` on a lookup failure.
 */
handlers.set("cello_receive", async (params, connectionId) => {
    const currentAgent = perConnectionState.get(connectionId)?.currentAgent;
    if (!currentAgent) {
        return NO_CURRENT_AGENT_RESPONSE;
    }
    // round-2 BLOCKING: read the snake_case public field cello-mcp.ts actually sends.
    const sessionId = params?.session_id;
    if (!sessionId) {
        return { ok: false, reason: "missing_params", guidance: "Provide 'session_id' (hex) to receive content for a specific session." };
    }
    const sessionRecord = sessionNodeManager.getSessionRecord(currentAgent, sessionId);
    if (!sessionRecord) {
        return { ok: false, reason: "session_not_found", guidance: "No session found with this ID. Check cello_list_sessions." };
    }
    if (sessionRecord.agent_name !== currentAgent) {
        return { ok: false, reason: "session_not_owned", guidance: "This session belongs to a different agent. Call cello_use_agent to switch to the agent that owns it, then retry." };
    }
    const buffered = sessionNodeManager.takeReceivedContent(currentAgent, sessionId);
    if (buffered) {
        // The buffer holds the payload hex-encoded; decode it back to a UTF-8 string.
        const text = Buffer.from(buffered.contentHex, "hex").toString("utf8");
        return {
            ok: true,
            content: text,
            sessionId,
            sequence_number: buffered.sequenceNumber,
            senderPubkey: buffered.senderPubkey,
        };
    }
    return { ok: true, content: null, guidance: "No content is currently buffered for this session. Call cello_receive again after the counterparty sends, or use the blocking receive variant." };
});
|
|
3096
|
+
// Holds the in-flight stop() promise so repeated `shutdown` requests start it only once.
let shutdownPromise = null;
/**
 * IPC handler for `shutdown`: kicks off stop("logout_requested") in the background
 * (first call only) and acknowledges immediately without awaiting it. A failure of
 * stop() is logged rather than propagated to the caller.
 */
handlers.set("shutdown", async (_params, _connectionId) => {
    if (shutdownPromise === null) {
        const reportFailure = (err) => {
            logger.error("daemon.shutdown.failed", {
                signal: "logout",
                error: err instanceof Error ? err.message : String(err),
            });
        };
        shutdownPromise = stop("logout_requested").catch(reportFailure);
    }
    return { acknowledged: true };
});
|
|
3108
|
+
// Create and start IPC server
|
|
3109
|
+
// Build the IPC server over the registered handlers and start it. If startup
// fails, the lock file is removed before the error is rethrown to the caller.
const ipcServer = createIpcServer({ socketPath, maxConnections, logger }, handlers);
try {
    await ipcServer.start();
}
catch (startError) {
    await removeLock(lockFilePath, logger);
    throw startError;
}
// MCP-002: the NotificationDispatcher sends through, and enumerates connections via,
// the IPC server created above.
const notificationDispatcher = new NotificationDispatcher({
    logger,
    sendNotification: (targetConnectionId, payload) => ipcServer.sendNotification(targetConnectionId, payload),
    getConnectionIds: () => ipcServer.getConnectionIds(),
});
// M7-SESSION-001 (M-1 PUSH): forward session state changes reported by the session
// node manager to the dispatcher as session_state_changed notifications. Setter
// injection is used because the dispatcher is constructed AFTER the
// SessionNodeManager (it depends on the IPC server), so constructor injection
// would be circular.
sessionNodeManager.setOnSessionStateChanged((agent, session, newState, peerPubkey) => {
    notificationDispatcher.dispatchSessionStateChanged(agent, session, newState, peerPubkey);
});
|
|
3131
|
+
// MCP-001: Clean up per-connection state when a connection disconnects
|
|
3132
|
+
// MCP-002: Also unregister from notification dispatcher
|
|
3133
|
+
/**
 * Disconnect cleanup: drops the connection's per-connection state, unregisters it
 * from the notification dispatcher, and evicts any inbound-session waiters it owned.
 */
ipcServer.onDisconnect((connectionId) => {
    perConnectionState.delete(connectionId);
    notificationDispatcher.unregisterConnection(connectionId);
    // Seam 2 (review H2): a waiter left behind by a closed connection would otherwise be
    // handed the next inbound session and the event would be lost. Each such waiter is
    // released with deliver(null) and removed from its agent's list.
    for (const [agentName, pending] of inboundSessionWaiters) {
        const remaining = [];
        for (const waiter of pending) {
            if (waiter.connectionId !== connectionId) {
                remaining.push(waiter);
                continue;
            }
            waiter.deliver(null);
        }
        if (remaining.length === 0) {
            inboundSessionWaiters.delete(agentName);
        }
        else {
            inboundSessionWaiters.set(agentName, remaining);
        }
    }
});
|
|
3154
|
+
// Log daemon.login.validation.complete (stub — all unverified until SIGNAL-001)
|
|
3155
|
+
// All counts are hard-coded to zero here (validation is a stub at this point).
logger.info("daemon.login.validation.complete", { verifiedCount: 0, staleCount: 0, goneCount: 0 });
// Startup record: process id, IPC socket path, number of loaded agents, manifest status.
logger.info("daemon.started", {
    pid: process.pid,
    ipcSocketPath: socketPath,
    agentCount: loadedAgents.length,
    manifestVerified,
});
|
|
3167
|
+
// M7-MANIFEST-002 / DOD-AUTH-2: background manifest polling is now ACTIVE. The keystone
|
|
3168
|
+
// SignalingManager (constructed above with the poll deps) calls startPolling() when its
|
|
3169
|
+
// stream reaches connected — it re-polls the directory on the randomized 6–12h interval
|
|
3170
|
+
// and adopts a newer signed manifest (handleManifestPollResponse). No separate wiring
|
|
3171
|
+
// needed here; poll lifecycle = the keystone connection lifecycle.
|
|
3172
|
+
// Graceful shutdown
|
|
3173
|
+
/**
 * Graceful shutdown. Steps run strictly in this order:
 *   1. cancel the manifest poll scheduler, if one exists;
 *   2. log `daemon.stopped` with the pid and the supplied reason;
 *   3. stop the main SignalingManager, then each per-agent signaling stream;
 *   4. run the session node manager's graceful shutdown (AC-009) before IPC stops;
 *   5. stop the IPC server and remove the lock file.
 *
 * @param reason label recorded in the `daemon.stopped` log entry.
 */
async function stop(reason) {
    if (manifestPollScheduler) {
        manifestPollScheduler.cancel();
    }
    const stoppedInfo = { pid: process.pid, reason };
    logger.info("daemon.stopped", stoppedInfo);
    await signalingManager.stop();
    // Multi-agent: stop each per-agent stream one at a time so none outlives shutdown.
    for (const { signaling } of perAgentSignaling.values()) {
        await signaling.stop();
    }
    await sessionNodeManager.gracefulShutdown();
    await ipcServer.stop();
    await removeLock(lockFilePath, logger);
}
|
|
3191
|
+
/** Accessor exposing this daemon instance's session node manager. */
function getSessionNodeManager() {
    return sessionNodeManager;
}
|
|
3194
|
+
/** Accessor exposing this daemon instance's transport selector. */
function getTransportSelector() {
    return transportSelector;
}
|
|
3197
|
+
/** Accessor exposing this daemon instance's AutoNAT service. */
function getAutoNatService() {
    return autoNatService;
}
|
|
3200
|
+
return { stop, getStatus, getSessionNodeManager, getDirectoryNode, getTransportSelector, getAutoNatService };
|
|
3201
|
+
}
|
|
3202
|
+
//# sourceMappingURL=daemon.js.map
|