@cello-protocol/daemon 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/dist/agent-loader.d.ts +41 -0
  2. package/dist/agent-loader.d.ts.map +1 -0
  3. package/dist/agent-loader.js +94 -0
  4. package/dist/agent-loader.js.map +1 -0
  5. package/dist/bin/cello-daemon.d.ts +13 -0
  6. package/dist/bin/cello-daemon.d.ts.map +1 -0
  7. package/dist/bin/cello-daemon.js +170 -0
  8. package/dist/bin/cello-daemon.js.map +1 -0
  9. package/dist/cello-node-transport-dialer.d.ts +59 -0
  10. package/dist/cello-node-transport-dialer.d.ts.map +1 -0
  11. package/dist/cello-node-transport-dialer.js +108 -0
  12. package/dist/cello-node-transport-dialer.js.map +1 -0
  13. package/dist/challenge-verifier.d.ts +12 -0
  14. package/dist/challenge-verifier.d.ts.map +1 -0
  15. package/dist/challenge-verifier.js +11 -0
  16. package/dist/challenge-verifier.js.map +1 -0
  17. package/dist/connect-or-start.d.ts +25 -0
  18. package/dist/connect-or-start.d.ts.map +1 -0
  19. package/dist/connect-or-start.js +117 -0
  20. package/dist/connect-or-start.js.map +1 -0
  21. package/dist/content-park-client.d.ts +49 -0
  22. package/dist/content-park-client.d.ts.map +1 -0
  23. package/dist/content-park-client.js +196 -0
  24. package/dist/content-park-client.js.map +1 -0
  25. package/dist/daemon.d.ts +65 -0
  26. package/dist/daemon.d.ts.map +1 -0
  27. package/dist/daemon.js +3202 -0
  28. package/dist/daemon.js.map +1 -0
  29. package/dist/directory-bootstrap.d.ts +55 -0
  30. package/dist/directory-bootstrap.d.ts.map +1 -0
  31. package/dist/directory-bootstrap.js +102 -0
  32. package/dist/directory-bootstrap.js.map +1 -0
  33. package/dist/file-manifest-provider.d.ts +18 -0
  34. package/dist/file-manifest-provider.d.ts.map +1 -0
  35. package/dist/file-manifest-provider.js +72 -0
  36. package/dist/file-manifest-provider.js.map +1 -0
  37. package/dist/index.d.ts +18 -0
  38. package/dist/index.d.ts.map +1 -0
  39. package/dist/index.js +18 -0
  40. package/dist/index.js.map +1 -0
  41. package/dist/ipc-client.d.ts +31 -0
  42. package/dist/ipc-client.d.ts.map +1 -0
  43. package/dist/ipc-client.js +112 -0
  44. package/dist/ipc-client.js.map +1 -0
  45. package/dist/ipc-server.d.ts +49 -0
  46. package/dist/ipc-server.d.ts.map +1 -0
  47. package/dist/ipc-server.js +268 -0
  48. package/dist/ipc-server.js.map +1 -0
  49. package/dist/lock-file.d.ts +27 -0
  50. package/dist/lock-file.d.ts.map +1 -0
  51. package/dist/lock-file.js +84 -0
  52. package/dist/lock-file.js.map +1 -0
  53. package/dist/manifest-loader.d.ts +33 -0
  54. package/dist/manifest-loader.d.ts.map +1 -0
  55. package/dist/manifest-loader.js +70 -0
  56. package/dist/manifest-loader.js.map +1 -0
  57. package/dist/manifest-poll-scheduler.d.ts +31 -0
  58. package/dist/manifest-poll-scheduler.d.ts.map +1 -0
  59. package/dist/manifest-poll-scheduler.js +59 -0
  60. package/dist/manifest-poll-scheduler.js.map +1 -0
  61. package/dist/manifest-version-store-file.d.ts +18 -0
  62. package/dist/manifest-version-store-file.d.ts.map +1 -0
  63. package/dist/manifest-version-store-file.js +40 -0
  64. package/dist/manifest-version-store-file.js.map +1 -0
  65. package/dist/manifest-version-store.d.ts +14 -0
  66. package/dist/manifest-version-store.d.ts.map +1 -0
  67. package/dist/manifest-version-store.js +13 -0
  68. package/dist/manifest-version-store.js.map +1 -0
  69. package/dist/network-directory-node.d.ts +94 -0
  70. package/dist/network-directory-node.d.ts.map +1 -0
  71. package/dist/network-directory-node.js +626 -0
  72. package/dist/network-directory-node.js.map +1 -0
  73. package/dist/nonce-dedup.d.ts +68 -0
  74. package/dist/nonce-dedup.d.ts.map +1 -0
  75. package/dist/nonce-dedup.js +204 -0
  76. package/dist/nonce-dedup.js.map +1 -0
  77. package/dist/notification-dispatcher.d.ts +65 -0
  78. package/dist/notification-dispatcher.d.ts.map +1 -0
  79. package/dist/notification-dispatcher.js +138 -0
  80. package/dist/notification-dispatcher.js.map +1 -0
  81. package/dist/registration-context.d.ts +69 -0
  82. package/dist/registration-context.d.ts.map +1 -0
  83. package/dist/registration-context.js +118 -0
  84. package/dist/registration-context.js.map +1 -0
  85. package/dist/registration-manager.d.ts +72 -0
  86. package/dist/registration-manager.d.ts.map +1 -0
  87. package/dist/registration-manager.js +267 -0
  88. package/dist/registration-manager.js.map +1 -0
  89. package/dist/registration-persistence.d.ts +131 -0
  90. package/dist/registration-persistence.d.ts.map +1 -0
  91. package/dist/registration-persistence.js +233 -0
  92. package/dist/registration-persistence.js.map +1 -0
  93. package/dist/retry-queue.d.ts +144 -0
  94. package/dist/retry-queue.d.ts.map +1 -0
  95. package/dist/retry-queue.js +444 -0
  96. package/dist/retry-queue.js.map +1 -0
  97. package/dist/seal-frontier-verify.d.ts +58 -0
  98. package/dist/seal-frontier-verify.d.ts.map +1 -0
  99. package/dist/seal-frontier-verify.js +87 -0
  100. package/dist/seal-frontier-verify.js.map +1 -0
  101. package/dist/seal-legibility-tbs.d.ts +25 -0
  102. package/dist/seal-legibility-tbs.d.ts.map +1 -0
  103. package/dist/seal-legibility-tbs.js +78 -0
  104. package/dist/seal-legibility-tbs.js.map +1 -0
  105. package/dist/seal-upgrade.d.ts +90 -0
  106. package/dist/seal-upgrade.d.ts.map +1 -0
  107. package/dist/seal-upgrade.js +178 -0
  108. package/dist/seal-upgrade.js.map +1 -0
  109. package/dist/session-assignment-parser.d.ts +22 -0
  110. package/dist/session-assignment-parser.d.ts.map +1 -0
  111. package/dist/session-assignment-parser.js +139 -0
  112. package/dist/session-assignment-parser.js.map +1 -0
  113. package/dist/session-ceremony.d.ts +156 -0
  114. package/dist/session-ceremony.d.ts.map +1 -0
  115. package/dist/session-ceremony.js +447 -0
  116. package/dist/session-ceremony.js.map +1 -0
  117. package/dist/session-connection-gater.d.ts +91 -0
  118. package/dist/session-connection-gater.d.ts.map +1 -0
  119. package/dist/session-connection-gater.js +146 -0
  120. package/dist/session-connection-gater.js.map +1 -0
  121. package/dist/session-node-manager.d.ts +585 -0
  122. package/dist/session-node-manager.d.ts.map +1 -0
  123. package/dist/session-node-manager.js +2609 -0
  124. package/dist/session-node-manager.js.map +1 -0
  125. package/dist/session-relay-client.d.ts +101 -0
  126. package/dist/session-relay-client.d.ts.map +1 -0
  127. package/dist/session-relay-client.js +520 -0
  128. package/dist/session-relay-client.js.map +1 -0
  129. package/dist/session-tree.d.ts +80 -0
  130. package/dist/session-tree.d.ts.map +1 -0
  131. package/dist/session-tree.js +123 -0
  132. package/dist/session-tree.js.map +1 -0
  133. package/dist/signaling-connect.d.ts +83 -0
  134. package/dist/signaling-connect.d.ts.map +1 -0
  135. package/dist/signaling-connect.js +266 -0
  136. package/dist/signaling-connect.js.map +1 -0
  137. package/dist/transcript-cipher.d.ts +31 -0
  138. package/dist/transcript-cipher.d.ts.map +1 -0
  139. package/dist/transcript-cipher.js +74 -0
  140. package/dist/transcript-cipher.js.map +1 -0
  141. package/dist/transport-composition.d.ts +31 -0
  142. package/dist/transport-composition.d.ts.map +1 -0
  143. package/dist/transport-composition.js +55 -0
  144. package/dist/transport-composition.js.map +1 -0
  145. package/dist/transport-selector.d.ts +189 -0
  146. package/dist/transport-selector.d.ts.map +1 -0
  147. package/dist/transport-selector.js +195 -0
  148. package/dist/transport-selector.js.map +1 -0
  149. package/dist/types.d.ts +265 -0
  150. package/dist/types.d.ts.map +1 -0
  151. package/dist/types.js +33 -0
  152. package/dist/types.js.map +1 -0
  153. package/package.json +4 -4
@@ -0,0 +1,2609 @@
1
+ /**
2
+ * CELLO Daemon — SessionNodeManager
3
+ *
4
+ * Manages the lifecycle of all ephemeral session nodes:
5
+ * 1. Per-session nodes: fresh transport key + Peer ID, connectionGater allows
6
+ * only the designated counterparty. Created during cello_initiate_session
7
+ * (outbound) or cello_await_session (inbound, via standing receiver handoff).
8
+ * 2. Standing receiver node: pre-created, open gater, kept alive at all times.
9
+ * Handed to the first inbound session; immediately replaced.
10
+ * 3. 32-node cap: enforced before any new node is created.
11
+ * 4. Session status in SQLite: active → sealed (on close) or interrupted
12
+ * (on graceful shutdown or SIGKILL-restart detection).
13
+ *
14
+ * Pseudocode (SPARC Phase P):
15
+ *
16
+ * initialize():
17
+ * 1. Open SQLite (node:sqlite), create sessions table if not exists
18
+ * 2. Detect interrupted sessions: SELECT * FROM sessions WHERE status='active'
19
+ * → batch-update to 'interrupted', log session.interrupted.detected for each
20
+ * (source: 'daemon_restart') — runs before IPC socket opens so no race
21
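+ * 3. (Superseded by DOD-LOOP-1: standing receivers are now per-agent and created when an agent comes online, so steps 3–5 no longer run inside initialize().) Create standing receiver node (fresh libp2p, open gater, sentinel agentName)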
22
+ * 4. Start standing receiver, set standingReceiverReady=true
23
+ * 5. Log session.node.created for the standing receiver
24
+ *
25
+ * createSessionNode(sessionId, agentName, counterpartyPubkey, counterpartyPeerId, correlationId):
26
+ * Pseudocode:
27
+ * 1. Check activeNodes.size >= MAX_SESSION_NODES → log cap.reached, return error
28
+ * 2. Create SessionConnectionGater(counterpartyPeerId) — restricted from birth
29
+ * 3. nodeFactory.createNode({gater}) → fresh libp2p node
30
+ * 4. node.start() — bind TCP ephemeral port
31
+ * 5. Insert SQLite row status='active'
32
+ * 6. Log session.node.created
33
+ * 7. Add to activeNodes map
34
+ * 8. Return {ok:true, peerId, addrs}
35
+ * On libp2p error: extract error.message (never ${error}), log create.failed, return error
36
+ *
37
+ * acceptSession(sessionId, agentName, counterpartyPubkey, initiatorPeerId, correlationId):
38
+ * Pseudocode:
39
+ * 1. If !standingReceiverReady → return standing_receiver_unavailable
40
+ * 2. Take standing receiver from slot (clear slot atomically)
41
+ * 3. gater.setAllowedPeer(initiatorPeerId) ← BEFORE returning multiaddr (AC-015)
42
+ * 4. Insert SQLite row status='active'
43
+ * 5. Log session.node.created
44
+ * 6. Add to activeNodes map
45
+ * 7. Trigger async replacement of standing receiver (do NOT await)
46
+ * 8. Return {ok:true, peerId, addrs}
47
+ *
48
+ * destroySessionNode(sessionId, reason):
49
+ * Pseudocode:
50
+ * 1. Find node in activeNodes
51
+ * 2. stop node
52
+ * 3. Update SQLite status to sealed/interrupted/error
53
+ * 4. Remove from activeNodes
54
+ * 5. Log session.node.destroyed
55
+ *
56
+ * gracefulShutdown():
57
+ * Pseudocode:
58
+ * 1. Get all activeNodes
59
+ * 2. For each: update SQLite 'interrupted', log destroyed(reason:'interrupted')
60
+ * 3. Stop all nodes
61
+ * 4. Stop standing receiver
62
+ *
63
+ * getStatus(): { standingReceiverReady: boolean }
64
+ */
65
+ // node:sqlite (DatabaseSync) requires Node.js >= 24 (stable in 24 LTS).
66
+ // The engines field in package.json is set to ">=24" specifically because of this
67
+ // dependency — do not lower the engine floor without replacing this import.
68
+ import { DatabaseSync } from "node:sqlite";
69
+ import { TranscriptCipher } from "./transcript-cipher.js";
70
+ import { randomUUID, createHash } from "node:crypto";
71
+ import * as lp from "it-length-prefixed";
72
+ import { decode, Encoder } from "cbor-x";
73
+ import { MAX_SESSION_NODES, STANDING_RECEIVER_AGENT_NAME } from "./types.js";
74
+ import { SessionConnectionGater } from "./session-connection-gater.js";
75
+ import { SessionTree } from "./session-tree.js";
76
+ import { CELLO_CONTENT_PROTOCOL_ID, NodeAutoNatService } from "@cello-protocol/transport";
77
+ import { verify } from "@cello-protocol/crypto";
78
+ import { encodeSealPayload } from "@cello-protocol/protocol-types";
79
+ import { AgentRelayClient, LEAF_KIND_CTRL } from "./session-relay-client.js";
80
+ const CBOR_ENC = new Encoder({ tagUint8Array: false });
81
+ // ─── SessionNodeManager ───────────────────────────────────────────────────────
82
+ export class SessionNodeManager {
83
+ #factory;
84
+ #logger;
85
+ #dbPath;
86
+ #db = null;
87
+ // DOD-LOG-1: at-rest cipher for the durable transcript blobs (loaded in init()).
88
+ #transcriptCipher = null;
89
+ #activeNodes = new Map();
90
+ // M7 DOD-SPINE-6 / MSG-001-3b: ONE relay witness client per AGENT (keyed by agent name).
91
+ // The relay authenticates and keys delivery by the agent's K_local pubkey, so all of an
92
+ // agent's sessions share one authenticated relay stream (each frame carries session_id).
93
+ #relayClients = new Map();
94
+ // DOD-LOOP-1: the standing receiver is PER-AGENT, not per-daemon. A daemon hosting two agents
95
+ // (the loopback case) needs each agent to have its OWN inbound receiver node — otherwise the
96
+ // initiator (consuming its agent's standing receiver) and the responder (consuming its agent's)
97
+ // would contend for a single node and thrash. Keyed by agentName. A creation-in-flight guard set
98
+ // prevents two concurrent ensure() calls from building two nodes for the same agent.
99
+ #standingReceivers = new Map();
100
+ #standingReceiverCreating = new Set();
101
+ // Agents whose removeStandingReceiverForAgent ran while an #ensureStandingReceiver for them was
102
+ // in flight (parked on createNode/start, so the map had no entry to delete yet). The in-flight
103
+ // ensure checks this after start() and tears the fresh node down instead of installing an SR for
104
+ // an agent that has since gone offline (cello_stop_agent race). A fresh ensure clears it.
105
+ #standingReceiverRemoving = new Set();
106
+ // Set once gracefulShutdown begins. The standing-receiver replacement that
107
+ // acceptSession kicks off runs un-awaited (AC-003), so it can be in flight when
108
+ // shutdown starts; #createStandingReceiver checks this flag and stops a freshly
109
+ // built node instead of leaving an orphan bound to a TCP port (review M2).
110
+ #shuttingDown = false;
111
+ // DAEMON-004: lazily-loaded in-memory cache of each session's daemon-owned
112
+ // Merkle tree. The authoritative store is the session_tree_leaves table —
113
+ // the cache is rebuilt from it on first access (so it survives a restart).
114
+ #trees = new Map();
115
+ // DAEMON-004: per-session FIFO buffer of verified received content awaiting
116
+ // cello_receive. Populated by ingestReceivedContent / the content stream handler.
117
+ #receivedContent = new Map();
118
+ // CELLO-M7-TRANSPORT-001: the directory-node multiaddrs serving as AutoNAT
119
+ // probers (SI-002). Empty () => [] when the directory is in 'reconnecting'
120
+ // state — AutoNAT cannot run and dialability stays the conservative default.
121
+ #autoNatProbers;
122
+ // M7-SESSION-003: per-session direct-path counterparty liveness, observed on the
123
+ // session node's onPeerConnect ('alive') / onPeerDisconnect ('gone'). This is
124
+ // the liveness authority for direct sessions — the unilateral-seal gate reads
125
+ // it (relay sessions query the relay instead). NEVER the directory (SI-002).
126
+ #sessionLiveness = new Map();
127
+ // M7-UPGRADE-002: sessions whose content integrity could NOT be verified (a content_hash
128
+ // mismatch = tamper was observed). The auto-acknowledge gate (SI-002) refuses to auto-co-sign
129
+ // for a desynced session — B must never blind-sign a tail it cannot verify. Keyed by sessionId hex.
130
+ #contentDesynced = new Set();
131
+ // DOD-MSG-4 (strict in-order): the RELAY is the ordering authority (Structure 2). For each
132
+ // message the relay witnesses, it delivers B a (content_hash -> canonical sequence) binding via
133
+ // the leaf_deliver stream. B records it here — keyed #k(agent,session) -> (contentHashHex -> seq)
134
+ // — and orders its transcript by THIS, never by a sender-stamped field (sovereign-node: B does
135
+ // not trust the counterparty for ordering). When B has no witness for an arriving hash
136
+ // (relay-degraded), it falls back to arrival-order append.
137
+ #witnessedSeq = new Map();
138
+ // DOD-MSG-4: out-of-order direct arrivals. A content frame whose canonical sequence is AHEAD of
139
+ // the next expected leaf is HELD here (keyed #k(agent,session) -> (canonicalSeq -> entry)) instead
140
+ // of being appended out of order. Once the missing in-between sequence(s) land (recovered from the
141
+ // relay mailbox), #releaseHeld drains the held entries in canonical order. content is plaintext in
142
+ // memory only — evicted on teardown, same as #receivedContent.
143
+ #heldContent = new Map();
144
+ // DOD-MSG-4: the relay's high-water canonical sequence for this session — the largest sequence the
145
+ // relay has witnessed (max over leaf_deliver). Keyed #k(agent,session). EXPOSED for the next
146
+ // sub-increment (catch-up-before-live: on reconnect, hold live arrivals until the tree reaches this
147
+ // so a fresh message can't append ahead of earlier ones still parked) — it is NOT yet consumed by
148
+ // the gate, which today holds purely on the per-message `canonicalSeq > nextExpected` test.
149
+ #highWaterSeq = new Map();
150
+ // M7-UPGRADE-002: sessions for which B has already submitted its responder SEAL leaf (via
151
+ // auto-ack OR cello_close_session). Idempotency guard — A's SEAL ctrl leaf may be delivered
152
+ // more than once (and the relay echoes leaves), so auto-ack fires AT MOST ONCE per session.
153
+ #responderSealSubmitted = new Set();
154
+ // M7-SESSION-001 (M-1 PUSH): optional callback fired when a session changes
155
+ // state, so the composition root can dispatch a session_state_changed
156
+ // notification to live MCP clients. Injected via a setter AFTER construction
157
+ // because the NotificationDispatcher is built later than this manager in
158
+ // daemon.ts (it depends on the IPC server). Never required — when unset,
159
+ // state changes are persisted and logged but no push notification is emitted.
160
+ #onSessionStateChanged = null;
161
+ // CELLO-M7-MSG-001 (AC-001/AC-002/AC-003): the send is no longer fire-and-forget.
162
+ // After a content_frame is delivered over the direct session channel, the sender
163
+ // arms a TTF timer and waits for an unsigned, transport-authenticated `persisted`
164
+ // delivery ACK on the same /cello/content/1.0.0 protocol. A persisted ACK cancels
165
+ // the timer (content.delivery.acked); TTF expiry hands the content to the park
166
+ // backstop. Keyed sessionId → contentHashHex → entry.
167
+ #awaitingAck = new Map();
168
+ // TTF (time-to-flush) for an un-acked content entry. Injectable so tests can drive
169
+ // expiry deterministically; production default sits in the Part-4 proposed 10–30s band.
170
+ #contentTtfMs = 20_000;
171
+ // CELLO-M7-MSG-001: side-effect hooks the composition root wires to the durable
172
+ // retry_queue (and, in 3b, the relay park deposit). Injected after construction
173
+ // because RetryQueue is built later in daemon.ts. When unset, the awaiting-ACK timer
174
+ // still fires and the ACK still resolves — only the durable crash-backstop is skipped.
175
+ #onAwaitingPersisted = null;
176
+ #onAwaitingTtf = null;
177
+ /**
178
+ * MSG-001-3b (2b): the live content-park deposit. The manager resolves the recipient + relay
179
+ * endpoint from the session entry and calls this when a send is NOT confirmed delivered
180
+ * (direct-fail or TTF expiry). The daemon's hook seals (sealToRecipient) + deposits via
181
+ * ContentParkClient. Best-effort.
182
+ */
183
+ #contentParkHook = null;
184
+ constructor(opts) {
185
+ this.#factory = opts.factory;
186
+ this.#logger = opts.logger;
187
+ this.#dbPath = opts.dbPath;
188
+ if (typeof opts.contentTtfMs === "number" && opts.contentTtfMs > 0) {
189
+ this.#contentTtfMs = opts.contentTtfMs;
190
+ }
191
+ this.#autoNatProbers = opts.autoNatProbers ?? (() => []);
192
+ }
193
+ /**
194
+ * CELLO-M7-MSG-001: wire the durable-backstop side effects of the awaiting-ACK
195
+ * lifecycle. `onPersisted` clears the durable retry_queue entry when a persisted ACK
196
+ * arrives; `onTtf` records/parks the un-acked content when the TTF timer fires.
197
+ * Injected by the composition root (daemon.ts) after the RetryQueue exists.
198
+ */
199
+ setAwaitingAckHooks(hooks) {
200
+ this.#onAwaitingPersisted = hooks.onPersisted ?? null;
201
+ this.#onAwaitingTtf = hooks.onTtf ?? null;
202
+ }
203
+ /**
204
+ * MSG-001-3b (2b): inject the live content-park deposit (seal + ContentParkClient.deposit).
205
+ * Injected by the composition root (daemon.ts). When absent, a not-confirmed send still records
206
+ * the durable awaiting entry (crash backstop) but does not deposit live.
207
+ */
208
+ setContentParkHook(fn) {
209
+ this.#contentParkHook = fn;
210
+ }
211
+ /**
212
+ * MSG-001-3b (2b): deposit un-confirmed content to the relay store-and-forward backstop — keyed
213
+ * to the recipient, on the SAME relay this session is witnessed by — so an offline recipient
214
+ * recovers it (at the sequence the witness already assigned, R1). Best-effort, never throws.
215
+ */
216
+ #parkContent(agentName, sessionId, contentHashHex, content, structure1Cbor, structure2Cbor) {
217
+ const hook = this.#contentParkHook;
218
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
219
+ if (!hook || !entry || !entry.relayPeerId || !entry.relayAddrs)
220
+ return;
221
+ void hook({
222
+ sessionId,
223
+ recipientPubkeyHex: entry.counterpartyPubkey,
224
+ relayPeerId: entry.relayPeerId,
225
+ relayAddrs: entry.relayAddrs,
226
+ contentHashHex,
227
+ content,
228
+ // DOD-MSG-4 (2b): carry the relay's signed ordering record so the parked entry is self-ordering
229
+ // on recover too (sealed INTO the ciphertext envelope — INV-3: the relay still sees only ciphertext).
230
+ structure1Cbor,
231
+ structure2Cbor,
232
+ }).catch((err) => {
233
+ this.#logger.warn("content.park.deposit.failed", {
234
+ sessionId,
235
+ contentHash: contentHashHex,
236
+ error: err instanceof Error ? err.message : String(err),
237
+ });
238
+ });
239
+ }
240
+ // ─── Initialization ──────────────────────────────────────────────────────
241
+ async initialize() {
242
+ // Step 1: Open SQLite and create sessions table
243
+ this.#db = new DatabaseSync(this.#dbPath);
244
+ // DOD-LOG-1: at-rest cipher for the durable transcript. Dedicated 0600 key beside the DB.
245
+ this.#transcriptCipher = TranscriptCipher.loadOrCreate(`${this.#dbPath}.transcript-key`);
246
+ this.#db.exec(`
247
+ CREATE TABLE IF NOT EXISTS sessions (
248
+ session_id TEXT NOT NULL,
249
+ agent_name TEXT NOT NULL,
250
+ counterparty_pubkey TEXT NOT NULL,
251
+ status TEXT NOT NULL,
252
+ created_at INTEGER NOT NULL,
253
+ updated_at INTEGER NOT NULL,
254
+ -- DOD-LOOP-1: composite key so two of the operator's agents can hold both ends of the
255
+ -- SAME session_id on ONE daemon (the loopback case). A bare session_id PK would reject
256
+ -- the second end's row.
257
+ PRIMARY KEY (agent_name, session_id)
258
+ )
259
+ `);
260
+ // M7-SESSION-001: idempotent schema extension — add message_count and interrupted_at
261
+ // columns if they do not exist. ALTER TABLE IF NOT EXISTS COLUMN is not supported by
262
+ // older SQLite; we use a try/catch per column as the idempotent approach.
263
+ for (const ddl of [
264
+ "ALTER TABLE sessions ADD COLUMN message_count INTEGER NOT NULL DEFAULT 0",
265
+ "ALTER TABLE sessions ADD COLUMN interrupted_at TEXT",
266
+ // MSG-001-3b (MSG-2 startup-flush): persist the session's relay endpoint so the
267
+ // crash-backstop flush can deposit un-acked content after a restart, when the
268
+ // in-memory entry is gone. relay_addrs is a JSON array of multiaddr strings.
269
+ "ALTER TABLE sessions ADD COLUMN relay_peer_id TEXT",
270
+ "ALTER TABLE sessions ADD COLUMN relay_addrs TEXT",
271
+ // M7-SESSION-004 (AC-005): persist the seal certificate's legibility object with the
272
+ // sealed record so it survives a daemon restart and is readable on the cert-read surface
273
+ // (cello_get_sealed_receipt). JSON string with hex-encoded pubkeys; NULL until sealed.
274
+ // Inline idempotent migration (NOT Flyway — this is the client-side SQLite, AC-011).
275
+ "ALTER TABLE sessions ADD COLUMN seal_legibility TEXT",
276
+ "ALTER TABLE sessions ADD COLUMN sealed_root_hex TEXT",
277
+ // M7 legibility-TBS-binding (responder verify): the counterparty's FROST primary (group)
278
+ // pubkey, taken from the FROST-signed SessionAssignment's signer_pubkey. The responder uses
279
+ // it to VERIFY the bilateral seal signature locally (the seal is signed by the initiator's
280
+ // primary), not just accept it. NULL when this party initiated (it uses its own primary).
281
+ "ALTER TABLE sessions ADD COLUMN counterparty_primary_pubkey TEXT",
282
+ ]) {
283
+ try {
284
+ this.#db.exec(ddl);
285
+ }
286
+ catch (err) {
287
+ // Only swallow the idempotent "duplicate column name" case (the column
288
+ // already exists from a prior init). Any other failure — disk full,
289
+ // SQLITE_LOCKED, corruption — must propagate, otherwise the daemon would
290
+ // run without these columns and later silently read undefined.
291
+ const msg = err instanceof Error ? err.message : String(err);
292
+ if (!msg.includes("duplicate column name"))
293
+ throw err;
294
+ }
295
+ }
296
+ // M7-SESSION-001 (H-1): side table holding the verified bilateral
297
+ // SEAL-INTERRUPTED commitment artifacts. A side table (CREATE TABLE IF NOT
298
+ // EXISTS) is inherently idempotent — no ALTER TABLE / duplicate-column
299
+ // handling required. We keep BOTH parties' signed leaves and the agreed
300
+ // Merkle root so the achieved commitment is never discarded.
301
+ this.#db.exec(`
302
+ CREATE TABLE IF NOT EXISTS seal_interrupted_artifacts (
303
+ agent_name TEXT NOT NULL,
304
+ session_id TEXT NOT NULL,
305
+ role TEXT NOT NULL,
306
+ own_leaf TEXT NOT NULL,
307
+ counterparty_leaf TEXT NOT NULL,
308
+ merkle_root TEXT NOT NULL,
309
+ nonce TEXT NOT NULL,
310
+ created_at INTEGER NOT NULL,
311
+ -- DOD-LOOP-1: composite key (per-agent end of a loopback session).
312
+ PRIMARY KEY (agent_name, session_id)
313
+ )
314
+ `);
315
+ // DAEMON-004 (AC-007 / SI-001): the daemon-owned per-session Merkle tree,
316
+ // persisted as an ordered list of leaf hashes. The (session_id, leaf_index)
317
+ // primary key enforces append-order uniqueness; a fresh daemon reconstructs
318
+ // each tree from these rows so the transcript survives a restart. Querying
319
+ // by session_id ORDER BY leaf_index is the only read pattern.
320
+ this.#db.exec(`
321
+ CREATE TABLE IF NOT EXISTS session_tree_leaves (
322
+ agent_name TEXT NOT NULL,
323
+ session_id TEXT NOT NULL,
324
+ leaf_index INTEGER NOT NULL,
325
+ leaf_kind TEXT NOT NULL,
326
+ leaf_hash_hex TEXT NOT NULL,
327
+ created_at INTEGER NOT NULL,
328
+ -- DOD-LOOP-1: composite key so each agent's end has its own append-ordered tree.
329
+ PRIMARY KEY (agent_name, session_id, leaf_index)
330
+ )
331
+ `);
332
+ // DOD-LOG-1 (PERSIST-LOG-001): the durable, ENCRYPTED-at-rest readable transcript. Each row
333
+ // is keyed by the canonical leaf `sequence`, so it JOINS to session_tree_leaves(leaf_index) —
334
+ // a stored message is provably behind a committed hash-chain leaf, not a loose dump. `blob` is
335
+ // the AES-256-GCM envelope of the readable plaintext (relay/directory never see this — INV-3).
336
+ this.#db.exec(`
337
+ CREATE TABLE IF NOT EXISTS transcript (
338
+ agent_name TEXT NOT NULL,
339
+ session_id TEXT NOT NULL,
340
+ sequence INTEGER NOT NULL,
341
+ direction TEXT NOT NULL, -- 'sent' | 'received'
342
+ blob BLOB NOT NULL, -- AES-256-GCM(iv||ct||tag) of the plaintext
343
+ created_at INTEGER NOT NULL,
344
+ PRIMARY KEY (agent_name, session_id, sequence, direction)
345
+ )
346
+ `);
347
+ // Step 2: Detect interrupted sessions (SIGKILL detection — AC-010).
348
+ // Any 'active' row in a freshly-started daemon is a remnant of a prior
349
+ // killed process. Batch-update to 'interrupted' before IPC opens.
350
+ const activeRows = this.#db
351
+ .prepare("SELECT * FROM sessions WHERE status = 'active'")
352
+ .all();
353
+ if (activeRows.length > 0) {
354
+ const now = Date.now();
355
+ const interruptedAt = new Date(now).toISOString();
356
+ for (const row of activeRows) {
357
+ try {
358
+ this.#db
359
+ .prepare("UPDATE sessions SET status = 'interrupted', updated_at = ?, interrupted_at = COALESCE(interrupted_at, ?) WHERE agent_name = ? AND session_id = ?")
360
+ .run(now, interruptedAt, row.agent_name, row.session_id);
361
+ this.#logger.warn("session.interrupted.detected", {
362
+ sessionId: row.session_id,
363
+ agentName: row.agent_name,
364
+ source: "daemon_restart",
365
+ });
366
+ }
367
+ catch (err) {
368
+ this.#logger.error("session.interrupt.db.write.failed", {
369
+ sessionId: row.session_id,
370
+ error: err instanceof Error ? err.message : String(err),
371
+ });
372
+ }
373
+ }
374
+ }
375
+ // DOD-LOOP-1: standing receivers are now PER-AGENT, created when each agent comes online
376
+ // (cello_start_agent → ensureStandingReceiverForAgent). No daemon-global receiver is created at
377
+ // init (no agent is online yet). The initiate/accept paths kick off creation on demand if missing.
378
+ }
379
+ // ─── Public API ──────────────────────────────────────────────────────────
380
+ /**
381
+ * Get the underlying DatabaseSync handle.
382
+ * Used by the composition root (daemon.ts) to pass to RetryQueue and
383
+ * NonceDedupStore — they share the same SQLCipher DB file (DAEMON-003 AC-008).
384
+ */
385
+ getDb() {
386
+ if (!this.#db) {
387
+ throw new Error("SessionNodeManager not initialized — call initialize() first");
388
+ }
389
+ return this.#db;
390
+ }
391
+ /**
392
+ * DOD-LOG-1: the at-rest cipher, shared with the RetryQueue so its content_blob is encrypted with
393
+ * the SAME key as the transcript. Available after initialize().
394
+ */
395
+ getTranscriptCipher() {
396
+ if (!this.#transcriptCipher) {
397
+ throw new Error("SessionNodeManager not initialized — call initialize() first");
398
+ }
399
+ return this.#transcriptCipher;
400
+ }
401
+ /**
402
+ * DOD-LOG-1: append one readable message to the durable, encrypted-at-rest transcript, keyed by
403
+ * the canonical leaf `sequence` so it joins to the committed hash chain. Idempotent on replay
404
+ * (INSERT OR IGNORE — the same (session, sequence, direction) is written at most once). Never
405
+ * throws into the caller's content path: a transcript-write failure is logged, not fatal.
406
+ */
407
+ recordTranscriptMessage(agentName, sessionId, sequence, direction, plaintext, correlationId) {
408
+ if (!this.#db || !this.#transcriptCipher)
409
+ return;
410
+ try {
411
+ const blob = this.#transcriptCipher.encrypt(plaintext);
412
+ this.#db
413
+ .prepare(`INSERT OR IGNORE INTO transcript (agent_name, session_id, sequence, direction, blob, created_at)
414
+ VALUES (?, ?, ?, ?, ?, ?)`)
415
+ .run(agentName, sessionId, sequence, direction, blob, Date.now());
416
+ this.#logger.info("transcript.message.recorded", { sessionId, agentName, sequence, direction, correlationId });
417
+ }
418
+ catch (err) {
419
+ this.#logger.warn("transcript.message.record.failed", {
420
+ sessionId, agentName, sequence, direction,
421
+ reason: err instanceof Error ? err.message : String(err),
422
+ correlationId,
423
+ });
424
+ }
425
+ }
426
+ /**
427
+ * DOD-LOG-1: read a session's durable transcript back (after a restart), decrypted and ordered by
428
+ * canonical sequence then direction. A blob that fails to decrypt (tamper/wrong key) is skipped
429
+ * with a loud log rather than crashing the read.
430
+ */
431
+ readTranscript(agentName, sessionId) {
432
+ if (!this.#db || !this.#transcriptCipher)
433
+ return { messages: [], undecryptable: 0 };
434
+ const rows = this.#db
435
+ .prepare(`SELECT sequence, direction, blob, created_at FROM transcript
436
+ WHERE agent_name = ? AND session_id = ? ORDER BY sequence ASC, direction ASC`)
437
+ .all(agentName, sessionId);
438
+ const messages = [];
439
+ let undecryptable = 0;
440
+ for (const r of rows) {
441
+ const pt = this.#transcriptCipher.decrypt(r.blob instanceof Uint8Array ? r.blob : new Uint8Array(r.blob));
442
+ if (pt === null) {
443
+ // A row that fails GCM auth (tamper / wrong key) is REPORTED to the reader, not silently
444
+ // dropped — a gap in the transcript must be visible, not invisible (the reader needs to
445
+ // distinguish "never existed" from "tampered/unreadable").
446
+ undecryptable += 1;
447
+ this.#logger.warn("transcript.message.decrypt.failed", { sessionId, agentName, sequence: r.sequence, direction: r.direction });
448
+ continue;
449
+ }
450
+ messages.push({
451
+ sequence: r.sequence,
452
+ direction: r.direction === "sent" ? "sent" : "received",
453
+ text: new TextDecoder().decode(pt),
454
+ createdAt: r.created_at,
455
+ });
456
+ }
457
+ return { messages, undecryptable };
458
+ }
459
+ /** DOD-LOOP-1: whether the given agent has a standing receiver ready (any agent if omitted). */
460
+ getStandingReceiverReady(agentName) {
461
+ if (agentName !== undefined)
462
+ return this.#standingReceivers.has(agentName);
463
+ return this.#standingReceivers.size > 0;
464
+ }
465
+ /** First ready standing receiver (any agent) — for agent-agnostic OUTBOUND use (gater-open). */
466
+ #anyStandingReceiver() {
467
+ for (const sr of this.#standingReceivers.values())
468
+ return sr;
469
+ return null;
470
+ }
471
+ /**
472
+ * The current standing receiver node's session-transport coordinates (peer id +
473
+ * listen multiaddrs), or null if it is not ready. These are the addresses a local
474
+ * SessionNegotiator advertises as this node's counterparty endpoint so the initiator
475
+ * can dial it, and the value an inbound session_assignment carries in its
476
+ * counterparty_session_* fields. Read-only — does NOT consume the standing receiver
477
+ * (unlike acceptSession, which hands it off).
478
+ */
479
+ getStandingReceiverInfo(agentName) {
480
+ // DOD-LOOP-1: the initiator advertises ITS OWN agent's standing receiver, which it then reuses
481
+ // as the session node — so the advertised endpoint matches the node the counterparty dials.
482
+ const sr = this.#standingReceivers.get(agentName);
483
+ if (!sr)
484
+ return null;
485
+ return { peerId: sr.node.getPeerId(), addrs: sr.node.listenAddresses() };
486
+ }
487
+ /**
488
+ * The standing receiver's libp2p node — a general-purpose, OPEN-gater node usable for
489
+ * OUTBOUND dials that are not session-scoped (e.g. the content-park deposit/pull to the
490
+ * relay, MSG-001-3b). Session nodes have restrictive gaters; the standing receiver does not.
491
+ * Returns null until the receiver is ready.
492
+ */
493
+ getStandingReceiverNode(agentName) {
494
+ // With an agentName: that agent's own standing-receiver node (needed when the dial must
495
+ // originate from a SPECIFIC agent — e.g. the startup content-park re-park, where the
496
+ // depositor is the original sender). Without one: any ready standing receiver (outbound
497
+ // content-park deposit/pull to the relay — open gater, not session-scoped).
498
+ if (agentName !== undefined)
499
+ return this.#standingReceivers.get(agentName)?.node ?? null;
500
+ return this.#anyStandingReceiver()?.node ?? null;
501
+ }
502
+ /**
503
+ * The libp2p Peer ID of an active session's node (N_A for an initiated session), or
504
+ * null if no active node exists for it. This is the initiator's session peer id that an
505
+ * inbound session_assignment must carry to the counterparty (so the counterparty gates
506
+ * its handed-off receiver to it). Read-only.
507
+ */
508
+ getSessionNodePeerId(agentName, sessionId) {
509
+ return this.#activeNodes.get(this.#k(agentName, sessionId))?.node.getPeerId() ?? null;
510
+ }
511
+ /**
512
+ * CELLO-M7-TRANSPORT-001: the AutoNAT service wrapping the current standing
513
+ * receiver node, or null if the standing receiver is not ready. The composition
514
+ * root uses this as the daemon's runtime IAutoNatService — its getDialability()
515
+ * drives the SessionAssignment advertised address (AC-004/AC-019), and it is the
516
+ * source of the transport.autonat.result / transport.autonat.unavailable events.
517
+ */
518
+ getStandingReceiverAutoNat() {
519
+ // DOD-LOOP-1: the daemon-level autonat source is any ready standing receiver; null until one
520
+ // exists (the composition root falls back to LocalAutoNatStub). Per-session advertised dialability
521
+ // comes from the initiating agent's own SR via getStandingReceiverInfo, not this daemon-level value.
522
+ return this.#anyStandingReceiver()?.autoNat ?? null;
523
+ }
524
+ /**
525
+ * M7-SESSION-001 (M-1 PUSH): register the session-state-change callback.
526
+ * Called by the composition root (daemon.ts) after the NotificationDispatcher
527
+ * exists. Setter injection avoids a construction-order/circular dependency.
528
+ */
529
+ setOnSessionStateChanged(cb) {
530
+ this.#onSessionStateChanged = cb;
531
+ }
532
+ /**
533
+ * DOD-LOOP-1: the session core is keyed by (agentName, sessionId), NOT sessionId alone. Two of
534
+ * the operator's own agents (the loopback case) can hold the two ends of the SAME session_id on
535
+ * ONE daemon, so a bare session_id is ambiguous between them. This composite string key — the
536
+ * agent name and the hex session id joined by a 0x1f unit separator (which appears in neither) —
537
+ * is the key for every in-memory session-core map (#activeNodes, #trees, #receivedContent,
538
+ * #sessionLiveness, #contentDesynced, #responderSealSubmitted, #awaitingAck). #relayClients is
539
+ * already per-agent (its own key), and the standing receivers are keyed by agent name directly.
540
+ */
541
+ #k(agentName, sessionId) {
542
+ return `${agentName}\x1f${sessionId}`;
543
+ }
544
+ /**
545
+ * Create a new outbound session node.
546
+ * Called during cello_initiate_session.
547
+ *
548
+ * @param sessionId Unique session ID (hex string)
549
+ * @param agentName Name of the initiating agent
550
+ * @param counterpartyPubkey Counterparty's K_local public key (hex)
551
+ * @param counterpartyPeerId Counterparty's session-layer Peer ID (for gater)
552
+ * @param correlationId Correlation ID minted at session initiation
553
+ */
554
+ async createSessionNode(sessionId, agentName, counterpartyPubkey, counterpartyPeerId, correlationId, reuseStandingReceiver = false, relay) {
555
+ // Cap enforcement (AC-006)
556
+ if (this.#activeNodes.size >= MAX_SESSION_NODES) {
557
+ this.#logger.warn("session.node.cap.reached", {
558
+ agentName,
559
+ currentCount: this.#activeNodes.size,
560
+ maxCount: MAX_SESSION_NODES,
561
+ });
562
+ return {
563
+ ok: false,
564
+ reason: "max_sessions_reached",
565
+ guidance: "The daemon has reached its maximum of 32 concurrent session nodes. " +
566
+ "Close an existing session before starting a new one.",
567
+ };
568
+ }
569
+ // The session node N_A: either a FRESH ephemeral node (default), or — for the initiator
570
+ // path (reuseStandingReceiver) — the standing receiver handed off as the session node. The
571
+ // latter makes N_A's peer id equal the SESSION endpoint the initiator ADVERTISED to the
572
+ // directory (its standing receiver), so the counterparty's connection gater (set to that
573
+ // advertised peer id) admits N_A's dial. Mirrors acceptSession, which already hands off the
574
+ // standing receiver on the receiver side. WIRE-001/INV-5: a fully-fresh ephemeral initiator
575
+ // node would require advertising N_A's peer id pre-negotiation (a session-node lifecycle
576
+ // split); the symmetric standing-receiver handoff is the consistent interim model.
577
+ let node;
578
+ let gater;
579
+ let autoNat;
580
+ if (reuseStandingReceiver) {
581
+ const sr = this.#standingReceivers.get(agentName);
582
+ if (!sr) {
583
+ // DOD-LOOP-1: this agent has no standing receiver ready — kick off (idempotent) creation
584
+ // so a retry finds it, and report unavailable. Per-agent, so the initiator consuming its
585
+ // OWN agent's receiver never contends with a co-resident responder agent (the loopback case).
586
+ void this.#ensureStandingReceiver(agentName, correlationId);
587
+ return {
588
+ ok: false,
589
+ reason: "standing_receiver_unavailable",
590
+ guidance: "The standing receiver node is initializing (completes within 200ms). Retry the session in a moment.",
591
+ };
592
+ }
593
+ ({ node, gater, autoNat } = sr);
594
+ gater.setAllowedPeer(counterpartyPeerId);
595
+ // Hand this agent's standing receiver off to this session; a replacement is spun up below.
596
+ this.#standingReceivers.delete(agentName);
597
+ }
598
+ else {
599
+ gater = new SessionConnectionGater({
600
+ sessionId,
601
+ allowedPeerId: counterpartyPeerId,
602
+ logger: this.#logger,
603
+ });
604
+ try {
605
+ node = await this.#factory.createNode({ sessionId, connectionGater: gater, nodeType: "session" });
606
+ await node.start();
607
+ }
608
+ catch (err) {
609
+ const errorMessage = err instanceof Error ? err.message : String(err);
610
+ this.#logger.error("session.node.create.failed", {
611
+ sessionId,
612
+ agentName,
613
+ error: errorMessage,
614
+ correlationId,
615
+ });
616
+ return {
617
+ ok: false,
618
+ reason: "session_node_creation_failed",
619
+ guidance: "Failed to create session transport node. The daemon logged the cause in " +
620
+ "session.node.create.failed. Check that the system has available ports and sufficient memory.",
621
+ };
622
+ }
623
+ // CELLO-M7-TRANSPORT-001: session nodes also need dialability awareness for the
624
+ // dcutr decision path (AC-002). Wrap the node in a NodeAutoNatService and emit
625
+ // its initial result (nodeType: 'session').
626
+ autoNat = new NodeAutoNatService({
627
+ node,
628
+ logger: this.#logger,
629
+ nodeType: "session",
630
+ probers: this.#autoNatProbers(),
631
+ });
632
+ autoNat.emitInitialResult();
633
+ }
634
+ const peerId = node.getPeerId();
635
+ const addrs = node.listenAddresses();
636
+ // Persist to SQLite
637
+ this.#insertSessionRow(sessionId, agentName, counterpartyPubkey, "active");
638
+ // Log observability event (session.node.created)
639
+ this.#logger.info("session.node.created", {
640
+ sessionId,
641
+ agentName,
642
+ sessionPeerId: peerId,
643
+ correlationId,
644
+ });
645
+ // Add to active map (keyed by (agentName, sessionId) — DOD-LOOP-1)
646
+ this.#activeNodes.set(this.#k(agentName, sessionId), {
647
+ node,
648
+ agentName,
649
+ sessionId,
650
+ counterpartyPubkey,
651
+ gater,
652
+ correlationId,
653
+ counterpartySessionPeerId: counterpartyPeerId,
654
+ autoNat,
655
+ });
656
+ // DAEMON-004: register the content stream handler so inbound content_frames
657
+ // are cross-checked, appended to the daemon-owned tree, and buffered.
658
+ await this.#registerContentHandler(agentName, sessionId, node, counterpartyPubkey);
659
+ // M7-SESSION-003 AC-004: act on the session node's peer events for direct-path
660
+ // liveness. The session connection IS the authority for a direct session.
661
+ this.#wireSessionLiveness(agentName, sessionId, node, counterpartyPubkey, correlationId);
662
+ // M7 DOD-SPINE-6 / MSG-001-3b: connect this session node to the relay as the
663
+ // Structure-2 witness (non-fatal — direct content still works without it).
664
+ if (relay) {
665
+ await this.#connectSessionRelay(sessionId, node, agentName, relay, correlationId);
666
+ }
667
+ // If we consumed this agent's standing receiver, spin up a replacement (async — do NOT await).
668
+ if (reuseStandingReceiver) {
669
+ void this.#ensureStandingReceiver(agentName, correlationId);
670
+ }
671
+ return { ok: true, peerId, addrs };
672
+ }
673
+ /**
674
+ * M7 DOD-SPINE-6 / MSG-001-3b: connect a session node to the relay witness and
675
+ * store the client on the active entry. Best-effort: a connect/auth failure logs
676
+ * and leaves relayClient undefined — the session is NOT destroyed and the direct
677
+ * content path keeps working (the relay-park/recovery path is MSG-001-3b's domain).
678
+ */
679
+ async #connectSessionRelay(sessionId, node, agentName, relay, correlationId) {
680
+ try {
681
+ // The session node's gater admits only the counterparty; the relay witness is a
682
+ // third peer. Permit it OUTBOUND so the dial isn't denied — inbound stays
683
+ // counterparty-only (INV-5). The relay peer id comes from the signed assignment.
684
+ this.#activeNodes.get(this.#k(agentName, sessionId))?.gater.setAllowedOutboundPeer(relay.relayPeerId);
685
+ // One relay client per (AGENT, RELAY NODE). The relay keys by agent pubkey, so the
686
+ // collision H1 addresses is per relay; CELLO is federated, so a different session for
687
+ // the same agent may be assigned a DIFFERENT relay — that needs its own client.
688
+ const clientKey = `${agentName}::${relay.relayPeerId}`;
689
+ let client = this.#relayClients.get(clientKey);
690
+ if (!client) {
691
+ client = new AgentRelayClient({
692
+ relayPeerId: relay.relayPeerId,
693
+ relayAddrs: relay.relayAddrs,
694
+ keyProvider: relay.keyProvider,
695
+ senderPubkey: relay.senderPubkey,
696
+ logger: this.#logger,
697
+ });
698
+ this.#relayClients.set(clientKey, client);
699
+ }
700
+ const sessionIdHexForRelay = Buffer.from(relay.sessionIdBytes).toString("hex");
701
+ client.registerSession(sessionIdHexForRelay, node, (frame) => {
702
+ // The counterparty's witnessed leaf arrived with its canonical sequence. The
703
+ // plaintext is delivered separately over the direct content stream; this is the
704
+ // ordering/witness signal. Full canonical-sequence reconciliation against the
705
+ // local tree is MSG-001-3b (J-CONTENT).
706
+ this.#logger.info("session.relay.leaf.delivered", {
707
+ sessionId,
708
+ sequenceNumber: frame.sequence_number,
709
+ leafKind: frame.leaf_kind,
710
+ correlationId,
711
+ });
712
+ // DOD-MSG-4 (strict in-order): record the relay-witnessed canonical sequence for the
713
+ // counterparty's MSG leaves. The relay is the ordering authority; structure1_cbor =
714
+ // [1, content_hash(32), sender_pubkey, session_id, last_seen_seq, ts]. The relay sequence
715
+ // is 1-based and global per session; the daemon tree is 0-based — normalize with -1. Only
716
+ // COUNTERPARTY leaves (the ones B will ingest); our own echoed leaf already lands via the
717
+ // send path. The gate (ingestReceivedContent) reads this map to hold out-of-order arrivals.
718
+ if (!frame.authored_by_us && frame.leaf_kind !== LEAF_KIND_CTRL) {
719
+ try {
720
+ const s1 = decode(frame.structure1_cbor);
721
+ const contentHash = s1?.[1];
722
+ if (contentHash instanceof Uint8Array && frame.sequence_number > 0) {
723
+ this.recordWitnessedSequence(agentName, sessionId, Buffer.from(contentHash).toString("hex"), frame.sequence_number - 1);
724
+ }
725
+ }
726
+ catch (err) {
727
+ this.#logger.warn("session.relay.leaf.witness.decode.failed", {
728
+ sessionId,
729
+ error: err instanceof Error ? err.message : String(err),
730
+ correlationId,
731
+ });
732
+ }
733
+ }
734
+ // M7-UPGRADE-002: auto-acknowledge close. When the COUNTERPARTY's SEAL ctrl leaf (0x02)
735
+ // arrives and B has verified the content, B's OWN node auto-co-signs the responder SEAL
736
+ // leaf — no agent prompt — so the bilateral seal completes promptly instead of degrading
737
+ // to unilateral on a slow/busy/crashed agent. Never auto-ack our OWN echoed ctrl leaf.
738
+ if (frame.leaf_kind === LEAF_KIND_CTRL && !frame.authored_by_us) {
739
+ this.#maybeAutoAcknowledgeSeal(agentName, sessionId, correlationId);
740
+ }
741
+ });
742
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
743
+ if (entry) {
744
+ entry.relayClient = client;
745
+ entry.relaySessionIdBytes = relay.sessionIdBytes;
746
+ entry.relayClientKey = clientKey;
747
+ // 2b: remember the relay endpoint so the content-park backstop deposits to the SAME relay.
748
+ entry.relayPeerId = relay.relayPeerId;
749
+ entry.relayAddrs = relay.relayAddrs;
750
+ // MSG-2 startup-flush: also PERSIST it, so a restart's crash-backstop flush (which runs
751
+ // before the in-memory entry exists) can deposit un-acked content to the same relay.
752
+ try {
753
+ this.#db
754
+ ?.prepare("UPDATE sessions SET relay_peer_id = ?, relay_addrs = ?, updated_at = ? WHERE agent_name = ? AND session_id = ?")
755
+ .run(relay.relayPeerId, JSON.stringify(relay.relayAddrs), Date.now(), agentName, sessionId);
756
+ }
757
+ catch (err) {
758
+ this.#logger.warn("session.relay.endpoint.persist.failed", {
759
+ sessionId,
760
+ error: err instanceof Error ? err.message : String(err),
761
+ });
762
+ }
763
+ }
764
+ else {
765
+ // The session was torn down while we were wiring — undo the registration.
766
+ client.unregisterSession(sessionIdHexForRelay);
767
+ if (!client.hasSessions() && this.#relayClients.get(clientKey) === client) {
768
+ client.close();
769
+ this.#relayClients.delete(clientKey);
770
+ }
771
+ return;
772
+ }
773
+ // Proactively connect so the relay has this agent's stream to deliver leaves to
774
+ // (the RECEIVER must be connected before the counterparty submits). Best-effort.
775
+ await client.connect(node);
776
+ }
777
+ catch (err) {
778
+ this.#logger.warn("session.relay.connect.error", {
779
+ sessionId,
780
+ error: err instanceof Error ? err.message : String(err),
781
+ correlationId,
782
+ });
783
+ }
784
+ }
785
+ /**
786
+ * M7 DOD-SPINE-6 / MSG-001-3b: detach a session from its (agent, relay) client and
787
+ * close the client when it has no remaining sessions. Idempotent and identity-guarded:
788
+ * the map delete only fires if the map still holds THIS client (a racing teardown of a
789
+ * sibling session must not close a freshly-created replacement client for the same key).
790
+ */
791
+ #detachSessionRelay(entry) {
792
+ const client = entry.relayClient;
793
+ const key = entry.relayClientKey;
794
+ if (!client || !entry.relaySessionIdBytes)
795
+ return;
796
+ // Idempotent: clear the entry's reference so a second teardown of the same entry no-ops.
797
+ entry.relayClient = undefined;
798
+ const sidHex = Buffer.from(entry.relaySessionIdBytes).toString("hex");
799
+ client.unregisterSession(sidHex);
800
+ if (!client.hasSessions() && key && this.#relayClients.get(key) === client) {
801
+ client.close();
802
+ this.#relayClients.delete(key);
803
+ }
804
+ }
805
+ /**
806
+ * M7-SESSION-003 AC-004: wire a session node's peer-connect / peer-disconnect
807
+ * events to per-session direct-path liveness. onPeerConnect → 'alive'; the
808
+ * session node's gater restricts connections to the designated counterparty, so
809
+ * a connect/disconnect on this node is the counterparty's session-path liveness.
810
+ * onPeerDisconnect → 'gone' (the hook the client did not act on before),
811
+ * emitting session.liveness.changed at WARN. Combined with the transport
812
+ * keepalive (AC-005), a peer that vanished without a clean close still surfaces
813
+ * a disconnect and drives 'gone'.
814
+ */
815
+ #wireSessionLiveness(agentName, sessionId, node, counterpartyPubkey, correlationId) {
816
+ const key = this.#k(agentName, sessionId);
817
+ node.onPeerConnect(() => {
818
+ const prior = this.#sessionLiveness.get(key);
819
+ this.#sessionLiveness.set(key, "alive");
820
+ if (prior !== "alive") {
821
+ this.#logger.info("session.liveness.changed", {
822
+ sessionId,
823
+ counterpartyPubkey,
824
+ transportPath: "direct",
825
+ liveness: "alive",
826
+ observedBy: "session_node",
827
+ correlationId,
828
+ });
829
+ }
830
+ });
831
+ node.onPeerDisconnect(() => {
832
+ const prior = this.#sessionLiveness.get(key);
833
+ this.#sessionLiveness.set(key, "gone");
834
+ if (prior !== "gone") {
835
+ this.#logger.warn("session.liveness.changed", {
836
+ sessionId,
837
+ counterpartyPubkey,
838
+ transportPath: "direct",
839
+ liveness: "gone",
840
+ observedBy: "session_node",
841
+ correlationId,
842
+ });
843
+ }
844
+ });
845
+ }
846
+ /**
847
+ * M7-SESSION-003: read the direct-path counterparty liveness for a session.
848
+ * 'unknown' when no session node observation has occurred yet.
849
+ */
850
+ getSessionLiveness(agentName, sessionId) {
851
+ return this.#sessionLiveness.get(this.#k(agentName, sessionId)) ?? "unknown";
852
+ }
853
+ /**
854
+ * Hand the standing receiver to an inbound session.
855
+ * Called during cello_await_session.
856
+ *
857
+ * CRITICAL (AC-015): gater.setAllowedPeer() is called BEFORE returning
858
+ * the node's multiaddr to the caller. This closes the window where an
859
+ * unexpected peer could connect during the hand-off.
860
+ */
861
+ async acceptSession(sessionId, agentName, counterpartyPubkey, initiatorPeerId, correlationId, relay) {
862
+ const inboundSr = this.#standingReceivers.get(agentName);
863
+ if (!inboundSr) {
864
+ // DOD-LOOP-1: per-agent — kick off (idempotent) creation so a retry finds it.
865
+ void this.#ensureStandingReceiver(agentName, correlationId);
866
+ return {
867
+ ok: false,
868
+ reason: "standing_receiver_unavailable",
869
+ guidance: "The standing receiver node is initializing (completes within 200ms). " +
870
+ "Retry cello_await_session in a moment.",
871
+ };
872
+ }
873
+ // Cap enforcement — inbound sessions count against the same limit (AC-006)
874
+ if (this.#activeNodes.size >= MAX_SESSION_NODES) {
875
+ this.#logger.warn("session.node.cap.reached", {
876
+ agentName,
877
+ currentCount: this.#activeNodes.size,
878
+ maxCount: MAX_SESSION_NODES,
879
+ });
880
+ return {
881
+ ok: false,
882
+ reason: "max_sessions_reached",
883
+ guidance: "The daemon has reached its maximum of 32 concurrent session nodes. " +
884
+ "Close an existing session before starting a new one.",
885
+ };
886
+ }
887
+ const { node, gater, autoNat } = inboundSr;
888
+ // AC-015: update gater BEFORE retrieving multiaddr / returning to caller
889
+ gater.setAllowedPeer(initiatorPeerId);
890
+ const peerId = node.getPeerId();
891
+ const addrs = node.listenAddresses();
892
+ // Persist to SQLite
893
+ this.#insertSessionRow(sessionId, agentName, counterpartyPubkey, "active");
894
+ // Log observability event
895
+ this.#logger.info("session.node.created", {
896
+ sessionId,
897
+ agentName,
898
+ sessionPeerId: peerId,
899
+ correlationId,
900
+ });
901
+ // Remove this agent's standing receiver from the slot and add to active map. The handed-off
902
+ // node keeps its AutoNAT service (it continues to surface dialability).
903
+ this.#standingReceivers.delete(agentName);
904
+ this.#activeNodes.set(this.#k(agentName, sessionId), {
905
+ node,
906
+ agentName,
907
+ sessionId,
908
+ counterpartyPubkey,
909
+ gater,
910
+ correlationId,
911
+ counterpartySessionPeerId: initiatorPeerId,
912
+ autoNat,
913
+ });
914
+ // DAEMON-004: register the content stream handler for the inbound session.
915
+ await this.#registerContentHandler(agentName, sessionId, node, counterpartyPubkey);
916
+ // M7-SESSION-003 AC-004: act on the inbound session node's peer events too.
917
+ this.#wireSessionLiveness(agentName, sessionId, node, counterpartyPubkey, correlationId);
918
+ // M7 DOD-SPINE-6 / MSG-001-3b: the receiver also connects to the relay witness so
919
+ // the relay can deliver the initiator's witnessed leaves (leaf_deliver) to it.
920
+ if (relay) {
921
+ await this.#connectSessionRelay(sessionId, node, agentName, relay, correlationId);
922
+ }
923
+ // Immediately spin up a replacement for THIS agent (async — do NOT await, AC-003)
924
+ void this.#ensureStandingReceiver(agentName, correlationId);
925
+ return { ok: true, peerId, addrs };
926
+ }
927
+ /**
928
+ * Destroy a session node after seal or on error teardown.
929
+ * Status written to SQLite.
930
+ */
931
+ async destroySessionNode(agentName, sessionId, reason) {
932
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
933
+ if (!entry)
934
+ return;
935
+ entry.autoNat.stop();
936
+ // M7 DOD-SPINE-6 / MSG-001-3b: close the relay witness stream so we don't leak it.
937
+ this.#detachSessionRelay(entry);
938
+ try {
939
+ await entry.node.stop();
940
+ }
941
+ catch (err) {
942
+ this.#logger.error("session.node.stop.failed", {
943
+ sessionId,
944
+ agentName: entry.agentName,
945
+ error: err instanceof Error ? err.message : String(err),
946
+ correlationId: entry.correlationId,
947
+ });
948
+ // Fall through — still remove from active map and update DB
949
+ }
950
+ // Update SQLite — 'sealed' → 'sealed', 'interrupted'/'error' → 'interrupted'.
951
+ // 'error' is not a valid SessionStatus in SQLite; error-torn-down sessions
952
+ // surface as interrupted so AC-010 recovery handles them at next login.
953
+ // The session.node.destroyed log preserves the original reason for observability.
954
+ const dbStatus = reason === "sealed" ? "sealed" : "interrupted";
955
+ this.#updateSessionStatus(agentName, sessionId, dbStatus);
956
+ this.#activeNodes.delete(this.#k(agentName, sessionId));
957
+ // Evict the in-memory per-session caches on teardown. The tree is durable in
958
+ // SQLite (getSessionTree reloads it on demand), and the received-content buffer
959
+ // holds plaintext that must not linger after a session ends. Without this, both
960
+ // maps grow unbounded by total sessions seen over a long-lived daemon.
961
+ // (#evictSessionCaches also drops the M7-SESSION-003 liveness flag, so both the
962
+ // destroy and retire teardown paths clear it — no stale verdict survives.)
963
+ this.#evictSessionCaches(agentName, sessionId);
964
+ this.#logger.info("session.node.destroyed", {
965
+ sessionId,
966
+ agentName: entry.agentName,
967
+ reason,
968
+ });
969
+ }
970
+ /**
971
+ * round-2 finding #5: retire a session's live libp2p node WITHOUT changing its
972
+ * DB status. Used after the active-session bilateral seal commitment has already
973
+ * advanced the row to 'seal_interrupted_pending': the session is frozen, so we
974
+ * stop the node and unregister its /cello/content handler (no more inbound leaves,
975
+ * no leaked node per active close) but must NOT overwrite the pending/sealed status
976
+ * the way destroySessionNode would. The durable tree stays in SQLite (getSessionTree
977
+ * reloads it); the in-memory plaintext buffer is evicted.
978
+ */
979
+ async retireSessionNode(agentName, sessionId) {
980
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
981
+ if (!entry)
982
+ return;
983
+ this.#detachSessionRelay(entry);
984
+ try {
985
+ await entry.node.stop();
986
+ }
987
+ catch (err) {
988
+ this.#logger.error("session.node.stop.failed", {
989
+ sessionId,
990
+ agentName: entry.agentName,
991
+ error: err instanceof Error ? err.message : String(err),
992
+ correlationId: entry.correlationId,
993
+ });
994
+ // Fall through — still remove from active map.
995
+ }
996
+ this.#activeNodes.delete(this.#k(agentName, sessionId));
997
+ this.#evictSessionCaches(agentName, sessionId);
998
+ this.#logger.info("session.node.destroyed", {
999
+ sessionId,
1000
+ agentName: entry.agentName,
1001
+ reason: "sealing",
1002
+ });
1003
+ }
1004
+ /** Drop the in-memory tree + received-content caches for a torn-down session (DOD-LOOP-1: per (agent, session)). */
1005
+ #evictSessionCaches(agentName, sessionId) {
1006
+ const key = this.#k(agentName, sessionId);
1007
+ this.#trees.delete(key);
1008
+ this.#receivedContent.delete(key);
1009
+ // CELLO-M7-MSG-001: cancel any armed TTF timers so a torn-down session never
1010
+ // fires a park backstop (or keeps a timer) after it is gone.
1011
+ this.#clearAwaitingForSession(agentName, sessionId);
1012
+ // M7-SESSION-003: drop the direct-path liveness flag (the seal gate already read
1013
+ // its verdict) so a destroyed/retired session retains no stale alive/gone state.
1014
+ this.#sessionLiveness.delete(key);
1015
+ // M7-UPGRADE-002: drop the auto-acknowledge bookkeeping for a torn-down session.
1016
+ this.#contentDesynced.delete(key);
1017
+ this.#responderSealSubmitted.delete(key);
1018
+ // DOD-MSG-4: drop the strict-in-order bookkeeping (witness map, held plaintext, high-water)
1019
+ // so a torn-down session retains no stale ordering state or buffered plaintext.
1020
+ this.#witnessedSeq.delete(key);
1021
+ this.#heldContent.delete(key);
1022
+ this.#highWaterSeq.delete(key);
1023
+ }
1024
+ /**
1025
+ * Graceful shutdown: mark all active sessions as interrupted, stop all nodes.
1026
+ * Called from the SIGTERM / cello logout path (AC-009).
1027
+ * SQLite writes complete before this method returns.
1028
+ */
1029
+ async gracefulShutdown() {
1030
+ // Signal any in-flight standing-receiver replacement to self-stop (review M2).
1031
+ this.#shuttingDown = true;
1032
+ // Cancel every armed awaiting-ACK timer so an un-acked send (e.g. a rejected /
1033
+ // tampered frame that never produced a `persisted` ACK) does not leave a 20s
1034
+ // timer pinning the content + this manager in memory past teardown (review M1).
1035
+ for (const bySession of this.#awaitingAck.values()) {
1036
+ for (const entry of bySession.values())
1037
+ clearTimeout(entry.timer);
1038
+ }
1039
+ this.#awaitingAck.clear();
1040
+ // Mark ALL 'active' rows interrupted in SQLite — single batch UPDATE covers
1041
+ // both in-memory managed nodes AND any rows that were inserted directly
1042
+ // (e.g. by the binary AC-009 SIGTERM test inserting synthetic rows).
1043
+ // This is the authoritative persistence step; in-memory map is secondary.
1044
+ const now = Date.now();
1045
+ if (!this.#db) {
1046
+ this.#logger.error("session.interrupt.db.write.failed", {
1047
+ sessionId: "__all__",
1048
+ error: "db not initialized",
1049
+ });
1050
+ }
1051
+ else {
1052
+ const interruptedAt = new Date(now).toISOString();
1053
+ try {
1054
+ this.#db.prepare("UPDATE sessions SET status = 'interrupted', updated_at = ?, interrupted_at = COALESCE(interrupted_at, ?) WHERE status = 'active'").run(now, interruptedAt);
1055
+ }
1056
+ catch (err) {
1057
+ this.#logger.error("session.interrupt.db.write.failed", {
1058
+ sessionId: "__all__",
1059
+ error: err instanceof Error ? err.message : String(err),
1060
+ });
1061
+ }
1062
+ }
1063
+ // Stop all session nodes, then emit session.node.destroyed only on success
1064
+ // (mirrors destroySessionNode ordering: stop first, log destroyed after)
1065
+ const stopPromises = [];
1066
+ for (const entry of this.#activeNodes.values()) {
1067
+ entry.autoNat.stop();
1068
+ // M7 DOD-SPINE-6: detach from the agent relay client (closes it when its last
1069
+ // session goes) — consistent with the other teardown paths.
1070
+ this.#detachSessionRelay(entry);
1071
+ stopPromises.push(entry.node.stop().then(() => {
1072
+ this.#logger.info("session.node.destroyed", {
1073
+ sessionId: entry.sessionId,
1074
+ agentName: entry.agentName,
1075
+ reason: "interrupted",
1076
+ });
1077
+ }).catch((err) => {
1078
+ this.#logger.error("session.node.stop.failed", {
1079
+ sessionId: entry.sessionId,
1080
+ agentName: entry.agentName,
1081
+ error: err instanceof Error ? err.message : String(err),
1082
+ correlationId: entry.correlationId,
1083
+ });
1084
+ }));
1085
+ }
1086
+ await Promise.all(stopPromises);
1087
+ this.#activeNodes.clear();
1088
+ // Evict in-memory per-session caches (trees reload from SQLite; received-content
1089
+ // plaintext must not survive shutdown in memory).
1090
+ this.#trees.clear();
1091
+ this.#receivedContent.clear();
1092
+ // Stop ALL per-agent standing receivers (DOD-LOOP-1).
1093
+ for (const [agentName, sr] of this.#standingReceivers) {
1094
+ sr.autoNat.stop();
1095
+ try {
1096
+ await sr.node.stop();
1097
+ }
1098
+ catch (err) {
1099
+ this.#logger.error("session.node.stop.failed", {
1100
+ sessionId: "standing_receiver_shutdown",
1101
+ agentName: `${STANDING_RECEIVER_AGENT_NAME}:${agentName}`,
1102
+ error: err instanceof Error ? err.message : String(err),
1103
+ correlationId: "n/a",
1104
+ });
1105
+ }
1106
+ }
1107
+ this.#standingReceivers.clear();
1108
+ // Release the SQLite handle so the DB file is no longer held open after shutdown
1109
+ // (review L5). Queries guard on `#db === null` and degrade to empty/null.
1110
+ if (this.#db) {
1111
+ try {
1112
+ this.#db.close();
1113
+ }
1114
+ catch { /* already closed */ }
1115
+ this.#db = null;
1116
+ }
1117
+ }
1118
+ /**
1119
+ * Return all sessions with a given status from SQLite.
1120
+ * Used by cello status to surface interrupted sessions.
1121
+ */
1122
+ getSessionsByStatus(status) {
1123
+ if (!this.#db)
1124
+ return [];
1125
+ return this.#db
1126
+ .prepare("SELECT * FROM sessions WHERE status = ?")
1127
+ .all(status);
1128
+ }
1129
+ /**
1130
+ * M7-SESSION-004 (AC-005): persist the seal certificate's legibility object with the
1131
+ * sealed record. Stored as a JSON string (hex-encoded pubkeys) so it round-trips a
1132
+ * daemon restart and is returned intact on the cert-read surface. The caller normalises
1133
+ * the raw wire legibility (Uint8Array pubkeys) into a JSON-safe shape before storing.
1134
+ * Best-effort: a session row may not yet exist (the seal arrived before the row was
1135
+ * persisted); in that case we no-op rather than throw — the cert still flows through the
1136
+ * live return path. The legibility content is identical regardless of delivery timing.
1137
+ */
1138
+ recordSealCertificate(agentName, sessionId, sealedRootHex, legibilityJson) {
1139
+ if (!this.#db)
1140
+ return;
1141
+ this.#db
1142
+ .prepare("UPDATE sessions SET seal_legibility = ?, sealed_root_hex = ?, updated_at = ? WHERE agent_name = ? AND session_id = ?")
1143
+ .run(legibilityJson, sealedRootHex, Date.now(), agentName, sessionId);
1144
+ }
1145
+ /**
1146
+ * M7 legibility-TBS-binding (responder verify): record the counterparty's FROST primary (group)
1147
+ * pubkey from the FROST-signed SessionAssignment, so the responder can VERIFY the bilateral seal
1148
+ * signature locally. Best-effort — a missing row (race) is a no-op; the seal then falls back to
1149
+ * accept-without-verify (still sound: the live frame arrives over the authenticated Noise channel).
1150
+ */
1151
+ recordCounterpartyPrimary(agentName, sessionId, primaryPubkeyHex) {
1152
+ if (!this.#db)
1153
+ return;
1154
+ this.#db
1155
+ .prepare("UPDATE sessions SET counterparty_primary_pubkey = ?, updated_at = ? WHERE agent_name = ? AND session_id = ?")
1156
+ .run(primaryPubkeyHex, Date.now(), agentName, sessionId);
1157
+ }
1158
+ /**
1159
+ * M7-SESSION-004 (AC-005/AC-006): read the persisted seal certificate for a session.
1160
+ * Returns the sealed root and the parsed legibility object (JSON-safe, hex pubkeys), or
1161
+ * null if the session is unknown or not yet sealed. This is the cert-read surface a
1162
+ * reader (operator, agent, arbitrator) — possibly in a DIFFERENT process than the one
1163
+ * that built the certificate — uses to determine receipt-not-assent, per-party frontiers,
1164
+ * attestation modes, and whether the final message was answered.
1165
+ */
1166
+ getSealCertificate(agentName, sessionId) {
1167
+ if (!this.#db)
1168
+ return null;
1169
+ const row = this.#db
1170
+ .prepare("SELECT sealed_root_hex, seal_legibility FROM sessions WHERE agent_name = ? AND session_id = ?")
1171
+ .get(agentName, sessionId);
1172
+ if (!row || !row.seal_legibility || !row.sealed_root_hex)
1173
+ return null;
1174
+ let legibility;
1175
+ try {
1176
+ legibility = JSON.parse(row.seal_legibility);
1177
+ }
1178
+ catch {
1179
+ return null;
1180
+ }
1181
+ return { sealed_root: row.sealed_root_hex, legibility };
1182
+ }
1183
+ /**
1184
+ * M7-SESSION-001: Mark a session as interrupted with message count and timestamp.
1185
+ * Called when a relay session_interrupted frame arrives or a relay stream closes.
1186
+ * Also tears down the in-memory session node if one exists for this sessionId.
1187
+ *
1188
+ * @param sessionId The hex session ID from the relay frame
1189
+ * @param messageCount Number of message leaves at interruption
1190
+ * @param source 'relay_frame' | 'stream_close'
1191
+ */
1192
+ async markInterruptedWithDetails(agentName, sessionId, messageCount, source) {
1193
+ if (!this.#db)
1194
+ return;
1195
+ // H-3 SECURITY: only an 'active' session may transition to 'interrupted'.
1196
+ // A late or forged relay frame must NOT revert a 'sealed', 'seal_interrupted_pending',
1197
+ // or already-'interrupted' session back to 'interrupted'. This mirrors the
1198
+ // stream-close guard in #watchRelayStream below — the two paths must agree.
1199
+ const existing = this.getSessionRecord(agentName, sessionId);
1200
+ if (!existing || existing.status !== "active") {
1201
+ this.#logger.warn("session.interrupt.ignored", {
1202
+ sessionId,
1203
+ source,
1204
+ currentStatus: existing?.status ?? "absent",
1205
+ reason: "session_not_active",
1206
+ });
1207
+ return;
1208
+ }
1209
+ const now = Date.now();
1210
+ const interruptedAt = new Date(now).toISOString();
1211
+ // round-2 finding #7: the daemon-owned tree is the authoritative transcript
1212
+ // length. The `messageCount` arg comes from registerRelayStream time and defaults
1213
+ // to 0, so writing it blindly would clobber the column out of sync with the tree
1214
+ // (both seal flows prefer tree.size(), but the column must not lie). When a tree
1215
+ // exists for this session, persist its size; otherwise fall back to the arg.
1216
+ const treeSize = this.getSessionTree(agentName, sessionId).size();
1217
+ const authoritativeCount = treeSize > 0 ? treeSize : messageCount;
1218
+ try {
1219
+ // The `AND status = 'active'` predicate is the authoritative guard: even if
1220
+ // the pre-check above raced (it cannot — DatabaseSync is synchronous), the
1221
+ // UPDATE only mutates a row that is still active.
1222
+ this.#db
1223
+ .prepare("UPDATE sessions SET status = 'interrupted', updated_at = ?, message_count = ?, interrupted_at = ? WHERE agent_name = ? AND session_id = ? AND status = 'active'")
1224
+ .run(now, authoritativeCount, interruptedAt, agentName, sessionId);
1225
+ }
1226
+ catch (err) {
1227
+ this.#logger.error("session.interrupt.db.write.failed", {
1228
+ sessionId,
1229
+ error: err instanceof Error ? err.message : String(err),
1230
+ });
1231
+ }
1232
+ // Look up the in-memory entry (keyed by (agent, session)) for teardown.
1233
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
1234
+ // Tear down the in-memory session node if it exists
1235
+ if (entry) {
1236
+ entry.autoNat.stop();
1237
+ this.#detachSessionRelay(entry);
1238
+ try {
1239
+ await entry.node.stop();
1240
+ }
1241
+ catch (err) {
1242
+ this.#logger.error("session.node.stop.failed", {
1243
+ sessionId,
1244
+ agentName,
1245
+ error: err instanceof Error ? err.message : String(err),
1246
+ correlationId: entry.correlationId,
1247
+ });
1248
+ // Fall through — still remove from active map
1249
+ }
1250
+ this.#activeNodes.delete(this.#k(agentName, sessionId));
1251
+ this.#logger.info("session.node.destroyed", {
1252
+ sessionId,
1253
+ agentName,
1254
+ reason: "interrupted",
1255
+ });
1256
+ }
1257
+ this.#logger.warn("session.interrupted.detected", {
1258
+ sessionId,
1259
+ agentName,
1260
+ source,
1261
+ });
1262
+ // M7-SESSION-001 (M-1 PUSH): notify live MCP clients that this session is now
1263
+ // interrupted. Only fires on a real active→interrupted transition (the guard
1264
+ // above already returned for any non-active session).
1265
+ try {
1266
+ this.#onSessionStateChanged?.(agentName, sessionId, "interrupted", existing.counterparty_pubkey);
1267
+ }
1268
+ catch (err) {
1269
+ this.#logger.debug("session.state.notify.failed", {
1270
+ sessionId,
1271
+ error: err instanceof Error ? err.message : String(err),
1272
+ });
1273
+ }
1274
+ }
1275
+ /**
1276
+ * M7-SESSION-001 (H-1): persist a verified bilateral SEAL-INTERRUPTED
1277
+ * commitment and transition the session to 'seal_interrupted_pending'.
1278
+ *
1279
+ * This is NOT a seal. It records that both parties produced and exchanged
1280
+ * K_local-signed SEAL-INTERRUPTED leaves over the same {leafCount, merkleRoot}.
1281
+ * The FROST threshold notarization is a separate, currently-unwired step (see
1282
+ * daemon.ts handleSealInterruptedFlow H-1 note), which is precisely why the
1283
+ * status is 'seal_interrupted_pending' and never 'sealed'.
1284
+ *
1285
+ * The status update is guarded so it only advances a session out of the
1286
+ * 'interrupted' state — it will not overwrite a 'sealed' row.
1287
+ *
1288
+ * @returns true if the session row was advanced to seal_interrupted_pending.
1289
+ */
1290
+ persistSealInterruptedCommitment(opts) {
1291
+ if (!this.#db)
1292
+ return false;
1293
+ const now = Date.now();
1294
+ try {
1295
+ this.#db
1296
+ .prepare(`INSERT OR REPLACE INTO seal_interrupted_artifacts
1297
+ (agent_name, session_id, role, own_leaf, counterparty_leaf, merkle_root, nonce, created_at)
1298
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)`)
1299
+ .run(opts.agentName, opts.sessionId, opts.role, JSON.stringify(opts.ownLeaf), JSON.stringify(opts.counterpartyLeaf), opts.merkleRoot, opts.nonce, now);
1300
+ }
1301
+ catch (err) {
1302
+ this.#logger.error("session.interrupted.db.write.failed", {
1303
+ sessionId: opts.sessionId,
1304
+ error: err instanceof Error ? err.message : String(err),
1305
+ });
1306
+ return false;
1307
+ }
1308
+ // DAEMON-004: the bilateral commitment advances a session out of either
1309
+ // 'interrupted' (SESSION-001 interrupted-seal flow) OR 'active' (DAEMON-004
1310
+ // active-session seal). The guard still refuses to overwrite a terminal
1311
+ // 'sealed' row or an already-pending one.
1312
+ const result = this.#db
1313
+ .prepare("UPDATE sessions SET status = 'seal_interrupted_pending', updated_at = ? WHERE agent_name = ? AND session_id = ? AND status IN ('active', 'interrupted')")
1314
+ .run(now, opts.agentName, opts.sessionId);
1315
+ return Number(result.changes) > 0;
1316
+ }
1317
+ /**
1318
+ * M7-SESSION-001 (H-1): read back the persisted bilateral commitment artifacts
1319
+ * for a session. Returns null when none exist.
1320
+ */
1321
+ getSealInterruptedArtifacts(agentName, sessionId) {
1322
+ if (!this.#db)
1323
+ return null;
1324
+ const row = this.#db
1325
+ .prepare("SELECT * FROM seal_interrupted_artifacts WHERE agent_name = ? AND session_id = ?")
1326
+ .get(agentName, sessionId);
1327
+ if (!row)
1328
+ return null;
1329
+ return {
1330
+ role: row.role,
1331
+ ownLeaf: JSON.parse(row.own_leaf),
1332
+ counterpartyLeaf: JSON.parse(row.counterparty_leaf),
1333
+ merkleRoot: row.merkle_root,
1334
+ nonce: row.nonce,
1335
+ };
1336
+ }
1337
+ /**
1338
+ * Return the session record for a specific sessionId, regardless of status.
1339
+ * Used by cello_close_session to inspect session state.
1340
+ */
1341
+ getSessionRecord(agentName, sessionId) {
1342
+ if (!this.#db)
1343
+ return null;
1344
+ const row = this.#db
1345
+ .prepare("SELECT * FROM sessions WHERE agent_name = ? AND session_id = ?")
1346
+ .get(agentName, sessionId);
1347
+ return row ?? null;
1348
+ }
1349
+ /**
1350
+ * MSG-2 startup-flush: the persisted relay endpoint for a session, or null if none was
1351
+ * recorded. Used by the crash-backstop flush, which runs at startup BEFORE the in-memory
1352
+ * session entries exist, so it cannot use `entry.relayPeerId`.
1353
+ */
1354
+ getPersistedRelayEndpoint(agentName, sessionId) {
1355
+ if (!this.#db)
1356
+ return null;
1357
+ const row = this.#db
1358
+ .prepare("SELECT relay_peer_id, relay_addrs FROM sessions WHERE agent_name = ? AND session_id = ?")
1359
+ .get(agentName, sessionId);
1360
+ if (!row?.relay_peer_id || !row?.relay_addrs)
1361
+ return null;
1362
+ try {
1363
+ const addrs = JSON.parse(row.relay_addrs);
1364
+ if (!Array.isArray(addrs) || addrs.length === 0)
1365
+ return null;
1366
+ return { relayPeerId: row.relay_peer_id, relayAddrs: addrs };
1367
+ }
1368
+ catch {
1369
+ return null;
1370
+ }
1371
+ }
1372
+ /**
1373
+ * DOD-MSG-4 (auto-recover): the DISTINCT relay endpoints this agent has sessions on, so the daemon
1374
+ * can pull the agent's parked mailbox from each on reconnect (the relay mailbox is keyed by recipient
1375
+ * pubkey, so one pull per relay drains all of the agent's parked content there). Distinct by relay
1376
+ * peer id.
1377
+ */
1378
+ getAgentRelayEndpoints(agentName) {
1379
+ if (!this.#db)
1380
+ return [];
1381
+ const rows = this.#db
1382
+ .prepare("SELECT DISTINCT relay_peer_id, relay_addrs FROM sessions WHERE agent_name = ? AND relay_peer_id IS NOT NULL")
1383
+ .all(agentName);
1384
+ const byPeer = new Map();
1385
+ for (const row of rows) {
1386
+ if (!row.relay_peer_id || !row.relay_addrs)
1387
+ continue;
1388
+ try {
1389
+ const addrs = JSON.parse(row.relay_addrs);
1390
+ if (!Array.isArray(addrs) || addrs.length === 0)
1391
+ continue;
1392
+ if (!byPeer.has(row.relay_peer_id))
1393
+ byPeer.set(row.relay_peer_id, { relayPeerId: row.relay_peer_id, relayAddrs: addrs });
1394
+ }
1395
+ catch {
1396
+ /* skip malformed */
1397
+ }
1398
+ }
1399
+ return [...byPeer.values()];
1400
+ }
1401
+ // ─── DAEMON-004: daemon-owned Merkle tree ──────────────────────────────────
1402
+ /**
1403
+ * Return the daemon-owned Merkle tree for a session, loading it from SQLite
1404
+ * on first access (so it survives a restart — AC-007). Never returns null;
1405
+ * an unknown session yields an empty tree.
1406
+ */
1407
+ getSessionTree(agentName, sessionId) {
1408
+ const key = this.#k(agentName, sessionId);
1409
+ const cached = this.#trees.get(key);
1410
+ if (cached)
1411
+ return cached;
1412
+ const tree = this.#loadTreeFromDb(agentName, sessionId);
1413
+ this.#trees.set(key, tree);
1414
+ return tree;
1415
+ }
1416
+ /** Current daemon-owned tree root for a session, as hex. */
1417
+ getSessionTreeRootHex(agentName, sessionId) {
1418
+ return this.getSessionTree(agentName, sessionId).rootHex();
1419
+ }
1420
+ /**
1421
+ * Append a leaf (by its 32-byte leaf-hash hex) to the daemon-owned tree,
1422
+ * persist it, advance the root, and fire session.tree.appended.
1423
+ *
1424
+ * @returns the new leaf index and the recomputed root hex.
1425
+ */
1426
+ appendSessionLeaf(agentName, sessionId, kind, leafHashHex, correlationId) {
1427
+ const tree = this.getSessionTree(agentName, sessionId);
1428
+ const { leafIndex, newRootHex } = tree.appendLeafHash(kind, leafHashHex);
1429
+ if (this.#db) {
1430
+ try {
1431
+ this.#db
1432
+ .prepare(`INSERT INTO session_tree_leaves
1433
+ (agent_name, session_id, leaf_index, leaf_kind, leaf_hash_hex, created_at)
1434
+ VALUES (?, ?, ?, ?, ?, ?)`)
1435
+ .run(agentName, sessionId, leafIndex, kind, leafHashHex, Date.now());
1436
+ // DAEMON-004 (finding #2): keep sessions.message_count synced to the tree
1437
+ // size. message_count is the bilateral leafCount the seal flow signs over
1438
+ // (handleSealInterruptedFlow / the responder). If it diverged from the
1439
+ // daemon-owned tree, a post-active-messaging seal would attest to a
1440
+ // truncated transcript and the bilateral leafCount check would mismatch.
1441
+ // The tree (leafIndex + 1 leaves) is authoritative; the column tracks it.
1442
+ this.#db
1443
+ .prepare("UPDATE sessions SET message_count = ?, updated_at = ? WHERE agent_name = ? AND session_id = ?")
1444
+ .run(leafIndex + 1, Date.now(), agentName, sessionId);
1445
+ }
1446
+ catch (err) {
1447
+ // A persist failure must be visible, not swallowed: the in-memory tree
1448
+ // has advanced but the durable transcript has not, which would diverge
1449
+ // on restart. Surface it loudly.
1450
+ this.#logger.error("session.tree.persist.failed", {
1451
+ sessionId,
1452
+ leafIndex,
1453
+ error: err instanceof Error ? err.message : String(err),
1454
+ correlationId,
1455
+ });
1456
+ }
1457
+ }
1458
+ this.#logger.info("session.tree.appended", {
1459
+ sessionId,
1460
+ leafIndex,
1461
+ newRootHex,
1462
+ correlationId,
1463
+ });
1464
+ return { leafIndex, newRootHex };
1465
+ }
1466
+ /**
1467
+ * SEAM 1b (dialer ⇄ session-node reconciliation): dial the counterparty THROUGH
1468
+ * this session's OWN node, so the session node N_A holds the connection its content
1469
+ * newStream actually rides. TRANSPORT-001's transport selector dialed on a separate
1470
+ * (composition-root) node whose connection N_A could not use — the per-session node
1471
+ * must be the dialer. Direct mode only here (the default content path, Part 4 D-a);
1472
+ * relay-circuit + dcutr strategy via N_A is a later seam. Tries each addr in turn;
1473
+ * succeeds on the first connection, returns a named failure if none connect.
1474
+ */
1475
+ async connectToCounterparty(agentName, sessionId, addrs) {
1476
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
1477
+ if (!entry) {
1478
+ return { ok: false, reason: "session_node_unavailable", error: "no active session node for this session" };
1479
+ }
1480
+ if (addrs.length === 0) {
1481
+ return { ok: false, reason: "no_counterparty_addrs", error: "the assignment carried no counterparty session addrs to dial" };
1482
+ }
1483
+ let lastError = "";
1484
+ for (const addr of addrs) {
1485
+ try {
1486
+ await entry.node.dial(addr);
1487
+ this.#logger.info("session.transport.connected", {
1488
+ sessionId,
1489
+ addr,
1490
+ correlationId: entry.correlationId,
1491
+ });
1492
+ return { ok: true };
1493
+ }
1494
+ catch (err) {
1495
+ // error.message extracted — never [object Object]; try the next addr.
1496
+ lastError = err instanceof Error ? err.message : String(err);
1497
+ }
1498
+ }
1499
+ this.#logger.warn("session.transport.connect.failed", {
1500
+ sessionId,
1501
+ reason: "counterparty_dial_failed",
1502
+ error: lastError,
1503
+ correlationId: entry.correlationId,
1504
+ });
1505
+ return { ok: false, reason: "counterparty_dial_failed", error: lastError };
1506
+ }
1507
+ /**
1508
+ * DAEMON-004: send content over the session node's direct P2P content stream.
1509
+ * On a dead/missing stream this returns a NAMED, diagnosable failure — never a
1510
+ * silent success and never a desync (closing the old silent fire-and-forget
1511
+ * content catch in the retired in-process client send path).
1512
+ *
1513
+ * SCOPE / findings #3 + #4 — what this send path does and does NOT do today:
1514
+ * - #4: it delivers the content over the direct /cello/content/1.0.0 P2P
1515
+ * stream only. It does NOT also submit a K_local-SIGNED content_hash leaf to
1516
+ * the RELAY on /cello/relay/1.0.0 (EARS behavior #1). That relay hash-submit
1517
+ * is MSG-001's scope; AC-001's "relay log shows a hash_submit" evidence is
1518
+ * produced once MSG-001 lands.
1519
+ * - #3: because there is no relay yet, the sequence number cello_send returns
1520
+ * is the LOCAL leaf index, not a relay-assigned canonical global sequence.
1521
+ * Each daemon appends leaves in its own LOCAL observation order, so two
1522
+ * daemons' roots agree only under perfectly ping-ponged traffic. Canonical
1523
+ * cross-process ordering (and thus AC-002 root agreement under concurrent
1524
+ * bidirectional traffic) requires the relay-assigned sequence from MSG-001.
1525
+ */
1526
+ async sendContent(agentName, sessionId, content, contentHash, correlationId) {
1527
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
1528
+ if (!entry) {
1529
+ return { ok: false, reason: "session_node_unavailable", error: "no active session node for this session" };
1530
+ }
1531
+ // R1 (MSG-001-3b): witness the message-leaf HASH to the relay FIRST, INDEPENDENT of
1532
+ // direct delivery. The relay is the ordering authority (Structure 2): it assigns the
1533
+ // canonical sequence from the hash whether or not the counterparty is reachable for direct
1534
+ // content. So an OFFLINE recipient still gets a sequence, and the parked content is later
1535
+ // recovered AT that sequence (DOD-MSG-4 recovery-not-desync). The relay only ever sees the
1536
+ // hash (INV-3). Best-effort: a relay miss degrades to local-only sequencing. Previously this
1537
+ // ran AFTER a successful direct send, so an offline recipient's content got NO sequence — the
1538
+ // gap R1 closes.
1539
+ // DOD-MSG-4 (self-ordering content frame): the relay's committed ordering record for this leaf,
1540
+ // captured from the hash submit so it can be stamped into the content frame (and the parked
1541
+ // entry). Undefined if the relay is unreachable / an old relay — the receiver then falls back to
1542
+ // the leaf_deliver witness stream / arrival order.
1543
+ let orderingS1;
1544
+ let orderingS2;
1545
+ if (entry.relayClient && entry.relaySessionIdBytes) {
1546
+ try {
1547
+ const witnessed = await entry.relayClient.submitMessageHash(entry.node, entry.relaySessionIdBytes, contentHash);
1548
+ if (witnessed.ok) {
1549
+ orderingS1 = witnessed.structure1_cbor;
1550
+ orderingS2 = witnessed.structure2_cbor;
1551
+ this.#logger.info("session.relay.hash.submitted", {
1552
+ sessionId,
1553
+ sequenceNumber: witnessed.sequence_number,
1554
+ correlationId,
1555
+ });
1556
+ }
1557
+ else {
1558
+ this.#logger.warn("session.relay.hash.submit.failed", {
1559
+ sessionId,
1560
+ reason: witnessed.reason,
1561
+ correlationId,
1562
+ });
1563
+ }
1564
+ }
1565
+ catch (relayErr) {
1566
+ this.#logger.warn("session.relay.hash.submit.failed", {
1567
+ sessionId,
1568
+ reason: relayErr instanceof Error ? relayErr.message : String(relayErr),
1569
+ correlationId,
1570
+ });
1571
+ }
1572
+ }
1573
+ // Attempt direct peer↔peer content delivery. On success the receiver's `persisted` ACK
1574
+ // resolves the awaiting timer; on failure (counterparty offline) the hash is already
1575
+ // witnessed above, so the caller / TTF path parks the SEALED content to the relay
1576
+ // store-and-forward backstop and the recipient recovers it at the witnessed sequence (2b).
1577
+ try {
1578
+ const stream = await entry.node.newStream(entry.counterpartySessionPeerId, CELLO_CONTENT_PROTOCOL_ID);
1579
+ // AC-001/AC-003: arm the TTF tracking BEFORE the frame goes on the wire. The
1580
+ // receiver's `persisted` ACK can come back fast (in-process / low-latency
1581
+ // transports), so registering the awaiting entry after send would let the ACK
1582
+ // race ahead of it and be dropped — the timer would then spuriously fire. The
1583
+ // content is delivered to the wire but NOT yet confirmed persisted; the ACK
1584
+ // resolves it (content.delivery.acked) and TTF expiry hands it to the park
1585
+ // backstop. The correlationId rides in the frame so the receiver's
1586
+ // session.content.received shares ONE flow id with the sender.
1587
+ this.#trackAwaitingAck(agentName, sessionId, content, contentHash, correlationId, orderingS1, orderingS2);
1588
+ const frame = CBOR_ENC.encode({
1589
+ type: "content_frame",
1590
+ session_id: sessionId,
1591
+ content_hash: contentHash,
1592
+ content_bytes: content,
1593
+ correlation_id: correlationId,
1594
+ // DOD-MSG-4 (self-ordering): the relay's signed ordering record, so the receiver verifies +
1595
+ // orders from the frame ALONE (no dependence on the separate leaf_deliver witness timing).
1596
+ // structure1_cbor = sender-signed bytes (verify); structure2_cbor = relay's committed seq +
1597
+ // prev_root (order). Omitted if the relay was unreachable — receiver falls back to the witness.
1598
+ structure1_cbor: orderingS1,
1599
+ structure2_cbor: orderingS2,
1600
+ });
1601
+ stream.send(lp.encode.single(frame));
1602
+ try {
1603
+ await stream.close();
1604
+ }
1605
+ catch { /* best-effort close */ }
1606
+ return { ok: true };
1607
+ }
1608
+ catch (err) {
1609
+ // The send failed after (possibly) arming the awaiting tracking — drop it so a
1610
+ // never-delivered frame does not later fire a spurious TTF park.
1611
+ this.#untrackAwaitingAck(agentName, sessionId, contentHash);
1612
+ // 2b: direct delivery failed (counterparty offline). The hash is already witnessed (R1, the
1613
+ // sequence is assigned), so deposit the content to the relay store-and-forward backstop now;
1614
+ // the recipient pulls + recovers it on next online (DOD-MSG-3/4).
1615
+ this.#parkContent(agentName, sessionId, Buffer.from(contentHash).toString("hex"), content, orderingS1, orderingS2);
1616
+ // error.message extracted — never [object Object]. libp2p/cross-package errors are not
1617
+ // always `instanceof Error` in this realm, so fall back to a message property / JSON.
1618
+ const errMsg = err instanceof Error
1619
+ ? err.message
1620
+ : err && typeof err === "object" && typeof err.message === "string"
1621
+ ? err.message
1622
+ : (() => {
1623
+ try {
1624
+ return JSON.stringify(err);
1625
+ }
1626
+ catch {
1627
+ return String(err);
1628
+ }
1629
+ })();
1630
+ return { ok: false, reason: "session_stream_unavailable", error: errMsg };
1631
+ }
1632
+ }
1633
+ /**
1634
+ * M7 DOD-SPINE-7: submit THIS party's SEAL ctrl leaf (0x02) to the relay witness.
1635
+ * Structure: content_hash = SHA-256(0x02 || encodeSealPayload({session_id, final_root,
1636
+ * close_timestamp, "PENDING"})), where final_root is the daemon's OWN tree root. Two
1637
+ * distinct-sender SEAL leaves in the relay's log trigger the relay's #maybeProcessSeal
1638
+ * → directory processSeal (rebuild + verify the signed chain) → FROST notarization →
1639
+ * session_sealed. Requires an active relay client; the caller falls back to the
1640
+ * directory-mediated path when this returns relay_unavailable.
1641
+ */
1642
+ async submitSealLeaf(agentName, sessionId, correlationId) {
1643
+ const sealKey = this.#k(agentName, sessionId);
1644
+ const entry = this.#activeNodes.get(sealKey);
1645
+ if (!entry)
1646
+ return { ok: false, reason: "session_node_unavailable" };
1647
+ if (!entry.relayClient || !entry.relaySessionIdBytes)
1648
+ return { ok: false, reason: "relay_unavailable" };
1649
+ // M7-UPGRADE-002 idempotency: this party submits its responder SEAL leaf AT MOST ONCE per
1650
+ // session. BOTH cello_close_session and the auto-acknowledge path call here; the first to reach
1651
+ // this point wins, the second short-circuits. The check+set is SYNCHRONOUS (before any await) so
1652
+ // two near-simultaneous triggers (e.g. B's own close racing A's delivered SEAL ctrl leaf) cannot
1653
+ // both submit. Cleared below on a relay submit failure so a genuine retry can proceed.
1654
+ if (this.#responderSealSubmitted.has(sealKey)) {
1655
+ return { ok: false, reason: "responder_seal_already_submitted" };
1656
+ }
1657
+ this.#responderSealSubmitted.add(sealKey);
1658
+ const finalRootHex = this.getSessionTreeRootHex(agentName, sessionId);
1659
+ const sealPayload = encodeSealPayload({
1660
+ session_id: entry.relaySessionIdBytes,
1661
+ final_root: new Uint8Array(Buffer.from(finalRootHex, "hex")),
1662
+ close_timestamp: Date.now(),
1663
+ attestation: "PENDING",
1664
+ });
1665
+ // content_hash = SHA-256(0x02 || seal_payload) — the ctrl leaf kind byte is 0x02.
1666
+ const contentHash = new Uint8Array(createHash("sha256").update(new Uint8Array([LEAF_KIND_CTRL])).update(sealPayload).digest());
1667
+ const result = await entry.relayClient.submitLeaf(entry.node, entry.relaySessionIdBytes, contentHash, LEAF_KIND_CTRL);
1668
+ if (!result.ok) {
1669
+ // Clear the idempotency mark so a genuine retry (agent close / reconnect) can proceed (DB-001).
1670
+ this.#responderSealSubmitted.delete(sealKey);
1671
+ this.#logger.warn("session.seal.leaf.submit.failed", { sessionId, reason: result.reason, correlationId });
1672
+ return { ok: false, reason: result.reason };
1673
+ }
1674
+ // SESSION-002: the reported_root for a unilateral seal is the content-hash root the
1675
+ // local tree WOULD have with this SEAL ctrl leaf appended — the same root the directory
1676
+ // rebuilds from the relay's content-hash chain (the relay records the identical
1677
+ // content_hash for this ctrl leaf). Computed without mutating the durable tree /
1678
+ // message_count, so the bilateral + interrupted seal paths are unaffected.
1679
+ const contentHashHex = Buffer.from(contentHash).toString("hex");
1680
+ const reportedRootHex = this.getSessionTree(agentName, sessionId).rootWithAppendedHex(contentHashHex);
1681
+ this.#logger.info("session.seal.leaf.submitted", {
1682
+ sessionId,
1683
+ sequenceNumber: result.sequence_number,
1684
+ correlationId,
1685
+ });
1686
+ // M7-UPGRADE-002: #responderSealSubmitted was set synchronously at the top of this method —
1687
+ // the guard now blocks any second submit (auto-ack OR a redelivered counterparty SEAL ctrl leaf).
1688
+ return { ok: true, sequenceNumber: result.sequence_number, reportedRootHex };
1689
+ }
1690
+ /**
1691
+ * CELLO-M7-UPGRADE-001 (DOD-UP-1): readiness of a session for B to RATIFY a unilateral seal
1692
+ * (the returning absent party). This is the SAME verifiability bar as the UP-2 auto-ack gate:
1693
+ *
1694
+ * - `known`: the session exists locally with its content (B has a transcript to ratify). After a
1695
+ * restart B reloads it from SQLite, and autoRecoverForAgent re-pulls any parked content first.
1696
+ * - `tampered`: the content cross-check flagged a content_hash mismatch (#contentDesynced) — B
1697
+ * must NEVER ratify content it could not integrity-verify (the KERNEL refusal, AC-003).
1698
+ *
1699
+ * The directory separately verifies B's ack signature is genuine; B separately verifies the
1700
+ * unilateral cert signature (R1 is authentic). NOTE: a full "B's frontier covers R1's tail"
1701
+ * completeness check (the `desynced` reason) requires the deferred MSG-001-3b canonical-sequence
1702
+ * reconciliation — same documented limitation as the UP-2 gate above.
1703
+ */
1704
+ getSealUpgradeReadiness(agentName, sessionId) {
1705
+ const record = this.getSessionRecord(agentName, sessionId);
1706
+ return {
1707
+ known: !!record,
1708
+ tampered: this.#contentDesynced.has(this.#k(agentName, sessionId)),
1709
+ };
1710
+ }
1711
+ /**
1712
+ * M7-UPGRADE-002: auto-acknowledge close (POSTMORTEM Workstream E / C-5). When B's daemon
1713
+ * ingests the COUNTERPARTY's SEAL control leaf and B has verified the content, B's OWN node
1714
+ * auto-co-signs + submits its responder SEAL leaf WITHOUT waiting for B's agent to call
1715
+ * cello_close_session — so a bilateral seal completes promptly instead of degrading to
1716
+ * unilateral on a slow/busy/crashed agent.
1717
+ *
1718
+ * SI-001 (non-negotiable): B's signature is ALWAYS produced by B's own node — submitSealLeaf
1719
+ * signs the responder SEAL leaf with B's K_local. We remove the agent PROMPT, never the SIGNER;
1720
+ * nothing here lets the directory or the peer synthesize B's acknowledgement.
1721
+ *
1722
+ * SI-002 (verifiability gate): auto-ack ONLY content B has verified. A session whose content
1723
+ * cross-check failed (content_hash_mismatch = tamper, recorded in #contentDesynced) is NEVER
1724
+ * auto-signed — it surfaces to the agent as a genuine decision point. DISAGREEMENT with the
1725
+ * content is NOT a gate failure (C-6): the gate is "can I verify integrity?", never "do I agree?"
1726
+ * — a verified-but-disliked tail is auto-sealed and the transcript speaks for B.
1727
+ *
1728
+ * Idempotent + non-throwing: marks #responderSealSubmitted BEFORE the async submit so a
1729
+ * redelivered ctrl leaf cannot double-submit; clears the mark on submit failure so a later
1730
+ * agent close / reconnect can still complete the seal (DB-001 — never a silent half-seal).
1731
+ */
1732
+ #maybeAutoAcknowledgeSeal(agentName, sessionId, correlationId) {
1733
+ const ackKey = this.#k(agentName, sessionId);
1734
+ // Idempotency: at most one responder seal per session (auto-ack or agent close).
1735
+ if (this.#responderSealSubmitted.has(ackKey))
1736
+ return;
1737
+ const record = this.getSessionRecord(agentName, sessionId);
1738
+ // Only an ACTIVE session auto-acks. A committed/sealing/sealed/interrupted session is out of
1739
+ // scope (already sealing, or needs the interrupted/upgrade path), not an auto-ack candidate.
1740
+ if (!record || record.status !== "active")
1741
+ return;
1742
+ // SI-002 verifiability gate: never auto-sign a session whose content we could not verify.
1743
+ // Today the ONLY tracked unverifiable cause is a content_hash mismatch = TAMPER (#contentDesynced
1744
+ // is set only there). Genuine tamper is a SECURITY event — log it at ERROR with the distinct
1745
+ // reason `content_tamper` so the AC-008 tamper alarm can fire (it keys on that reason). The other
1746
+ // two specced reasons — `desynced` (B's tree is behind the canonical sealed tail) and
1747
+ // `content_unverifiable` (parked content unrecoverable) — require the MSG-001-3b canonical-
1748
+ // sequence reconciliation that is deferred; they are reserved for that follow-on.
1749
+ if (this.#contentDesynced.has(ackKey)) {
1750
+ this.#logger.error("session.seal.autoack.skipped", {
1751
+ sessionId,
1752
+ reason: "content_tamper",
1753
+ correlationId,
1754
+ });
1755
+ // AC-002: the verifiability gate refused — surface counterparty_closing to B's agent as a
1756
+ // GENUINE decision point (the seal will not auto-complete; B must decide). Uses the existing
1757
+ // session-state push to the live MCP clients; best-effort (never throws out of this gate).
1758
+ try {
1759
+ this.#onSessionStateChanged?.(record.agent_name, sessionId, "counterparty_closing", record.counterparty_pubkey);
1760
+ }
1761
+ catch (err) {
1762
+ this.#logger.debug("session.state.notify.failed", {
1763
+ sessionId,
1764
+ reason: err instanceof Error ? err.message : String(err),
1765
+ });
1766
+ }
1767
+ return;
1768
+ }
1769
+ const entry = this.#activeNodes.get(ackKey);
1770
+ const responderPubkey = entry?.relayClient?.senderPubkeyHex ?? "unknown";
1771
+ // submitSealLeaf owns the #responderSealSubmitted idempotency mark (set synchronously at its
1772
+ // top), so the auto-ack does not pre-mark — it just reacts to the result.
1773
+ void this.submitSealLeaf(agentName, sessionId, correlationId)
1774
+ .then((result) => {
1775
+ if (result.ok) {
1776
+ // SI-001: the responder SEAL leaf was signed by B's OWN node (K_local) in submitSealLeaf.
1777
+ this.#logger.info("session.seal.autoacknowledged", {
1778
+ sessionId,
1779
+ responderPubkey,
1780
+ correlationId,
1781
+ });
1782
+ }
1783
+ else if (result.reason === "responder_seal_already_submitted") {
1784
+ // B's agent close already submitted the responder seal (it won the race) — nothing to do.
1785
+ return;
1786
+ }
1787
+ else {
1788
+ // Submission failed (e.g. relay path down) — the agent close / reconnect can still
1789
+ // complete the seal; never a silent half-seal (DB-001).
1790
+ this.#logger.warn("session.seal.autoack.skipped", {
1791
+ sessionId,
1792
+ reason: result.reason,
1793
+ correlationId,
1794
+ });
1795
+ }
1796
+ })
1797
+ .catch((err) => {
1798
+ this.#logger.warn("session.seal.autoack.skipped", {
1799
+ sessionId,
1800
+ reason: err instanceof Error ? err.message : String(err),
1801
+ correlationId,
1802
+ });
1803
+ });
1804
+ }
1805
+ /**
1806
+ * DAEMON-004: cross-check received content against its hash, append the
1807
+ * verified leaf to the daemon-owned tree, and buffer it for cello_receive.
1808
+ * A hash MISMATCH is genuine tamper — rejected without append or buffer.
1809
+ *
1810
+ * SCOPE / finding #5 — what this cross-check does and does NOT prove today:
1811
+ * `contentHash` here is carried in the SAME content_frame as `content`, so this
1812
+ * comparison only catches wire corruption of a single frame — it does NOT prove
1813
+ * the content matches what the sender independently committed. Full tamper-
1814
+ * evidence (EARS behavior #2) requires cross-checking against the K_local-signed
1815
+ * content_hash leaf the sender submits to the RELAY on a separate channel; that
1816
+ * relay hash-submit path is MSG-001's scope and does not exist yet. Until MSG-001
1817
+ * lands, a malicious sender that sends matching (content, hash) in one frame is
1818
+ * not detected here — only the relay-relayed signed leaf closes that gap.
1819
+ *
1820
+ * @returns the appended leaf index (as sequenceNumber) on success.
1821
+ */
1822
+ ingestReceivedContent(agentName, sessionId, content, contentHash, correlationId) {
1823
+ // The transcript is frozen ONLY once it is COMMITTED + signed — 'sealed' or
1824
+ // 'seal_interrupted_pending' (the bilateral seal commitment) — because a later FROST
1825
+ // notarization attests that exact root; a late leaf would diverge from it.
1826
+ //
1827
+ // MSG-001-3b recovery: a merely 'interrupted' session is NOT yet committed. The
1828
+ // counterparty's last message(s) may have been parked while this party was offline, so its
1829
+ // local transcript is INCOMPLETE (not frozen-final). Recovering that parked content COMPLETES
1830
+ // the local view to match the counterparty BEFORE the bilateral seal — it is not a resumption
1831
+ // (no new activity, no re-accept) and its root was never committed. So allow 'active' AND
1832
+ // 'interrupted'; reject only the two committed states. (No DB row = test-only path, allowed.)
1833
+ const record = this.getSessionRecord(agentName, sessionId);
1834
+ if (record && (record.status === "sealed" || record.status === "seal_interrupted_pending")) {
1835
+ this.#logger.warn("session.content.cross_check.failed", {
1836
+ sessionId,
1837
+ reason: "session_committed",
1838
+ currentStatus: record.status,
1839
+ correlationId,
1840
+ });
1841
+ return { ok: false, reason: "session_committed" };
1842
+ }
1843
+ const computed = createHash("sha256").update(new Uint8Array([0x00])).update(content).digest();
1844
+ const contentHashHex = Buffer.from(contentHash).toString("hex");
1845
+ if (Buffer.from(computed).toString("hex") !== contentHashHex) {
1846
+ this.#logger.warn("session.content.cross_check.failed", {
1847
+ sessionId,
1848
+ reason: "content_hash_mismatch",
1849
+ correlationId,
1850
+ });
1851
+ // M7-UPGRADE-002 (SI-002): a tamper makes this session's content unverifiable — the
1852
+ // auto-acknowledge gate must never auto-co-sign it. The session stays alive (DOD-MSG-7),
1853
+ // but the responder seal now requires the agent's explicit decision, not an auto-ack.
1854
+ this.#contentDesynced.add(this.#k(agentName, sessionId));
1855
+ return { ok: false, reason: "content_hash_mismatch" };
1856
+ }
1857
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
1858
+ const senderPubkey = entry?.counterpartyPubkey
1859
+ ?? this.getSessionRecord(agentName, sessionId)?.counterparty_pubkey
1860
+ ?? "unknown";
1861
+ // DOD-MSG-5: a content_hash satisfies AT MOST ONE Merkle leaf, exactly once. If this hash is
1862
+ // already a leaf in the tree — it arrived BOTH directly and via the relay-park backstop, or it
1863
+ // is a replay — do NOT append a second leaf and do NOT double-count it. The recipient already
1864
+ // holds this message at its assigned sequence. (In the normal single-delivery case this find is
1865
+ // -1, so the live/recover append paths are unchanged.)
1866
+ const existingIdx = this.getSessionTree(agentName, sessionId).indexOfHash(contentHashHex);
1867
+ if (existingIdx >= 0) {
1868
+ this.#logger.info("session.content.deduplicated", {
1869
+ sessionId,
1870
+ contentHashHex,
1871
+ sequenceNumber: existingIdx,
1872
+ correlationId,
1873
+ });
1874
+ // appendedCount 0 — a dedup appends NO new leaf, so a recover that re-pulls an already-ingested
1875
+ // entry (e.g. after auto-recover already drained it) must not count it as a fresh recovery.
1876
+ return { ok: true, leafIndex: existingIdx, sequenceNumber: existingIdx, appendedCount: 0 };
1877
+ }
1878
+ // DOD-MSG-4 (strict in-order gate): the RELAY is the ordering authority. If B holds the
1879
+ // canonical sequence for this hash (witnessed via leaf_deliver) and it is AHEAD of the next
1880
+ // expected leaf, HOLD the content rather than append it out of order. The missing in-between
1881
+ // sequence(s) are recovered from the relay mailbox; #releaseHeld then drains the held entries
1882
+ // in canonical order. This keeps the daemon-owned leaf index === the canonical sequence by
1883
+ // construction, so two parties' roots match even when direct delivery and park-recovery
1884
+ // interleave. With NO witness for this hash (relay-degraded) B falls back to arrival-order
1885
+ // append — the pre-MSG-4 behavior (no ordering signal available).
1886
+ const key = this.#k(agentName, sessionId);
1887
+ const canonicalSeq = this.#witnessedSeq.get(key)?.get(contentHashHex);
1888
+ const nextExpected = this.getSessionTree(agentName, sessionId).size();
1889
+ if (canonicalSeq !== undefined && canonicalSeq > nextExpected) {
1890
+ let held = this.#heldContent.get(key);
1891
+ if (!held) {
1892
+ held = new Map();
1893
+ this.#heldContent.set(key, held);
1894
+ }
1895
+ held.set(canonicalSeq, { content, contentHashHex, correlationId });
1896
+ this.#logger.info("session.content.held", {
1897
+ sessionId,
1898
+ canonicalSeq,
1899
+ nextExpected,
1900
+ gap: canonicalSeq - nextExpected,
1901
+ correlationId,
1902
+ });
1903
+ // Held content is NOT yet a durable leaf, so it is deliberately NOT acknowledged `persisted`
1904
+ // (the caller checks `held`). The sender's TTF→park backstop and the recover/dedup path
1905
+ // guarantee eventual delivery; B never claims persisted for content it only holds in memory.
1906
+ return { ok: true, leafIndex: canonicalSeq, sequenceNumber: canonicalSeq, held: true };
1907
+ }
1908
+ if (canonicalSeq !== undefined && canonicalSeq < nextExpected) {
1909
+ // Contradiction (review finding #2): the witness says this hash belongs BEHIND the current
1910
+ // tree, yet the dedup scan above found no existing leaf for it — so it is neither a duplicate
1911
+ // nor in canonical order. This is only reachable via the accepted content-before-witness /
1912
+ // relay-degraded interleaving (the next sub-increment's pending-witness buffer closes it). Log
1913
+ // it loudly (the leaf-index===sequence invariant is at risk) and append rather than DROP the
1914
+ // message — losing content is worse than a transient mis-order the seal cross-check will catch.
1915
+ this.#logger.warn("session.content.sequence_behind_tree", {
1916
+ sessionId,
1917
+ canonicalSeq,
1918
+ nextExpected,
1919
+ correlationId,
1920
+ });
1921
+ }
1922
+ const { leafIndex } = this.#appendVerifiedContent(agentName, sessionId, content, contentHashHex, senderPubkey, correlationId);
1923
+ // A just-appended leaf may unblock held out-of-order arrivals whose turn is now next.
1924
+ // appendedCount = this leaf + any held leaves released by it, so a caller (recover) can tally the
1925
+ // leaves ACTUALLY written, not just the directly-ingested one (review #3).
1926
+ const released = this.#releaseHeld(agentName, sessionId, senderPubkey);
1927
+ return { ok: true, leafIndex, sequenceNumber: leafIndex, appendedCount: 1 + released };
1928
+ }
1929
+ /**
1930
+ * DOD-MSG-4: record the relay-witnessed canonical sequence for a content hash. The relay is the
1931
+ * ordering authority (Structure 2): it assigns each message a sequence from its hash and delivers
1932
+ * B the (content_hash -> sequence) binding via leaf_deliver. The strict-in-order gate orders the
1933
+ * transcript by THIS — never a sender-stamped field. Also advances the per-session high-water mark
1934
+ * (the largest witnessed sequence) reserved for the future catch-up-before-live increment. Idempotent.
1935
+ */
1936
+ recordWitnessedSequence(agentName, sessionId, contentHashHex, sequenceNumber) {
1937
+ if (sequenceNumber < 0)
1938
+ return;
1939
+ const key = this.#k(agentName, sessionId);
1940
+ let map = this.#witnessedSeq.get(key);
1941
+ if (!map) {
1942
+ map = new Map();
1943
+ this.#witnessedSeq.set(key, map);
1944
+ }
1945
+ map.set(contentHashHex, sequenceNumber);
1946
+ const hw = this.#highWaterSeq.get(key) ?? -1;
1947
+ if (sequenceNumber > hw)
1948
+ this.#highWaterSeq.set(key, sequenceNumber);
1949
+ }
1950
+ /**
1951
+ * DOD-MSG-4: the relay's high-water canonical sequence for this session (largest witnessed leaf),
1952
+ * or -1 if none. Exposed for the next sub-increment (catch-up-before-live — on reconnect B holds
1953
+ * live arrivals until its tree reaches this, because it has more to recover than it has appended);
1954
+ * NOT yet consumed by the gate. Also `recordWitnessedSequence` maintains it.
1955
+ */
1956
+ getHighWaterSeq(agentName, sessionId) {
1957
+ return this.#highWaterSeq.get(this.#k(agentName, sessionId)) ?? -1;
1958
+ }
1959
+ /** DOD-MSG-4 / DAEMON-004: append a verified message leaf and buffer it for cello_receive. */
1960
+ #appendVerifiedContent(agentName, sessionId, content, contentHashHex, senderPubkey, correlationId) {
1961
+ const { leafIndex } = this.appendSessionLeaf(agentName, sessionId, "msg", contentHashHex, correlationId);
1962
+ // DOD-LOG-1: persist the readable RECEIVED plaintext to the durable transcript, keyed by the
1963
+ // canonical leaf sequence so it joins the committed hash chain (survives restart; INV-3 — the
1964
+ // relay/directory never see this plaintext, only the hash).
1965
+ this.recordTranscriptMessage(agentName, sessionId, leafIndex, "received", content, correlationId);
1966
+ const recvKey = this.#k(agentName, sessionId);
1967
+ // Review finding #6: the witness for this hash has done its ordering job once the leaf is
1968
+ // appended — drop it so #witnessedSeq stays proportional to held/pending content, not the whole
1969
+ // transcript. A later replay of the same hash is still caught by the dedup leaf-scan, which is
1970
+ // independent of the witness map.
1971
+ this.#witnessedSeq.get(recvKey)?.delete(contentHashHex);
1972
+ let buf = this.#receivedContent.get(recvKey);
1973
+ if (!buf) {
1974
+ buf = [];
1975
+ this.#receivedContent.set(recvKey, buf);
1976
+ }
1977
+ buf.push({ contentHex: Buffer.from(content).toString("hex"), senderPubkey, sequenceNumber: leafIndex });
1978
+ this.#logger.info("session.content.received", {
1979
+ sessionId,
1980
+ senderPubkey,
1981
+ contentHashHex,
1982
+ sequenceNumber: leafIndex,
1983
+ correlationId,
1984
+ });
1985
+ return { leafIndex };
1986
+ }
1987
+ /**
1988
+ * DOD-MSG-4: drain held out-of-order content in canonical order. After a leaf is appended, any
1989
+ * held entry whose canonical sequence equals the new next-expected index is now in order — append
1990
+ * it, then check again (a single fill can release a run of consecutive held messages).
1991
+ */
1992
+ #releaseHeld(agentName, sessionId, senderPubkey) {
1993
+ const key = this.#k(agentName, sessionId);
1994
+ const held = this.#heldContent.get(key);
1995
+ if (!held)
1996
+ return 0;
1997
+ let released = 0;
1998
+ for (;;) {
1999
+ const nextExpected = this.getSessionTree(agentName, sessionId).size();
2000
+ const entry = held.get(nextExpected);
2001
+ if (!entry)
2002
+ break;
2003
+ held.delete(nextExpected);
2004
+ this.#appendVerifiedContent(agentName, sessionId, entry.content, entry.contentHashHex, senderPubkey, entry.correlationId);
2005
+ released++;
2006
+ this.#logger.info("session.content.released", {
2007
+ sessionId,
2008
+ sequenceNumber: nextExpected,
2009
+ correlationId: entry.correlationId,
2010
+ });
2011
+ if (held.size === 0) {
2012
+ this.#heldContent.delete(key);
2013
+ break;
2014
+ }
2015
+ }
2016
+ return released;
2017
+ }
2018
+ /** DAEMON-004: pop the oldest verified received content for cello_receive. */
2019
+ takeReceivedContent(agentName, sessionId) {
2020
+ const buf = this.#receivedContent.get(this.#k(agentName, sessionId));
2021
+ if (!buf || buf.length === 0)
2022
+ return null;
2023
+ return buf.shift() ?? null;
2024
+ }
2025
+ // ─── CELLO-M7-MSG-001: delivery ACK / TTF tracking (send side) ──────────────
2026
+ /**
2027
+ * Arm awaiting-ACK tracking for a just-sent content frame (AC-001/AC-003). Records
2028
+ * the content + a TTF timer keyed by content hash; a `persisted` ACK on the inbound
2029
+ * content stream resolves it, TTF expiry hands it to the park backstop. The timer is
2030
+ * `unref`'d so an in-flight wait never keeps the daemon process (or a test runner)
2031
+ * alive on its own.
2032
+ */
2033
+ #trackAwaitingAck(agentName, sessionId, content, contentHash, correlationId, structure1Cbor, structure2Cbor) {
2034
+ const hashHex = Buffer.from(contentHash).toString("hex");
2035
+ const ackKey = this.#k(agentName, sessionId);
2036
+ let bySession = this.#awaitingAck.get(ackKey);
2037
+ if (!bySession) {
2038
+ bySession = new Map();
2039
+ this.#awaitingAck.set(ackKey, bySession);
2040
+ }
2041
+ // Replace any prior timer for the same (session, hash) so we never leak a timer.
2042
+ const prior = bySession.get(hashHex);
2043
+ if (prior)
2044
+ clearTimeout(prior.timer);
2045
+ const timer = setTimeout(() => {
2046
+ this.#handleTtfExpiry(agentName, sessionId, hashHex);
2047
+ }, this.#contentTtfMs);
2048
+ if (typeof timer.unref === "function")
2049
+ timer.unref();
2050
+ // DOD-MSG-4 (2b, review #1): retain the relay's ordering record so a TTF-triggered park carries
2051
+ // it too (not only the direct-dial-fail park) — so a TTF-parked entry is self-ordering on recover.
2052
+ bySession.set(hashHex, { timer, content, correlationId, structure1Cbor, structure2Cbor });
2053
+ }
2054
+ /**
2055
+ * Resolve an awaiting-ACK entry on a `persisted` delivery ACK (AC-001/AC-002): cancel
2056
+ * the TTF timer, emit content.delivery.acked, and clear the durable backstop entry.
2057
+ * A `received`-level ACK is NOT handled here — the protocol acts on `persisted` only,
2058
+ * so a received ACK leaves the timer armed.
2059
+ */
2060
+ #resolveAwaitingAck(agentName, sessionId, contentHash) {
2061
+ const hashHex = Buffer.from(contentHash).toString("hex");
2062
+ const ackKey = this.#k(agentName, sessionId);
2063
+ const bySession = this.#awaitingAck.get(ackKey);
2064
+ const entry = bySession?.get(hashHex);
2065
+ if (!entry || !bySession)
2066
+ return; // unknown / already resolved — idempotent
2067
+ clearTimeout(entry.timer);
2068
+ bySession.delete(hashHex);
2069
+ if (bySession.size === 0)
2070
+ this.#awaitingAck.delete(ackKey);
2071
+ this.#logger.info("content.delivery.acked", {
2072
+ sessionId,
2073
+ contentHash: hashHex,
2074
+ level: "persisted",
2075
+ correlationId: entry.correlationId,
2076
+ });
2077
+ // Clear the durable crash-backstop entry so the startup flush does not re-park
2078
+ // already-delivered content.
2079
+ try {
2080
+ this.#onAwaitingPersisted?.(agentName, sessionId, hashHex);
2081
+ }
2082
+ catch (err) {
2083
+ this.#logger.error("content.delivery.ack.backstop.failed", {
2084
+ sessionId, contentHash: hashHex, error: err instanceof Error ? err.message : String(err),
2085
+ });
2086
+ }
2087
+ }
2088
+ /**
2089
+ * TTF timer fired with no `persisted` ACK (AC-003/AC-019): hand the un-acked content
2090
+ * to the park backstop (the durable retry_queue today; the relay store-and-forward
2091
+ * deposit in 3b). The session is never killed and the operator is never interrupted —
2092
+ * parking is best-effort durability.
2093
+ */
2094
+ #handleTtfExpiry(agentName, sessionId, hashHex) {
2095
+ const ackKey = this.#k(agentName, sessionId);
2096
+ const bySession = this.#awaitingAck.get(ackKey);
2097
+ const entry = bySession?.get(hashHex);
2098
+ if (!entry || !bySession)
2099
+ return;
2100
+ bySession.delete(hashHex);
2101
+ if (bySession.size === 0)
2102
+ this.#awaitingAck.delete(ackKey);
2103
+ this.#logger.debug("content.delivery.ttf_expired", { sessionId, contentHash: hashHex });
2104
+ try {
2105
+ this.#onAwaitingTtf?.(agentName, sessionId, hashHex, entry.content);
2106
+ }
2107
+ catch (err) {
2108
+ this.#logger.error("content.park.backstop.failed", {
2109
+ sessionId, contentHash: hashHex, error: err instanceof Error ? err.message : String(err),
2110
+ });
2111
+ }
2112
+ // 2b: delivered to the wire but never confirmed `persisted` — deposit it to the relay
2113
+ // store-and-forward so the recipient recovers it (at the witnessed sequence). The durable
2114
+ // awaiting entry above remains the crash backstop. Carry the retained ordering record (review #1)
2115
+ // so a TTF-parked entry self-orders on recover, exactly like the direct-dial-fail park.
2116
+ this.#parkContent(agentName, sessionId, hashHex, entry.content, entry.structure1Cbor, entry.structure2Cbor);
2117
+ }
2118
+ /**
2119
+ * Send an unsigned `persisted` delivery ACK back to the sender over the same
2120
+ * /cello/content/1.0.0 protocol (AC-001). Best-effort: authentication is the Noise
2121
+ * session channel, so the ACK carries no signature; a failed ACK send is logged and
2122
+ * the sender recovers via its TTF/recovery path rather than a thrown error here.
2123
+ */
2124
+ async #sendDeliveryAck(agentName, sessionId, contentHash, correlationId) {
2125
+ const entry = this.#activeNodes.get(this.#k(agentName, sessionId));
2126
+ if (!entry)
2127
+ return;
2128
+ try {
2129
+ const stream = await entry.node.newStream(entry.counterpartySessionPeerId, CELLO_CONTENT_PROTOCOL_ID);
2130
+ const frame = CBOR_ENC.encode({
2131
+ type: "content_delivery_ack",
2132
+ session_id: sessionId,
2133
+ content_hash: contentHash,
2134
+ level: "persisted",
2135
+ correlation_id: correlationId,
2136
+ });
2137
+ stream.send(lp.encode.single(frame));
2138
+ try {
2139
+ await stream.close();
2140
+ }
2141
+ catch { /* best-effort close */ }
2142
+ }
2143
+ catch (err) {
2144
+ this.#logger.warn("content.delivery.ack.send.failed", {
2145
+ sessionId,
2146
+ contentHash: Buffer.from(contentHash).toString("hex"),
2147
+ error: err instanceof Error ? err.message : String(err),
2148
+ correlationId,
2149
+ });
2150
+ }
2151
+ }
2152
+ /** Cancel and drop a single awaiting-ACK entry (e.g. the send failed after arming). */
2153
+ #untrackAwaitingAck(agentName, sessionId, contentHash) {
2154
+ const hashHex = Buffer.from(contentHash).toString("hex");
2155
+ const ackKey = this.#k(agentName, sessionId);
2156
+ const bySession = this.#awaitingAck.get(ackKey);
2157
+ const entry = bySession?.get(hashHex);
2158
+ if (!entry || !bySession)
2159
+ return;
2160
+ clearTimeout(entry.timer);
2161
+ bySession.delete(hashHex);
2162
+ if (bySession.size === 0)
2163
+ this.#awaitingAck.delete(ackKey);
2164
+ }
2165
+ /** Cancel and drop all awaiting-ACK timers for a session (teardown). */
2166
+ #clearAwaitingForSession(agentName, sessionId) {
2167
+ const ackKey = this.#k(agentName, sessionId);
2168
+ const bySession = this.#awaitingAck.get(ackKey);
2169
+ if (!bySession)
2170
+ return;
2171
+ for (const entry of bySession.values())
2172
+ clearTimeout(entry.timer);
2173
+ this.#awaitingAck.delete(ackKey);
2174
+ }
2175
+ #loadTreeFromDb(agentName, sessionId) {
2176
+ if (!this.#db)
2177
+ return SessionTree.empty();
2178
+ const rows = this.#db
2179
+ .prepare("SELECT leaf_kind, leaf_hash_hex FROM session_tree_leaves WHERE agent_name = ? AND session_id = ? ORDER BY leaf_index ASC")
2180
+ .all(agentName, sessionId);
2181
+ return SessionTree.fromLeaves(rows.map((r) => ({ kind: r.leaf_kind === "ctrl" ? "ctrl" : "msg", hashHex: r.leaf_hash_hex })));
2182
+ }
2183
+ /**
2184
+ * DAEMON-004: register the /cello/content/1.0.0 handler on a session node so
2185
+ * inbound content_frames are decoded, cross-checked, and ingested.
2186
+ */
2187
+ // Awaited by createSessionNode / acceptSession so the /cello/content/1.0.0 handler
2188
+ // is provably registered before the caller returns (and thus before any peer sends
2189
+ // content). libp2p registers the protocol synchronously today, but awaiting removes
2190
+ // the fragile dependency on that internal timing (review L4).
2191
+ async #registerContentHandler(agentName, sessionId, node, _counterpartyPubkey) {
2192
+ try {
2193
+ await node.handle(CELLO_CONTENT_PROTOCOL_ID, (stream) => {
2194
+ void this.#handleContentStream(agentName, sessionId, stream);
2195
+ });
2196
+ }
2197
+ catch (err) {
2198
+ this.#logger.error("session.content.handler.register.failed", {
2199
+ sessionId,
2200
+ error: err instanceof Error ? err.message : String(err),
2201
+ });
2202
+ }
2203
+ }
2204
+ /**
2205
+ * #recordFrameOrdering (defined below) — DOD-MSG-4 (self-ordering content frame): verify the relay's signed ordering record carried IN the
2206
+ * content frame and record the canonical sequence for the strict-in-order gate — so ordering does
2207
+ * not depend on the separate leaf_deliver witness arriving first. Best-effort: any failure (malformed,
2208
+ * hash mismatch, bad signature, wrong signer) is logged and ignored — the content still ingests and
2209
+ * orders via the witness stream / arrival, so a bad record cannot block delivery.
2210
+ *
2211
+ * structure1_cbor = [1, content_hash(32), sender_pubkey(32), session_id(16), last_seen_seq, ts] —
2212
+ * the EXACT bytes the sender signed (needed to verify; Structure2 omits session_id/last_seen/ts).
2213
+ * structure2_cbor = [seq, sender_pubkey, content_hash, sender_signature, scan_result, prev_root].
2214
+ */
2215
+ /**
2216
+ * DOD-MSG-4 (2b): encode the pre-seal park envelope `[1, content, structure1_cbor|null,
2217
+ * structure2_cbor|null]`. The daemon seals THIS (not the bare content) so a parked entry carries
2218
+ * its own signed ordering record — recover then orders it the same way the direct frame does. The
2219
+ * relay still only ever holds the sealed ciphertext (INV-3 preserved).
2220
+ */
2221
+ encodeParkEnvelope(content, structure1Cbor, structure2Cbor) {
2222
+ return CBOR_ENC.encode([1, content, structure1Cbor ?? null, structure2Cbor ?? null]);
2223
+ }
2224
+ /**
2225
+ * DOD-MSG-4 (2b): decode a park envelope produced by encodeParkEnvelope. Falls back to treating the
2226
+ * whole plaintext as raw content (no ordering record) for entries sealed the old way (e.g. test
2227
+ * fixtures that seal bare content) — so recover stays backward-compatible.
2228
+ */
2229
+ decodeParkEnvelope(plaintext) {
2230
+ try {
2231
+ const arr = decode(plaintext);
2232
+ // Discriminator (review #2): a 4-element array tagged with version 1 + a byte-string content.
2233
+ // The content-hash cross-check in ingestReceivedContent is the real safety net, but narrowing
2234
+ // to length 4 makes a bare-content false-positive astronomically less likely still.
2235
+ if (Array.isArray(arr) && arr.length === 4 && arr[0] === 1 && arr[1] instanceof Uint8Array) {
2236
+ return {
2237
+ content: arr[1],
2238
+ structure1Cbor: arr[2] instanceof Uint8Array ? arr[2] : undefined,
2239
+ structure2Cbor: arr[3] instanceof Uint8Array ? arr[3] : undefined,
2240
+ };
2241
+ }
2242
+ }
2243
+ catch {
2244
+ /* not an envelope — fall through to raw */
2245
+ }
2246
+ return { content: plaintext };
2247
+ }
2248
+ /**
2249
+ * DOD-MSG-4 (2b): public entry for the recover path to verify + record a parked entry's ordering
2250
+ * record (the recover handler lives in daemon.ts, which has no access to the private method).
2251
+ */
2252
+ recordOrderingRecord(agentName, sessionId, structure1Cbor, structure2Cbor, contentHash, correlationId) {
2253
+ this.#recordFrameOrdering(agentName, sessionId, structure1Cbor, structure2Cbor, contentHash, correlationId, "park");
2254
+ }
2255
+ #recordFrameOrdering(agentName, sessionId, structure1Cbor, structure2Cbor, contentHash, correlationId, source = "content_frame") {
2256
+ try {
2257
+ const s1 = decode(structure1Cbor);
2258
+ const s2 = decode(structure2Cbor);
2259
+ const s1Hash = s1?.[1];
2260
+ const s1Pubkey = s1?.[2];
2261
+ const seq = typeof s2?.[0] === "number" ? s2[0] : -1;
2262
+ const s2Sig = s2?.[3];
2263
+ if (!(s1Hash instanceof Uint8Array) || !(s1Pubkey instanceof Uint8Array) || !(s2Sig instanceof Uint8Array) || seq < 1) {
2264
+ this.#logger.warn("session.content.ordering.malformed", { sessionId, correlationId });
2265
+ return;
2266
+ }
2267
+ // The framed ordering record must bind to THIS content (its hash) — else it orders the wrong bytes.
2268
+ const contentHashHex = Buffer.from(contentHash).toString("hex");
2269
+ if (Buffer.from(s1Hash).toString("hex") !== contentHashHex) {
2270
+ this.#logger.warn("session.content.ordering.hash_mismatch", { sessionId, correlationId });
2271
+ return;
2272
+ }
2273
+ // Verify the SENDER's Ed25519 signature over the exact signed bytes (structure1_cbor) — the same
2274
+ // check the relay performs. Proves the counterparty committed to this (content_hash @ sequence).
2275
+ if (!verify(s1Pubkey, structure1Cbor, s2Sig)) {
2276
+ this.#logger.warn("session.content.ordering.bad_signature", { sessionId, correlationId });
2277
+ return;
2278
+ }
2279
+ // Sovereign-node cross-check: the signer MUST be THIS session's counterparty, not an unrelated
2280
+ // key. FAIL CLOSED (review L) — if the counterparty pubkey is unknown we cannot prove the signer,
2281
+ // so we do NOT trust the framed ordering record (fall back to the witness stream / arrival). The
2282
+ // "B does not trust the counterparty for ordering" invariant is non-negotiable; never fail open.
2283
+ const counterparty = this.getSessionRecord(agentName, sessionId)?.counterparty_pubkey;
2284
+ if (!counterparty || Buffer.from(s1Pubkey).toString("hex") !== counterparty) {
2285
+ this.#logger.warn("session.content.ordering.wrong_signer", {
2286
+ sessionId,
2287
+ reason: counterparty ? "signer_not_counterparty" : "counterparty_unknown",
2288
+ correlationId,
2289
+ });
2290
+ return;
2291
+ }
2292
+ // Verified — record the relay-assigned canonical sequence (1-based → 0-based leaf index) for the gate.
2293
+ this.recordWitnessedSequence(agentName, sessionId, contentHashHex, seq - 1);
2294
+ this.#logger.info("session.content.ordering.recorded", {
2295
+ sessionId,
2296
+ canonicalSeq: seq - 1,
2297
+ source,
2298
+ correlationId,
2299
+ });
2300
+ }
2301
+ catch (err) {
2302
+ this.#logger.warn("session.content.ordering.decode_failed", {
2303
+ sessionId,
2304
+ error: err instanceof Error ? err.message : String(err),
2305
+ correlationId,
2306
+ });
2307
+ }
2308
+ }
2309
+ async #handleContentStream(agentName, sessionId, stream) {
2310
+ const iter = lp.decode(stream)[Symbol.asyncIterator]();
2311
+ try {
2312
+ const result = await iter.next();
2313
+ if (result.done || result.value === undefined)
2314
+ return;
2315
+ const bytes = result.value instanceof Uint8Array ? result.value
2316
+ : Buffer.isBuffer(result.value) ? new Uint8Array(result.value)
2317
+ : result.value.slice();
2318
+ const frame = decode(bytes);
2319
+ const correlationId = typeof frame["correlation_id"] === "string" ? frame["correlation_id"] : undefined;
2320
+ // CELLO-M7-MSG-001 (AC-001/AC-002): a `persisted` delivery ACK arriving on the
2321
+ // same /cello/content/1.0.0 protocol resolves the sender's awaiting-ACK timer.
2322
+ // The protocol acts on `persisted` ONLY — any other level leaves the timer armed.
2323
+ if (frame["type"] === "content_delivery_ack") {
2324
+ const ackHash = frame["content_hash"];
2325
+ const level = frame["level"];
2326
+ if (ackHash instanceof Uint8Array && level === "persisted") {
2327
+ this.#resolveAwaitingAck(agentName, sessionId, ackHash);
2328
+ }
2329
+ return;
2330
+ }
2331
+ if (frame["type"] !== "content_frame")
2332
+ return;
2333
+ const contentBytes = frame["content_bytes"];
2334
+ const contentHash = frame["content_hash"];
2335
+ if (!(contentBytes instanceof Uint8Array) || !(contentHash instanceof Uint8Array))
2336
+ return;
2337
+ // DOD-MSG-4 (self-ordering content frame): if the frame carries the relay's signed ordering
2338
+ // record, verify the sender signature and record the canonical sequence FROM THE FRAME, BEFORE
2339
+ // ingest — so the strict-in-order gate has the position without waiting on the separate
2340
+ // leaf_deliver witness (removes the content-before-witness race). A bad/absent record is
2341
+ // non-fatal: the content still ingests, ordered by the witness stream / arrival as before.
2342
+ const s1Cbor = frame["structure1_cbor"];
2343
+ const s2Cbor = frame["structure2_cbor"];
2344
+ if (s1Cbor instanceof Uint8Array && s2Cbor instanceof Uint8Array) {
2345
+ this.#recordFrameOrdering(agentName, sessionId, s1Cbor, s2Cbor, contentHash, correlationId);
2346
+ }
2347
+ // AC-001: carry the sender's correlationId from the frame into the receive
2348
+ // path so both sides log the same flow id (never re-minted on receipt).
2349
+ const ingest = this.ingestReceivedContent(agentName, sessionId, contentBytes, contentHash, correlationId);
2350
+ // AC-001: after the content is durably ingested AND its hash cross-check
2351
+ // succeeds, emit an unsigned `persisted` delivery ACK back to the sender. A
2352
+ // rejected ingest (tamper / not-active) produces NO ACK, so the sender's TTF
2353
+ // path can park / recover.
2354
+ // DOD-MSG-4: a HELD (out-of-order) frame is NOT yet a durable leaf, so it is NOT
2355
+ // acknowledged `persisted` — the sender's TTF→park backstop then guarantees the
2356
+ // missing-earlier message is fetchable, and dedup absorbs the redundant copy.
2357
+ if (ingest.ok && !ingest.held) {
2358
+ void this.#sendDeliveryAck(agentName, sessionId, contentHash, correlationId);
2359
+ }
2360
+ }
2361
+ catch (err) {
2362
+ this.#logger.warn("session.content.stream.read.failed", {
2363
+ sessionId,
2364
+ error: err instanceof Error ? err.message : String(err),
2365
+ });
2366
+ }
2367
+ }
2368
+ /**
2369
+ * M7-SESSION-001 AC-004/AC-005: Register a relay stream for an active session.
2370
+ * Starts a background reader that watches for session_interrupted frames and
2371
+ * stream close events. Both detection paths call markInterruptedWithDetails().
2372
+ *
2373
+ * The reader runs for the lifetime of the relay stream. If the stream closes
2374
+ * without delivering a session_interrupted frame (AC-005 / 'stream_close' path),
2375
+ * the session is still marked interrupted.
2376
+ *
2377
+ * @param sessionId The hex session ID
2378
+ * @param stream The relay stream to monitor
2379
+ * @param messageCount Number of message leaves at the time of registration
2380
+ * (used as the count at interruption — best effort since exact count at frame
2381
+ * receipt may differ, but this is the value available at stream setup time)
2382
+ */
2383
+ registerRelayStream(agentName, sessionId, stream, messageCount = 0) {
2384
+ void this.#watchRelayStream(agentName, sessionId, stream, messageCount);
2385
+ }
2386
+ /**
2387
+ * Background relay stream watcher.
2388
+ * Pseudocode:
2389
+ * 1. Create LP-framed iterator over the stream
2390
+ * 2. For each frame:
2391
+ * a. If type === 'session_interrupted':
2392
+ * - Record receivedInterruptFrame = true
2393
+ * - Call markInterruptedWithDetails(sessionId, messageCount, 'relay_frame')
2394
+ * - Break (no more frames expected)
2395
+ * 3. On stream close (loop ends normally or with error):
2396
+ * a. If !receivedInterruptFrame:
2397
+ * - Call markInterruptedWithDetails(sessionId, messageCount, 'stream_close')
2398
+ */
2399
+ async #watchRelayStream(agentName, sessionId, stream, messageCount) {
2400
+ let receivedInterruptFrame = false;
2401
+ // CELLO-M7-TRANSPORT-001: cast the stream input to lp.decode. Adding the
2402
+ // @libp2p/autonat service (interface@3.2.2 / uint8arraylist v2) to the
2403
+ // transport package surfaced a benign mixed-version split between the Stream
2404
+ // type (now v2) and it-length-prefixed's expected Uint8ArrayList (v3). The two
2405
+ // are structurally identical at runtime — this is a build-time-only artifact.
2406
+ const lpSource = stream;
2407
+ const source = lp.decode(lpSource)[Symbol.asyncIterator]();
2408
+ try {
2409
+ while (true) {
2410
+ let result;
2411
+ try {
2412
+ result = await source.next();
2413
+ }
2414
+ catch {
2415
+ // Stream error (e.g. stream aborted) — treat as stream close
2416
+ break;
2417
+ }
2418
+ if (result.done || result.value === undefined)
2419
+ break;
2420
+ let frame;
2421
+ try {
2422
+ const bytes = result.value instanceof Uint8Array ? result.value
2423
+ : Buffer.isBuffer(result.value) ? new Uint8Array(result.value)
2424
+ : result.value.slice();
2425
+ frame = decode(bytes);
2426
+ }
2427
+ catch {
2428
+ continue;
2429
+ }
2430
+ if (frame["type"] === "session_interrupted") {
2431
+ // H-3 SECURITY: this stream is registered (bound) to a specific
2432
+ // sessionId. A malicious or buggy relay could put a DIFFERENT session_id
2433
+ // in the frame body to target a session this stream is not authorized
2434
+ // for (cross-session targeting). Never trust the frame's id: if the frame
2435
+ // names a different session, reject it and keep watching the bound one.
2436
+ const frameSessionId = typeof frame["session_id"] === "string"
2437
+ ? frame["session_id"]
2438
+ : (frame["session_id"] instanceof Uint8Array
2439
+ ? Buffer.from(frame["session_id"]).toString("hex")
2440
+ : null);
2441
+ if (frameSessionId !== null && frameSessionId !== sessionId) {
2442
+ this.#logger.warn("session.interrupt.frame.session_mismatch", {
2443
+ boundSessionId: sessionId,
2444
+ frameSessionId,
2445
+ reason: "cross_session_frame_rejected",
2446
+ });
2447
+ continue; // ignore the hostile/mismatched frame; keep reading
2448
+ }
2449
+ receivedInterruptFrame = true;
2450
+ // Always mark the BOUND sessionId — never the id carried in the frame.
2451
+ await this.markInterruptedWithDetails(agentName, sessionId, messageCount, "relay_frame");
2452
+ break; // No more relay frames expected after session_interrupted
2453
+ }
2454
+ }
2455
+ }
2456
+ catch {
2457
+ // Stream read loop ended — fall through to stream_close check
2458
+ }
2459
+ // AC-005: stream closed without a session_interrupted frame
2460
+ if (!receivedInterruptFrame) {
2461
+ // Only mark interrupted if this session is still active in SQLite
2462
+ const record = this.getSessionRecord(agentName, sessionId);
2463
+ if (record && record.status === "active") {
2464
+ await this.markInterruptedWithDetails(agentName, sessionId, messageCount, "stream_close");
2465
+ }
2466
+ }
2467
+ }
2468
+ // ─── Private helpers ──────────────────────────────────────────────────────
2469
+ /**
2470
+ * DOD-LOOP-1: ensure the given agent has a standing receiver node (idempotent). Created when an
2471
+ * agent comes online (cello_start_agent) and replaced after it is handed off to a session. The
2472
+ * `#standingReceiverCreating` guard prevents two concurrent ensure() calls (e.g. the
2473
+ * cello_start_agent hook racing a consume-site retry) from building two nodes for one agent. A
2474
+ * create failure logs + leaves no entry; the next consume-site ensure() call retries on demand.
2475
+ */
2476
+ async #ensureStandingReceiver(agentName, correlationId = randomUUID()) {
2477
+ if (this.#standingReceivers.has(agentName) || this.#standingReceiverCreating.has(agentName))
2478
+ return;
2479
+ if (this.#shuttingDown)
2480
+ return;
2481
+ // A fresh ensure request supersedes any pending removal (agent toggled offline→online).
2482
+ this.#standingReceiverRemoving.delete(agentName);
2483
+ this.#standingReceiverCreating.add(agentName);
2484
+ try {
2485
+ const sessionId = `standing_receiver_${randomUUID()}`;
2486
+ const gater = new SessionConnectionGater({
2487
+ sessionId,
2488
+ allowedPeerId: null, // open — counterparty unknown at creation time
2489
+ logger: this.#logger,
2490
+ });
2491
+ let node;
2492
+ try {
2493
+ node = await this.#factory.createNode({ sessionId, connectionGater: gater, nodeType: "standing_receiver" });
2494
+ await node.start();
2495
+ }
2496
+ catch (err) {
2497
+ this.#logger.error("session.node.create.failed", {
2498
+ sessionId,
2499
+ agentName: `${STANDING_RECEIVER_AGENT_NAME}:${agentName}`,
2500
+ error: err instanceof Error ? err.message : String(err),
2501
+ correlationId,
2502
+ });
2503
+ return; // not ready — callers check getStandingReceiverReady / retry via ensure on demand
2504
+ }
2505
+ // M2: gracefulShutdown may have begun while this node was starting (ensure runs un-awaited).
2506
+ // Don't install an orphan bound to a TCP port — stop it and bail.
2507
+ if (this.#shuttingDown) {
2508
+ try {
2509
+ await node.stop();
2510
+ }
2511
+ catch { /* best-effort */ }
2512
+ return;
2513
+ }
2514
+ // L1: the agent may have gone offline (cello_stop_agent → removeStandingReceiverForAgent)
2515
+ // while this ensure was parked on start(). Removal found no map entry to delete, so the
2516
+ // tombstone is how we learn of it — tear the fresh node down rather than install an SR for
2517
+ // an offline agent.
2518
+ if (this.#standingReceiverRemoving.has(agentName)) {
2519
+ this.#standingReceiverRemoving.delete(agentName);
2520
+ try {
2521
+ await node.stop();
2522
+ }
2523
+ catch { /* best-effort */ }
2524
+ return;
2525
+ }
2526
+ // CELLO-M7-TRANSPORT-001: wrap in a NodeAutoNatService so its dialability drives session-
2527
+ // address advertisement and the transport.autonat.* events fire.
2528
+ const autoNat = new NodeAutoNatService({
2529
+ node,
2530
+ logger: this.#logger,
2531
+ nodeType: "standing_receiver",
2532
+ probers: this.#autoNatProbers(),
2533
+ });
2534
+ autoNat.emitInitialResult();
2535
+ this.#standingReceivers.set(agentName, { node, gater, autoNat });
2536
+ this.#logger.info("session.node.created", {
2537
+ sessionId,
2538
+ agentName: `${STANDING_RECEIVER_AGENT_NAME}:${agentName}`,
2539
+ sessionPeerId: node.getPeerId(),
2540
+ correlationId,
2541
+ });
2542
+ }
2543
+ finally {
2544
+ this.#standingReceiverCreating.delete(agentName);
2545
+ }
2546
+ }
2547
+ /**
2548
+ * DOD-LOOP-1: public hook for the composition root to create an agent's standing receiver when
2549
+ * the agent comes online (cello_start_agent), and to tear it down when it goes offline.
2550
+ */
2551
+ async ensureStandingReceiverForAgent(agentName) {
2552
+ await this.#ensureStandingReceiver(agentName);
2553
+ }
2554
+ async removeStandingReceiverForAgent(agentName) {
2555
+ const sr = this.#standingReceivers.get(agentName);
2556
+ if (!sr) {
2557
+ // L1: an #ensureStandingReceiver for this agent may be in flight (parked on start(), so no
2558
+ // map entry yet). Leave a tombstone — that ensure tears its fresh node down on completion
2559
+ // instead of installing an SR for an agent that is now offline. Also drop any stale creating
2560
+ // marker so a later start can re-ensure.
2561
+ if (this.#standingReceiverCreating.has(agentName))
2562
+ this.#standingReceiverRemoving.add(agentName);
2563
+ return;
2564
+ }
2565
+ this.#standingReceivers.delete(agentName);
2566
+ sr.autoNat.stop();
2567
+ try {
2568
+ await sr.node.stop();
2569
+ }
2570
+ catch { /* best-effort */ }
2571
+ }
2572
+ #insertSessionRow(sessionId, agentName, counterpartyPubkey, status) {
2573
+ if (!this.#db)
2574
+ return false;
2575
+ const now = Date.now();
2576
+ try {
2577
+ this.#db
2578
+ .prepare(`INSERT INTO sessions
2579
+ (session_id, agent_name, counterparty_pubkey, status, created_at, updated_at)
2580
+ VALUES (?, ?, ?, ?, ?, ?)`)
2581
+ .run(sessionId, agentName, counterpartyPubkey, status, now, now);
2582
+ return true;
2583
+ }
2584
+ catch (err) {
2585
+ this.#logger.error("session.interrupt.db.write.failed", {
2586
+ sessionId,
2587
+ error: err instanceof Error ? err.message : String(err),
2588
+ });
2589
+ return false;
2590
+ }
2591
+ }
2592
+ #updateSessionStatus(agentName, sessionId, status) {
2593
+ if (!this.#db)
2594
+ return;
2595
+ const now = Date.now();
2596
+ try {
2597
+ this.#db
2598
+ .prepare("UPDATE sessions SET status = ?, updated_at = ? WHERE agent_name = ? AND session_id = ?")
2599
+ .run(status, now, agentName, sessionId);
2600
+ }
2601
+ catch (err) {
2602
+ this.#logger.error("session.interrupt.db.write.failed", {
2603
+ sessionId,
2604
+ error: err instanceof Error ? err.message : String(err),
2605
+ });
2606
+ }
2607
+ }
2608
+ }
2609
+ //# sourceMappingURL=session-node-manager.js.map