@brainpilot/runtime 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82):
  1. package/README.md +61 -0
  2. package/dist/agent-error.d.ts +51 -0
  3. package/dist/agent-error.d.ts.map +1 -0
  4. package/dist/agent-error.js +163 -0
  5. package/dist/agent-error.js.map +1 -0
  6. package/dist/agent-factory.d.ts.map +1 -1
  7. package/dist/agent-factory.js +45 -10
  8. package/dist/agent-factory.js.map +1 -1
  9. package/dist/events.d.ts +18 -0
  10. package/dist/events.d.ts.map +1 -1
  11. package/dist/events.js +24 -0
  12. package/dist/events.js.map +1 -1
  13. package/dist/extensions/agent-status.d.ts +91 -0
  14. package/dist/extensions/agent-status.d.ts.map +1 -0
  15. package/dist/extensions/agent-status.js +103 -0
  16. package/dist/extensions/agent-status.js.map +1 -0
  17. package/dist/extensions/trace-reminder.d.ts +94 -0
  18. package/dist/extensions/trace-reminder.d.ts.map +1 -0
  19. package/dist/extensions/trace-reminder.js +153 -0
  20. package/dist/extensions/trace-reminder.js.map +1 -0
  21. package/dist/index.d.ts +2 -0
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +1 -0
  24. package/dist/index.js.map +1 -1
  25. package/dist/mailbox.d.ts +37 -1
  26. package/dist/mailbox.d.ts.map +1 -1
  27. package/dist/mailbox.js +79 -2
  28. package/dist/mailbox.js.map +1 -1
  29. package/dist/mas-agent.d.ts +74 -12
  30. package/dist/mas-agent.d.ts.map +1 -1
  31. package/dist/mas-agent.js +158 -33
  32. package/dist/mas-agent.js.map +1 -1
  33. package/dist/materialize-skills.d.ts +40 -0
  34. package/dist/materialize-skills.d.ts.map +1 -0
  35. package/dist/materialize-skills.js +141 -0
  36. package/dist/materialize-skills.js.map +1 -0
  37. package/dist/mcp-bridge.d.ts +15 -2
  38. package/dist/mcp-bridge.d.ts.map +1 -1
  39. package/dist/mcp-bridge.js +53 -10
  40. package/dist/mcp-bridge.js.map +1 -1
  41. package/dist/mem-watchdog.d.ts +63 -0
  42. package/dist/mem-watchdog.d.ts.map +1 -0
  43. package/dist/mem-watchdog.js +81 -0
  44. package/dist/mem-watchdog.js.map +1 -0
  45. package/dist/mock-agent.d.ts.map +1 -1
  46. package/dist/mock-agent.js +13 -1
  47. package/dist/mock-agent.js.map +1 -1
  48. package/dist/personas.d.ts +16 -0
  49. package/dist/personas.d.ts.map +1 -1
  50. package/dist/personas.js +651 -8
  51. package/dist/personas.js.map +1 -1
  52. package/dist/pi-provider.d.ts +32 -1
  53. package/dist/pi-provider.d.ts.map +1 -1
  54. package/dist/pi-provider.js +70 -0
  55. package/dist/pi-provider.js.map +1 -1
  56. package/dist/provider-config.d.ts +23 -0
  57. package/dist/provider-config.d.ts.map +1 -0
  58. package/dist/provider-config.js +49 -0
  59. package/dist/provider-config.js.map +1 -0
  60. package/dist/server.d.ts +2 -2
  61. package/dist/server.d.ts.map +1 -1
  62. package/dist/server.js +146 -8
  63. package/dist/server.js.map +1 -1
  64. package/dist/session-manager.d.ts +367 -8
  65. package/dist/session-manager.d.ts.map +1 -1
  66. package/dist/session-manager.js +1082 -39
  67. package/dist/session-manager.js.map +1 -1
  68. package/dist/tools/skill-search.d.ts +53 -0
  69. package/dist/tools/skill-search.d.ts.map +1 -0
  70. package/dist/tools/skill-search.js +269 -0
  71. package/dist/tools/skill-search.js.map +1 -0
  72. package/dist/tools/system-tools.d.ts +22 -1
  73. package/dist/tools/system-tools.d.ts.map +1 -1
  74. package/dist/tools/system-tools.js +149 -21
  75. package/dist/tools/system-tools.js.map +1 -1
  76. package/dist/trace.d.ts +27 -1
  77. package/dist/trace.d.ts.map +1 -1
  78. package/dist/trace.js +60 -3
  79. package/dist/trace.js.map +1 -1
  80. package/dist/types.d.ts +61 -5
  81. package/dist/types.d.ts.map +1 -1
  82. package/package.json +6 -2
@@ -9,18 +9,32 @@
9
9
  * Persistence (§5): config/history/state live under `<dataRoot>/.bp/{sid}/`,
10
10
  * work files under `<dataRoot>/workspaces/{sid}/`.
11
11
  */
12
- import { mkdir, readFile, writeFile, readdir, rm } from "node:fs/promises";
13
- import { join } from "node:path";
12
+ import { mkdir, readFile, writeFile, readdir, rm, stat, rename } from "node:fs/promises";
13
+ import { join, resolve, sep, dirname } from "node:path";
14
14
  import { randomUUID } from "node:crypto";
15
+ import { CUSTOM_EVENT, } from "@brainpilot/protocol";
15
16
  import { EventBus } from "./event-bus.js";
16
17
  import { Mailbox } from "./mailbox.js";
17
18
  import { GraphOfTrace } from "./trace.js";
18
- import { MasAgent } from "./mas-agent.js";
19
+ import { MasAgent, addUsage, emptyTokenUsage } from "./mas-agent.js";
19
20
  import { systemToolsForRole, builtinToolNamesForRole } from "./tools/system-tools.js";
20
21
  import { ev } from "./events.js";
21
22
  import { selectFactory, isMockMode } from "./agent-factory.js";
22
- import { personaFor } from "./personas.js";
23
+ import { personaFor, withLanguageDirective } from "./personas.js";
24
+ import { renderAgentStatusBlock, collectAgentStatusLines } from "./extensions/agent-status.js";
23
25
  import { McpBridge, loadMcpServersConfig } from "./mcp-bridge.js";
26
+ import { materializeSkills } from "./materialize-skills.js";
27
+ import { resolveSessionProvider } from "./provider-config.js";
28
+ import { MemWatchdog, parseMemLimitMb } from "./mem-watchdog.js";
29
/**
 * Create a promise whose settle functions are exposed to the caller.
 *
 * @returns {{promise: Promise<any>, resolve: Function, reject: Function}}
 *   The pending promise plus the functions that fulfil or reject it.
 */
function makeDeferred() {
    // The Promise executor runs synchronously, so both handles are captured
    // before the constructor returns.
    const handles = {};
    const promise = new Promise((fulfil, fail) => {
        handles.fulfil = fulfil;
        handles.fail = fail;
    });
    return { promise, resolve: handles.fulfil, reject: handles.fail };
}
24
38
  /** Roles inferred from agent name. */
25
39
  function roleFor(name) {
26
40
  if (name === "principal")
@@ -29,22 +43,126 @@ function roleFor(name) {
29
43
  return "trace";
30
44
  return "expert";
31
45
  }
46
/**
 * Estimate a token count from a string's length (issue #80).
 *
 * The divisor is deliberately conservative: English averages ~4 chars/token
 * and CJK ~1-2 chars/token, so 3.5 errs towards truncating slightly early
 * rather than overflowing the provider's context window. Exported for tests.
 *
 * @param {string} text - Text to measure (its `.length` is used).
 * @returns {number} `text.length / 3.5`, rounded up.
 */
export function estimateTokens(text) {
    const CHARS_PER_TOKEN = 3.5;
    const charCount = text.length;
    return Math.ceil(charCount / CHARS_PER_TOKEN);
}
55
/**
 * Collapse a per-agent token usage breakdown into one session-wide total.
 *
 * @param {Object<string, object>} byAgent - Usage records keyed by agent name;
 *   each record carries numeric `input`, `output`, `cacheRead`, `cacheWrite`
 *   and `total` fields.
 * @returns {object} A fresh usage record (from `emptyTokenUsage()`) holding
 *   the field-wise sums.
 */
function sumAgentUsage(byAgent) {
    const counters = ["input", "output", "cacheRead", "cacheWrite", "total"];
    return Object.values(byAgent).reduce((acc, usage) => {
        for (const field of counters) {
            acc[field] += usage[field];
        }
        return acc;
    }, emptyTokenUsage());
}
67
/**
 * Produce a filesystem-safe form of a tool name (used when saving truncated
 * results): every character outside `[A-Za-z0-9_-]` becomes `_`, and the
 * result is capped at 64 characters.
 *
 * @param {string} name - Raw tool name.
 * @returns {string} Sanitised name, at most 64 characters long.
 */
function sanitiseFilename(name) {
    const MAX_LENGTH = 64;
    const safe = name.replace(/[^A-Za-z0-9_-]/g, "_");
    return safe.slice(0, MAX_LENGTH);
}
71
/**
 * Render a byte count in a short human-readable form (e.g. "1.2MB").
 *
 * @param {number} n - Size in bytes.
 * @returns {string} `<n>B` below 1024, one-decimal `KB` below 1024², and
 *   one-decimal `MB` otherwise.
 */
function formatBytes(n) {
    const KIB = 1024;
    const MIB = KIB * KIB;
    return n < KIB
        ? `${n}B`
        : n < MIB
            ? `${(n / KIB).toFixed(1)}KB`
            : `${(n / MIB).toFixed(1)}MB`;
}
32
79
  export class SessionManager {
33
80
  sessions = new Map();
34
81
  dataRoot;
35
82
  agentFactory;
36
83
  persist;
37
84
  lastActivityAt = 0;
85
+ // #76: active mailbox delivery. A delivery loop drains a target agent's inbox
86
+ // and runs it; the key (`${sid}:${name}`) guards re-entrancy so concurrent
87
+ // wakes for one agent collapse into a single serial loop (its `prompt` is
88
+ // never invoked concurrently).
89
+ deliveryLoops = new Set();
38
90
  // External MCP tools (§9 decision 2): loaded once, lazily, shared by all
39
91
  // non-trace agents. Null until first agent is created.
40
92
  mcpBridge;
41
93
  mcpTools = [];
42
94
  mcpLoaded = false;
95
+ // Built-in skills directory, loaded through Pi's native skill pipeline
96
+ // (`additionalSkillPaths`). The bundled @brainpilot/skills content is
97
+ // materialized here once (lazily) on first agent creation.
98
+ skillsDir;
99
+ // Router skills directory backing the `skill_search` Pi-native tool — the
100
+ // long-tail catalog NOT in `<available_skills>`. Materialized alongside
101
+ // `skillsDir` (each top-level category lands on the side determined by
102
+ // `materializeSkills`).
103
+ routerSkillsDir;
104
+ skillsMaterialized = false;
105
+ // Opt-in memory watchdog (§R-4 / issue #20). Null when no budget is set.
106
+ memWatchdog;
107
+ // Tool result truncation (issue #80). 0 = disabled.
108
+ maxToolResultTokens;
43
109
  constructor(opts = {}) {
44
110
  this.dataRoot = opts.dataRoot ?? process.env.BP_DATA_DIR ?? join(process.cwd(), ".bp-data");
45
111
  this.agentFactory = opts.agentFactory ?? selectFactory();
46
112
  this.persist = opts.persist ?? true;
47
113
  this.mcpBridge = opts.mcpBridge ?? null;
114
+ this.maxToolResultTokens =
115
+ opts.maxToolResultTokens ??
116
+ (() => {
117
+ const env = process.env.BP_MAX_TOOL_RESULT_TOKENS?.trim();
118
+ if (env !== undefined && env !== "") {
119
+ const n = Number(env);
120
+ if (Number.isInteger(n) && n >= 0)
121
+ return n;
122
+ }
123
+ return 64000;
124
+ })();
125
+ // Skills are loaded by Pi from this dir (default <dataRoot>/bp_template/skills).
126
+ this.skillsDir = opts.skillsDir ?? join(this.dataRoot, "bp_template", "skills");
127
+ // The router skill library is a parallel directory with the same on-disk
128
+ // format; `skill_search` reads from here, Pi never sees it.
129
+ this.routerSkillsDir =
130
+ opts.routerSkillsDir ?? join(this.dataRoot, "bp_template", "skills-router");
131
+ const limitBytes = opts.memLimitBytes ?? parseMemLimitMb(process.env);
132
+ this.memWatchdog =
133
+ limitBytes != null
134
+ ? new MemWatchdog({
135
+ limitBytes,
136
+ readRss: opts.readRss,
137
+ onThrottle: (snap) => this.onMemoryThrottle(snap),
138
+ })
139
+ : null;
140
+ this.memWatchdog?.start();
141
+ }
142
+ /**
143
+ * Materialize the bundled @brainpilot/skills content into `this.skillsDir`
144
+ * (skip-if-exists) so Pi's native skill pipeline can load it. Idempotent —
145
+ * runs at most once per manager. Called at server startup (so skills exist and
146
+ * are user-visible before any agent runs, incl. Docker pure-compose where no
147
+ * CLI scaffold ran) AND lazily before the first non-trace agent. Best-effort:
148
+ * skills are a convenience, not a hard dependency, so failures are swallowed.
149
+ */
150
+ async ensureSkillsMaterialized() {
151
+ if (this.skillsMaterialized)
152
+ return;
153
+ this.skillsMaterialized = true;
154
+ try {
155
+ const res = await materializeSkills(this.dataRoot);
156
+ // eslint-disable-next-line no-console
157
+ console.info(`[skills] always-on: ${res.copied} copied → ${res.dest}` +
158
+ (res.skipped ? ` (${res.skipped} preserved)` : "") +
159
+ `; router: ${res.routerCopied} copied → ${res.routerDest}` +
160
+ (res.routerSkipped ? ` (${res.routerSkipped} preserved)` : ""));
161
+ }
162
+ catch (err) {
163
+ // eslint-disable-next-line no-console
164
+ console.error(`[skills] failed to materialize built-in skills: ${err.message}`);
165
+ }
48
166
  }
49
167
  /**
50
168
  * Load external MCP tools once. No-op in mock mode (BP_MOCK=1) and when no
@@ -76,21 +194,209 @@ export class SessionManager {
76
194
  workspaceDir(sid) {
77
195
  return join(this.dataRoot, "workspaces", sid);
78
196
  }
197
+ /**
198
+ * #60: composer uploads in single-user mode are POSTed against the literal
199
+ * sandbox id `"local"` (the web `LOCAL_SANDBOX.id`), because a file can be
200
+ * attached in the draft composer *before* the real session exists. They land
201
+ * in `workspaces/local/` — but the agent's cwd is `workspaces/<sessionId>/`,
202
+ * so without this it can't read the file the user just attached. We treat
203
+ * `workspaces/local/` as a staging area and drain it into the real session
204
+ * workspace right before the agent runs (see drainLocalUploads).
205
+ */
206
+ static UPLOAD_STAGING_SID = "local";
207
+ /**
208
+ * #97: max CONSECUTIVE failed delivery runs for one expert before the failure
209
+ * is escalated to the principal instead of self-retried. Matches the legacy
210
+ * circuit-breaker threshold (3). Only `retryable` errors consume retries;
211
+ * a `fatal` error escalates on the first failure regardless of this cap.
212
+ */
213
+ static MAX_DELIVERY_RETRIES = 3;
79
214
  historyPath(sid, agent) {
80
215
  return join(this.bpDir(sid), "history", `${agent}.jsonl`);
81
216
  }
82
- /** Skills shared by every session (user-editable `bp_template/skills/`). */
83
- templateSkillsDir() {
84
- return join(this.dataRoot, "bp_template", "skills");
85
- }
86
- /** This session's own skill dir (`.bp/<sid>/skills/`), overrides/augments the template. */
87
- sessionSkillsDir(sid) {
88
- return join(this.bpDir(sid), "skills");
89
- }
90
217
  /** User-editable persona override for an agent (`bp_template/agents/<name>/prompt.md`). */
91
218
  agentPromptPath(name) {
92
219
  return join(this.dataRoot, "bp_template", "agents", name, "prompt.md");
93
220
  }
221
+ /* ----------------------------- workspace files ----------------------------- */
222
+ /**
223
+ * Resolve a workspace-relative path to an absolute one, refusing anything that
224
+ * escapes the session's `workspaces/<sid>/` root (path traversal guard). This
225
+ * is the single enforcement point for all file routes.
226
+ *
227
+ * The SPA addresses files with a `/workspace`-rooted convention
228
+ * (`/workspace`, `/workspace/sub/file.txt`); we normalize that to a path
229
+ * relative to the on-disk workspace root before resolving.
230
+ */
231
+ resolveWorkspacePath(sid, rawPath) {
232
+ const root = this.workspaceDir(sid);
233
+ let rel = rawPath ?? "";
234
+ if (rel === "/workspace")
235
+ rel = "";
236
+ else if (rel.startsWith("/workspace/"))
237
+ rel = rel.slice("/workspace/".length);
238
+ rel = rel.replace(/^\/+/, ""); // never let a leading slash make it absolute
239
+ const abs = resolve(root, rel);
240
+ if (abs !== root && !abs.startsWith(root + sep)) {
241
+ throw new Error(`path escapes workspace: ${rawPath}`);
242
+ }
243
+ return abs;
244
+ }
245
+ /** List one directory level under the session workspace (default: root). */
246
+ async listSessionFiles(sid, rel = "") {
247
+ const dir = this.resolveWorkspacePath(sid, rel);
248
+ let dirents;
249
+ try {
250
+ dirents = await readdir(dir, { withFileTypes: true });
251
+ }
252
+ catch {
253
+ return []; // missing workspace → empty (new session, nothing written yet)
254
+ }
255
+ const entries = await Promise.all(dirents.map(async (d) => {
256
+ const type = d.isDirectory()
257
+ ? "folder"
258
+ : d.isSymbolicLink()
259
+ ? "symlink"
260
+ : "file";
261
+ let size = 0;
262
+ let modified = 0;
263
+ let permissions = "";
264
+ try {
265
+ const st = await stat(join(dir, d.name));
266
+ size = st.size;
267
+ modified = Math.floor(st.mtimeMs / 1000);
268
+ permissions = (st.mode & 0o777).toString(8);
269
+ }
270
+ catch {
271
+ /* broken symlink / race — report zeros */
272
+ }
273
+ return { name: d.name, type, size, modified, permissions };
274
+ }));
275
+ return entries;
276
+ }
277
+ /** Read a workspace text file as UTF-8. */
278
+ async readSessionFile(sid, rel) {
279
+ const abs = this.resolveWorkspacePath(sid, rel);
280
+ const content = await readFile(abs, "utf8");
281
+ return { path: rel, content, size: Buffer.byteLength(content) };
282
+ }
283
+ /** Read a workspace file's raw bytes (images/PDF/download). */
284
+ async readSessionFileRaw(sid, rel) {
285
+ const abs = this.resolveWorkspacePath(sid, rel);
286
+ return readFile(abs);
287
+ }
288
+ /** Delete a workspace file. Returns false if it was already gone. */
289
+ async deleteSessionFile(sid, rel) {
290
+ const abs = this.resolveWorkspacePath(sid, rel);
291
+ try {
292
+ await rm(abs, { recursive: true });
293
+ return true;
294
+ }
295
+ catch {
296
+ return false;
297
+ }
298
+ }
299
+ /**
300
+ * #47: write an uploaded file into the session workspace. Content arrives
301
+ * base64-encoded (binary-safe over the JSON byte chain). The same
302
+ * `resolveWorkspacePath` guard prevents path traversal; parent dirs are
303
+ * created so an upload like `docs/foo.pdf` works. The file lands in the
304
+ * agent's cwd, so it can `read` it by its workspace-relative path.
305
+ * `maxBytes` (default 20 MiB) bounds the decoded size.
306
+ */
307
+ async writeSessionFile(sid, rel, contentBase64, maxBytes = 20 * 1024 * 1024) {
308
+ const buf = Buffer.from(contentBase64, "base64");
309
+ if (buf.byteLength > maxBytes) {
310
+ throw new Error(`file too large: ${buf.byteLength} bytes exceeds limit of ${maxBytes}`);
311
+ }
312
+ const abs = this.resolveWorkspacePath(sid, rel);
313
+ await mkdir(dirname(abs), { recursive: true });
314
+ await writeFile(abs, buf);
315
+ // Return the workspace-relative path (strip the absolute root prefix).
316
+ const root = this.workspaceDir(sid);
317
+ const relOut = abs === root ? "" : abs.slice(root.length + 1);
318
+ return { path: relOut, size: buf.byteLength };
319
+ }
320
+ /**
321
+ * #60: drain the composer upload staging area (`workspaces/local/`) into a
322
+ * real session's workspace so the agent — whose cwd is `workspaces/<sid>/` —
323
+ * can read files the user attached in the draft composer (when no real
324
+ * session id existed yet, the web uploads against the literal `"local"`
325
+ * sandbox id). Called right before the agent runs.
326
+ *
327
+ * Move semantics: each staged entry is renamed into the session workspace
328
+ * (an existing same-named entry in the session is left untouched and the
329
+ * staged copy is discarded), then the staging area is emptied so files never
330
+ * leak into the next session. No-op when the target IS the staging sid, or
331
+ * when the staging dir is missing/empty. Best-effort: never throws — a copy
332
+ * failure must not block the user's prompt.
333
+ */
334
+ async drainLocalUploads(sessionId) {
335
+ if (sessionId === SessionManager.UPLOAD_STAGING_SID)
336
+ return;
337
+ const stagingDir = this.workspaceDir(SessionManager.UPLOAD_STAGING_SID);
338
+ let names;
339
+ try {
340
+ names = await readdir(stagingDir);
341
+ }
342
+ catch {
343
+ return; // no staging dir → nothing was uploaded in the draft
344
+ }
345
+ if (names.length === 0)
346
+ return;
347
+ const destDir = this.workspaceDir(sessionId);
348
+ try {
349
+ await mkdir(destDir, { recursive: true });
350
+ }
351
+ catch {
352
+ /* best-effort */
353
+ }
354
+ for (const name of names) {
355
+ const from = join(stagingDir, name);
356
+ const to = join(destDir, name);
357
+ try {
358
+ // Don't clobber an existing session file; just drop the staged copy.
359
+ let exists = false;
360
+ try {
361
+ await stat(to);
362
+ exists = true;
363
+ }
364
+ catch {
365
+ /* target absent → safe to move */
366
+ }
367
+ if (exists) {
368
+ await rm(from, { recursive: true, force: true });
369
+ continue;
370
+ }
371
+ await rename(from, to);
372
+ }
373
+ catch {
374
+ // rename failed (e.g. cross-device, or `from` is a directory on some
375
+ // platforms): fall back to a content copy so the file still reaches the
376
+ // session, then remove the staged copy. Best-effort, never throws.
377
+ try {
378
+ await this.copyEntry(from, to);
379
+ await rm(from, { recursive: true, force: true });
380
+ }
381
+ catch {
382
+ /* give up on this entry */
383
+ }
384
+ }
385
+ }
386
+ }
387
+ /** Recursively copy a file or directory tree (drainLocalUploads fallback). */
388
+ async copyEntry(from, to) {
389
+ const st = await stat(from);
390
+ if (st.isDirectory()) {
391
+ await mkdir(to, { recursive: true });
392
+ for (const child of await readdir(from)) {
393
+ await this.copyEntry(join(from, child), join(to, child));
394
+ }
395
+ return;
396
+ }
397
+ await mkdir(dirname(to), { recursive: true });
398
+ await writeFile(to, await readFile(from));
399
+ }
94
400
  /**
95
401
  * Resolve an agent's system persona. Prefers the user-editable on-disk
96
402
  * `bp_template/agents/<name>/prompt.md` (so personas can be tuned without a
@@ -98,49 +404,93 @@ export class SessionManager {
98
404
  * file is present or it's empty.
99
405
  */
100
406
  async loadPersona(name, role) {
407
+ let base;
101
408
  try {
102
409
  const raw = (await readFile(this.agentPromptPath(name), "utf8")).trim();
103
410
  if (raw)
104
- return raw;
411
+ base = raw;
105
412
  }
106
413
  catch {
107
414
  // No on-disk override — fall through to the built-in persona.
108
415
  }
109
- return personaFor(name, role);
416
+ // #97: append the language-following directive here (not in the persona text
417
+ // / on-disk prompt.md) so it also reaches users who scaffolded earlier, and
418
+ // applies whether the persona came from disk or the built-in constant.
419
+ return withLanguageDirective(base ?? personaFor(name, role));
110
420
  }
111
421
  /* ---------------------------- session CRUD ---------------------------- */
112
- async createSession(input = {}) {
422
+ async createSession(input = {},
423
+ /**
424
+ * Internal restore path (see `restoreFromDisk`): when provided, the entry
425
+ * inherits the on-disk meta.json timestamps verbatim instead of stamping
426
+ * fresh ones, and `writeMeta` is skipped so the canonical file is not
427
+ * clobbered with boot-time values. Public callers should not pass this.
428
+ */
429
+ _restore) {
430
+ if (this.memWatchdog?.isOverSoftLimit()) {
431
+ throw new Error("memory budget exceeded: refusing new session");
432
+ }
113
433
  const id = input.id ?? randomUUID();
114
434
  if (this.sessions.has(id))
115
435
  return this.toSession(this.sessions.get(id));
116
- const nowIso = new Date().toISOString();
436
+ const nowIso = _restore ? _restore.updatedAt : new Date().toISOString();
437
+ const createdAt = _restore ? _restore.createdAt : nowIso;
438
+ const lastActivityAt = _restore ? _restore.lastActivityAt : Date.now();
117
439
  const persistBase = this.persist ? this.bpDir(id) : undefined;
440
+ // Provider ref: explicit input wins; otherwise reuse an existing on-disk ref
441
+ // (restore path) so reviving a session never clobbers its chosen model.
442
+ const explicitRef = input.providerId !== undefined || input.modelId !== undefined;
443
+ const providerRef = explicitRef
444
+ ? { providerId: input.providerId, modelId: input.modelId }
445
+ : this.persist
446
+ ? await this.readProviderRef(id)
447
+ : {};
118
448
  const bus = new EventBus({ persistPath: persistBase ? join(persistBase, "events.jsonl") : undefined });
119
449
  const mailbox = new Mailbox(id, persistBase ? join(persistBase, "mailbox") : undefined);
120
- const trace = new GraphOfTrace(id, persistBase ? join(persistBase, "trace.json") : undefined);
450
+ // #79: push every trace mutation to the SSE stream as CUSTOM:trace_node so
451
+ // the web Graph of Trace updates live instead of polling. The store stays
452
+ // bus-agnostic; the manager owns the wire shape.
453
+ const trace = new GraphOfTrace(id, persistBase ? join(persistBase, "trace.json") : undefined, (op, node) => {
454
+ bus.emit(ev.custom({ sessionId: id }, CUSTOM_EVENT.TRACE_NODE, { op, node }));
455
+ });
121
456
  const entry = {
122
457
  id,
123
458
  title: input.title ?? "Untitled session",
124
- createdAt: nowIso,
459
+ createdAt,
125
460
  updatedAt: nowIso,
126
- lastActivityAt: Date.now(),
461
+ lastActivityAt,
127
462
  bus,
128
463
  mailbox,
129
464
  trace,
130
465
  agents: new Map(),
131
466
  tasks: new Map(),
467
+ deliveryErrors: new Map(),
468
+ delegators: new Map(),
132
469
  runActive: false,
133
470
  activeRunId: null,
471
+ pendingInputs: new Map(),
472
+ providerRef,
473
+ tokenUsage: { total: emptyTokenUsage(), byAgent: {} },
134
474
  };
135
475
  this.sessions.set(id, entry);
136
- this.touch(entry);
476
+ if (!_restore)
477
+ this.touch(entry);
478
+ else
479
+ this.lastActivityAt = entry.lastActivityAt;
137
480
  if (this.persist) {
138
481
  await mkdir(join(this.bpDir(id), "history"), { recursive: true });
139
- await mkdir(this.sessionSkillsDir(id), { recursive: true });
140
482
  await mkdir(this.workspaceDir(id), { recursive: true });
141
- await this.writeMeta(entry);
483
+ // On restore, meta.json on disk is the authority — do not write it back.
484
+ if (!_restore)
485
+ await this.writeMeta(entry);
486
+ // Only (re)write the ref when the caller chose one — restore must not
487
+ // clobber an existing ref with an empty object.
488
+ if (explicitRef)
489
+ await this.writeProviderRef(entry);
142
490
  await mailbox.recover();
143
491
  await this.loadTrace(entry);
492
+ // Rehydrate cumulative token usage so the running total survives restarts.
493
+ await this.loadUsage(entry);
144
494
  }
145
495
  return this.toSession(entry);
146
496
  }
@@ -148,6 +498,22 @@ export class SessionManager {
148
498
  const e = this.sessions.get(id);
149
499
  return e ? this.toSession(e) : undefined;
150
500
  }
501
+ /**
502
+ * Update a session's title and persist it to meta.json (#29). A blank or
503
+ * non-string title is ignored (idempotent) so the call can't wipe a title.
504
+ * Returns the updated session, or undefined if the session is unknown.
505
+ */
506
+ async renameSession(id, title) {
507
+ const e = this.sessions.get(id);
508
+ if (!e)
509
+ return undefined;
510
+ if (typeof title === "string" && title.trim().length > 0) {
511
+ e.title = title.trim();
512
+ }
513
+ this.touch(e);
514
+ await this.writeMeta(e);
515
+ return this.toSession(e);
516
+ }
151
517
  listSessions() {
152
518
  return [...this.sessions.values()].map((e) => this.toSession(e));
153
519
  }
@@ -179,20 +545,49 @@ export class SessionManager {
179
545
  await e.mailbox.flush();
180
546
  await e.trace.flush();
181
547
  e.bus.clear();
548
+ for (const [id2, d] of e.pendingInputs) {
549
+ d.reject(new Error("evicted"));
550
+ e.pendingInputs.delete(id2);
551
+ }
182
552
  this.sessions.delete(id);
183
553
  return { evicted: true, agentsKilled: killed };
184
554
  }
185
555
  /* ----------------------------- messaging ----------------------------- */
186
556
  /** Send a user message to an agent (default principal). §7 L3 isolated. */
187
- async sendMessage(sessionId, content, agentName = "principal") {
557
+ async sendMessage(sessionId, content, agentName = "principal", opts = {}) {
188
558
  const entry = this.sessions.get(sessionId);
189
559
  if (!entry)
190
560
  throw new Error(`session not found: ${sessionId}`);
191
561
  this.touch(entry);
562
+ // §R-4: refuse new runs past the soft memory threshold. The HTTP `accepted`
563
+ // flag alone isn't surfaced by the web, so also emit a system message.
564
+ if (this.memWatchdog?.isOverSoftLimit()) {
565
+ entry.bus.emit(ev.systemMessage(sessionId, "warning", "内存接近上限,暂不接受新任务,请稍后重试。", {
566
+ agent: agentName,
567
+ recoverable: true,
568
+ }));
569
+ return { accepted: false };
570
+ }
192
571
  const agent = await this.ensureAgent(sessionId, agentName);
572
+ // #60: pull any composer uploads staged under workspaces/local/ into this
573
+ // session's workspace (the agent's cwd) before it runs, so it can read the
574
+ // file the user just attached. No-op when nothing was staged.
575
+ await this.drainLocalUploads(sessionId);
193
576
  entry.runActive = true;
194
577
  entry.activeRunId = `run_${randomUUID()}`;
195
578
  const runId = entry.activeRunId;
579
+ // #70: emit an initial session_state frame here — onStatusChange only fires
580
+ // on a status *change*, and ensureAgent creates the agent as idle without
581
+ // emitting, so without this the panel stays empty until the first
582
+ // setStatus("running"). This first frame carries runState.active=true + the
583
+ // freshly-ensured agent.
584
+ this.emitSessionState(entry);
585
+ // issue #42: persist + broadcast the user's own prompt as a role:"user"
586
+ // CHUNK *before* the agent runs, so SSE replay reconstructs the full
587
+ // transcript (user + assistant). The web composer's optimistic bubble uses
588
+ // the same `uuid`, so the reducer dedupes the replayed event by id rather
589
+ // than duplicating it. Fall back to a fresh id if the client omitted one.
590
+ entry.bus.emit(ev.textMessageChunk({ sessionId, agentName, runId }, opts.uuid ?? randomUUID(), content, "user"));
196
591
  // Fire-and-track: don't block the HTTP response on the full run.
197
592
  void agent
198
593
  .prompt(content)
@@ -203,22 +598,192 @@ export class SessionManager {
203
598
  entry.runActive = false;
204
599
  entry.activeRunId = null;
205
600
  this.touch(entry);
601
+ // #76: re-evaluate the derived run-active flag now that the user-prompt
602
+ // correlation is cleared. For a direct reply this yields the terminal
603
+ // active=false frame; for a delegation a pending delivery loop keeps it
604
+ // true (the loop emits its own terminal frame when it drains).
605
+ this.emitSessionState(entry);
206
606
  });
207
607
  return { accepted: true, runId };
208
608
  }
209
- /** Interrupt a session (or a specific agent). */
609
+ /**
610
+ * Ask the terminal user a question on behalf of `agent`. Emits a
611
+ * `user_input_request` event and returns a promise that resolves when
612
+ * `resolveInput` is called with the matching request_id, or rejects if the
613
+ * session is interrupted/evicted. Blocks the calling tool's turn.
614
+ */
615
+ requestUserInput(entry, agent, req) {
616
+ const requestId = `req_${randomUUID()}`;
617
+ const deferred = makeDeferred();
618
+ entry.pendingInputs.set(requestId, deferred);
619
+ entry.bus.emit(ev.userInputRequest({ sessionId: entry.id, runId: entry.activeRunId ?? undefined }, { request_id: requestId, agent, question: req.question, options: req.options, allow_free_text: req.allow_free_text }));
620
+ return deferred.promise;
621
+ }
622
+ /**
623
+ * Resolve an outstanding ask_user request. Returns false when the session or
624
+ * request_id is unknown/already consumed (stale answer). Pure lookup; never
625
+ * throws — the server handles 404 for unknown sessions before calling.
626
+ */
627
+ resolveInput(sessionId, requestId, answer) {
628
+ const entry = this.sessions.get(sessionId);
629
+ const deferred = entry?.pendingInputs.get(requestId);
630
+ if (!entry || !deferred)
631
+ return false;
632
+ entry.pendingInputs.delete(requestId);
633
+ deferred.resolve(answer);
634
+ this.touch(entry);
635
+ return true;
636
+ }
637
+ /**
638
+ * Interrupt a session (or a specific agent).
639
+ *
640
+ * Targeted (`agentName` given): abort just that agent. Mailboxes and the
641
+ * principal are left untouched — a narrow "stop this one expert" contract.
642
+ *
643
+ * Whole-session (`agentName` omitted, the Stop button — #90): abort EVERY
644
+ * agent (incl. their running script subprocesses, via Pi `session.abort()`),
645
+ * then clear ALL mailboxes so a queued message can't re-wake a stopped agent,
646
+ * surface a user-facing system_message, and immediately prompt the principal
647
+ * one run with an interrupt notice so PI knows the user interrupted and should
648
+ * await further instructions.
649
+ */
210
650
  async interrupt(sessionId, agentName) {
211
651
  const entry = this.sessions.get(sessionId);
212
652
  if (!entry)
213
653
  return false;
654
+ const wholeSession = agentName === undefined;
214
655
  const targets = agentName ? [entry.agents.get(agentName)].filter(Boolean) : [...entry.agents.values()];
215
- for (const a of targets)
216
- await a.abort();
656
+ // Reject any pending ask_user FIRST: a prompt blocked awaiting user input
657
+ // would never settle, so abort()'s waitForIdle (#101) must not run before
658
+ // these are unblocked or it would deadlock.
659
+ for (const [id, d] of entry.pendingInputs) {
660
+ d.reject(new Error("interrupted"));
661
+ entry.pendingInputs.delete(id);
662
+ }
663
+ // Abort every target and WAIT for each in-flight run to fully settle (#101)
664
+ // — RUN_FINISHED emitted, status settled, provider stream fenced — so the
665
+ // interrupt-notice run below can't race the old run ("already processing").
666
+ await Promise.all(targets.map((a) => a.abort()));
217
667
  entry.runActive = false;
218
668
  entry.activeRunId = null;
669
+ if (wholeSession) {
670
+ // Clear every inbox BEFORE notifying PI: otherwise a queued task_delegate
671
+ // would re-wake the expert the user just stopped.
672
+ await entry.mailbox.clearAll();
673
+ entry.bus.emit(ev.systemMessage(sessionId, "info", "⏹️ 用户已中断当前任务,信箱已清空,正在等候进一步指示。", {
674
+ agent: "principal",
675
+ recoverable: true,
676
+ }));
677
+ this.notifyPrincipalInterrupted(entry);
678
+ }
219
679
  return targets.length > 0;
220
680
  }
681
+ /**
682
+ * #90: after a whole-session Stop, prompt the principal one run with an
683
+ * interrupt notice. Mirrors `sendMessage`'s fire-and-track run accounting but
684
+ * emits NO role:"user" text chunk — the notice is system context, not a user
685
+ * bubble. The principal should acknowledge briefly and await the user.
686
+ */
687
+ notifyPrincipalInterrupted(entry) {
688
+ const notice = "<system_notice>\n" +
689
+ " The user interrupted the current task. All running agents were stopped " +
690
+ "and every mailbox was cleared, so any in-flight delegation is cancelled. " +
691
+ "Do not resume or re-delegate the prior work. Briefly acknowledge the " +
692
+ "interruption and wait for the user's next instruction.\n" +
693
+ "</system_notice>";
694
+ void this.ensureAgent(entry.id, "principal")
695
+ .then((agent) => {
696
+ entry.runActive = true;
697
+ entry.activeRunId = `run_${randomUUID()}`;
698
+ this.emitSessionState(entry);
699
+ return agent.prompt(notice).finally(() => {
700
+ entry.runActive = false;
701
+ entry.activeRunId = null;
702
+ this.touch(entry);
703
+ this.emitSessionState(entry);
704
+ });
705
+ })
706
+ .catch(() => {
707
+ /* error-isolated: prompt() never throws, ensureAgent failure is best-effort */
708
+ });
709
+ }
710
+ /** Test/diagnostic accessor: number of queued messages in `agent`'s inbox. */
711
+ mailboxCount(sessionId, agent) {
712
+ return this.sessions.get(sessionId)?.mailbox.count(agent) ?? 0;
713
+ }
221
714
  /* ------------------------------ agents ------------------------------- */
715
+ /**
716
+ * Wrap a SystemTool so its execute() results are guarded against overflowing
717
+ * the model's context window (issue #80). When truncation triggers, the full
718
+ * result is saved to `<workspace>/.truncated/` and a system_message warning
719
+ * is emitted. No-op when maxToolResultTokens is 0.
720
+ */
721
+ wrapToolWithTruncation(tool, sessionId, bus) {
722
+ if (this.maxToolResultTokens <= 0)
723
+ return tool;
724
+ const maxTokens = this.maxToolResultTokens;
725
+ const saveFullResult = (origResult) => this.truncateToolResult(tool.name, sessionId, bus, origResult, maxTokens);
726
+ const originalExecute = tool.execute.bind(tool);
727
+ return {
728
+ name: tool.name,
729
+ description: tool.description,
730
+ parameters: tool.parameters,
731
+ execute: async (params) => {
732
+ const result = await originalExecute(params);
733
+ if (result.isError)
734
+ return result; // never truncate error messages
735
+ return saveFullResult(result);
736
+ },
737
+ };
738
+ }
739
+ /**
740
+ * Estimate tokens in a tool result, truncate if over budget, save the full
741
+ * content to the session workspace, and emit a warning event.
742
+ */
743
+ async truncateToolResult(toolName, sessionId, bus, result, maxTokens) {
744
+ // Concatenate all text blocks to estimate total tokens.
745
+ const fullText = result.content.map((c) => c.text).join("");
746
+ const estimated = estimateTokens(fullText);
747
+ if (estimated <= maxTokens)
748
+ return result;
749
+ // Truncate at ~maxTokens chars (conservative).
750
+ const maxChars = maxTokens * 3.5;
751
+ const truncatedText = fullText.slice(0, Math.floor(maxChars));
752
+ const now = new Date().toISOString();
753
+ const ts = now.replace(/[:.]/g, "-");
754
+ const fname = `${sanitiseFilename(toolName)}_${ts}.json`;
755
+ const relPath = `.truncated/${fname}`;
756
+ // Save full content to workspace.
757
+ try {
758
+ const absDir = join(this.workspaceDir(sessionId), ".truncated");
759
+ await mkdir(absDir, { recursive: true });
760
+ const saved = {
761
+ tool: toolName,
762
+ truncatedAt: now,
763
+ originalBytes: Buffer.byteLength(fullText),
764
+ truncatedBytes: Buffer.byteLength(truncatedText),
765
+ estimatedTokens: estimated,
766
+ maxTokens,
767
+ content: fullText,
768
+ };
769
+ await writeFile(join(absDir, fname), JSON.stringify(saved, null, 2), "utf8");
770
+ }
771
+ catch {
772
+ // Best-effort — never block the agent on file I/O.
773
+ }
774
+ // Emit warning.
775
+ bus.emit(ev.systemMessage(sessionId, "warning", `⚠️ 工具 ${toolName} 返回结果过大 ` +
776
+ `(原始约 ${estimated} tokens / ${formatBytes(Buffer.byteLength(fullText))}),` +
777
+ `已截断至约 ${estimateTokens(truncatedText)} tokens。` +
778
+ `完整结果已保存至 workspace/${relPath}`, { recoverable: true }));
779
+ const notice = `\n\n---\n` +
780
+ `[⚠️ 结果已截断: 原始 ${estimated} tokens / ${formatBytes(Buffer.byteLength(fullText))} → ` +
781
+ `截断后 ${estimateTokens(truncatedText)} tokens。` +
782
+ `完整内容已保存至 workspace/${relPath} ,可用 read 工具读取]`;
783
+ return {
784
+ content: [{ type: "text", text: truncatedText + notice }],
785
+ };
786
+ }
222
787
  /** Ensure an agent exists (create or resurrect). */
223
788
  async ensureAgent(sessionId, name) {
224
789
  const entry = this.sessions.get(sessionId);
@@ -239,13 +804,29 @@ export class SessionManager {
239
804
  destroyAgent: async (target) => {
240
805
  await this.destroyAgent(sessionId, target);
241
806
  },
807
+ wakeAgent: (target) => this.wakeAgent(sessionId, target),
808
+ requestUserInput: (req) => this.requestUserInput(entry, name, req),
809
+ routerSkillsDir: this.routerSkillsDir,
242
810
  };
243
811
  const systemTools = systemToolsForRole(role, name, deps);
244
812
  // External MCP tools go to non-trace agents (trace agent is graph-only, §9).
245
813
  const mcpTools = role === "trace" ? [] : await this.ensureMcpTools();
246
- const agentTools = [...systemTools, ...mcpTools];
814
+ const rawTools = [...systemTools, ...mcpTools];
815
+ // Built-in skills are loaded by Pi natively (not as tools). Materialize the
816
+ // bundled content into bp_template/skills once, then hand the dir to the
817
+ // factory as additionalSkillPaths. Trace agent is skill-less (graph-only).
818
+ let skillPaths;
819
+ if (role !== "trace") {
820
+ await this.ensureSkillsMaterialized();
821
+ skillPaths = [this.skillsDir];
822
+ }
823
+ // #80: guard every tool result against context-window overflow.
824
+ const agentTools = rawTools.map((t) => this.wrapToolWithTruncation(t, sessionId, entry.bus));
247
825
  const builtins = builtinToolNamesForRole(role, name);
248
826
  const allowedToolNames = [...builtins, ...agentTools.map((t) => t.name)];
827
+ // Resolve this session's provider against the SSOT (providers.json). When
828
+ // unset/empty the factory falls back to Pi's env-based default.
829
+ const providerConfig = await resolveSessionProvider(this.dataRoot, entry.providerRef);
249
830
  const session = await this.agentFactory({
250
831
  sessionId,
251
832
  agentName: name,
@@ -255,7 +836,15 @@ export class SessionManager {
255
836
  systemTools: agentTools,
256
837
  allowedToolNames,
257
838
  systemPrompt: await this.loadPersona(name, role),
258
- skillPaths: [this.templateSkillsDir(), this.sessionSkillsDir(sessionId)],
839
+ skillPaths,
840
+ providerConfig,
841
+ // 意图二 fallback: the trace-reminder extension calls this when an expert
842
+ // was reminded once and still didn't report back, so the principal never
843
+ // dead-waits on a silent expert.
844
+ onUnreplied: (agentName) => this.writeFallbackToDelegator(entry, agentName),
845
+ // #97: only the principal gets the live team-status block injected each
846
+ // turn (it is the coordinator). Other roles run without it.
847
+ renderAgentStatus: name === "principal" ? () => this.renderAgentStatus(entry) : undefined,
259
848
  });
260
849
  const agent = new MasAgent({
261
850
  sessionId,
@@ -263,13 +852,72 @@ export class SessionManager {
263
852
  role,
264
853
  session,
265
854
  bus: entry.bus,
266
- onStatusChange: () => this.touch(entry),
855
+ // #70: keep the touch (idle-reclaim) AND push an authoritative live
856
+ // snapshot so the web Agents panel updates without a reload/reselect.
857
+ // setStatus early-returns on no-op transitions, so this never storms.
858
+ onStatusChange: () => {
859
+ this.touch(entry);
860
+ this.emitSessionState(entry);
861
+ },
862
+ // Roll the agent's running total into the per-session breakdown, push a
863
+ // live session_state frame, and persist usage.json. Total is recomputed
864
+ // as the sum across agents so it can never drift from the breakdown.
865
+ onUsage: (agentName, _delta, cumulative) => {
866
+ entry.tokenUsage.byAgent[agentName] = cumulative;
867
+ entry.tokenUsage.total = sumAgentUsage(entry.tokenUsage.byAgent);
868
+ this.touch(entry);
869
+ this.emitSessionState(entry);
870
+ void this.writeUsage(entry);
871
+ },
267
872
  });
873
+ // Continue this agent's cumulative count across restarts / lazy revival.
874
+ agent.seedUsage(entry.tokenUsage.byAgent[name]);
268
875
  entry.agents.set(name, agent);
269
876
  if (!entry.tasks.has(name))
270
877
  entry.tasks.set(name, "");
271
878
  return agent;
272
879
  }
880
+ /**
881
+ * 意图二 fallback — the trace-reminder extension calls this (via the factory's
882
+ * `onUnreplied`) when an expert was reminded once and STILL did not
883
+ * `send_message` its delegator (the "silence" path; a hard *error* run is
884
+ * handled separately). We write a NEUTRAL system note into the REAL delegator's
885
+ * mailbox and wake it so it never dead-waits. The delegator is whoever last
886
+ * delegated to this expert (#97 directed escalation), falling back to the
887
+ * principal. This fires during the expert's run (before the clean-run cleanup
888
+ * in `runDeliveryLoop`), so the delegator record is still present. The note
889
+ * only states the fact — the expert ended without delivering a result — and
890
+ * deliberately gives NO directive ("re-delegate", "proceed without it"): the
891
+ * delegator decides what to do. Best-effort: a failed write must never break
892
+ * the agent loop.
893
+ */
894
+ writeFallbackToDelegator(entry, expert) {
895
+ const to = this.delegatorFor(entry, expert);
896
+ void entry.mailbox
897
+ .write({
898
+ fromAgent: "system",
899
+ toAgent: to,
900
+ msgType: "system",
901
+ content: `[系统通知] 专家 "${expert}" 结束了本次任务但未回交结果。`,
902
+ })
903
+ .then(() => this.wakeAgent(entry.id, to))
904
+ .catch(() => {
905
+ /* best-effort */
906
+ });
907
+ }
908
+ /**
909
+ * #97: snapshot the live team status for injection into the principal's turn
910
+ * (via the agent-status extension's Pi `context` hook). Lists every agent —
911
+ * INCLUDING the principal itself, so it sees its own inbox backlog — with its
912
+ * authoritative status and the number of messages still queued unread in its
913
+ * inbox (`mailbox.count`). Excludes the trace agent (an internal recorder) and
914
+ * any stopped agent (destroyed; irrelevant to current coordination). Returns
915
+ * "" when nothing is worth reporting so the extension injects nothing.
916
+ */
917
+ renderAgentStatus(entry) {
918
+ const lines = collectAgentStatusLines(entry.agents.values(), (name) => entry.mailbox.count(name));
919
+ return renderAgentStatusBlock(lines);
920
+ }
273
921
  async destroyAgent(sessionId, name) {
274
922
  const entry = this.sessions.get(sessionId);
275
923
  if (!entry)
@@ -280,6 +928,198 @@ export class SessionManager {
280
928
  agent.stop();
281
929
  entry.agents.delete(name); // history on disk is kept (§5).
282
930
  }
931
+ /* ------------------------- mailbox delivery (#76) ------------------------- */
932
+ /**
933
+ * #76: wake `name` to consume its mailbox. Fire-and-forget — `send_message`
934
+ * calls this after writing; the actual run happens in a serial delivery loop.
935
+ * The re-entrancy guard (`deliveryLoops`) means concurrent wakes for the same
936
+ * agent collapse into the one already-running loop (which re-drains after each
937
+ * turn), so an agent's `prompt` is never invoked concurrently.
938
+ */
939
+ wakeAgent(sessionId, name) {
940
+ const key = `${sessionId}:${name}`;
941
+ if (this.deliveryLoops.has(key))
942
+ return;
943
+ this.deliveryLoops.add(key);
944
+ void this.runDeliveryLoop(sessionId, name).finally(() => {
945
+ this.deliveryLoops.delete(key);
946
+ // Emit a final frame AFTER the key is gone: the agent's own running→idle
947
+ // transition fired emitSessionState while this key was still present (so
948
+ // that frame still read active via the pending-delivery check). Without
949
+ // this trailing frame the derived run-active flag would stay stuck true.
950
+ const entry = this.sessions.get(sessionId);
951
+ if (entry)
952
+ this.emitSessionState(entry);
953
+ // Re-check after releasing the guard: a message could have been written
954
+ // between the loop's final empty read and this delete, and that writer's
955
+ // wakeAgent would have bailed (key still present) — leaving the message
956
+ // unread. Re-wake if the inbox is non-empty so it never strands.
957
+ if (entry && entry.mailbox.count(name) > 0)
958
+ this.wakeAgent(sessionId, name);
959
+ });
960
+ }
961
+ /**
962
+ * Drain `name`'s inbox and run it, looping so messages that arrive *during* a
963
+ * turn are picked up without a second external wake. Each iteration atomically
964
+ * drains the inbox, ensures the agent, wraps the messages as
965
+ * `<message_envelope>`s (the format the A2A persona documents), and prompts.
966
+ * `MasAgent.prompt` is error-isolated (never throws), so a failed expert turn
967
+ * ends the loop cleanly rather than rejecting. A `session_state` frame is
968
+ * emitted on entry and exit so the derived run-active flag reflects the
969
+ * delegated work even across the await gap between the sender finishing and
970
+ * the target starting.
971
+ */
972
+ async runDeliveryLoop(sessionId, name) {
973
+ for (;;) {
974
+ const entry = this.sessions.get(sessionId);
975
+ if (!entry)
976
+ return;
977
+ const msgs = await entry.mailbox.readBatch(name); // bounded FIFO batch (#76)
978
+ if (msgs.length === 0)
979
+ return;
980
+ const agent = await this.ensureAgent(sessionId, name);
981
+ if (agent.status === "stopped")
982
+ return;
983
+ // #97 directed escalation: remember who delegated this work (the last
984
+ // task_delegate in the batch). Self-retry nudges are msgType "system", so
985
+ // they never overwrite a real delegator recorded on the original task.
986
+ const delegated = [...msgs].reverse().find((m) => m.msgType === "task_delegate");
987
+ if (delegated)
988
+ entry.delegators.set(name, delegated.fromAgent);
989
+ this.touch(entry);
990
+ // Surface the delegated run immediately (derived active flag, agent list).
991
+ this.emitSessionState(entry);
992
+ await agent.prompt(this.renderEnvelopes(msgs, name));
993
+ // #97 error path. A delegated run that ended in `error` is handled here
994
+ // (the trace-reminder extension bails on an errored run, leaving the host
995
+ // the sole owner of error recovery). Transient errors self-retry up to a
996
+ // cap; fatal errors (auth/config) and the exhausted cap escalate to the
997
+ // principal. A clean run resets the agent's consecutive-error count.
998
+ if (agent.status === "error" && agent.role === "expert") {
999
+ if (this.handleDeliveryError(entry, agent))
1000
+ continue; // self-retry queued
1001
+ return; // escalated — nothing more to drain for this agent
1002
+ }
1003
+ entry.deliveryErrors.delete(name); // clean run → reset the streak
1004
+ entry.delegators.delete(name); // and forget the delegator (task done)
1005
+ }
1006
+ }
1007
+ /**
1008
+ * #97: react to a failed delegated expert run. Returns true when a self-retry
1009
+ * was queued (the loop should continue and re-drain the agent's own inbox),
1010
+ * false when the failure was escalated to the principal (the loop should stop).
1011
+ *
1012
+ * Policy:
1013
+ * - `retryable` (rate limit / 5xx / network) AND under the retry cap →
1014
+ * re-wake the SAME expert with a neutral system nudge in its own inbox, and
1015
+ * surface a `warning` to the user ("retrying n/N"). Re-running may succeed.
1016
+ * - `fatal` (auth / missing key / forbidden), OR the cap is reached →
1017
+ * escalate: write a NEUTRAL error note to the principal's mailbox + wake it,
1018
+ * surface an `error` to the user, and reset the streak so a future task to
1019
+ * this expert starts fresh.
1020
+ */
1021
+ handleDeliveryError(entry, agent) {
1022
+ const name = agent.name;
1023
+ const count = (entry.deliveryErrors.get(name) ?? 0) + 1;
1024
+ entry.deliveryErrors.set(name, count);
1025
+ const kind = agent.lastErrorKind ?? "retryable";
1026
+ const headline = agent.state().lastError?.message ?? "未知错误";
1027
+ if (kind === "retryable" && count < SessionManager.MAX_DELIVERY_RETRIES) {
1028
+ entry.bus.emit(ev.systemMessage(entry.id, "warning", `专家 "${name}" 执行任务时出错,正在自动重试 (${count}/${SessionManager.MAX_DELIVERY_RETRIES})…`, { agent: name, recoverable: true }));
1029
+ // Re-wake the SAME expert via its own inbox: a neutral, directive-free
1030
+ // nudge. The expert retains its prior conversation context, so it knows
1031
+ // what it was attempting; we only signal "the last attempt failed, try
1032
+ // again". Returning true lets the loop re-drain this note immediately.
1033
+ void entry.mailbox
1034
+ .write({
1035
+ fromAgent: "system",
1036
+ toAgent: name,
1037
+ msgType: "system",
1038
+ content: `[系统通知] 上一次任务执行出错(${headline})。请重试。`,
1039
+ })
1040
+ .catch(() => {
1041
+ /* best-effort */
1042
+ });
1043
+ return true;
1044
+ }
1045
+ // Fatal, or retries exhausted → escalate to the real delegator and stop.
1046
+ const delegator = this.delegatorFor(entry, name);
1047
+ const target = delegator === "principal" ? "主管" : `委派方 "${delegator}"`;
1048
+ entry.bus.emit(ev.systemMessage(entry.id, "error", kind === "fatal"
1049
+ ? `专家 "${name}" 发生无法自动恢复的错误,已上报${target}。`
1050
+ : `专家 "${name}" 连续 ${count} 次执行失败,已上报${target}。`, { agent: name, recoverable: true }));
1051
+ this.writeErrorToDelegator(entry, name, headline);
1052
+ entry.deliveryErrors.delete(name); // reset streak for a future task
1053
+ entry.delegators.delete(name); // delegator notified; forget it
1054
+ return false;
1055
+ }
1056
+ /**
1057
+ * #97 directed escalation: resolve who an expert's failure/silence should be
1058
+ * reported to. Returns the recorded delegator ONLY when it is a still-live,
1059
+ * non-trace agent other than the expert itself (a destroyed/stopped delegator
1060
+ * would be wrongly resurrected by the wake, and a self/system target is
1061
+ * nonsensical). Otherwise falls back to `principal`, the root coordinator,
1062
+ * which always exists and owns un-rooted work.
1063
+ */
1064
+ delegatorFor(entry, expert) {
1065
+ const d = entry.delegators.get(expert);
1066
+ if (!d || d === expert || d === "system" || d === "principal")
1067
+ return "principal";
1068
+ const agent = entry.agents.get(d);
1069
+ if (!agent || agent.status === "stopped" || agent.role === "trace")
1070
+ return "principal";
1071
+ return d;
1072
+ }
1073
+ /**
1074
+ * #97 error escalation: write a NEUTRAL, error-flavored system note into the
1075
+ * REAL delegator's mailbox and wake it, so whoever delegated the work (the
1076
+ * principal, or another agent in a chain like auditor→engineer) learns the
1077
+ * expert failed rather than dead-waiting. Distinct from
1078
+ * `writeFallbackToDelegator` (the "silence" path): this one states an ERROR
1079
+ * occurred and carries the error headline as context, but — like the silence
1080
+ * note — gives NO directive ("re-delegate" / "proceed"): the delegator decides.
1081
+ * Best-effort; never breaks the loop.
1082
+ */
1083
+ writeErrorToDelegator(entry, expert, headline) {
1084
+ const to = this.delegatorFor(entry, expert);
1085
+ void entry.mailbox
1086
+ .write({
1087
+ fromAgent: "system",
1088
+ toAgent: to,
1089
+ msgType: "system",
1090
+ content: `[系统通知] 专家 "${expert}" 在执行任务时发生错误,未能产出结果。错误:${headline}`,
1091
+ })
1092
+ .then(() => this.wakeAgent(entry.id, to))
1093
+ .catch(() => {
1094
+ /* best-effort */
1095
+ });
1096
+ }
1097
+ /**
1098
+ * Wrap drained mailbox messages in the `<message_envelope>` header the A2A
1099
+ * persona (`personas.ts`) tells agents to expect, so the model knows who sent
1100
+ * each message and why. User-origin messages declare `<source type="user"/>`;
1101
+ * agent-origin ones name the sender.
1102
+ *
1103
+ * 意图一·触发点2 (Pi-native hooks): when the PRINCIPAL receives a message from
1104
+ * another agent (not the user — i.e. an expert reporting back), append a single
1105
+ * static line nudging it to record_trace any real decision it makes while
1106
+ * processing the reply. Stateless, loop-free (at most one line per delivery).
1107
+ */
1108
+ renderEnvelopes(msgs, toAgent) {
1109
+ const body = msgs
1110
+ .map((m) => {
1111
+ const source = m.msgType === "user_message"
1112
+ ? `<source type="user" />`
1113
+ : `<source type="agent" name="${m.fromAgent}" />`;
1114
+ return `<message_envelope>\n ${source}\n <type>${m.msgType}</type>\n</message_envelope>\n${m.content}`;
1115
+ })
1116
+ .join("\n\n");
1117
+ const fromAgent = msgs.some((m) => m.msgType !== "user_message");
1118
+ if (toAgent === "principal" && fromAgent) {
1119
+ return `${body}\n\n[提醒:处理完这些消息后,如有实质决策请调用 record_trace 记录。]`;
1120
+ }
1121
+ return body;
1122
+ }
283
1123
  /* -------------------------- state authority -------------------------- */
284
1124
  /** §10 polling fallback: list agents with authoritative status. */
285
1125
  listAgents(sessionId) {
@@ -298,16 +1138,123 @@ export class SessionManager {
298
1138
  return out;
299
1139
  });
300
1140
  }
1141
+ /**
1142
+ * #76: a session is "running" whenever ANY non-trace agent is running, or a
1143
+ * mailbox delivery loop is pending for a non-trace target (the loop is
1144
+ * registered synchronously inside `send_message`, so this closes the await gap
1145
+ * between the sender finishing its turn and the delegated target starting —
1146
+ * without it the flag would flicker false in that window). The trace agent is
1147
+ * a real spawned agent (record_trace dispatches `trace_event` envelopes into
1148
+ * its mailbox and it owns the Graph of Trace as editor, see
1149
+ * `system-tools.ts:createRecordTraceTool`), but it is excluded from the
1150
+ * AGGREGATE: a trace recording isn't "the user's task is still running". It
1151
+ * is still LISTED in `agents[]` with its own status so the Agents panel shows
1152
+ * its idle/running transitions live.
1153
+ */
1154
+ deriveRunActive(entry) {
1155
+ if (entry.runActive)
1156
+ return true;
1157
+ for (const a of entry.agents.values()) {
1158
+ if (a.role !== "trace" && a.status === "running")
1159
+ return true;
1160
+ }
1161
+ for (const key of this.deliveryLoops) {
1162
+ const sep = entry.id.length;
1163
+ // key === `${sid}:${name}` — match this session, exclude the trace target.
1164
+ if (key.startsWith(`${entry.id}:`) && key.slice(sep + 1) !== "trace")
1165
+ return true;
1166
+ }
1167
+ return false;
1168
+ }
1169
+ /**
1170
+ * #70/#76: emit the authoritative live snapshot as a `CUSTOM:session_state`
1171
+ * event. This is the wholesale source the web Agents panel replaces its
1172
+ * agents list from; it is pushed on every agent status transition
1173
+ * (`onStatusChange`), an initial frame in `sendMessage`, and on delivery-loop
1174
+ * entry/exit. `runState.active` is DERIVED (any non-trace agent running / a
1175
+ * pending delivery), so a delegated expert keeps the run visibly active. The
1176
+ * ring buffer replays the last frame on reconnect, so a re-subscribing client
1177
+ * recovers the current snapshot. Shape matches `SessionStateSnapshotSchema`.
1178
+ */
1179
+ emitSessionState(entry) {
1180
+ entry.bus.emit(ev.custom({ sessionId: entry.id }, "session_state", {
1181
+ runState: { active: this.deriveRunActive(entry), runId: entry.activeRunId },
1182
+ agents: this.listAgents(entry.id),
1183
+ lastActivityTs: new Date(entry.lastActivityAt).toISOString(),
1184
+ tokenUsage: entry.tokenUsage,
1185
+ }));
1186
+ }
301
1187
  getSessionState(sessionId) {
302
1188
  const entry = this.sessions.get(sessionId);
303
1189
  if (!entry)
304
1190
  return undefined;
305
1191
  return {
306
- runState: { active: entry.runActive, runId: entry.activeRunId },
1192
+ runState: { active: this.deriveRunActive(entry), runId: entry.activeRunId },
307
1193
  agents: this.listAgents(sessionId),
308
1194
  lastActivityTs: new Date(entry.lastActivityAt).toISOString(),
1195
+ tokenUsage: entry.tokenUsage,
309
1196
  };
310
1197
  }
1198
+ /** The session's Graph of Trace (reasoning DAG), or undefined if no session. */
1199
+ getTrace(sessionId) {
1200
+ const entry = this.sessions.get(sessionId);
1201
+ return entry?.trace.getGraph();
1202
+ }
1203
+ /**
1204
+ * Read persisted AG-UI events for a session from `.bp/<sid>/events.jsonl`.
1205
+ * Used by the web to rehydrate chat history after a runtime restart (the
1206
+ * in-memory bus ring buffer only carries `recent()` for live SSE replay).
1207
+ *
1208
+ * The file is read line-by-line and unparseable lines are skipped so a
1209
+ * single corrupt record doesn't poison the whole history.
1210
+ *
1211
+ * `limit` caps the returned array; when total > limit we return the **tail**
1212
+ * (most recent events) for lightweight callers. Default 1000, positive
1213
+ * limits are capped at 5000. `limit <= 0` returns the full log and is used by
1214
+ * the web rehydrate path so long sessions are not sliced through the middle
1215
+ * of a streamed message.
1216
+ *
1217
+ * Returns `undefined` if the session id isn't in memory — this method is
1218
+ * only useful for known sessions (call `restoreFromDisk` first if needed).
1219
+ */
1220
+ async readEventHistory(sessionId, opts = {}) {
1221
+ if (!this.sessions.has(sessionId))
1222
+ return undefined;
1223
+ const requestedLimit = opts.limit;
1224
+ const limit = requestedLimit === undefined || !Number.isFinite(requestedLimit)
1225
+ ? 1000
1226
+ : requestedLimit <= 0
1227
+ ? null
1228
+ : Math.max(1, Math.min(requestedLimit, 5000));
1229
+ const path = join(this.bpDir(sessionId), "events.jsonl");
1230
+ let raw;
1231
+ try {
1232
+ raw = await readFile(path, "utf8");
1233
+ }
1234
+ catch {
1235
+ // No events file yet — empty history is valid (newly created session).
1236
+ return { events: [], total: 0, truncated: false };
1237
+ }
1238
+ const lines = raw.split("\n");
1239
+ const events = [];
1240
+ let total = 0;
1241
+ for (const line of lines) {
1242
+ if (!line)
1243
+ continue;
1244
+ let parsed;
1245
+ try {
1246
+ parsed = JSON.parse(line);
1247
+ }
1248
+ catch {
1249
+ continue; // skip malformed line
1250
+ }
1251
+ total++;
1252
+ events.push(parsed);
1253
+ }
1254
+ const truncated = limit !== null && events.length > limit;
1255
+ const out = truncated ? events.slice(events.length - limit) : events;
1256
+ return { events: out, total, truncated };
1257
+ }
311
1258
  metrics() {
312
1259
  let runningAgents = 0;
313
1260
  for (const e of this.sessions.values()) {
@@ -315,13 +1262,36 @@ export class SessionManager {
315
1262
  if (a.status === "running")
316
1263
  runningAgents++;
317
1264
  }
1265
+ const snap = this.memWatchdog?.snapshot() ?? null;
318
1266
  return {
319
1267
  activeSessions: this.sessions.size,
320
1268
  runningAgents,
321
1269
  lastActivityAt: this.lastActivityAt ? new Date(this.lastActivityAt).toISOString() : null,
322
1270
  memRss: process.memoryUsage().rss,
1271
+ // null when the opt-in budget is unset (single-user) — keeps the metric meaningful.
1272
+ memLimitBytes: snap ? snap.limitBytes : null,
1273
+ memRatio: snap ? snap.ratio : null,
323
1274
  };
324
1275
  }
1276
+ /**
1277
+ * Stop background work (memory watchdog). Called on graceful shutdown so the
1278
+ * poll interval doesn't outlive the manager. Idempotent.
1279
+ */
1280
+ shutdown() {
1281
+ this.memWatchdog?.stop();
1282
+ }
1283
+ /**
1284
+ * Rising-edge handler when RSS crosses the soft memory threshold (§R-4).
1285
+ * Warns every currently-loaded session once so in-flight users see the
1286
+ * back-off; new sessions/messages are then refused at their entry points.
1287
+ */
1288
+ onMemoryThrottle(snap) {
1289
+ const mb = (n) => Math.round(n / (1024 * 1024));
1290
+ const msg = `内存使用接近容器上限 (${mb(snap.rss)}MB / ${mb(snap.limitBytes)}MB),正在限流,暂不接受新任务。`;
1291
+ for (const [id, entry] of this.sessions) {
1292
+ entry.bus.emit(ev.systemMessage(id, "warning", msg, { recoverable: true }));
1293
+ }
1294
+ }
325
1295
  /* ----------------------------- SSE/events ---------------------------- */
326
1296
  subscribe(sessionId, listener) {
327
1297
  const entry = this.sessions.get(sessionId);
@@ -358,6 +1328,27 @@ export class SessionManager {
358
1328
  await mkdir(this.bpDir(entry.id), { recursive: true }).catch(() => { });
359
1329
  await writeFile(join(this.bpDir(entry.id), "meta.json"), JSON.stringify(meta, null, 2), "utf8").catch(() => { });
360
1330
  }
1331
+ providerRefPath(sid) {
1332
+ return join(this.bpDir(sid), "provider.json");
1333
+ }
1334
+ /** Persist this session's `{ providerId, modelId }` reference (no key). */
1335
+ async writeProviderRef(entry) {
1336
+ if (!this.persist)
1337
+ return;
1338
+ await mkdir(this.bpDir(entry.id), { recursive: true }).catch(() => { });
1339
+ await writeFile(this.providerRefPath(entry.id), JSON.stringify(entry.providerRef, null, 2), "utf8").catch(() => { });
1340
+ }
1341
+ /** Load a session's stored provider ref from disk (restore path). */
1342
+ async readProviderRef(sid) {
1343
+ try {
1344
+ const raw = await readFile(this.providerRefPath(sid), "utf8");
1345
+ const ref = JSON.parse(raw);
1346
+ return { providerId: ref.providerId, modelId: ref.modelId };
1347
+ }
1348
+ catch {
1349
+ return {};
1350
+ }
1351
+ }
361
1352
  async loadTrace(entry) {
362
1353
  try {
363
1354
  const raw = await readFile(join(this.bpDir(entry.id), "trace.json"), "utf8");
@@ -367,7 +1358,43 @@ export class SessionManager {
367
1358
  /* no trace yet */
368
1359
  }
369
1360
  }
370
- /** Restore session list from disk (§10 策略A: agents start idle, lazily revived). */
1361
+ usagePath(sid) {
1362
+ return join(this.bpDir(sid), "usage.json");
1363
+ }
1364
+ /** Persist cumulative token usage (best-effort; never throws). */
1365
+ async writeUsage(entry) {
1366
+ if (!this.persist)
1367
+ return;
1368
+ await mkdir(this.bpDir(entry.id), { recursive: true }).catch(() => { });
1369
+ await writeFile(this.usagePath(entry.id), JSON.stringify(entry.tokenUsage, null, 2), "utf8").catch(() => { });
1370
+ }
1371
+ /** Rehydrate cumulative token usage from disk (restore path). */
1372
+ async loadUsage(entry) {
1373
+ try {
1374
+ const raw = await readFile(this.usagePath(entry.id), "utf8");
1375
+ const parsed = JSON.parse(raw);
1376
+ const byAgent = {};
1377
+ for (const [name, u] of Object.entries(parsed.byAgent ?? {})) {
1378
+ byAgent[name] = addUsage(emptyTokenUsage(), u);
1379
+ }
1380
+ entry.tokenUsage = { byAgent, total: sumAgentUsage(byAgent) };
1381
+ }
1382
+ catch {
1383
+ /* no usage yet — keep the zeroed default */
1384
+ }
1385
+ }
1386
+ /**
1387
+ * Restore session list from disk. Reads `<dataRoot>/.bp/<id>/meta.json` for
1388
+ * every directory and recreates the session entry with its original
1389
+ * timestamps preserved (provider ref, mailbox, trace also rehydrate via the
1390
+ * normal `createSession` restore path). §10 策略A: agents start idle and
1391
+ * are lazily revived when the user actually sends a message.
1392
+ *
1393
+ * Idempotent — sessions already in memory are skipped, not reset.
1394
+ *
1395
+ * Returns the ids that were restored this call (i.e. excluding ones that
1396
+ * were already loaded or whose meta.json was missing / malformed).
1397
+ */
371
1398
  async restoreFromDisk() {
372
1399
  const restored = [];
373
1400
  const root = join(this.dataRoot, ".bp");
@@ -376,19 +1403,35 @@ export class SessionManager {
376
1403
  ids = await readdir(root);
377
1404
  }
378
1405
  catch {
379
- return restored;
1406
+ return restored; // .bp/ doesn't exist yet — fresh install
380
1407
  }
381
1408
  for (const id of ids) {
1409
+ if (this.sessions.has(id))
1410
+ continue;
1411
+ const metaPath = join(root, id, "meta.json");
1412
+ let raw;
382
1413
  try {
383
- const raw = await readFile(join(root, id, "meta.json"), "utf8");
384
- const meta = JSON.parse(raw);
385
- if (!this.sessions.has(meta.id)) {
386
- await this.createSession({ id: meta.id, title: meta.title });
387
- restored.push(meta.id);
388
- }
1414
+ raw = await readFile(metaPath, "utf8");
389
1415
  }
390
1416
  catch {
391
- /* skip non-session dirs */
1417
+ continue; // not a session dir (no meta.json) — silent skip
1418
+ }
1419
+ try {
1420
+ const meta = JSON.parse(raw);
1421
+ const sid = meta.id ?? id;
1422
+ if (this.sessions.has(sid))
1423
+ continue;
1424
+ const now = new Date().toISOString();
1425
+ await this.createSession({ id: sid, title: meta.title }, {
1426
+ createdAt: meta.createdAt ?? now,
1427
+ updatedAt: meta.updatedAt ?? now,
1428
+ lastActivityAt: typeof meta.lastActivityAt === "number" ? meta.lastActivityAt : Date.now(),
1429
+ });
1430
+ restored.push(sid);
1431
+ }
1432
+ catch (err) {
1433
+ // eslint-disable-next-line no-console
1434
+ console.warn(`[runtime] skipping ${id}: ${err.message}`);
392
1435
  }
393
1436
  }
394
1437
  return restored;