@brainpilot/runtime 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. package/README.md +61 -0
  2. package/dist/agent-error.d.ts +51 -0
  3. package/dist/agent-error.d.ts.map +1 -0
  4. package/dist/agent-error.js +163 -0
  5. package/dist/agent-error.js.map +1 -0
  6. package/dist/agent-factory.d.ts.map +1 -1
  7. package/dist/agent-factory.js +36 -6
  8. package/dist/agent-factory.js.map +1 -1
  9. package/dist/events.d.ts +18 -0
  10. package/dist/events.d.ts.map +1 -1
  11. package/dist/events.js +24 -0
  12. package/dist/events.js.map +1 -1
  13. package/dist/extensions/agent-status.d.ts +91 -0
  14. package/dist/extensions/agent-status.d.ts.map +1 -0
  15. package/dist/extensions/agent-status.js +103 -0
  16. package/dist/extensions/agent-status.js.map +1 -0
  17. package/dist/extensions/trace-reminder.d.ts +94 -0
  18. package/dist/extensions/trace-reminder.d.ts.map +1 -0
  19. package/dist/extensions/trace-reminder.js +153 -0
  20. package/dist/extensions/trace-reminder.js.map +1 -0
  21. package/dist/index.d.ts +2 -0
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +1 -0
  24. package/dist/index.js.map +1 -1
  25. package/dist/mailbox.d.ts +37 -1
  26. package/dist/mailbox.d.ts.map +1 -1
  27. package/dist/mailbox.js +79 -2
  28. package/dist/mailbox.js.map +1 -1
  29. package/dist/mas-agent.d.ts +74 -12
  30. package/dist/mas-agent.d.ts.map +1 -1
  31. package/dist/mas-agent.js +158 -33
  32. package/dist/mas-agent.js.map +1 -1
  33. package/dist/materialize-skills.d.ts +40 -0
  34. package/dist/materialize-skills.d.ts.map +1 -0
  35. package/dist/materialize-skills.js +141 -0
  36. package/dist/materialize-skills.js.map +1 -0
  37. package/dist/mock-agent.d.ts.map +1 -1
  38. package/dist/mock-agent.js +13 -1
  39. package/dist/mock-agent.js.map +1 -1
  40. package/dist/personas.d.ts +16 -0
  41. package/dist/personas.d.ts.map +1 -1
  42. package/dist/personas.js +651 -8
  43. package/dist/personas.js.map +1 -1
  44. package/dist/pi-provider.d.ts +5 -0
  45. package/dist/pi-provider.d.ts.map +1 -1
  46. package/dist/pi-provider.js +7 -1
  47. package/dist/pi-provider.js.map +1 -1
  48. package/dist/provider-config.d.ts +5 -0
  49. package/dist/provider-config.d.ts.map +1 -1
  50. package/dist/provider-config.js +2 -0
  51. package/dist/provider-config.js.map +1 -1
  52. package/dist/server.d.ts +2 -2
  53. package/dist/server.d.ts.map +1 -1
  54. package/dist/server.js +82 -8
  55. package/dist/server.js.map +1 -1
  56. package/dist/session-manager.d.ts +311 -8
  57. package/dist/session-manager.d.ts.map +1 -1
  58. package/dist/session-manager.js +896 -39
  59. package/dist/session-manager.js.map +1 -1
  60. package/dist/tools/skill-search.d.ts +53 -0
  61. package/dist/tools/skill-search.d.ts.map +1 -0
  62. package/dist/tools/skill-search.js +269 -0
  63. package/dist/tools/skill-search.js.map +1 -0
  64. package/dist/tools/system-tools.d.ts +22 -1
  65. package/dist/tools/system-tools.d.ts.map +1 -1
  66. package/dist/tools/system-tools.js +149 -21
  67. package/dist/tools/system-tools.js.map +1 -1
  68. package/dist/trace.d.ts +27 -1
  69. package/dist/trace.d.ts.map +1 -1
  70. package/dist/trace.js +60 -3
  71. package/dist/trace.js.map +1 -1
  72. package/dist/types.d.ts +51 -5
  73. package/dist/types.d.ts.map +1 -1
  74. package/package.json +6 -2
@@ -9,20 +9,32 @@
9
9
  * Persistence (§5): config/history/state live under `<dataRoot>/.bp/{sid}/`,
10
10
  * work files under `<dataRoot>/workspaces/{sid}/`.
11
11
  */
12
- import { mkdir, readFile, writeFile, readdir, rm, stat } from "node:fs/promises";
13
- import { join, resolve, sep } from "node:path";
12
+ import { mkdir, readFile, writeFile, readdir, rm, stat, rename } from "node:fs/promises";
13
+ import { join, resolve, sep, dirname } from "node:path";
14
14
  import { randomUUID } from "node:crypto";
15
+ import { CUSTOM_EVENT, } from "@brainpilot/protocol";
15
16
  import { EventBus } from "./event-bus.js";
16
17
  import { Mailbox } from "./mailbox.js";
17
18
  import { GraphOfTrace } from "./trace.js";
18
- import { MasAgent } from "./mas-agent.js";
19
+ import { MasAgent, addUsage, emptyTokenUsage } from "./mas-agent.js";
19
20
  import { systemToolsForRole, builtinToolNamesForRole } from "./tools/system-tools.js";
20
21
  import { ev } from "./events.js";
21
22
  import { selectFactory, isMockMode } from "./agent-factory.js";
22
- import { personaFor } from "./personas.js";
23
+ import { personaFor, withLanguageDirective } from "./personas.js";
24
+ import { renderAgentStatusBlock, collectAgentStatusLines } from "./extensions/agent-status.js";
23
25
  import { McpBridge, loadMcpServersConfig } from "./mcp-bridge.js";
26
+ import { materializeSkills } from "./materialize-skills.js";
24
27
  import { resolveSessionProvider } from "./provider-config.js";
25
28
  import { MemWatchdog, parseMemLimitMb } from "./mem-watchdog.js";
29
+ function makeDeferred() {
30
+ let resolve;
31
+ let reject;
32
+ const promise = new Promise((res, rej) => {
33
+ resolve = res;
34
+ reject = rej;
35
+ });
36
+ return { promise, resolve, reject };
37
+ }
26
38
  /** Roles inferred from agent name. */
27
39
  function roleFor(name) {
28
40
  if (name === "principal")
@@ -31,24 +43,91 @@ function roleFor(name) {
31
43
  return "trace";
32
44
  return "expert";
33
45
  }
46
+ /**
47
+ * Conservative token estimation from character count (issue #80).
48
+ * English text averages ~4 chars/token; CJK text ~1-2 chars/token.
49
+ * 3.5 gives a safety margin — we'd rather truncate slightly early than
50
+ * overflow the provider's context window. Exported for tests.
51
+ */
52
+ export function estimateTokens(text) {
53
+ return Math.ceil(text.length / 3.5);
54
+ }
55
+ /** Sum a per-agent token usage breakdown into a single session total. */
56
+ function sumAgentUsage(byAgent) {
57
+ const total = emptyTokenUsage();
58
+ for (const u of Object.values(byAgent)) {
59
+ total.input += u.input;
60
+ total.output += u.output;
61
+ total.cacheRead += u.cacheRead;
62
+ total.cacheWrite += u.cacheWrite;
63
+ total.total += u.total;
64
+ }
65
+ return total;
66
+ }
67
+ /** Filesystem-safe form of a tool name (for saving truncated results). */
68
+ function sanitiseFilename(name) {
69
+ return name.replace(/[^A-Za-z0-9_-]/g, "_").slice(0, 64);
70
+ }
71
+ /** Human-readable byte size (e.g. "1.2MB"). */
72
+ function formatBytes(n) {
73
+ if (n < 1024)
74
+ return `${n}B`;
75
+ if (n < 1024 * 1024)
76
+ return `${(n / 1024).toFixed(1)}KB`;
77
+ return `${(n / (1024 * 1024)).toFixed(1)}MB`;
78
+ }
34
79
  export class SessionManager {
35
80
  sessions = new Map();
36
81
  dataRoot;
37
82
  agentFactory;
38
83
  persist;
39
84
  lastActivityAt = 0;
85
+ // #76: active mailbox delivery. A delivery loop drains a target agent's inbox
86
+ // and runs it; the key (`${sid}:${name}`) guards re-entrancy so concurrent
87
+ // wakes for one agent collapse into a single serial loop (its `prompt` is
88
+ // never invoked concurrently).
89
+ deliveryLoops = new Set();
40
90
  // External MCP tools (§9 decision 2): loaded once, lazily, shared by all
41
91
  // non-trace agents. Null until first agent is created.
42
92
  mcpBridge;
43
93
  mcpTools = [];
44
94
  mcpLoaded = false;
95
+ // Built-in skills directory, loaded through Pi's native skill pipeline
96
+ // (`additionalSkillPaths`). The bundled @brainpilot/skills content is
97
+ // materialized here once (lazily) on first agent creation.
98
+ skillsDir;
99
+ // Router skills directory backing the `skill_search` Pi-native tool — the
100
+ // long-tail catalog NOT in `<available_skills>`. Materialized alongside
101
+ // `skillsDir` (each top-level category lands on the side determined by
102
+ // `materializeSkills`).
103
+ routerSkillsDir;
104
+ skillsMaterialized = false;
45
105
  // Opt-in memory watchdog (§R-4 / issue #20). Null when no budget is set.
46
106
  memWatchdog;
107
+ // Tool result truncation (issue #80). 0 = disabled.
108
+ maxToolResultTokens;
47
109
  constructor(opts = {}) {
48
110
  this.dataRoot = opts.dataRoot ?? process.env.BP_DATA_DIR ?? join(process.cwd(), ".bp-data");
49
111
  this.agentFactory = opts.agentFactory ?? selectFactory();
50
112
  this.persist = opts.persist ?? true;
51
113
  this.mcpBridge = opts.mcpBridge ?? null;
114
+ this.maxToolResultTokens =
115
+ opts.maxToolResultTokens ??
116
+ (() => {
117
+ const env = process.env.BP_MAX_TOOL_RESULT_TOKENS?.trim();
118
+ if (env !== undefined && env !== "") {
119
+ const n = Number(env);
120
+ if (Number.isInteger(n) && n >= 0)
121
+ return n;
122
+ }
123
+ return 64000;
124
+ })();
125
+ // Skills are loaded by Pi from this dir (default <dataRoot>/bp_template/skills).
126
+ this.skillsDir = opts.skillsDir ?? join(this.dataRoot, "bp_template", "skills");
127
+ // The router skill library is a parallel directory with the same on-disk
128
+ // format; `skill_search` reads from here, Pi never sees it.
129
+ this.routerSkillsDir =
130
+ opts.routerSkillsDir ?? join(this.dataRoot, "bp_template", "skills-router");
52
131
  const limitBytes = opts.memLimitBytes ?? parseMemLimitMb(process.env);
53
132
  this.memWatchdog =
54
133
  limitBytes != null
@@ -60,6 +139,31 @@ export class SessionManager {
60
139
  : null;
61
140
  this.memWatchdog?.start();
62
141
  }
142
+ /**
143
+ * Materialize the bundled @brainpilot/skills content into `this.skillsDir`
144
+ * (skip-if-exists) so Pi's native skill pipeline can load it. Idempotent —
145
+ * runs at most once per manager. Called at server startup (so skills exist and
146
+ * are user-visible before any agent runs, incl. Docker pure-compose where no
147
+ * CLI scaffold ran) AND lazily before the first non-trace agent. Best-effort:
148
+ * skills are a convenience, not a hard dependency, so failures are swallowed.
149
+ */
150
+ async ensureSkillsMaterialized() {
151
+ if (this.skillsMaterialized)
152
+ return;
153
+ this.skillsMaterialized = true;
154
+ try {
155
+ const res = await materializeSkills(this.dataRoot);
156
+ // eslint-disable-next-line no-console
157
+ console.info(`[skills] always-on: ${res.copied} copied → ${res.dest}` +
158
+ (res.skipped ? ` (${res.skipped} preserved)` : "") +
159
+ `; router: ${res.routerCopied} copied → ${res.routerDest}` +
160
+ (res.routerSkipped ? ` (${res.routerSkipped} preserved)` : ""));
161
+ }
162
+ catch (err) {
163
+ // eslint-disable-next-line no-console
164
+ console.error(`[skills] failed to materialize built-in skills: ${err.message}`);
165
+ }
166
+ }
63
167
  /**
64
168
  * Load external MCP tools once. No-op in mock mode (BP_MOCK=1) and when no
65
169
  * `mcp_servers.json` is present, so the default path stays zero-overhead.
@@ -90,17 +194,26 @@ export class SessionManager {
90
194
  workspaceDir(sid) {
91
195
  return join(this.dataRoot, "workspaces", sid);
92
196
  }
197
+ /**
198
+ * #60: composer uploads in single-user mode are POSTed against the literal
199
+ * sandbox id `"local"` (the web `LOCAL_SANDBOX.id`), because a file can be
200
+ * attached in the draft composer *before* the real session exists. They land
201
+ * in `workspaces/local/` — but the agent's cwd is `workspaces/<sessionId>/`,
202
+ * so without this it can't read the file the user just attached. We treat
203
+ * `workspaces/local/` as a staging area and drain it into the real session
204
+ * workspace right before the agent runs (see drainLocalUploads).
205
+ */
206
+ static UPLOAD_STAGING_SID = "local";
207
+ /**
208
+ * #97: max CONSECUTIVE failed delivery runs for one expert before the failure
209
+ * is escalated to the principal instead of self-retried. Matches the legacy
210
+ * circuit-breaker threshold (3). Only `retryable` errors consume retries;
211
+ * a `fatal` error escalates on the first failure regardless of this cap.
212
+ */
213
+ static MAX_DELIVERY_RETRIES = 3;
93
214
  historyPath(sid, agent) {
94
215
  return join(this.bpDir(sid), "history", `${agent}.jsonl`);
95
216
  }
96
- /** Skills shared by every session (user-editable `bp_template/skills/`). */
97
- templateSkillsDir() {
98
- return join(this.dataRoot, "bp_template", "skills");
99
- }
100
- /** This session's own skill dir (`.bp/<sid>/skills/`), overrides/augments the template. */
101
- sessionSkillsDir(sid) {
102
- return join(this.bpDir(sid), "skills");
103
- }
104
217
  /** User-editable persona override for an agent (`bp_template/agents/<name>/prompt.md`). */
105
218
  agentPromptPath(name) {
106
219
  return join(this.dataRoot, "bp_template", "agents", name, "prompt.md");
@@ -183,6 +296,107 @@ export class SessionManager {
183
296
  return false;
184
297
  }
185
298
  }
299
+ /**
300
+ * #47: write an uploaded file into the session workspace. Content arrives
301
+ * base64-encoded (binary-safe over the JSON byte chain). The same
302
+ * `resolveWorkspacePath` guard prevents path traversal; parent dirs are
303
+ * created so an upload like `docs/foo.pdf` works. The file lands in the
304
+ * agent's cwd, so it can `read` it by its workspace-relative path.
305
+ * `maxBytes` (default 20 MiB) bounds the decoded size.
306
+ */
307
+ async writeSessionFile(sid, rel, contentBase64, maxBytes = 20 * 1024 * 1024) {
308
+ const buf = Buffer.from(contentBase64, "base64");
309
+ if (buf.byteLength > maxBytes) {
310
+ throw new Error(`file too large: ${buf.byteLength} bytes exceeds limit of ${maxBytes}`);
311
+ }
312
+ const abs = this.resolveWorkspacePath(sid, rel);
313
+ await mkdir(dirname(abs), { recursive: true });
314
+ await writeFile(abs, buf);
315
+ // Return the workspace-relative path (strip the absolute root prefix).
316
+ const root = this.workspaceDir(sid);
317
+ const relOut = abs === root ? "" : abs.slice(root.length + 1);
318
+ return { path: relOut, size: buf.byteLength };
319
+ }
320
+ /**
321
+ * #60: drain the composer upload staging area (`workspaces/local/`) into a
322
+ * real session's workspace so the agent — whose cwd is `workspaces/<sid>/` —
323
+ * can read files the user attached in the draft composer (when no real
324
+ * session id existed yet, the web uploads against the literal `"local"`
325
+ * sandbox id). Called right before the agent runs.
326
+ *
327
+ * Move semantics: each staged entry is renamed into the session workspace
328
+ * (an existing same-named entry in the session is left untouched and the
329
+ * staged copy is discarded), then the staging area is emptied so files never
330
+ * leak into the next session. No-op when the target IS the staging sid, or
331
+ * when the staging dir is missing/empty. Best-effort: never throws — a copy
332
+ * failure must not block the user's prompt.
333
+ */
334
+ async drainLocalUploads(sessionId) {
335
+ if (sessionId === SessionManager.UPLOAD_STAGING_SID)
336
+ return;
337
+ const stagingDir = this.workspaceDir(SessionManager.UPLOAD_STAGING_SID);
338
+ let names;
339
+ try {
340
+ names = await readdir(stagingDir);
341
+ }
342
+ catch {
343
+ return; // no staging dir → nothing was uploaded in the draft
344
+ }
345
+ if (names.length === 0)
346
+ return;
347
+ const destDir = this.workspaceDir(sessionId);
348
+ try {
349
+ await mkdir(destDir, { recursive: true });
350
+ }
351
+ catch {
352
+ /* best-effort */
353
+ }
354
+ for (const name of names) {
355
+ const from = join(stagingDir, name);
356
+ const to = join(destDir, name);
357
+ try {
358
+ // Don't clobber an existing session file; just drop the staged copy.
359
+ let exists = false;
360
+ try {
361
+ await stat(to);
362
+ exists = true;
363
+ }
364
+ catch {
365
+ /* target absent → safe to move */
366
+ }
367
+ if (exists) {
368
+ await rm(from, { recursive: true, force: true });
369
+ continue;
370
+ }
371
+ await rename(from, to);
372
+ }
373
+ catch {
374
+ // rename failed (e.g. cross-device, or `from` is a directory on some
375
+ // platforms): fall back to a content copy so the file still reaches the
376
+ // session, then remove the staged copy. Best-effort, never throws.
377
+ try {
378
+ await this.copyEntry(from, to);
379
+ await rm(from, { recursive: true, force: true });
380
+ }
381
+ catch {
382
+ /* give up on this entry */
383
+ }
384
+ }
385
+ }
386
+ }
387
+ /** Recursively copy a file or directory tree (drainLocalUploads fallback). */
388
+ async copyEntry(from, to) {
389
+ const st = await stat(from);
390
+ if (st.isDirectory()) {
391
+ await mkdir(to, { recursive: true });
392
+ for (const child of await readdir(from)) {
393
+ await this.copyEntry(join(from, child), join(to, child));
394
+ }
395
+ return;
396
+ }
397
+ await mkdir(dirname(to), { recursive: true });
398
+ await writeFile(to, await readFile(from));
399
+ }
186
400
  /**
187
401
  * Resolve an agent's system persona. Prefers the user-editable on-disk
188
402
  * `bp_template/agents/<name>/prompt.md` (so personas can be tuned without a
@@ -190,25 +404,38 @@ export class SessionManager {
190
404
  * file is present or it's empty.
191
405
  */
192
406
  async loadPersona(name, role) {
407
+ let base;
193
408
  try {
194
409
  const raw = (await readFile(this.agentPromptPath(name), "utf8")).trim();
195
410
  if (raw)
196
- return raw;
411
+ base = raw;
197
412
  }
198
413
  catch {
199
414
  // No on-disk override — fall through to the built-in persona.
200
415
  }
201
- return personaFor(name, role);
416
+ // #97: append the language-following directive here (not in the persona text
417
+ // / on-disk prompt.md) so it also reaches users who scaffolded earlier, and
418
+ // applies whether the persona came from disk or the built-in constant.
419
+ return withLanguageDirective(base ?? personaFor(name, role));
202
420
  }
203
421
  /* ---------------------------- session CRUD ---------------------------- */
204
- async createSession(input = {}) {
422
+ async createSession(input = {},
423
+ /**
424
+ * Internal restore path (see `restoreFromDisk`): when provided, the entry
425
+ * inherits the on-disk meta.json timestamps verbatim instead of stamping
426
+ * fresh ones, and `writeMeta` is skipped so the canonical file is not
427
+ * clobbered with boot-time values. Public callers should not pass this.
428
+ */
429
+ _restore) {
205
430
  if (this.memWatchdog?.isOverSoftLimit()) {
206
431
  throw new Error("memory budget exceeded: refusing new session");
207
432
  }
208
433
  const id = input.id ?? randomUUID();
209
434
  if (this.sessions.has(id))
210
435
  return this.toSession(this.sessions.get(id));
211
- const nowIso = new Date().toISOString();
436
+ const nowIso = _restore ? _restore.updatedAt : new Date().toISOString();
437
+ const createdAt = _restore ? _restore.createdAt : nowIso;
438
+ const lastActivityAt = _restore ? _restore.lastActivityAt : Date.now();
212
439
  const persistBase = this.persist ? this.bpDir(id) : undefined;
213
440
  // Provider ref: explicit input wins; otherwise reuse an existing on-disk ref
214
441
  // (restore path) so reviving a session never clobbers its chosen model.
@@ -220,35 +447,50 @@ export class SessionManager {
220
447
  : {};
221
448
  const bus = new EventBus({ persistPath: persistBase ? join(persistBase, "events.jsonl") : undefined });
222
449
  const mailbox = new Mailbox(id, persistBase ? join(persistBase, "mailbox") : undefined);
223
- const trace = new GraphOfTrace(id, persistBase ? join(persistBase, "trace.json") : undefined);
450
+ // #79: push every trace mutation to the SSE stream as CUSTOM:trace_node so
451
+ // the web Graph of Trace updates live instead of polling. The store stays
452
+ // bus-agnostic; the manager owns the wire shape.
453
+ const trace = new GraphOfTrace(id, persistBase ? join(persistBase, "trace.json") : undefined, (op, node) => {
454
+ bus.emit(ev.custom({ sessionId: id }, CUSTOM_EVENT.TRACE_NODE, { op, node }));
455
+ });
224
456
  const entry = {
225
457
  id,
226
458
  title: input.title ?? "Untitled session",
227
- createdAt: nowIso,
459
+ createdAt,
228
460
  updatedAt: nowIso,
229
- lastActivityAt: Date.now(),
461
+ lastActivityAt,
230
462
  bus,
231
463
  mailbox,
232
464
  trace,
233
465
  agents: new Map(),
234
466
  tasks: new Map(),
467
+ deliveryErrors: new Map(),
468
+ delegators: new Map(),
235
469
  runActive: false,
236
470
  activeRunId: null,
471
+ pendingInputs: new Map(),
237
472
  providerRef,
473
+ tokenUsage: { total: emptyTokenUsage(), byAgent: {} },
238
474
  };
239
475
  this.sessions.set(id, entry);
240
- this.touch(entry);
476
+ if (!_restore)
477
+ this.touch(entry);
478
+ else
479
+ this.lastActivityAt = entry.lastActivityAt;
241
480
  if (this.persist) {
242
481
  await mkdir(join(this.bpDir(id), "history"), { recursive: true });
243
- await mkdir(this.sessionSkillsDir(id), { recursive: true });
244
482
  await mkdir(this.workspaceDir(id), { recursive: true });
245
- await this.writeMeta(entry);
483
+ // On restore, meta.json on disk is the authority — do not write it back.
484
+ if (!_restore)
485
+ await this.writeMeta(entry);
246
486
  // Only (re)write the ref when the caller chose one — restore must not
247
487
  // clobber an existing ref with an empty object.
248
488
  if (explicitRef)
249
489
  await this.writeProviderRef(entry);
250
490
  await mailbox.recover();
251
491
  await this.loadTrace(entry);
492
+ // Rehydrate cumulative token usage so the running total survives restarts.
493
+ await this.loadUsage(entry);
252
494
  }
253
495
  return this.toSession(entry);
254
496
  }
@@ -303,12 +545,16 @@ export class SessionManager {
303
545
  await e.mailbox.flush();
304
546
  await e.trace.flush();
305
547
  e.bus.clear();
548
+ for (const [id2, d] of e.pendingInputs) {
549
+ d.reject(new Error("evicted"));
550
+ e.pendingInputs.delete(id2);
551
+ }
306
552
  this.sessions.delete(id);
307
553
  return { evicted: true, agentsKilled: killed };
308
554
  }
309
555
  /* ----------------------------- messaging ----------------------------- */
310
556
  /** Send a user message to an agent (default principal). §7 L3 isolated. */
311
- async sendMessage(sessionId, content, agentName = "principal") {
557
+ async sendMessage(sessionId, content, agentName = "principal", opts = {}) {
312
558
  const entry = this.sessions.get(sessionId);
313
559
  if (!entry)
314
560
  throw new Error(`session not found: ${sessionId}`);
@@ -323,9 +569,25 @@ export class SessionManager {
323
569
  return { accepted: false };
324
570
  }
325
571
  const agent = await this.ensureAgent(sessionId, agentName);
572
+ // #60: pull any composer uploads staged under workspaces/local/ into this
573
+ // session's workspace (the agent's cwd) before it runs, so it can read the
574
+ // file the user just attached. No-op when nothing was staged.
575
+ await this.drainLocalUploads(sessionId);
326
576
  entry.runActive = true;
327
577
  entry.activeRunId = `run_${randomUUID()}`;
328
578
  const runId = entry.activeRunId;
579
+ // #70: emit an initial session_state frame here — onStatusChange only fires
580
+ // on a status *change*, and ensureAgent creates the agent as idle without
581
+ // emitting, so without this the panel stays empty until the first
582
+ // setStatus("running"). This first frame carries runState.active=true + the
583
+ // freshly-ensured agent.
584
+ this.emitSessionState(entry);
585
+ // issue #42: persist + broadcast the user's own prompt as a role:"user"
586
+ // CHUNK *before* the agent runs, so SSE replay reconstructs the full
587
+ // transcript (user + assistant). The web composer's optimistic bubble uses
588
+ // the same `uuid`, so the reducer dedupes the replayed event by id rather
589
+ // than duplicating it. Fall back to a fresh id if the client omitted one.
590
+ entry.bus.emit(ev.textMessageChunk({ sessionId, agentName, runId }, opts.uuid ?? randomUUID(), content, "user"));
329
591
  // Fire-and-track: don't block the HTTP response on the full run.
330
592
  void agent
331
593
  .prompt(content)
@@ -336,22 +598,192 @@ export class SessionManager {
336
598
  entry.runActive = false;
337
599
  entry.activeRunId = null;
338
600
  this.touch(entry);
601
+ // #76: re-evaluate the derived run-active flag now that the user-prompt
602
+ // correlation is cleared. For a direct reply this yields the terminal
603
+ // active=false frame; for a delegation a pending delivery loop keeps it
604
+ // true (the loop emits its own terminal frame when it drains).
605
+ this.emitSessionState(entry);
339
606
  });
340
607
  return { accepted: true, runId };
341
608
  }
342
- /** Interrupt a session (or a specific agent). */
609
+ /**
610
+ * Ask the terminal user a question on behalf of `agent`. Emits a
611
+ * `user_input_request` event and returns a promise that resolves when
612
+ * `resolveInput` is called with the matching request_id, or rejects if the
613
+ * session is interrupted/evicted. Blocks the calling tool's turn.
614
+ */
615
+ requestUserInput(entry, agent, req) {
616
+ const requestId = `req_${randomUUID()}`;
617
+ const deferred = makeDeferred();
618
+ entry.pendingInputs.set(requestId, deferred);
619
+ entry.bus.emit(ev.userInputRequest({ sessionId: entry.id, runId: entry.activeRunId ?? undefined }, { request_id: requestId, agent, question: req.question, options: req.options, allow_free_text: req.allow_free_text }));
620
+ return deferred.promise;
621
+ }
622
+ /**
623
+ * Resolve an outstanding ask_user request. Returns false when the session or
624
+ * request_id is unknown/already consumed (stale answer). Pure lookup; never
625
+ * throws — the server handles 404 for unknown sessions before calling.
626
+ */
627
+ resolveInput(sessionId, requestId, answer) {
628
+ const entry = this.sessions.get(sessionId);
629
+ const deferred = entry?.pendingInputs.get(requestId);
630
+ if (!entry || !deferred)
631
+ return false;
632
+ entry.pendingInputs.delete(requestId);
633
+ deferred.resolve(answer);
634
+ this.touch(entry);
635
+ return true;
636
+ }
637
+ /**
638
+ * Interrupt a session (or a specific agent).
639
+ *
640
+ * Targeted (`agentName` given): abort just that agent. Mailboxes and the
641
+ * principal are left untouched — a narrow "stop this one expert" contract.
642
+ *
643
+ * Whole-session (`agentName` omitted, the Stop button — #90): abort EVERY
644
+ * agent (incl. their running script subprocesses, via Pi `session.abort()`),
645
+ * then clear ALL mailboxes so a queued message can't re-wake a stopped agent,
646
+ * surface a user-facing system_message, and immediately prompt the principal
647
+ * one run with an interrupt notice so PI knows the user interrupted and should
648
+ * await further instructions.
649
+ */
343
650
  async interrupt(sessionId, agentName) {
344
651
  const entry = this.sessions.get(sessionId);
345
652
  if (!entry)
346
653
  return false;
654
+ const wholeSession = agentName === undefined;
347
655
  const targets = agentName ? [entry.agents.get(agentName)].filter(Boolean) : [...entry.agents.values()];
348
- for (const a of targets)
349
- await a.abort();
656
+ // Reject any pending ask_user FIRST: a prompt blocked awaiting user input
657
+ // would never settle, so abort()'s waitForIdle (#101) must not run before
658
+ // these are unblocked or it would deadlock.
659
+ for (const [id, d] of entry.pendingInputs) {
660
+ d.reject(new Error("interrupted"));
661
+ entry.pendingInputs.delete(id);
662
+ }
663
+ // Abort every target and WAIT for each in-flight run to fully settle (#101)
664
+ // — RUN_FINISHED emitted, status settled, provider stream fenced — so the
665
+ // interrupt-notice run below can't race the old run ("already processing").
666
+ await Promise.all(targets.map((a) => a.abort()));
350
667
  entry.runActive = false;
351
668
  entry.activeRunId = null;
669
+ if (wholeSession) {
670
+ // Clear every inbox BEFORE notifying PI: otherwise a queued task_delegate
671
+ // would re-wake the expert the user just stopped.
672
+ await entry.mailbox.clearAll();
673
+ entry.bus.emit(ev.systemMessage(sessionId, "info", "⏹️ 用户已中断当前任务,信箱已清空,正在等候进一步指示。", {
674
+ agent: "principal",
675
+ recoverable: true,
676
+ }));
677
+ this.notifyPrincipalInterrupted(entry);
678
+ }
352
679
  return targets.length > 0;
353
680
  }
681
+ /**
682
+ * #90: after a whole-session Stop, prompt the principal one run with an
683
+ * interrupt notice. Mirrors `sendMessage`'s fire-and-track run accounting but
684
+ * emits NO role:"user" text chunk — the notice is system context, not a user
685
+ * bubble. The principal should acknowledge briefly and await the user.
686
+ */
687
+ notifyPrincipalInterrupted(entry) {
688
+ const notice = "<system_notice>\n" +
689
+ " The user interrupted the current task. All running agents were stopped " +
690
+ "and every mailbox was cleared, so any in-flight delegation is cancelled. " +
691
+ "Do not resume or re-delegate the prior work. Briefly acknowledge the " +
692
+ "interruption and wait for the user's next instruction.\n" +
693
+ "</system_notice>";
694
+ void this.ensureAgent(entry.id, "principal")
695
+ .then((agent) => {
696
+ entry.runActive = true;
697
+ entry.activeRunId = `run_${randomUUID()}`;
698
+ this.emitSessionState(entry);
699
+ return agent.prompt(notice).finally(() => {
700
+ entry.runActive = false;
701
+ entry.activeRunId = null;
702
+ this.touch(entry);
703
+ this.emitSessionState(entry);
704
+ });
705
+ })
706
+ .catch(() => {
707
+ /* error-isolated: prompt() never throws, ensureAgent failure is best-effort */
708
+ });
709
+ }
710
+ /** Test/diagnostic accessor: number of queued messages in `agent`'s inbox. */
711
+ mailboxCount(sessionId, agent) {
712
+ return this.sessions.get(sessionId)?.mailbox.count(agent) ?? 0;
713
+ }
354
714
  /* ------------------------------ agents ------------------------------- */
715
+ /**
716
+ * Wrap a SystemTool so its execute() results are guarded against overflowing
717
+ * the model's context window (issue #80). When truncation triggers, the full
718
+ * result is saved to `<workspace>/.truncated/` and a system_message warning
719
+ * is emitted. No-op when maxToolResultTokens is 0.
720
+ */
721
+ wrapToolWithTruncation(tool, sessionId, bus) {
722
+ if (this.maxToolResultTokens <= 0)
723
+ return tool;
724
+ const maxTokens = this.maxToolResultTokens;
725
+ const saveFullResult = (origResult) => this.truncateToolResult(tool.name, sessionId, bus, origResult, maxTokens);
726
+ const originalExecute = tool.execute.bind(tool);
727
+ return {
728
+ name: tool.name,
729
+ description: tool.description,
730
+ parameters: tool.parameters,
731
+ execute: async (params) => {
732
+ const result = await originalExecute(params);
733
+ if (result.isError)
734
+ return result; // never truncate error messages
735
+ return saveFullResult(result);
736
+ },
737
+ };
738
+ }
739
+ /**
740
+ * Estimate tokens in a tool result, truncate if over budget, save the full
741
+ * content to the session workspace, and emit a warning event.
742
+ */
743
+ async truncateToolResult(toolName, sessionId, bus, result, maxTokens) {
744
+ // Concatenate all text blocks to estimate total tokens.
745
+ const fullText = result.content.map((c) => c.text).join("");
746
+ const estimated = estimateTokens(fullText);
747
+ if (estimated <= maxTokens)
748
+ return result;
749
+ // Truncate at ~maxTokens chars (conservative).
750
+ const maxChars = maxTokens * 3.5;
751
+ const truncatedText = fullText.slice(0, Math.floor(maxChars));
752
+ const now = new Date().toISOString();
753
+ const ts = now.replace(/[:.]/g, "-");
754
+ const fname = `${sanitiseFilename(toolName)}_${ts}.json`;
755
+ const relPath = `.truncated/${fname}`;
756
+ // Save full content to workspace.
757
+ try {
758
+ const absDir = join(this.workspaceDir(sessionId), ".truncated");
759
+ await mkdir(absDir, { recursive: true });
760
+ const saved = {
761
+ tool: toolName,
762
+ truncatedAt: now,
763
+ originalBytes: Buffer.byteLength(fullText),
764
+ truncatedBytes: Buffer.byteLength(truncatedText),
765
+ estimatedTokens: estimated,
766
+ maxTokens,
767
+ content: fullText,
768
+ };
769
+ await writeFile(join(absDir, fname), JSON.stringify(saved, null, 2), "utf8");
770
+ }
771
+ catch {
772
+ // Best-effort — never block the agent on file I/O.
773
+ }
774
+ // Emit warning.
775
+ bus.emit(ev.systemMessage(sessionId, "warning", `⚠️ 工具 ${toolName} 返回结果过大 ` +
776
+ `(原始约 ${estimated} tokens / ${formatBytes(Buffer.byteLength(fullText))}),` +
777
+ `已截断至约 ${estimateTokens(truncatedText)} tokens。` +
778
+ `完整结果已保存至 workspace/${relPath}`, { recoverable: true }));
779
+ const notice = `\n\n---\n` +
780
+ `[⚠️ 结果已截断: 原始 ${estimated} tokens / ${formatBytes(Buffer.byteLength(fullText))} → ` +
781
+ `截断后 ${estimateTokens(truncatedText)} tokens。` +
782
+ `完整内容已保存至 workspace/${relPath} ,可用 read 工具读取]`;
783
+ return {
784
+ content: [{ type: "text", text: truncatedText + notice }],
785
+ };
786
+ }
355
787
  /** Ensure an agent exists (create or resurrect). */
356
788
  async ensureAgent(sessionId, name) {
357
789
  const entry = this.sessions.get(sessionId);
@@ -372,11 +804,24 @@ export class SessionManager {
372
804
  destroyAgent: async (target) => {
373
805
  await this.destroyAgent(sessionId, target);
374
806
  },
807
+ wakeAgent: (target) => this.wakeAgent(sessionId, target),
808
+ requestUserInput: (req) => this.requestUserInput(entry, name, req),
809
+ routerSkillsDir: this.routerSkillsDir,
375
810
  };
376
811
  const systemTools = systemToolsForRole(role, name, deps);
377
812
  // External MCP tools go to non-trace agents (trace agent is graph-only, §9).
378
813
  const mcpTools = role === "trace" ? [] : await this.ensureMcpTools();
379
- const agentTools = [...systemTools, ...mcpTools];
814
+ const rawTools = [...systemTools, ...mcpTools];
815
+ // Built-in skills are loaded by Pi natively (not as tools). Materialize the
816
+ // bundled content into bp_template/skills once, then hand the dir to the
817
+ // factory as additionalSkillPaths. Trace agent is skill-less (graph-only).
818
+ let skillPaths;
819
+ if (role !== "trace") {
820
+ await this.ensureSkillsMaterialized();
821
+ skillPaths = [this.skillsDir];
822
+ }
823
+ // #80: guard every tool result against context-window overflow.
824
+ const agentTools = rawTools.map((t) => this.wrapToolWithTruncation(t, sessionId, entry.bus));
380
825
  const builtins = builtinToolNamesForRole(role, name);
381
826
  const allowedToolNames = [...builtins, ...agentTools.map((t) => t.name)];
382
827
  // Resolve this session's provider against the SSOT (providers.json). When
@@ -391,8 +836,15 @@ export class SessionManager {
391
836
  systemTools: agentTools,
392
837
  allowedToolNames,
393
838
  systemPrompt: await this.loadPersona(name, role),
394
- skillPaths: [this.templateSkillsDir(), this.sessionSkillsDir(sessionId)],
839
+ skillPaths,
395
840
  providerConfig,
841
+ // 意图二 fallback: the trace-reminder extension calls this when an expert
842
+ // was reminded once and still didn't report back, so the principal never
843
+ // dead-waits on a silent expert.
844
+ onUnreplied: (agentName) => this.writeFallbackToDelegator(entry, agentName),
845
+ // #97: only the principal gets the live team-status block injected each
846
+ // turn (it is the coordinator). Other roles run without it.
847
+ renderAgentStatus: name === "principal" ? () => this.renderAgentStatus(entry) : undefined,
396
848
  });
397
849
  const agent = new MasAgent({
398
850
  sessionId,
@@ -400,13 +852,72 @@ export class SessionManager {
400
852
  role,
401
853
  session,
402
854
  bus: entry.bus,
403
- onStatusChange: () => this.touch(entry),
855
+ // #70: keep the touch (idle-reclaim) AND push an authoritative live
856
+ // snapshot so the web Agents panel updates without a reload/reselect.
857
+ // setStatus early-returns on no-op transitions, so this never storms.
858
+ onStatusChange: () => {
859
+ this.touch(entry);
860
+ this.emitSessionState(entry);
861
+ },
862
+ // Roll the agent's running total into the per-session breakdown, push a
863
+ // live session_state frame, and persist usage.json. Total is recomputed
864
+ // as the sum across agents so it can never drift from the breakdown.
865
+ onUsage: (agentName, _delta, cumulative) => {
866
+ entry.tokenUsage.byAgent[agentName] = cumulative;
867
+ entry.tokenUsage.total = sumAgentUsage(entry.tokenUsage.byAgent);
868
+ this.touch(entry);
869
+ this.emitSessionState(entry);
870
+ void this.writeUsage(entry);
871
+ },
404
872
  });
873
+ // Continue this agent's cumulative count across restarts / lazy revival.
874
+ agent.seedUsage(entry.tokenUsage.byAgent[name]);
405
875
  entry.agents.set(name, agent);
406
876
  if (!entry.tasks.has(name))
407
877
  entry.tasks.set(name, "");
408
878
  return agent;
409
879
  }
880
+ /**
881
+ * 意图二 fallback — the trace-reminder extension calls this (via the factory's
882
+ * `onUnreplied`) when an expert was reminded once and STILL did not
883
+ * `send_message` its delegator (the "silence" path; a hard *error* run is
884
+ * handled separately). We write a NEUTRAL system note into the REAL delegator's
885
+ * mailbox and wake it so it never dead-waits. The delegator is whoever last
886
+ * delegated to this expert (#97 directed escalation), falling back to the
887
+ * principal. This fires during the expert's run (before the clean-run cleanup
888
+ * in `runDeliveryLoop`), so the delegator record is still present. The note
889
+ * only states the fact — the expert ended without delivering a result — and
890
+ * deliberately gives NO directive ("re-delegate", "proceed without it"): the
891
+ * delegator decides what to do. Best-effort: a failed write must never break
892
+ * the agent loop.
893
+ */
894
+ writeFallbackToDelegator(entry, expert) {
895
+ const to = this.delegatorFor(entry, expert);
896
+ void entry.mailbox
897
+ .write({
898
+ fromAgent: "system",
899
+ toAgent: to,
900
+ msgType: "system",
901
+ content: `[系统通知] 专家 "${expert}" 结束了本次任务但未回交结果。`,
902
+ })
903
+ .then(() => this.wakeAgent(entry.id, to))
904
+ .catch(() => {
905
+ /* best-effort */
906
+ });
907
+ }
908
+ /**
909
+ * #97: snapshot the live team status for injection into the principal's turn
910
+ * (via the agent-status extension's Pi `context` hook). Lists every agent —
911
+ * INCLUDING the principal itself, so it sees its own inbox backlog — with its
912
+ * authoritative status and the number of messages still queued unread in its
913
+ * inbox (`mailbox.count`). Excludes the trace agent (an internal recorder) and
914
+ * any stopped agent (destroyed; irrelevant to current coordination). Returns
915
+ * "" when nothing is worth reporting so the extension injects nothing.
916
+ */
917
+ renderAgentStatus(entry) {
918
+ const lines = collectAgentStatusLines(entry.agents.values(), (name) => entry.mailbox.count(name));
919
+ return renderAgentStatusBlock(lines);
920
+ }
410
921
  async destroyAgent(sessionId, name) {
411
922
  const entry = this.sessions.get(sessionId);
412
923
  if (!entry)
@@ -417,6 +928,198 @@ export class SessionManager {
417
928
  agent.stop();
418
929
  entry.agents.delete(name); // history on disk is kept (§5).
419
930
  }
931
+ /* ------------------------- mailbox delivery (#76) ------------------------- */
932
+ /**
933
+ * #76: wake `name` to consume its mailbox. Fire-and-forget — `send_message`
934
+ * calls this after writing; the actual run happens in a serial delivery loop.
935
+ * The re-entrancy guard (`deliveryLoops`) means concurrent wakes for the same
936
+ * agent collapse into the one already-running loop (which re-drains after each
937
+ * turn), so an agent's `prompt` is never invoked concurrently.
938
+ */
939
+ wakeAgent(sessionId, name) {
940
+ const key = `${sessionId}:${name}`;
941
+ if (this.deliveryLoops.has(key))
942
+ return;
943
+ this.deliveryLoops.add(key);
944
+ void this.runDeliveryLoop(sessionId, name).finally(() => {
945
+ this.deliveryLoops.delete(key);
946
+ // Emit a final frame AFTER the key is gone: the agent's own running→idle
947
+ // transition fired emitSessionState while this key was still present (so
948
+ // that frame still read active via the pending-delivery check). Without
949
+ // this trailing frame the derived run-active flag would stay stuck true.
950
+ const entry = this.sessions.get(sessionId);
951
+ if (entry)
952
+ this.emitSessionState(entry);
953
+ // Re-check after releasing the guard: a message could have been written
954
+ // between the loop's final empty read and this delete, and that writer's
955
+ // wakeAgent would have bailed (key still present) — leaving the message
956
+ // unread. Re-wake if the inbox is non-empty so it never strands.
957
+ if (entry && entry.mailbox.count(name) > 0)
958
+ this.wakeAgent(sessionId, name);
959
+ });
960
+ }
961
+ /**
962
+ * Drain `name`'s inbox and run it, looping so messages that arrive *during* a
963
+ * turn are picked up without a second external wake. Each iteration atomically
964
+ * drains the inbox, ensures the agent, wraps the messages as
965
+ * `<message_envelope>`s (the format the A2A persona documents), and prompts.
966
+ * `MasAgent.prompt` is error-isolated (never throws), so a failed expert turn
967
+ * ends the loop cleanly rather than rejecting. A `session_state` frame is
968
+ * emitted on entry and exit so the derived run-active flag reflects the
969
+ * delegated work even across the await gap between the sender finishing and
970
+ * the target starting.
971
+ */
972
+ async runDeliveryLoop(sessionId, name) {
973
+ for (;;) {
974
+ const entry = this.sessions.get(sessionId);
975
+ if (!entry)
976
+ return;
977
+ const msgs = await entry.mailbox.readBatch(name); // bounded FIFO batch (#76)
978
+ if (msgs.length === 0)
979
+ return;
980
+ const agent = await this.ensureAgent(sessionId, name);
981
+ if (agent.status === "stopped")
982
+ return;
983
+ // #97 directed escalation: remember who delegated this work (the last
984
+ // task_delegate in the batch). Self-retry nudges are msgType "system", so
985
+ // they never overwrite a real delegator recorded on the original task.
986
+ const delegated = [...msgs].reverse().find((m) => m.msgType === "task_delegate");
987
+ if (delegated)
988
+ entry.delegators.set(name, delegated.fromAgent);
989
+ this.touch(entry);
990
+ // Surface the delegated run immediately (derived active flag, agent list).
991
+ this.emitSessionState(entry);
992
+ await agent.prompt(this.renderEnvelopes(msgs, name));
993
+ // #97 error path. A delegated run that ended in `error` is handled here
994
+ // (the trace-reminder extension bails on an errored run, leaving the host
995
+ // the sole owner of error recovery). Transient errors self-retry up to a
996
+ // cap; fatal errors (auth/config) and the exhausted cap escalate to the
997
+ // principal. A clean run resets the agent's consecutive-error count.
998
+ if (agent.status === "error" && agent.role === "expert") {
999
+ if (this.handleDeliveryError(entry, agent))
1000
+ continue; // self-retry queued
1001
+ return; // escalated — nothing more to drain for this agent
1002
+ }
1003
+ entry.deliveryErrors.delete(name); // clean run → reset the streak
1004
+ entry.delegators.delete(name); // and forget the delegator (task done)
1005
+ }
1006
+ }
1007
+ /**
1008
+ * #97: react to a failed delegated expert run. Returns true when a self-retry
1009
+ * was queued (the loop should continue and re-drain the agent's own inbox),
1010
+ * false when the failure was escalated to the principal (the loop should stop).
1011
+ *
1012
+ * Policy:
1013
+ * - `retryable` (rate limit / 5xx / network) AND under the retry cap →
1014
+ * re-wake the SAME expert with a neutral system nudge in its own inbox, and
1015
+ * surface a `warning` to the user ("retrying n/N"). Re-running may succeed.
1016
+ * - `fatal` (auth / missing key / forbidden), OR the cap is reached →
1017
+ * escalate: write a NEUTRAL error note to the principal's mailbox + wake it,
1018
+ * surface an `error` to the user, and reset the streak so a future task to
1019
+ * this expert starts fresh.
1020
+ */
1021
+ handleDeliveryError(entry, agent) {
1022
+ const name = agent.name;
1023
+ const count = (entry.deliveryErrors.get(name) ?? 0) + 1;
1024
+ entry.deliveryErrors.set(name, count);
1025
+ const kind = agent.lastErrorKind ?? "retryable";
1026
+ const headline = agent.state().lastError?.message ?? "未知错误";
1027
+ if (kind === "retryable" && count < SessionManager.MAX_DELIVERY_RETRIES) {
1028
+ entry.bus.emit(ev.systemMessage(entry.id, "warning", `专家 "${name}" 执行任务时出错,正在自动重试 (${count}/${SessionManager.MAX_DELIVERY_RETRIES})…`, { agent: name, recoverable: true }));
1029
+ // Re-wake the SAME expert via its own inbox: a neutral, directive-free
1030
+ // nudge. The expert retains its prior conversation context, so it knows
1031
+ // what it was attempting; we only signal "the last attempt failed, try
1032
+ // again". Returning true lets the loop re-drain this note immediately.
1033
+ void entry.mailbox
1034
+ .write({
1035
+ fromAgent: "system",
1036
+ toAgent: name,
1037
+ msgType: "system",
1038
+ content: `[系统通知] 上一次任务执行出错(${headline})。请重试。`,
1039
+ })
1040
+ .catch(() => {
1041
+ /* best-effort */
1042
+ });
1043
+ return true;
1044
+ }
1045
+ // Fatal, or retries exhausted → escalate to the real delegator and stop.
1046
+ const delegator = this.delegatorFor(entry, name);
1047
+ const target = delegator === "principal" ? "主管" : `委派方 "${delegator}"`;
1048
+ entry.bus.emit(ev.systemMessage(entry.id, "error", kind === "fatal"
1049
+ ? `专家 "${name}" 发生无法自动恢复的错误,已上报${target}。`
1050
+ : `专家 "${name}" 连续 ${count} 次执行失败,已上报${target}。`, { agent: name, recoverable: true }));
1051
+ this.writeErrorToDelegator(entry, name, headline);
1052
+ entry.deliveryErrors.delete(name); // reset streak for a future task
1053
+ entry.delegators.delete(name); // delegator notified; forget it
1054
+ return false;
1055
+ }
1056
+ /**
1057
+ * #97 directed escalation: resolve who an expert's failure/silence should be
1058
+ * reported to. Returns the recorded delegator ONLY when it is a still-live,
1059
+ * non-trace agent other than the expert itself (a destroyed/stopped delegator
1060
+ * would be wrongly resurrected by the wake, and a self/system target is
1061
+ * nonsensical). Otherwise falls back to `principal`, the root coordinator,
1062
+ * which always exists and owns un-rooted work.
1063
+ */
1064
+ delegatorFor(entry, expert) {
1065
+ const d = entry.delegators.get(expert);
1066
+ if (!d || d === expert || d === "system" || d === "principal")
1067
+ return "principal";
1068
+ const agent = entry.agents.get(d);
1069
+ if (!agent || agent.status === "stopped" || agent.role === "trace")
1070
+ return "principal";
1071
+ return d;
1072
+ }
1073
+ /**
1074
+ * #97 error escalation: write a NEUTRAL, error-flavored system note into the
1075
+ * REAL delegator's mailbox and wake it, so whoever delegated the work (the
1076
+ * principal, or another agent in a chain like auditor→engineer) learns the
1077
+ * expert failed rather than dead-waiting. Distinct from
1078
+ * `writeFallbackToDelegator` (the "silence" path): this one states an ERROR
1079
+ * occurred and carries the error headline as context, but — like the silence
1080
+ * note — gives NO directive ("re-delegate" / "proceed"): the delegator decides.
1081
+ * Best-effort; never breaks the loop.
1082
+ */
1083
+ writeErrorToDelegator(entry, expert, headline) {
1084
+ const to = this.delegatorFor(entry, expert);
1085
+ void entry.mailbox
1086
+ .write({
1087
+ fromAgent: "system",
1088
+ toAgent: to,
1089
+ msgType: "system",
1090
+ content: `[系统通知] 专家 "${expert}" 在执行任务时发生错误,未能产出结果。错误:${headline}`,
1091
+ })
1092
+ .then(() => this.wakeAgent(entry.id, to))
1093
+ .catch(() => {
1094
+ /* best-effort */
1095
+ });
1096
+ }
1097
+ /**
1098
+ * Wrap drained mailbox messages in the `<message_envelope>` header the A2A
1099
+ * persona (`personas.ts`) tells agents to expect, so the model knows who sent
1100
+ * each message and why. User-origin messages declare `<source type="user"/>`;
1101
+ * agent-origin ones name the sender.
1102
+ *
1103
+ * 意图一·触发点2 (Pi-native hooks): when the PRINCIPAL receives a message from
1104
+ * another agent (not the user — i.e. an expert reporting back), append a single
1105
+ * static line nudging it to record_trace any real decision it makes while
1106
+ * processing the reply. Stateless, loop-free (at most one line per delivery).
1107
+ */
1108
+ renderEnvelopes(msgs, toAgent) {
1109
+ const body = msgs
1110
+ .map((m) => {
1111
+ const source = m.msgType === "user_message"
1112
+ ? `<source type="user" />`
1113
+ : `<source type="agent" name="${m.fromAgent}" />`;
1114
+ return `<message_envelope>\n ${source}\n <type>${m.msgType}</type>\n</message_envelope>\n${m.content}`;
1115
+ })
1116
+ .join("\n\n");
1117
+ const fromAgent = msgs.some((m) => m.msgType !== "user_message");
1118
+ if (toAgent === "principal" && fromAgent) {
1119
+ return `${body}\n\n[提醒:处理完这些消息后,如有实质决策请调用 record_trace 记录。]`;
1120
+ }
1121
+ return body;
1122
+ }
420
1123
  /* -------------------------- state authority -------------------------- */
421
1124
  /** §10 polling fallback: list agents with authoritative status. */
422
1125
  listAgents(sessionId) {
@@ -435,14 +1138,61 @@ export class SessionManager {
435
1138
  return out;
436
1139
  });
437
1140
  }
1141
+ /**
1142
+ * #76: a session is "running" whenever ANY non-trace agent is running, or a
1143
+ * mailbox delivery loop is pending for a non-trace target (the loop is
1144
+ * registered synchronously inside `send_message`, so this closes the await gap
1145
+ * between the sender finishing its turn and the delegated target starting —
1146
+ * without it the flag would flicker false in that window). The trace agent is
1147
+ * a real spawned agent (record_trace dispatches `trace_event` envelopes into
1148
+ * its mailbox and it owns the Graph of Trace as editor, see
1149
+ * `system-tools.ts:createRecordTraceTool`), but it is excluded from the
1150
+ * AGGREGATE: a trace recording isn't "the user's task is still running". It
1151
+ * is still LISTED in `agents[]` with its own status so the Agents panel shows
1152
+ * its idle/running transitions live.
1153
+ */
1154
+ deriveRunActive(entry) {
1155
+ if (entry.runActive)
1156
+ return true;
1157
+ for (const a of entry.agents.values()) {
1158
+ if (a.role !== "trace" && a.status === "running")
1159
+ return true;
1160
+ }
1161
+ for (const key of this.deliveryLoops) {
1162
+ const sep = entry.id.length;
1163
+ // key === `${sid}:${name}` — match this session, exclude the trace target.
1164
+ if (key.startsWith(`${entry.id}:`) && key.slice(sep + 1) !== "trace")
1165
+ return true;
1166
+ }
1167
+ return false;
1168
+ }
1169
+ /**
1170
+ * #70/#76: emit the authoritative live snapshot as a `CUSTOM:session_state`
1171
+ * event. This is the wholesale source the web Agents panel replaces its
1172
+ * agents list from; it is pushed on every agent status transition
1173
+ * (`onStatusChange`), an initial frame in `sendMessage`, and on delivery-loop
1174
+ * entry/exit. `runState.active` is DERIVED (any non-trace agent running / a
1175
+ * pending delivery), so a delegated expert keeps the run visibly active. The
1176
+ * ring buffer replays the last frame on reconnect, so a re-subscribing client
1177
+ * recovers the current snapshot. Shape matches `SessionStateSnapshotSchema`.
1178
+ */
1179
+ emitSessionState(entry) {
1180
+ entry.bus.emit(ev.custom({ sessionId: entry.id }, "session_state", {
1181
+ runState: { active: this.deriveRunActive(entry), runId: entry.activeRunId },
1182
+ agents: this.listAgents(entry.id),
1183
+ lastActivityTs: new Date(entry.lastActivityAt).toISOString(),
1184
+ tokenUsage: entry.tokenUsage,
1185
+ }));
1186
+ }
438
1187
  getSessionState(sessionId) {
439
1188
  const entry = this.sessions.get(sessionId);
440
1189
  if (!entry)
441
1190
  return undefined;
442
1191
  return {
443
- runState: { active: entry.runActive, runId: entry.activeRunId },
1192
+ runState: { active: this.deriveRunActive(entry), runId: entry.activeRunId },
444
1193
  agents: this.listAgents(sessionId),
445
1194
  lastActivityTs: new Date(entry.lastActivityAt).toISOString(),
1195
+ tokenUsage: entry.tokenUsage,
446
1196
  };
447
1197
  }
448
1198
  /** The session's Graph of Trace (reasoning DAG), or undefined if no session. */
@@ -450,6 +1200,61 @@ export class SessionManager {
450
1200
  const entry = this.sessions.get(sessionId);
451
1201
  return entry?.trace.getGraph();
452
1202
  }
1203
+ /**
1204
+ * Read persisted AG-UI events for a session from `.bp/<sid>/events.jsonl`.
1205
+ * Used by the web to rehydrate chat history after a runtime restart (the
1206
+ * in-memory bus ring buffer only carries `recent()` for live SSE replay).
1207
+ *
1208
+ * The file is read line-by-line and unparseable lines are skipped so a
1209
+ * single corrupt record doesn't poison the whole history.
1210
+ *
1211
+ * `limit` caps the returned array; when total > limit we return the **tail**
1212
+ * (most recent events) for lightweight callers. Default 1000, positive
1213
+ * limits are capped at 5000. `limit <= 0` returns the full log and is used by
1214
+ * the web rehydrate path so long sessions are not sliced through the middle
1215
+ * of a streamed message.
1216
+ *
1217
+ * Returns `undefined` if the session id isn't in memory — this method is
1218
+ * only useful for known sessions (call `restoreFromDisk` first if needed).
1219
+ */
1220
+ async readEventHistory(sessionId, opts = {}) {
1221
+ if (!this.sessions.has(sessionId))
1222
+ return undefined;
1223
+ const requestedLimit = opts.limit;
1224
+ const limit = requestedLimit === undefined || !Number.isFinite(requestedLimit)
1225
+ ? 1000
1226
+ : requestedLimit <= 0
1227
+ ? null
1228
+ : Math.max(1, Math.min(requestedLimit, 5000));
1229
+ const path = join(this.bpDir(sessionId), "events.jsonl");
1230
+ let raw;
1231
+ try {
1232
+ raw = await readFile(path, "utf8");
1233
+ }
1234
+ catch {
1235
+ // No events file yet — empty history is valid (newly created session).
1236
+ return { events: [], total: 0, truncated: false };
1237
+ }
1238
+ const lines = raw.split("\n");
1239
+ const events = [];
1240
+ let total = 0;
1241
+ for (const line of lines) {
1242
+ if (!line)
1243
+ continue;
1244
+ let parsed;
1245
+ try {
1246
+ parsed = JSON.parse(line);
1247
+ }
1248
+ catch {
1249
+ continue; // skip malformed line
1250
+ }
1251
+ total++;
1252
+ events.push(parsed);
1253
+ }
1254
+ const truncated = limit !== null && events.length > limit;
1255
+ const out = truncated ? events.slice(events.length - limit) : events;
1256
+ return { events: out, total, truncated };
1257
+ }
453
1258
  metrics() {
454
1259
  let runningAgents = 0;
455
1260
  for (const e of this.sessions.values()) {
@@ -553,7 +1358,43 @@ export class SessionManager {
553
1358
  /* no trace yet */
554
1359
  }
555
1360
  }
556
- /** Restore session list from disk (§10 策略A: agents start idle, lazily revived). */
1361
+ usagePath(sid) {
1362
+ return join(this.bpDir(sid), "usage.json");
1363
+ }
1364
+ /** Persist cumulative token usage (best-effort; never throws). */
1365
+ async writeUsage(entry) {
1366
+ if (!this.persist)
1367
+ return;
1368
+ await mkdir(this.bpDir(entry.id), { recursive: true }).catch(() => { });
1369
+ await writeFile(this.usagePath(entry.id), JSON.stringify(entry.tokenUsage, null, 2), "utf8").catch(() => { });
1370
+ }
1371
+ /** Rehydrate cumulative token usage from disk (restore path). */
1372
+ async loadUsage(entry) {
1373
+ try {
1374
+ const raw = await readFile(this.usagePath(entry.id), "utf8");
1375
+ const parsed = JSON.parse(raw);
1376
+ const byAgent = {};
1377
+ for (const [name, u] of Object.entries(parsed.byAgent ?? {})) {
1378
+ byAgent[name] = addUsage(emptyTokenUsage(), u);
1379
+ }
1380
+ entry.tokenUsage = { byAgent, total: sumAgentUsage(byAgent) };
1381
+ }
1382
+ catch {
1383
+ /* no usage yet — keep the zeroed default */
1384
+ }
1385
+ }
1386
+ /**
1387
+ * Restore session list from disk. Reads `<dataRoot>/.bp/<id>/meta.json` for
1388
+ * every directory and recreates the session entry with its original
1389
+ * timestamps preserved (provider ref, mailbox, trace also rehydrate via the
1390
+ * normal `createSession` restore path). §10 策略A: agents start idle and
1391
+ * are lazily revived when the user actually sends a message.
1392
+ *
1393
+ * Idempotent — sessions already in memory are skipped, not reset.
1394
+ *
1395
+ * Returns the ids that were restored this call (i.e. excluding ones that
1396
+ * were already loaded or whose meta.json was missing / malformed).
1397
+ */
557
1398
  async restoreFromDisk() {
558
1399
  const restored = [];
559
1400
  const root = join(this.dataRoot, ".bp");
@@ -562,19 +1403,35 @@ export class SessionManager {
562
1403
  ids = await readdir(root);
563
1404
  }
564
1405
  catch {
565
- return restored;
1406
+ return restored; // .bp/ doesn't exist yet — fresh install
566
1407
  }
567
1408
  for (const id of ids) {
1409
+ if (this.sessions.has(id))
1410
+ continue;
1411
+ const metaPath = join(root, id, "meta.json");
1412
+ let raw;
568
1413
  try {
569
- const raw = await readFile(join(root, id, "meta.json"), "utf8");
570
- const meta = JSON.parse(raw);
571
- if (!this.sessions.has(meta.id)) {
572
- await this.createSession({ id: meta.id, title: meta.title });
573
- restored.push(meta.id);
574
- }
1414
+ raw = await readFile(metaPath, "utf8");
575
1415
  }
576
1416
  catch {
577
- /* skip non-session dirs */
1417
+ continue; // not a session dir (no meta.json) — silent skip
1418
+ }
1419
+ try {
1420
+ const meta = JSON.parse(raw);
1421
+ const sid = meta.id ?? id;
1422
+ if (this.sessions.has(sid))
1423
+ continue;
1424
+ const now = new Date().toISOString();
1425
+ await this.createSession({ id: sid, title: meta.title }, {
1426
+ createdAt: meta.createdAt ?? now,
1427
+ updatedAt: meta.updatedAt ?? now,
1428
+ lastActivityAt: typeof meta.lastActivityAt === "number" ? meta.lastActivityAt : Date.now(),
1429
+ });
1430
+ restored.push(sid);
1431
+ }
1432
+ catch (err) {
1433
+ // eslint-disable-next-line no-console
1434
+ console.warn(`[runtime] skipping ${id}: ${err.message}`);
578
1435
  }
579
1436
  }
580
1437
  return restored;