chapterhouse 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -182,6 +182,21 @@ Optional Entra settings:
182
182
  - `ENTRA_REQUIRED_ROLE` — if set, the signed-in user must have this app role in the token's `roles` claim. This replaced the older group-based check.
183
183
  - `ENTRA_TEAM_LEAD_ID` — optional for regular engineers, who can omit it entirely. Set it only for the one person who should be treated as `team-lead` for managerial functions such as `/api/team/report` and protected OKR/KPI/team wiki writes. Without it, the signed-in user is treated as `engineer`, which is the correct role for normal team members.
184
184
 
185
+ ### WorkIQ MCP server (Entra only)
186
+
187
+ When `ENTRA_AUTH_ENABLED=true` and `ENTRA_TENANT_ID` is set, Chapterhouse automatically adds a `workiq` entry to `~/.copilot/mcp-config.json` at daemon startup. This gives the orchestrator access to Microsoft 365 tools (Teams, Outlook, Calendar, etc.) via the `@microsoft/workiq` MCP server without any manual configuration.
188
+
189
+ The entry uses `npx -y @microsoft/workiq` so no global npm install is required — npx fetches the server on first use.
190
+
191
+ | Behaviour | Detail |
192
+ |-----------|--------|
193
+ | **Trigger** | `ENTRA_AUTH_ENABLED=true` + `ENTRA_TENANT_ID` set |
194
+ | **Idempotent** | Safe to restart; the entry is written only if the `workiq` key is absent |
195
+ | **Opt-out** | Set `CHAPTERHOUSE_WORKIQ_AUTO_INSTALL=false` to disable |
196
+ | **Failure-safe** | If the write fails (permissions, read-only FS), a structured warning is logged and the daemon continues |
197
+
198
+ **`CHAPTERHOUSE_WORKIQ_AUTO_INSTALL`** — `true` (default) or `false`. Set to `false` to manage the workiq MCP entry manually.
199
+
185
200
  ## Docker (Personal)
186
201
 
187
202
  For a single-user local deployment, use the personal compose file. It binds port `7788`, runs the daemon as the non-root `node` user, and persists state in `CHAPTERHOUSE_HOME` (default: `$HOME/.chapterhouse` on macOS/Linux).
@@ -371,6 +386,7 @@ Busy sessions (processing a turn or with items queued) are never evicted by eith
371
386
  #### Daemon PATH
372
387
 
373
388
  The generated systemd unit and launchd plist compose a rich `PATH` that includes:
389
+
374
390
  - The installing shell's `$PATH` (captured at install time)
375
391
  - The binary's own directory
376
392
  - Linuxbrew (`/home/linuxbrew/.linuxbrew/bin`), Homebrew (`/opt/homebrew/bin`, `/usr/local/bin`)
@@ -392,7 +408,7 @@ The browser app at `http://localhost:7788` is split into a few views:
392
408
 
393
409
  ## How it Works
394
410
 
395
- ```
411
+ ```text
396
412
  Browser ──HTTP / SSE──► Chapterhouse Daemon
397
413
 
398
414
  Orchestrator Session (Copilot SDK)
@@ -506,6 +522,12 @@ npm run dev:web
506
522
 
507
523
  # Build everything
508
524
  npm run build
525
+
526
+ # Run tests
527
+ npm test
528
+
529
+ # Lint user-facing markdown (README, CHANGELOG, docs/, .github/)
530
+ npm run lint:md
509
531
  ```
510
532
 
511
533
  The web UI lives in `web/`. Production builds emit to `web/dist/`, which is served by the Express server defined in `src/api/server.ts`.
@@ -531,5 +553,6 @@ git push origin main --follow-tags
531
553
  All commits on this repository follow **[Conventional Commits v1.0.0](https://www.conventionalcommits.org/en/v1.0.0/)**. The format is `<type>(<scope>): <subject>` (e.g. `feat(api): add session export endpoint`). Allowed types: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `chore`, `build`, `ci`, `revert`, `release`.
532
554
 
533
555
  This is automatically enforced:
556
+
534
557
  - **Locally:** `husky` installs a `commit-msg` git hook on `npm install` that runs `commitlint` against every commit message. Bad messages are rejected before the commit lands.
535
558
  - **On PRs:** A GitHub Action (`lint-pr-title.yml`) validates the PR title on every open/edit. This matters because squash-merges use the PR title as the commit message on `main`.
@@ -5,7 +5,7 @@ import { existsSync, statSync, readdirSync } from "fs";
5
5
  import { join, dirname } from "path";
6
6
  import { fileURLToPath } from "url";
7
7
  import { z } from "zod";
8
- import { sendToOrchestrator, getAgentInfo, cancelCurrentMessage, getLastRouteResult, getCurrentSessionKey } from "../copilot/orchestrator.js";
8
+ import { sendToOrchestrator, getAgentInfo, cancelCurrentMessage, getLastRouteResult, getCurrentSessionKey, subscribeTaskEvents } from "../copilot/orchestrator.js";
9
9
  import { getAgentRegistry } from "../copilot/agents.js";
10
10
  import { config, persistModel } from "../config.js";
11
11
  import { getRouterConfig, updateRouterConfig } from "../copilot/router.js";
@@ -19,7 +19,7 @@ import { withWikiWrite } from "../wiki/lock.js";
19
19
  import { listSkills, removeSkill } from "../copilot/skills.js";
20
20
  import { restartDaemon } from "../daemon.js";
21
21
  import { API_TOKEN_PATH, resolveWikiRelativePath } from "../paths.js";
22
- import { getDb, getSessionMessages } from "../store/db.js";
22
+ import { getDb, getSessionMessages, getTaskEvents } from "../store/db.js";
23
23
  import { getStatus, onStatusChange } from "../status.js";
24
24
  import { formatSseData, formatSseEvent } from "./sse.js";
25
25
  import { syncDecisionsFileToWiki } from "../squad/mirror.js";
@@ -279,6 +279,44 @@ app.get("/api/workers/:taskId", (req, res) => {
279
279
  completedAt: row.completed_at,
280
280
  });
281
281
  });
// Historical event log for a task (catch-up on page load).
//
// Route:  GET /api/workers/:taskId/events?afterSeq=<n>
// Reply:  { taskId, events } where `events` is whatever getTaskEvents returns.
// Throws: NotFoundError when no agent_tasks row exists for the id.
app.get("/api/workers/:taskId/events", (req, res) => {
    const { taskId } = req.params;
    // `afterSeq` is optional. Only a single plain string value is parsed; repeated
    // or nested query parameters arrive as arrays/objects and fall back to 0.
    const afterSeqRaw = req.query.afterSeq;
    const afterSeqParsed = typeof afterSeqRaw === "string" ? Number(afterSeqRaw) : 0;
    // Reject NaN, ±Infinity, fractions and negatives instead of forwarding them to
    // the store; the fallback stays 0, as before.
    // NOTE(review): presumably getTaskEvents returns events with seq greater than
    // this value — confirm against the store implementation.
    const afterSeq = Number.isSafeInteger(afterSeqParsed) && afterSeqParsed >= 0
        ? afterSeqParsed
        : 0;
    // Verify the task exists before reading its events.
    const taskRow = getDb()
        .prepare(`SELECT task_id FROM agent_tasks WHERE task_id = ?`)
        .get(taskId);
    if (!taskRow) {
        throw new NotFoundError("Task not found");
    }
    const events = getTaskEvents(taskId, afterSeq);
    res.json({ taskId, events });
});
// SSE stream for per-task live tool-call activity.
//
// Throws NotFoundError for an unknown task id. Otherwise it opens an
// event-stream response, writes a "connected" frame, and then relays every
// event published for the task until the request closes.
app.get("/api/workers/:taskId/events/stream", (req, res) => {
    const { taskId } = req.params;
    const taskLookup = getDb().prepare(`SELECT task_id FROM agent_tasks WHERE task_id = ?`);
    if (!taskLookup.get(taskId)) {
        throw new NotFoundError("Task not found");
    }
    const sseHeaders = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        Connection: "keep-alive",
    };
    res.writeHead(200, sseHeaders);
    res.write(formatSseData({ type: "connected", taskId }));
    // Comment-only frame written every 20 seconds while the stream is open.
    const sendPing = () => {
        res.write(`:ping\n\n`);
    };
    const heartbeat = setInterval(sendPing, 20_000);
    // Event fields are spread last, so they take precedence over `type` / `taskId`.
    const forwardEvent = (event) => {
        res.write(formatSseData({ type: "task_event", taskId, ...event }));
    };
    const unsub = subscribeTaskEvents(taskId, forwardEvent);
    // Stop the timer and drop the subscription once the request closes.
    const releaseStream = () => {
        clearInterval(heartbeat);
        unsub();
    };
    req.on("close", releaseStream);
});
282
320
  // ---------------------------------------------------------------------------
283
321
  // SSE stream for real-time chat
284
322
  // ---------------------------------------------------------------------------
package/dist/config.js CHANGED
@@ -46,6 +46,7 @@ const configSchema = z.object({
46
46
  API_RATE_LIMIT_AUTH_MAX: z.string().optional(),
47
47
  API_RATE_LIMIT_SSE_MAX_CONNECTIONS: z.string().optional(),
48
48
  ENABLE_SQUAD: z.string().optional(),
49
+ CHAPTERHOUSE_WORKIQ_AUTO_INSTALL: z.string().optional(),
49
50
  });
50
51
  export const DEFAULT_MODEL = "claude-sonnet-4.6";
51
52
  export const DEFAULT_TEAM_WIKI_CACHE_TTL_MINUTES = 60;
@@ -219,6 +220,7 @@ export function parseRuntimeConfig(env, options = {}) {
219
220
  apiRateLimitAuthMax,
220
221
  apiRateLimitSseMaxConnections,
221
222
  squadEnabled: raw.ENABLE_SQUAD === "1",
223
+ workiqAutoInstall: parseBooleanEnv("CHAPTERHOUSE_WORKIQ_AUTO_INSTALL", raw.CHAPTERHOUSE_WORKIQ_AUTO_INSTALL, true),
222
224
  };
223
225
  }
224
226
  const runtimeConfig = parseRuntimeConfig(process.env);
@@ -258,6 +260,7 @@ export const config = {
258
260
  apiRateLimitAuthMax: runtimeConfig.apiRateLimitAuthMax,
259
261
  apiRateLimitSseMaxConnections: runtimeConfig.apiRateLimitSseMaxConnections,
260
262
  squadEnabled: runtimeConfig.squadEnabled,
263
+ workiqAutoInstall: runtimeConfig.workiqAutoInstall,
261
264
  copilotAuthToken: runtimeConfig.copilotAuthToken,
262
265
  get copilotModel() {
263
266
  return _copilotModel;
@@ -7,7 +7,7 @@ import { config, DEFAULT_MODEL } from "../config.js";
7
7
  import { loadMcpConfig } from "./mcp-config.js";
8
8
  import { getSkillDirectories } from "./skills.js";
9
9
  import { resetClient } from "./client.js";
10
- import { logConversation, getState, setState, deleteState, getCopilotSession, upsertCopilotSession, getTaskSessionKey, getDb, bumpProjectLastUsed } from "../store/db.js";
10
+ import { logConversation, getState, setState, deleteState, getCopilotSession, upsertCopilotSession, getTaskSessionKey, getDb, bumpProjectLastUsed, appendTaskEvent } from "../store/db.js";
11
11
  import { maybeWriteEpisode } from "./episode-writer.js";
12
12
  import { getWikiSummary } from "../wiki/context.js";
13
13
  import { SESSIONS_DIR } from "../paths.js";
@@ -51,6 +51,32 @@ let lastRouteResult;
51
51
  export function getLastRouteResult() {
52
52
  return lastRouteResult;
53
53
  }
/**
 * Live listener registry: taskId → Set of callbacks currently subscribed to
 * that task's events. A task's entry is removed when its last listener leaves.
 */
const taskEventListeners = new Map();
/**
 * Subscribe to events emitted for one task.
 *
 * @param {string} taskId - Task whose events should be delivered.
 * @param {(event: *) => void} listener - Invoked once per emitted event.
 * @returns {() => void} Unsubscribe handle. Calling it more than once is a no-op.
 */
export function subscribeTaskEvents(taskId, listener) {
    let listeners = taskEventListeners.get(taskId);
    if (!listeners) {
        listeners = new Set();
        taskEventListeners.set(taskId, listeners);
    }
    listeners.add(listener);
    let active = true;
    return () => {
        // Each handle releases only its own subscription. Without this guard a
        // second call could remove a later subscription that reuses the same
        // function object.
        if (!active) {
            return;
        }
        active = false;
        const current = taskEventListeners.get(taskId);
        if (!current) {
            return;
        }
        current.delete(listener);
        if (current.size === 0) {
            taskEventListeners.delete(taskId);
        }
    };
}
/**
 * Deliver `event` to every listener subscribed to `taskId` when dispatch begins.
 * A throwing listener never prevents delivery to the remaining ones.
 *
 * @param {string} taskId - Task the event belongs to.
 * @param {*} event - Payload handed to each listener unchanged.
 */
function emitTaskEvent(taskId, event) {
    const listeners = taskEventListeners.get(taskId);
    if (!listeners) {
        return;
    }
    // Iterate a snapshot so a listener subscribed during dispatch does not
    // receive the in-flight event. The has() check keeps a listener that was
    // removed during dispatch from being called.
    for (const listener of [...listeners]) {
        if (!listeners.has(listener)) {
            continue;
        }
        try {
            listener(event);
        }
        catch { /* non-fatal */ }
    }
}
54
80
  // ---------------------------------------------------------------------------
55
81
  // SessionRegistry — the single owner of all per-session orchestrators
56
82
  // ---------------------------------------------------------------------------
@@ -333,6 +359,21 @@ async function executeOnSession(manager, item) {
333
359
  let accumulated = "";
334
360
  let toolCallExecuted = false;
335
361
  let toolCallCount = 0;
362
+ // Per-turn map: toolCallId → spawn args stashed from tool.execution_start when toolName === "task".
363
+ // Correlates the SDK's subagent.started event (which only carries agent_type fields) with the
364
+ // actual spawn parameters (name, description) passed to the task() tool call.
365
+ const spawnArgsMap = new Map();
366
+ // Unconditional capture — must fire even when onActivity is absent so the DB handler can resolve names.
367
+ const unsubSpawnCapture = session.on("tool.execution_start", (event) => {
368
+ const data = event.data;
369
+ if (data.toolName === "task" && data.toolCallId) {
370
+ const args = (data.arguments ?? {});
371
+ spawnArgsMap.set(data.toolCallId, {
372
+ name: typeof args.name === "string" ? args.name : undefined,
373
+ description: typeof args.description === "string" ? args.description : undefined,
374
+ });
375
+ }
376
+ });
336
377
  const unsubToolDone = session.on("tool.execution_complete", (event) => {
337
378
  toolCallExecuted = true;
338
379
  toolCallCount++;
@@ -378,12 +419,20 @@ async function executeOnSession(manager, item) {
378
419
  const unsubSubStart = item.onActivity
379
420
  ? session.on("subagent.started", (event) => {
380
421
  const data = event.data;
422
+ const spawnArgs = spawnArgsMap.get(data.toolCallId);
423
+ const agentSlug = (typeof spawnArgs?.name === "string" ? spawnArgs.name : (data.agentName || "unknown"))
424
+ .toLowerCase()
425
+ .replace(/\s+/g, "-");
426
+ const resolvedDescription = (typeof spawnArgs?.description === "string"
427
+ ? spawnArgs.description
428
+ : data.agentDescription || data.agentDisplayName || `Squad dispatch: ${agentSlug}`).slice(0, 500);
381
429
  item.onActivity({
382
430
  kind: "subagent_started",
383
431
  toolCallId: data.toolCallId,
384
432
  agentName: data.agentName,
385
433
  agentDisplayName: data.agentDisplayName,
386
- agentDescription: data.agentDescription,
434
+ agentDescription: resolvedDescription,
435
+ agentSlug,
387
436
  });
388
437
  })
389
438
  : () => { };
@@ -413,17 +462,27 @@ async function executeOnSession(manager, item) {
413
462
  : () => { };
414
463
  // Always persist SDK subagent dispatches to agent_tasks so Workers tab shows them.
415
464
  const db = getDb();
465
+ // Set of task IDs for subagents spawned in THIS turn — used to filter nested tool events.
466
+ const activeSubagentTaskIds = new Set();
416
467
  const unsubSubStartDb = session.on("subagent.started", (event) => {
417
468
  try {
418
469
  const data = event.data;
419
- const agentSlug = (data.agentName || "unknown").toLowerCase().replace(/\s+/g, "-");
420
- const description = (data.agentDescription || data.agentDisplayName || `Squad dispatch: ${agentSlug}`).slice(0, 500);
470
+ const spawnArgs = spawnArgsMap.get(data.toolCallId);
471
+ const agentSlug = (typeof spawnArgs?.name === "string" ? spawnArgs.name : (data.agentName || "unknown"))
472
+ .toLowerCase()
473
+ .replace(/\s+/g, "-");
474
+ const description = (typeof spawnArgs?.description === "string"
475
+ ? spawnArgs.description
476
+ : data.agentDescription || data.agentDisplayName || `Squad dispatch: ${agentSlug}`).slice(0, 500);
421
477
  db.prepare(`INSERT OR IGNORE INTO agent_tasks (task_id, agent_slug, description, status, origin_channel, session_key, source) VALUES (?, ?, ?, 'running', ?, ?, 'squad')`).run(data.toolCallId, agentSlug, description, item.sourceChannel || null, sessionKey);
478
+ activeSubagentTaskIds.add(data.toolCallId);
422
479
  }
423
480
  catch { /* non-fatal */ }
424
481
  });
425
482
  const unsubSubDoneDb = session.on("subagent.completed", (event) => {
426
483
  try {
484
+ spawnArgsMap.delete(event.data.toolCallId);
485
+ activeSubagentTaskIds.delete(event.data.toolCallId);
427
486
  db.prepare(`UPDATE agent_tasks SET status = 'completed', completed_at = CURRENT_TIMESTAMP WHERE task_id = ?`).run(event.data.toolCallId);
428
487
  }
429
488
  catch { /* non-fatal */ }
@@ -431,10 +490,57 @@ async function executeOnSession(manager, item) {
431
490
  const unsubSubFailDb = session.on("subagent.failed", (event) => {
432
491
  try {
433
492
  const data = event.data;
493
+ spawnArgsMap.delete(data.toolCallId);
494
+ activeSubagentTaskIds.delete(data.toolCallId);
434
495
  db.prepare(`UPDATE agent_tasks SET status = 'error', result = ?, completed_at = CURRENT_TIMESTAMP WHERE task_id = ?`).run(data.error || "Subagent failed", data.toolCallId);
435
496
  }
436
497
  catch { /* non-fatal */ }
437
498
  });
499
+ // ---------------------------------------------------------------------------
500
+ // Nested tool-call streaming — capture tool.execution_start / _complete events
501
+ // whose parentToolCallId matches a known subagent task id, persist them to
502
+ // agent_task_events, and broadcast to per-task SSE subscribers.
503
+ // ---------------------------------------------------------------------------
504
+ const unsubNestedToolStart = session.on("tool.execution_start", (event) => {
505
+ try {
506
+ const data = event.data;
507
+ const parentId = data.parentToolCallId;
508
+ if (!parentId || !activeSubagentTaskIds.has(parentId))
509
+ return;
510
+ const toolName = data.toolName ?? null;
511
+ const args = data.arguments ?? {};
512
+ let summary = null;
513
+ if (typeof args.command === "string")
514
+ summary = args.command.slice(0, 120);
515
+ else if (typeof args.path === "string")
516
+ summary = args.path.slice(0, 120);
517
+ else if (typeof args.query === "string")
518
+ summary = args.query.slice(0, 120);
519
+ else if (typeof args.prompt === "string")
520
+ summary = args.prompt.slice(0, 120);
521
+ const ev = appendTaskEvent(parentId, "tool_start", toolName, summary);
522
+ if (ev)
523
+ emitTaskEvent(parentId, ev);
524
+ }
525
+ catch { /* non-fatal */ }
526
+ });
527
+ const unsubNestedToolDone = session.on("tool.execution_complete", (event) => {
528
+ try {
529
+ const data = event.data;
530
+ const parentId = data.parentToolCallId;
531
+ if (!parentId || !activeSubagentTaskIds.has(parentId))
532
+ return;
533
+ const success = data.success !== false;
534
+ const resultContent = data.result?.content ?? data.result?.detailedContent;
535
+ const summary = typeof resultContent === "string"
536
+ ? (success ? resultContent.slice(0, 120) : `error: ${resultContent.slice(0, 100)}`)
537
+ : (success ? "ok" : "error");
538
+ const ev = appendTaskEvent(parentId, "tool_complete", null, summary);
539
+ if (ev)
540
+ emitTaskEvent(parentId, ev);
541
+ }
542
+ catch { /* non-fatal */ }
543
+ });
438
544
  const unsubDelta = session.on("assistant.message_delta", (event) => {
439
545
  if (toolCallExecuted && accumulated.length > 0 && !accumulated.endsWith("\n")) {
440
546
  accumulated += "\n";
@@ -474,6 +580,7 @@ async function executeOnSession(manager, item) {
474
580
  unsubDelta();
475
581
  unsubToolDone();
476
582
  unsubToolStart();
583
+ unsubSpawnCapture();
477
584
  unsubReasoning();
478
585
  unsubSubStart();
479
586
  unsubSubDone();
@@ -481,6 +588,8 @@ async function executeOnSession(manager, item) {
481
588
  unsubSubStartDb();
482
589
  unsubSubDoneDb();
483
590
  unsubSubFailDb();
591
+ unsubNestedToolStart();
592
+ unsubNestedToolDone();
484
593
  }
485
594
  });
486
595
  }
@@ -98,6 +98,7 @@ async function loadOrchestratorModule(t, overrides = {}) {
98
98
  { slug: "coder", name: "Kaylee", model: "claude-sonnet-4.6" },
99
99
  ],
100
100
  sendResult: "Finished successfully",
101
+ taskEvents: new Map(),
101
102
  ...overrides,
102
103
  };
103
104
  const client = createFakeClient(state);
@@ -181,8 +182,17 @@ async function loadOrchestratorModule(t, overrides = {}) {
181
182
  get: () => undefined,
182
183
  all: () => [],
183
184
  }),
185
+ transaction: (fn) => fn,
184
186
  }),
185
187
  bumpProjectLastUsed: (_projectRoot) => { },
188
+ appendTaskEvent: (taskId, kind, toolName, summary) => {
189
+ const seq = (state.taskEvents.get(taskId)?.length ?? 0) + 1;
190
+ const ev = { id: seq, taskId, seq, ts: Date.now(), kind, toolName, summary };
191
+ if (!state.taskEvents.has(taskId))
192
+ state.taskEvents.set(taskId, []);
193
+ state.taskEvents.get(taskId).push(ev);
194
+ return ev;
195
+ },
186
196
  },
187
197
  });
188
198
  t.mock.module("./episode-writer.js", {
@@ -665,4 +675,199 @@ test("regression #35: session A blocking does not delay session B (concurrent se
665
675
  state.pendingReject?.(new Error("test teardown"));
666
676
  await new Promise((resolve) => setTimeout(resolve, 10));
667
677
  });
// ---------------------------------------------------------------------------
// Local fixtures shared by the #81 and #86 tests in this section
// ---------------------------------------------------------------------------
/**
 * Load the mocked orchestrator with sendResult "__PENDING__", initialise it,
 * send one background message, wait 10 ms and assert that a fake session
 * exists. Returns the mock state object.
 */
async function openPendingTurn(t, prompt, onActivity) {
    const { orchestrator, state, client } = await loadOrchestratorModule(t, {
        sendResult: "__PENDING__",
    });
    await orchestrator.initOrchestrator(client);
    const onReply = () => { };
    if (onActivity) {
        orchestrator.sendToOrchestrator(prompt, { type: "background" }, onReply, undefined, // no attachments
        onActivity);
    }
    else {
        orchestrator.sendToOrchestrator(prompt, { type: "background" }, onReply);
    }
    await new Promise((resolve) => setTimeout(resolve, 10));
    assert.ok(state.lastSession, "FakeSession must have been created");
    return state;
}
/** tool.execution_start payload for a task() spawn carrying name/description. */
const kayleeSpawnStart = (toolCallId) => ({
    toolName: "task",
    toolCallId,
    arguments: { name: "kaylee", description: "🔧 Kaylee: test spawn" },
});
/** subagent.started payload carrying only the SDK's agent_type boilerplate. */
const sdkBoilerplateStarted = (toolCallId) => ({
    toolCallId,
    agentName: "general-purpose",
    agentDisplayName: "General Purpose Agent",
    agentDescription: "Full-capability agent boilerplate",
});
/** First recorded DB write that INSERTs into agent_tasks, if any. */
const firstAgentTaskInsert = (state) => state.dbWrites.find((w) => w.sql.includes("INSERT") && w.sql.includes("agent_tasks"));
/** Task events recorded by the appendTaskEvent mock for one task id. */
const recordedTaskEvents = (state, taskId) => state.taskEvents.get(taskId) ?? [];
/** Reject the pending turn, if one is outstanding, to end the test. */
const abandonPendingTurn = (state) => {
    state.pendingReject?.(new Error("test teardown"));
};
// ---------------------------------------------------------------------------
// #81 — spawn args (name/description) passed to task() must take precedence
// over the agent_type fields on subagent.started. The spawn parameters arrive
// earlier, on tool.execution_start with toolName === "task", and are matched
// by toolCallId so the stored row says "kaylee" rather than "general-purpose".
// ---------------------------------------------------------------------------
test("#81: tool.execution_start stash + subagent.started → agent_tasks uses spawn name/description", async (t) => {
    const state = await openPendingTurn(t, "dispatch a worker");
    // Spawn parameters first, then the SDK event with the same toolCallId
    state.lastSession.emit("tool.execution_start", kayleeSpawnStart("tc-spawn-1"));
    state.lastSession.emit("subagent.started", sdkBoilerplateStarted("tc-spawn-1"));
    const insertWrite = firstAgentTaskInsert(state);
    assert.ok(insertWrite, "subagent.started must INSERT a row into agent_tasks");
    const argsJson = JSON.stringify(insertWrite.args);
    assert.ok(argsJson.includes("kaylee"), `agent_slug must be "kaylee" but got: ${argsJson}`);
    assert.ok(argsJson.includes("🔧 Kaylee: test spawn"), `description must be spawn description but got: ${argsJson}`);
    assert.ok(!argsJson.includes("general-purpose"), `agent_slug must NOT fall back to "general-purpose" when spawn name is available`);
    abandonPendingTurn(state);
});
test("#81 fallback: subagent.started with no prior tool.execution_start uses agentName/agentDescription", async (t) => {
    const state = await openPendingTurn(t, "dispatch a worker");
    // subagent.started arrives with nothing stashed for its toolCallId
    state.lastSession.emit("subagent.started", sdkBoilerplateStarted("tc-no-spawn"));
    const insertWrite = firstAgentTaskInsert(state);
    assert.ok(insertWrite, "subagent.started must still INSERT a row without spawn args");
    const argsJson = JSON.stringify(insertWrite.args);
    assert.ok(argsJson.includes("general-purpose"), `agent_slug must fall back to agentName ("general-purpose") when no spawn args: ${argsJson}`);
    assert.ok(argsJson.includes("Full-capability agent boilerplate"), `description must fall back to agentDescription: ${argsJson}`);
    abandonPendingTurn(state);
});
test("#81: activity callback receives resolved agentSlug from spawn args (SSE live path)", async (t) => {
    const activityEvents = [];
    const state = await openPendingTurn(t, "dispatch a worker", (event) => { activityEvents.push(event); });
    state.lastSession.emit("tool.execution_start", kayleeSpawnStart("tc-activity-1"));
    state.lastSession.emit("subagent.started", sdkBoilerplateStarted("tc-activity-1"));
    const startedEvent = activityEvents.find((e) => e.kind === "subagent_started");
    assert.ok(startedEvent, "onActivity must have been called with a subagent_started event");
    assert.equal(startedEvent.agentSlug, "kaylee", `agentSlug in activity event must be "kaylee" (spawn name), got: ${startedEvent.agentSlug}`);
    assert.ok(String(startedEvent.agentDescription).includes("🔧 Kaylee"), `agentDescription in activity event must use spawn description, got: ${startedEvent.agentDescription}`);
    abandonPendingTurn(state);
});
// ---------------------------------------------------------------------------
// #86: nested tool-call events recorded per subagent task
// ---------------------------------------------------------------------------
test("#86: tool.execution_start with parentToolCallId matching active subagent calls appendTaskEvent", async (t) => {
    const state = await openPendingTurn(t, "dispatch kaylee");
    // subagent.started registers the task id the nested event refers to
    state.lastSession.emit("subagent.started", {
        toolCallId: "subagent-task-001",
        agentName: "kaylee",
        agentDisplayName: "Kaylee — Backend Dev",
        agentDescription: "Fix the streaming gap",
    });
    state.lastSession.emit("tool.execution_start", {
        toolCallId: "nested-call-001",
        toolName: "bash",
        parentToolCallId: "subagent-task-001",
        arguments: { command: "npm run build" },
    });
    const events = recordedTaskEvents(state, "subagent-task-001");
    assert.equal(events.length, 1, "appendTaskEvent must have been called once for the nested tool start");
    const [only] = events;
    assert.equal(only.kind, "tool_start");
    assert.equal(only.toolName, "bash");
    assert.equal(only.summary, "npm run build");
    abandonPendingTurn(state);
});
test("#86: tool.execution_start with parentToolCallId NOT in active subagents is ignored", async (t) => {
    const state = await openPendingTurn(t, "run something");
    // No subagent.started was emitted, so the parent id is unknown
    state.lastSession.emit("tool.execution_start", {
        toolCallId: "nested-call-002",
        toolName: "bash",
        parentToolCallId: "unknown-parent",
        arguments: { command: "echo hi" },
    });
    const events = recordedTaskEvents(state, "unknown-parent");
    assert.equal(events.length, 0, "appendTaskEvent must NOT be called when parentToolCallId is not a known subagent");
    abandonPendingTurn(state);
});
test("#86: tool.execution_complete with parentToolCallId calls appendTaskEvent with tool_complete", async (t) => {
    const state = await openPendingTurn(t, "dispatch agent");
    state.lastSession.emit("subagent.started", {
        toolCallId: "subagent-task-002",
        agentName: "zoe",
        agentDisplayName: "Zoe — QA",
        agentDescription: "Run tests",
    });
    state.lastSession.emit("tool.execution_complete", {
        toolCallId: "nested-call-003",
        parentToolCallId: "subagent-task-002",
        success: true,
        result: { content: "All tests passed" },
    });
    const events = recordedTaskEvents(state, "subagent-task-002");
    assert.equal(events.length, 1, "appendTaskEvent must have been called for tool_complete");
    assert.equal(events[0].kind, "tool_complete");
    assert.ok(String(events[0].summary).includes("All tests passed"), `summary must include result content, got: ${events[0].summary}`);
    abandonPendingTurn(state);
});
test("#86: subagent.completed removes task from activeSubagentTaskIds — subsequent nested events ignored", async (t) => {
    const state = await openPendingTurn(t, "dispatch agent");
    state.lastSession.emit("subagent.started", {
        toolCallId: "subagent-task-003",
        agentName: "wash",
        agentDisplayName: "Wash",
        agentDescription: "UI work",
    });
    // The subagent finishes before the nested event below arrives
    state.lastSession.emit("subagent.completed", {
        toolCallId: "subagent-task-003",
        agentName: "wash",
        agentDisplayName: "Wash",
    });
    state.lastSession.emit("tool.execution_start", {
        toolCallId: "late-tool-call",
        toolName: "view",
        parentToolCallId: "subagent-task-003",
        arguments: { path: "/some/file" },
    });
    const events = recordedTaskEvents(state, "subagent-task-003");
    assert.equal(events.length, 0, "No task events must be recorded after subagent completes");
    abandonPendingTurn(state);
});
668
873
  //# sourceMappingURL=orchestrator.test.js.map
@@ -53,6 +53,11 @@ export class SessionManager {
53
53
  _currentModel;
54
54
  _recentTiers = [];
55
55
  _lastActivityAt = Date.now();
56
+ /** Set by registry.close() when the session is busy at close time. The drain loop
57
+ * honors this after the queue fully empties — evicting without violating the
58
+ * never-evict-mid-turn invariant. */
59
+ _pendingClose = false;
60
+ _onPendingCloseEvict;
56
61
  constructor(sessionKey, worker, sessionFactory) {
57
62
  this.worker = worker;
58
63
  this.sessionFactory = sessionFactory;
@@ -72,6 +77,19 @@ export class SessionManager {
72
77
  get lastActivityAt() {
73
78
  return this._lastActivityAt;
74
79
  }
80
+ /** True when an explicit close was requested while the session was busy. */
81
+ get pendingClose() {
82
+ return this._pendingClose;
83
+ }
84
+ /**
85
+ * Mark this session for deferred eviction. Called by SessionRegistry.close()
86
+ * when the session is mid-turn or has queued messages. The drain loop calls
87
+ * `onEvict` after the queue fully empties.
88
+ */
89
+ setPendingClose(onEvict) {
90
+ this._pendingClose = true;
91
+ this._onPendingCloseEvict = onEvict;
92
+ }
75
93
  // ── Session and model state (for orchestrator.ts) ────────────────────────
76
94
  get session() {
77
95
  return this._session;
@@ -136,6 +154,11 @@ export class SessionManager {
136
154
  this._lastActivityAt = Date.now();
137
155
  }
138
156
  this._processing = false;
157
+ // Honor deferred explicit-close: evict now that the queue is empty.
158
+ if (this._pendingClose && this._queue.length === 0) {
159
+ log.info({ sessionKey: this.sessionKey }, "session.pendingClose.evicting");
160
+ this._onPendingCloseEvict?.();
161
+ }
139
162
  }
140
163
  // ── Session lifecycle ────────────────────────────────────────────────────
141
164
  /** Ensure the CopilotSession exists, creating/resuming if needed. Concurrency-safe. */
@@ -245,14 +268,21 @@ export class SessionRegistry {
245
268
  }
246
269
  /**
247
270
  * Explicitly close a session (e.g., browser tab closed).
248
- * Deferred (with warning) if the session is currently busy.
271
+ * If busy (mid-turn or queued messages), sets _pendingClose on the manager so the
272
+ * drain loop evicts it as soon as the queue empties — honoring the explicit-close
273
+ * intent without violating the never-evict-mid-turn invariant.
249
274
  */
250
275
  close(sessionKey, reason) {
251
276
  const manager = this.managers.get(sessionKey);
252
277
  if (!manager)
253
278
  return;
254
279
  if (!manager.canEvict) {
255
- log.warn({ sessionKey, reason }, "Eviction deferred — session is mid-turn or has queued messages");
280
+ log.info({ sessionKey, reason }, "session.close.deferred — session is busy; will evict when queue drains");
281
+ manager.setPendingClose(() => {
282
+ this.managers.delete(sessionKey);
283
+ void manager.evict(reason);
284
+ log.info({ sessionKey, reason }, "session.evicted (deferred)");
285
+ });
256
286
  return;
257
287
  }
258
288
  this.managers.delete(sessionKey);