chapterhouse 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
+ import { AsyncLocalStorage } from "node:async_hooks";
1
2
  import { approveAll } from "@github/copilot-sdk";
2
3
  import { createTools } from "./tools.js";
3
4
  import { getOrchestratorSystemMessage } from "./system-message.js";
5
+ import { CHAPTERHOUSE_VERSION } from "../version.js";
4
6
  import { config, DEFAULT_MODEL } from "../config.js";
5
7
  import { loadMcpConfig } from "./mcp-config.js";
6
8
  import { getSkillDirectories } from "./skills.js";
@@ -14,11 +16,8 @@ import { loadAgents, ensureDefaultAgents, clearActiveTasks, getAgentRegistry, se
14
16
  import { normalizeProjectPath, setChannelProject } from "../squad/context.js";
15
17
  import { getSquadCoordinatorSystemMessage } from "../squad/charter.js";
16
18
  import { childLogger } from "../util/logger.js";
19
+ import { SessionManager, SessionRegistry, SESSION_IDLE_TTL_MS, SESSION_MAX_ACTIVE, } from "./session-manager.js";
17
20
  const log = childLogger("orchestrator");
18
- /**
19
- * Permission handler for the orchestrator session.
20
- * Approves all tool requests so @chapterhouse has full access to all tools.
21
- */
22
21
  const orchestratorPermissionHandler = approveAll;
23
22
  const MAX_RETRIES = 3;
24
23
  const RECONNECT_DELAYS_MS = [1_000, 3_000, 10_000];
@@ -33,61 +32,58 @@ let proactiveNotifyFn;
33
32
  export function setProactiveNotify(fn) {
34
33
  proactiveNotifyFn = fn;
35
34
  }
35
+ const turnContextStorage = new AsyncLocalStorage();
36
+ // ---------------------------------------------------------------------------
37
+ // Module-level state (not per-session)
38
+ // ---------------------------------------------------------------------------
36
39
  let copilotClient;
37
40
  let healthCheckTimer;
38
41
  let currentUserContext;
42
+ /**
43
+ * Last-seen auth context — persists after a turn completes so that callers
44
+ * which inspect it outside of an active turn (e.g. /api/cancel) still see the
45
+ * most recent values. Tools that run DURING a turn should use the per-turn
46
+ * AsyncLocalStorage context for safety in concurrent-session scenarios.
47
+ */
39
48
  let currentAuthenticatedUser;
40
49
  let currentAuthorizationHeader;
41
- // Router state
42
- let recentTiers = [];
43
50
  let lastRouteResult;
44
51
  export function getLastRouteResult() {
45
52
  return lastRouteResult;
46
53
  }
47
- // Session map — one entry per session key ("default" or "project:{normalizedRoot}")
48
- const sessionMap = new Map();
49
- const sessionModelMap = new Map();
50
- const sessionCreatePromises = new Map();
51
- // Track which session key the currently-executing turn belongs to (for abort + task routing)
52
- let currentProcessingSessionKey;
54
+ // ---------------------------------------------------------------------------
55
+ // SessionRegistry — the single owner of all per-session orchestrators
56
+ // ---------------------------------------------------------------------------
57
+ let registry;
58
+ function buildRegistry() {
59
+ return new SessionRegistry({ idleTtlMs: SESSION_IDLE_TTL_MS, maxActive: SESSION_MAX_ACTIVE }, (sessionKey) => new SessionManager(sessionKey, processItem, createOrResumeSession));
60
+ }
61
+ // ---------------------------------------------------------------------------
62
+ // Context getters — exported for tools.ts and server.ts
63
+ // ---------------------------------------------------------------------------
53
64
  export function getCurrentSessionKey() {
54
- return currentProcessingSessionKey ?? "default";
65
+ return turnContextStorage.getStore()?.sessionKey ?? "default";
55
66
  }
56
- const messageQueue = [];
57
- let processing = false;
58
- let currentCallback;
59
- /** The channel currently being processed — tools use this to tag new workers. */
60
- let currentSourceChannel;
61
- /** Get the channel that originated the message currently being processed. */
62
67
  export function getCurrentSourceChannel() {
63
- return currentSourceChannel;
68
+ return turnContextStorage.getStore()?.sourceChannel;
64
69
  }
65
- let currentChannelKey;
66
70
  export function getCurrentChannelKey() {
67
- return currentChannelKey;
71
+ return turnContextStorage.getStore()?.channelKey;
68
72
  }
69
- /**
70
- * The activity callback for the message currently being processed. Tool handlers
71
- * (notably `delegate_to_agent`) read this to forward child-session events back
72
- * to the parent's SSE connection.
73
- */
74
- let currentActivityCallback;
75
73
  export function getCurrentActivityCallback() {
76
- return currentActivityCallback;
74
+ return turnContextStorage.getStore()?.activityCallback;
77
75
  }
78
76
  export function getCurrentAuthenticatedUser() {
79
- return currentAuthenticatedUser;
77
+ return turnContextStorage.getStore()?.authUser ?? currentAuthenticatedUser;
80
78
  }
81
79
  export function getLastAuthenticatedUser() {
82
80
  const raw = getState(LAST_AUTHENTICATED_USER_KEY);
83
- if (!raw) {
81
+ if (!raw)
84
82
  return undefined;
85
- }
86
83
  try {
87
84
  const parsed = JSON.parse(raw);
88
- if (!parsed?.id || !parsed?.name || !parsed?.email || !parsed?.role) {
85
+ if (!parsed?.id || !parsed?.name || !parsed?.email || !parsed?.role)
89
86
  return undefined;
90
- }
91
87
  return parsed;
92
88
  }
93
89
  catch {
@@ -95,8 +91,11 @@ export function getLastAuthenticatedUser() {
95
91
  }
96
92
  }
97
93
  export function getCurrentAuthorizationHeader() {
98
- return currentAuthorizationHeader;
94
+ return turnContextStorage.getStore()?.authHeader ?? currentAuthorizationHeader;
99
95
  }
96
+ // ---------------------------------------------------------------------------
97
+ // Internal helpers
98
+ // ---------------------------------------------------------------------------
100
99
  function getSessionConfig() {
101
100
  const tools = createTools({
102
101
  client: copilotClient,
@@ -118,17 +117,16 @@ function sameUserContext(a, b) {
118
117
  return a?.name === b?.name && a?.role === b?.role;
119
118
  }
120
119
  function updateUserContext(source) {
121
- if (source.type !== "web") {
120
+ if (source.type !== "web")
122
121
  return;
123
- }
124
122
  const nextContext = source.user
125
123
  ? { name: source.user.name, role: source.user.role }
126
124
  : undefined;
127
- if (sameUserContext(currentUserContext, nextContext)) {
125
+ if (sameUserContext(currentUserContext, nextContext))
128
126
  return;
129
- }
130
127
  currentUserContext = nextContext;
131
- sessionMap.delete("default");
128
+ // Invalidate the default session so it's recreated with the updated system message
129
+ registry?.get("default")?.invalidateSession();
132
130
  }
133
131
  function updateRequestContext(source) {
134
132
  if (source.type !== "web") {
@@ -142,7 +140,6 @@ function updateRequestContext(source) {
142
140
  setState(LAST_AUTHENTICATED_USER_KEY, JSON.stringify(source.user));
143
141
  }
144
142
  }
145
- /** Feed an agent task result into the orchestrator as a new turn. */
146
143
  export function feedAgentResult(taskId, agentSlug, result) {
147
144
  const prompt = `[Agent task completed] @${agentSlug} finished task ${taskId}:\n\n${result}`;
148
145
  const sessionKey = getTaskSessionKey(taskId);
@@ -155,7 +152,6 @@ export function feedAgentResult(taskId, agentSlug, result) {
155
152
  function sleep(ms) {
156
153
  return new Promise((resolve) => setTimeout(resolve, ms));
157
154
  }
158
- /** Ensure the SDK client is connected, resetting if necessary. Coalesces concurrent resets. */
159
155
  let resetPromise;
160
156
  async function ensureClient() {
161
157
  if (copilotClient && copilotClient.getState() === "connected") {
@@ -171,7 +167,6 @@ async function ensureClient() {
171
167
  }
172
168
  return resetPromise;
173
169
  }
174
- /** Start periodic health check that proactively reconnects the client. */
175
170
  function startHealthCheck() {
176
171
  if (healthCheckTimer)
177
172
  return;
@@ -183,9 +178,10 @@ function startHealthCheck() {
183
178
  if (state !== "connected") {
184
179
  log.info({ state }, "Health check: client not connected, resetting");
185
180
  await ensureClient();
186
- // Session may need recovery after client reset
187
- sessionMap.clear();
188
- sessionModelMap.clear();
181
+ // Invalidate all cached sessions — they're tied to the old connection
182
+ for (const [, mgr] of registry.getAll()) {
183
+ mgr.invalidateSession();
184
+ }
189
185
  }
190
186
  }
191
187
  catch (err) {
@@ -193,27 +189,7 @@ function startHealthCheck() {
193
189
  }
194
190
  }, HEALTH_CHECK_INTERVAL_MS);
195
191
  }
196
- /** Ensure a session exists for the given key, creating/resuming as needed. Concurrency-safe. */
197
- async function ensureOrchestratorSession(sessionKey, projectRoot) {
198
- const existing = sessionMap.get(sessionKey);
199
- if (existing)
200
- return existing;
201
- // Coalesce concurrent callers for the same key
202
- const inFlight = sessionCreatePromises.get(sessionKey);
203
- if (inFlight)
204
- return inFlight;
205
- const promise = createOrResumeSession(sessionKey, projectRoot);
206
- sessionCreatePromises.set(sessionKey, promise);
207
- try {
208
- const session = await promise;
209
- sessionMap.set(sessionKey, session);
210
- return session;
211
- }
212
- finally {
213
- sessionCreatePromises.delete(sessionKey);
214
- }
215
- }
216
- /** Internal: actually create or resume a session (not concurrency-safe — use ensureOrchestratorSession). */
192
+ /** Internal: create or resume a CopilotSession. Called by SessionManager.ensureSession(). */
217
193
  async function createOrResumeSession(sessionKey, projectRoot) {
218
194
  const client = await ensureClient();
219
195
  const { tools, mcpServers, skillDirectories } = getSessionConfig();
@@ -223,7 +199,6 @@ async function createOrResumeSession(sessionKey, projectRoot) {
223
199
  backgroundCompactionThreshold: 0.80,
224
200
  bufferExhaustionThreshold: 0.95,
225
201
  };
226
- // Build the correct system message for this session mode
227
202
  let systemMessageContent;
228
203
  if (isProjectSession && projectRoot) {
229
204
  systemMessageContent = await getSquadCoordinatorSystemMessage(projectRoot);
@@ -232,9 +207,9 @@ async function createOrResumeSession(sessionKey, projectRoot) {
232
207
  const memorySummary = getWikiSummary();
233
208
  systemMessageContent = getOrchestratorSystemMessage({
234
209
  ...getSystemMessageOptions(memorySummary, isProjectSession ? projectRoot : undefined),
210
+ version: CHAPTERHOUSE_VERSION,
235
211
  });
236
212
  }
237
- // Try to resume from copilot_sessions; fall back to legacy max_state key for the default session
238
213
  const stored = getCopilotSession(sessionKey);
239
214
  const savedSessionId = stored?.copilotSessionId ?? (sessionKey === "default" ? getState(ORCHESTRATOR_SESSION_KEY) : undefined);
240
215
  if (savedSessionId) {
@@ -253,7 +228,9 @@ async function createOrResumeSession(sessionKey, projectRoot) {
253
228
  });
254
229
  log.info({ sessionKey }, "Session resumed successfully");
255
230
  upsertCopilotSession(sessionKey, isProjectSession ? "project" : "default", session.sessionId, projectRoot, config.copilotModel);
256
- sessionModelMap.set(sessionKey, config.copilotModel);
231
+ const mgr = registry?.get(sessionKey);
232
+ if (mgr)
233
+ mgr.currentModel = config.copilotModel;
257
234
  return session;
258
235
  }
259
236
  catch (err) {
@@ -262,7 +239,6 @@ async function createOrResumeSession(sessionKey, projectRoot) {
262
239
  deleteState(ORCHESTRATOR_SESSION_KEY);
263
240
  }
264
241
  }
265
- // Create a fresh session
266
242
  log.info({ sessionKey }, "Creating new session");
267
243
  const session = await client.createSession({
268
244
  model: config.copilotModel,
@@ -277,20 +253,25 @@ async function createOrResumeSession(sessionKey, projectRoot) {
277
253
  });
278
254
  log.info({ sessionKey, sessionId: session.sessionId.slice(0, 8) }, "Session created");
279
255
  upsertCopilotSession(sessionKey, isProjectSession ? "project" : "default", session.sessionId, projectRoot, config.copilotModel);
280
- // Backward compat: also persist the default session to the legacy state key
281
256
  if (sessionKey === "default")
282
257
  setState(ORCHESTRATOR_SESSION_KEY, session.sessionId);
283
- sessionModelMap.set(sessionKey, config.copilotModel);
258
+ const mgr = registry?.get(sessionKey);
259
+ if (mgr)
260
+ mgr.currentModel = config.copilotModel;
284
261
  return session;
285
262
  }
286
263
  export async function initOrchestrator(client) {
287
264
  copilotClient = client;
265
+ // (Re-)create the registry — supports multiple initOrchestrator calls in tests
266
+ if (registry) {
267
+ await registry.shutdown();
268
+ }
269
+ registry = buildRegistry();
270
+ registry.startEvictionTimer();
288
271
  const { mcpServers, skillDirectories } = getSessionConfig();
289
- // Initialize agent system
290
272
  ensureDefaultAgents();
291
273
  const agents = loadAgents();
292
274
  log.info({ count: agents.length, agents: agents.map((a) => `@${a.slug}`) }, "Agents loaded");
293
- // Validate configured model against available models
294
275
  try {
295
276
  const models = await client.listModels();
296
277
  const configured = config.copilotModel;
@@ -307,16 +288,17 @@ export async function initOrchestrator(client) {
307
288
  log.info({ skillDirectories }, "Skill directories");
308
289
  log.info("Persistent session mode — conversation history maintained by SDK");
309
290
  startHealthCheck();
310
- // Eagerly create/resume the default orchestrator session
311
291
  try {
312
- await ensureOrchestratorSession("default");
292
+ const defaultManager = registry.getOrCreate("default");
293
+ await defaultManager.ensureSession();
313
294
  }
314
295
  catch (err) {
315
296
  log.error({ err: err instanceof Error ? err.message : err }, "Failed to create initial session (will retry on first message)");
316
297
  }
317
298
  }
318
- /** How long to wait for the orchestrator to finish a turn (30 min default).
299
+ /** How long to wait for the orchestrator to finish a single session turn (30 min default).
319
300
  * Override with CHAPTERHOUSE_ORCHESTRATOR_TIMEOUT_MS env var (parsed as integer ms).
301
+ * Applies per-session-turn; concurrent sessions each have their own independent timeout.
320
302
  * Part of the 3-layer timing contract — see systemd unit TimeoutStopSec comment. */
321
303
  const DEFAULT_ORCHESTRATOR_TIMEOUT_MS = 1_800_000;
322
304
  export const ORCHESTRATOR_TIMEOUT_MS = (() => {
@@ -328,245 +310,218 @@ export const ORCHESTRATOR_TIMEOUT_MS = (() => {
328
310
  }
329
311
  return DEFAULT_ORCHESTRATOR_TIMEOUT_MS;
330
312
  })();
331
- /** Send a prompt on a session identified by sessionKey, return the response. */
332
- async function executeOnSession(sessionKey, prompt, callback, attachments, onActivity) {
333
- const projectRoot = sessionKey.startsWith("project:") ? sessionKey.slice("project:".length) : undefined;
334
- const session = await ensureOrchestratorSession(sessionKey, projectRoot);
335
- currentProcessingSessionKey = sessionKey;
336
- currentCallback = callback;
337
- currentActivityCallback = onActivity;
338
- let accumulated = "";
339
- let toolCallExecuted = false;
340
- let toolCallCount = 0;
341
- const unsubToolDone = session.on("tool.execution_complete", (event) => {
342
- toolCallExecuted = true;
343
- toolCallCount++;
344
- if (onActivity) {
345
- const data = event.data;
346
- const result = data.result;
347
- const resultPreview = typeof result?.content === "string" ? result.content.slice(0, 400) : undefined;
348
- const detailedContent = typeof result?.detailedContent === "string"
349
- ? result.detailedContent
350
- : typeof result?.content === "string"
351
- ? result.content
352
- : undefined;
353
- onActivity({
354
- kind: "tool_complete",
355
- toolCallId: data.toolCallId,
356
- success: data.success,
357
- resultPreview,
358
- detailedContent,
359
- });
360
- }
361
- });
362
- const unsubToolStart = onActivity
363
- ? session.on("tool.execution_start", (event) => {
364
- const data = event.data;
365
- onActivity({
366
- kind: "tool_start",
367
- toolCallId: data.toolCallId,
368
- toolName: data.toolName,
369
- mcpServerName: data.mcpServerName,
370
- arguments: data.arguments,
371
- });
372
- })
373
- : () => { };
374
- const unsubReasoning = onActivity
375
- ? session.on("assistant.reasoning_delta", (event) => {
376
- onActivity({
377
- kind: "thinking_delta",
378
- reasoningId: event.data.reasoningId,
379
- deltaContent: event.data.deltaContent,
380
- });
381
- })
382
- : () => { };
383
- const unsubSubStart = onActivity
384
- ? session.on("subagent.started", (event) => {
385
- const data = event.data;
386
- onActivity({
387
- kind: "subagent_started",
388
- toolCallId: data.toolCallId,
389
- agentName: data.agentName,
390
- agentDisplayName: data.agentDisplayName,
391
- agentDescription: data.agentDescription,
392
- });
393
- })
394
- : () => { };
395
- const unsubSubDone = onActivity
396
- ? session.on("subagent.completed", (event) => {
397
- const data = event.data;
398
- onActivity({
399
- kind: "subagent_completed",
400
- toolCallId: data.toolCallId,
401
- agentName: data.agentName,
402
- agentDisplayName: data.agentDisplayName,
403
- durationMs: data.durationMs,
404
- });
405
- })
406
- : () => { };
407
- const unsubSubFail = onActivity
408
- ? session.on("subagent.failed", (event) => {
409
- const data = event.data;
410
- onActivity({
411
- kind: "subagent_failed",
412
- toolCallId: data.toolCallId,
413
- agentName: data.agentName,
414
- agentDisplayName: data.agentDisplayName,
415
- error: data.error,
416
- });
417
- })
418
- : () => { };
419
- // Always persist SDK subagent dispatches to agent_tasks so Workers tab shows them.
420
- // These fire when the built-in `task` tool (Squad coordinator) routes work to a
421
- // specialist separate from `delegate_to_agent` which handles CH-registry agents.
422
- const db = getDb();
423
- const unsubSubStartDb = session.on("subagent.started", (event) => {
424
- try {
425
- const data = event.data;
426
- const agentSlug = (data.agentName || "unknown").toLowerCase().replace(/\s+/g, "-");
427
- const description = (data.agentDescription || data.agentDisplayName || `Squad dispatch: ${agentSlug}`).slice(0, 500);
428
- db.prepare(`INSERT OR IGNORE INTO agent_tasks (task_id, agent_slug, description, status, origin_channel, session_key, source) VALUES (?, ?, ?, 'running', ?, ?, 'squad')`).run(data.toolCallId, agentSlug, description, currentSourceChannel || null, sessionKey);
429
- }
430
- catch { /* non-fatal */ }
431
- });
432
- const unsubSubDoneDb = session.on("subagent.completed", (event) => {
433
- try {
434
- db.prepare(`UPDATE agent_tasks SET status = 'completed', completed_at = CURRENT_TIMESTAMP WHERE task_id = ?`).run(event.data.toolCallId);
435
- }
436
- catch { /* non-fatal */ }
437
- });
438
- const unsubSubFailDb = session.on("subagent.failed", (event) => {
313
+ /**
314
+ * Execute a single queued item on its session.
315
+ * Wraps the entire turn in AsyncLocalStorage so all tool handlers (e.g. delegate_to_agent,
316
+ * register_task) see the correct per-session context even when multiple sessions run
317
+ * concurrently. This is the core of the per-session isolation guarantee.
318
+ */
319
+ async function executeOnSession(manager, item) {
320
+ const { sessionKey } = manager;
321
+ const session = await manager.ensureSession();
322
+ // Update last-seen globals (backwards compat — for callers that inspect after a turn ends)
323
+ currentAuthenticatedUser = item.authUser;
324
+ currentAuthorizationHeader = item.authHeader;
325
+ return turnContextStorage.run({
326
+ sessionKey,
327
+ sourceChannel: item.sourceChannel,
328
+ channelKey: item.channelKey,
329
+ authUser: item.authUser,
330
+ authHeader: item.authHeader,
331
+ activityCallback: item.onActivity,
332
+ }, async () => {
333
+ let accumulated = "";
334
+ let toolCallExecuted = false;
335
+ let toolCallCount = 0;
336
+ const unsubToolDone = session.on("tool.execution_complete", (event) => {
337
+ toolCallExecuted = true;
338
+ toolCallCount++;
339
+ if (item.onActivity) {
340
+ const data = event.data;
341
+ const result = data.result;
342
+ const resultPreview = typeof result?.content === "string" ? result.content.slice(0, 400) : undefined;
343
+ const detailedContent = typeof result?.detailedContent === "string"
344
+ ? result.detailedContent
345
+ : typeof result?.content === "string"
346
+ ? result.content
347
+ : undefined;
348
+ item.onActivity({
349
+ kind: "tool_complete",
350
+ toolCallId: data.toolCallId,
351
+ success: data.success,
352
+ resultPreview,
353
+ detailedContent,
354
+ });
355
+ }
356
+ });
357
+ const unsubToolStart = item.onActivity
358
+ ? session.on("tool.execution_start", (event) => {
359
+ const data = event.data;
360
+ item.onActivity({
361
+ kind: "tool_start",
362
+ toolCallId: data.toolCallId,
363
+ toolName: data.toolName,
364
+ mcpServerName: data.mcpServerName,
365
+ arguments: data.arguments,
366
+ });
367
+ })
368
+ : () => { };
369
+ const unsubReasoning = item.onActivity
370
+ ? session.on("assistant.reasoning_delta", (event) => {
371
+ item.onActivity({
372
+ kind: "thinking_delta",
373
+ reasoningId: event.data.reasoningId,
374
+ deltaContent: event.data.deltaContent,
375
+ });
376
+ })
377
+ : () => { };
378
+ const unsubSubStart = item.onActivity
379
+ ? session.on("subagent.started", (event) => {
380
+ const data = event.data;
381
+ item.onActivity({
382
+ kind: "subagent_started",
383
+ toolCallId: data.toolCallId,
384
+ agentName: data.agentName,
385
+ agentDisplayName: data.agentDisplayName,
386
+ agentDescription: data.agentDescription,
387
+ });
388
+ })
389
+ : () => { };
390
+ const unsubSubDone = item.onActivity
391
+ ? session.on("subagent.completed", (event) => {
392
+ const data = event.data;
393
+ item.onActivity({
394
+ kind: "subagent_completed",
395
+ toolCallId: data.toolCallId,
396
+ agentName: data.agentName,
397
+ agentDisplayName: data.agentDisplayName,
398
+ durationMs: data.durationMs,
399
+ });
400
+ })
401
+ : () => { };
402
+ const unsubSubFail = item.onActivity
403
+ ? session.on("subagent.failed", (event) => {
404
+ const data = event.data;
405
+ item.onActivity({
406
+ kind: "subagent_failed",
407
+ toolCallId: data.toolCallId,
408
+ agentName: data.agentName,
409
+ agentDisplayName: data.agentDisplayName,
410
+ error: data.error,
411
+ });
412
+ })
413
+ : () => { };
414
+ // Always persist SDK subagent dispatches to agent_tasks so Workers tab shows them.
415
+ const db = getDb();
416
+ const unsubSubStartDb = session.on("subagent.started", (event) => {
417
+ try {
418
+ const data = event.data;
419
+ const agentSlug = (data.agentName || "unknown").toLowerCase().replace(/\s+/g, "-");
420
+ const description = (data.agentDescription || data.agentDisplayName || `Squad dispatch: ${agentSlug}`).slice(0, 500);
421
+ db.prepare(`INSERT OR IGNORE INTO agent_tasks (task_id, agent_slug, description, status, origin_channel, session_key, source) VALUES (?, ?, ?, 'running', ?, ?, 'squad')`).run(data.toolCallId, agentSlug, description, item.sourceChannel || null, sessionKey);
422
+ }
423
+ catch { /* non-fatal */ }
424
+ });
425
+ const unsubSubDoneDb = session.on("subagent.completed", (event) => {
426
+ try {
427
+ db.prepare(`UPDATE agent_tasks SET status = 'completed', completed_at = CURRENT_TIMESTAMP WHERE task_id = ?`).run(event.data.toolCallId);
428
+ }
429
+ catch { /* non-fatal */ }
430
+ });
431
+ const unsubSubFailDb = session.on("subagent.failed", (event) => {
432
+ try {
433
+ const data = event.data;
434
+ db.prepare(`UPDATE agent_tasks SET status = 'error', result = ?, completed_at = CURRENT_TIMESTAMP WHERE task_id = ?`).run(data.error || "Subagent failed", data.toolCallId);
435
+ }
436
+ catch { /* non-fatal */ }
437
+ });
438
+ const unsubDelta = session.on("assistant.message_delta", (event) => {
439
+ if (toolCallExecuted && accumulated.length > 0 && !accumulated.endsWith("\n")) {
440
+ accumulated += "\n";
441
+ }
442
+ toolCallExecuted = false;
443
+ accumulated += event.data.deltaContent;
444
+ item.callback(accumulated, false);
445
+ });
439
446
  try {
440
- const data = event.data;
441
- db.prepare(`UPDATE agent_tasks SET status = 'error', result = ?, completed_at = CURRENT_TIMESTAMP WHERE task_id = ?`).run(data.error || "Subagent failed", data.toolCallId);
442
- }
443
- catch { /* non-fatal */ }
444
- });
445
- const unsubDelta = session.on("assistant.message_delta", (event) => {
446
- // After a tool call completes, ensure a line break separates the text blocks
447
- // so they don't visually run together in the rendered chat.
448
- if (toolCallExecuted && accumulated.length > 0 && !accumulated.endsWith("\n")) {
449
- accumulated += "\n";
447
+ const result = await session.sendAndWait({ prompt: item.prompt, ...(item.attachments && item.attachments.length > 0 ? { attachments: item.attachments } : {}) }, ORCHESTRATOR_TIMEOUT_MS);
448
+ const finalContent = result?.data?.content || accumulated || "(No response)";
449
+ return finalContent;
450
450
  }
451
- toolCallExecuted = false;
452
- accumulated += event.data.deltaContent;
453
- callback(accumulated, false);
454
- });
455
- try {
456
- const result = await session.sendAndWait({ prompt, ...(attachments && attachments.length > 0 ? { attachments } : {}) }, ORCHESTRATOR_TIMEOUT_MS);
457
- const finalContent = result?.data?.content || accumulated || "(No response)";
458
- return finalContent;
459
- }
460
- catch (err) {
461
- const msg = err instanceof Error ? err.message : String(err);
462
- // On timeout, never throw — the message was already sent to the persistent
463
- // session and may have been (partially) processed. Return what we have.
464
- if (/timeout/i.test(msg)) {
465
- if (accumulated.length > 0) {
466
- log.warn({ timeoutSec: ORCHESTRATOR_TIMEOUT_MS / 1000, charCount: accumulated.length }, "Timeout with partial response — returning partial");
467
- return accumulated;
451
+ catch (err) {
452
+ const msg = err instanceof Error ? err.message : String(err);
453
+ if (/timeout/i.test(msg)) {
454
+ if (accumulated.length > 0) {
455
+ log.warn({ sessionKey, timeoutSec: ORCHESTRATOR_TIMEOUT_MS / 1000, charCount: accumulated.length }, "Timeout with partial response — returning partial");
456
+ return accumulated;
457
+ }
458
+ if (toolCallCount > 0) {
459
+ log.warn({ sessionKey, timeoutSec: ORCHESTRATOR_TIMEOUT_MS / 1000, toolCallCount }, "Timeout — tool calls ran but no text yet, session still working");
460
+ return "I'm still working on this — I've started processing but it's taking longer than expected. I'll send you the results when I'm done.";
461
+ }
462
+ log.warn({ sessionKey, timeoutSec: ORCHESTRATOR_TIMEOUT_MS / 1000 }, "Timeout with no activity session may be stuck");
463
+ return "Sorry, that request timed out before I could start working on it. Try again or break it into smaller pieces?";
468
464
  }
469
- // No text yet but tool calls ran — the session is working in the background
470
- // (e.g. delegate_to_agent dispatched). Don't error out.
471
- if (toolCallCount > 0) {
472
- log.warn({ timeoutSec: ORCHESTRATOR_TIMEOUT_MS / 1000, toolCallCount }, "Timeout — tool calls ran but no text yet, session still working");
473
- return "I'm still working on this — I've started processing but it's taking longer than expected. I'll send you the results when I'm done.";
465
+ if (/closed|destroy|disposed|invalid|expired|not found/i.test(msg)) {
466
+ log.warn({ sessionKey, msg }, "Session appears dead, will recreate");
467
+ manager.invalidateSession();
468
+ if (sessionKey === "default")
469
+ deleteState(ORCHESTRATOR_SESSION_KEY);
474
470
  }
475
- // No text, no tool calls — the session is truly stuck
476
- log.warn({ timeoutSec: ORCHESTRATOR_TIMEOUT_MS / 1000 }, "Timeout with no activity — session may be stuck");
477
- return "Sorry, that request timed out before I could start working on it. Try again or break it into smaller pieces?";
471
+ throw err;
478
472
  }
479
- // If the session is broken, invalidate it so it's recreated on next attempt
480
- if (/closed|destroy|disposed|invalid|expired|not found/i.test(msg)) {
481
- log.warn({ sessionKey, msg }, "Session appears dead, will recreate");
482
- sessionMap.delete(sessionKey);
483
- sessionModelMap.delete(sessionKey);
484
- if (sessionKey === "default")
485
- deleteState(ORCHESTRATOR_SESSION_KEY);
473
+ finally {
474
+ unsubDelta();
475
+ unsubToolDone();
476
+ unsubToolStart();
477
+ unsubReasoning();
478
+ unsubSubStart();
479
+ unsubSubDone();
480
+ unsubSubFail();
481
+ unsubSubStartDb();
482
+ unsubSubDoneDb();
483
+ unsubSubFailDb();
486
484
  }
487
- throw err;
488
- }
489
- finally {
490
- unsubDelta();
491
- unsubToolDone();
492
- unsubToolStart();
493
- unsubReasoning();
494
- unsubSubStart();
495
- unsubSubDone();
496
- unsubSubFail();
497
- unsubSubStartDb();
498
- unsubSubDoneDb();
499
- unsubSubFailDb();
500
- currentCallback = undefined;
501
- currentActivityCallback = undefined;
502
- currentProcessingSessionKey = undefined;
503
- }
485
+ });
504
486
  }
505
- /** Process the message queue one at a time. */
506
- async function processQueue() {
507
- if (processing) {
508
- if (messageQueue.length > 0) {
509
- log.debug({ queueLength: messageQueue.length }, "Message queued, orchestrator is busy");
510
- }
511
- return;
487
+ /**
488
+ * Process a single queued item: route model, handle @mentions, execute.
489
+ * This is the SessionManager worker — one call per turn, inside the drain loop.
490
+ */
491
+ async function processItem(item, manager) {
492
+ const { sessionKey } = manager;
493
+ if (item.targetAgent && item.targetAgent !== "chapterhouse") {
494
+ setActiveAgent(item.channelKey || "default", item.targetAgent);
495
+ return executeOnSession(manager, item);
512
496
  }
513
- processing = true;
514
- while (messageQueue.length > 0) {
515
- const item = messageQueue.shift();
516
- currentSourceChannel = item.sourceChannel;
517
- currentChannelKey = item.channelKey;
518
- const { sessionKey } = item;
519
- try {
520
- let result;
521
- if (item.targetAgent && item.targetAgent !== "chapterhouse") {
522
- // @mention switches the active agent — route through the session
523
- setActiveAgent(item.channelKey || "default", item.targetAgent);
524
- result = await executeOnSession(sessionKey, item.prompt, item.callback, item.attachments, item.onActivity);
497
+ const currentModel = manager.currentModel ?? config.copilotModel;
498
+ const routeResult = await resolveModel(item.prompt, currentModel, manager.recentTiers);
499
+ if (routeResult.switched) {
500
+ log.info({ model: routeResult.model, tier: routeResult.overrideName || routeResult.tier }, "Auto-routing: switching model");
501
+ config.copilotModel = routeResult.model;
502
+ const existingSession = manager.session;
503
+ if (existingSession) {
504
+ try {
505
+ await existingSession.setModel(routeResult.model);
506
+ manager.currentModel = routeResult.model;
507
+ log.info({ sessionKey }, "Model switched in-place");
525
508
  }
526
- else {
527
- // Route the model before executing
528
- const currentModel = sessionModelMap.get(sessionKey) ?? config.copilotModel;
529
- const routeResult = await resolveModel(item.prompt, currentModel, recentTiers);
530
- if (routeResult.switched) {
531
- log.info({ model: routeResult.model, tier: routeResult.overrideName || routeResult.tier }, "Auto-routing: switching model");
532
- config.copilotModel = routeResult.model;
533
- const existingSession = sessionMap.get(sessionKey);
534
- if (existingSession) {
535
- try {
536
- await existingSession.setModel(routeResult.model);
537
- sessionModelMap.set(sessionKey, routeResult.model);
538
- log.info({ sessionKey }, "Model switched in-place");
539
- }
540
- catch (err) {
541
- log.warn({ sessionKey, err: err instanceof Error ? err.message : err }, "setModel() failed, will recreate session");
542
- sessionMap.delete(sessionKey);
543
- if (sessionKey === "default")
544
- deleteState(ORCHESTRATOR_SESSION_KEY);
545
- }
546
- }
547
- }
548
- if (routeResult.tier) {
549
- recentTiers.push(routeResult.tier);
550
- if (recentTiers.length > 5)
551
- recentTiers = recentTiers.slice(-5);
552
- }
553
- lastRouteResult = routeResult;
554
- result = await executeOnSession(sessionKey, item.prompt, item.callback, item.attachments, item.onActivity);
509
+ catch (err) {
510
+ log.warn({ sessionKey, err: err instanceof Error ? err.message : err }, "setModel() failed, will recreate session");
511
+ manager.invalidateSession();
512
+ if (sessionKey === "default")
513
+ deleteState(ORCHESTRATOR_SESSION_KEY);
555
514
  }
556
- item.resolve(result);
557
515
  }
558
- catch (err) {
559
- item.reject(err);
560
- }
561
- currentSourceChannel = undefined;
562
- currentChannelKey = undefined;
563
516
  }
564
- processing = false;
517
+ if (routeResult.tier) {
518
+ manager.addRecentTier(routeResult.tier);
519
+ }
520
+ lastRouteResult = routeResult;
521
+ return executeOnSession(manager, item);
565
522
  }
566
523
  function isRecoverableError(err) {
567
524
  const msg = err instanceof Error ? err.message : String(err);
568
- // Timeouts are NOT retryable on a persistent session — the message was already
569
- // sent and likely processed; re-sending creates "duplicate" responses.
570
525
  if (/timeout/i.test(msg))
571
526
  return false;
572
527
  return /disconnect|connection|EPIPE|ECONNRESET|ECONNREFUSED|socket|closed|ENOENT|spawn|not found|expired|stale/i.test(msg);
@@ -576,14 +531,10 @@ export async function sendToOrchestrator(prompt, source, callback, attachments,
576
531
  updateRequestContext(source);
577
532
  const sourceLabel = source.type === "web" ? "web" : "background";
578
533
  logMessage("in", sourceLabel, prompt);
579
- // Derive the session key: project sessions come from web messages with a projectPath;
580
- // background completions carry their own sessionKey; everything else is "default".
581
534
  let sessionKey;
582
535
  if (source.type === "web" && source.projectPath && config.squadEnabled) {
583
536
  sessionKey = "project:" + normalizeProjectPath(source.projectPath);
584
- // Keep the legacy channel-project map in sync for tools that read it
585
537
  setChannelProject(source.connectionId, normalizeProjectPath(source.projectPath));
586
- // Bump last-used timestamp so sidebar can sort by real activity
587
538
  bumpProjectLastUsed(normalizeProjectPath(source.projectPath));
588
539
  }
589
540
  else if (source.type === "background" && source.sessionKey) {
@@ -593,36 +544,46 @@ export async function sendToOrchestrator(prompt, source, callback, attachments,
593
544
  sessionKey = "default";
594
545
  }
595
546
  const channelKey = source.type === "web" ? source.connectionId : "default";
596
- // Pass projectRoot to parseAtMention only for project sessions so the default
597
- // chat does not get Squad roster injection.
598
547
  const projectRoot = sessionKey.startsWith("project:") ? sessionKey.slice("project:".length) : undefined;
599
- // Parse @mention routing (e.g., "@coder fix the bug" → target "coder")
600
548
  const mention = parseAtMention(prompt, projectRoot);
601
549
  const targetAgent = mention?.agentSlug;
602
550
  const routedPrompt = mention ? mention.message : prompt;
603
- // Tag the prompt with its source channel
604
551
  const taggedPrompt = source.type === "background"
605
552
  ? routedPrompt
606
553
  : `[via ${sourceLabel}] ${routedPrompt}`;
607
- // Log role: background events are "system", user messages are "user"
608
554
  const logRole = source.type === "background" ? "system" : "user";
609
- // Determine the source channel for agent origin tracking
610
555
  const sourceChannel = source.type === "web" ? "web" : undefined;
611
- // Enqueue and process
556
+ // Capture auth context at enqueue time — prevents cross-session contamination
557
+ // when concurrent sessions are processing simultaneously.
558
+ const authUser = source.type === "web" ? source.user : undefined;
559
+ const authHeader = source.type === "web" ? source.authorizationHeader?.trim() || undefined : undefined;
560
+ const manager = registry.getOrCreate(sessionKey);
612
561
  void (async () => {
613
562
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
614
563
  try {
615
564
  const finalContent = await new Promise((resolve, reject) => {
616
- messageQueue.push({ prompt: taggedPrompt, attachments, callback, onActivity, sourceChannel, targetAgent, channelKey, sessionKey, resolve, reject });
617
- processQueue();
565
+ manager.enqueue({
566
+ prompt: taggedPrompt,
567
+ attachments,
568
+ callback,
569
+ // Cast: QueuedMessage.onActivity uses a wide event type to avoid circular
570
+ // type dependencies. orchestrator.ts always passes valid ActivityEvent objects.
571
+ onActivity: onActivity,
572
+ sourceChannel,
573
+ targetAgent,
574
+ channelKey,
575
+ sessionKey,
576
+ authUser,
577
+ authHeader,
578
+ resolve,
579
+ reject,
580
+ });
618
581
  });
619
- // Deliver response to user FIRST, then log best-effort
620
582
  callback(finalContent, true);
621
583
  try {
622
584
  logMessage("out", sourceLabel, finalContent);
623
585
  }
624
586
  catch { /* best-effort */ }
625
- // Log both sides of the conversation, scoped to the session
626
587
  try {
627
588
  logConversation(logRole, prompt, sourceLabel, sessionKey);
628
589
  }
@@ -631,9 +592,6 @@ export async function sendToOrchestrator(prompt, source, callback, attachments,
631
592
  logConversation("assistant", finalContent, sourceLabel, sessionKey);
632
593
  }
633
594
  catch { /* best-effort */ }
634
- // Episodic memory: if enough turns have accumulated since the last
635
- // summary, kick off a background write. Fire-and-forget — never blocks
636
- // the user reply path.
637
595
  if (copilotClient) {
638
596
  maybeWriteEpisode(copilotClient).catch((err) => {
639
597
  log.error({ err: err instanceof Error ? err.message : err }, "Episode write failed (non-fatal)");
@@ -643,7 +601,6 @@ export async function sendToOrchestrator(prompt, source, callback, attachments,
643
601
  }
644
602
  catch (err) {
645
603
  const msg = err instanceof Error ? err.message : String(err);
646
- // Don't retry cancelled messages
647
604
  if (/cancelled|abort/i.test(msg)) {
648
605
  return;
649
606
  }
@@ -651,7 +608,6 @@ export async function sendToOrchestrator(prompt, source, callback, attachments,
651
608
  const delay = RECONNECT_DELAYS_MS[Math.min(attempt, RECONNECT_DELAYS_MS.length - 1)];
652
609
  log.warn({ msg, attempt: attempt + 1, maxRetries: MAX_RETRIES, delayMs: delay }, "Recoverable error, retrying");
653
610
  await sleep(delay);
654
- // Reset client before retry in case the connection is stale
655
611
  try {
656
612
  await ensureClient();
657
613
  }
@@ -665,34 +621,28 @@ export async function sendToOrchestrator(prompt, source, callback, attachments,
665
621
  }
666
622
  })();
667
623
  }
668
- /** Cancel the in-flight message and drain the queue. */
624
+ /** Cancel all queued and in-flight messages across all active sessions. */
669
625
  export async function cancelCurrentMessage() {
670
- // Drain any queued messages
671
- const drained = messageQueue.length;
672
- while (messageQueue.length > 0) {
673
- const item = messageQueue.shift();
674
- item.reject(new Error("Cancelled"));
675
- }
676
- // Abort the active session request
677
- const activeSession = currentProcessingSessionKey ? sessionMap.get(currentProcessingSessionKey) : undefined;
678
- if (activeSession && currentCallback) {
679
- try {
680
- await activeSession.abort();
681
- log.info({ sessionKey: currentProcessingSessionKey }, "Aborted in-flight request");
682
- return true;
683
- }
684
- catch (err) {
685
- log.error({ err: err instanceof Error ? err.message : err }, "Abort failed");
626
+ if (!registry)
627
+ return false;
628
+ let drained = 0;
629
+ const aborts = [];
630
+ for (const [, manager] of registry.getAll()) {
631
+ drained += manager.cancelQueued();
632
+ if (manager.isProcessing) {
633
+ aborts.push(manager.abortCurrentTurn());
686
634
  }
687
635
  }
688
- return drained > 0;
636
+ const results = await Promise.all(aborts);
637
+ const aborted = results.some(Boolean);
638
+ return aborted || drained > 0;
689
639
  }
690
640
  /** Switch the model on the live default orchestrator session without destroying it. */
691
641
  export function switchSessionModel(newModel) {
692
- const session = sessionMap.get("default");
693
- if (session) {
694
- return session.setModel(newModel).then(() => {
695
- sessionModelMap.set("default", newModel);
642
+ const manager = registry?.get("default");
643
+ if (manager?.session) {
644
+ return manager.session.setModel(newModel).then(() => {
645
+ manager.currentModel = newModel;
696
646
  });
697
647
  }
698
648
  return Promise.resolve();
@@ -700,9 +650,9 @@ export function switchSessionModel(newModel) {
700
650
  /** Return a snapshot of currently running workers for API/UI consumers. */
701
651
  export function getAgentInfo() {
702
652
  const allTasks = getActiveTasks().filter((t) => t.status === "running");
703
- const registry = getAgentRegistry();
653
+ const reg = getAgentRegistry();
704
654
  return allTasks.map((t) => {
705
- const agent = registry.find((a) => a.slug === t.agentSlug);
655
+ const agent = reg.find((a) => a.slug === t.agentSlug);
706
656
  return {
707
657
  slug: t.agentSlug,
708
658
  name: agent?.name || t.agentSlug,
@@ -714,16 +664,11 @@ export function getAgentInfo() {
714
664
  }
715
665
  /** Clean up on shutdown/restart. */
716
666
  export async function shutdownAgents() {
717
- for (const [key, session] of sessionMap) {
718
- try {
719
- await session.disconnect();
720
- }
721
- catch (err) {
722
- log.error({ sessionKey: key, err: err instanceof Error ? err.message : err }, "Error disconnecting session during shutdown");
723
- }
667
+ if (!registry) {
668
+ await clearActiveTasks();
669
+ return;
724
670
  }
725
- sessionMap.clear();
726
- sessionModelMap.clear();
671
+ await registry.shutdown();
727
672
  await clearActiveTasks();
728
673
  }
729
674
  //# sourceMappingURL=orchestrator.js.map