@kinqs/brainrouter-cli 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/.env.example +55 -48
  2. package/bin/cli.cjs +71 -0
  3. package/dist/agent/agent.d.ts +212 -2
  4. package/dist/agent/agent.js +428 -38
  5. package/dist/cli/banner.d.ts +60 -0
  6. package/dist/cli/banner.js +199 -0
  7. package/dist/cli/cliPrompt.d.ts +69 -0
  8. package/dist/cli/cliPrompt.js +287 -0
  9. package/dist/cli/commands/_helpers.js +6 -6
  10. package/dist/cli/commands/guard.js +75 -10
  11. package/dist/cli/commands/mcp.d.ts +17 -0
  12. package/dist/cli/commands/mcp.js +121 -0
  13. package/dist/cli/commands/memory.js +2 -2
  14. package/dist/cli/commands/obs.js +22 -22
  15. package/dist/cli/commands/session.js +13 -5
  16. package/dist/cli/commands/ui.js +97 -45
  17. package/dist/cli/commands/workflow.d.ts +18 -0
  18. package/dist/cli/commands/workflow.js +314 -43
  19. package/dist/cli/repl.js +219 -132
  20. package/dist/cli/spinner.d.ts +34 -0
  21. package/dist/cli/spinner.js +36 -0
  22. package/dist/cli/statusline.d.ts +67 -0
  23. package/dist/cli/statusline.js +204 -0
  24. package/dist/cli/theme.d.ts +79 -0
  25. package/dist/cli/theme.js +106 -0
  26. package/dist/cli/whereView.d.ts +81 -0
  27. package/dist/cli/whereView.js +245 -0
  28. package/dist/config/config.d.ts +40 -0
  29. package/dist/config/config.js +45 -73
  30. package/dist/index.js +80 -13
  31. package/dist/memory/briefing.d.ts +10 -0
  32. package/dist/memory/briefing.js +69 -1
  33. package/dist/prompt/breadthHint.d.ts +5 -0
  34. package/dist/prompt/breadthHint.js +44 -0
  35. package/dist/prompt/systemPrompt.d.ts +34 -0
  36. package/dist/prompt/systemPrompt.js +124 -108
  37. package/dist/runtime/dangerousCommand.d.ts +53 -0
  38. package/dist/runtime/dangerousCommand.js +105 -0
  39. package/dist/runtime/mcpClient.d.ts +38 -1
  40. package/dist/runtime/mcpClient.js +90 -2
  41. package/dist/state/goalStore.d.ts +98 -17
  42. package/dist/state/goalStore.js +132 -42
  43. package/dist/state/preferencesStore.d.ts +67 -3
  44. package/dist/state/preferencesStore.js +84 -1
  45. package/dist/state/workflowArtifacts.d.ts +63 -2
  46. package/dist/state/workflowArtifacts.js +120 -8
  47. package/dist/tests/_helpers.d.ts +31 -0
  48. package/dist/tests/_helpers.js +91 -0
  49. package/package.json +5 -4
@@ -2,8 +2,9 @@ import fs from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import { exec } from 'node:child_process';
4
4
  import { promisify } from 'node:util';
5
+ import { randomUUID } from 'node:crypto';
5
6
  import chalk from 'chalk';
6
- import { askYesNo } from '../cli/cliPrompt.js';
7
+ import { askChoice, askYesNo, getActiveReadline, NoTTYError } from '../cli/cliPrompt.js';
7
8
  import { appendTranscriptEntry } from '../state/sessionStore.js';
8
9
  import { buildSystemPrompt, loadWorkspaceInstructionSummary } from '../prompt/systemPrompt.js';
9
10
  import { formatPlan, readPlan, updatePlan } from '../state/taskStore.js';
@@ -14,7 +15,8 @@ import { acquireLLMSlot } from '../runtime/llmSemaphore.js';
14
15
  import { blockGoal, completeGoal, formatGoalBlock, readGoal } from '../state/goalStore.js';
15
16
  import { runHooks } from '../state/hooksStore.js';
16
17
  import { resolveSandboxConfig, runShell } from '../runtime/sandbox.js';
17
- import { readPreferences } from '../state/preferencesStore.js';
18
+ import { isDangerousCommand, resolveRunCommandApproval } from '../runtime/dangerousCommand.js';
19
+ import { readPreferences, resolveEffort } from '../state/preferencesStore.js';
18
20
  import { startSpan, traceEvent } from '../runtime/tracing.js';
19
21
  import { buildHookifyContext, evaluateHookify, listHookifyRules } from '../state/hookifyStore.js';
20
22
  import { renderCompactSystemMessage, runCompaction } from '../prompt/compactor.js';
@@ -146,6 +148,41 @@ export const LOCAL_TOOLS = [
146
148
  createReadAgentTranscriptTool(),
147
149
  createCloseAgentTool(),
148
150
  createRouteAgentTool(),
151
+ {
152
+ name: 'ask_user_choice',
153
+ description: 'Pause the turn and ask the human to commit to ONE of 2–4 mutually exclusive approaches. ' +
154
+ 'Renders an arrow-key picker (↑/↓ navigate, ENTER confirm; SPACE toggles in multiSelect mode) ' +
155
+ 'with an always-on "Other" row that drops to a free-text prompt — the user is never trapped between bad options. ' +
156
+ 'Returns { answer: <chosen label or free-text> } in single-select, or { answer: [labels/free-text…] } in multiSelect. ' +
157
+ 'Use ONLY when there is genuine ambiguity that needs the user\'s judgment — NOT for trivial yes/no confirmations ' +
158
+ '(`askYesNo` is wired into approval gates already), NOT for things you can decide yourself with the available context, ' +
159
+ 'and NOT as a substitute for thinking. ' +
160
+ 'Errors in non-interactive runs (CI / piped / `brainrouter run`) and when the user cancels (Esc/q/Ctrl+C); ' +
161
+ 'on either error, decide yourself and say which option you picked and why.',
162
+ inputSchema: {
163
+ type: 'object',
164
+ properties: {
165
+ question: { type: 'string', description: 'The question to ask the user (complete sentence ending with `?`).' },
166
+ header: { type: 'string', description: 'Short chip-style label (≤12 chars) shown above the question, e.g. "Auth method" or "Storage".' },
167
+ options: {
168
+ type: 'array',
169
+ description: '2–4 mutually exclusive choices. Each option needs a short label and a one-line description.',
170
+ minItems: 2,
171
+ maxItems: 4,
172
+ items: {
173
+ type: 'object',
174
+ properties: {
175
+ label: { type: 'string', description: 'Short display text (1–5 words).' },
176
+ description: { type: 'string', description: 'One-line explanation of what this option means or what will happen if chosen.' },
177
+ },
178
+ required: ['label', 'description'],
179
+ },
180
+ },
181
+ multiSelect: { type: 'boolean', description: 'When true, allow the user to pick multiple options (comma-separated input). Defaults to false.' },
182
+ },
183
+ required: ['question', 'header', 'options'],
184
+ },
185
+ },
149
186
  {
150
187
  name: 'update_plan',
151
188
  description: 'Create or update the durable CLI task plan. Use this for multi-step work and keep at most one item in_progress.',
@@ -326,6 +363,28 @@ export class Agent {
326
363
  recalledRecordIds = [];
327
364
  recalledRecords = [];
328
365
  lastBriefingSources = [];
366
+ /**
367
+ * 10b: latest MCP tool inventory captured by `listTools()` calls. Used by
368
+ * `createSystemMessage` to decide whether the BrainRouter memory section
369
+ * should render — when `memory_recall` is missing from this list (the
370
+ * cloud brain is offline), the prompt swaps to a brain-offline notice so
371
+ * the model doesn't try to call tools that aren't there. Undefined until
372
+ * the first successful list; treated as "assume online" by the prompt
373
+ * builder until then (back-compat for callers that don't list pre-turn).
374
+ */
375
+ lastKnownMcpTools;
376
+ /**
377
+ * 9b: gated recall state. `recallHasFiredThisSession` flips to true on the
378
+ * first successful briefing injection so subsequent turns can skip the
379
+ * fresh recall pull unless a gated trigger fires.
380
+ * `recallNextTurnIsPostCompaction` is set by `compactHistory()` to force the next turn through
381
+ * the full briefing path (compaction just dropped the prior briefing as
382
+ * collateral; replay it once so the model isn't blind). Both are
383
+ * cleared on `loadHistory` / `fork` / `bootstrapSession` so a fresh
384
+ * session re-pulls.
385
+ */
386
+ recallHasFiredThisSession = false;
387
+ recallNextTurnIsPostCompaction = false;
329
388
  roleOverlay;
330
389
  accessMode;
331
390
  silent;
@@ -360,7 +419,15 @@ export class Agent {
360
419
  this.llmConfig = llmConfig;
361
420
  this.workspaceRoot = options.workspaceRoot;
362
421
  this.launchCwd = options.launchCwd;
363
- this.sessionKey = options.sessionKey ?? `brainrouter-cli:${this.workspaceRoot}`;
422
+ // Each CLI process gets a fresh sessionKey by default. The previous
423
+ // workspace-derived fallback (`brainrouter-cli:<workspaceRoot>`) made
424
+ // MCP's `memory_resolve_session` fall into its workspace-cache branch
425
+ // and return the same UUID for every CLI in the workspace, so two
426
+ // concurrent CLIs shared one goal/plan/working bucket. A randomUUID
427
+ // here is accepted by MCP's `isUniqueId` and echoed back as-is, so
428
+ // each CLI is its own session for local state. The memory DB is
429
+ // userId-scoped, so cross-CLI recall continuity is unaffected.
430
+ this.sessionKey = options.sessionKey ?? randomUUID();
364
431
  this.roleOverlay = options.roleOverlay;
365
432
  this.accessMode = options.accessMode ?? 'shell';
366
433
  this.silent = options.silent ?? false;
@@ -388,6 +455,10 @@ export class Agent {
388
455
  'spawn_agent', 'spawn_agents', 'list_agents', 'wait_agent', 'wait_agents',
389
456
  'read_agent_transcript', 'close_agent', 'route_agent',
390
457
  'goal_complete', 'goal_blocked',
458
+ // ask_user_choice doesn't touch the workspace — it's an interaction
459
+ // primitive, so it stays available in every access mode (and is gated
460
+ // structurally by activeReadline / isTTY in the helper itself).
461
+ 'ask_user_choice',
391
462
  ]);
392
463
  const writeAdds = new Set(['write_file', 'edit_file', 'apply_patch']);
393
464
  const shellAdds = new Set(['run_command']);
@@ -428,6 +499,16 @@ export class Agent {
428
499
  catch (err) {
429
500
  // Non-fatal: continue with local tools only
430
501
  }
502
+ // 10b: cache the inventory so `createSystemMessage` can render a
503
+ // brain-online vs brain-offline prompt. Refresh chatHistory[0]
504
+ // whenever the inventory shape changed (online → offline or vice
505
+ // versa) so the next LLM call sees the correct system message.
506
+ const prevTools = this.lastKnownMcpTools?.map((t) => t.name).sort().join(',');
507
+ this.lastKnownMcpTools = mcpTools.map((t) => ({ name: t.name }));
508
+ const newTools = this.lastKnownMcpTools.map((t) => t.name).sort().join(',');
509
+ if (prevTools !== newTools && this.chatHistory.length > 0 && this.chatHistory[0].role === 'system') {
510
+ this.chatHistory[0] = this.createSystemMessage();
511
+ }
431
512
  const allowed = this.allowedToolsForAccess();
432
513
  const filteredLocalTools = LOCAL_TOOLS.filter(t => allowed.has(t.name));
433
514
  // Hide MCP tools we already call automatically. Small models otherwise
@@ -489,6 +570,29 @@ export class Agent {
489
570
  callbacks.onToolEnd('breadth-detector', { success: true, summary: `fan-out hint injected (${intent.signals.length} signals)` });
490
571
  }
491
572
  }
573
+ // Per-turn goal anchor: re-inject a FRESH goal block at the end of the
574
+ // chatHistory's system messages (replaceTaggedSystemMessage appends), so
575
+ // it lands right before the user prompt. Pre-9d the goal block was ALSO
576
+ // embedded in the foundational system message (via createSystemMessage),
577
+ // which meant every turn carried two copies; 9d made this anchor the
578
+ // single source — `createSystemMessage` no longer touches goal state.
579
+ // The fresh re-push every iteration keeps the up-to-date iteration
580
+ // counter in immediate-context distance and prevents the long /goal
581
+ // continuation-loop drift that PR #26 originally addressed. The anchor
582
+ // also auto-folds the final-budget-turn wrap-up directive (via
583
+ // `formatGoalBlock`'s internal `goalIsOnFinalBudgetTurn` check), so
584
+ // the separate `goal-budget-steering` tagged message is gone too.
585
+ if (!this.silent) {
586
+ const activeGoal = readGoal(this.workspaceRoot, this.sessionKey);
587
+ if (activeGoal?.text && activeGoal.status === 'active') {
588
+ this.replaceTaggedSystemMessage('goal-anchor', formatGoalBlock(activeGoal));
589
+ }
590
+ else {
591
+ // No active goal — drop any stale anchor from a prior /goal so the
592
+ // model doesn't keep seeing a completed/cleared goal as "current."
593
+ this.removeTaggedSystemMessage('goal-anchor');
594
+ }
595
+ }
492
596
  const userMsg = { role: 'user', content: prompt };
493
597
  this.chatHistory.push(userMsg);
494
598
  this.recordTranscript(userMsg);
@@ -513,7 +617,11 @@ export class Agent {
513
617
  callbacks.onStatusUpdate(`Thinking (turn ${loopCount})...`);
514
618
  let response;
515
619
  try {
516
- response = await callOpenAI(this.llmConfig, this.chatHistory, allTools);
620
+ // Re-resolve every loop iteration so an in-session `/effort` flip
621
+ // (which only refreshes the system prompt) also updates the next
622
+ // request's reasoning_effort slot — no restart needed.
623
+ const effort = resolveEffort(this.workspaceRoot).effort;
624
+ response = await callOpenAI(this.llmConfig, this.chatHistory, allTools, { effort });
517
625
  }
518
626
  catch (err) {
519
627
  throw new Error(`LLM Execution failed: ${err.message}`);
@@ -935,37 +1043,49 @@ export class Agent {
935
1043
  if (this.accessMode !== 'shell') {
936
1044
  return `Command execution denied: agent access mode is "${this.accessMode}".`;
937
1045
  }
938
- // Approval gating. Two cases:
939
- // Interactive parent (this.silent === false): show y/N unless
940
- // autoApproveShell is set (i.e. /yolo on).
941
- // Silent child: cannot prompt; the previous code path silently
942
- // auto-approved, which let a spawn_agent({role:'verifier'}) child
943
- // run arbitrary shell with no user gate — a sandbox bypass. Now
944
- // refuse unless the parent has explicitly opted in via prefs.
1046
+ // Approval gating routes through the pure resolver in
1047
+ // runtime/dangerousCommand.ts. Three outcomes:
1048
+ // • auto-approve: fast mode + safe command (or silent child whose
1049
+ // parent has opted in via fast mode).
1050
+ // • ask: planning mode, OR fast mode but the command matched the
1051
+ // dangerous heuristic (rm -rf, sudo, force-push, …).
1052
+ // • deny-silent: silent child agents can't answer y/N, so safe
1053
+ // commands need parent opt-in (fast mode) and dangerous commands
1054
+ // are always denied.
945
1055
  const prefs = readPreferences(this.workspaceRoot);
946
- if (this.silent) {
947
- if (!prefs.autoApproveShell) {
948
- return (`Command execution denied: silent child agents may not run shell ` +
949
- `without parent opt-in. Set \`autoApproveShell\` (via /yolo on) ` +
950
- `in the workspace preferences, or have a parent agent run this command.`);
1056
+ const approval = resolveRunCommandApproval(prefs, cmd, { silent: this.silent });
1057
+ if (approval === 'deny-silent') {
1058
+ if (isDangerousCommand(cmd)) {
1059
+ return (`Command execution denied: dangerous command in a silent child agent. ` +
1060
+ `Silent children can't answer the y/N prompt, so destructive commands ` +
1061
+ `(rm -rf, sudo, force-push, …) are refused regardless of /mode. ` +
1062
+ `Have a parent agent run this command, or split it into a safer ` +
1063
+ `equivalent.`);
951
1064
  }
952
- console.log(chalk.gray(`▶ Auto-approved (silent child): ${chalk.cyan(cmd)}`));
1065
+ return (`Command execution denied: silent child agents may not run shell ` +
1066
+ `without parent opt-in. Switch the session to \`/mode fast\` (or set ` +
1067
+ `the legacy \`autoApproveShell\` pref) to let silent children run ` +
1068
+ `safe commands, or have a parent agent run this command.`);
953
1069
  }
954
- else if (!prefs.autoApproveShell) {
955
- // Use the parent REPL's readline interface for the y/N prompt.
956
- // Spinning up an inquirer prompt opens a second readline against
957
- // the same stdin and dumps a stray "line" event back into the
958
- // parent rl when it exits, which used to surface as the bogus
959
- // "A previous turn is still running" warning.
960
- console.log(`\n${chalk.yellow('⚠️ Command execution request:')} ${chalk.cyan(cmd)}`);
1070
+ if (approval === 'auto-approve') {
1071
+ const tag = this.silent ? 'Auto-approved (silent child)' : 'Auto-approved';
1072
+ console.log(chalk.gray(`▶ ${tag}: ${chalk.cyan(cmd)}`));
1073
+ }
1074
+ else {
1075
+ // approval === 'ask' interactive y/N. Use the parent REPL's
1076
+ // readline interface; spinning up an inquirer prompt opens a second
1077
+ // readline against the same stdin and dumps a stray "line" event
1078
+ // back into the parent rl when it exits, which used to surface as
1079
+ // the bogus "A previous turn is still running" warning.
1080
+ const dangerNote = isDangerousCommand(cmd)
1081
+ ? chalk.red(' (flagged as potentially destructive)')
1082
+ : '';
1083
+ console.log(`\n${chalk.yellow('⚠️ Command execution request:')} ${chalk.cyan(cmd)}${dangerNote}`);
961
1084
  const approved = await askYesNo('Allow execution? (y/N) ', false);
962
1085
  if (!approved) {
963
1086
  return 'Command execution rejected by user.';
964
1087
  }
965
1088
  }
966
- else {
967
- console.log(chalk.gray(`▶ Auto-approved: ${chalk.cyan(cmd)}`));
968
- }
969
1089
  const sandboxConfig = resolveSandboxConfig(this.workspaceRoot, {
970
1090
  readPaths: prefs.sandboxReadPaths,
971
1091
  writePaths: prefs.sandboxWritePaths,
@@ -1026,6 +1146,48 @@ export class Agent {
1026
1146
  }, this.sessionKey);
1027
1147
  return formatPlan(state);
1028
1148
  }
1149
+ case 'ask_user_choice': {
1150
+ const question = String(args.question ?? '').trim();
1151
+ const header = String(args.header ?? '').trim();
1152
+ const rawOptions = Array.isArray(args.options) ? args.options : [];
1153
+ if (!question)
1154
+ throw new Error('ask_user_choice requires a non-empty `question`.');
1155
+ if (!header)
1156
+ throw new Error('ask_user_choice requires a non-empty `header`.');
1157
+ if (rawOptions.length < 2 || rawOptions.length > 4) {
1158
+ throw new Error(`ask_user_choice requires 2–4 options; received ${rawOptions.length}.`);
1159
+ }
1160
+ const options = rawOptions.map((o, i) => {
1161
+ const label = String(o?.label ?? '').trim();
1162
+ const description = String(o?.description ?? '').trim();
1163
+ if (!label)
1164
+ throw new Error(`ask_user_choice option ${i + 1} is missing "label".`);
1165
+ if (!description)
1166
+ throw new Error(`ask_user_choice option ${i + 1} is missing "description".`);
1167
+ return { label, description };
1168
+ });
1169
+ // Silent child agents have no parent stdin/REPL bridge, so the
1170
+ // helper's TTY check would error anyway — but giving a clearer message
1171
+ // up front saves the LLM an iteration.
1172
+ if (this.silent) {
1173
+ throw new NoTTYError('ask_user_choice is not available to silent child agents. Decide the answer yourself, ' +
1174
+ 'state which option you picked and why, and return that as your final answer to the parent.');
1175
+ }
1176
+ // Eager TTY check so we fail without disturbing the screen. askChoice
1177
+ // also checks (defense-in-depth for direct callers), but doing it here
1178
+ // means the LLM gets a clean error before the picker tries to render.
1179
+ if (!getActiveReadline() || !process.stdin.isTTY) {
1180
+ throw new NoTTYError('ask_user_choice requires an interactive TTY. ' +
1181
+ 'Fall back to deciding yourself and state which option you picked and why.');
1182
+ }
1183
+ // header is rendered by the picker itself (chip line at the top of
1184
+ // the frame), so we just thread it through opts.
1185
+ const answer = await askChoice(question, options, {
1186
+ multiSelect: !!args.multiSelect,
1187
+ header,
1188
+ });
1189
+ return JSON.stringify({ answer });
1190
+ }
1029
1191
  case 'goal_complete': {
1030
1192
  const proof = String(args.proof ?? '').trim();
1031
1193
  if (!proof)
@@ -1102,6 +1264,10 @@ export class Agent {
1102
1264
  next.push({ role: 'user', content: lastUserMessage });
1103
1265
  this.chatHistory = next;
1104
1266
  this.initialized = true;
1267
+ // 9b: compaction just dropped the prior briefing as collateral —
1268
+ // force the next turn through the full recall path even in gated
1269
+ // mode so the model isn't blind to what was load-bearing.
1270
+ this.recallNextTurnIsPostCompaction = true;
1105
1271
  return { ...result, replacedMessages: before };
1106
1272
  }
1107
1273
  /** Runtime model switch. Used by `/model` slash command. */
@@ -1139,6 +1305,10 @@ export class Agent {
1139
1305
  });
1140
1306
  this.chatHistory = [this.createSystemMessage(), ...replay];
1141
1307
  this.initialized = true;
1308
+ // 9b: a freshly-loaded history is a session boundary; reset gated
1309
+ // recall state so the next turn refreshes the briefing.
1310
+ this.recallHasFiredThisSession = false;
1311
+ this.recallNextTurnIsPostCompaction = false;
1142
1312
  return replay.length;
1143
1313
  }
1144
1314
  /** Cumulative token usage across the last runTurn. Cleared at each new turn. */
@@ -1206,6 +1376,23 @@ export class Agent {
1206
1376
  const marker = `<!--brainrouter:${tag}-->\n`;
1207
1377
  this.chatHistory = this.chatHistory.filter((msg) => !(msg.role === 'system' && typeof msg.content === 'string' && msg.content.startsWith(marker)));
1208
1378
  }
1379
+ /**
1380
+ * Zero the in-process counters that back `/tokens`. Call this on any
1381
+ * conceptual session boundary (`/resume`, `fork`) — otherwise the parent
1382
+ * row keeps accumulating across the switch and "this session" no longer
1383
+ * matches the displayed sessionKey.
1384
+ */
1385
+ resetSessionCounters() {
1386
+ this.sessionUsage = { promptTokens: 0, completionTokens: 0, calls: 0, turns: 0 };
1387
+ this.memoryMetrics = {
1388
+ briefingTokensInjected: 0,
1389
+ offloadCharsAvoided: 0,
1390
+ recallRecordsConsulted: 0,
1391
+ };
1392
+ // 9b: session-boundary reset for gated recall.
1393
+ this.recallHasFiredThisSession = false;
1394
+ this.recallNextTurnIsPostCompaction = false;
1395
+ }
1209
1396
  /** Fork the current chat history into a fresh sessionKey. Returns the new key. */
1210
1397
  fork(newSessionKey) {
1211
1398
  this.sessionKey = newSessionKey;
@@ -1217,6 +1404,7 @@ export class Agent {
1217
1404
  else {
1218
1405
  this.chatHistory = [this.createSystemMessage(), ...this.chatHistory];
1219
1406
  }
1407
+ this.resetSessionCounters();
1220
1408
  return this.sessionKey;
1221
1409
  }
1222
1410
  async bootstrapSession(callbacks) {
@@ -1237,26 +1425,62 @@ export class Agent {
1237
1425
  this.chatHistory = [this.createSystemMessage()];
1238
1426
  this.initialized = true;
1239
1427
  }
1428
+ /**
1429
+ * Public, callback-free wrapper around bootstrapSession for slash commands
1430
+ * that mutate per-session state (notably `/goal`) BEFORE any runTurn has
1431
+ * fired. Without this, the FIRST `/goal` of a session writes goal.json
1432
+ * under the deterministic fallback sessionKey ("brainrouter-cli:<path>")
1433
+ * because bootstrap hasn't happened yet, but every subsequent runTurn
1434
+ * reads from the MCP-resolved UUID sessionKey — split-brain that left
1435
+ * the agent reading a stale goal from a different directory.
1436
+ *
1437
+ * Idempotent: returns immediately if already initialized. Tolerates
1438
+ * missing MCP — falls back to the deterministic key the same way
1439
+ * bootstrapSession does.
1440
+ */
1441
+ async ensureInitialized() {
1442
+ if (this.initialized)
1443
+ return;
1444
+ // Stub the callbacks bootstrapSession expects — no UI plumbing needed
1445
+ // for the eager-init path; the status line is for runTurn's spinner.
1446
+ await this.bootstrapSession({
1447
+ onStatusUpdate: () => { },
1448
+ onToolStart: () => { },
1449
+ onToolEnd: () => { },
1450
+ });
1451
+ }
1240
1452
  createSystemMessage() {
1241
1453
  const prefs = readPreferences(this.workspaceRoot);
1454
+ // 10b: pass the connected MCP tool inventory so `buildSystemPrompt`
1455
+ // can omit the BrainRouter memory section when the brain is offline.
1456
+ // The cached `lastKnownMcpTools` is populated by every successful
1457
+ // `listTools()` (see `runTurn` and `bootstrapSession`); when no tools
1458
+ // have been seen yet, leave it undefined — `buildSystemPrompt` treats
1459
+ // that as "assume brain online" for back-compat.
1460
+ const connectedMcpTools = this.lastKnownMcpTools?.map((t) => t.name);
1242
1461
  const base = this.systemPromptOverride ?? buildSystemPrompt({
1243
1462
  workspaceRoot: this.workspaceRoot,
1244
1463
  launchCwd: this.launchCwd,
1245
1464
  sessionKey: this.sessionKey,
1246
1465
  instructionSummary: loadWorkspaceInstructionSummary(this.workspaceRoot),
1247
1466
  personality: prefs.personality,
1467
+ activeSkill: this.activeSkill,
1468
+ executionMode: prefs.executionMode,
1469
+ reviewPolicy: prefs.reviewPolicy,
1470
+ effort: resolveEffort(this.workspaceRoot).effort,
1471
+ connectedMcpTools,
1248
1472
  });
1249
1473
  const parts = [base];
1250
1474
  if (this.roleOverlay)
1251
1475
  parts.push(this.roleOverlay);
1252
- // Sticky goal lives on disk so it survives CLI restarts; injected here so
1253
- // every turn (including the first after `/resume`) sees it. Goals are
1254
- // scoped to the current sessionKey so /side and /fork don't drag their
1255
- // parent's goal along, but a workspace-level legacy goal still works as a
1256
- // fallback for sessions that don't have one yet.
1257
- const goal = readGoal(this.workspaceRoot, this.sessionKey);
1258
- if (goal?.text)
1259
- parts.push(formatGoalBlock(goal));
1476
+ // Goal text used to be appended here AND re-pushed as a per-turn
1477
+ // `goal-anchor` tagged system message (runTurn around line 680), which
1478
+ // meant the whole goal block landed in the prompt twice every turn.
1479
+ // 9d removed the duplicate; the per-turn anchor is the single owner
1480
+ // of goal state (text, status, budget, contract reminders, and the
1481
+ // final-budget wrap-up directive). `runTurn` re-injects it via
1482
+ // `formatGoalBlock` immediately before the user message is appended,
1483
+ // so even first-turn-after-`/resume` sees the goal.
1260
1484
  return { role: 'system', content: parts.join('\n\n') };
1261
1485
  }
1262
1486
  async injectRecallContext(prompt, mcpTools, callbacks) {
@@ -1267,7 +1491,58 @@ export class Agent {
1267
1491
  callbacks.onMemoryEvent?.({ kind: 'skipped', reason: this.silent ? 'silent agent (child)' : 'recall disabled' });
1268
1492
  return;
1269
1493
  }
1494
+ // 9b: gate recall instead of firing unconditionally every turn. Pre-9b
1495
+ // every turn paid ~3-10K tokens for a fresh briefing even when the user
1496
+ // message was "thanks" or "/help". The new default `gated` mode fires
1497
+ // recall only when it's likely to pay off:
1498
+ // - turn 1 of the session (no prior briefing)
1499
+ // - the turn immediately after auto-compaction (the model just lost
1500
+ // context — give it back what was load-bearing)
1501
+ // - when the user message names ≥2 entity-shaped tokens (proper
1502
+ // nouns, file paths, identifiers) suggesting they're asking about
1503
+ // something specific that memory might have history on
1504
+ // The env knob `BRAINROUTER_RECALL_MODE=always|gated|off` lets users
1505
+ // preserve pre-9b behaviour or kill recall entirely for benchmarking.
1506
+ const recallMode = resolveRecallMode();
1507
+ if (recallMode === 'off') {
1508
+ this.recalledRecords = [];
1509
+ this.recalledRecordIds = [];
1510
+ this.lastBriefingSources = [];
1511
+ callbacks.onMemoryEvent?.({ kind: 'skipped', reason: 'recallMode=off' });
1512
+ return;
1513
+ }
1514
+ if (recallMode === 'gated') {
1515
+ const isFirstTurn = !this.recallHasFiredThisSession;
1516
+ const justCompacted = this.recallNextTurnIsPostCompaction;
1517
+ const entityHits = countEntityTokens(prompt);
1518
+ const hasEntityCue = entityHits >= 2;
1519
+ if (!isFirstTurn && !justCompacted && !hasEntityCue) {
1520
+ // Skip the full briefing — emit a lightweight system-reminder so
1521
+ // the model knows it can pull memory itself if it needs to. The
1522
+ // reminder is tagged so the next turn replaces it cleanly.
1523
+ this.replaceTaggedSystemMessage('memory-hint', [
1524
+ '## Memory available (gated mode)',
1525
+ 'BrainRouter memory is available this turn but the auto-briefing was skipped (no first-turn / post-compaction / entity-cue trigger). Call `memory_recall` / `memory_search` / `memory_file_history` yourself if you need history on a specific entity, file, or decision.',
1526
+ ].join('\n'));
1527
+ this.recalledRecords = [];
1528
+ this.recalledRecordIds = [];
1529
+ this.lastBriefingSources = [];
1530
+ callbacks.onMemoryEvent?.({ kind: 'skipped', reason: 'gated (no trigger)' });
1531
+ return;
1532
+ }
1533
+ // Reset the post-compaction flag now that we're firing because of it.
1534
+ this.recallNextTurnIsPostCompaction = false;
1535
+ }
1536
+ // Either `recallMode === 'always'` (preserves pre-9b behaviour) or
1537
+ // we hit a gated trigger — fire the full briefing.
1270
1538
  callbacks.onStatusUpdate('Briefing from BrainRouter memory...');
1539
+ // 9d: skip `memory_task_state` in the briefing when a goal-anchor is
1540
+ // already carrying the current objective — avoids re-injecting the
1541
+ // "what we're doing now" context twice. The anchor is set immediately
1542
+ // before this call in `runTurn` (around line 680), so reading the goal
1543
+ // here resolves to the same record the anchor used.
1544
+ const activeGoal = readGoal(this.workspaceRoot, this.sessionKey);
1545
+ const hasActiveGoal = !!(activeGoal?.text && activeGoal.status === 'active');
1271
1546
  const briefing = await buildMemoryBriefing({
1272
1547
  mcpClient: this.mcpClient,
1273
1548
  mcpTools,
@@ -1275,10 +1550,14 @@ export class Agent {
1275
1550
  workspaceRoot: this.workspaceRoot,
1276
1551
  query: prompt,
1277
1552
  activeSkill: this.activeSkill,
1553
+ hasActiveGoal,
1278
1554
  });
1279
1555
  this.recalledRecords = briefing.recalledRecords;
1280
1556
  this.recalledRecordIds = briefing.recalledRecordIds;
1281
1557
  this.lastBriefingSources = briefing.sourcesQueried;
1558
+ this.recallHasFiredThisSession = true;
1559
+ // Drop any prior lightweight hint now that the full briefing is live.
1560
+ this.removeTaggedSystemMessage('memory-hint');
1282
1561
  if (briefing.block) {
1283
1562
  this.replaceTaggedSystemMessage('memory-briefing', briefing.block);
1284
1563
  callbacks.onStatusUpdate(`Memory briefing loaded: ${briefing.sourcesQueried.join(', ')} (${briefing.recalledRecordIds.length} records).`);
@@ -1295,6 +1574,15 @@ export class Agent {
1295
1574
  getLastBriefing() {
1296
1575
  return { sources: [...this.lastBriefingSources], recordIds: [...this.recalledRecordIds] };
1297
1576
  }
1577
+ /**
1578
+ * Snapshot of the records produced by the most recent pre-turn briefing.
1579
+ * `/where` surfaces a few of these to give the user a sense of what the
1580
+ * agent is leaning on right now. Returns a shallow copy so callers can't
1581
+ * mutate the agent's internal state.
1582
+ */
1583
+ getRecalledRecords() {
1584
+ return [...this.recalledRecords];
1585
+ }
1298
1586
  /** One-line summary of any new contradiction surfaced after the last capture, or undefined if none. */
1299
1587
  lastContradictionWarning;
1300
1588
  takeContradictionWarning() {
@@ -1835,7 +2123,102 @@ function formatBytes(n) {
1835
2123
  // per-turn system messages (briefing, fan-out hint). Strip them before the
1836
2124
  // payload reaches the LLM so the model doesn't see the bookkeeping.
1837
2125
  const TAG_MARKER_RE = /^<!--brainrouter:[a-z0-9-]+-->\n/;
1838
- export function buildChatCompletionPayload(config, messages, tools) {
2126
+ /**
2127
+ * Documents `supportsReasoningEffortField` (defined further below). Heuristic for "does this model accept the OpenAI Chat Completions
2128
+ * `reasoning_effort` field?". The signal that actually matters is the
2129
+ * **model name**, not the endpoint hostname — modern OpenAI-compatible
2130
+ * servers (LM Studio 0.3.29+, Ollama, vLLM, OpenRouter, OpenAI itself)
2131
+ * all accept the field on /v1/chat/completions for the reasoning-capable
2132
+ * model classes below, and silently ignore it for everything else. So a
2133
+ * `gpt-oss-20b` served from localhost via LM Studio gets the same
2134
+ * treatment as `gpt-5` on `api.openai.com`.
2135
+ *
2136
+ * Borrowed shape from openai-node's `ReasoningEffort` enum
2137
+ * (openSrc/openai-node/src/resources/shared.ts) — `low|medium|high` map
2138
+ * straight through to the provider field across OpenAI, DeepSeek,
2139
+ * LM Studio, Ollama, and OpenRouter's pass-through. Anthropic models
2140
+ * (`claude-*`) use a different field shape (`thinking: { budget_tokens }`)
2141
+ * and a different endpoint (`/v1/messages`), so they're intentionally
2142
+ * skipped here — brainrouter would need a separate provider adapter to
2143
+ * forward into Anthropic's native API.
2144
+ */
2145
/**
 * 9b: work out which recall-gating mode this process should use.
 *
 * The `BRAINROUTER_RECALL_MODE` environment variable is the only input. It is
 * lower-cased and trimmed, then accepted if it is one of `always`, `gated`
 * or `off`. An unset variable, an empty string, or any other value (e.g. a
 * typo) yields `gated` rather than an error. The environment is read on
 * every call, so a value changed mid-process is picked up on the next call.
 *
 * @returns {'always' | 'gated' | 'off'} the effective recall mode.
 */
export function resolveRecallMode() {
    const requested = (process.env.BRAINROUTER_RECALL_MODE ?? '').toLowerCase().trim();
    switch (requested) {
        case 'always':
        case 'gated':
        case 'off':
            return requested;
        default:
            // Unset or unrecognised: fall back instead of failing.
            return 'gated';
    }
}
2158
/**
 * 9b: cheap local heuristic for "the user message names something specific
 * memory might have history on." Counts entity-shaped tokens in three passes
 * and returns the sum (a token may be counted by more than one pass):
 *
 *  1. File paths: a token ending in `.<ext>` (1-8 letters), or a run of
 *     directory segments separated by `/` or `\`.
 *  2. Identifier-shaped tokens (`camelCase` / `PascalCase` / `snake_case`)
 *     longer than 4 characters.
 *  3. Proper nouns: capitalized words of at least 3 letters that are not the
 *     first word of a sentence.
 *
 * Crude on purpose: the bar is "is recall plausibly worth it?". A false
 * positive wastes a recall call, a false negative wastes an ask. The caller
 * chooses the threshold.
 *
 * @param {string} text - raw user message.
 * @returns {number} number of entity-shaped tokens; 0 for empty or
 *   non-string input.
 */
export function countEntityTokens(text) {
    // Non-string input (undefined, null, structured content) has nothing to scan.
    if (typeof text !== 'string' || text.length === 0)
        return 0;
    // Documented contract: identifiers must be longer than 4 characters.
    const MIN_IDENTIFIER_LENGTH = 5;
    let count = 0;
    // File paths: `name.ext`, or directory segments joined by `/` or `\`.
    const pathMatches = text.match(/[A-Za-z0-9_./\\-]+\.[A-Za-z]{1,8}(?![A-Za-z])|(?:[\w-]+[\/\\]){1,}[\w.-]+/g);
    if (pathMatches)
        count += pathMatches.length;
    // Identifier-shaped tokens (camelCase, PascalCase, snake_case). The regex
    // alone admits 3-4 character tokens such as `iOS`, so enforce the
    // documented minimum length afterwards.
    const identMatches = text.match(/\b(?:[a-z]+[A-Z][A-Za-z0-9]+|[A-Z][a-z]+[A-Z][A-Za-z0-9]+|[a-z]+_[a-z][\w]+)\b/g);
    if (identMatches)
        count += identMatches.filter((token) => token.length >= MIN_IDENTIFIER_LENGTH).length;
    // Proper nouns (capitalized, not at sentence start, >= 3 letters). Split on
    // sentence boundaries, then trim each sentence so leading whitespace does
    // not push the real first word to index 1 and get it counted.
    for (const sentence of text.split(/[.!?]\s+/)) {
        const words = sentence.trim().split(/\s+/);
        for (const word of words.slice(1)) {
            const letters = word.replace(/[^A-Za-z]/g, '');
            if (letters.length >= 3 && /^[A-Z][a-z]+$/.test(letters))
                count++;
        }
    }
    return count;
}
2192
// Reasoning-model name patterns, tested against the lower-cased model name
// with any `<vendor>/` prefix removed. Built once at module load; none of the
// patterns carry the `g`/`y` flag, so sharing them across calls is stateless.
const REASONING_MODEL_PATTERNS = Object.freeze([
    /^gpt-5/, // gpt-5, gpt-5-mini, gpt-5-pro, gpt-5.1, gpt-5-codex-max
    /^o[134](-|$|\.)/, // o1, o3, o4 and dated / sized variants
    /^gpt-oss/, // gpt-oss-20b / 120b (LM Studio 0.3.29+, Ollama, llama.cpp)
    /^deepseek-r[12]/, // DeepSeek R1, R2
    /^deepseek-v[34]/, // DeepSeek V3.1+, V4 reasoning variants
    /^qwen3/, // Qwen3 reasoning variants (LM Studio, Ollama)
    /^magistral/, // Mistral Magistral (small/medium reasoning)
    /reasoning/, // catch-all for `phi-4-reasoning`, `*-reasoning-plus`, …
    /thinking/, // catch-all for `qwen3-30b-a3b-thinking`, `*-thinking-*`, …
]);
/**
 * Decide, from the model name alone, whether a Chat Completions request for
 * this model should carry the `reasoning_effort` field.
 *
 * The name is lower-cased, trimmed, and reduced to the segment after the
 * last `/`, so `openai/gpt-oss-20b` and `gpt-oss-20b` are treated alike. It
 * is then matched against the known reasoning-model families plus the
 * generic `reasoning` / `thinking` catch-alls. `claude-*` names are always
 * rejected, even when they contain `thinking`, because those models take a
 * different field shape on a different endpoint.
 *
 * @param {{ model?: string } | null | undefined} config - provider config;
 *   only `model` is read.
 * @returns {boolean} true when the model name matches a known reasoning
 *   family; false otherwise, including when `config` or `config.model` is
 *   missing or not a string.
 */
export function supportsReasoningEffortField(config) {
    const configured = config?.model;
    if (typeof configured !== 'string')
        return false;
    // Normalize: strip any `<vendor>/` prefix so OpenRouter / LM Studio naming
    // (`openai/gpt-oss-20b`, `deepseek/deepseek-r1`) matches the same patterns
    // as a bare model name. Prefixes can stack, so keep only the last segment.
    const raw = configured.toLowerCase().trim();
    const model = raw.includes('/') ? raw.slice(raw.lastIndexOf('/') + 1) : raw;
    // Anthropic models are intentionally skipped; without this guard the
    // `thinking` catch-all would admit names like `claude-3.7-sonnet:thinking`.
    if (model.startsWith('claude'))
        return false;
    return REASONING_MODEL_PATTERNS.some((re) => re.test(model));
}
2221
+ export function buildChatCompletionPayload(config, messages, tools, options = {}) {
1839
2222
  const stripTag = (content) => typeof content === 'string' && TAG_MARKER_RE.test(content)
1840
2223
  ? content.replace(TAG_MARKER_RE, '')
1841
2224
  : content;
@@ -1874,9 +2257,16 @@ export function buildChatCompletionPayload(config, messages, tools) {
1874
2257
  }));
1875
2258
  body.tool_choice = 'auto';
1876
2259
  }
2260
+ // Forward reasoning_effort only when the level is non-default AND the
2261
+ // endpoint+model combo is a known reasoning surface. `medium` is the
2262
+ // CLI default and forwarding it would change every existing user's
2263
+ // request shape on upgrade for no behavioural gain.
2264
+ if (options.effort && options.effort !== 'medium' && supportsReasoningEffortField(config)) {
2265
+ body.reasoning_effort = options.effort;
2266
+ }
1877
2267
  return body;
1878
2268
  }
1879
- export async function callOpenAI(config, messages, tools) {
2269
+ export async function callOpenAI(config, messages, tools, options = {}) {
1880
2270
  const endpoint = config.endpoint || 'https://api.openai.com/v1';
1881
2271
  let apiKey = config.apiKey || process.env.OPENAI_API_KEY || '';
1882
2272
  const isLocal = endpoint.includes('localhost') || endpoint.includes('127.0.0.1');
@@ -1886,7 +2276,7 @@ export async function callOpenAI(config, messages, tools) {
1886
2276
  if (!apiKey && isLocal) {
1887
2277
  apiKey = 'sk-local-placeholder';
1888
2278
  }
1889
- const body = buildChatCompletionPayload(config, messages, tools);
2279
+ const body = buildChatCompletionPayload(config, messages, tools, options);
1890
2280
  const headers = {
1891
2281
  'Content-Type': 'application/json'
1892
2282
  };