@kinqs/brainrouter-cli 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/README.md +29 -52
  2. package/agents/architect.json +18 -0
  3. package/agents/explorer.json +18 -0
  4. package/agents/reviewer.json +18 -0
  5. package/agents/verifier.json +18 -0
  6. package/agents/worker.json +18 -0
  7. package/bin/cli.cjs +71 -0
  8. package/dist/agent/agent.d.ts +224 -3
  9. package/dist/agent/agent.js +561 -55
  10. package/dist/cli/banner.d.ts +80 -0
  11. package/dist/cli/banner.js +232 -0
  12. package/dist/cli/cliPrompt.d.ts +106 -0
  13. package/dist/cli/cliPrompt.js +314 -0
  14. package/dist/cli/commands/_context.d.ts +3 -1
  15. package/dist/cli/commands/_helpers.d.ts +1 -1
  16. package/dist/cli/commands/_helpers.js +6 -6
  17. package/dist/cli/commands/config.d.ts +46 -0
  18. package/dist/cli/commands/config.js +1042 -0
  19. package/dist/cli/commands/guard.js +75 -10
  20. package/dist/cli/commands/init.d.ts +20 -0
  21. package/dist/cli/commands/init.js +64 -0
  22. package/dist/cli/commands/login.d.ts +13 -0
  23. package/dist/cli/commands/login.js +179 -0
  24. package/dist/cli/commands/mcp.d.ts +19 -0
  25. package/dist/cli/commands/mcp.js +286 -0
  26. package/dist/cli/commands/memory.js +2 -2
  27. package/dist/cli/commands/obs.js +22 -22
  28. package/dist/cli/commands/orchestration.js +18 -0
  29. package/dist/cli/commands/session.js +13 -5
  30. package/dist/cli/commands/ui.js +202 -91
  31. package/dist/cli/commands/workflow.d.ts +20 -0
  32. package/dist/cli/commands/workflow.js +368 -51
  33. package/dist/cli/ink/ChatApp.d.ts +206 -0
  34. package/dist/cli/ink/ChatApp.js +493 -0
  35. package/dist/cli/ink/Frame.d.ts +26 -0
  36. package/dist/cli/ink/Frame.js +5 -0
  37. package/dist/cli/ink/Picker.d.ts +65 -0
  38. package/dist/cli/ink/Picker.js +133 -0
  39. package/dist/cli/ink/SlashPalette.d.ts +51 -0
  40. package/dist/cli/ink/SlashPalette.js +136 -0
  41. package/dist/cli/ink/TextField.d.ts +34 -0
  42. package/dist/cli/ink/TextField.js +47 -0
  43. package/dist/cli/ink/WizardApp.d.ts +7 -0
  44. package/dist/cli/ink/WizardApp.js +422 -0
  45. package/dist/cli/ink/ambientChat.d.ts +34 -0
  46. package/dist/cli/ink/ambientChat.js +7 -0
  47. package/dist/cli/ink/consoleCapture.d.ts +11 -0
  48. package/dist/cli/ink/consoleCapture.js +33 -0
  49. package/dist/cli/ink/markdownRender.d.ts +41 -0
  50. package/dist/cli/ink/markdownRender.js +278 -0
  51. package/dist/cli/ink/renderWithResizeClear.d.ts +14 -0
  52. package/dist/cli/ink/renderWithResizeClear.js +33 -0
  53. package/dist/cli/ink/runChat.d.ts +34 -0
  54. package/dist/cli/ink/runChat.js +571 -0
  55. package/dist/cli/ink/runPicker.d.ts +31 -0
  56. package/dist/cli/ink/runPicker.js +139 -0
  57. package/dist/cli/ink/runSlashPalette.d.ts +23 -0
  58. package/dist/cli/ink/runSlashPalette.js +33 -0
  59. package/dist/cli/ink/runWizard.d.ts +22 -0
  60. package/dist/cli/ink/runWizard.js +133 -0
  61. package/dist/cli/ink/stdinHandoff.d.ts +51 -0
  62. package/dist/cli/ink/stdinHandoff.js +78 -0
  63. package/dist/cli/ink/toolFormat.d.ts +73 -0
  64. package/dist/cli/ink/toolFormat.js +180 -0
  65. package/dist/cli/ink/useTerminalSize.d.ts +35 -0
  66. package/dist/cli/ink/useTerminalSize.js +26 -0
  67. package/dist/cli/repl.d.ts +25 -3
  68. package/dist/cli/repl.js +64 -646
  69. package/dist/cli/slashSuggest.d.ts +32 -0
  70. package/dist/cli/slashSuggest.js +146 -0
  71. package/dist/cli/spinner.d.ts +34 -0
  72. package/dist/cli/spinner.js +36 -0
  73. package/dist/cli/statusline.d.ts +67 -0
  74. package/dist/cli/statusline.js +204 -0
  75. package/dist/cli/theme.d.ts +79 -0
  76. package/dist/cli/theme.js +106 -0
  77. package/dist/cli/whereView.d.ts +81 -0
  78. package/dist/cli/whereView.js +245 -0
  79. package/dist/cli/wizard/modelsApi.d.ts +72 -0
  80. package/dist/cli/wizard/modelsApi.js +166 -0
  81. package/dist/cli/wizard/picker.d.ts +202 -0
  82. package/dist/cli/wizard/picker.js +547 -0
  83. package/dist/cli/wizard/providers.d.ts +86 -0
  84. package/dist/cli/wizard/providers.js +190 -0
  85. package/dist/cli/wizard/runner.d.ts +13 -0
  86. package/dist/cli/wizard/runner.js +488 -0
  87. package/dist/cli/wizard/types.d.ts +122 -0
  88. package/dist/cli/wizard/types.js +109 -0
  89. package/dist/config/config.d.ts +52 -0
  90. package/dist/config/config.js +89 -75
  91. package/dist/index.js +215 -206
  92. package/dist/memory/briefing.d.ts +11 -1
  93. package/dist/memory/briefing.js +69 -1
  94. package/dist/memory/consolidation.d.ts +1 -1
  95. package/dist/orchestration/agentRegistry.d.ts +36 -0
  96. package/dist/orchestration/agentRegistry.js +64 -0
  97. package/dist/orchestration/orchestrator.d.ts +7 -0
  98. package/dist/orchestration/orchestrator.js +2 -0
  99. package/dist/orchestration/tools.d.ts +10 -1
  100. package/dist/orchestration/tools.js +48 -4
  101. package/dist/prompt/breadthHint.d.ts +5 -0
  102. package/dist/prompt/breadthHint.js +44 -0
  103. package/dist/prompt/skillCatalog.d.ts +11 -0
  104. package/dist/prompt/skillCatalog.js +134 -0
  105. package/dist/prompt/skillRunner.d.ts +2 -2
  106. package/dist/prompt/skillRunner.js +2 -31
  107. package/dist/prompt/systemPrompt.d.ts +34 -0
  108. package/dist/prompt/systemPrompt.js +128 -108
  109. package/dist/runtime/dangerousCommand.d.ts +53 -0
  110. package/dist/runtime/dangerousCommand.js +105 -0
  111. package/dist/runtime/mcpClient.d.ts +38 -1
  112. package/dist/runtime/mcpClient.js +104 -13
  113. package/dist/runtime/mcpPool.d.ts +162 -0
  114. package/dist/runtime/mcpPool.js +423 -0
  115. package/dist/runtime/mcpUtils.d.ts +3 -1
  116. package/dist/state/goalStore.d.ts +98 -17
  117. package/dist/state/goalStore.js +132 -42
  118. package/dist/state/preferencesStore.d.ts +67 -3
  119. package/dist/state/preferencesStore.js +84 -1
  120. package/dist/state/workflowArtifacts.d.ts +63 -2
  121. package/dist/state/workflowArtifacts.js +120 -8
  122. package/dist/tests/_helpers.d.ts +31 -0
  123. package/dist/tests/_helpers.js +91 -0
  124. package/package.json +12 -5
  125. package/.env.example +0 -109
@@ -2,8 +2,9 @@ import fs from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import { exec } from 'node:child_process';
4
4
  import { promisify } from 'node:util';
5
+ import { randomUUID } from 'node:crypto';
5
6
  import chalk from 'chalk';
6
- import { askYesNo } from '../cli/cliPrompt.js';
7
+ import { askChoice, askYesNo, getActiveReadline, NoTTYError } from '../cli/cliPrompt.js';
7
8
  import { appendTranscriptEntry } from '../state/sessionStore.js';
8
9
  import { buildSystemPrompt, loadWorkspaceInstructionSummary } from '../prompt/systemPrompt.js';
9
10
  import { formatPlan, readPlan, updatePlan } from '../state/taskStore.js';
@@ -14,13 +15,61 @@ import { acquireLLMSlot } from '../runtime/llmSemaphore.js';
14
15
  import { blockGoal, completeGoal, formatGoalBlock, readGoal } from '../state/goalStore.js';
15
16
  import { runHooks } from '../state/hooksStore.js';
16
17
  import { resolveSandboxConfig, runShell } from '../runtime/sandbox.js';
17
- import { readPreferences } from '../state/preferencesStore.js';
18
+ import { isDangerousCommand, resolveRunCommandApproval } from '../runtime/dangerousCommand.js';
19
+ import { readPreferences, resolveEffort } from '../state/preferencesStore.js';
18
20
  import { startSpan, traceEvent } from '../runtime/tracing.js';
19
21
  import { buildHookifyContext, evaluateHookify, listHookifyRules } from '../state/hookifyStore.js';
20
22
  import { renderCompactSystemMessage, runCompaction } from '../prompt/compactor.js';
21
23
  import { buildFanOutHint, shouldSuggestFanOut } from '../prompt/breadthHint.js';
22
24
  const execPromise = promisify(exec);
23
25
  const IGNORED_DIRS = new Set(['node_modules', '.git', 'dist', '.DS_Store', '.next']);
26
/**
 * Decode `text` as JSON and hand it back only when the decoded value is a
 * non-null value whose `typeof` is `'object'` (plain objects and arrays).
 *
 * @param {string} text - Raw JSON text, e.g. the textual result of a tool call.
 * @returns {object|undefined} The decoded value, or `undefined` when `text`
 *   is not valid JSON or decodes to a primitive or `null`.
 */
function parseJsonObject(text) {
    let decoded;
    try {
        decoded = JSON.parse(text);
    }
    catch {
        // Malformed JSON is an expected input for this helper, not a failure.
        return undefined;
    }
    if (!decoded || typeof decoded !== 'object') {
        return undefined;
    }
    return decoded;
}
35
/**
 * Gather the distinct string ids found on a decoded payload: the payload's
 * own `id` (if it is a string) followed by the `id` of every object entry in
 * its `agents` array (if present).
 *
 * @param {unknown} value - Decoded payload; anything that is not a non-null
 *   object yields an empty list.
 * @returns {string[]} Unique ids in first-seen order.
 */
function collectChildIds(value) {
    if (value === null || typeof value !== 'object') {
        return [];
    }
    // A Set keeps insertion order, so de-duplication preserves first-seen order.
    const seen = new Set();
    if (typeof value.id === 'string') {
        seen.add(value.id);
    }
    if (Array.isArray(value.agents)) {
        value.agents
            .filter((entry) => entry && typeof entry === 'object' && typeof entry.id === 'string')
            .forEach((entry) => seen.add(entry.id));
    }
    return [...seen];
}
51
/**
 * Update the per-turn bookkeeping of child agents after an orchestration
 * tool call.
 *
 * - `spawn_agent` / `spawn_agents`: every id found in the JSON result is
 *   recorded in `spawned`; for `spawn_agent` with a truthy `args.wait` the id
 *   is also recorded in `waited`.
 * - `wait_agent`: a non-empty string `args.id` is recorded in `waited`.
 * - `wait_agents`: every string in `args.ids` is recorded in `waited`.
 * - Any other tool name leaves both sets untouched.
 *
 * @param {string} toolName - Name of the tool that was just executed.
 * @param {object|undefined} args - Arguments the tool was called with.
 * @param {string} resultText - Textual tool result (expected to be JSON for spawns).
 * @param {Set<string>} spawned - Mutated: ids spawned during this turn.
 * @param {Set<string>} waited - Mutated: ids whose output has been awaited.
 * @returns {void}
 */
function trackChildObservation(toolName, args, resultText, spawned, waited) {
    switch (toolName) {
        case 'spawn_agent':
        case 'spawn_agents': {
            const childIds = collectChildIds(parseJsonObject(resultText));
            // Only the single-spawn tool with a truthy `wait` argument counts as waited.
            const alsoWaited = toolName === 'spawn_agent' && Boolean(args?.wait);
            for (const childId of childIds) {
                spawned.add(childId);
                if (alsoWaited) {
                    waited.add(childId);
                }
            }
            break;
        }
        case 'wait_agent': {
            const childId = args?.id;
            if (typeof childId === 'string' && childId) {
                waited.add(childId);
            }
            break;
        }
        case 'wait_agents': {
            if (Array.isArray(args?.ids)) {
                for (const childId of args.ids) {
                    if (typeof childId === 'string') {
                        waited.add(childId);
                    }
                }
            }
            break;
        }
        default:
            break;
    }
}
24
73
  export const LOCAL_TOOLS = [
25
74
  {
26
75
  name: 'read_file',
@@ -146,6 +195,41 @@ export const LOCAL_TOOLS = [
146
195
  createReadAgentTranscriptTool(),
147
196
  createCloseAgentTool(),
148
197
  createRouteAgentTool(),
198
+ {
199
+ name: 'ask_user_choice',
200
+ description: 'Pause the turn and ask the human to commit to ONE of 2–4 mutually exclusive approaches. ' +
201
+ 'Renders an arrow-key picker (↑/↓ navigate, ENTER confirm; SPACE toggles in multiSelect mode) ' +
202
+ 'with an always-on "Other" row that drops to a free-text prompt — the user is never trapped between bad options. ' +
203
+ 'Returns { answer: <chosen label or free-text> } in single-select, or { answer: [labels/free-text…] } in multiSelect. ' +
204
+ 'Use ONLY when there is genuine ambiguity that needs the user\'s judgment — NOT for trivial yes/no confirmations ' +
205
+ '(`askYesNo` is wired into approval gates already), NOT for things you can decide yourself with the available context, ' +
206
+ 'and NOT as a substitute for thinking. ' +
207
+ 'Errors in non-interactive runs (CI / piped / `brainrouter run`) and when the user cancels (Esc/q/Ctrl+C); ' +
208
+ 'on either error, decide yourself and say which option you picked and why.',
209
+ inputSchema: {
210
+ type: 'object',
211
+ properties: {
212
+ question: { type: 'string', description: 'The question to ask the user (complete sentence ending with `?`).' },
213
+ header: { type: 'string', description: 'Short chip-style label (≤12 chars) shown above the question, e.g. "Auth method" or "Storage".' },
214
+ options: {
215
+ type: 'array',
216
+ description: '2–4 mutually exclusive choices. Each option needs a short label and a one-line description.',
217
+ minItems: 2,
218
+ maxItems: 4,
219
+ items: {
220
+ type: 'object',
221
+ properties: {
222
+ label: { type: 'string', description: 'Short display text (1–5 words).' },
223
+ description: { type: 'string', description: 'One-line explanation of what this option means or what will happen if chosen.' },
224
+ },
225
+ required: ['label', 'description'],
226
+ },
227
+ },
228
+ multiSelect: { type: 'boolean', description: 'When true, allow the user to pick multiple options (comma-separated input). Defaults to false.' },
229
+ },
230
+ required: ['question', 'header', 'options'],
231
+ },
232
+ },
149
233
  {
150
234
  name: 'update_plan',
151
235
  description: 'Create or update the durable CLI task plan. Use this for multi-step work and keep at most one item in_progress.',
@@ -326,6 +410,28 @@ export class Agent {
326
410
  recalledRecordIds = [];
327
411
  recalledRecords = [];
328
412
  lastBriefingSources = [];
413
+ /**
414
+ * 10b: latest MCP tool inventory captured by `listTools()` calls. Used by
415
+ * `createSystemMessage` to decide whether the BrainRouter memory section
416
+ * should render — when `memory_recall` is missing from this list (the
417
+ * cloud brain is offline), the prompt swaps to a brain-offline notice so
418
+ * the model doesn't try to call tools that aren't there. Undefined until
419
+ * the first successful list; treated as "assume online" by the prompt
420
+ * builder until then (back-compat for callers that don't list pre-turn).
421
+ */
422
+ lastKnownMcpTools;
423
+ /**
424
+ * 9b: gated recall state. `recallHasFiredThisSession` flips to true on the
425
+ * first successful briefing injection so subsequent turns can skip the
426
+ * fresh recall pull unless a gated trigger fires. `recallNextTurnIsPost-
427
+ * Compaction` is set by `compactHistory()` to force the next turn through
428
+ * the full briefing path (compaction just dropped the prior briefing as
429
+ * collateral; replay it once so the model isn't blind). Both are
430
+ * cleared on `loadHistory` / `fork` / `bootstrapSession` so a fresh
431
+ * session re-pulls.
432
+ */
433
+ recallHasFiredThisSession = false;
434
+ recallNextTurnIsPostCompaction = false;
329
435
  roleOverlay;
330
436
  accessMode;
331
437
  silent;
@@ -355,12 +461,24 @@ export class Agent {
355
461
  agentId = `agent-${Math.random().toString(36).slice(2, 8)}`;
356
462
  /** agent_id of the parent (set by spawn_agent for children). */
357
463
  parentAgentId;
464
+ /** Agent tier — forwarded to OrchestrationContext so grandchildren can inherit hierarchy checks. */
465
+ tier;
466
+ /** Spawn-chain depth (0 = direct chat-root child). Forwarded to hierarchy checks. */
467
+ agentDepth;
358
468
  constructor(mcpClient, llmConfig, options) {
359
469
  this.mcpClient = mcpClient;
360
470
  this.llmConfig = llmConfig;
361
471
  this.workspaceRoot = options.workspaceRoot;
362
472
  this.launchCwd = options.launchCwd;
363
- this.sessionKey = options.sessionKey ?? `brainrouter-cli:${this.workspaceRoot}`;
473
+ // Each CLI process gets a fresh sessionKey by default. The previous
474
+ // workspace-derived fallback (`brainrouter-cli:<workspaceRoot>`) made
475
+ // MCP's `memory_resolve_session` fall into its workspace-cache branch
476
+ // and return the same UUID for every CLI in the workspace, so two
477
+ // concurrent CLIs shared one goal/plan/working bucket. A randomUUID
478
+ // here is accepted by MCP's `isUniqueId` and echoed back as-is, so
479
+ // each CLI is its own session for local state. The memory DB is
480
+ // userId-scoped, so cross-CLI recall continuity is unaffected.
481
+ this.sessionKey = options.sessionKey ?? randomUUID();
364
482
  this.roleOverlay = options.roleOverlay;
365
483
  this.accessMode = options.accessMode ?? 'shell';
366
484
  this.silent = options.silent ?? false;
@@ -370,6 +488,8 @@ export class Agent {
370
488
  this.systemPromptOverride = options.systemPromptOverride;
371
489
  this.parentTraceId = options.parentTraceId;
372
490
  this.parentSpanId = options.parentSpanId;
491
+ this.tier = options.tier;
492
+ this.agentDepth = options.agentDepth ?? 0;
373
493
  }
374
494
  /** Expose for orchestration so spawn_agent can record the parent linkage. */
375
495
  getAgentId() {
@@ -379,6 +499,47 @@ export class Agent {
379
499
  setParentAgentId(id) {
380
500
  this.parentAgentId = id;
381
501
  }
502
+ isModelVisibleMcpTool(tool) {
503
+ const hiddenBrainrouterTools = new Set([
504
+ 'memory_capture_turn',
505
+ 'memory_mark_cited',
506
+ 'memory_resolve_session',
507
+ 'memory_register_skill_hints',
508
+ 'memory_hook_register',
509
+ 'memory_hook_status',
510
+ ]);
511
+ const name = String(tool?.name ?? '');
512
+ const rawName = String(tool?.__rawName ?? this.rawMcpToolName(name));
513
+ if (!hiddenBrainrouterTools.has(rawName))
514
+ return true;
515
+ const serverId = typeof tool?.__serverId === 'string'
516
+ ? tool.__serverId
517
+ : this.serverIdFromMcpToolName(name);
518
+ const status = serverId && typeof this.mcpClient.getStatus === 'function'
519
+ ? this.mcpClient.getStatus(serverId)
520
+ : undefined;
521
+ // Hide only BrainRouter auto-pipeline/admin tools. Third-party MCP tools
522
+ // with coincidentally similar names stay visible.
523
+ return status?.identity !== 'brainrouter';
524
+ }
525
+ rawMcpToolName(name) {
526
+ const serverId = this.serverIdFromMcpToolName(name);
527
+ return serverId ? name.slice(`mcp__${serverId}__`.length) : name;
528
+ }
529
+ serverIdFromMcpToolName(name) {
530
+ if (!name.startsWith('mcp__'))
531
+ return undefined;
532
+ const rest = name.slice('mcp__'.length);
533
+ if (typeof this.mcpClient.getServerIds === 'function') {
534
+ const ids = this.mcpClient.getServerIds();
535
+ for (const id of ids.sort((a, b) => b.length - a.length)) {
536
+ if (rest.startsWith(`${id}__`))
537
+ return id;
538
+ }
539
+ }
540
+ const idx = rest.indexOf('__');
541
+ return idx >= 0 ? rest.slice(0, idx) : undefined;
542
+ }
382
543
  allowedToolsForAccess() {
383
544
  // Lifecycle / inspection tools are always available regardless of access
384
545
  // mode — they don't touch the workspace and the agent needs them to end
@@ -388,6 +549,10 @@ export class Agent {
388
549
  'spawn_agent', 'spawn_agents', 'list_agents', 'wait_agent', 'wait_agents',
389
550
  'read_agent_transcript', 'close_agent', 'route_agent',
390
551
  'goal_complete', 'goal_blocked',
552
+ // ask_user_choice doesn't touch the workspace — it's an interaction
553
+ // primitive, so it stays available in every access mode (and is gated
554
+ // structurally by activeReadline / isTTY in the helper itself).
555
+ 'ask_user_choice',
391
556
  ]);
392
557
  const writeAdds = new Set(['write_file', 'edit_file', 'apply_patch']);
393
558
  const shellAdds = new Set(['run_command']);
@@ -428,22 +593,25 @@ export class Agent {
428
593
  catch (err) {
429
594
  // Non-fatal: continue with local tools only
430
595
  }
596
+ // 10b: cache the inventory so `createSystemMessage` can render a
597
+ // brain-online vs brain-offline prompt. Refresh chatHistory[0]
598
+ // whenever the inventory shape changed (online → offline or vice
599
+ // versa) so the next LLM call sees the correct system message.
600
+ const prevTools = this.lastKnownMcpTools?.map((t) => t.name).sort().join(',');
601
+ this.lastKnownMcpTools = mcpTools.map((t) => ({
602
+ name: String(t?.__rawName ?? this.rawMcpToolName(String(t?.name ?? ''))),
603
+ }));
604
+ const newTools = this.lastKnownMcpTools.map((t) => t.name).sort().join(',');
605
+ if (prevTools !== newTools && this.chatHistory.length > 0 && this.chatHistory[0].role === 'system') {
606
+ this.chatHistory[0] = this.createSystemMessage();
607
+ }
431
608
  const allowed = this.allowedToolsForAccess();
432
609
  const filteredLocalTools = LOCAL_TOOLS.filter(t => allowed.has(t.name));
433
- // Hide MCP tools we already call automatically. Small models otherwise
434
- // try to invoke them with the wrong arguments (most commonly
435
- // memory_capture_turn "Required, Required" comes from missing
436
- // sessionKey + messages). These tools are still callable; the CLI just
437
- // doesn't tell the LLM about them since the auto-pipeline owns them.
438
- const HIDDEN_FROM_LLM = new Set([
439
- 'memory_capture_turn', // called automatically post-turn
440
- 'memory_mark_cited', // called automatically with real citation IDs
441
- 'memory_resolve_session', // called automatically at bootstrap
442
- 'memory_register_skill_hints', // boot-time, not turn-level
443
- 'memory_hook_register', // managed via /hooks
444
- 'memory_hook_status',
445
- ]);
446
- const visibleMcpTools = mcpTools.filter((t) => !HIDDEN_FROM_LLM.has(t.name));
610
+ // Multi-MCP parity: expose every connected third-party MCP tool and the
611
+ // model-safe BrainRouter MCP tools in one turn, using the pool's
612
+ // `mcp__<serverId>__<tool>` namespaces. BrainRouter's auto-pipeline/admin
613
+ // tools stay hidden because the CLI owns those flows.
614
+ const visibleMcpTools = mcpTools.filter((t) => this.isModelVisibleMcpTool(t));
447
615
  const allTools = [...filteredLocalTools, ...visibleMcpTools];
448
616
  callbacks.onStatusUpdate(`Loaded ${filteredLocalTools.length} local tools and ${mcpTools.length} MCP tools.`);
449
617
  // Auto-compact: if the chat history has grown past the configured token
@@ -489,6 +657,29 @@ export class Agent {
489
657
  callbacks.onToolEnd('breadth-detector', { success: true, summary: `fan-out hint injected (${intent.signals.length} signals)` });
490
658
  }
491
659
  }
660
+ // Per-turn goal anchor: re-inject a FRESH goal block at the end of the
661
+ // chatHistory's system messages (replaceTaggedSystemMessage appends), so
662
+ // it lands right before the user prompt. Pre-9d the goal block was ALSO
663
+ // embedded in the foundational system message (via createSystemMessage),
664
+ // which meant every turn carried two copies; 9d made this anchor the
665
+ // single source — `createSystemMessage` no longer touches goal state.
666
+ // The fresh re-push every iteration keeps the up-to-date iteration
667
+ // counter in immediate-context distance and prevents the long /goal
668
+ // continuation-loop drift that PR #26 originally addressed. The anchor
669
+ // also auto-folds the final-budget-turn wrap-up directive (via
670
+ // `formatGoalBlock`'s internal `goalIsOnFinalBudgetTurn` check), so
671
+ // the separate `goal-budget-steering` tagged message is gone too.
672
+ if (!this.silent) {
673
+ const activeGoal = readGoal(this.workspaceRoot, this.sessionKey);
674
+ if (activeGoal?.text && activeGoal.status === 'active') {
675
+ this.replaceTaggedSystemMessage('goal-anchor', formatGoalBlock(activeGoal));
676
+ }
677
+ else {
678
+ // No active goal — drop any stale anchor from a prior /goal so the
679
+ // model doesn't keep seeing a completed/cleared goal as "current."
680
+ this.removeTaggedSystemMessage('goal-anchor');
681
+ }
682
+ }
492
683
  const userMsg = { role: 'user', content: prompt };
493
684
  this.chatHistory.push(userMsg);
494
685
  this.recordTranscript(userMsg);
@@ -508,12 +699,19 @@ export class Agent {
508
699
  // signatures so we can interrupt the loop with corrective feedback.
509
700
  const recentToolSignatures = [];
510
701
  const REPEAT_GUARD_LIMIT = 3;
702
+ const spawnedChildIdsThisTurn = new Set();
703
+ const waitedChildIdsThisTurn = new Set();
704
+ let spawnWaitGuardInjected = false;
511
705
  while (loopCount < maxLoops) {
512
706
  loopCount++;
513
707
  callbacks.onStatusUpdate(`Thinking (turn ${loopCount})...`);
514
708
  let response;
515
709
  try {
516
- response = await callOpenAI(this.llmConfig, this.chatHistory, allTools);
710
+ // Re-resolve every loop iteration so an in-session `/effort` flip
711
+ // (which only refreshes the system prompt) also updates the next
712
+ // request's reasoning_effort slot — no restart needed.
713
+ const effort = resolveEffort(this.workspaceRoot).effort;
714
+ response = await callOpenAI(this.llmConfig, this.chatHistory, allTools, { effort });
517
715
  }
518
716
  catch (err) {
519
717
  throw new Error(`LLM Execution failed: ${err.message}`);
@@ -531,6 +729,21 @@ export class Agent {
531
729
  this.chatHistory.push(assistantMsg);
532
730
  this.recordTranscript(assistantMsg);
533
731
  if (!response.toolCalls || response.toolCalls.length === 0) {
732
+ const unobservedChildIds = [...spawnedChildIdsThisTurn].filter((id) => !waitedChildIdsThisTurn.has(id));
733
+ if (unobservedChildIds.length > 0 && !spawnWaitGuardInjected) {
734
+ spawnWaitGuardInjected = true;
735
+ const waitTool = unobservedChildIds.length === 1 ? 'wait_agent' : 'wait_agents';
736
+ const correction = [
737
+ `You spawned ${unobservedChildIds.length} child agent${unobservedChildIds.length === 1 ? '' : 's'} in this turn but have not waited for their outputs yet.`,
738
+ `Call \`${waitTool}\` now for: ${unobservedChildIds.join(', ')}.`,
739
+ 'Do not tell the user you are waiting in prose; use the tool call, then synthesize the returned child output.',
740
+ ].join(' ');
741
+ const guardMsg = { role: 'user', content: correction };
742
+ this.chatHistory.push(guardMsg);
743
+ this.recordTranscript(guardMsg);
744
+ callbacks.onStatusUpdate(`Waiting required for ${unobservedChildIds.length} child agent${unobservedChildIds.length === 1 ? '' : 's'}...`);
745
+ continue;
746
+ }
534
747
  finalAnswer = response.content;
535
748
  exitedCleanly = true;
536
749
  break;
@@ -650,6 +863,8 @@ export class Agent {
650
863
  parentTraceId: turnSpan.traceId,
651
864
  parentSpanId: turnSpan.spanId,
652
865
  parentAgentId: this.agentId,
866
+ parentTier: this.tier,
867
+ depth: this.agentDepth,
653
868
  mcpClient: this.mcpClient,
654
869
  llmConfig: this.llmConfig,
655
870
  launchCwd: this.launchCwd,
@@ -664,6 +879,7 @@ export class Agent {
664
879
  },
665
880
  });
666
881
  summary = getToolSummary(name, args, resultText);
882
+ trackChildObservation(name, args, resultText, spawnedChildIdsThisTurn, waitedChildIdsThisTurn);
667
883
  }
668
884
  else if (isLocal) {
669
885
  resultText = await this.executeLocalTool(name, args);
@@ -935,37 +1151,49 @@ export class Agent {
935
1151
  if (this.accessMode !== 'shell') {
936
1152
  return `Command execution denied: agent access mode is "${this.accessMode}".`;
937
1153
  }
938
- // Approval gating. Two cases:
939
- // Interactive parent (this.silent === false): show y/N unless
940
- // autoApproveShell is set (i.e. /yolo on).
941
- // Silent child: cannot prompt; the previous code path silently
942
- // auto-approved, which let a spawn_agent({role:'verifier'}) child
943
- // run arbitrary shell with no user gate — a sandbox bypass. Now
944
- // refuse unless the parent has explicitly opted in via prefs.
1154
+ // Approval gating routes through the pure resolver in
1155
+ // runtime/dangerousCommand.ts. Three outcomes:
1156
+ // auto-approve: fast mode + safe command (or silent child whose
1157
+ // parent has opted in via fast mode).
1158
+ // • ask: planning mode, OR fast mode but the command matched the
1159
+ // dangerous heuristic (rm -rf, sudo, force-push, …).
1160
+ // deny-silent: silent child agents can't answer y/N, so safe
1161
+ // commands need parent opt-in (fast mode) and dangerous commands
1162
+ // are always denied.
945
1163
  const prefs = readPreferences(this.workspaceRoot);
946
- if (this.silent) {
947
- if (!prefs.autoApproveShell) {
948
- return (`Command execution denied: silent child agents may not run shell ` +
949
- `without parent opt-in. Set \`autoApproveShell\` (via /yolo on) ` +
950
- `in the workspace preferences, or have a parent agent run this command.`);
1164
+ const approval = resolveRunCommandApproval(prefs, cmd, { silent: this.silent });
1165
+ if (approval === 'deny-silent') {
1166
+ if (isDangerousCommand(cmd)) {
1167
+ return (`Command execution denied: dangerous command in a silent child agent. ` +
1168
+ `Silent children can't answer the y/N prompt, so destructive commands ` +
1169
+ `(rm -rf, sudo, force-push, …) are refused regardless of /mode. ` +
1170
+ `Have a parent agent run this command, or split it into a safer ` +
1171
+ `equivalent.`);
951
1172
  }
952
- console.log(chalk.gray(`▶ Auto-approved (silent child): ${chalk.cyan(cmd)}`));
1173
+ return (`Command execution denied: silent child agents may not run shell ` +
1174
+ `without parent opt-in. Switch the session to \`/mode fast\` (or set ` +
1175
+ `the legacy \`autoApproveShell\` pref) to let silent children run ` +
1176
+ `safe commands, or have a parent agent run this command.`);
953
1177
  }
954
- else if (!prefs.autoApproveShell) {
955
- // Use the parent REPL's readline interface for the y/N prompt.
956
- // Spinning up an inquirer prompt opens a second readline against
957
- // the same stdin and dumps a stray "line" event back into the
958
- // parent rl when it exits, which used to surface as the bogus
959
- // "A previous turn is still running" warning.
960
- console.log(`\n${chalk.yellow('⚠️ Command execution request:')} ${chalk.cyan(cmd)}`);
1178
+ if (approval === 'auto-approve') {
1179
+ const tag = this.silent ? 'Auto-approved (silent child)' : 'Auto-approved';
1180
+ console.log(chalk.gray(`▶ ${tag}: ${chalk.cyan(cmd)}`));
1181
+ }
1182
+ else {
1183
+ // approval === 'ask': interactive y/N. Use the parent REPL's
1184
+ // readline interface; spinning up an inquirer prompt opens a second
1185
+ // readline against the same stdin and dumps a stray "line" event
1186
+ // back into the parent rl when it exits, which used to surface as
1187
+ // the bogus "A previous turn is still running" warning.
1188
+ const dangerNote = isDangerousCommand(cmd)
1189
+ ? chalk.red(' (flagged as potentially destructive)')
1190
+ : '';
1191
+ console.log(`\n${chalk.yellow('⚠️ Command execution request:')} ${chalk.cyan(cmd)}${dangerNote}`);
961
1192
  const approved = await askYesNo('Allow execution? (y/N) ', false);
962
1193
  if (!approved) {
963
1194
  return 'Command execution rejected by user.';
964
1195
  }
965
1196
  }
966
- else {
967
- console.log(chalk.gray(`▶ Auto-approved: ${chalk.cyan(cmd)}`));
968
- }
969
1197
  const sandboxConfig = resolveSandboxConfig(this.workspaceRoot, {
970
1198
  readPaths: prefs.sandboxReadPaths,
971
1199
  writePaths: prefs.sandboxWritePaths,
@@ -984,7 +1212,7 @@ export class Agent {
984
1212
  try {
985
1213
  const res = await fetch(url, {
986
1214
  headers: {
987
- 'User-Agent': 'Mozilla/5.0 (compatible; BrainRouterCLI/0.3.5)'
1215
+ 'User-Agent': 'Mozilla/5.0 (compatible; BrainRouterCLI/0.3.7)'
988
1216
  }
989
1217
  });
990
1218
  if (!res.ok) {
@@ -1026,6 +1254,48 @@ export class Agent {
1026
1254
  }, this.sessionKey);
1027
1255
  return formatPlan(state);
1028
1256
  }
1257
+ case 'ask_user_choice': {
1258
+ const question = String(args.question ?? '').trim();
1259
+ const header = String(args.header ?? '').trim();
1260
+ const rawOptions = Array.isArray(args.options) ? args.options : [];
1261
+ if (!question)
1262
+ throw new Error('ask_user_choice requires a non-empty `question`.');
1263
+ if (!header)
1264
+ throw new Error('ask_user_choice requires a non-empty `header`.');
1265
+ if (rawOptions.length < 2 || rawOptions.length > 4) {
1266
+ throw new Error(`ask_user_choice requires 2–4 options; received ${rawOptions.length}.`);
1267
+ }
1268
+ const options = rawOptions.map((o, i) => {
1269
+ const label = String(o?.label ?? '').trim();
1270
+ const description = String(o?.description ?? '').trim();
1271
+ if (!label)
1272
+ throw new Error(`ask_user_choice option ${i + 1} is missing "label".`);
1273
+ if (!description)
1274
+ throw new Error(`ask_user_choice option ${i + 1} is missing "description".`);
1275
+ return { label, description };
1276
+ });
1277
+ // Silent child agents have no parent stdin/REPL bridge, so the
1278
+ // helper's TTY check would error anyway — but giving a clearer message
1279
+ // up front saves the LLM an iteration.
1280
+ if (this.silent) {
1281
+ throw new NoTTYError('ask_user_choice is not available to silent child agents. Decide the answer yourself, ' +
1282
+ 'state which option you picked and why, and return that as your final answer to the parent.');
1283
+ }
1284
+ // Eager TTY check so we fail without disturbing the screen. askChoice
1285
+ // also checks (defense-in-depth for direct callers), but doing it here
1286
+ // means the LLM gets a clean error before the picker tries to render.
1287
+ if (!getActiveReadline() || !process.stdin.isTTY) {
1288
+ throw new NoTTYError('ask_user_choice requires an interactive TTY. ' +
1289
+ 'Fall back to deciding yourself and state which option you picked and why.');
1290
+ }
1291
+ // header is rendered by the picker itself (chip line at the top of
1292
+ // the frame), so we just thread it through opts.
1293
+ const answer = await askChoice(question, options, {
1294
+ multiSelect: !!args.multiSelect,
1295
+ header,
1296
+ });
1297
+ return JSON.stringify({ answer });
1298
+ }
1029
1299
  case 'goal_complete': {
1030
1300
  const proof = String(args.proof ?? '').trim();
1031
1301
  if (!proof)
@@ -1102,6 +1372,10 @@ export class Agent {
1102
1372
  next.push({ role: 'user', content: lastUserMessage });
1103
1373
  this.chatHistory = next;
1104
1374
  this.initialized = true;
1375
+ // 9b: compaction just dropped the prior briefing as collateral —
1376
+ // force the next turn through the full recall path even in gated
1377
+ // mode so the model isn't blind to what was load-bearing.
1378
+ this.recallNextTurnIsPostCompaction = true;
1105
1379
  return { ...result, replacedMessages: before };
1106
1380
  }
1107
1381
  /** Runtime model switch. Used by `/model` slash command. */
@@ -1139,6 +1413,10 @@ export class Agent {
1139
1413
  });
1140
1414
  this.chatHistory = [this.createSystemMessage(), ...replay];
1141
1415
  this.initialized = true;
1416
+ // 9b: a freshly-loaded history is a session boundary; reset gated
1417
+ // recall state so the next turn refreshes the briefing.
1418
+ this.recallHasFiredThisSession = false;
1419
+ this.recallNextTurnIsPostCompaction = false;
1142
1420
  return replay.length;
1143
1421
  }
1144
1422
  /** Cumulative token usage across the last runTurn. Cleared at each new turn. */
@@ -1206,6 +1484,23 @@ export class Agent {
1206
1484
  const marker = `<!--brainrouter:${tag}-->\n`;
1207
1485
  this.chatHistory = this.chatHistory.filter((msg) => !(msg.role === 'system' && typeof msg.content === 'string' && msg.content.startsWith(marker)));
1208
1486
  }
1487
+ /**
1488
+ * Zero the in-process counters that back `/tokens`. Call this on any
1489
+ * conceptual session boundary (`/resume`, `fork`) — otherwise the parent
1490
+ * row keeps accumulating across the switch and "this session" no longer
1491
+ * matches the displayed sessionKey.
1492
+ */
1493
+ resetSessionCounters() {
1494
+ this.sessionUsage = { promptTokens: 0, completionTokens: 0, calls: 0, turns: 0 };
1495
+ this.memoryMetrics = {
1496
+ briefingTokensInjected: 0,
1497
+ offloadCharsAvoided: 0,
1498
+ recallRecordsConsulted: 0,
1499
+ };
1500
+ // 9b: session-boundary reset for gated recall.
1501
+ this.recallHasFiredThisSession = false;
1502
+ this.recallNextTurnIsPostCompaction = false;
1503
+ }
1209
1504
  /** Fork the current chat history into a fresh sessionKey. Returns the new key. */
1210
1505
  fork(newSessionKey) {
1211
1506
  this.sessionKey = newSessionKey;
@@ -1217,6 +1512,7 @@ export class Agent {
1217
1512
  else {
1218
1513
  this.chatHistory = [this.createSystemMessage(), ...this.chatHistory];
1219
1514
  }
1515
+ this.resetSessionCounters();
1220
1516
  return this.sessionKey;
1221
1517
  }
1222
1518
  async bootstrapSession(callbacks) {
@@ -1237,26 +1533,62 @@ export class Agent {
1237
1533
  this.chatHistory = [this.createSystemMessage()];
1238
1534
  this.initialized = true;
1239
1535
  }
1536
+ /**
1537
+ * Public, callback-free wrapper around bootstrapSession for slash commands
1538
+ * that mutate per-session state (notably `/goal`) BEFORE any runTurn has
1539
+ * fired. Without this, the FIRST `/goal` of a session writes goal.json
1540
+ * under the deterministic fallback sessionKey ("brainrouter-cli:<path>")
1541
+ * because bootstrap hasn't happened yet, but every subsequent runTurn
1542
+ * reads from the MCP-resolved UUID sessionKey — split-brain that left
1543
+ * the agent reading a stale goal from a different directory.
1544
+ *
1545
+ * Idempotent: returns immediately if already initialized. Tolerates
1546
+ * missing MCP — falls back to the deterministic key the same way
1547
+ * bootstrapSession does.
1548
+ */
1549
+ async ensureInitialized() {
1550
+ if (this.initialized)
1551
+ return;
1552
+ // Stub the callbacks bootstrapSession expects — no UI plumbing needed
1553
+ // for the eager-init path; the status line is for runTurn's spinner.
1554
+ await this.bootstrapSession({
1555
+ onStatusUpdate: () => { },
1556
+ onToolStart: () => { },
1557
+ onToolEnd: () => { },
1558
+ });
1559
+ }
1240
1560
  createSystemMessage() {
1241
1561
  const prefs = readPreferences(this.workspaceRoot);
1562
+ // 10b: pass the connected MCP tool inventory so `buildSystemPrompt`
1563
+ // can omit the BrainRouter memory section when the brain is offline.
1564
+ // The cached `lastKnownMcpTools` is populated by every successful
1565
+ // `listTools()` (see `runTurn` and `bootstrapSession`); when no tools
1566
+ // have been seen yet, leave it undefined — `buildSystemPrompt` treats
1567
+ // that as "assume brain online" for back-compat.
1568
+ const connectedMcpTools = this.lastKnownMcpTools?.map((t) => t.name);
1242
1569
  const base = this.systemPromptOverride ?? buildSystemPrompt({
1243
1570
  workspaceRoot: this.workspaceRoot,
1244
1571
  launchCwd: this.launchCwd,
1245
1572
  sessionKey: this.sessionKey,
1246
1573
  instructionSummary: loadWorkspaceInstructionSummary(this.workspaceRoot),
1247
1574
  personality: prefs.personality,
1575
+ activeSkill: this.activeSkill,
1576
+ executionMode: prefs.executionMode,
1577
+ reviewPolicy: prefs.reviewPolicy,
1578
+ effort: resolveEffort(this.workspaceRoot).effort,
1579
+ connectedMcpTools,
1248
1580
  });
1249
1581
  const parts = [base];
1250
1582
  if (this.roleOverlay)
1251
1583
  parts.push(this.roleOverlay);
1252
- // Sticky goal lives on disk so it survives CLI restarts; injected here so
1253
- // every turn (including the first after `/resume`) sees it. Goals are
1254
- // scoped to the current sessionKey so /side and /fork don't drag their
1255
- // parent's goal along, but a workspace-level legacy goal still works as a
1256
- // fallback for sessions that don't have one yet.
1257
- const goal = readGoal(this.workspaceRoot, this.sessionKey);
1258
- if (goal?.text)
1259
- parts.push(formatGoalBlock(goal));
1584
+ // Goal text used to be appended here AND re-pushed as a per-turn
1585
+ // `goal-anchor` tagged system message (runTurn around line 680), which
1586
+ // meant the whole goal block landed in the prompt twice every turn.
1587
+ // 9d removed the duplicate; the per-turn anchor is the single owner
1588
+ // of goal state (text, status, budget, contract reminders, and the
1589
+ // final-budget wrap-up directive). `runTurn` re-injects it via
1590
+ // `formatGoalBlock` immediately before the user message is appended,
1591
+ // so even first-turn-after-`/resume` sees the goal.
1260
1592
  return { role: 'system', content: parts.join('\n\n') };
1261
1593
  }
1262
1594
  async injectRecallContext(prompt, mcpTools, callbacks) {
@@ -1267,7 +1599,58 @@ export class Agent {
1267
1599
  callbacks.onMemoryEvent?.({ kind: 'skipped', reason: this.silent ? 'silent agent (child)' : 'recall disabled' });
1268
1600
  return;
1269
1601
  }
1602
+ // 9b: gate recall instead of firing unconditionally every turn. Pre-9b
1603
+ // every turn paid ~3-10K tokens for a fresh briefing even when the user
1604
+ // message was "thanks" or "/help". The new default `gated` mode fires
1605
+ // recall only when it's likely to pay off:
1606
+ // - turn 1 of the session (no prior briefing)
1607
+ // - the turn immediately after auto-compaction (the model just lost
1608
+ // context — give it back what was load-bearing)
1609
+ // - when the user message names ≥2 entity-shaped tokens (proper
1610
+ // nouns, file paths, identifiers) suggesting they're asking about
1611
+ // something specific that memory might have history on
1612
+ // The env knob `BRAINROUTER_RECALL_MODE=always|gated|off` lets users
1613
+ // preserve pre-9b behaviour or kill recall entirely for benchmarking.
1614
+ const recallMode = resolveRecallMode();
1615
+ if (recallMode === 'off') {
1616
+ this.recalledRecords = [];
1617
+ this.recalledRecordIds = [];
1618
+ this.lastBriefingSources = [];
1619
+ callbacks.onMemoryEvent?.({ kind: 'skipped', reason: 'recallMode=off' });
1620
+ return;
1621
+ }
1622
+ if (recallMode === 'gated') {
1623
+ const isFirstTurn = !this.recallHasFiredThisSession;
1624
+ const justCompacted = this.recallNextTurnIsPostCompaction;
1625
+ const entityHits = countEntityTokens(prompt);
1626
+ const hasEntityCue = entityHits >= 2;
1627
+ if (!isFirstTurn && !justCompacted && !hasEntityCue) {
1628
+ // Skip the full briefing — emit a lightweight system-reminder so
1629
+ // the model knows it can pull memory itself if it needs to. The
1630
+ // reminder is tagged so the next turn replaces it cleanly.
1631
+ this.replaceTaggedSystemMessage('memory-hint', [
1632
+ '## Memory available (gated mode)',
1633
+ 'BrainRouter memory is available this turn but the auto-briefing was skipped (no first-turn / post-compaction / entity-cue trigger). Call `memory_recall` / `memory_search` / `memory_file_history` yourself if you need history on a specific entity, file, or decision.',
1634
+ ].join('\n'));
1635
+ this.recalledRecords = [];
1636
+ this.recalledRecordIds = [];
1637
+ this.lastBriefingSources = [];
1638
+ callbacks.onMemoryEvent?.({ kind: 'skipped', reason: 'gated (no trigger)' });
1639
+ return;
1640
+ }
1641
+ // Reset the post-compaction flag now that we're firing because of it.
1642
+ this.recallNextTurnIsPostCompaction = false;
1643
+ }
1644
+ // Either `recallMode === 'always'` (preserves pre-9b behaviour) or
1645
+ // we hit a gated trigger — fire the full briefing.
1270
1646
  callbacks.onStatusUpdate('Briefing from BrainRouter memory...');
1647
+ // 9d: skip `memory_task_state` in the briefing when a goal-anchor is
1648
+ // already carrying the current objective — avoids re-injecting the
1649
+ // "what we're doing now" context twice. The anchor is set immediately
1650
+ // before this call in `runTurn` (around line 680), so reading the goal
1651
+ // here resolves to the same record the anchor used.
1652
+ const activeGoal = readGoal(this.workspaceRoot, this.sessionKey);
1653
+ const hasActiveGoal = !!(activeGoal?.text && activeGoal.status === 'active');
1271
1654
  const briefing = await buildMemoryBriefing({
1272
1655
  mcpClient: this.mcpClient,
1273
1656
  mcpTools,
@@ -1275,10 +1658,14 @@ export class Agent {
1275
1658
  workspaceRoot: this.workspaceRoot,
1276
1659
  query: prompt,
1277
1660
  activeSkill: this.activeSkill,
1661
+ hasActiveGoal,
1278
1662
  });
1279
1663
  this.recalledRecords = briefing.recalledRecords;
1280
1664
  this.recalledRecordIds = briefing.recalledRecordIds;
1281
1665
  this.lastBriefingSources = briefing.sourcesQueried;
1666
+ this.recallHasFiredThisSession = true;
1667
+ // Drop any prior lightweight hint now that the full briefing is live.
1668
+ this.removeTaggedSystemMessage('memory-hint');
1282
1669
  if (briefing.block) {
1283
1670
  this.replaceTaggedSystemMessage('memory-briefing', briefing.block);
1284
1671
  callbacks.onStatusUpdate(`Memory briefing loaded: ${briefing.sourcesQueried.join(', ')} (${briefing.recalledRecordIds.length} records).`);
@@ -1295,6 +1682,15 @@ export class Agent {
1295
1682
  getLastBriefing() {
1296
1683
  return { sources: [...this.lastBriefingSources], recordIds: [...this.recalledRecordIds] };
1297
1684
  }
1685
+ /**
1686
+ * Snapshot of the records produced by the most recent pre-turn briefing.
1687
+ * `/where` surfaces a few of these to give the user a sense of what the
1688
+ * agent is leaning on right now. Returns a shallow copy so callers can't
1689
+ * mutate the agent's internal state.
1690
+ */
1691
+ getRecalledRecords() {
1692
+ return [...this.recalledRecords];
1693
+ }
1298
1694
  /** One-line summary of any new contradiction surfaced after the last capture, or undefined if none. */
1299
1695
  lastContradictionWarning;
1300
1696
  takeContradictionWarning() {
@@ -1424,7 +1820,7 @@ async function runWebSearch(query, maxResults) {
1424
1820
  }
1425
1821
  try {
1426
1822
  const url = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
1427
- const res = await fetch(url, { headers: { 'User-Agent': 'BrainRouterCLI/0.3.5' } });
1823
+ const res = await fetch(url, { headers: { 'User-Agent': 'BrainRouterCLI/0.3.7' } });
1428
1824
  if (!res.ok) {
1429
1825
  return `web_search failed: DuckDuckGo returned ${res.status} ${res.statusText}.`;
1430
1826
  }
@@ -1835,7 +2231,102 @@ function formatBytes(n) {
1835
2231
  // per-turn system messages (briefing, fan-out hint). Strip them before the
1836
2232
  // payload reaches the LLM so the model doesn't see the bookkeeping.
1837
2233
  const TAG_MARKER_RE = /^<!--brainrouter:[a-z0-9-]+-->\n/;
1838
- export function buildChatCompletionPayload(config, messages, tools) {
2234
/*
 * NOTE: this comment documents `supportsReasoningEffortField`, which is
 * defined further down in this file (after `countEntityTokens`). It does
 * NOT describe the function that immediately follows it.
 *
 * Heuristic for "does this model accept the OpenAI Chat Completions
 * `reasoning_effort` field?". The signal that actually matters is the
 * **model name**, not the endpoint hostname — modern OpenAI-compatible
 * servers (LM Studio 0.3.29+, Ollama, vLLM, OpenRouter, OpenAI itself)
 * all accept the field on /v1/chat/completions for the reasoning-capable
 * model classes listed in that function, and silently ignore it for
 * everything else. So a `gpt-oss-20b` served from localhost via LM Studio
 * gets the same treatment as `gpt-5` on `api.openai.com`.
 *
 * The shape is borrowed from openai-node's `ReasoningEffort` enum
 * (openSrc/openai-node/src/resources/shared.ts) — `low|medium|high` map
 * straight through to the provider field across OpenAI, DeepSeek,
 * LM Studio, Ollama, and OpenRouter's pass-through. Anthropic models
 * (`claude-*`) use a different field shape (`thinking: { budget_tokens }`)
 * and a different endpoint (`/v1/messages`), so they are intentionally
 * skipped — brainrouter would need a separate provider adapter to
 * forward into Anthropic's native API.
 */
2253
/**
 * 9b: work out which recall-gating mode applies right now.
 *
 * Reads `BRAINROUTER_RECALL_MODE` from `process.env` on every call (nothing
 * is cached), compares it case-insensitively with surrounding whitespace
 * ignored, and accepts `always`, `gated` or `off`. An unset variable or any
 * other value — a typo included — resolves to `gated` rather than raising.
 *
 * @returns {'always' | 'gated' | 'off'} the effective recall mode.
 */
export function resolveRecallMode() {
    const requested = (process.env.BRAINROUTER_RECALL_MODE ?? '').toLowerCase().trim();
    switch (requested) {
        case 'always':
        case 'gated':
        case 'off':
            return requested;
        default:
            // Unset or unrecognised: be helpful instead of crashing.
            return 'gated';
    }
}
2266
/**
 * 9b: cheap local heuristic for "the user message names something specific
 * that memory might have history on". Returns the total number of
 * entity-shaped matches found by three independent passes:
 *
 *  1. path-like tokens — anything ending in a `.<ext>` suffix of 1–8
 *     letters, or containing at least one `/`-separated segment;
 *  2. identifier-shaped tokens — `camelCase`, `PascalCase` (with an inner
 *     capital) or `snake_case`;
 *  3. proper nouns — Capitalized words of ≥3 letters that are not the first
 *     word of a sentence.
 *
 * The passes are not de-duplicated, so one token (e.g. a path containing a
 * camelCase file name) can contribute more than one hit. The bar is only
 * "is recall plausibly worth it?"; the caller chooses the threshold.
 *
 * @param {string} text user message to inspect.
 * @returns {number} number of entity-shaped hits; `0` for empty or
 *   non-string input.
 */
export function countEntityTokens(text) {
    // Non-string content (undefined, null, structured message parts, …) has
    // no `.match`/`.split`; treat it as "no entities" instead of throwing.
    if (typeof text !== 'string' || text.length === 0)
        return 0;
    // Path-like tokens: `name.ext` suffixes or `dir/segment` chains.
    const pathHits = text.match(/[A-Za-z0-9_./\\-]+\.[A-Za-z]{1,8}(?![A-Za-z])|(?:[\w-]+\/){1,}[\w.-]+/g)?.length ?? 0;
    // Identifier-shaped tokens (camelCase, PascalCase, snake_case).
    const identHits = text.match(/\b(?:[a-z]+[A-Z][A-Za-z0-9]+|[A-Z][a-z]+[A-Z][A-Za-z0-9]+|[a-z]+_[a-z][\w]+)\b/g)?.length ?? 0;
    // Proper nouns. Split on sentence boundaries so the capitalized first
    // word of each sentence is skipped.
    let properNounHits = 0;
    for (const sentence of text.split(/[.!?]\s+/)) {
        // Trim first: leading whitespace would otherwise produce an empty
        // first token and the real sentence-initial word would be counted.
        const words = sentence.trim().split(/\s+/);
        for (let i = 1; i < words.length; i++) {
            const letters = words[i].replace(/[^A-Za-z]/g, '');
            if (letters.length >= 3 && /^[A-Z][a-z]+$/.test(letters))
                properNounHits++;
        }
    }
    return pathHits + identHits + properNounHits;
}
2300
// Model-family prefixes known to accept `reasoning_effort` on
// OpenAI-compatible /chat/completions surfaces. Built once at module load
// rather than on every call.
const REASONING_FAMILY_PATTERNS = Object.freeze([
    /^gpt-5/, // gpt-5, gpt-5-mini, gpt-5-pro, gpt-5.1, gpt-5-codex-max
    /^o[134](-|$|\.)/, // o1, o3, o4 and dated / sized variants
    /^gpt-oss/, // gpt-oss-20b / 120b (LM Studio 0.3.29+, Ollama, llama.cpp)
    /^deepseek-r[12]/, // DeepSeek R1, R2
    /^deepseek-v[34]/, // DeepSeek V3.1+, V4 reasoning variants
    /^qwen3/, // Qwen3 reasoning variants (LM Studio, Ollama)
    /^magistral/, // Mistral Magistral (small/medium reasoning)
]);
// Unanchored catch-alls for entrants not enumerated above
// (`phi-4-reasoning-plus`, `qwen3-30b-a3b-thinking`, …).
const REASONING_KEYWORD_PATTERNS = Object.freeze([
    /reasoning/,
    /thinking/,
]);
/**
 * Decide from the model name alone whether `reasoning_effort` should be
 * forwarded for this configuration.
 *
 * The name is trimmed, lower-cased and split on `/` so vendor prefixes
 * (`openai/gpt-oss-20b`, `mistralai/magistral-small`, `deepseek/deepseek-r1`)
 * are ignored. The final segment is tested against every pattern. Earlier
 * segments are tested against the anchored family patterns only, so a
 * stacked name such as `openai/gpt-oss/20b-variant` is still recognised,
 * while a vendor prefix that merely contains "thinking" or "reasoning" is not.
 *
 * @param {{ model?: string }} config LLM configuration; only `model` is read.
 * @returns {boolean} `true` when the model name matches a known reasoning
 *   family or keyword; `false` otherwise, including when `config` or
 *   `config.model` is missing or not a string.
 */
export function supportsReasoningEffortField(config) {
    const rawModel = config?.model;
    if (typeof rawModel !== 'string')
        return false;
    const segments = rawModel.trim().toLowerCase().split('/');
    const leaf = segments[segments.length - 1];
    const matchesFamily = (name) => REASONING_FAMILY_PATTERNS.some((re) => re.test(name));
    if (matchesFamily(leaf) || REASONING_KEYWORD_PATTERNS.some((re) => re.test(leaf)))
        return true;
    // Stacked prefixes: the family name may sit in a middle segment.
    return segments.slice(0, -1).some(matchesFamily);
}
2329
+ export function buildChatCompletionPayload(config, messages, tools, options = {}) {
1839
2330
  const stripTag = (content) => typeof content === 'string' && TAG_MARKER_RE.test(content)
1840
2331
  ? content.replace(TAG_MARKER_RE, '')
1841
2332
  : content;
@@ -1874,10 +2365,25 @@ export function buildChatCompletionPayload(config, messages, tools) {
1874
2365
  }));
1875
2366
  body.tool_choice = 'auto';
1876
2367
  }
2368
+ // Forward reasoning_effort only when the level is non-default AND the
2369
+ // endpoint+model combo is a known reasoning surface. `medium` is the
2370
+ // CLI default and forwarding it would change every existing user's
2371
+ // request shape on upgrade for no behavioural gain.
2372
+ if (options.effort && options.effort !== 'medium' && supportsReasoningEffortField(config)) {
2373
+ body.reasoning_effort = options.effort;
2374
+ }
1877
2375
  return body;
1878
2376
  }
1879
- export async function callOpenAI(config, messages, tools) {
1880
- const endpoint = config.endpoint || 'https://api.openai.com/v1';
2377
+ export async function callOpenAI(config, messages, tools, options = {}) {
2378
+ // Normalize the endpoint to a base URL (everything UP TO `/chat/completions`
2379
+ // exclusive). Earlier callers stored the full chat-completions URL in
2380
+ // `config.endpoint` (e.g. "https://api.openai.com/v1/chat/completions")
2381
+ // because the in-terminal wizard's provider catalog wrote the full path.
2382
+ // We then re-append `/chat/completions` below, producing a duplicate
2383
+ // `/chat/completions/chat/completions` and a 404. Strip the suffix
2384
+ // defensively so both shapes (full URL or base URL) work.
2385
+ const rawEndpoint = config.endpoint || 'https://api.openai.com/v1';
2386
+ const endpoint = rawEndpoint.replace(/\/+$/, '').replace(/\/chat\/completions$/, '');
1881
2387
  let apiKey = config.apiKey || process.env.OPENAI_API_KEY || '';
1882
2388
  const isLocal = endpoint.includes('localhost') || endpoint.includes('127.0.0.1');
1883
2389
  if (!apiKey && !isLocal) {
@@ -1886,7 +2392,7 @@ export async function callOpenAI(config, messages, tools) {
1886
2392
  if (!apiKey && isLocal) {
1887
2393
  apiKey = 'sk-local-placeholder';
1888
2394
  }
1889
- const body = buildChatCompletionPayload(config, messages, tools);
2395
+ const body = buildChatCompletionPayload(config, messages, tools, options);
1890
2396
  const headers = {
1891
2397
  'Content-Type': 'application/json'
1892
2398
  };