npm - @kinqs/brainrouter-cli - Versions diffs - 0.3.5 → 0.3.6 - Mend

@kinqs/brainrouter-cli 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

package/.env.example +55 -48
package/bin/cli.cjs +71 -0
package/dist/agent/agent.d.ts +212 -2
package/dist/agent/agent.js +428 -38
package/dist/cli/banner.d.ts +60 -0
package/dist/cli/banner.js +199 -0
package/dist/cli/cliPrompt.d.ts +69 -0
package/dist/cli/cliPrompt.js +287 -0
package/dist/cli/commands/_helpers.js +6 -6
package/dist/cli/commands/guard.js +75 -10
package/dist/cli/commands/mcp.d.ts +17 -0
package/dist/cli/commands/mcp.js +121 -0
package/dist/cli/commands/memory.js +2 -2
package/dist/cli/commands/obs.js +22 -22
package/dist/cli/commands/session.js +13 -5
package/dist/cli/commands/ui.js +97 -45
package/dist/cli/commands/workflow.d.ts +18 -0
package/dist/cli/commands/workflow.js +314 -43
package/dist/cli/repl.js +219 -132
package/dist/cli/spinner.d.ts +34 -0
package/dist/cli/spinner.js +36 -0
package/dist/cli/statusline.d.ts +67 -0
package/dist/cli/statusline.js +204 -0
package/dist/cli/theme.d.ts +79 -0
package/dist/cli/theme.js +106 -0
package/dist/cli/whereView.d.ts +81 -0
package/dist/cli/whereView.js +245 -0
package/dist/config/config.d.ts +40 -0
package/dist/config/config.js +45 -73
package/dist/index.js +80 -13
package/dist/memory/briefing.d.ts +10 -0
package/dist/memory/briefing.js +69 -1
package/dist/prompt/breadthHint.d.ts +5 -0
package/dist/prompt/breadthHint.js +44 -0
package/dist/prompt/systemPrompt.d.ts +34 -0
package/dist/prompt/systemPrompt.js +124 -108
package/dist/runtime/dangerousCommand.d.ts +53 -0
package/dist/runtime/dangerousCommand.js +105 -0
package/dist/runtime/mcpClient.d.ts +38 -1
package/dist/runtime/mcpClient.js +90 -2
package/dist/state/goalStore.d.ts +98 -17
package/dist/state/goalStore.js +132 -42
package/dist/state/preferencesStore.d.ts +67 -3
package/dist/state/preferencesStore.js +84 -1
package/dist/state/workflowArtifacts.d.ts +63 -2
package/dist/state/workflowArtifacts.js +120 -8
package/dist/tests/_helpers.d.ts +31 -0
package/dist/tests/_helpers.js +91 -0
package/package.json +5 -4

package/dist/agent/agent.js CHANGED

@@ -2,8 +2,9 @@ import fs from 'node:fs';
 import path from 'node:path';
 import { exec } from 'node:child_process';
 import { promisify } from 'node:util';
+import { randomUUID } from 'node:crypto';
 import chalk from 'chalk';
-import { askYesNo } from '../cli/cliPrompt.js';
+import { askChoice, askYesNo, getActiveReadline, NoTTYError } from '../cli/cliPrompt.js';
 import { appendTranscriptEntry } from '../state/sessionStore.js';
 import { buildSystemPrompt, loadWorkspaceInstructionSummary } from '../prompt/systemPrompt.js';
 import { formatPlan, readPlan, updatePlan } from '../state/taskStore.js';
@@ -14,7 +15,8 @@ import { acquireLLMSlot } from '../runtime/llmSemaphore.js';
 import { blockGoal, completeGoal, formatGoalBlock, readGoal } from '../state/goalStore.js';
 import { runHooks } from '../state/hooksStore.js';
 import { resolveSandboxConfig, runShell } from '../runtime/sandbox.js';
-import { readPreferences } from '../state/preferencesStore.js';
+import { isDangerousCommand, resolveRunCommandApproval } from '../runtime/dangerousCommand.js';
+import { readPreferences, resolveEffort } from '../state/preferencesStore.js';
 import { startSpan, traceEvent } from '../runtime/tracing.js';
 import { buildHookifyContext, evaluateHookify, listHookifyRules } from '../state/hookifyStore.js';
 import { renderCompactSystemMessage, runCompaction } from '../prompt/compactor.js';
@@ -146,6 +148,41 @@ export const LOCAL_TOOLS = [
     createReadAgentTranscriptTool(),
     createCloseAgentTool(),
     createRouteAgentTool(),
+    {
+        name: 'ask_user_choice',
+        description: 'Pause the turn and ask the human to commit to ONE of 2–4 mutually exclusive approaches. ' +
+            'Renders an arrow-key picker (↑/↓ navigate, ENTER confirm; SPACE toggles in multiSelect mode) ' +
+            'with an always-on "Other" row that drops to a free-text prompt — the user is never trapped between bad options. ' +
+            'Returns { answer: <chosen label or free-text> } in single-select, or { answer: [labels/free-text…] } in multiSelect. ' +
+            'Use ONLY when there is genuine ambiguity that needs the user\'s judgment — NOT for trivial yes/no confirmations ' +
+            '(`askYesNo` is wired into approval gates already), NOT for things you can decide yourself with the available context, ' +
+            'and NOT as a substitute for thinking. ' +
+            'Errors in non-interactive runs (CI / piped / `brainrouter run`) and when the user cancels (Esc/q/Ctrl+C); ' +
+            'on either error, decide yourself and say which option you picked and why.',
+        inputSchema: {
+            type: 'object',
+            properties: {
+                question: { type: 'string', description: 'The question to ask the user (complete sentence ending with `?`).' },
+                header: { type: 'string', description: 'Short chip-style label (≤12 chars) shown above the question, e.g. "Auth method" or "Storage".' },
+                options: {
+                    type: 'array',
+                    description: '2–4 mutually exclusive choices. Each option needs a short label and a one-line description.',
+                    minItems: 2,
+                    maxItems: 4,
+                    items: {
+                        type: 'object',
+                        properties: {
+                            label: { type: 'string', description: 'Short display text (1–5 words).' },
+                            description: { type: 'string', description: 'One-line explanation of what this option means or what will happen if chosen.' },
+                        },
+                        required: ['label', 'description'],
+                    },
+                },
+                multiSelect: { type: 'boolean', description: 'When true, allow the user to pick multiple options (comma-separated input). Defaults to false.' },
+            },
+            required: ['question', 'header', 'options'],
+        },
+    },
     {
         name: 'update_plan',
         description: 'Create or update the durable CLI task plan. Use this for multi-step work and keep at most one item in_progress.',
@@ -326,6 +363,28 @@ export class Agent {
     recalledRecordIds = [];
     recalledRecords = [];
     lastBriefingSources = [];
+    /**
+     * 10b: latest MCP tool inventory captured by `listTools()` calls. Used by
+     * `createSystemMessage` to decide whether the BrainRouter memory section
+     * should render — when `memory_recall` is missing from this list (the
+     * cloud brain is offline), the prompt swaps to a brain-offline notice so
+     * the model doesn't try to call tools that aren't there. Undefined until
+     * the first successful list; treated as "assume online" by the prompt
+     * builder until then (back-compat for callers that don't list pre-turn).
+     */
+    lastKnownMcpTools;
+    /**
+     * 9b: gated recall state. `recallHasFiredThisSession` flips to true on the
+     * first successful briefing injection so subsequent turns can skip the
+     * fresh recall pull unless a gated trigger fires. `recallNextTurnIsPost-
+     * Compaction` is set by `compactHistory()` to force the next turn through
+     * the full briefing path (compaction just dropped the prior briefing as
+     * collateral; replay it once so the model isn't blind). Both are
+     * cleared on `loadHistory` / `fork` / `bootstrapSession` so a fresh
+     * session re-pulls.
+     */
+    recallHasFiredThisSession = false;
+    recallNextTurnIsPostCompaction = false;
     roleOverlay;
     accessMode;
     silent;
@@ -360,7 +419,15 @@ export class Agent {
         this.llmConfig = llmConfig;
         this.workspaceRoot = options.workspaceRoot;
         this.launchCwd = options.launchCwd;
-        this.sessionKey = options.sessionKey ?? `brainrouter-cli:${this.workspaceRoot}`;
+        // Each CLI process gets a fresh sessionKey by default. The previous
+        // workspace-derived fallback (`brainrouter-cli:<workspaceRoot>`) made
+        // MCP's `memory_resolve_session` fall into its workspace-cache branch
+        // and return the same UUID for every CLI in the workspace, so two
+        // concurrent CLIs shared one goal/plan/working bucket. A randomUUID
+        // here is accepted by MCP's `isUniqueId` and echoed back as-is, so
+        // each CLI is its own session for local state. The memory DB is
+        // userId-scoped, so cross-CLI recall continuity is unaffected.
+        this.sessionKey = options.sessionKey ?? randomUUID();
         this.roleOverlay = options.roleOverlay;
         this.accessMode = options.accessMode ?? 'shell';
         this.silent = options.silent ?? false;
@@ -388,6 +455,10 @@ export class Agent {
             'spawn_agent', 'spawn_agents', 'list_agents', 'wait_agent', 'wait_agents',
             'read_agent_transcript', 'close_agent', 'route_agent',
             'goal_complete', 'goal_blocked',
+            // ask_user_choice doesn't touch the workspace — it's an interaction
+            // primitive, so it stays available in every access mode (and is gated
+            // structurally by activeReadline / isTTY in the helper itself).
+            'ask_user_choice',
         ]);
         const writeAdds = new Set(['write_file', 'edit_file', 'apply_patch']);
         const shellAdds = new Set(['run_command']);
@@ -428,6 +499,16 @@ export class Agent {
         catch (err) {
             // Non-fatal: continue with local tools only
         }
+        // 10b: cache the inventory so `createSystemMessage` can render a
+        // brain-online vs brain-offline prompt. Refresh chatHistory[0]
+        // whenever the inventory shape changed (online → offline or vice
+        // versa) so the next LLM call sees the correct system message.
+        const prevTools = this.lastKnownMcpTools?.map((t) => t.name).sort().join(',');
+        this.lastKnownMcpTools = mcpTools.map((t) => ({ name: t.name }));
+        const newTools = this.lastKnownMcpTools.map((t) => t.name).sort().join(',');
+        if (prevTools !== newTools && this.chatHistory.length > 0 && this.chatHistory[0].role === 'system') {
+            this.chatHistory[0] = this.createSystemMessage();
+        }
         const allowed = this.allowedToolsForAccess();
         const filteredLocalTools = LOCAL_TOOLS.filter(t => allowed.has(t.name));
         // Hide MCP tools we already call automatically. Small models otherwise
@@ -489,6 +570,29 @@ export class Agent {
                 callbacks.onToolEnd('breadth-detector', { success: true, summary: `fan-out hint injected (${intent.signals.length} signals)` });
             }
         }
+        // Per-turn goal anchor: re-inject a FRESH goal block at the end of the
+        // chatHistory's system messages (replaceTaggedSystemMessage appends), so
+        // it lands right before the user prompt. Pre-9d the goal block was ALSO
+        // embedded in the foundational system message (via createSystemMessage),
+        // which meant every turn carried two copies; 9d made this anchor the
+        // single source — `createSystemMessage` no longer touches goal state.
+        // The fresh re-push every iteration keeps the up-to-date iteration
+        // counter in immediate-context distance and prevents the long /goal
+        // continuation-loop drift that PR #26 originally addressed. The anchor
+        // also auto-folds the final-budget-turn wrap-up directive (via
+        // `formatGoalBlock`'s internal `goalIsOnFinalBudgetTurn` check), so
+        // the separate `goal-budget-steering` tagged message is gone too.
+        if (!this.silent) {
+            const activeGoal = readGoal(this.workspaceRoot, this.sessionKey);
+            if (activeGoal?.text && activeGoal.status === 'active') {
+                this.replaceTaggedSystemMessage('goal-anchor', formatGoalBlock(activeGoal));
+            }
+            else {
+                // No active goal — drop any stale anchor from a prior /goal so the
+                // model doesn't keep seeing a completed/cleared goal as "current."
+                this.removeTaggedSystemMessage('goal-anchor');
+            }
+        }
         const userMsg = { role: 'user', content: prompt };
         this.chatHistory.push(userMsg);
         this.recordTranscript(userMsg);
@@ -513,7 +617,11 @@ export class Agent {
             callbacks.onStatusUpdate(`Thinking (turn ${loopCount})...`);
             let response;
             try {
-                response = await callOpenAI(this.llmConfig, this.chatHistory, allTools);
+                // Re-resolve every loop iteration so an in-session `/effort` flip
+                // (which only refreshes the system prompt) also updates the next
+                // request's reasoning_effort slot — no restart needed.
+                const effort = resolveEffort(this.workspaceRoot).effort;
+                response = await callOpenAI(this.llmConfig, this.chatHistory, allTools, { effort });
             }
             catch (err) {
                 throw new Error(`LLM Execution failed: ${err.message}`);
@@ -935,37 +1043,49 @@ export class Agent {
                 if (this.accessMode !== 'shell') {
                     return `Command execution denied: agent access mode is "${this.accessMode}".`;
                 }
-                // Approval gating. Two cases:
-                //   • Interactive parent (this.silent === false): show y/N unless
-                //     autoApproveShell is set (i.e. /yolo on).
-                //   • Silent child: cannot prompt; the previous code path silently
-                //     auto-approved, which let a spawn_agent({role:'verifier'}) child
-                //     run arbitrary shell with no user gate — a sandbox bypass. Now
-                //     refuse unless the parent has explicitly opted in via prefs.
+                // Approval gating routes through the pure resolver in
+                // runtime/dangerousCommand.ts. Three outcomes:
+                //   • auto-approve: fast mode + safe command (or silent child whose
+                //     parent has opted in via fast mode).
+                //   • ask: planning mode, OR fast mode but the command matched the
+                //     dangerous heuristic (rm -rf, sudo, force-push, …).
+                //   • deny-silent: silent child agents can't answer y/N, so safe
+                //     commands need parent opt-in (fast mode) and dangerous commands
+                //     are always denied.
                 const prefs = readPreferences(this.workspaceRoot);
-                if (this.silent) {
-                    if (!prefs.autoApproveShell) {
-                        return (`Command execution denied: silent child agents may not run shell ` +
-                            `without parent opt-in. Set \`autoApproveShell\` (via /yolo on) ` +
-                            `in the workspace preferences, or have a parent agent run this command.`);
+                const approval = resolveRunCommandApproval(prefs, cmd, { silent: this.silent });
+                if (approval === 'deny-silent') {
+                    if (isDangerousCommand(cmd)) {
+                        return (`Command execution denied: dangerous command in a silent child agent. ` +
+                            `Silent children can't answer the y/N prompt, so destructive commands ` +
+                            `(rm -rf, sudo, force-push, …) are refused regardless of /mode. ` +
+                            `Have a parent agent run this command, or split it into a safer ` +
+                            `equivalent.`);
                     }
-                    console.log(chalk.gray(`▶  Auto-approved (silent child): ${chalk.cyan(cmd)}`));
+                    return (`Command execution denied: silent child agents may not run shell ` +
+                        `without parent opt-in. Switch the session to \`/mode fast\` (or set ` +
+                        `the legacy \`autoApproveShell\` pref) to let silent children run ` +
+                        `safe commands, or have a parent agent run this command.`);
                 }
-                else if (!prefs.autoApproveShell) {
-                    // Use the parent REPL's readline interface for the y/N prompt.
-                    // Spinning up an inquirer prompt opens a second readline against
-                    // the same stdin and dumps a stray "line" event back into the
-                    // parent rl when it exits, which used to surface as the bogus
-                    // "A previous turn is still running" warning.
-                    console.log(`\n${chalk.yellow('⚠️  Command execution request:')} ${chalk.cyan(cmd)}`);
+                if (approval === 'auto-approve') {
+                    const tag = this.silent ? 'Auto-approved (silent child)' : 'Auto-approved';
+                    console.log(chalk.gray(`▶  ${tag}: ${chalk.cyan(cmd)}`));
+                }
+                else {
+                    // approval === 'ask' — interactive y/N. Use the parent REPL's
+                    // readline interface; spinning up an inquirer prompt opens a second
+                    // readline against the same stdin and dumps a stray "line" event
+                    // back into the parent rl when it exits, which used to surface as
+                    // the bogus "A previous turn is still running" warning.
+                    const dangerNote = isDangerousCommand(cmd)
+                        ? chalk.red(' (flagged as potentially destructive)')
+                        : '';
+                    console.log(`\n${chalk.yellow('⚠️  Command execution request:')} ${chalk.cyan(cmd)}${dangerNote}`);
                     const approved = await askYesNo('Allow execution? (y/N) ', false);
                     if (!approved) {
                         return 'Command execution rejected by user.';
                     }
                 }
-                else {
-                    console.log(chalk.gray(`▶  Auto-approved: ${chalk.cyan(cmd)}`));
-                }
                 const sandboxConfig = resolveSandboxConfig(this.workspaceRoot, {
                     readPaths: prefs.sandboxReadPaths,
                     writePaths: prefs.sandboxWritePaths,
@@ -1026,6 +1146,48 @@ export class Agent {
                 }, this.sessionKey);
                 return formatPlan(state);
             }
+            case 'ask_user_choice': {
+                const question = String(args.question ?? '').trim();
+                const header = String(args.header ?? '').trim();
+                const rawOptions = Array.isArray(args.options) ? args.options : [];
+                if (!question)
+                    throw new Error('ask_user_choice requires a non-empty `question`.');
+                if (!header)
+                    throw new Error('ask_user_choice requires a non-empty `header`.');
+                if (rawOptions.length < 2 || rawOptions.length > 4) {
+                    throw new Error(`ask_user_choice requires 2–4 options; received ${rawOptions.length}.`);
+                }
+                const options = rawOptions.map((o, i) => {
+                    const label = String(o?.label ?? '').trim();
+                    const description = String(o?.description ?? '').trim();
+                    if (!label)
+                        throw new Error(`ask_user_choice option ${i + 1} is missing "label".`);
+                    if (!description)
+                        throw new Error(`ask_user_choice option ${i + 1} is missing "description".`);
+                    return { label, description };
+                });
+                // Silent child agents have no parent stdin/REPL bridge, so the
+                // helper's TTY check would error anyway — but giving a clearer message
+                // up front saves the LLM an iteration.
+                if (this.silent) {
+                    throw new NoTTYError('ask_user_choice is not available to silent child agents. Decide the answer yourself, ' +
+                        'state which option you picked and why, and return that as your final answer to the parent.');
+                }
+                // Eager TTY check so we fail without disturbing the screen. askChoice
+                // also checks (defense-in-depth for direct callers), but doing it here
+                // means the LLM gets a clean error before the picker tries to render.
+                if (!getActiveReadline() || !process.stdin.isTTY) {
+                    throw new NoTTYError('ask_user_choice requires an interactive TTY. ' +
+                        'Fall back to deciding yourself and state which option you picked and why.');
+                }
+                // header is rendered by the picker itself (chip line at the top of
+                // the frame), so we just thread it through opts.
+                const answer = await askChoice(question, options, {
+                    multiSelect: !!args.multiSelect,
+                    header,
+                });
+                return JSON.stringify({ answer });
+            }
             case 'goal_complete': {
                 const proof = String(args.proof ?? '').trim();
                 if (!proof)
@@ -1102,6 +1264,10 @@ export class Agent {
             next.push({ role: 'user', content: lastUserMessage });
         this.chatHistory = next;
         this.initialized = true;
+        // 9b: compaction just dropped the prior briefing as collateral —
+        // force the next turn through the full recall path even in gated
+        // mode so the model isn't blind to what was load-bearing.
+        this.recallNextTurnIsPostCompaction = true;
         return { ...result, replacedMessages: before };
     }
     /** Runtime model switch. Used by `/model` slash command. */
@@ -1139,6 +1305,10 @@ export class Agent {
         });
         this.chatHistory = [this.createSystemMessage(), ...replay];
         this.initialized = true;
+        // 9b: a freshly-loaded history is a session boundary; reset gated
+        // recall state so the next turn refreshes the briefing.
+        this.recallHasFiredThisSession = false;
+        this.recallNextTurnIsPostCompaction = false;
         return replay.length;
     }
     /** Cumulative token usage across the last runTurn. Cleared at each new turn. */
@@ -1206,6 +1376,23 @@ export class Agent {
         const marker = `<!--brainrouter:${tag}-->\n`;
         this.chatHistory = this.chatHistory.filter((msg) => !(msg.role === 'system' && typeof msg.content === 'string' && msg.content.startsWith(marker)));
     }
+    /**
+     * Zero the in-process counters that back `/tokens`. Call this on any
+     * conceptual session boundary (`/resume`, `fork`) — otherwise the parent
+     * row keeps accumulating across the switch and "this session" no longer
+     * matches the displayed sessionKey.
+     */
+    resetSessionCounters() {
+        this.sessionUsage = { promptTokens: 0, completionTokens: 0, calls: 0, turns: 0 };
+        this.memoryMetrics = {
+            briefingTokensInjected: 0,
+            offloadCharsAvoided: 0,
+            recallRecordsConsulted: 0,
+        };
+        // 9b: session-boundary reset for gated recall.
+        this.recallHasFiredThisSession = false;
+        this.recallNextTurnIsPostCompaction = false;
+    }
     /** Fork the current chat history into a fresh sessionKey. Returns the new key. */
     fork(newSessionKey) {
         this.sessionKey = newSessionKey;
@@ -1217,6 +1404,7 @@ export class Agent {
         else {
             this.chatHistory = [this.createSystemMessage(), ...this.chatHistory];
         }
+        this.resetSessionCounters();
         return this.sessionKey;
     }
     async bootstrapSession(callbacks) {
@@ -1237,26 +1425,62 @@ export class Agent {
         this.chatHistory = [this.createSystemMessage()];
         this.initialized = true;
     }
+    /**
+     * Public, callback-free wrapper around bootstrapSession for slash commands
+     * that mutate per-session state (notably `/goal`) BEFORE any runTurn has
+     * fired. Without this, the FIRST `/goal` of a session writes goal.json
+     * under the deterministic fallback sessionKey ("brainrouter-cli:<path>")
+     * because bootstrap hasn't happened yet, but every subsequent runTurn
+     * reads from the MCP-resolved UUID sessionKey — split-brain that left
+     * the agent reading a stale goal from a different directory.
+     *
+     * Idempotent: returns immediately if already initialized. Tolerates
+     * missing MCP — falls back to the deterministic key the same way
+     * bootstrapSession does.
+     */
+    async ensureInitialized() {
+        if (this.initialized)
+            return;
+        // Stub the callbacks bootstrapSession expects — no UI plumbing needed
+        // for the eager-init path; the status line is for runTurn's spinner.
+        await this.bootstrapSession({
+            onStatusUpdate: () => { },
+            onToolStart: () => { },
+            onToolEnd: () => { },
+        });
+    }
     createSystemMessage() {
         const prefs = readPreferences(this.workspaceRoot);
+        // 10b: pass the connected MCP tool inventory so `buildSystemPrompt`
+        // can omit the BrainRouter memory section when the brain is offline.
+        // The cached `lastKnownMcpTools` is populated by every successful
+        // `listTools()` (see `runTurn` and `bootstrapSession`); when no tools
+        // have been seen yet, leave it undefined — `buildSystemPrompt` treats
+        // that as "assume brain online" for back-compat.
+        const connectedMcpTools = this.lastKnownMcpTools?.map((t) => t.name);
         const base = this.systemPromptOverride ?? buildSystemPrompt({
             workspaceRoot: this.workspaceRoot,
             launchCwd: this.launchCwd,
             sessionKey: this.sessionKey,
             instructionSummary: loadWorkspaceInstructionSummary(this.workspaceRoot),
             personality: prefs.personality,
+            activeSkill: this.activeSkill,
+            executionMode: prefs.executionMode,
+            reviewPolicy: prefs.reviewPolicy,
+            effort: resolveEffort(this.workspaceRoot).effort,
+            connectedMcpTools,
         });
         const parts = [base];
         if (this.roleOverlay)
             parts.push(this.roleOverlay);
-        // Sticky goal lives on disk so it survives CLI restarts; injected here so
-        // every turn (including the first after `/resume`) sees it. Goals are
-        // scoped to the current sessionKey so /side and /fork don't drag their
-        // parent's goal along, but a workspace-level legacy goal still works as a
-        // fallback for sessions that don't have one yet.
-        const goal = readGoal(this.workspaceRoot, this.sessionKey);
-        if (goal?.text)
-            parts.push(formatGoalBlock(goal));
+        // Goal text used to be appended here AND re-pushed as a per-turn
+        // `goal-anchor` tagged system message (runTurn around line 680), which
+        // meant the whole goal block landed in the prompt twice every turn.
+        // 9d removed the duplicate; the per-turn anchor is the single owner
+        // of goal state (text, status, budget, contract reminders, and the
+        // final-budget wrap-up directive). `runTurn` re-injects it via
+        // `formatGoalBlock` immediately before the user message is appended,
+        // so even first-turn-after-`/resume` sees the goal.
         return { role: 'system', content: parts.join('\n\n') };
     }
     async injectRecallContext(prompt, mcpTools, callbacks) {
@@ -1267,7 +1491,58 @@ export class Agent {
             callbacks.onMemoryEvent?.({ kind: 'skipped', reason: this.silent ? 'silent agent (child)' : 'recall disabled' });
             return;
         }
+        // 9b: gate recall instead of firing unconditionally every turn. Pre-9b
+        // every turn paid ~3-10K tokens for a fresh briefing even when the user
+        // message was "thanks" or "/help". The new default `gated` mode fires
+        // recall only when it's likely to pay off:
+        //   - turn 1 of the session (no prior briefing)
+        //   - the turn immediately after auto-compaction (the model just lost
+        //     context — give it back what was load-bearing)
+        //   - when the user message names ≥2 entity-shaped tokens (proper
+        //     nouns, file paths, identifiers) suggesting they're asking about
+        //     something specific that memory might have history on
+        // The env knob `BRAINROUTER_RECALL_MODE=always|gated|off` lets users
+        // preserve pre-9b behaviour or kill recall entirely for benchmarking.
+        const recallMode = resolveRecallMode();
+        if (recallMode === 'off') {
+            this.recalledRecords = [];
+            this.recalledRecordIds = [];
+            this.lastBriefingSources = [];
+            callbacks.onMemoryEvent?.({ kind: 'skipped', reason: 'recallMode=off' });
+            return;
+        }
+        if (recallMode === 'gated') {
+            const isFirstTurn = !this.recallHasFiredThisSession;
+            const justCompacted = this.recallNextTurnIsPostCompaction;
+            const entityHits = countEntityTokens(prompt);
+            const hasEntityCue = entityHits >= 2;
+            if (!isFirstTurn && !justCompacted && !hasEntityCue) {
+                // Skip the full briefing — emit a lightweight system-reminder so
+                // the model knows it can pull memory itself if it needs to. The
+                // reminder is tagged so the next turn replaces it cleanly.
+                this.replaceTaggedSystemMessage('memory-hint', [
+                    '## Memory available (gated mode)',
+                    'BrainRouter memory is available this turn but the auto-briefing was skipped (no first-turn / post-compaction / entity-cue trigger). Call `memory_recall` / `memory_search` / `memory_file_history` yourself if you need history on a specific entity, file, or decision.',
+                ].join('\n'));
+                this.recalledRecords = [];
+                this.recalledRecordIds = [];
+                this.lastBriefingSources = [];
+                callbacks.onMemoryEvent?.({ kind: 'skipped', reason: 'gated (no trigger)' });
+                return;
+            }
+            // Reset the post-compaction flag now that we're firing because of it.
+            this.recallNextTurnIsPostCompaction = false;
+        }
+        // Either `recallMode === 'always'` (preserves pre-9b behaviour) or
+        // we hit a gated trigger — fire the full briefing.
         callbacks.onStatusUpdate('Briefing from BrainRouter memory...');
+        // 9d: skip `memory_task_state` in the briefing when a goal-anchor is
+        // already carrying the current objective — avoids re-injecting the
+        // "what we're doing now" context twice. The anchor is set immediately
+        // before this call in `runTurn` (around line 680), so reading the goal
+        // here resolves to the same record the anchor used.
+        const activeGoal = readGoal(this.workspaceRoot, this.sessionKey);
+        const hasActiveGoal = !!(activeGoal?.text && activeGoal.status === 'active');
         const briefing = await buildMemoryBriefing({
             mcpClient: this.mcpClient,
             mcpTools,
@@ -1275,10 +1550,14 @@ export class Agent {
             workspaceRoot: this.workspaceRoot,
             query: prompt,
             activeSkill: this.activeSkill,
+            hasActiveGoal,
         });
         this.recalledRecords = briefing.recalledRecords;
         this.recalledRecordIds = briefing.recalledRecordIds;
         this.lastBriefingSources = briefing.sourcesQueried;
+        this.recallHasFiredThisSession = true;
+        // Drop any prior lightweight hint now that the full briefing is live.
+        this.removeTaggedSystemMessage('memory-hint');
         if (briefing.block) {
             this.replaceTaggedSystemMessage('memory-briefing', briefing.block);
             callbacks.onStatusUpdate(`Memory briefing loaded: ${briefing.sourcesQueried.join(', ')} (${briefing.recalledRecordIds.length} records).`);
@@ -1295,6 +1574,15 @@ export class Agent {
     /**
      * Report what the latest pre-turn briefing step left behind: the sources
      * it queried and the ids of the records it recalled. Both arrays are
      * fresh copies, so callers cannot mutate the agent's own lists.
      */
     getLastBriefing() {
         const sources = Array.from(this.lastBriefingSources);
         const recordIds = Array.from(this.recalledRecordIds);
         return { sources, recordIds };
     }
+    /**
+     * Records gathered by the most recent pre-turn briefing, handed out as a
+     * shallow copy: callers may reorder or truncate the returned array
+     * without touching the agent's own list (the record objects themselves
+     * are still shared). `/where` shows a few of these so the user can see
+     * what the agent is leaning on right now.
+     */
+    getRecalledRecords() {
+        const snapshot = Array.from(this.recalledRecords);
+        return snapshot;
+    }
     /** One-line summary of any new contradiction surfaced after the last capture, or undefined if none. */
     lastContradictionWarning;
     takeContradictionWarning() {
@@ -1835,7 +2123,102 @@ function formatBytes(n) {
 // per-turn system messages (briefing, fan-out hint). Strip them before the
 // payload reaches the LLM so the model doesn't see the bookkeeping.
 // The pattern is anchored: it only matches a marker at the very start of the
 // content — `<!--brainrouter:`, a tag made of lowercase letters, digits and
 // hyphens, `-->`, and exactly one trailing newline.
 const TAG_MARKER_RE = /^<!--brainrouter:[a-z0-9-]+-->\n/;
-export function buildChatCompletionPayload(config, messages, tools) {
+/**
+ * NOTE: this comment documents `supportsReasoningEffortField`, which is
+ * defined further down (after the recall-gating helpers), not the function
+ * that immediately follows.
+ *
+ * Heuristic for "does this model accept the OpenAI Chat Completions
+ * `reasoning_effort` field?". The signal that actually matters is the
+ * **model name**, not the endpoint hostname — modern OpenAI-compatible
+ * servers (LM Studio 0.3.29+, Ollama, vLLM, OpenRouter, OpenAI itself)
+ * all accept the field on /v1/chat/completions for the reasoning-capable
+ * model classes below, and silently ignore it for everything else. So a
+ * `gpt-oss-20b` served from localhost via LM Studio gets the same
+ * treatment as `gpt-5` on `api.openai.com`.
+ *
+ * Borrowed shape from openai-node's `ReasoningEffort` enum
+ * (openSrc/openai-node/src/resources/shared.ts) — `low|medium|high` map
+ * straight through to the provider field across OpenAI, DeepSeek,
+ * LM Studio, Ollama, and OpenRouter's pass-through. Anthropic models
+ * (`claude-*`) use a different field shape (`thinking: { budget_tokens }`)
+ * and a different endpoint (`/v1/messages`), so they're intentionally
+ * skipped here — brainrouter would need a separate provider adapter to
+ * forward into Anthropic's native API.
+ */
+/**
+ * 9b: work out which recall-gating mode applies right now.
+ *
+ * Only the `BRAINROUTER_RECALL_MODE` environment variable is consulted. Its
+ * value is lower-cased and trimmed before comparison, and anything that is
+ * not one of the three recognised modes (including an unset or empty
+ * variable) resolves to `gated` — a typo degrades to the default instead of
+ * crashing. The environment is read on every call, so a change to the
+ * variable is picked up the next time this function runs.
+ *
+ * @returns {'always' | 'gated' | 'off'} The effective recall mode.
+ */
+export function resolveRecallMode() {
+    const requested = (process.env.BRAINROUTER_RECALL_MODE ?? '').toLowerCase().trim();
+    switch (requested) {
+        case 'always':
+        case 'gated':
+        case 'off':
+            return requested;
+        default:
+            return 'gated';
+    }
+}
+/**
+ * 9b: cheap local heuristic for "the user message names something specific
+ * memory might have history on." Adds up three kinds of entity-shaped tokens:
+ *
+ *  - file-ish tokens: a run ending in `.<ext>` (1–8 letters), or a
+ *    forward-slash path such as `dir/sub/name`;
+ *  - identifier-shaped tokens: `camelCase`, `PascalCase` with an inner
+ *    capital, or lowercase `snake_case` (there is no separate length check;
+ *    the patterns themselves need at least 3–4 characters);
+ *  - proper nouns: capitalized words of 3+ letters that are not the first
+ *    word of a sentence.
+ *
+ * The three categories are counted independently, so a single token can
+ * contribute more than once (`fooBar.js` is both a file and an identifier).
+ * Crude, but the bar is "is recall plausibly worth it?" — false positives
+ * waste a recall call, false negatives waste an ask. The caller picks the
+ * threshold.
+ *
+ * @param {string} text - Raw user message. Empty or non-string input yields 0.
+ * @returns {number} Total number of entity-shaped tokens found.
+ */
+export function countEntityTokens(text) {
+    if (typeof text !== 'string' || text.length === 0)
+        return 0;
+    let count = 0;
+    // File-ish tokens: anything ending in a short letter-only extension, or a
+    // forward-slash separated path.
+    const pathMatches = text.match(/[A-Za-z0-9_./\\-]+\.[A-Za-z]{1,8}(?![A-Za-z])|(?:[\w-]+\/){1,}[\w.-]+/g);
+    count += pathMatches?.length ?? 0;
+    // Identifier-shaped tokens (camelCase, PascalCase, snake_case).
+    const identMatches = text.match(/\b(?:[a-z]+[A-Z][A-Za-z0-9]+|[A-Z][a-z]+[A-Z][A-Za-z0-9]+|[a-z]+_[a-z][\w]+)\b/g);
+    count += identMatches?.length ?? 0;
+    // Proper nouns (capitalized, not at sentence start, ≥3 chars). We split on
+    // sentence boundaries first so the first word of each sentence is skipped.
+    const sentences = text.split(/[.!?]\s+/);
+    for (const s of sentences) {
+        // Trim before splitting: leading whitespace would otherwise produce an
+        // empty words[0], shifting the real sentence-opening word to index 1
+        // where it would be miscounted as a proper noun.
+        const words = s.trim().split(/\s+/);
+        for (let i = 1; i < words.length; i++) {
+            const w = words[i].replace(/[^A-Za-z]/g, '');
+            if (w.length >= 3 && /^[A-Z][a-z]+$/.test(w))
+                count++;
+        }
+    }
+    return count;
+}
+/**
+ * Decide, from the model name alone, whether the request should carry the
+ * Chat Completions `reasoning_effort` field. The endpoint is not consulted.
+ *
+ * The name is lower-cased and split on `/`, and every segment is tested
+ * against the reasoning-family patterns. Testing each segment (rather than
+ * only the last one) keeps vendor-prefixed names such as
+ * `openai/gpt-oss-20b` working and also covers stacked names where the
+ * family sits in the middle, e.g. `openai/gpt-oss/20b-variant`.
+ *
+ * @param {{ model?: string }} config - Only `config.model` is read; a
+ *   missing or non-string model is treated as "no match".
+ * @returns {boolean} True when any segment of the model name matches a
+ *   known reasoning-model pattern.
+ */
+export function supportsReasoningEffortField(config) {
+    // Normalize the model name: lower-case it and break it on `/` so that
+    // OpenRouter / LM Studio naming (`openai/gpt-oss-20b`,
+    // `mistralai/magistral-small`, `deepseek/deepseek-r1`) matches the same
+    // patterns as a bare model name.
+    const raw = typeof config?.model === 'string' ? config.model.toLowerCase() : '';
+    const segments = raw.split('/');
+    // Reasoning-model name patterns. The list covers the major reasoning
+    // model families running through OpenAI-compatible /chat/completions
+    // surfaces in 2026: OpenAI's gpt-5 / o-series / open-weights gpt-oss,
+    // DeepSeek's R1 / R2 / V3+ thinking variants, Alibaba's Qwen3 thinking
+    // models, Mistral's Magistral, and Microsoft's Phi-4-reasoning. Any
+    // model whose name itself contains "reasoning" or "thinking" is
+    // included too — that catches new entrants we haven't enumerated yet
+    // (e.g. `phi-4-reasoning-plus`, `qwen3-30b-a3b-thinking`).
+    const reasoningPatterns = [
+        /^gpt-5/, // gpt-5, gpt-5-mini, gpt-5-pro, gpt-5.1, gpt-5-codex-max
+        /^o[134](-|$|\.)/, // o1, o3, o4 and dated / sized variants
+        /^gpt-oss/, // gpt-oss-20b / 120b (LM Studio 0.3.29+, Ollama, llama.cpp)
+        /^deepseek-r[12]/, // DeepSeek R1, R2
+        /^deepseek-v[34]/, // DeepSeek V3.1+, V4 reasoning variants
+        /^qwen3/, // Qwen3 reasoning variants (LM Studio, Ollama)
+        /^magistral/, // Mistral Magistral (small/medium reasoning)
+        /reasoning/, // catch-all for `phi-4-reasoning`, `*-reasoning-plus`, …
+        /thinking/, // catch-all for `qwen3-30b-a3b-thinking`, `*-thinking-*`, …
+    ];
+    return segments.some((segment) => reasoningPatterns.some((re) => re.test(segment)));
+}
+export function buildChatCompletionPayload(config, messages, tools, options = {}) {
     const stripTag = (content) => typeof content === 'string' && TAG_MARKER_RE.test(content)
         ? content.replace(TAG_MARKER_RE, '')
         : content;
@@ -1874,9 +2257,16 @@ export function buildChatCompletionPayload(config, messages, tools) {
         }));
         body.tool_choice = 'auto';
     }
+    // Forward reasoning_effort only when the level is non-default AND the
+    // model name matches a known reasoning family (the endpoint is not
+    // consulted). `medium` is the CLI default and forwarding it would change
+    // every existing user's request shape on upgrade for no behavioural gain.
+    if (options.effort && options.effort !== 'medium' && supportsReasoningEffortField(config)) {
+        body.reasoning_effort = options.effort;
+    }
     return body;
 }
-export async function callOpenAI(config, messages, tools) {
+export async function callOpenAI(config, messages, tools, options = {}) {
     const endpoint = config.endpoint || 'https://api.openai.com/v1';
     let apiKey = config.apiKey || process.env.OPENAI_API_KEY || '';
     const isLocal = endpoint.includes('localhost') || endpoint.includes('127.0.0.1');
@@ -1886,7 +2276,7 @@ export async function callOpenAI(config, messages, tools) {
     if (!apiKey && isLocal) {
         apiKey = 'sk-local-placeholder';
     }
-    const body = buildChatCompletionPayload(config, messages, tools);
+    const body = buildChatCompletionPayload(config, messages, tools, options);
     const headers = {
         'Content-Type': 'application/json'
     };