npm - @kinqs/brainrouter-cli - Versions diffs - 0.3.5 → 0.3.6 - Mend

@kinqs/brainrouter-cli 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/.env.example +55 -48
package/bin/cli.cjs +71 -0
package/dist/agent/agent.d.ts +212 -2
package/dist/agent/agent.js +428 -38
package/dist/cli/banner.d.ts +60 -0
package/dist/cli/banner.js +199 -0
package/dist/cli/cliPrompt.d.ts +69 -0
package/dist/cli/cliPrompt.js +287 -0
package/dist/cli/commands/_helpers.js +6 -6
package/dist/cli/commands/guard.js +75 -10
package/dist/cli/commands/mcp.d.ts +17 -0
package/dist/cli/commands/mcp.js +121 -0
package/dist/cli/commands/memory.js +2 -2
package/dist/cli/commands/obs.js +22 -22
package/dist/cli/commands/session.js +13 -5
package/dist/cli/commands/ui.js +97 -45
package/dist/cli/commands/workflow.d.ts +18 -0
package/dist/cli/commands/workflow.js +314 -43
package/dist/cli/repl.js +219 -132
package/dist/cli/spinner.d.ts +34 -0
package/dist/cli/spinner.js +36 -0
package/dist/cli/statusline.d.ts +67 -0
package/dist/cli/statusline.js +204 -0
package/dist/cli/theme.d.ts +79 -0
package/dist/cli/theme.js +106 -0
package/dist/cli/whereView.d.ts +81 -0
package/dist/cli/whereView.js +245 -0
package/dist/config/config.d.ts +40 -0
package/dist/config/config.js +45 -73
package/dist/index.js +80 -13
package/dist/memory/briefing.d.ts +10 -0
package/dist/memory/briefing.js +69 -1
package/dist/prompt/breadthHint.d.ts +5 -0
package/dist/prompt/breadthHint.js +44 -0
package/dist/prompt/systemPrompt.d.ts +34 -0
package/dist/prompt/systemPrompt.js +124 -108
package/dist/runtime/dangerousCommand.d.ts +53 -0
package/dist/runtime/dangerousCommand.js +105 -0
package/dist/runtime/mcpClient.d.ts +38 -1
package/dist/runtime/mcpClient.js +90 -2
package/dist/state/goalStore.d.ts +98 -17
package/dist/state/goalStore.js +132 -42
package/dist/state/preferencesStore.d.ts +67 -3
package/dist/state/preferencesStore.js +84 -1
package/dist/state/workflowArtifacts.d.ts +63 -2
package/dist/state/workflowArtifacts.js +120 -8
package/dist/tests/_helpers.d.ts +31 -0
package/dist/tests/_helpers.js +91 -0
package/package.json +5 -4

package/dist/memory/briefing.js CHANGED Viewed

@@ -17,7 +17,7 @@ export async function buildMemoryBriefing(inputs) {
     if (toolNames.has('memory_working_context')) {
         tasks.push(callSafe('memory_working_context', { sessionKey, workspacePath: workspaceRoot }, mcpClient, maxChars));
     }
-    if (toolNames.has('memory_task_state')) {
+    if (toolNames.has('memory_task_state') && !inputs.hasActiveGoal) {
         tasks.push(callSafe('memory_task_state', { query }, mcpClient, maxChars));
     }
     const results = await Promise.all(tasks);
@@ -28,6 +28,16 @@ export async function buildMemoryBriefing(inputs) {
         if (!r.text)
             continue;
         sourcesQueried.push(r.source);
+        if (r.source === 'memory_working_context') {
+            const workingSection = renderWorkingMemorySection(r.text);
+            if (workingSection) {
+                sections.push(workingSection);
+                continue;
+            }
+            // Fall through to the opaque-dump branch when the payload didn't
+            // match the expected shape — that path runs redactText and keeps
+            // the secrets test honest.
+        }
         if (r.records && r.records.length > 0) {
             // Render structured cards instead of dumping the raw JSON. The previous
             // form emitted ~2-4KB of `recallExplanation`/`sparkedNodes`/etc. per
@@ -147,6 +157,64 @@ function prettyLabel(toolName) {
         default: return toolName;
     }
 }
+/**
+ * 0.3.6 item 2c — structurally surface working-memory steps in the
+ * briefing. Two slices:
+ *   - the recentSteps tail the MCP already injected (last 5–10 steps,
+ *     regardless of kind), which gives the model the latest tool
+ *     outputs in order; and
+ *   - up to 3 most-recent reasoning-kind steps from the full step log,
+ *     which keeps the "why" trail visible even after a chatty tool
+ *     burst has pushed reasoning off the tail.
+ *
+ * The payload is parsed from tool output, so nothing about its shape is
+ * trusted: non-object step entries are dropped and non-string
+ * `kind` / `title` / `summary` values are treated as absent instead of
+ * throwing in the middle of briefing assembly.
+ *
+ * @param text Raw text returned for the `memory_working_context` source.
+ * @returns The redacted Markdown section, or null when the payload
+ *   doesn't look like a working-context JSON blob or yields no step
+ *   lines — caller falls back to the opaque-dump branch so secrets
+ *   still get redacted on unstructured text.
+ */
+function renderWorkingMemorySection(text) {
+    let parsed;
+    try {
+        parsed = JSON.parse(text);
+    }
+    catch {
+        return null;
+    }
+    if (!parsed || typeof parsed !== 'object')
+        return null;
+    // Keep only object entries: a stray null / string / number inside the
+    // array would otherwise throw on the property reads below.
+    const objectEntries = (value) => Array.isArray(value)
+        ? value.filter((entry) => entry !== null && typeof entry === 'object')
+        : null;
+    const recentSteps = objectEntries(parsed?.state?.injectedState?.recentSteps) ?? [];
+    // null (not []) when the payload has no `steps` array at all, so the
+    // reasoning slice below can tell "no full log" from "empty full log".
+    const fullLog = objectEntries(parsed?.steps);
+    if (recentSteps.length === 0 && (fullLog === null || fullLog.length === 0))
+        return null;
+    // Collapse whitespace runs; anything that is not a string counts as empty.
+    const flatten = (value) => (typeof value === 'string' ? value.replace(/\s+/g, ' ').trim() : '');
+    const renderStep = (step) => {
+        const kind = typeof step.kind === 'string' && step.kind ? `[${step.kind}] ` : '';
+        const title = flatten(step.title) || '(no title)';
+        const summary = flatten(step.summary);
+        // Cap the preview at 200 characters including the ellipsis.
+        const preview = summary.length > 200 ? summary.slice(0, 199) + '…' : summary;
+        return `- ${kind}${title}${preview ? ` — ${preview}` : ''}`;
+    };
+    const lines = [`### ${prettyLabel('memory_working_context')}`];
+    if (recentSteps.length > 0) {
+        lines.push('Recent steps:');
+        for (const step of recentSteps)
+            lines.push(renderStep(step));
+    }
+    // Surface up to 3 most-recent reasoning-kind steps that the recentSteps
+    // tail didn't already include. Cap on purpose — without it a turn that
+    // offloaded reasoning every batch would stuff the briefing with its own
+    // past commentary. Without a separate full log every step is already
+    // listed above, so there is nothing extra to surface (and steps lacking a
+    // nodeId would otherwise be printed twice).
+    const recentNodeIds = new Set(recentSteps.map((s) => s.nodeId).filter(Boolean));
+    const reasoningTail = fullLog === null
+        ? []
+        : fullLog
+            .filter((s) => s.kind === 'reasoning' && (!s.nodeId || !recentNodeIds.has(s.nodeId)))
+            .slice(-3);
+    if (reasoningTail.length > 0) {
+        lines.push('', 'Recent reasoning (why-trail):');
+        for (const step of reasoningTail)
+            lines.push(renderStep(step));
+    }
+    // Header only: nothing useful was rendered, so let the caller fall back
+    // to its generic rendering instead of emitting an empty section.
+    if (lines.length === 1)
+        return null;
+    return redactText(lines.join('\n'));
+}
 /** Returns the distinct values of `items`, keeping first-seen order. */
 function dedupe(items) {
     const unique = new Set(items);
     return [...unique];
 }

package/dist/prompt/breadthHint.d.ts CHANGED Viewed

@@ -36,9 +36,14 @@ export declare function detectBreadthIntent(prompt: string): BreadthIntent;
  *    turn that should have been parallel.
  */
 export declare const BREADTH_FAN_OUT_THRESHOLD = 1.5;
+/**
+ * Checks whether `prompt` contains an explicit "do not fan out" signal.
+ *
+ * @param prompt The user prompt to scan.
+ * @returns `vetoed` is true when a negation pattern matched; `pattern`
+ *   then holds the matched text and is omitted otherwise.
+ */
+export declare function detectFanOutVeto(prompt: string): {
+    vetoed: boolean;
+    pattern?: string;
+};
+/**
+ * Decides whether the fan-out hint should be suggested for `prompt`.
+ *
+ * @param prompt The user prompt to evaluate.
+ * @returns `suggest` is the decision, `intent` the breadth analysis it
+ *   was based on, and `veto` the matched opt-out text when the user
+ *   explicitly declined fan-out (in which case `suggest` is false).
+ */
 export declare function shouldSuggestFanOut(prompt: string): {
     suggest: boolean;
     intent: BreadthIntent;
+    veto?: string;
 };
 /**
  * The system message we inject to nudge the agent toward spawn_agents. It

package/dist/prompt/breadthHint.js CHANGED Viewed

@@ -64,8 +64,52 @@ export function detectBreadthIntent(prompt) {
  *    turn that should have been parallel.
  */
 export const BREADTH_FAN_OUT_THRESHOLD = 1.5;
+/**
+ * Negation hints — explicit signals from the user that they DO NOT want
+ * fan-out for this prompt. Honored as a hard veto: even a high breadth
+ * score won't trigger the hint if any of these match.
+ *
+ * Common cases we want to honor:
+ *  - "(no spawn_agent, no fan-out, files are small)"  — explicit opt-out
+ *  - "do this in one turn"                            — wants serial
+ *  - "directly with read_file, no fan-out"            — explicit tool
+ *  - "yourself, don't spawn agents"                   — explicit self
+ *
+ * Without this veto, a prompt like
+ *   "audit every file (no spawn_agent, files are small)"
+ * still scores high on `verb-object-broad` + `every` and the model gets
+ * told "fan out!" — directly contradicting the user's instruction.
+ *
+ * Pattern notes:
+ *  - `child(?:ren)?` matches both "child" and "children"; the earlier
+ *    `children?` only made the final "n" optional, so "no child agents"
+ *    slipped through.
+ *  - `don['’]?t` also accepts the typographic apostrophe that chat
+ *    clients and editors substitute for `'`.
+ *  - All patterns are non-global on purpose so they carry no `lastIndex`
+ *    state between prompts.
+ */
+const NEGATION_PATTERNS = [
+    /\bno\s+(spawn[_-]?agents?|fan[- ]?out|child(?:ren)?|sub[- ]?agents?|orchestration)\b/i,
+    /\b(don['’]?t|do\s+not)\s+(spawn|fan[- ]?out|delegate|orchestrate)\b/i,
+    /\b(in\s+one\s+turn|single\s+turn|sequentially|one[- ]by[- ]one|in[- ]process)\b/i,
+    /\bdirectly\s+(with|using|via)\b/i,
+    /\b(yourself|by\s+yourself|on\s+your\s+own)\b/i,
+];
+/**
+ * Scans `prompt` for an explicit fan-out opt-out.
+ *
+ * Patterns are tried in declaration order and the first hit wins.
+ *
+ * @param prompt User prompt; null / undefined is treated as an empty string.
+ * @returns `{ vetoed: true, pattern }` with the matched text on a hit,
+ *   `{ vetoed: false }` otherwise.
+ */
+export function detectFanOutVeto(prompt) {
+    const haystack = (prompt ?? '').toString();
+    for (const negation of NEGATION_PATTERNS) {
+        const hit = haystack.match(negation);
+        if (!hit)
+            continue;
+        return { vetoed: true, pattern: hit[0] };
+    }
+    return { vetoed: false };
+}
 export function shouldSuggestFanOut(prompt) {
     const intent = detectBreadthIntent(prompt);
+    const veto = detectFanOutVeto(prompt);
+    if (veto.vetoed) {
+        // Reflect the veto in the intent's signals so onToolEnd's surfacing
+        // shows the user why we didn't fan out, even though the breadth
+        // score was high. The score itself isn't zeroed — it's still useful
+        // signal for other heuristics.
+        return {
+            suggest: false,
+            intent: { ...intent, signals: [...intent.signals, `vetoed:${veto.pattern}`] },
+            veto: veto.pattern,
+        };
+    }
     return { suggest: intent.score >= BREADTH_FAN_OUT_THRESHOLD, intent };
 }
 /**

package/dist/prompt/systemPrompt.d.ts CHANGED Viewed

@@ -5,6 +5,40 @@ export interface SystemPromptContext {
     instructionSummary?: string;
     /** Communication style overlay set by /personality. */
     personality?: 'concise' | 'standard' | 'detailed' | 'pair-programmer';
+    /**
+     * Name of the active BrainRouter skill latched by a slash command (e.g.
+     * `/spec`, `/feature-dev`, `/grill-me`). Most skills are workflow
+     * directives the model loads via `get_skill` and don't change the system
+     * prompt — `grill-me` is the exception: it appends a CLARIFY-mode block
+     * here so the model asks questions instead of jumping to edits.
+     */
+    activeSkill?: string;
+    /**
+     * Execution-mode overlay set by `/mode`. Only `fast` produces an overlay
+     * — `planning` is the unchanged default behaviour and adding prose for it
+     * would just dilute the rest of the prompt.
+     */
+    executionMode?: 'planning' | 'fast';
+    /**
+     * Review-policy overlay set by `/review-policy`. Only `proceed` produces
+     * an overlay; `request` is the default behaviour.
+     */
+    reviewPolicy?: 'request' | 'proceed';
+    /**
+     * Reasoning-depth overlay set by `/effort` (or `BRAINROUTER_EFFORT`).
+     * `medium` is the default and emits no overlay — adding prose for it
+     * would silently change behaviour for every existing user on upgrade.
+     */
+    effort?: 'low' | 'medium' | 'high';
+    /**
+     * 0.3.6 item 10b: the set of MCP tool names actually connected this turn.
+     * When this list lacks `memory_recall` (i.e. the BrainRouter cloud brain
+     * is offline), the prompt omits the "BrainRouter MCP Tools" / "Memory-
+     * First" sections so the model doesn't try to call tools that don't
+     * exist. Undefined = "assume the BrainRouter MCP is online" (pre-10b
+     * back-compat for callers that don't pass the inventory).
+     */
+    connectedMcpTools?: string[];
 }
 export declare function buildSystemPrompt(context: SystemPromptContext): string;
 export declare function loadWorkspaceInstructionSummary(workspaceRoot: string): string | undefined;

package/dist/prompt/systemPrompt.js CHANGED Viewed

@@ -27,133 +27,149 @@ function personalityOverlay(style) {
             return '';
     }
 }
+/**
+ * Builds the "Session policy overrides" prompt block.
+ *
+ * Only the non-default settings contribute a bullet: `fast` execution mode
+ * and the `proceed` review policy.
+ *
+ * @param executionMode Current execution mode.
+ * @param reviewPolicy Current review policy.
+ * @returns The heading followed by the active bullets, or '' when neither
+ *   override is active.
+ */
+function policyOverlay(executionMode, reviewPolicy) {
+    const overrides = [
+        executionMode === 'fast'
+            ? '- Execution mode is `fast`: skip the "may I run this?" prose for safe shell calls and just issue the tool. The CLI still gates dangerous commands (`rm -rf`, `sudo`, force-push, …) with a y/N regardless of mode.'
+            : null,
+        reviewPolicy === 'proceed'
+            ? '- Review policy is `proceed`: apply multi-file plans and report after — no "ready for your approval?" pause. `/approve` is still the user\'s explicit lever.'
+            : null,
+    ].filter((bullet) => bullet !== null);
+    return overrides.length > 0
+        ? ['## Session policy overrides', ...overrides].join('\n')
+        : '';
+}
+/**
+ * Builds the reasoning-depth prompt block.
+ *
+ * @param effort Requested reasoning depth.
+ * @returns A two-line block for `low` or `high`; '' for any other value.
+ */
+function effortOverlay(effort) {
+    switch (effort) {
+        case 'low':
+            return '## Reasoning depth: low'
+                + '\n'
+                + '- Be terse. Skip ceremony. One-paragraph answers when the question fits in one paragraph.';
+        case 'high':
+            return '## Reasoning depth: high'
+                + '\n'
+                + '- Reason step-by-step before acting. Audit your evidence against the goal before each tool call.';
+        default:
+            return '';
+    }
+}
+/**
+ * Builds the CLARIFY-mode prompt block.
+ *
+ * @param activeSkill Name of the currently active skill, if any.
+ * @returns The clarify instructions when the skill is `grill-me`,
+ *   '' for every other value.
+ */
+function clarifyOverlay(activeSkill) {
+    const clarifying = activeSkill === 'grill-me';
+    if (clarifying) {
+        const block = [
+            '## CLARIFY mode (grill-me)',
+            '- Do NOT make file edits, run shell commands, or spawn worker agents this turn.',
+            '- Ask 2–5 questions to disambiguate scope, format, and unstated assumptions.',
+            '- Prefer `ask_user_choice` for mutually-exclusive options; plain prose for free-form input.',
+            '- (`askYesNo` is a CLI-internal gate the framework triggers — do NOT try to call it as a tool.)',
+            '- End with a one-paragraph "what I\'ll do once you answer" so the user can sanity-check the read.',
+        ];
+        return block.join('\n');
+    }
+    return '';
+}
+/**
+ * 0.3.6 item 10b: decides whether the BrainRouter-MCP-specific guidance
+ * should be emitted, i.e. whether the brain counts as reachable.
+ *
+ * `memory_recall` is used as the signature tool: the brain is considered
+ * online when that name appears in the connected-tool list. A missing
+ * list (older callers that don't pass the inventory) is treated as
+ * online, so those callers keep getting the memory guidance.
+ *
+ * @param connectedTools Names of the connected MCP tools, if known.
+ * @returns true when no list was supplied or it contains `memory_recall`.
+ */
+function isBrainOnline(connectedTools) {
+    return !connectedTools || connectedTools.includes('memory_recall');
+}
+/**
+ * Builds the prompt block used when the BrainRouter MCP is unavailable.
+ *
+ * @returns A heading plus four bullets, newline-joined.
+ */
+function brainOfflineNotice() {
+    const heading = '## ⚠️ BrainRouter MCP is OFFLINE this turn';
+    const bullets = [
+        '- Long-term memory, skill lookup, and the recall briefing are unavailable.',
+        '- Do NOT call any BrainRouter memory or skill tools — they will fail with "MCP server is not connected". The turn-start tool list reflects this; only tools that appear there are callable.',
+        '- If the user asks about past sessions, prior decisions, or skill-based workflows, tell them the brain is offline and recommend `/mcp reconnect`.',
+        '- Operate against the workspace files directly using local tools (`read_file`, `glob_files`, `grep_search`, `run_command`).',
+    ];
+    return [heading, ...bullets].join('\n');
+}
+/**
+ * Builds the "Memory-First Workflow" prompt section.
+ *
+ * Layout: heading, one framing sentence, five bullets, a blank line, then
+ * the anti-hallucination paragraph.
+ *
+ * @returns The section as a single newline-joined string.
+ */
+function memoryFirstSection() {
+    const heading = '## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)';
+    const framing = 'BrainRouter is a cognitive memory engine first. Treat memory as a primary tool.';
+    const bullets = [
+        '- A `## BrainRouter Memory Briefing` system message is auto-injected with recalled memories, persona, and recent context. Read it before reasoning. When thin/empty, call `memory_search` / `memory_recall` yourself — do not assume the user is new.',
+        '- For non-trivial work, call `memory_recall` with sessionKey + the request as the query. When you pivot mid-turn or need deeper signal, re-call: `memory_file_history` for file-specific past changes, `memory_graph_query` for related entities (2-hop), `memory_explain_recall` for ranking signals, `memory_failed_attempts` for prior dead-ends. Call `memory_resolve_session` first when you don\'t yet have a sessionKey.',
+        '- Quote record IDs inline like `[rec_xxx]` so the user sees what you used.',
+        '- For payloads >~1,000 tokens, call `memory_working_offload` and reference back by its ref-node id instead of pasting again.',
+        '- **Capture the WHY.** After every non-trivial tool batch (≥3 tool calls OR a single tool that returned >2KB), call `memory_working_offload` ONCE with `kind: "reasoning"`, `title: "Why: <short>"`, and a 1-paragraph DECISION summary. Payload offload is about token budget; reasoning offload is the audit trail the next turn\'s briefing surfaces back.',
+    ];
+    const antiHallucination = '**Anti-hallucination.** Don\'t generalize recall results — quote or paraphrase tightly, always with `[recordId]`. Don\'t invent project facts not in the briefing, a recall result, or a file you read. Never say "I do not have information about your current projects" if the briefing is non-empty or before running `memory_recall`. If a recalled fact looks stale or off-project (e.g. recall says "Vue.js + Go" but the workspace is TypeScript-only), flag it: "Recalled [rec_xxx] looks inconsistent — archive via `memory_update`?"';
+    return [heading, framing, ...bullets, '', antiHallucination].join('\n');
+}
 export function buildSystemPrompt(context) {
     const instructionSummary = context.instructionSummary?.trim()
         ? context.instructionSummary.trim()
         : 'No workspace AGENT.md or AGENTS.md instruction file was found.';
+    const brainOnline = isBrainOnline(context.connectedMcpTools);
+    // Order matters for prompt-cache hits (item 9c): identity + tool-mechanics
+    // baseline stay first because they never change turn-to-turn; the workspace
+    // block + per-call overlays sit at the tail so dynamic content lands last.
     return [
-        'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal.',
-        'Your edge over generic coding agents is being direct, tool-driven, memory-aware, and workspace-aware — every turn should reflect that.',
+        'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal. Direct, tool-driven, memory-aware, workspace-aware.',
         '',
-        '## Runtime Context',
-        `- Workspace root: ${context.workspaceRoot}`,
-        `- Launch directory: ${context.launchCwd}`,
-        `- BrainRouter sessionKey: ${context.sessionKey}`,
-        '- All relative file paths are resolved from the workspace root, not from the CLI installation directory.',
-        '- If the user asks about "the session", answer with the current BrainRouter sessionKey and workspace root.',
-        '',
-        '## Workspace Instructions',
-        instructionSummary,
-        '',
-        '## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)',
-        'BrainRouter is a cognitive memory engine first and a coding agent second. Treat memory as a primary tool, not an afterthought. The user pays for this routing — you must use it.',
-        '',
-        '### Before doing the work',
-        '- The CLI already injects a "## BrainRouter Memory Briefing" system message with recalled cognitive memories, persona, focus scenes, and recent context. READ it before you reason. If it is empty, do NOT assume the user is new — call `memory_search` and `memory_recall` to look further.',
-        '- For ANY non-trivial request, call `memory_recall` with the current sessionKey AND the user request as the query. Look for `recordId` values you can cite later.',
-        '- If the request mentions a specific file, also call `memory_file_history` with that path — past changes and known issues live there.',
-        '- If the request mentions a domain/feature concept, call `memory_graph_query` with the entity name to find related memories across the knowledge graph (2-hop default).',
-        '- When you don\'t have a sessionKey yet, call `memory_resolve_session` with the workspacePath.',
-        '',
-        '### During the work',
-        '- Surface the record IDs you are relying on. Quote them inline like `[rec_xxx]` so the user sees what you used.',
-        '- For long-running tasks, call `memory_task_state` to check whether this work was started before and `memory_task_update` to record progress (blockers, decisions, next actions).',
-        '- If you produce a payload over ~1,000 tokens (analysis, diff, large summary), call `memory_working_offload` and refer back to it by its ref node id instead of pasting again.',
-        '- The briefing only fires ONCE at turn start with the prompt as the query. **Re-call memory tools manually** when (a) you pivot to a new topic mid-turn, (b) the briefing came back thin/empty, or (c) you need explanations (`memory_explain_recall`), file history (`memory_file_history`), prior failures (`memory_failed_attempts`), or graph adjacency (`memory_graph_query`). The CLI surfaces every memory tool call as `🧠 Briefing` / `💾 Captured` / `📌 Reinforced` so the user can see what you used.',
-        '',
-        '### After the work',
-        '- The CLI auto-runs `memory_mark_cited` with the records you actually used (detected by content match against your final answer) and `memory_capture_turn`. You do NOT need to call these unless you want to force capture mid-turn after a particularly meaningful step.',
-        '',
-        '### Never do',
-        '- Never say "I do not have information about your current projects" if the briefing is non-empty or if you have not first run `memory_search` / `memory_recall` for the question.',
-        '- Never re-discover something that already lives in memory. Recall first, then read files.',
-        '- Never cite a recordId that did not appear in the briefing or in a recall result you ran.',
+        '## Tool-call mechanics',
+        'Tool calls live in the structured `tool_calls` field of your assistant message, NOT in prose. Writing `goal_complete({...})` or any other tool name as text/markdown/code-fence does NOTHING — the framework only sees `tool_calls`. The same applies to every tool (`read_file`, `update_plan`, `spawn_agent`, `goal_blocked`, `memory_*`, …). Never call a tool name that wasn\'t in the turn-start tool list. Skills (names ending in `-skill` / `-workflow` / `-driven`) are documentation, not tools — load via `get_skill`, never `tool_calls`. The CLI has a repeat-loop guard: 3 identical (tool, args) calls in one turn returns an error instead of executing.',
         '',
-        '### Anti-hallucination rules when summarizing recall (critical)',
-        '- When recall returns memories, do NOT generalize. Quote the content verbatim or paraphrase to within a few words. Always include the recordId in `[brackets]`.',
-        '- Memory records can be STALE or from a DIFFERENT project. If a recalled fact looks inconsistent with the user\'s current question (e.g. recall says "Vue.js + Go" but the user is editing a TypeScript-only repo), say so explicitly: "Recalled record [rec_xxx] mentions Vue.js + Go — this looks inconsistent with the current workspace. Should I archive it via `memory_update`?"',
-        '- Do not invent project facts that aren\'t in either (a) the briefing, (b) a recall/search result you just ran, or (c) files you actually read. If unsure, say "I don\'t see this in memory or in the workspace files I\'ve read — please confirm before I proceed."',
-        '- When unsure whether a recall result is current, call `memory_verify` to flag it for re-checking, or suggest the user run `/forget <recordId>` to archive obvious garbage.',
+        '## Tool policy',
+        '- Prefer tool calls over asking the user for info the workspace or memory can answer.',
+        '- MCP-first for cognitive work — skills, personas, memory, working canvas, contradictions go through MCP tools, not filesystem reads.',
+        '- Skill workflow: `list_skills` / `search_skills` → `get_skill({ name })` → follow steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
         '',
-        '## Tool Policy',
-        '- You may call local workspace tools and BrainRouter MCP tools yourself.',
-        '- Prefer tool calls over asking the user for information that can be discovered from the workspace or MCP memory.',
-        '- If the user asks about files, project structure, code, tests, or configuration, inspect files with list_dir, glob_files, grep_search, or read_file.',
-        '- **MCP-first for everything cognitive.** Skills, personas, memory, evidence, scenes, working canvas, contradictions, audit — anything the MCP exposes — MUST be accessed through the MCP tools. Do not reimplement them with filesystem reads. If a task mentions a workflow or a skill, the first move is `list_skills` / `search_skills` → `get_skill`, not random `read_file` on the skills/ folder.',
-        '- **Skills are NOT tools.** Names like `incremental-skill`, `spec-driven-skill`, `code-structure-cleanup` are workflow documentation — they cannot be called with `tool_calls`. To use one: call `list_skills` (or `search_skills`) to discover the canonical name, then `get_skill({ name: "<name>" })` to load its instructions, and then follow the steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
-        '- **Never call a tool whose name was not in the tool list returned at turn start.** If the name ends in `-skill`, `-implementation`, `-workflow`, `-driven`, or contains "skill", it is almost certainly a skill — load it via `get_skill` instead of inventing a tool call. Hallucinated tool names fail with `-32601 Unknown tool` and waste an iteration.',
-        '- **No tight loops.** The CLI has a repeat-loop guard: calling the same tool with identical args 3 times in a single turn returns an error instead of executing. If the result you got was insufficient, do something different — read a different file, write the output you have, spawn a child, or call `goal_blocked` with a concrete reason.',
+        brainOnline ? memoryFirstSection() : brainOfflineNotice(),
         '',
-        '## Multi-Agent Orchestration',
-        '- You may delegate bounded, parallelizable work to child agents with `spawn_agent` (one child) or `spawn_agents` (a batch in one tool call).',
-        '- Available roles: explorer (read-only investigation), architect (design alternatives), reviewer (code review), worker (implementation with write access), verifier (runs tests/checks). Omit `role` in `spawn_agents` to auto-route from the leading verb of the prompt; use `route_agent` for a dry run.',
-        '- Use `list_agents` / `read_agent_transcript` to observe, `wait_agent` (single) or `wait_agents` (batch) to drain, and `close_agent` for cleanup.',
-        '- **Fan-out triggers.** ALWAYS prefer `spawn_agents` (≥3 children) when the user prompt says any of: "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "as much as", "test more X", "explore all Y", "across the codebase". One tool call + a paragraph asking "what next?" is NOT acceptable for these prompts.',
-        '- **Standard fan-out templates.**',
-        '   • "Test all the MCP tools" → 5 explorers, each focused on a different tool category (memory_*, list_skills/get_skill, governance/*, working/*, hooks/*).',
-        '   • "Explore this codebase" → 3 explorers covering server / client / shared types.',
-        '   • "Design feature X" → 2 architects with different stack constraints + 1 reviewer.',
-        '- Delegate when there are 2+ independent investigations or when you would otherwise produce a large isolated output. The repeat-loop guard fires after 3 identical tool calls — fan out instead of re-trying the same thing.',
-        '- Always synthesize child outputs in your own words — never claim work is done just because a child returned.',
+        '## Multi-agent orchestration',
+        '- Delegate parallel, bounded work via `spawn_agent` (one) or `spawn_agents` (batch). Roles: explorer (read-only investigation), architect (design alternatives), reviewer (code review), worker (write access), verifier (tests/checks). Omit `role` in `spawn_agents` to auto-route from the leading verb; use `route_agent` for a dry run.',
+        '- Fan-out triggers: phrasings like "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "across the codebase" → ALWAYS `spawn_agents` with ≥3 children. One tool call + "what next?" is NOT acceptable for those prompts.',
+        '- Use `wait_agent` / `wait_agents` to drain before yielding. Synthesize child outputs in your own words — never claim work is done just because a child returned.',
         '',
-        '## Durable Workflow Artifacts (single source of truth)',
-        '- Every multi-step request (spec, feature plan, review, implementation plan) MUST land as files inside `.brainrouter/cli/workflows/<slug>/`.',
-        '- Required artifacts: `spec.md` (what + why + boundaries), `tasks.md` (ordered task breakdown), `walkthrough.md` (post-implementation summary). Use `write_file` with the workspace-relative path the CLI provides — never paste long specs into chat alone.',
-        '- For free-form prompts that look like spec/plan requests, tell the user to use `/spec <title>` or `/feature-dev <title>` instead of producing a chat-only plan. Those commands set up the directory and pre-fill the meta record for you.',
-        '- Never produce a multi-section plan response in chat without also writing it to the workflow folder. If you cannot write the file, say so explicitly.',
+        '## Workflow artifacts',
+        'Multi-step requests (spec, feature plan, review, implementation plan) land as files under `.brainrouter/cli/workflows/<slug>/` — `spec.md` (what + why + boundaries), `tasks.md` (ordered breakdown), `walkthrough.md` (post-implementation summary). Use `/spec <title>` or `/feature-dev <title>` to set up the folder; don\'t produce chat-only plans. If you can\'t write the file, say so explicitly.',
         '',
-        '## Local Tools',
-        '- read_file: read workspace files with optional line ranges.',
-        '- write_file: create or overwrite files inside the workspace.',
-        '- edit_file: replace exactly one target string in an existing file.',
-        '- list_dir: list a workspace directory.',
-        '- grep_search: search workspace files for a string.',
-        '- glob_files: find workspace files by glob pattern.',
-        '- run_command (alias: bash / shell / sh): run shell commands after explicit terminal confirmation.',
-        '- fetch_url: fetch HTTP(S) text content when needed.',
+        '## Autonomy & batching',
+        '- Don\'t block on unnecessary confirmations. Execute clear instructions.',
+        '- Batch independent tool calls (reads, recalls, spawns) in ONE response — most chat APIs accept multiple `tool_calls` per assistant message and the CLI runs them in order then feeds results back.',
+        '- After tools return: either call more tools that need the results, OR write the final answer. NEVER produce "I will now do Y" prose with no tool call attached.',
         '',
-        '## BrainRouter MCP Tools',
-        '- memory_resolve_session, memory_recall, memory_search, memory_graph_query, memory_contradictions.',
-        '- memory_working_context, memory_working_offload, memory_working_reset.',
-        '- memory_capture_turn, memory_mark_cited, memory_task_state, memory_task_update, memory_file_history, memory_debug_trace_search.',
-        '- list_skills, get_skill, search_skills, get_persona, get_reference, list_template_docs, get_template_doc.',
+        '## Persistence on tool failure',
+        'When a tool fails or returns an empty/unexpected result, try at least one recovery before yielding:',
+        '1. **Extension swap** — `read_file` on `foo/bar.js` failed? Try `.ts` / `.tsx` / `.mjs`. This codebase is TypeScript.',
+        '2. **Directory listing** — `list_dir` the parent to see what\'s actually there.',
+        '3. **Glob / grep** — `glob_files` with `**/<name>.*` or `grep_search` for a unique symbol.',
+        '4. **Memory** — `memory_file_history` / `memory_search` may have the right path.',
+        'Only after 2+ failed recoveries say the file doesn\'t exist, and propose the closest matches you DID find. When `/goal` is active, NEVER stop on a single failure — burning an iteration to ask "what next?" violates the goal contract.',
         '',
-        '## Autonomy and tool batching (read carefully)',
-        '- **Do not block on unnecessary confirmations.** When the user gives you a clear instruction, execute it. Do not ask "shall I proceed?" between tool calls. Do not stop mid-flow to enumerate what you *could* do — DO it.',
-        '- **Batch your tool calls.** Most OpenAI-compatible chat APIs accept multiple `tool_calls` in a single assistant response. When the user asks you to do several things, emit ALL the necessary tool calls in one response. The CLI executes them in order and feeds the results back to you.',
-        '- **Parallelize independent work.** Independent reads (`read_file`, `grep_search`, `list_dir`, `memory_recall`, `memory_search`, `memory_working_context`, `memory_task_state`) can be requested in the same response. Independent `spawn_agent` calls likewise.',
-        '- When the user says "test all", "every X", "do everything", "run them all", treat it as a single batched request. Fire the relevant tools in one round, then summarize results in your final message. Do not iterate "now I will test X / would you like to proceed".',
-        '- After your tools return, either (a) call more tools that need the previous results, or (b) write the final answer. Do not produce intermediate "I will now do Y" prose with no tool call attached.',
-        '- If sub-agents (spawn_agent) are running, `wait_agent` for them before yielding the turn.',
+        '## Surfacing tool output',
+        'When the user explicitly asks to see something — "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find/grep for Q" — your final message MUST include the actual content the tool returned (rendered as a Markdown list / fenced code block / table as appropriate). The CLI hides full tool payloads by default; an acknowledgement-only reply ("I listed the contents") leaves the user blind.',
         '',
-        '## Persistence on tool failure (CRITICAL — read every turn)',
-        'When a tool call fails or returns an empty/unexpected result, you MUST attempt to recover before yielding the turn. **Do not** apologize and ask the user what to do next — that is the single biggest way you waste their time.',
+        '## Mid-turn user prompts',
+        '- Binary y/N confirmations are CLI-internal gates (`askYesNo`) — the framework triggers them. Do NOT try to call `askYesNo` as a tool.',
+        '- `ask_user_choice({ question, header, options })` is for genuine ambiguity with 2–4 mutually-exclusive reasonable approaches. NOT for trivial confirmations, NOT for things you can decide yourself, NOT a substitute for thinking. Errors in non-interactive runs (CI, piped, `brainrouter run`) — when that happens fall back to deciding yourself and explicitly state which option you picked and why.',
         '',
-        '**Standard recovery moves (try at least ONE before giving up):**',
-        '1. **Extension swap.** If `read_file` on `foo/bar.js` fails with "File not found", try `foo/bar.ts`, `foo/bar.tsx`, `foo/bar.mjs`. This codebase is TypeScript — `.js` paths almost always mean `.ts` source.',
-        '2. **Directory listing.** Call `list_dir` on the parent directory to see what files actually exist there. Then re-read the right file.',
-        '3. **Glob search.** Call `glob_files` with a wildcard (`**/engine.*`, `**/<filename>.*`) or `grep_search` for a unique symbol you expect inside the file.',
-        '4. **Memory lookup.** `memory_file_history` or `memory_search` may surface the path the user (or a past agent) actually used.',
-        '5. **Re-read the listing.** If you already called `list_dir` earlier this turn, scroll back — the file is probably there under a different extension.',
+        '## Operating behavior',
+        '- Be concise but not passive. Read before editing. Run tests after changes.',
+        '- For multi-step work, keep `update_plan` current — statuses `pending` / `in_progress` / `completed`, at most one `in_progress`.',
+        '- The CLI persists per-session state under `.brainrouter/cli/sessions/<encodedKey>/` (transcript.jsonl, goal.json, tasks.json) for inspection.',
+        '- If the model / endpoint can\'t use tools, say so and continue with the best direct answer.',
         '',
-        'Only after 2+ recovery attempts that all fail should you tell the user the file genuinely does not exist, and even then propose the closest matching files you DID find. Phrases like "I will skip this file and wait for your next instruction" or "What would you like to focus on next?" are forbidden when you have not exhausted the recovery moves above.',
-        '',
-        '**The same persistence rule applies to every tool failure** — failed greps, failed edits (re-read the file and try a narrower string), failed shell commands (read the stderr and adjust). When a `/goal` is active, NEVER stop on a single failure — the goal-block in your system prompt is your directive, and the CLI auto-continues turns until you either call `goal_complete` with evidence or `goal_blocked` with a concrete unblocker. Burning an iteration to ask "what next?" violates the goal contract.',
-        '',
-        '## Surfacing tool output to the user (read every turn)',
-        'When the user explicitly asks to see something — phrasings like "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find files matching Q", "grep for W" — your final assistant message MUST include the actual content the tool returned. Replying with only an acknowledgement ("I have listed the contents", "Search completed") is a failure: the user is left blind because the CLI hides full tool payloads by default. Render the result inline — a Markdown list for directory listings, a fenced code block for file contents, a table or bullet list for grep matches — using the data your tool calls produced. The CLI also prints a short preview for inspection tools, but that preview is a fallback for terse-LLM cases, NOT a substitute for your response.',
+        '## Runtime Context',
+        `- Workspace root: ${context.workspaceRoot}`,
+        `- Launch directory: ${context.launchCwd}`,
+        `- BrainRouter sessionKey: ${context.sessionKey}`,
+        '- All relative paths resolve from the workspace root.',
         '',
-        '## Operating Behavior',
-        '- Be concise but not passive. Do the next useful thing with tools.',
-        '- Do not say you lack session context when the Runtime Context contains a sessionKey.',
-        '- Do not ask for a workspace path unless the current workspace root is wrong or inaccessible.',
-        '- Read before editing. Keep edits scoped. Run relevant tests after changes.',
-        '- If the model or endpoint cannot use tools, explain that clearly and continue with the best available direct answer.',
-        '- For multi-step work, keep the durable plan current with update_plan. Use statuses pending, in_progress, and completed, with at most one in_progress item.',
-        '- The CLI persists per-session state under .brainrouter/cli/sessions/<encodedKey>/ (transcript.jsonl, goal.json, tasks.json) for inspection and future orchestration.',
+        '## Workspace Instructions',
+        instructionSummary,
         '',
         personalityOverlay(context.personality),
-    ].join('\n');
+        policyOverlay(context.executionMode, context.reviewPolicy),
+        effortOverlay(context.effort),
+        clarifyOverlay(context.activeSkill),
+    ].filter(Boolean).join('\n');
 }
 export function loadWorkspaceInstructionSummary(workspaceRoot) {
     const instructionPath = ['AGENT.md', 'AGENTS.md']

package/dist/runtime/dangerousCommand.d.ts ADDED Viewed

@@ -0,0 +1,53 @@
+/**
+ * Single source of truth for "is this shell command destructive enough that we
+ * must confirm even in /mode fast?"
+ *
+ * Used by:
+ *   - agent.ts `run_command`: in `executionMode === 'fast'` we skip the
+ *     `askYesNo` prompt for everyday commands, but route through askYesNo
+ *     anyway when this returns true.
+ *   - tests: invariant that fast mode ≠ unconditional auto-approve.
+ *
+ * Heuristic, not a sandbox. The real blast-radius limiter is
+ * `BRAINROUTER_SANDBOX=on`. This list exists so that a typo
+ * (`rm -rf /` instead of `rm -rf ./build`) doesn't get auto-approved
+ * because the user happened to be in fast mode.
+ *
+ * Patterns are conservative on purpose: false-positives cost one extra y/N
+ * prompt; false-negatives cost a wiped disk. Add a pattern when you spot one
+ * — do not remove existing entries without a replacement.
+ */
+/**
+ * Returns true when the command matches any pattern that fast mode should
+ * still gate through `askYesNo`. The check is a single-pass regex sweep
+ * against the literal command string — no shell parsing, no env expansion.
+ *
+ * Matching is keyword-based with a trailing wildcard: `rm -rf foo` matches,
+ * `rm-rf` does not (word boundary), `rmdir` does not (different keyword).
+ * When in doubt, lean toward returning true: an extra y/N prompt costs far
+ * less than accidentally letting a destructive command through.
+ */
+export declare function isDangerousCommand(command: string): boolean;
+export type RunCommandApproval = 'auto-approve' | 'ask' | 'deny-silent';
+/**
+ * Pure decision for "what should happen when the agent calls `run_command`?"
+ * Split out of `agent.ts` so the policy is unit-testable without TTY mocking.
+ *
+ *   - Silent children cannot answer a y/N prompt. We auto-approve only when
+ *     the parent has opted in via `executionMode === 'fast'` AND the command
+ *     is not in the dangerous set. Dangerous commands in silent children are
+ *     always denied — there is no human to confirm the blast radius.
+ *   - Interactive parents in `fast` mode skip the prompt for safe commands
+ *     and still gate dangerous ones through `askYesNo`. In `planning` mode
+ *     every command routes through `askYesNo`.
+ *
+ * The `executionMode === 'fast'` check is the single source of truth for
+ * "yolo-ish" behavior — the legacy `autoApproveShell` flag is migrated to
+ * `executionMode: 'fast'` on first read of `preferencesStore`, so new
+ * callers do not need to consult both.
+ */
+export declare function resolveRunCommandApproval(prefs: {
+    executionMode: 'planning' | 'fast';
+}, command: string, opts: {
+    silent: boolean;
+}): RunCommandApproval;