@kinqs/brainrouter-cli 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/.env.example +55 -48
  2. package/bin/cli.cjs +71 -0
  3. package/dist/agent/agent.d.ts +212 -2
  4. package/dist/agent/agent.js +428 -38
  5. package/dist/cli/banner.d.ts +60 -0
  6. package/dist/cli/banner.js +199 -0
  7. package/dist/cli/cliPrompt.d.ts +69 -0
  8. package/dist/cli/cliPrompt.js +287 -0
  9. package/dist/cli/commands/_helpers.js +6 -6
  10. package/dist/cli/commands/guard.js +75 -10
  11. package/dist/cli/commands/mcp.d.ts +17 -0
  12. package/dist/cli/commands/mcp.js +121 -0
  13. package/dist/cli/commands/memory.js +2 -2
  14. package/dist/cli/commands/obs.js +22 -22
  15. package/dist/cli/commands/session.js +13 -5
  16. package/dist/cli/commands/ui.js +97 -45
  17. package/dist/cli/commands/workflow.d.ts +18 -0
  18. package/dist/cli/commands/workflow.js +314 -43
  19. package/dist/cli/repl.js +219 -132
  20. package/dist/cli/spinner.d.ts +34 -0
  21. package/dist/cli/spinner.js +36 -0
  22. package/dist/cli/statusline.d.ts +67 -0
  23. package/dist/cli/statusline.js +204 -0
  24. package/dist/cli/theme.d.ts +79 -0
  25. package/dist/cli/theme.js +106 -0
  26. package/dist/cli/whereView.d.ts +81 -0
  27. package/dist/cli/whereView.js +245 -0
  28. package/dist/config/config.d.ts +40 -0
  29. package/dist/config/config.js +45 -73
  30. package/dist/index.js +80 -13
  31. package/dist/memory/briefing.d.ts +10 -0
  32. package/dist/memory/briefing.js +69 -1
  33. package/dist/prompt/breadthHint.d.ts +5 -0
  34. package/dist/prompt/breadthHint.js +44 -0
  35. package/dist/prompt/systemPrompt.d.ts +34 -0
  36. package/dist/prompt/systemPrompt.js +124 -108
  37. package/dist/runtime/dangerousCommand.d.ts +53 -0
  38. package/dist/runtime/dangerousCommand.js +105 -0
  39. package/dist/runtime/mcpClient.d.ts +38 -1
  40. package/dist/runtime/mcpClient.js +90 -2
  41. package/dist/state/goalStore.d.ts +98 -17
  42. package/dist/state/goalStore.js +132 -42
  43. package/dist/state/preferencesStore.d.ts +67 -3
  44. package/dist/state/preferencesStore.js +84 -1
  45. package/dist/state/workflowArtifacts.d.ts +63 -2
  46. package/dist/state/workflowArtifacts.js +120 -8
  47. package/dist/tests/_helpers.d.ts +31 -0
  48. package/dist/tests/_helpers.js +91 -0
  49. package/package.json +5 -4
@@ -17,7 +17,7 @@ export async function buildMemoryBriefing(inputs) {
17
17
  if (toolNames.has('memory_working_context')) {
18
18
  tasks.push(callSafe('memory_working_context', { sessionKey, workspacePath: workspaceRoot }, mcpClient, maxChars));
19
19
  }
20
- if (toolNames.has('memory_task_state')) {
20
+ if (toolNames.has('memory_task_state') && !inputs.hasActiveGoal) {
21
21
  tasks.push(callSafe('memory_task_state', { query }, mcpClient, maxChars));
22
22
  }
23
23
  const results = await Promise.all(tasks);
@@ -28,6 +28,16 @@ export async function buildMemoryBriefing(inputs) {
28
28
  if (!r.text)
29
29
  continue;
30
30
  sourcesQueried.push(r.source);
31
+ if (r.source === 'memory_working_context') {
32
+ const workingSection = renderWorkingMemorySection(r.text);
33
+ if (workingSection) {
34
+ sections.push(workingSection);
35
+ continue;
36
+ }
37
+ // Fall through to the opaque-dump branch when the payload didn't
38
+ // match the expected shape — that path runs redactText and keeps
39
+ // the secrets test honest.
40
+ }
31
41
  if (r.records && r.records.length > 0) {
32
42
  // Render structured cards instead of dumping the raw JSON. The previous
33
43
  // form emitted ~2-4KB of `recallExplanation`/`sparkedNodes`/etc. per
@@ -147,6 +157,64 @@ function prettyLabel(toolName) {
147
157
  default: return toolName;
148
158
  }
149
159
  }
160
/**
 * 0.3.6 item 2c — structurally surface working-memory steps in the
 * briefing instead of dumping the raw JSON. Two slices are rendered:
 *   - `state.injectedState.recentSteps`, the tail the MCP already injected
 *     (last 5–10 steps, regardless of kind), which gives the model the
 *     latest tool outputs in order; and
 *   - up to 3 most-recent `kind === 'reasoning'` steps from the full
 *     `steps` log that the tail did not already include, which keeps the
 *     "why" trail visible after a chatty tool burst.
 *
 * The payload comes straight from `JSON.parse`, so nothing about its shape
 * is trusted: entries that are not objects are ignored and non-string
 * `kind` / `title` / `summary` values are treated as missing.
 *
 * @param text Raw `memory_working_context` payload (expected to be JSON).
 * @returns The redacted Markdown section, or `null` when the payload is not
 *   JSON, is not an object, or yields no renderable step — the caller then
 *   falls back to the opaque-dump branch so the content is not lost and
 *   secrets still get redacted on unstructured text.
 */
function renderWorkingMemorySection(text) {
    let parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch {
        return null;
    }
    if (!parsed || typeof parsed !== 'object')
        return null;
    // A malformed log may contain nulls or primitives; only objects are steps.
    const isStep = (candidate) => candidate !== null && typeof candidate === 'object';
    const injectedTail = parsed.state?.injectedState?.recentSteps;
    const recentSteps = Array.isArray(injectedTail) ? injectedTail.filter(isStep) : [];
    const allSteps = Array.isArray(parsed.steps) ? parsed.steps.filter(isStep) : recentSteps;
    if (recentSteps.length === 0 && allSteps.length === 0)
        return null;
    // Collapse whitespace so a multi-line field cannot break the bullet layout.
    const flatten = (value) => (typeof value === 'string' ? value.replace(/\s+/g, ' ').trim() : '');
    const renderStep = (step) => {
        const kind = flatten(step.kind);
        const title = flatten(step.title) || '(no title)';
        const summary = flatten(step.summary);
        const preview = summary.length > 200 ? `${summary.slice(0, 199)}…` : summary;
        return `- ${kind ? `[${kind}] ` : ''}${title}${preview ? ` — ${preview}` : ''}`;
    };
    const lines = [`### ${prettyLabel('memory_working_context')}`];
    if (recentSteps.length > 0) {
        lines.push('Recent steps:');
        for (const step of recentSteps)
            lines.push(renderStep(step));
    }
    // Surface up to 3 most-recent reasoning-kind steps that the recentSteps
    // tail didn't already include. Cap on purpose — without it a turn that
    // offloaded reasoning every batch would stuff the briefing with its own
    // past commentary.
    const recentNodeIds = new Set(recentSteps.map((s) => s.nodeId).filter(Boolean));
    const reasoningTail = allSteps
        .filter((s) => s.kind === 'reasoning' && (!s.nodeId || !recentNodeIds.has(s.nodeId)))
        .slice(-3);
    if (reasoningTail.length > 0) {
        lines.push('', 'Recent reasoning (why-trail):');
        for (const step of reasoningTail)
            lines.push(renderStep(step));
    }
    // Header only means nothing was rendered: hand the payload back to the
    // caller's fallback instead of emitting an empty section.
    if (lines.length === 1)
        return null;
    return redactText(lines.join('\n'));
}
150
218
  function dedupe(items) {
151
219
  return Array.from(new Set(items));
152
220
  }
@@ -36,9 +36,14 @@ export declare function detectBreadthIntent(prompt: string): BreadthIntent;
36
36
  * turn that should have been parallel.
37
37
  */
38
38
  export declare const BREADTH_FAN_OUT_THRESHOLD = 1.5;
39
/**
 * Checks `prompt` for an explicit opt-out from fan-out.
 * `vetoed` reports whether an opt-out phrase matched; `pattern`, set only
 * when vetoed, is the matched text.
 */
+ export declare function detectFanOutVeto(prompt: string): {
40
+ vetoed: boolean;
41
+ pattern?: string;
42
+ };
39
43
  /**
   * Decides whether the fan-out hint should be suggested for `prompt`.
   * - `suggest`: false whenever the prompt is vetoed; otherwise whether the
   *   breadth score reaches `BREADTH_FAN_OUT_THRESHOLD`.
   * - `intent`: the breadth-intent result; on a veto its `signals` gain a
   *   `vetoed:<phrase>` entry.
   * - `veto`: the matched opt-out phrase, present only when vetoed.
   */
  export declare function shouldSuggestFanOut(prompt: string): {
40
44
  suggest: boolean;
41
45
  intent: BreadthIntent;
46
+ veto?: string;
42
47
  };
43
48
  /**
44
49
  * The system message we inject to nudge the agent toward spawn_agents. It
@@ -64,8 +64,52 @@ export function detectBreadthIntent(prompt) {
64
64
  * turn that should have been parallel.
65
65
  */
66
66
  export const BREADTH_FAN_OUT_THRESHOLD = 1.5;
67
/**
 * Negation hints — explicit signals from the user that they DO NOT want
 * fan-out for this prompt. Honored as a hard veto: even a high breadth
 * score won't trigger the hint if any of these match.
 *
 * Common cases we want to honor:
 *   - "(no spawn_agent, no fan-out, files are small)" — explicit opt-out
 *   - "no child agents" / "no children"               — explicit opt-out
 *   - "do this in one turn"                           — wants serial
 *   - "directly with read_file, no fan-out"           — explicit tool
 *   - "yourself, don't spawn agents"                  — explicit self
 *
 * Without this veto, a prompt like
 *   "audit every file (no spawn_agent, files are small)"
 * still scores high on `verb-object-broad` + `every` and the model gets
 * told "fan out!" — directly contradicting the user's instruction.
 *
 * Matching notes:
 *   - `child(?:ren)?` covers both "child" and "children"; the previous
 *     `children?` only ever matched "children".
 *   - The apostrophe class accepts the typographic "’" as well as "'",
 *     because pasted prompts frequently carry curly quotes.
 *   - None of the patterns use the `g` flag, so `exec` is stateless.
 */
const NEGATION_PATTERNS = [
    /\bno\s+(?:spawn[_-]?agents?|fan[- ]?out|child(?:ren)?|sub[- ]?agents?|orchestration)\b/i,
    /\b(?:don['’]?t|do\s+not)\s+(?:spawn|fan[- ]?out|delegate|orchestrate)\b/i,
    /\b(?:in\s+one\s+turn|single\s+turn|sequentially|one[- ]by[- ]one|in[- ]process)\b/i,
    /\bdirectly\s+(?:with|using|via)\b/i,
    /\b(?:yourself|by\s+yourself|on\s+your\s+own)\b/i,
];
/**
 * Reports whether the prompt explicitly opts out of fan-out.
 *
 * @param prompt The user prompt; `null` / `undefined` are treated as empty.
 * @returns `{ vetoed: true, pattern }` where `pattern` is the matched text of
 *   the first pattern (in list order) that hits, or `{ vetoed: false }`.
 */
export function detectFanOutVeto(prompt) {
    const text = String(prompt ?? '');
    for (const re of NEGATION_PATTERNS) {
        const hit = re.exec(text);
        if (hit)
            return { vetoed: true, pattern: hit[0] };
    }
    return { vetoed: false };
}
67
99
  export function shouldSuggestFanOut(prompt) {
68
100
  const intent = detectBreadthIntent(prompt);
101
+ const veto = detectFanOutVeto(prompt);
102
+ if (veto.vetoed) {
103
+ // Reflect the veto in the intent's signals so onToolEnd's surfacing
104
+ // shows the user why we didn't fan out, even though the breadth
105
+ // score was high. The score itself isn't zeroed — it's still useful
106
+ // signal for other heuristics.
107
+ return {
108
+ suggest: false,
109
+ intent: { ...intent, signals: [...intent.signals, `vetoed:${veto.pattern}`] },
110
+ veto: veto.pattern,
111
+ };
112
+ }
69
113
  return { suggest: intent.score >= BREADTH_FAN_OUT_THRESHOLD, intent };
70
114
  }
71
115
  /**
@@ -5,6 +5,40 @@ export interface SystemPromptContext {
5
5
  instructionSummary?: string;
6
6
  /** Communication style overlay set by /personality. */
7
7
  personality?: 'concise' | 'standard' | 'detailed' | 'pair-programmer';
8
+ /**
9
+ * Name of the active BrainRouter skill latched by a slash command (e.g.
10
+ * `/spec`, `/feature-dev`, `/grill-me`). Most skills are workflow
11
+ * directives the model loads via `get_skill` and don't change the system
12
+ * prompt — `grill-me` is the exception: it appends a CLARIFY-mode block
13
+ * here so the model asks questions instead of jumping to edits.
14
+ */
15
+ activeSkill?: string;
16
+ /**
17
+ * Execution-mode overlay set by `/mode`. Only `fast` produces an overlay
18
+ * — `planning` is the unchanged default behaviour and adding prose for it
19
+ * would just dilute the rest of the prompt.
20
+ */
21
+ executionMode?: 'planning' | 'fast';
22
+ /**
23
+ * Review-policy overlay set by `/review-policy`. Only `proceed` produces
24
+ * an overlay; `request` is the default behaviour.
25
+ */
26
+ reviewPolicy?: 'request' | 'proceed';
27
+ /**
28
+ * Reasoning-depth overlay set by `/effort` (or `BRAINROUTER_EFFORT`).
29
+ * `medium` is the default and emits no overlay — adding prose for it
30
+ * would silently change behaviour for every existing user on upgrade.
31
+ */
32
+ effort?: 'low' | 'medium' | 'high';
33
+ /**
34
+ * 0.3.6 item 10b: the set of MCP tool names actually connected this turn.
35
+ * When this list lacks `memory_recall` (i.e. the BrainRouter cloud brain
36
+ * is offline), the prompt omits the "BrainRouter MCP Tools" / "Memory-
37
+ * First" sections so the model doesn't try to call tools that don't
38
+ * exist. Undefined = "assume the BrainRouter MCP is online" (pre-10b
39
+ * back-compat for callers that don't pass the inventory).
40
+ */
41
+ connectedMcpTools?: string[];
8
42
  }
9
43
  export declare function buildSystemPrompt(context: SystemPromptContext): string;
10
44
  export declare function loadWorkspaceInstructionSummary(workspaceRoot: string): string | undefined;
@@ -27,133 +27,149 @@ function personalityOverlay(style) {
27
27
  return '';
28
28
  }
29
29
  }
30
/**
 * Builds the "Session policy overrides" prompt block.
 *
 * Only the non-default settings contribute a bullet: execution mode `fast`
 * and review policy `proceed`. Returns an empty string when neither is set.
 */
function policyOverlay(executionMode, reviewPolicy) {
    const bullets = [
        executionMode === 'fast'
            ? '- Execution mode is `fast`: skip the "may I run this?" prose for safe shell calls and just issue the tool. The CLI still gates dangerous commands (`rm -rf`, `sudo`, force-push, …) with a y/N regardless of mode.'
            : null,
        reviewPolicy === 'proceed'
            ? '- Review policy is `proceed`: apply multi-file plans and report after — no "ready for your approval?" pause. `/approve` is still the user\'s explicit lever.'
            : null,
    ].filter((bullet) => bullet !== null);
    return bullets.length > 0 ? `${'## Session policy overrides'}\n${bullets.join('\n')}` : '';
}
42
/**
 * Builds the reasoning-depth prompt block for the given effort level.
 * Only `low` and `high` emit text; any other value (including `medium`
 * and `undefined`) yields an empty string.
 */
function effortOverlay(effort) {
    switch (effort) {
        case 'low':
            return '## Reasoning depth: low'
                + '\n'
                + '- Be terse. Skip ceremony. One-paragraph answers when the question fits in one paragraph.';
        case 'high':
            return '## Reasoning depth: high'
                + '\n'
                + '- Reason step-by-step before acting. Audit your evidence against the goal before each tool call.';
        default:
            return '';
    }
}
57
/**
 * Builds the CLARIFY-mode prompt block.
 * Emitted only when the active skill is `grill-me`; every other value
 * (including `undefined`) yields an empty string.
 */
function clarifyOverlay(activeSkill) {
    const clarifyActive = activeSkill === 'grill-me';
    if (clarifyActive) {
        const heading = '## CLARIFY mode (grill-me)';
        const rules = [
            '- Do NOT make file edits, run shell commands, or spawn worker agents this turn.',
            '- Ask 2–5 questions to disambiguate scope, format, and unstated assumptions.',
            '- Prefer `ask_user_choice` for mutually-exclusive options; plain prose for free-form input.',
            '- (`askYesNo` is a CLI-internal gate the framework triggers — do NOT try to call it as a tool.)',
            '- End with a one-paragraph "what I\'ll do once you answer" so the user can sanity-check the read.',
        ];
        return `${heading}\n${rules.join('\n')}`;
    }
    return '';
}
69
/**
 * 0.3.6 item 10b: decides whether the BrainRouter-MCP-specific guidance
 * should be emitted.
 *
 * The brain counts as reachable when `memory_recall` (the canonical
 * BrainRouter signature tool) is among the connected tools. A missing
 * inventory (older callers that don't pass one) is read as "online", so
 * the prompt keeps its memory guidance for callers that haven't been
 * updated yet. An empty inventory, by contrast, means offline.
 */
function isBrainOnline(connectedTools) {
    return !connectedTools || connectedTools.includes('memory_recall');
}
82
/**
 * Returns the prompt block shown in place of the memory guidance when the
 * BrainRouter MCP is not connected: a warning heading followed by four
 * bullets telling the model to avoid memory/skill tools and work from the
 * workspace with local tools.
 */
function brainOfflineNotice() {
    const heading = '## ⚠️ BrainRouter MCP is OFFLINE this turn';
    const guidance = [
        '- Long-term memory, skill lookup, and the recall briefing are unavailable.',
        '- Do NOT call any BrainRouter memory or skill tools — they will fail with "MCP server is not connected". The turn-start tool list reflects this; only tools that appear there are callable.',
        '- If the user asks about past sessions, prior decisions, or skill-based workflows, tell them the brain is offline and recommend `/mcp reconnect`.',
        '- Operate against the workspace files directly using local tools (`read_file`, `glob_files`, `grep_search`, `run_command`).',
    ];
    return `${heading}\n${guidance.join('\n')}`;
}
91
/**
 * Returns the "Memory-First Workflow" prompt block: a heading and intro
 * line, five workflow bullets, a blank separator line, and the closing
 * anti-hallucination paragraph, joined with newlines.
 */
function memoryFirstSection() {
    const heading = '## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)';
    const intro = 'BrainRouter is a cognitive memory engine first. Treat memory as a primary tool.';
    const workflow = [
        '- A `## BrainRouter Memory Briefing` system message is auto-injected with recalled memories, persona, and recent context. Read it before reasoning. When thin/empty, call `memory_search` / `memory_recall` yourself — do not assume the user is new.',
        '- For non-trivial work, call `memory_recall` with sessionKey + the request as the query. When you pivot mid-turn or need deeper signal, re-call: `memory_file_history` for file-specific past changes, `memory_graph_query` for related entities (2-hop), `memory_explain_recall` for ranking signals, `memory_failed_attempts` for prior dead-ends. Call `memory_resolve_session` first when you don\'t yet have a sessionKey.',
        '- Quote record IDs inline like `[rec_xxx]` so the user sees what you used.',
        '- For payloads >~1,000 tokens, call `memory_working_offload` and reference back by its ref-node id instead of pasting again.',
        '- **Capture the WHY.** After every non-trivial tool batch (≥3 tool calls OR a single tool that returned >2KB), call `memory_working_offload` ONCE with `kind: "reasoning"`, `title: "Why: <short>"`, and a 1-paragraph DECISION summary. Payload offload is about token budget; reasoning offload is the audit trail the next turn\'s briefing surfaces back.',
    ];
    const antiHallucination = '**Anti-hallucination.** Don\'t generalize recall results — quote or paraphrase tightly, always with `[recordId]`. Don\'t invent project facts not in the briefing, a recall result, or a file you read. Never say "I do not have information about your current projects" if the briefing is non-empty or before running `memory_recall`. If a recalled fact looks stale or off-project (e.g. recall says "Vue.js + Go" but the workspace is TypeScript-only), flag it: "Recalled [rec_xxx] looks inconsistent — archive via `memory_update`?"';
    // The empty template segment between the bullets and the closing
    // paragraph reproduces the blank separator line.
    return `${heading}\n${intro}\n${workflow.join('\n')}\n\n${antiHallucination}`;
}
30
104
  export function buildSystemPrompt(context) {
31
105
  const instructionSummary = context.instructionSummary?.trim()
32
106
  ? context.instructionSummary.trim()
33
107
  : 'No workspace AGENT.md or AGENTS.md instruction file was found.';
108
+ const brainOnline = isBrainOnline(context.connectedMcpTools);
109
+ // Order matters for prompt-cache hits (item 9c): identity + tool-mechanics
110
+ // baseline stay first because they never change turn-to-turn; the workspace
111
+ // block + per-call overlays sit at the tail so dynamic content lands last.
34
112
  return [
35
- 'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal.',
36
- 'Your edge over generic coding agents is being direct, tool-driven, memory-aware, and workspace-aware — every turn should reflect that.',
113
+ 'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal. Direct, tool-driven, memory-aware, workspace-aware.',
37
114
  '',
38
- '## Runtime Context',
39
- `- Workspace root: ${context.workspaceRoot}`,
40
- `- Launch directory: ${context.launchCwd}`,
41
- `- BrainRouter sessionKey: ${context.sessionKey}`,
42
- '- All relative file paths are resolved from the workspace root, not from the CLI installation directory.',
43
- '- If the user asks about "the session", answer with the current BrainRouter sessionKey and workspace root.',
44
- '',
45
- '## Workspace Instructions',
46
- instructionSummary,
47
- '',
48
- '## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)',
49
- 'BrainRouter is a cognitive memory engine first and a coding agent second. Treat memory as a primary tool, not an afterthought. The user pays for this routing — you must use it.',
50
- '',
51
- '### Before doing the work',
52
- '- The CLI already injects a "## BrainRouter Memory Briefing" system message with recalled cognitive memories, persona, focus scenes, and recent context. READ it before you reason. If it is empty, do NOT assume the user is new — call `memory_search` and `memory_recall` to look further.',
53
- '- For ANY non-trivial request, call `memory_recall` with the current sessionKey AND the user request as the query. Look for `recordId` values you can cite later.',
54
- '- If the request mentions a specific file, also call `memory_file_history` with that path — past changes and known issues live there.',
55
- '- If the request mentions a domain/feature concept, call `memory_graph_query` with the entity name to find related memories across the knowledge graph (2-hop default).',
56
- '- When you don\'t have a sessionKey yet, call `memory_resolve_session` with the workspacePath.',
57
- '',
58
- '### During the work',
59
- '- Surface the record IDs you are relying on. Quote them inline like `[rec_xxx]` so the user sees what you used.',
60
- '- For long-running tasks, call `memory_task_state` to check whether this work was started before and `memory_task_update` to record progress (blockers, decisions, next actions).',
61
- '- If you produce a payload over ~1,000 tokens (analysis, diff, large summary), call `memory_working_offload` and refer back to it by its ref node id instead of pasting again.',
62
- '- The briefing only fires ONCE at turn start with the prompt as the query. **Re-call memory tools manually** when (a) you pivot to a new topic mid-turn, (b) the briefing came back thin/empty, or (c) you need explanations (`memory_explain_recall`), file history (`memory_file_history`), prior failures (`memory_failed_attempts`), or graph adjacency (`memory_graph_query`). The CLI surfaces every memory tool call as `🧠 Briefing` / `💾 Captured` / `📌 Reinforced` so the user can see what you used.',
63
- '',
64
- '### After the work',
65
- '- The CLI auto-runs `memory_mark_cited` with the records you actually used (detected by content match against your final answer) and `memory_capture_turn`. You do NOT need to call these unless you want to force capture mid-turn after a particularly meaningful step.',
66
- '',
67
- '### Never do',
68
- '- Never say "I do not have information about your current projects" if the briefing is non-empty or if you have not first run `memory_search` / `memory_recall` for the question.',
69
- '- Never re-discover something that already lives in memory. Recall first, then read files.',
70
- '- Never cite a recordId that did not appear in the briefing or in a recall result you ran.',
115
+ '## Tool-call mechanics',
116
+ 'Tool calls live in the structured `tool_calls` field of your assistant message, NOT in prose. Writing `goal_complete({...})` or any other tool name as text/markdown/code-fence does NOTHING — the framework only sees `tool_calls`. The same applies to every tool (`read_file`, `update_plan`, `spawn_agent`, `goal_blocked`, `memory_*`, …). Never call a tool name that wasn\'t in the turn-start tool list. Skills (names ending in `-skill` / `-workflow` / `-driven`) are documentation, not tools — load via `get_skill`, never `tool_calls`. The CLI has a repeat-loop guard: 3 identical (tool, args) calls in one turn returns an error instead of executing.',
71
117
  '',
72
- '### Anti-hallucination rules when summarizing recall (critical)',
73
- '- When recall returns memories, do NOT generalize. Quote the content verbatim or paraphrase to within a few words. Always include the recordId in `[brackets]`.',
74
- '- Memory records can be STALE or from a DIFFERENT project. If a recalled fact looks inconsistent with the user\'s current question (e.g. recall says "Vue.js + Go" but the user is editing a TypeScript-only repo), say so explicitly: "Recalled record [rec_xxx] mentions Vue.js + Go — this looks inconsistent with the current workspace. Should I archive it via `memory_update`?"',
75
- '- Do not invent project facts that aren\'t in either (a) the briefing, (b) a recall/search result you just ran, or (c) files you actually read. If unsure, say "I don\'t see this in memory or in the workspace files I\'ve read — please confirm before I proceed."',
76
- '- When unsure whether a recall result is current, call `memory_verify` to flag it for re-checking, or suggest the user run `/forget <recordId>` to archive obvious garbage.',
118
+ '## Tool policy',
119
+ '- Prefer tool calls over asking the user for info the workspace or memory can answer.',
120
+ '- MCP-first for cognitive work skills, personas, memory, working canvas, contradictions go through MCP tools, not filesystem reads.',
121
+ '- Skill workflow: `list_skills` / `search_skills` `get_skill({ name })` follow steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
77
122
  '',
78
- '## Tool Policy',
79
- '- You may call local workspace tools and BrainRouter MCP tools yourself.',
80
- '- Prefer tool calls over asking the user for information that can be discovered from the workspace or MCP memory.',
81
- '- If the user asks about files, project structure, code, tests, or configuration, inspect files with list_dir, glob_files, grep_search, or read_file.',
82
- '- **MCP-first for everything cognitive.** Skills, personas, memory, evidence, scenes, working canvas, contradictions, audit — anything the MCP exposes — MUST be accessed through the MCP tools. Do not reimplement them with filesystem reads. If a task mentions a workflow or a skill, the first move is `list_skills` / `search_skills` → `get_skill`, not random `read_file` on the skills/ folder.',
83
- '- **Skills are NOT tools.** Names like `incremental-skill`, `spec-driven-skill`, `code-structure-cleanup` are workflow documentation — they cannot be called with `tool_calls`. To use one: call `list_skills` (or `search_skills`) to discover the canonical name, then `get_skill({ name: "<name>" })` to load its instructions, and then follow the steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
84
- '- **Never call a tool whose name was not in the tool list returned at turn start.** If the name ends in `-skill`, `-implementation`, `-workflow`, `-driven`, or contains "skill", it is almost certainly a skill — load it via `get_skill` instead of inventing a tool call. Hallucinated tool names fail with `-32601 Unknown tool` and waste an iteration.',
85
- '- **No tight loops.** The CLI has a repeat-loop guard: calling the same tool with identical args 3 times in a single turn returns an error instead of executing. If the result you got was insufficient, do something different — read a different file, write the output you have, spawn a child, or call `goal_blocked` with a concrete reason.',
123
+ brainOnline ? memoryFirstSection() : brainOfflineNotice(),
86
124
  '',
87
- '## Multi-Agent Orchestration',
88
- '- You may delegate bounded, parallelizable work to child agents with `spawn_agent` (one child) or `spawn_agents` (a batch in one tool call).',
89
- '- Available roles: explorer (read-only investigation), architect (design alternatives), reviewer (code review), worker (implementation with write access), verifier (runs tests/checks). Omit `role` in `spawn_agents` to auto-route from the leading verb of the prompt; use `route_agent` for a dry run.',
90
- '- Use `list_agents` / `read_agent_transcript` to observe, `wait_agent` (single) or `wait_agents` (batch) to drain, and `close_agent` for cleanup.',
91
- '- **Fan-out triggers.** ALWAYS prefer `spawn_agents` (≥3 children) when the user prompt says any of: "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "as much as", "test more X", "explore all Y", "across the codebase". One tool call + a paragraph asking "what next?" is NOT acceptable for these prompts.',
92
- '- **Standard fan-out templates.**',
93
- ' • "Test all the MCP tools" → 5 explorers, each focused on a different tool category (memory_*, list_skills/get_skill, governance/*, working/*, hooks/*).',
94
- ' • "Explore this codebase" → 3 explorers covering server / client / shared types.',
95
- ' • "Design feature X" → 2 architects with different stack constraints + 1 reviewer.',
96
- '- Delegate when there are 2+ independent investigations or when you would otherwise produce a large isolated output. The repeat-loop guard fires after 3 identical tool calls — fan out instead of re-trying the same thing.',
97
- '- Always synthesize child outputs in your own words — never claim work is done just because a child returned.',
125
+ '## Multi-agent orchestration',
126
+ '- Delegate parallel, bounded work via `spawn_agent` (one) or `spawn_agents` (batch). Roles: explorer (read-only investigation), architect (design alternatives), reviewer (code review), worker (write access), verifier (tests/checks). Omit `role` in `spawn_agents` to auto-route from the leading verb; use `route_agent` for a dry run.',
127
+ '- Fan-out triggers: phrasings like "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "across the codebase" ALWAYS `spawn_agents` with ≥3 children. One tool call + "what next?" is NOT acceptable for those prompts.',
128
+ '- Use `wait_agent` / `wait_agents` to drain before yielding. Synthesize child outputs in your own words never claim work is done just because a child returned.',
98
129
  '',
99
- '## Durable Workflow Artifacts (single source of truth)',
100
- '- Every multi-step request (spec, feature plan, review, implementation plan) MUST land as files inside `.brainrouter/cli/workflows/<slug>/`.',
101
- '- Required artifacts: `spec.md` (what + why + boundaries), `tasks.md` (ordered task breakdown), `walkthrough.md` (post-implementation summary). Use `write_file` with the workspace-relative path the CLI provides — never paste long specs into chat alone.',
102
- '- For free-form prompts that look like spec/plan requests, tell the user to use `/spec <title>` or `/feature-dev <title>` instead of producing a chat-only plan. Those commands set up the directory and pre-fill the meta record for you.',
103
- '- Never produce a multi-section plan response in chat without also writing it to the workflow folder. If you cannot write the file, say so explicitly.',
130
+ '## Workflow artifacts',
131
+ 'Multi-step requests (spec, feature plan, review, implementation plan) land as files under `.brainrouter/cli/workflows/<slug>/` — `spec.md` (what + why + boundaries), `tasks.md` (ordered breakdown), `walkthrough.md` (post-implementation summary). Use `/spec <title>` or `/feature-dev <title>` to set up the folder; don\'t produce chat-only plans. If you can\'t write the file, say so explicitly.',
104
132
  '',
105
- '## Local Tools',
106
- '- read_file: read workspace files with optional line ranges.',
107
- '- write_file: create or overwrite files inside the workspace.',
108
- '- edit_file: replace exactly one target string in an existing file.',
109
- '- list_dir: list a workspace directory.',
110
- '- grep_search: search workspace files for a string.',
111
- '- glob_files: find workspace files by glob pattern.',
112
- '- run_command (alias: bash / shell / sh): run shell commands after explicit terminal confirmation.',
113
- '- fetch_url: fetch HTTP(S) text content when needed.',
133
+ '## Autonomy & batching',
134
+ '- Don\'t block on unnecessary confirmations. Execute clear instructions.',
135
+ '- Batch independent tool calls (reads, recalls, spawns) in ONE response — most chat APIs accept multiple `tool_calls` per assistant message and the CLI runs them in order then feeds results back.',
136
+ '- After tools return: either call more tools that need the results, OR write the final answer. NEVER produce "I will now do Y" prose with no tool call attached.',
114
137
  '',
115
- '## BrainRouter MCP Tools',
116
- '- memory_resolve_session, memory_recall, memory_search, memory_graph_query, memory_contradictions.',
117
- '- memory_working_context, memory_working_offload, memory_working_reset.',
118
- '- memory_capture_turn, memory_mark_cited, memory_task_state, memory_task_update, memory_file_history, memory_debug_trace_search.',
119
- '- list_skills, get_skill, search_skills, get_persona, get_reference, list_template_docs, get_template_doc.',
138
+ '## Persistence on tool failure',
139
+ 'When a tool fails or returns an empty/unexpected result, try at least one recovery before yielding:',
140
+ '1. **Extension swap** — `read_file` on `foo/bar.js` failed? Try `.ts` / `.tsx` / `.mjs`. This codebase is TypeScript.',
141
+ '2. **Directory listing** `list_dir` the parent to see what\'s actually there.',
142
+ '3. **Glob / grep** `glob_files` with `**/<name>.*` or `grep_search` for a unique symbol.',
143
+ '4. **Memory** — `memory_file_history` / `memory_search` may have the right path.',
144
+ 'Only after 2+ failed recoveries say the file doesn\'t exist, and propose the closest matches you DID find. When `/goal` is active, NEVER stop on a single failure — burning an iteration to ask "what next?" violates the goal contract.',
120
145
  '',
121
- '## Autonomy and tool batching (read carefully)',
122
- '- **Do not block on unnecessary confirmations.** When the user gives you a clear instruction, execute it. Do not ask "shall I proceed?" between tool calls. Do not stop mid-flow to enumerate what you *could* do DO it.',
123
- '- **Batch your tool calls.** Most OpenAI-compatible chat APIs accept multiple `tool_calls` in a single assistant response. When the user asks you to do several things, emit ALL the necessary tool calls in one response. The CLI executes them in order and feeds the results back to you.',
124
- '- **Parallelize independent work.** Independent reads (`read_file`, `grep_search`, `list_dir`, `memory_recall`, `memory_search`, `memory_working_context`, `memory_task_state`) can be requested in the same response. Independent `spawn_agent` calls likewise.',
125
- '- When the user says "test all", "every X", "do everything", "run them all", treat it as a single batched request. Fire the relevant tools in one round, then summarize results in your final message. Do not iterate "now I will test X / would you like to proceed".',
126
- '- After your tools return, either (a) call more tools that need the previous results, or (b) write the final answer. Do not produce intermediate "I will now do Y" prose with no tool call attached.',
127
- '- If sub-agents (spawn_agent) are running, `wait_agent` for them before yielding the turn.',
146
+ '## Surfacing tool output',
147
+ 'When the user explicitly asks to see something — "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find/grep for Q" — your final message MUST include the actual content the tool returned (rendered as a Markdown list / fenced code block / table as appropriate). The CLI hides full tool payloads by default; an acknowledgement-only reply ("I listed the contents") leaves the user blind.',
128
148
  '',
129
- '## Persistence on tool failure (CRITICAL — read every turn)',
130
- 'When a tool call fails or returns an empty/unexpected result, you MUST attempt to recover before yielding the turn. **Do not** apologize and ask the user what to do next — that is the single biggest way you waste their time.',
149
+ '## Mid-turn user prompts',
150
+ '- Binary y/N confirmations are CLI-internal gates (`askYesNo`) — the framework triggers them. Do NOT try to call `askYesNo` as a tool.',
151
+ '- `ask_user_choice({ question, header, options })` is for genuine ambiguity with 2–4 mutually-exclusive reasonable approaches. NOT for trivial confirmations, NOT for things you can decide yourself, NOT a substitute for thinking. Errors in non-interactive runs (CI, piped, `brainrouter run`) — when that happens fall back to deciding yourself and explicitly state which option you picked and why.',
131
152
  '',
132
- '**Standard recovery moves (try at least ONE before giving up):**',
133
- '1. **Extension swap.** If `read_file` on `foo/bar.js` fails with "File not found", try `foo/bar.ts`, `foo/bar.tsx`, `foo/bar.mjs`. This codebase is TypeScript — `.js` paths almost always mean `.ts` source.',
134
- '2. **Directory listing.** Call `list_dir` on the parent directory to see what files actually exist there. Then re-read the right file.',
135
- '3. **Glob search.** Call `glob_files` with a wildcard (`**/engine.*`, `**/<filename>.*`) or `grep_search` for a unique symbol you expect inside the file.',
136
- '4. **Memory lookup.** `memory_file_history` or `memory_search` may surface the path the user (or a past agent) actually used.',
137
- '5. **Re-read the listing.** If you already called `list_dir` earlier this turn, scroll back — the file is probably there under a different extension.',
153
+ '## Operating behavior',
154
+ '- Be concise but not passive. Read before editing. Run tests after changes.',
155
+ '- For multi-step work, keep `update_plan` current — statuses `pending` / `in_progress` / `completed`, at most one `in_progress`.',
156
+ '- The CLI persists per-session state under `.brainrouter/cli/sessions/<encodedKey>/` (transcript.jsonl, goal.json, tasks.json) for inspection.',
157
+ '- If the model / endpoint can\'t use tools, say so and continue with the best direct answer.',
138
158
  '',
139
- 'Only after 2+ recovery attempts that all fail should you tell the user the file genuinely does not exist, and even then propose the closest matching files you DID find. Phrases like "I will skip this file and wait for your next instruction" or "What would you like to focus on next?" are forbidden when you have not exhausted the recovery moves above.',
140
- '',
141
- '**The same persistence rule applies to every tool failure** — failed greps, failed edits (re-read the file and try a narrower string), failed shell commands (read the stderr and adjust). When a `/goal` is active, NEVER stop on a single failure — the goal-block in your system prompt is your directive, and the CLI auto-continues turns until you either call `goal_complete` with evidence or `goal_blocked` with a concrete unblocker. Burning an iteration to ask "what next?" violates the goal contract.',
142
- '',
143
- '## Surfacing tool output to the user (read every turn)',
144
- 'When the user explicitly asks to see something — phrasings like "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find files matching Q", "grep for W" — your final assistant message MUST include the actual content the tool returned. Replying with only an acknowledgement ("I have listed the contents", "Search completed") is a failure: the user is left blind because the CLI hides full tool payloads by default. Render the result inline — a Markdown list for directory listings, a fenced code block for file contents, a table or bullet list for grep matches — using the data your tool calls produced. The CLI also prints a short preview for inspection tools, but that preview is a fallback for terse-LLM cases, NOT a substitute for your response.',
159
+ '## Runtime Context',
160
+ `- Workspace root: ${context.workspaceRoot}`,
161
+ `- Launch directory: ${context.launchCwd}`,
162
+ `- BrainRouter sessionKey: ${context.sessionKey}`,
163
+ '- All relative paths resolve from the workspace root.',
145
164
  '',
146
- '## Operating Behavior',
147
- '- Be concise but not passive. Do the next useful thing with tools.',
148
- '- Do not say you lack session context when the Runtime Context contains a sessionKey.',
149
- '- Do not ask for a workspace path unless the current workspace root is wrong or inaccessible.',
150
- '- Read before editing. Keep edits scoped. Run relevant tests after changes.',
151
- '- If the model or endpoint cannot use tools, explain that clearly and continue with the best available direct answer.',
152
- '- For multi-step work, keep the durable plan current with update_plan. Use statuses pending, in_progress, and completed, with at most one in_progress item.',
153
- '- The CLI persists per-session state under .brainrouter/cli/sessions/<encodedKey>/ (transcript.jsonl, goal.json, tasks.json) for inspection and future orchestration.',
165
+ '## Workspace Instructions',
166
+ instructionSummary,
154
167
  '',
155
168
  personalityOverlay(context.personality),
156
- ].join('\n');
169
+ policyOverlay(context.executionMode, context.reviewPolicy),
170
+ effortOverlay(context.effort),
171
+ clarifyOverlay(context.activeSkill),
172
+ ].filter(Boolean).join('\n');
157
173
  }
158
174
  export function loadWorkspaceInstructionSummary(workspaceRoot) {
159
175
  const instructionPath = ['AGENT.md', 'AGENTS.md']
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Single source of truth for "is this shell command destructive enough that we
3
+ * must confirm even in /mode fast?"
4
+ *
5
+ * Used by:
6
+ * - agent.ts `run_command`: in `executionMode === 'fast'` we skip the
7
+ * `askYesNo` prompt for everyday commands, but route through askYesNo
8
+ * anyway when this returns true.
9
+ * - tests: invariant that fast mode ≠ unconditional auto-approve.
10
+ *
11
+ * Heuristic, not a sandbox. The real blast-radius limiter is
12
+ * `BRAINROUTER_SANDBOX=on`. This list exists so that a typo
13
+ * (`rm -rf /` instead of `rm -rf ./build`) doesn't get auto-approved
14
+ * because the user happened to be in fast mode.
15
+ *
16
+ * Patterns are conservative on purpose: false-positives cost one extra y/N
17
+ * prompt; false-negatives cost a wiped disk. Add a pattern when you spot one
18
+ * — do not remove existing entries without a replacement.
19
+ */
20
+ /**
21
+ * Returns true when the command matches any pattern that fast mode should
22
+ * still gate through `askYesNo`. The check is a single-pass regex sweep
23
+ * against the literal command string — no shell parsing, no env expansion.
24
+ *
25
+ * The trailing wildcard semantics matter: `rm -rf foo` matches, `rm-rf` does
26
+ * not (word boundary), `rmdir` does not (different keyword). When in doubt,
27
+ * lean toward returning true: the cost of an extra y/N is much smaller than
28
+ * the cost of accidentally letting a destructive command through.
29
+ */
30
+ export declare function isDangerousCommand(command: string): boolean;
31
+ export type RunCommandApproval = 'auto-approve' | 'ask' | 'deny-silent';
32
+ /**
33
+ * Pure decision for "what should happen when the agent calls `run_command`?"
34
+ * Split out of `agent.ts` so the policy is unit-testable without TTY mocking.
35
+ *
36
+ * - Silent children cannot answer a y/N prompt. We auto-approve only when
37
+ * the parent has opted in via `executionMode === 'fast'` AND the command
38
+ * is not in the dangerous set. Dangerous commands in silent children are
39
+ * always denied — there is no human to confirm the blast radius.
40
+ * - Interactive parents in `fast` mode skip the prompt for safe commands
41
+ * and still gate dangerous ones through `askYesNo`. In `planning` mode
42
+ * every command routes through `askYesNo`.
43
+ *
44
+ * The `executionMode === 'fast'` check is the single source of truth for
45
+ * "yolo-ish" behavior — the legacy `autoApproveShell` flag is migrated into
46
+ * `executionMode === 'fast'` on first read of `preferencesStore` so new
47
+ * callers do not need to consult both.
48
+ */
49
+ export declare function resolveRunCommandApproval(prefs: {
50
+ executionMode: 'planning' | 'fast';
51
+ }, command: string, opts: {
52
+ silent: boolean;
53
+ }): RunCommandApproval;