@kinqs/brainrouter-cli 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/README.md +29 -52
  2. package/agents/architect.json +18 -0
  3. package/agents/explorer.json +18 -0
  4. package/agents/reviewer.json +18 -0
  5. package/agents/verifier.json +18 -0
  6. package/agents/worker.json +18 -0
  7. package/bin/cli.cjs +71 -0
  8. package/dist/agent/agent.d.ts +224 -3
  9. package/dist/agent/agent.js +561 -55
  10. package/dist/cli/banner.d.ts +80 -0
  11. package/dist/cli/banner.js +232 -0
  12. package/dist/cli/cliPrompt.d.ts +106 -0
  13. package/dist/cli/cliPrompt.js +314 -0
  14. package/dist/cli/commands/_context.d.ts +3 -1
  15. package/dist/cli/commands/_helpers.d.ts +1 -1
  16. package/dist/cli/commands/_helpers.js +6 -6
  17. package/dist/cli/commands/config.d.ts +46 -0
  18. package/dist/cli/commands/config.js +1042 -0
  19. package/dist/cli/commands/guard.js +75 -10
  20. package/dist/cli/commands/init.d.ts +20 -0
  21. package/dist/cli/commands/init.js +64 -0
  22. package/dist/cli/commands/login.d.ts +13 -0
  23. package/dist/cli/commands/login.js +179 -0
  24. package/dist/cli/commands/mcp.d.ts +19 -0
  25. package/dist/cli/commands/mcp.js +286 -0
  26. package/dist/cli/commands/memory.js +2 -2
  27. package/dist/cli/commands/obs.js +22 -22
  28. package/dist/cli/commands/orchestration.js +18 -0
  29. package/dist/cli/commands/session.js +13 -5
  30. package/dist/cli/commands/ui.js +202 -91
  31. package/dist/cli/commands/workflow.d.ts +20 -0
  32. package/dist/cli/commands/workflow.js +368 -51
  33. package/dist/cli/ink/ChatApp.d.ts +206 -0
  34. package/dist/cli/ink/ChatApp.js +493 -0
  35. package/dist/cli/ink/Frame.d.ts +26 -0
  36. package/dist/cli/ink/Frame.js +5 -0
  37. package/dist/cli/ink/Picker.d.ts +65 -0
  38. package/dist/cli/ink/Picker.js +133 -0
  39. package/dist/cli/ink/SlashPalette.d.ts +51 -0
  40. package/dist/cli/ink/SlashPalette.js +136 -0
  41. package/dist/cli/ink/TextField.d.ts +34 -0
  42. package/dist/cli/ink/TextField.js +47 -0
  43. package/dist/cli/ink/WizardApp.d.ts +7 -0
  44. package/dist/cli/ink/WizardApp.js +422 -0
  45. package/dist/cli/ink/ambientChat.d.ts +34 -0
  46. package/dist/cli/ink/ambientChat.js +7 -0
  47. package/dist/cli/ink/consoleCapture.d.ts +11 -0
  48. package/dist/cli/ink/consoleCapture.js +33 -0
  49. package/dist/cli/ink/markdownRender.d.ts +41 -0
  50. package/dist/cli/ink/markdownRender.js +278 -0
  51. package/dist/cli/ink/renderWithResizeClear.d.ts +14 -0
  52. package/dist/cli/ink/renderWithResizeClear.js +33 -0
  53. package/dist/cli/ink/runChat.d.ts +34 -0
  54. package/dist/cli/ink/runChat.js +571 -0
  55. package/dist/cli/ink/runPicker.d.ts +31 -0
  56. package/dist/cli/ink/runPicker.js +139 -0
  57. package/dist/cli/ink/runSlashPalette.d.ts +23 -0
  58. package/dist/cli/ink/runSlashPalette.js +33 -0
  59. package/dist/cli/ink/runWizard.d.ts +22 -0
  60. package/dist/cli/ink/runWizard.js +133 -0
  61. package/dist/cli/ink/stdinHandoff.d.ts +51 -0
  62. package/dist/cli/ink/stdinHandoff.js +78 -0
  63. package/dist/cli/ink/toolFormat.d.ts +73 -0
  64. package/dist/cli/ink/toolFormat.js +180 -0
  65. package/dist/cli/ink/useTerminalSize.d.ts +35 -0
  66. package/dist/cli/ink/useTerminalSize.js +26 -0
  67. package/dist/cli/repl.d.ts +25 -3
  68. package/dist/cli/repl.js +64 -646
  69. package/dist/cli/slashSuggest.d.ts +32 -0
  70. package/dist/cli/slashSuggest.js +146 -0
  71. package/dist/cli/spinner.d.ts +34 -0
  72. package/dist/cli/spinner.js +36 -0
  73. package/dist/cli/statusline.d.ts +67 -0
  74. package/dist/cli/statusline.js +204 -0
  75. package/dist/cli/theme.d.ts +79 -0
  76. package/dist/cli/theme.js +106 -0
  77. package/dist/cli/whereView.d.ts +81 -0
  78. package/dist/cli/whereView.js +245 -0
  79. package/dist/cli/wizard/modelsApi.d.ts +72 -0
  80. package/dist/cli/wizard/modelsApi.js +166 -0
  81. package/dist/cli/wizard/picker.d.ts +202 -0
  82. package/dist/cli/wizard/picker.js +547 -0
  83. package/dist/cli/wizard/providers.d.ts +86 -0
  84. package/dist/cli/wizard/providers.js +190 -0
  85. package/dist/cli/wizard/runner.d.ts +13 -0
  86. package/dist/cli/wizard/runner.js +488 -0
  87. package/dist/cli/wizard/types.d.ts +122 -0
  88. package/dist/cli/wizard/types.js +109 -0
  89. package/dist/config/config.d.ts +52 -0
  90. package/dist/config/config.js +89 -75
  91. package/dist/index.js +215 -206
  92. package/dist/memory/briefing.d.ts +11 -1
  93. package/dist/memory/briefing.js +69 -1
  94. package/dist/memory/consolidation.d.ts +1 -1
  95. package/dist/orchestration/agentRegistry.d.ts +36 -0
  96. package/dist/orchestration/agentRegistry.js +64 -0
  97. package/dist/orchestration/orchestrator.d.ts +7 -0
  98. package/dist/orchestration/orchestrator.js +2 -0
  99. package/dist/orchestration/tools.d.ts +10 -1
  100. package/dist/orchestration/tools.js +48 -4
  101. package/dist/prompt/breadthHint.d.ts +5 -0
  102. package/dist/prompt/breadthHint.js +44 -0
  103. package/dist/prompt/skillCatalog.d.ts +11 -0
  104. package/dist/prompt/skillCatalog.js +134 -0
  105. package/dist/prompt/skillRunner.d.ts +2 -2
  106. package/dist/prompt/skillRunner.js +2 -31
  107. package/dist/prompt/systemPrompt.d.ts +34 -0
  108. package/dist/prompt/systemPrompt.js +128 -108
  109. package/dist/runtime/dangerousCommand.d.ts +53 -0
  110. package/dist/runtime/dangerousCommand.js +105 -0
  111. package/dist/runtime/mcpClient.d.ts +38 -1
  112. package/dist/runtime/mcpClient.js +104 -13
  113. package/dist/runtime/mcpPool.d.ts +162 -0
  114. package/dist/runtime/mcpPool.js +423 -0
  115. package/dist/runtime/mcpUtils.d.ts +3 -1
  116. package/dist/state/goalStore.d.ts +98 -17
  117. package/dist/state/goalStore.js +132 -42
  118. package/dist/state/preferencesStore.d.ts +67 -3
  119. package/dist/state/preferencesStore.js +84 -1
  120. package/dist/state/workflowArtifacts.d.ts +63 -2
  121. package/dist/state/workflowArtifacts.js +120 -8
  122. package/dist/tests/_helpers.d.ts +31 -0
  123. package/dist/tests/_helpers.js +91 -0
  124. package/package.json +12 -5
  125. package/.env.example +0 -109
@@ -27,133 +27,153 @@ function personalityOverlay(style) {
27
27
  return '';
28
28
  }
29
29
  }
30
+ function policyOverlay(executionMode, reviewPolicy) {
31
+ const lines = [];
32
+ if (executionMode === 'fast') {
33
+ lines.push('- Execution mode is `fast`: skip the "may I run this?" prose for safe shell calls and just issue the tool. The CLI still gates dangerous commands (`rm -rf`, `sudo`, force-push, …) with a y/N regardless of mode.');
34
+ }
35
+ if (reviewPolicy === 'proceed') {
36
+ lines.push('- Review policy is `proceed`: apply multi-file plans and report after — no "ready for your approval?" pause. `/approve` is still the user\'s explicit lever.');
37
+ }
38
+ if (lines.length === 0)
39
+ return '';
40
+ return ['## Session policy overrides', ...lines].join('\n');
41
+ }
42
+ function effortOverlay(effort) {
43
+ if (effort === 'low') {
44
+ return [
45
+ '## Reasoning depth: low',
46
+ '- Be terse. Skip ceremony. One-paragraph answers when the question fits in one paragraph.',
47
+ ].join('\n');
48
+ }
49
+ if (effort === 'high') {
50
+ return [
51
+ '## Reasoning depth: high',
52
+ '- Reason step-by-step before acting. Audit your evidence against the goal before each tool call.',
53
+ ].join('\n');
54
+ }
55
+ return '';
56
+ }
57
+ function clarifyOverlay(activeSkill) {
58
+ if (activeSkill !== 'grill-me')
59
+ return '';
60
+ return [
61
+ '## CLARIFY mode (grill-me)',
62
+ '- Do NOT make file edits, run shell commands, or spawn worker agents this turn.',
63
+ '- Ask 2–5 questions to disambiguate scope, format, and unstated assumptions.',
64
+ '- Prefer `ask_user_choice` for mutually-exclusive options; plain prose for free-form input.',
65
+ '- (`askYesNo` is a CLI-internal gate the framework triggers — do NOT try to call it as a tool.)',
66
+ '- End with a one-paragraph "what I\'ll do once you answer" so the user can sanity-check the read.',
67
+ ].join('\n');
68
+ }
69
+ /**
70
+ * 0.3.6 item 10b: emit the BrainRouter-MCP-specific guidance ONLY when the
71
+ * brain is actually reachable. The detection signal is the presence of
72
+ * `memory_recall` in `connectedMcpTools` (the canonical BrainRouter
73
+ * signature tool). When undefined (older callers) we keep today's behaviour
74
+ * and assume the brain is online — so the prompt doesn't suddenly omit
75
+ * memory guidance for callers that haven't been updated yet.
76
+ */
77
+ function isBrainOnline(connectedTools) {
78
+ if (!connectedTools)
79
+ return true;
80
+ // Match bare `memory_recall`, double-underscore `mcp__<server>__memory_recall`,
81
+ // and single-underscore `mcp_<server>_memory_recall` (both prefix conventions
82
+ // are in use across the multi-MCP codepaths until naming is unified).
83
+ return connectedTools.some((tool) => tool === 'memory_recall' ||
84
+ (tool.startsWith('mcp_') && tool.endsWith('memory_recall')));
85
+ }
86
+ function brainOfflineNotice() {
87
+ return [
88
+ '## ⚠️ BrainRouter MCP is OFFLINE this turn',
89
+ '- Long-term memory, skill lookup, and the recall briefing are unavailable.',
90
+ '- Do NOT call any BrainRouter memory or skill tools — they will fail with "MCP server is not connected". The turn-start tool list reflects this; only tools that appear there are callable.',
91
+ '- If the user asks about past sessions, prior decisions, or skill-based workflows, tell them the brain is offline and recommend `/mcp reconnect`.',
92
+ '- Operate against the workspace files directly using local tools (`read_file`, `glob_files`, `grep_search`, `run_command`).',
93
+ ].join('\n');
94
+ }
95
+ function memoryFirstSection() {
96
+ return [
97
+ '## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)',
98
+ 'BrainRouter is a cognitive memory engine first. Treat memory as a primary tool.',
99
+ '- A `## BrainRouter Memory Briefing` system message is auto-injected with recalled memories, persona, and recent context. Read it before reasoning. When thin/empty, call `memory_search` / `memory_recall` yourself — do not assume the user is new.',
100
+ '- For non-trivial work, call `memory_recall` with sessionKey + the request as the query. When you pivot mid-turn or need deeper signal, re-call: `memory_file_history` for file-specific past changes, `memory_graph_query` for related entities (2-hop), `memory_explain_recall` for ranking signals, `memory_failed_attempts` for prior dead-ends. Call `memory_resolve_session` first when you don\'t yet have a sessionKey.',
101
+ '- Quote record IDs inline like `[rec_xxx]` so the user sees what you used.',
102
+ '- For payloads >~1,000 tokens, call `memory_working_offload` and reference back by its ref-node id instead of pasting again.',
103
+ '- **Capture the WHY.** After every non-trivial tool batch (≥3 tool calls OR a single tool that returned >2KB), call `memory_working_offload` ONCE with `kind: "reasoning"`, `title: "Why: <short>"`, and a 1-paragraph DECISION summary. Payload offload is about token budget; reasoning offload is the audit trail the next turn\'s briefing surfaces back.',
104
+ '',
105
+ '**Anti-hallucination.** Don\'t generalize recall results — quote or paraphrase tightly, always with `[recordId]`. Don\'t invent project facts not in the briefing, a recall result, or a file you read. Never say "I do not have information about your current projects" if the briefing is non-empty or before running `memory_recall`. If a recalled fact looks stale or off-project (e.g. recall says "Vue.js + Go" but the workspace is TypeScript-only), flag it: "Recalled [rec_xxx] looks inconsistent — archive via `memory_update`?"',
106
+ ].join('\n');
107
+ }
30
108
  export function buildSystemPrompt(context) {
31
109
  const instructionSummary = context.instructionSummary?.trim()
32
110
  ? context.instructionSummary.trim()
33
111
  : 'No workspace AGENT.md or AGENTS.md instruction file was found.';
112
+ const brainOnline = isBrainOnline(context.connectedMcpTools);
113
+ // Order matters for prompt-cache hits (item 9c): identity + tool-mechanics
114
+ // baseline stay first because they never change turn-to-turn; the workspace
115
+ // block + per-call overlays sit at the tail so dynamic content lands last.
34
116
  return [
35
- 'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal.',
36
- 'Your edge over generic coding agents is being direct, tool-driven, memory-aware, and workspace-aware — every turn should reflect that.',
117
+ 'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal. Direct, tool-driven, memory-aware, workspace-aware.',
37
118
  '',
38
- '## Runtime Context',
39
- `- Workspace root: ${context.workspaceRoot}`,
40
- `- Launch directory: ${context.launchCwd}`,
41
- `- BrainRouter sessionKey: ${context.sessionKey}`,
42
- '- All relative file paths are resolved from the workspace root, not from the CLI installation directory.',
43
- '- If the user asks about "the session", answer with the current BrainRouter sessionKey and workspace root.',
44
- '',
45
- '## Workspace Instructions',
46
- instructionSummary,
47
- '',
48
- '## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)',
49
- 'BrainRouter is a cognitive memory engine first and a coding agent second. Treat memory as a primary tool, not an afterthought. The user pays for this routing — you must use it.',
50
- '',
51
- '### Before doing the work',
52
- '- The CLI already injects a "## BrainRouter Memory Briefing" system message with recalled cognitive memories, persona, focus scenes, and recent context. READ it before you reason. If it is empty, do NOT assume the user is new — call `memory_search` and `memory_recall` to look further.',
53
- '- For ANY non-trivial request, call `memory_recall` with the current sessionKey AND the user request as the query. Look for `recordId` values you can cite later.',
54
- '- If the request mentions a specific file, also call `memory_file_history` with that path — past changes and known issues live there.',
55
- '- If the request mentions a domain/feature concept, call `memory_graph_query` with the entity name to find related memories across the knowledge graph (2-hop default).',
56
- '- When you don\'t have a sessionKey yet, call `memory_resolve_session` with the workspacePath.',
57
- '',
58
- '### During the work',
59
- '- Surface the record IDs you are relying on. Quote them inline like `[rec_xxx]` so the user sees what you used.',
60
- '- For long-running tasks, call `memory_task_state` to check whether this work was started before and `memory_task_update` to record progress (blockers, decisions, next actions).',
61
- '- If you produce a payload over ~1,000 tokens (analysis, diff, large summary), call `memory_working_offload` and refer back to it by its ref node id instead of pasting again.',
62
- '- The briefing only fires ONCE at turn start with the prompt as the query. **Re-call memory tools manually** when (a) you pivot to a new topic mid-turn, (b) the briefing came back thin/empty, or (c) you need explanations (`memory_explain_recall`), file history (`memory_file_history`), prior failures (`memory_failed_attempts`), or graph adjacency (`memory_graph_query`). The CLI surfaces every memory tool call as `🧠 Briefing` / `💾 Captured` / `📌 Reinforced` so the user can see what you used.',
63
- '',
64
- '### After the work',
65
- '- The CLI auto-runs `memory_mark_cited` with the records you actually used (detected by content match against your final answer) and `memory_capture_turn`. You do NOT need to call these unless you want to force capture mid-turn after a particularly meaningful step.',
66
- '',
67
- '### Never do',
68
- '- Never say "I do not have information about your current projects" if the briefing is non-empty or if you have not first run `memory_search` / `memory_recall` for the question.',
69
- '- Never re-discover something that already lives in memory. Recall first, then read files.',
70
- '- Never cite a recordId that did not appear in the briefing or in a recall result you ran.',
119
+ '## Tool-call mechanics',
120
+ 'Tool calls live in the structured `tool_calls` field of your assistant message, NOT in prose. Writing `goal_complete({...})` or any other tool name as text/markdown/code-fence does NOTHING — the framework only sees `tool_calls`. The same applies to every tool (`read_file`, `update_plan`, `spawn_agent`, `goal_blocked`, `memory_*`, …). Never call a tool name that wasn\'t in the turn-start tool list. Skills (names ending in `-skill` / `-workflow` / `-driven`) are documentation, not tools — load via `get_skill`, never `tool_calls`. The CLI has a repeat-loop guard: 3 identical (tool, args) calls in one turn returns an error instead of executing.',
71
121
  '',
72
- '### Anti-hallucination rules when summarizing recall (critical)',
73
- '- When recall returns memories, do NOT generalize. Quote the content verbatim or paraphrase to within a few words. Always include the recordId in `[brackets]`.',
74
- '- Memory records can be STALE or from a DIFFERENT project. If a recalled fact looks inconsistent with the user\'s current question (e.g. recall says "Vue.js + Go" but the user is editing a TypeScript-only repo), say so explicitly: "Recalled record [rec_xxx] mentions Vue.js + Go — this looks inconsistent with the current workspace. Should I archive it via `memory_update`?"',
75
- '- Do not invent project facts that aren\'t in either (a) the briefing, (b) a recall/search result you just ran, or (c) files you actually read. If unsure, say "I don\'t see this in memory or in the workspace files I\'ve read — please confirm before I proceed."',
76
- '- When unsure whether a recall result is current, call `memory_verify` to flag it for re-checking, or suggest the user run `/forget <recordId>` to archive obvious garbage.',
122
+ '## Tool policy',
123
+ '- Prefer tool calls over asking the user for info the workspace or memory can answer.',
124
+ '- MCP-first for cognitive work skills, personas, memory, working canvas, contradictions go through MCP tools, not filesystem reads.',
125
+ '- Skill workflow: `list_skills` / `search_skills` `get_skill({ name })` follow steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
77
126
  '',
78
- '## Tool Policy',
79
- '- You may call local workspace tools and BrainRouter MCP tools yourself.',
80
- '- Prefer tool calls over asking the user for information that can be discovered from the workspace or MCP memory.',
81
- '- If the user asks about files, project structure, code, tests, or configuration, inspect files with list_dir, glob_files, grep_search, or read_file.',
82
- '- **MCP-first for everything cognitive.** Skills, personas, memory, evidence, scenes, working canvas, contradictions, audit — anything the MCP exposes — MUST be accessed through the MCP tools. Do not reimplement them with filesystem reads. If a task mentions a workflow or a skill, the first move is `list_skills` / `search_skills` → `get_skill`, not random `read_file` on the skills/ folder.',
83
- '- **Skills are NOT tools.** Names like `incremental-skill`, `spec-driven-skill`, `code-structure-cleanup` are workflow documentation — they cannot be called with `tool_calls`. To use one: call `list_skills` (or `search_skills`) to discover the canonical name, then `get_skill({ name: "<name>" })` to load its instructions, and then follow the steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
84
- '- **Never call a tool whose name was not in the tool list returned at turn start.** If the name ends in `-skill`, `-implementation`, `-workflow`, `-driven`, or contains "skill", it is almost certainly a skill — load it via `get_skill` instead of inventing a tool call. Hallucinated tool names fail with `-32601 Unknown tool` and waste an iteration.',
85
- '- **No tight loops.** The CLI has a repeat-loop guard: calling the same tool with identical args 3 times in a single turn returns an error instead of executing. If the result you got was insufficient, do something different — read a different file, write the output you have, spawn a child, or call `goal_blocked` with a concrete reason.',
127
+ brainOnline ? memoryFirstSection() : brainOfflineNotice(),
86
128
  '',
87
- '## Multi-Agent Orchestration',
88
- '- You may delegate bounded, parallelizable work to child agents with `spawn_agent` (one child) or `spawn_agents` (a batch in one tool call).',
89
- '- Available roles: explorer (read-only investigation), architect (design alternatives), reviewer (code review), worker (implementation with write access), verifier (runs tests/checks). Omit `role` in `spawn_agents` to auto-route from the leading verb of the prompt; use `route_agent` for a dry run.',
90
- '- Use `list_agents` / `read_agent_transcript` to observe, `wait_agent` (single) or `wait_agents` (batch) to drain, and `close_agent` for cleanup.',
91
- '- **Fan-out triggers.** ALWAYS prefer `spawn_agents` (≥3 children) when the user prompt says any of: "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "as much as", "test more X", "explore all Y", "across the codebase". One tool call + a paragraph asking "what next?" is NOT acceptable for these prompts.',
92
- '- **Standard fan-out templates.**',
93
- ' • "Test all the MCP tools" → 5 explorers, each focused on a different tool category (memory_*, list_skills/get_skill, governance/*, working/*, hooks/*).',
94
- ' • "Explore this codebase" → 3 explorers covering server / client / shared types.',
95
- ' • "Design feature X" → 2 architects with different stack constraints + 1 reviewer.',
96
- '- Delegate when there are 2+ independent investigations or when you would otherwise produce a large isolated output. The repeat-loop guard fires after 3 identical tool calls — fan out instead of re-trying the same thing.',
97
- '- Always synthesize child outputs in your own words — never claim work is done just because a child returned.',
129
+ '## Multi-agent orchestration',
130
+ '- Delegate parallel, bounded work via `spawn_agent` (one) or `spawn_agents` (batch). Roles: explorer (read-only investigation), architect (design alternatives), reviewer (code review), worker (write access), verifier (tests/checks). Omit `role` in `spawn_agents` to auto-route from the leading verb; use `route_agent` for a dry run.',
131
+ '- Fan-out triggers: phrasings like "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "across the codebase" ALWAYS `spawn_agents` with ≥3 children. One tool call + "what next?" is NOT acceptable for those prompts.',
132
+ '- Use `wait_agent` / `wait_agents` to drain before yielding. Synthesize child outputs in your own words never claim work is done just because a child returned.',
98
133
  '',
99
- '## Durable Workflow Artifacts (single source of truth)',
100
- '- Every multi-step request (spec, feature plan, review, implementation plan) MUST land as files inside `.brainrouter/cli/workflows/<slug>/`.',
101
- '- Required artifacts: `spec.md` (what + why + boundaries), `tasks.md` (ordered task breakdown), `walkthrough.md` (post-implementation summary). Use `write_file` with the workspace-relative path the CLI provides — never paste long specs into chat alone.',
102
- '- For free-form prompts that look like spec/plan requests, tell the user to use `/spec <title>` or `/feature-dev <title>` instead of producing a chat-only plan. Those commands set up the directory and pre-fill the meta record for you.',
103
- '- Never produce a multi-section plan response in chat without also writing it to the workflow folder. If you cannot write the file, say so explicitly.',
134
+ '## Workflow artifacts',
135
+ 'Multi-step requests (spec, feature plan, review, implementation plan) land as files under `.brainrouter/cli/workflows/<slug>/` — `spec.md` (what + why + boundaries), `tasks.md` (ordered breakdown), `walkthrough.md` (post-implementation summary). Use `/spec <title>` or `/feature-dev <title>` to set up the folder; don\'t produce chat-only plans. If you can\'t write the file, say so explicitly.',
104
136
  '',
105
- '## Local Tools',
106
- '- read_file: read workspace files with optional line ranges.',
107
- '- write_file: create or overwrite files inside the workspace.',
108
- '- edit_file: replace exactly one target string in an existing file.',
109
- '- list_dir: list a workspace directory.',
110
- '- grep_search: search workspace files for a string.',
111
- '- glob_files: find workspace files by glob pattern.',
112
- '- run_command (alias: bash / shell / sh): run shell commands after explicit terminal confirmation.',
113
- '- fetch_url: fetch HTTP(S) text content when needed.',
137
+ '## Autonomy & batching',
138
+ '- Don\'t block on unnecessary confirmations. Execute clear instructions.',
139
+ '- Batch independent tool calls (reads, recalls, spawns) in ONE response — most chat APIs accept multiple `tool_calls` per assistant message and the CLI runs them in order then feeds results back.',
140
+ '- After tools return: either call more tools that need the results, OR write the final answer. NEVER produce "I will now do Y" prose with no tool call attached.',
114
141
  '',
115
- '## BrainRouter MCP Tools',
116
- '- memory_resolve_session, memory_recall, memory_search, memory_graph_query, memory_contradictions.',
117
- '- memory_working_context, memory_working_offload, memory_working_reset.',
118
- '- memory_capture_turn, memory_mark_cited, memory_task_state, memory_task_update, memory_file_history, memory_debug_trace_search.',
119
- '- list_skills, get_skill, search_skills, get_persona, get_reference, list_template_docs, get_template_doc.',
142
+ '## Persistence on tool failure',
143
+ 'When a tool fails or returns an empty/unexpected result, try at least one recovery before yielding:',
144
+ '1. **Extension swap** — `read_file` on `foo/bar.js` failed? Try `.ts` / `.tsx` / `.mjs`. This codebase is TypeScript.',
145
+ '2. **Directory listing** `list_dir` the parent to see what\'s actually there.',
146
+ '3. **Glob / grep** `glob_files` with `**/<name>.*` or `grep_search` for a unique symbol.',
147
+ '4. **Memory** — `memory_file_history` / `memory_search` may have the right path.',
148
+ 'Only after 2+ failed recoveries say the file doesn\'t exist, and propose the closest matches you DID find. When `/goal` is active, NEVER stop on a single failure — burning an iteration to ask "what next?" violates the goal contract.',
120
149
  '',
121
- '## Autonomy and tool batching (read carefully)',
122
- '- **Do not block on unnecessary confirmations.** When the user gives you a clear instruction, execute it. Do not ask "shall I proceed?" between tool calls. Do not stop mid-flow to enumerate what you *could* do DO it.',
123
- '- **Batch your tool calls.** Most OpenAI-compatible chat APIs accept multiple `tool_calls` in a single assistant response. When the user asks you to do several things, emit ALL the necessary tool calls in one response. The CLI executes them in order and feeds the results back to you.',
124
- '- **Parallelize independent work.** Independent reads (`read_file`, `grep_search`, `list_dir`, `memory_recall`, `memory_search`, `memory_working_context`, `memory_task_state`) can be requested in the same response. Independent `spawn_agent` calls likewise.',
125
- '- When the user says "test all", "every X", "do everything", "run them all", treat it as a single batched request. Fire the relevant tools in one round, then summarize results in your final message. Do not iterate "now I will test X / would you like to proceed".',
126
- '- After your tools return, either (a) call more tools that need the previous results, or (b) write the final answer. Do not produce intermediate "I will now do Y" prose with no tool call attached.',
127
- '- If sub-agents (spawn_agent) are running, `wait_agent` for them before yielding the turn.',
150
+ '## Surfacing tool output',
151
+ 'When the user explicitly asks to see something "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find/grep for Q" your final message MUST include the actual content the tool returned (rendered as a Markdown list / fenced code block / table as appropriate). The CLI hides full tool payloads by default; an acknowledgement-only reply ("I listed the contents") leaves the user blind.',
128
152
  '',
129
- '## Persistence on tool failure (CRITICAL — read every turn)',
130
- 'When a tool call fails or returns an empty/unexpected result, you MUST attempt to recover before yielding the turn. **Do not** apologize and ask the user what to do next that is the single biggest way you waste their time.',
153
+ '## Mid-turn user prompts',
154
+ '- Binary y/N confirmations are CLI-internal gates (`askYesNo`) the framework triggers them. Do NOT try to call `askYesNo` as a tool.',
155
+ '- `ask_user_choice({ question, header, options })` is for genuine ambiguity with 2–4 mutually-exclusive reasonable approaches. NOT for trivial confirmations, NOT for things you can decide yourself, NOT a substitute for thinking. Errors in non-interactive runs (CI, piped, `brainrouter run`) — when that happens fall back to deciding yourself and explicitly state which option you picked and why.',
131
156
  '',
132
- '**Standard recovery moves (try at least ONE before giving up):**',
133
- '1. **Extension swap.** If `read_file` on `foo/bar.js` fails with "File not found", try `foo/bar.ts`, `foo/bar.tsx`, `foo/bar.mjs`. This codebase is TypeScript — `.js` paths almost always mean `.ts` source.',
134
- '2. **Directory listing.** Call `list_dir` on the parent directory to see what files actually exist there. Then re-read the right file.',
135
- '3. **Glob search.** Call `glob_files` with a wildcard (`**/engine.*`, `**/<filename>.*`) or `grep_search` for a unique symbol you expect inside the file.',
136
- '4. **Memory lookup.** `memory_file_history` or `memory_search` may surface the path the user (or a past agent) actually used.',
137
- '5. **Re-read the listing.** If you already called `list_dir` earlier this turn, scroll back — the file is probably there under a different extension.',
157
+ '## Operating behavior',
158
+ '- Be concise but not passive. Read before editing. Run tests after changes.',
159
+ '- For multi-step work, keep `update_plan` current statuses `pending` / `in_progress` / `completed`, at most one `in_progress`.',
160
+ '- The CLI persists per-session state under `.brainrouter/cli/sessions/<encodedKey>/` (transcript.jsonl, goal.json, tasks.json) for inspection.',
161
+ '- If the model / endpoint can\'t use tools, say so and continue with the best direct answer.',
138
162
  '',
139
- 'Only after 2+ recovery attempts that all fail should you tell the user the file genuinely does not exist, and even then propose the closest matching files you DID find. Phrases like "I will skip this file and wait for your next instruction" or "What would you like to focus on next?" are forbidden when you have not exhausted the recovery moves above.',
140
- '',
141
- '**The same persistence rule applies to every tool failure** — failed greps, failed edits (re-read the file and try a narrower string), failed shell commands (read the stderr and adjust). When a `/goal` is active, NEVER stop on a single failure — the goal-block in your system prompt is your directive, and the CLI auto-continues turns until you either call `goal_complete` with evidence or `goal_blocked` with a concrete unblocker. Burning an iteration to ask "what next?" violates the goal contract.',
142
- '',
143
- '## Surfacing tool output to the user (read every turn)',
144
- 'When the user explicitly asks to see something — phrasings like "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find files matching Q", "grep for W" — your final assistant message MUST include the actual content the tool returned. Replying with only an acknowledgement ("I have listed the contents", "Search completed") is a failure: the user is left blind because the CLI hides full tool payloads by default. Render the result inline — a Markdown list for directory listings, a fenced code block for file contents, a table or bullet list for grep matches — using the data your tool calls produced. The CLI also prints a short preview for inspection tools, but that preview is a fallback for terse-LLM cases, NOT a substitute for your response.',
163
+ '## Runtime Context',
164
+ `- Workspace root: ${context.workspaceRoot}`,
165
+ `- Launch directory: ${context.launchCwd}`,
166
+ `- BrainRouter sessionKey: ${context.sessionKey}`,
167
+ '- All relative paths resolve from the workspace root.',
145
168
  '',
146
- '## Operating Behavior',
147
- '- Be concise but not passive. Do the next useful thing with tools.',
148
- '- Do not say you lack session context when the Runtime Context contains a sessionKey.',
149
- '- Do not ask for a workspace path unless the current workspace root is wrong or inaccessible.',
150
- '- Read before editing. Keep edits scoped. Run relevant tests after changes.',
151
- '- If the model or endpoint cannot use tools, explain that clearly and continue with the best available direct answer.',
152
- '- For multi-step work, keep the durable plan current with update_plan. Use statuses pending, in_progress, and completed, with at most one in_progress item.',
153
- '- The CLI persists per-session state under .brainrouter/cli/sessions/<encodedKey>/ (transcript.jsonl, goal.json, tasks.json) for inspection and future orchestration.',
169
+ '## Workspace Instructions',
170
+ instructionSummary,
154
171
  '',
155
172
  personalityOverlay(context.personality),
156
- ].join('\n');
173
+ policyOverlay(context.executionMode, context.reviewPolicy),
174
+ effortOverlay(context.effort),
175
+ clarifyOverlay(context.activeSkill),
176
+ ].filter(Boolean).join('\n');
157
177
  }
158
178
  export function loadWorkspaceInstructionSummary(workspaceRoot) {
159
179
  const instructionPath = ['AGENT.md', 'AGENTS.md']
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Single source of truth for "is this shell command destructive enough that we
3
+ * must confirm even in /mode fast?"
4
+ *
5
+ * Used by:
6
+ * - agent.ts `run_command`: in `executionMode === 'fast'` we skip the
7
+ * `askYesNo` prompt for everyday commands, but route through askYesNo
8
+ * anyway when this returns true.
9
+ * - tests: invariant that fast mode ≠ unconditional auto-approve.
10
+ *
11
+ * Heuristic, not a sandbox. The real blast-radius limiter is
12
+ * `BRAINROUTER_SANDBOX=on`. This list exists so that a typo
13
+ * (`rm -rf /` instead of `rm -rf ./build`) doesn't get auto-approved
14
+ * because the user happened to be in fast mode.
15
+ *
16
+ * Patterns are conservative on purpose: false-positives cost one extra y/N
17
+ * prompt; false-negatives cost a wiped disk. Add a pattern when you spot one
18
+ * — do not remove existing entries without a replacement.
19
+ */
20
+ /**
21
+ * Returns true when the command matches any pattern that fast mode should
22
+ * still gate through `askYesNo`. The check is a single-pass regex sweep
23
+ * against the literal command string — no shell parsing, no env expansion.
24
+ *
25
+ * The trailing wildcard semantics matter: `rm -rf foo` matches, `rm-rf` does
26
+ * not (the pattern requires whitespace after `rm`), `rmdir` does not (different keyword). When in doubt,
27
+ * lean toward returning true: the cost of an extra y/N is much smaller than
28
+ * the cost of accidentally letting a destructive command through.
29
+ */
30
+ export declare function isDangerousCommand(command: string): boolean;
31
+ export type RunCommandApproval = 'auto-approve' | 'ask' | 'deny-silent';
32
+ /**
33
+ * Pure decision for "what should happen when the agent calls `run_command`?"
34
+ * Split out of `agent.ts` so the policy is unit-testable without TTY mocking.
35
+ *
36
+ * - Silent children cannot answer a y/N prompt. We auto-approve only when
37
+ * the parent has opted in via `executionMode === 'fast'` AND the command
38
+ * is not in the dangerous set. Dangerous commands in silent children are
39
+ * always denied — there is no human to confirm the blast radius.
40
+ * - Interactive parents in `fast` mode skip the prompt for safe commands
41
+ * and still gate dangerous ones through `askYesNo`. In `planning` mode
42
+ * every command routes through `askYesNo`.
43
+ *
44
+ * The `executionMode === 'fast'` check is the single source of truth for
45
+ * "yolo-ish" behavior — the legacy `autoApproveShell` flag is migrated into
46
+ * `executionMode === 'fast'` on first read of `preferencesStore` so new
47
+ * callers do not need to consult both.
48
+ */
49
+ export declare function resolveRunCommandApproval(prefs: {
50
+ executionMode: 'planning' | 'fast';
51
+ }, command: string, opts: {
52
+ silent: boolean;
53
+ }): RunCommandApproval;
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Single source of truth for "is this shell command destructive enough that we
3
+ * must confirm even in /mode fast?"
4
+ *
5
+ * Used by:
6
+ * - agent.ts `run_command`: in `executionMode === 'fast'` we skip the
7
+ * `askYesNo` prompt for everyday commands, but route through askYesNo
8
+ * anyway when this returns true.
9
+ * - tests: invariant that fast mode ≠ unconditional auto-approve.
10
+ *
11
+ * Heuristic, not a sandbox. The real blast-radius limiter is
12
+ * `BRAINROUTER_SANDBOX=on`. This list exists so that a typo
13
+ * (`rm -rf /` instead of `rm -rf ./build`) doesn't get auto-approved
14
+ * because the user happened to be in fast mode.
15
+ *
16
+ * Patterns are conservative on purpose: false-positives cost one extra y/N
17
+ * prompt; false-negatives cost a wiped disk. Add a pattern when you spot one
18
+ * — do not remove existing entries without a replacement.
19
+ */
20
/**
 * Regexes that flag a shell command as destructive. Each entry is tested
 * against the trimmed, literal command string; a single hit is enough.
 *
 * The patterns deliberately over-match: a false positive costs one extra
 * y/N prompt, a false negative can cost data.
 */
const DANGEROUS_PATTERNS = [
    // Recursive / forced deletions. The optional middle group lets the flag
    // appear after other flags or operands (`rm -v -rf /`, `rm build.log -f`)
    // while stopping at `|`, `;` and `&` so an unrelated later command's
    // flags are not attributed to `rm`.
    /\brm\s+(?:[^|;&]*\s)?(?:-[a-zA-Z]*[rRfF][a-zA-Z]*|--recursive\b|--force\b)/,
    // Anything piped into a shell — too easy to hide an `rm` inside. The
    // optional path prefix also covers `| /bin/sh` and `| /usr/bin/env`-less
    // absolute invocations.
    /\|\s*(?:\S*\/)?(?:sh|bash|zsh|fish|dash|ksh)\b/,
    // Disk imaging / zeroing
    /\bdd\s+(?:if|of|bs|count)=/,
    /\bmkfs(?:\.[a-z0-9]+)?\b/,
    /\bfdisk\b/,
    /\bshred\b/,
    // Wide-open permission flips
    /\bchmod\s+(?:-R\s+)?(?:[0-7]*[7]{2,3}|a\+w)\b/,
    /\bchown\s+-R\b/,
    // Privilege escalation
    /\bsudo\b/,
    /\bsu\s+-/,
    // Forced or destructive git operations. The force flag may follow the
    // remote/refspec (`git push origin main --force`) and may be bundled
    // with other short flags (`-uf`).
    /\bgit\s+push\b[^|;&]*\s(?:-[a-zA-Z]*f|--force)/,
    /\bgit\s+reset\s+--hard/,
    /\bgit\s+clean\s+-[a-zA-Z]*[fF]/,
    /\bgit\s+checkout\s+--\s/,
    /\bgit\s+branch\s+-D\b/,
    // Package-manager mutators that touch the global tree or remove deps
    /\bnpm\s+(?:uninstall|unpublish)\b/,
    /\b(?:yarn|pnpm)\s+remove\b/,
    // Process / system control. SIGKILL can be spelled `-9`, `-KILL`,
    // `-SIGKILL`, `-s KILL`, `-s 9` or `-n 9`; all are equally unblockable.
    /\bkillall\b/,
    /\bkill\s+-(?:9|(?:SIG)?KILL|[sn]\s+(?:9|(?:SIG)?KILL))\b/,
    /\b(?:shutdown|reboot|halt|poweroff)\b/,
    // Outbound exec-from-network — the classic curl|sh exfil/exec pattern
    /\b(?:curl|wget|fetch)\b[^|]*\|\s*(?:\S*\/)?(?:sh|bash|zsh)\b/,
    // Database wipes
    /\bDROP\s+(?:DATABASE|TABLE|SCHEMA)\b/i,
    /\bTRUNCATE\s+TABLE\b/i,
    // Docker / k8s wipes
    /\bdocker\s+system\s+prune\b/,
    /\bdocker\s+(?:rm|rmi)\s+-f/,
    /\bkubectl\s+delete\b/,
];
/**
 * Reports whether a shell command matches any entry in
 * `DANGEROUS_PATTERNS`.
 *
 * The check is a regex sweep over the trimmed, literal command string — no
 * shell parsing and no environment expansion — so it is a heuristic, not a
 * guarantee. `rm -rf foo` matches; `rm-rf` and `rmdir` do not, because the
 * `rm` pattern requires whitespace directly after `rm`.
 *
 * @param {string} command Raw command line as it would be handed to the shell.
 * @returns {boolean} `true` when at least one pattern matches; `false` for
 *   an empty, whitespace-only or missing command and for commands that match
 *   nothing.
 */
export function isDangerousCommand(command) {
    if (!command)
        return false;
    const normalized = command.trim();
    if (!normalized)
        return false;
    return DANGEROUS_PATTERNS.some((pattern) => pattern.test(normalized));
}
77
/**
 * Decides how a `run_command` tool call should be handled, without touching
 * the TTY, so the policy can be unit-tested on its own.
 *
 * Outcomes:
 *   - `'auto-approve'` — `executionMode` is `'fast'` and the command is not
 *     flagged by `isDangerousCommand`; applies to silent and interactive
 *     callers alike.
 *   - `'deny-silent'`  — the caller is silent (nobody can answer a y/N
 *     prompt) and the command is dangerous or the mode is not `'fast'`.
 *   - `'ask'`          — the caller is interactive and the command is
 *     dangerous or the mode is not `'fast'`.
 *
 * @param {{ executionMode: 'planning' | 'fast' }} prefs Execution preferences.
 * @param {string} command Command line the agent wants to run.
 * @param {{ silent: boolean }} opts Whether the caller can show a prompt.
 * @returns {'auto-approve' | 'ask' | 'deny-silent'} The approval decision.
 */
export function resolveRunCommandApproval(prefs, command, opts) {
    const isFast = prefs.executionMode === 'fast';
    const isRisky = isDangerousCommand(command);
    // Read unconditionally so a missing `opts` fails the same way on every path.
    const { silent } = opts;
    if (isFast && !isRisky) {
        return 'auto-approve';
    }
    return silent ? 'deny-silent' : 'ask';
}
@@ -10,10 +10,31 @@ export declare class McpClientWrapper {
10
10
  * blowing up, which the agent's existing try/catch wrappers already handle.
11
11
  */
12
12
  private connected;
13
+ /**
14
+ * 10a: cached identity. Set once by `detectMcpIdentity` after the first
15
+ * successful `listTools()` (or by `connect` if the config + URL gave us
16
+ * a clear signal). The value drives status surfaces and the brain-offline
17
+ * prompt swap — distinguishes "our brain went down" from "a random
18
+ * third-party MCP went down" once item 11's multi-MCP support lands.
19
+ */
20
+ private identity;
21
+ private serverName?;
13
22
  constructor();
14
23
  /** Whether this wrapper has an active MCP transport. */
15
24
  isConnected(): boolean;
16
- connect(serverConfig: ServerConfig, llmConfig?: LLMConfig): Promise<void>;
25
+ /** 10a: who is this MCP? Set by `detectMcpIdentity`; 'unknown' before first list. */
26
+ getIdentity(): 'brainrouter' | 'third-party' | 'unknown';
27
+ /** 10a: profile name passed at connect (`brainrouter` / `local-http` / etc.). */
28
+ getServerName(): string | undefined;
29
+ /**
30
+ * 10a: connect with an optional `name` so the wrapper can render identity
31
+ * tags ("BrainRouter MCP offline" vs "third-party MCP offline") without
32
+ * the caller threading it through every error path. The pre-10a single-
33
+ * arg form remains supported — callers that don't pass a name fall back
34
+ * to URL-pattern detection.
35
+ */
36
+ connect(serverConfig: ServerConfig, llmConfig?: LLMConfig, name?: string): Promise<void>;
37
+ private _connect;
17
38
  listTools(): Promise<{
18
39
  [x: string]: unknown;
19
40
  tools: {
@@ -154,3 +175,19 @@ export declare class McpClientWrapper {
154
175
  }>;
155
176
  close(): Promise<void>;
156
177
  }
178
+ /**
179
+ * 10a: figure out who an MCP profile belongs to from config metadata + name
180
+ * + URL alone, before any network call. Explicit `identity` wins; otherwise
181
+ * we check name prefix and URL host. Returns 'unknown' when nothing matches
182
+ * — the caller (currently `listTools`) falls back to tool-signature
183
+ * detection after the first successful enumeration.
184
+ *
185
+ * Detection cases:
186
+ * - explicit `identity: 'brainrouter'` or `identity: 'third-party'` → that.
187
+ * - profile name (case-insensitive) starts with `brainrouter` → brainrouter.
188
+ * - http URL hostname matches `*.brainrouter.cloud` / `*.brainrouter.dev`
189
+ * / `*.brainrouter.io` / `*.kinqs.brainrouter.*` → brainrouter.
190
+ * - stdio command basename matches `brainrouter` / `brainrouter-mcp` → brainrouter.
191
+ * - otherwise → unknown (let the tool-signature fallback decide).
192
+ */
193
+ export declare function resolveIdentityFromConfig(serverConfig: ServerConfig, name?: string): 'brainrouter' | 'third-party' | 'unknown';