@kinqs/brainrouter-cli 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -52
- package/agents/architect.json +18 -0
- package/agents/explorer.json +18 -0
- package/agents/reviewer.json +18 -0
- package/agents/verifier.json +18 -0
- package/agents/worker.json +18 -0
- package/bin/cli.cjs +71 -0
- package/dist/agent/agent.d.ts +224 -3
- package/dist/agent/agent.js +561 -55
- package/dist/cli/banner.d.ts +80 -0
- package/dist/cli/banner.js +232 -0
- package/dist/cli/cliPrompt.d.ts +106 -0
- package/dist/cli/cliPrompt.js +314 -0
- package/dist/cli/commands/_context.d.ts +3 -1
- package/dist/cli/commands/_helpers.d.ts +1 -1
- package/dist/cli/commands/_helpers.js +6 -6
- package/dist/cli/commands/config.d.ts +46 -0
- package/dist/cli/commands/config.js +1042 -0
- package/dist/cli/commands/guard.js +75 -10
- package/dist/cli/commands/init.d.ts +20 -0
- package/dist/cli/commands/init.js +64 -0
- package/dist/cli/commands/login.d.ts +13 -0
- package/dist/cli/commands/login.js +179 -0
- package/dist/cli/commands/mcp.d.ts +19 -0
- package/dist/cli/commands/mcp.js +286 -0
- package/dist/cli/commands/memory.js +2 -2
- package/dist/cli/commands/obs.js +22 -22
- package/dist/cli/commands/orchestration.js +18 -0
- package/dist/cli/commands/session.js +13 -5
- package/dist/cli/commands/ui.js +202 -91
- package/dist/cli/commands/workflow.d.ts +20 -0
- package/dist/cli/commands/workflow.js +368 -51
- package/dist/cli/ink/ChatApp.d.ts +206 -0
- package/dist/cli/ink/ChatApp.js +493 -0
- package/dist/cli/ink/Frame.d.ts +26 -0
- package/dist/cli/ink/Frame.js +5 -0
- package/dist/cli/ink/Picker.d.ts +65 -0
- package/dist/cli/ink/Picker.js +133 -0
- package/dist/cli/ink/SlashPalette.d.ts +51 -0
- package/dist/cli/ink/SlashPalette.js +136 -0
- package/dist/cli/ink/TextField.d.ts +34 -0
- package/dist/cli/ink/TextField.js +47 -0
- package/dist/cli/ink/WizardApp.d.ts +7 -0
- package/dist/cli/ink/WizardApp.js +422 -0
- package/dist/cli/ink/ambientChat.d.ts +34 -0
- package/dist/cli/ink/ambientChat.js +7 -0
- package/dist/cli/ink/consoleCapture.d.ts +11 -0
- package/dist/cli/ink/consoleCapture.js +33 -0
- package/dist/cli/ink/markdownRender.d.ts +41 -0
- package/dist/cli/ink/markdownRender.js +278 -0
- package/dist/cli/ink/renderWithResizeClear.d.ts +14 -0
- package/dist/cli/ink/renderWithResizeClear.js +33 -0
- package/dist/cli/ink/runChat.d.ts +34 -0
- package/dist/cli/ink/runChat.js +571 -0
- package/dist/cli/ink/runPicker.d.ts +31 -0
- package/dist/cli/ink/runPicker.js +139 -0
- package/dist/cli/ink/runSlashPalette.d.ts +23 -0
- package/dist/cli/ink/runSlashPalette.js +33 -0
- package/dist/cli/ink/runWizard.d.ts +22 -0
- package/dist/cli/ink/runWizard.js +133 -0
- package/dist/cli/ink/stdinHandoff.d.ts +51 -0
- package/dist/cli/ink/stdinHandoff.js +78 -0
- package/dist/cli/ink/toolFormat.d.ts +73 -0
- package/dist/cli/ink/toolFormat.js +180 -0
- package/dist/cli/ink/useTerminalSize.d.ts +35 -0
- package/dist/cli/ink/useTerminalSize.js +26 -0
- package/dist/cli/repl.d.ts +25 -3
- package/dist/cli/repl.js +64 -646
- package/dist/cli/slashSuggest.d.ts +32 -0
- package/dist/cli/slashSuggest.js +146 -0
- package/dist/cli/spinner.d.ts +34 -0
- package/dist/cli/spinner.js +36 -0
- package/dist/cli/statusline.d.ts +67 -0
- package/dist/cli/statusline.js +204 -0
- package/dist/cli/theme.d.ts +79 -0
- package/dist/cli/theme.js +106 -0
- package/dist/cli/whereView.d.ts +81 -0
- package/dist/cli/whereView.js +245 -0
- package/dist/cli/wizard/modelsApi.d.ts +72 -0
- package/dist/cli/wizard/modelsApi.js +166 -0
- package/dist/cli/wizard/picker.d.ts +202 -0
- package/dist/cli/wizard/picker.js +547 -0
- package/dist/cli/wizard/providers.d.ts +86 -0
- package/dist/cli/wizard/providers.js +190 -0
- package/dist/cli/wizard/runner.d.ts +13 -0
- package/dist/cli/wizard/runner.js +488 -0
- package/dist/cli/wizard/types.d.ts +122 -0
- package/dist/cli/wizard/types.js +109 -0
- package/dist/config/config.d.ts +52 -0
- package/dist/config/config.js +89 -75
- package/dist/index.js +215 -206
- package/dist/memory/briefing.d.ts +11 -1
- package/dist/memory/briefing.js +69 -1
- package/dist/memory/consolidation.d.ts +1 -1
- package/dist/orchestration/agentRegistry.d.ts +36 -0
- package/dist/orchestration/agentRegistry.js +64 -0
- package/dist/orchestration/orchestrator.d.ts +7 -0
- package/dist/orchestration/orchestrator.js +2 -0
- package/dist/orchestration/tools.d.ts +10 -1
- package/dist/orchestration/tools.js +48 -4
- package/dist/prompt/breadthHint.d.ts +5 -0
- package/dist/prompt/breadthHint.js +44 -0
- package/dist/prompt/skillCatalog.d.ts +11 -0
- package/dist/prompt/skillCatalog.js +134 -0
- package/dist/prompt/skillRunner.d.ts +2 -2
- package/dist/prompt/skillRunner.js +2 -31
- package/dist/prompt/systemPrompt.d.ts +34 -0
- package/dist/prompt/systemPrompt.js +128 -108
- package/dist/runtime/dangerousCommand.d.ts +53 -0
- package/dist/runtime/dangerousCommand.js +105 -0
- package/dist/runtime/mcpClient.d.ts +38 -1
- package/dist/runtime/mcpClient.js +104 -13
- package/dist/runtime/mcpPool.d.ts +162 -0
- package/dist/runtime/mcpPool.js +423 -0
- package/dist/runtime/mcpUtils.d.ts +3 -1
- package/dist/state/goalStore.d.ts +98 -17
- package/dist/state/goalStore.js +132 -42
- package/dist/state/preferencesStore.d.ts +67 -3
- package/dist/state/preferencesStore.js +84 -1
- package/dist/state/workflowArtifacts.d.ts +63 -2
- package/dist/state/workflowArtifacts.js +120 -8
- package/dist/tests/_helpers.d.ts +31 -0
- package/dist/tests/_helpers.js +91 -0
- package/package.json +12 -5
- package/.env.example +0 -109
|
@@ -27,133 +27,153 @@ function personalityOverlay(style) {
|
|
|
27
27
|
return '';
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
|
+
/**
 * Builds the "Session policy overrides" prompt section.
 *
 * A bullet is emitted for each override that is active:
 *   - `executionMode === 'fast'`  -> skip permission prose for safe shell calls.
 *   - `reviewPolicy === 'proceed'` -> apply multi-file plans without pausing.
 *
 * @param executionMode Current execution mode; only the value `'fast'` adds a bullet.
 * @param reviewPolicy Current review policy; only the value `'proceed'` adds a bullet.
 * @returns The heading plus one line per active override, or `''` when neither
 *          override is active (so the caller can drop the section entirely).
 */
function policyOverlay(executionMode, reviewPolicy) {
    const fastBullet = '- Execution mode is `fast`: skip the "may I run this?" prose for safe shell calls and just issue the tool. The CLI still gates dangerous commands (`rm -rf`, `sudo`, force-push, …) with a y/N regardless of mode.';
    const proceedBullet = '- Review policy is `proceed`: apply multi-file plans and report after — no "ready for your approval?" pause. `/approve` is still the user\'s explicit lever.';
    // Keep declaration order (execution mode first, review policy second) and
    // discard the slots whose override is not active.
    const bullets = [
        executionMode === 'fast' ? fastBullet : null,
        reviewPolicy === 'proceed' ? proceedBullet : null,
    ].filter((entry) => entry !== null);
    return bullets.length > 0
        ? `## Session policy overrides\n${bullets.join('\n')}`
        : '';
}
|
|
42
|
+
/**
 * Builds the "Reasoning depth" prompt section for the requested effort level.
 *
 * @param effort Effort setting; `'low'` and `'high'` each produce a two-line
 *               section, every other value produces nothing.
 * @returns The section text, or `''` when no depth override applies.
 */
function effortOverlay(effort) {
    switch (effort) {
        case 'low':
            return '## Reasoning depth: low'
                + '\n'
                + '- Be terse. Skip ceremony. One-paragraph answers when the question fits in one paragraph.';
        case 'high':
            return '## Reasoning depth: high'
                + '\n'
                + '- Reason step-by-step before acting. Audit your evidence against the goal before each tool call.';
        default:
            // Any other level leaves the prompt unchanged.
            return '';
    }
}
|
|
57
|
+
/**
 * Builds the CLARIFY-mode prompt section used while the `grill-me` skill is active.
 *
 * @param activeSkill Name of the currently active skill.
 * @returns The clarify-mode instructions when `activeSkill` is exactly
 *          `'grill-me'`, otherwise `''`.
 */
function clarifyOverlay(activeSkill) {
    if (activeSkill === 'grill-me') {
        const heading = '## CLARIFY mode (grill-me)';
        const rules = [
            '- Do NOT make file edits, run shell commands, or spawn worker agents this turn.',
            '- Ask 2–5 questions to disambiguate scope, format, and unstated assumptions.',
            '- Prefer `ask_user_choice` for mutually-exclusive options; plain prose for free-form input.',
            '- (`askYesNo` is a CLI-internal gate the framework triggers — do NOT try to call it as a tool.)',
            '- End with a one-paragraph "what I\'ll do once you answer" so the user can sanity-check the read.',
        ];
        return `${heading}\n${rules.join('\n')}`;
    }
    return '';
}
|
|
69
|
+
/**
 * 0.3.6 item 10b: emit the BrainRouter-MCP-specific guidance ONLY when the
 * brain is actually reachable. The detection signal is the presence of
 * `memory_recall` in `connectedMcpTools` (the canonical BrainRouter
 * signature tool). When undefined (older callers) we keep today's behaviour
 * and assume the brain is online — so the prompt doesn't suddenly omit
 * memory guidance for callers that haven't been updated yet.
 *
 * A prefixed name only counts when the character right before
 * `memory_recall` is a separator (anything that is not a letter or digit).
 * Without that boundary an unrelated tool such as `mcp_cache_inmemory_recall`
 * would be mistaken for the signature tool and the prompt would advertise
 * memory tools that are not callable.
 *
 * @param connectedTools List of connected MCP tool names, or a falsy value
 *                       when the caller does not supply one.
 * @returns `true` when the list is absent or contains the signature tool
 *          (bare or `mcp_`-prefixed); `false` otherwise, including for an
 *          empty list.
 */
function isBrainOnline(connectedTools) {
    if (!connectedTools)
        return true;
    const signature = 'memory_recall';
    // Match bare `memory_recall`, double-underscore `mcp__<server>__memory_recall`,
    // and single-underscore `mcp_<server>_memory_recall` (both prefix conventions
    // are in use across the multi-MCP codepaths until naming is unified).
    return connectedTools.some((tool) => {
        // Ignore malformed entries rather than throwing on `.startsWith`.
        if (typeof tool !== 'string')
            return false;
        if (tool === signature)
            return true;
        if (!tool.startsWith('mcp_') || !tool.endsWith(signature))
            return false;
        // `mcp_` and `memory_recall` cannot overlap, so this index is always >= 3.
        const boundary = tool.charAt(tool.length - signature.length - 1);
        return !/[A-Za-z0-9]/.test(boundary);
    });
}
|
|
86
|
+
/**
 * Builds the prompt section shown when the BrainRouter MCP is unreachable.
 *
 * The section tells the model that memory and skill tooling is unavailable,
 * that it must not call those tools, what to tell the user, and to fall back
 * to local workspace tools.
 *
 * @returns The heading followed by four bullet lines, newline-separated.
 */
function brainOfflineNotice() {
    const guidance = [
        '- Long-term memory, skill lookup, and the recall briefing are unavailable.',
        '- Do NOT call any BrainRouter memory or skill tools — they will fail with "MCP server is not connected". The turn-start tool list reflects this; only tools that appear there are callable.',
        '- If the user asks about past sessions, prior decisions, or skill-based workflows, tell them the brain is offline and recommend `/mcp reconnect`.',
        '- Operate against the workspace files directly using local tools (`read_file`, `glob_files`, `grep_search`, `run_command`).',
    ];
    let notice = '## ⚠️ BrainRouter MCP is OFFLINE this turn';
    for (const bullet of guidance) {
        notice += '\n' + bullet;
    }
    return notice;
}
|
|
95
|
+
/**
 * Builds the "Memory-First Workflow" prompt section.
 *
 * Layout of the returned text, one entry per line:
 *   1. section heading and a one-line framing sentence,
 *   2. five bullet rules about recalling, citing and offloading memory,
 *   3. an empty line,
 *   4. the closing anti-hallucination paragraph.
 *
 * @returns The full section as a newline-separated string.
 */
function memoryFirstSection() {
    const header = [
        '## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)',
        'BrainRouter is a cognitive memory engine first. Treat memory as a primary tool.',
    ];
    const rules = [
        '- A `## BrainRouter Memory Briefing` system message is auto-injected with recalled memories, persona, and recent context. Read it before reasoning. When thin/empty, call `memory_search` / `memory_recall` yourself — do not assume the user is new.',
        '- For non-trivial work, call `memory_recall` with sessionKey + the request as the query. When you pivot mid-turn or need deeper signal, re-call: `memory_file_history` for file-specific past changes, `memory_graph_query` for related entities (2-hop), `memory_explain_recall` for ranking signals, `memory_failed_attempts` for prior dead-ends. Call `memory_resolve_session` first when you don\'t yet have a sessionKey.',
        '- Quote record IDs inline like `[rec_xxx]` so the user sees what you used.',
        '- For payloads >~1,000 tokens, call `memory_working_offload` and reference back by its ref-node id instead of pasting again.',
        '- **Capture the WHY.** After every non-trivial tool batch (≥3 tool calls OR a single tool that returned >2KB), call `memory_working_offload` ONCE with `kind: "reasoning"`, `title: "Why: <short>"`, and a 1-paragraph DECISION summary. Payload offload is about token budget; reasoning offload is the audit trail the next turn\'s briefing surfaces back.',
    ];
    const antiHallucination = '**Anti-hallucination.** Don\'t generalize recall results — quote or paraphrase tightly, always with `[recordId]`. Don\'t invent project facts not in the briefing, a recall result, or a file you read. Never say "I do not have information about your current projects" if the briefing is non-empty or before running `memory_recall`. If a recalled fact looks stale or off-project (e.g. recall says "Vue.js + Go" but the workspace is TypeScript-only), flag it: "Recalled [rec_xxx] looks inconsistent — archive via `memory_update`?"';
    // The empty string yields the blank line that separates the bullet list
    // from the closing paragraph.
    return header.concat(rules, '', antiHallucination).join('\n');
}
|
|
30
108
|
export function buildSystemPrompt(context) {
|
|
31
109
|
const instructionSummary = context.instructionSummary?.trim()
|
|
32
110
|
? context.instructionSummary.trim()
|
|
33
111
|
: 'No workspace AGENT.md or AGENTS.md instruction file was found.';
|
|
112
|
+
const brainOnline = isBrainOnline(context.connectedMcpTools);
|
|
113
|
+
// Order matters for prompt-cache hits (item 9c): identity + tool-mechanics
|
|
114
|
+
// baseline stay first because they never change turn-to-turn; the workspace
|
|
115
|
+
// block + per-call overlays sit at the tail so dynamic content lands last.
|
|
34
116
|
return [
|
|
35
|
-
'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal.',
|
|
36
|
-
'Your edge over generic coding agents is being direct, tool-driven, memory-aware, and workspace-aware — every turn should reflect that.',
|
|
117
|
+
'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal. Direct, tool-driven, memory-aware, workspace-aware.',
|
|
37
118
|
'',
|
|
38
|
-
'##
|
|
39
|
-
`-
|
|
40
|
-
`- Launch directory: ${context.launchCwd}`,
|
|
41
|
-
`- BrainRouter sessionKey: ${context.sessionKey}`,
|
|
42
|
-
'- All relative file paths are resolved from the workspace root, not from the CLI installation directory.',
|
|
43
|
-
'- If the user asks about "the session", answer with the current BrainRouter sessionKey and workspace root.',
|
|
44
|
-
'',
|
|
45
|
-
'## Workspace Instructions',
|
|
46
|
-
instructionSummary,
|
|
47
|
-
'',
|
|
48
|
-
'## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)',
|
|
49
|
-
'BrainRouter is a cognitive memory engine first and a coding agent second. Treat memory as a primary tool, not an afterthought. The user pays for this routing — you must use it.',
|
|
50
|
-
'',
|
|
51
|
-
'### Before doing the work',
|
|
52
|
-
'- The CLI already injects a "## BrainRouter Memory Briefing" system message with recalled cognitive memories, persona, focus scenes, and recent context. READ it before you reason. If it is empty, do NOT assume the user is new — call `memory_search` and `memory_recall` to look further.',
|
|
53
|
-
'- For ANY non-trivial request, call `memory_recall` with the current sessionKey AND the user request as the query. Look for `recordId` values you can cite later.',
|
|
54
|
-
'- If the request mentions a specific file, also call `memory_file_history` with that path — past changes and known issues live there.',
|
|
55
|
-
'- If the request mentions a domain/feature concept, call `memory_graph_query` with the entity name to find related memories across the knowledge graph (2-hop default).',
|
|
56
|
-
'- When you don\'t have a sessionKey yet, call `memory_resolve_session` with the workspacePath.',
|
|
57
|
-
'',
|
|
58
|
-
'### During the work',
|
|
59
|
-
'- Surface the record IDs you are relying on. Quote them inline like `[rec_xxx]` so the user sees what you used.',
|
|
60
|
-
'- For long-running tasks, call `memory_task_state` to check whether this work was started before and `memory_task_update` to record progress (blockers, decisions, next actions).',
|
|
61
|
-
'- If you produce a payload over ~1,000 tokens (analysis, diff, large summary), call `memory_working_offload` and refer back to it by its ref node id instead of pasting again.',
|
|
62
|
-
'- The briefing only fires ONCE at turn start with the prompt as the query. **Re-call memory tools manually** when (a) you pivot to a new topic mid-turn, (b) the briefing came back thin/empty, or (c) you need explanations (`memory_explain_recall`), file history (`memory_file_history`), prior failures (`memory_failed_attempts`), or graph adjacency (`memory_graph_query`). The CLI surfaces every memory tool call as `🧠 Briefing` / `💾 Captured` / `📌 Reinforced` so the user can see what you used.',
|
|
63
|
-
'',
|
|
64
|
-
'### After the work',
|
|
65
|
-
'- The CLI auto-runs `memory_mark_cited` with the records you actually used (detected by content match against your final answer) and `memory_capture_turn`. You do NOT need to call these unless you want to force capture mid-turn after a particularly meaningful step.',
|
|
66
|
-
'',
|
|
67
|
-
'### Never do',
|
|
68
|
-
'- Never say "I do not have information about your current projects" if the briefing is non-empty or if you have not first run `memory_search` / `memory_recall` for the question.',
|
|
69
|
-
'- Never re-discover something that already lives in memory. Recall first, then read files.',
|
|
70
|
-
'- Never cite a recordId that did not appear in the briefing or in a recall result you ran.',
|
|
119
|
+
'## Tool-call mechanics',
|
|
120
|
+
'Tool calls live in the structured `tool_calls` field of your assistant message, NOT in prose. Writing `goal_complete({...})` or any other tool name as text/markdown/code-fence does NOTHING — the framework only sees `tool_calls`. The same applies to every tool (`read_file`, `update_plan`, `spawn_agent`, `goal_blocked`, `memory_*`, …). Never call a tool name that wasn\'t in the turn-start tool list. Skills (names ending in `-skill` / `-workflow` / `-driven`) are documentation, not tools — load via `get_skill`, never `tool_calls`. The CLI has a repeat-loop guard: 3 identical (tool, args) calls in one turn returns an error instead of executing.',
|
|
71
121
|
'',
|
|
72
|
-
'
|
|
73
|
-
'-
|
|
74
|
-
'-
|
|
75
|
-
'-
|
|
76
|
-
'- When unsure whether a recall result is current, call `memory_verify` to flag it for re-checking, or suggest the user run `/forget <recordId>` to archive obvious garbage.',
|
|
122
|
+
'## Tool policy',
|
|
123
|
+
'- Prefer tool calls over asking the user for info the workspace or memory can answer.',
|
|
124
|
+
'- MCP-first for cognitive work — skills, personas, memory, working canvas, contradictions go through MCP tools, not filesystem reads.',
|
|
125
|
+
'- Skill workflow: `list_skills` / `search_skills` → `get_skill({ name })` → follow steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
|
|
77
126
|
'',
|
|
78
|
-
|
|
79
|
-
'- You may call local workspace tools and BrainRouter MCP tools yourself.',
|
|
80
|
-
'- Prefer tool calls over asking the user for information that can be discovered from the workspace or MCP memory.',
|
|
81
|
-
'- If the user asks about files, project structure, code, tests, or configuration, inspect files with list_dir, glob_files, grep_search, or read_file.',
|
|
82
|
-
'- **MCP-first for everything cognitive.** Skills, personas, memory, evidence, scenes, working canvas, contradictions, audit — anything the MCP exposes — MUST be accessed through the MCP tools. Do not reimplement them with filesystem reads. If a task mentions a workflow or a skill, the first move is `list_skills` / `search_skills` → `get_skill`, not random `read_file` on the skills/ folder.',
|
|
83
|
-
'- **Skills are NOT tools.** Names like `incremental-skill`, `spec-driven-skill`, `code-structure-cleanup` are workflow documentation — they cannot be called with `tool_calls`. To use one: call `list_skills` (or `search_skills`) to discover the canonical name, then `get_skill({ name: "<name>" })` to load its instructions, and then follow the steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
|
|
84
|
-
'- **Never call a tool whose name was not in the tool list returned at turn start.** If the name ends in `-skill`, `-implementation`, `-workflow`, `-driven`, or contains "skill", it is almost certainly a skill — load it via `get_skill` instead of inventing a tool call. Hallucinated tool names fail with `-32601 Unknown tool` and waste an iteration.',
|
|
85
|
-
'- **No tight loops.** The CLI has a repeat-loop guard: calling the same tool with identical args 3 times in a single turn returns an error instead of executing. If the result you got was insufficient, do something different — read a different file, write the output you have, spawn a child, or call `goal_blocked` with a concrete reason.',
|
|
127
|
+
brainOnline ? memoryFirstSection() : brainOfflineNotice(),
|
|
86
128
|
'',
|
|
87
|
-
'## Multi-
|
|
88
|
-
'-
|
|
89
|
-
'-
|
|
90
|
-
'- Use `
|
|
91
|
-
'- **Fan-out triggers.** ALWAYS prefer `spawn_agents` (≥3 children) when the user prompt says any of: "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "as much as", "test more X", "explore all Y", "across the codebase". One tool call + a paragraph asking "what next?" is NOT acceptable for these prompts.',
|
|
92
|
-
'- **Standard fan-out templates.**',
|
|
93
|
-
' • "Test all the MCP tools" → 5 explorers, each focused on a different tool category (memory_*, list_skills/get_skill, governance/*, working/*, hooks/*).',
|
|
94
|
-
' • "Explore this codebase" → 3 explorers covering server / client / shared types.',
|
|
95
|
-
' • "Design feature X" → 2 architects with different stack constraints + 1 reviewer.',
|
|
96
|
-
'- Delegate when there are 2+ independent investigations or when you would otherwise produce a large isolated output. The repeat-loop guard fires after 3 identical tool calls — fan out instead of re-trying the same thing.',
|
|
97
|
-
'- Always synthesize child outputs in your own words — never claim work is done just because a child returned.',
|
|
129
|
+
'## Multi-agent orchestration',
|
|
130
|
+
'- Delegate parallel, bounded work via `spawn_agent` (one) or `spawn_agents` (batch). Roles: explorer (read-only investigation), architect (design alternatives), reviewer (code review), worker (write access), verifier (tests/checks). Omit `role` in `spawn_agents` to auto-route from the leading verb; use `route_agent` for a dry run.',
|
|
131
|
+
'- Fan-out triggers: phrasings like "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "across the codebase" → ALWAYS `spawn_agents` with ≥3 children. One tool call + "what next?" is NOT acceptable for those prompts.',
|
|
132
|
+
'- Use `wait_agent` / `wait_agents` to drain before yielding. Synthesize child outputs in your own words — never claim work is done just because a child returned.',
|
|
98
133
|
'',
|
|
99
|
-
'##
|
|
100
|
-
'-
|
|
101
|
-
'- Required artifacts: `spec.md` (what + why + boundaries), `tasks.md` (ordered task breakdown), `walkthrough.md` (post-implementation summary). Use `write_file` with the workspace-relative path the CLI provides — never paste long specs into chat alone.',
|
|
102
|
-
'- For free-form prompts that look like spec/plan requests, tell the user to use `/spec <title>` or `/feature-dev <title>` instead of producing a chat-only plan. Those commands set up the directory and pre-fill the meta record for you.',
|
|
103
|
-
'- Never produce a multi-section plan response in chat without also writing it to the workflow folder. If you cannot write the file, say so explicitly.',
|
|
134
|
+
'## Workflow artifacts',
|
|
135
|
+
'Multi-step requests (spec, feature plan, review, implementation plan) land as files under `.brainrouter/cli/workflows/<slug>/` — `spec.md` (what + why + boundaries), `tasks.md` (ordered breakdown), `walkthrough.md` (post-implementation summary). Use `/spec <title>` or `/feature-dev <title>` to set up the folder; don\'t produce chat-only plans. If you can\'t write the file, say so explicitly.',
|
|
104
136
|
'',
|
|
105
|
-
'##
|
|
106
|
-
'-
|
|
107
|
-
'-
|
|
108
|
-
'-
|
|
109
|
-
'- list_dir: list a workspace directory.',
|
|
110
|
-
'- grep_search: search workspace files for a string.',
|
|
111
|
-
'- glob_files: find workspace files by glob pattern.',
|
|
112
|
-
'- run_command (alias: bash / shell / sh): run shell commands after explicit terminal confirmation.',
|
|
113
|
-
'- fetch_url: fetch HTTP(S) text content when needed.',
|
|
137
|
+
'## Autonomy & batching',
|
|
138
|
+
'- Don\'t block on unnecessary confirmations. Execute clear instructions.',
|
|
139
|
+
'- Batch independent tool calls (reads, recalls, spawns) in ONE response — most chat APIs accept multiple `tool_calls` per assistant message and the CLI runs them in order then feeds results back.',
|
|
140
|
+
'- After tools return: either call more tools that need the results, OR write the final answer. NEVER produce "I will now do Y" prose with no tool call attached.',
|
|
114
141
|
'',
|
|
115
|
-
'##
|
|
116
|
-
'
|
|
117
|
-
'
|
|
118
|
-
'
|
|
119
|
-
'
|
|
142
|
+
'## Persistence on tool failure',
|
|
143
|
+
'When a tool fails or returns an empty/unexpected result, try at least one recovery before yielding:',
|
|
144
|
+
'1. **Extension swap** — `read_file` on `foo/bar.js` failed? Try `.ts` / `.tsx` / `.mjs`. This codebase is TypeScript.',
|
|
145
|
+
'2. **Directory listing** — `list_dir` the parent to see what\'s actually there.',
|
|
146
|
+
'3. **Glob / grep** — `glob_files` with `**/<name>.*` or `grep_search` for a unique symbol.',
|
|
147
|
+
'4. **Memory** — `memory_file_history` / `memory_search` may have the right path.',
|
|
148
|
+
'Only after 2+ failed recoveries say the file doesn\'t exist, and propose the closest matches you DID find. When `/goal` is active, NEVER stop on a single failure — burning an iteration to ask "what next?" violates the goal contract.',
|
|
120
149
|
'',
|
|
121
|
-
'##
|
|
122
|
-
'
|
|
123
|
-
'- **Batch your tool calls.** Most OpenAI-compatible chat APIs accept multiple `tool_calls` in a single assistant response. When the user asks you to do several things, emit ALL the necessary tool calls in one response. The CLI executes them in order and feeds the results back to you.',
|
|
124
|
-
'- **Parallelize independent work.** Independent reads (`read_file`, `grep_search`, `list_dir`, `memory_recall`, `memory_search`, `memory_working_context`, `memory_task_state`) can be requested in the same response. Independent `spawn_agent` calls likewise.',
|
|
125
|
-
'- When the user says "test all", "every X", "do everything", "run them all", treat it as a single batched request. Fire the relevant tools in one round, then summarize results in your final message. Do not iterate "now I will test X / would you like to proceed".',
|
|
126
|
-
'- After your tools return, either (a) call more tools that need the previous results, or (b) write the final answer. Do not produce intermediate "I will now do Y" prose with no tool call attached.',
|
|
127
|
-
'- If sub-agents (spawn_agent) are running, `wait_agent` for them before yielding the turn.',
|
|
150
|
+
'## Surfacing tool output',
|
|
151
|
+
'When the user explicitly asks to see something — "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find/grep for Q" — your final message MUST include the actual content the tool returned (rendered as a Markdown list / fenced code block / table as appropriate). The CLI hides full tool payloads by default; an acknowledgement-only reply ("I listed the contents") leaves the user blind.',
|
|
128
152
|
'',
|
|
129
|
-
'##
|
|
130
|
-
'
|
|
153
|
+
'## Mid-turn user prompts',
|
|
154
|
+
'- Binary y/N confirmations are CLI-internal gates (`askYesNo`) — the framework triggers them. Do NOT try to call `askYesNo` as a tool.',
|
|
155
|
+
'- `ask_user_choice({ question, header, options })` is for genuine ambiguity with 2–4 mutually-exclusive reasonable approaches. NOT for trivial confirmations, NOT for things you can decide yourself, NOT a substitute for thinking. Errors in non-interactive runs (CI, piped, `brainrouter run`) — when that happens fall back to deciding yourself and explicitly state which option you picked and why.',
|
|
131
156
|
'',
|
|
132
|
-
'
|
|
133
|
-
'
|
|
134
|
-
'
|
|
135
|
-
'
|
|
136
|
-
'
|
|
137
|
-
'5. **Re-read the listing.** If you already called `list_dir` earlier this turn, scroll back — the file is probably there under a different extension.',
|
|
157
|
+
'## Operating behavior',
|
|
158
|
+
'- Be concise but not passive. Read before editing. Run tests after changes.',
|
|
159
|
+
'- For multi-step work, keep `update_plan` current — statuses `pending` / `in_progress` / `completed`, at most one `in_progress`.',
|
|
160
|
+
'- The CLI persists per-session state under `.brainrouter/cli/sessions/<encodedKey>/` (transcript.jsonl, goal.json, tasks.json) for inspection.',
|
|
161
|
+
'- If the model / endpoint can\'t use tools, say so and continue with the best direct answer.',
|
|
138
162
|
'',
|
|
139
|
-
'
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
'
|
|
144
|
-
'When the user explicitly asks to see something — phrasings like "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find files matching Q", "grep for W" — your final assistant message MUST include the actual content the tool returned. Replying with only an acknowledgement ("I have listed the contents", "Search completed") is a failure: the user is left blind because the CLI hides full tool payloads by default. Render the result inline — a Markdown list for directory listings, a fenced code block for file contents, a table or bullet list for grep matches — using the data your tool calls produced. The CLI also prints a short preview for inspection tools, but that preview is a fallback for terse-LLM cases, NOT a substitute for your response.',
|
|
163
|
+
'## Runtime Context',
|
|
164
|
+
`- Workspace root: ${context.workspaceRoot}`,
|
|
165
|
+
`- Launch directory: ${context.launchCwd}`,
|
|
166
|
+
`- BrainRouter sessionKey: ${context.sessionKey}`,
|
|
167
|
+
'- All relative paths resolve from the workspace root.',
|
|
145
168
|
'',
|
|
146
|
-
'##
|
|
147
|
-
|
|
148
|
-
'- Do not say you lack session context when the Runtime Context contains a sessionKey.',
|
|
149
|
-
'- Do not ask for a workspace path unless the current workspace root is wrong or inaccessible.',
|
|
150
|
-
'- Read before editing. Keep edits scoped. Run relevant tests after changes.',
|
|
151
|
-
'- If the model or endpoint cannot use tools, explain that clearly and continue with the best available direct answer.',
|
|
152
|
-
'- For multi-step work, keep the durable plan current with update_plan. Use statuses pending, in_progress, and completed, with at most one in_progress item.',
|
|
153
|
-
'- The CLI persists per-session state under .brainrouter/cli/sessions/<encodedKey>/ (transcript.jsonl, goal.json, tasks.json) for inspection and future orchestration.',
|
|
169
|
+
'## Workspace Instructions',
|
|
170
|
+
instructionSummary,
|
|
154
171
|
'',
|
|
155
172
|
personalityOverlay(context.personality),
|
|
156
|
-
|
|
173
|
+
policyOverlay(context.executionMode, context.reviewPolicy),
|
|
174
|
+
effortOverlay(context.effort),
|
|
175
|
+
clarifyOverlay(context.activeSkill),
|
|
176
|
+
].filter(Boolean).join('\n');
|
|
157
177
|
}
|
|
158
178
|
export function loadWorkspaceInstructionSummary(workspaceRoot) {
|
|
159
179
|
const instructionPath = ['AGENT.md', 'AGENTS.md']
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single source of truth for "is this shell command destructive enough that we
|
|
3
|
+
* must confirm even in /mode fast?"
|
|
4
|
+
*
|
|
5
|
+
* Used by:
|
|
6
|
+
* - agent.ts `run_command`: in `executionMode === 'fast'` we skip the
|
|
7
|
+
* `askYesNo` prompt for everyday commands, but route through askYesNo
|
|
8
|
+
* anyway when this returns true.
|
|
9
|
+
* - tests: invariant that fast mode ≠ unconditional auto-approve.
|
|
10
|
+
*
|
|
11
|
+
* Heuristic, not a sandbox. The real blast-radius limiter is
|
|
12
|
+
* `BRAINROUTER_SANDBOX=on`. This list exists so that a typo
|
|
13
|
+
* (`rm -rf /` instead of `rm -rf ./build`) doesn't get auto-approved
|
|
14
|
+
* because the user happened to be in fast mode.
|
|
15
|
+
*
|
|
16
|
+
* Patterns are conservative on purpose: false-positives cost one extra y/N
|
|
17
|
+
* prompt; false-negatives cost a wiped disk. Add a pattern when you spot one
|
|
18
|
+
* — do not remove existing entries without a replacement.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Reports whether `command` matches any pattern that fast mode should still
 * gate through `askYesNo`. The check is a single-pass regex sweep against the
 * literal (trimmed) command string — no shell parsing, no env expansion.
 *
 * Patterns are unanchored, so a match anywhere in the string counts:
 * `rm -rf foo` matches, while `rm-rf` and `rmdir` do not, because the `rm`
 * pattern requires whitespace between `rm` and its flags. When in doubt,
 * lean toward returning true: the cost of an extra y/N is much smaller than
 * the cost of accidentally letting a destructive command through.
 *
 * @param command Shell command string to inspect.
 * @returns `true` when any dangerous pattern matches; `false` otherwise,
 *   including for an empty or whitespace-only command.
 */
|
|
30
|
+
export declare function isDangerousCommand(command: string): boolean;
|
|
31
|
+
/**
 * Outcome of the `run_command` approval policy, as returned by
 * `resolveRunCommandApproval`:
 * - `'auto-approve'`: the y/N prompt is skipped.
 * - `'ask'`: the command is routed through `askYesNo`.
 * - `'deny-silent'`: the command is denied for a silent child, which has no
 *   human available to answer a prompt.
 */
export type RunCommandApproval = 'auto-approve' | 'ask' | 'deny-silent';
|
|
32
|
+
/**
 * Pure decision for "what should happen when the agent calls `run_command`?"
 * Split out of `agent.ts` so the policy is unit-testable without TTY mocking.
 *
 * - Silent children cannot answer a y/N prompt. They are auto-approved only
 *   when the parent has opted in via `executionMode === 'fast'` AND the
 *   command is not in the dangerous set. Dangerous commands in silent
 *   children are always denied — there is no human to confirm the blast
 *   radius.
 * - Interactive parents in `fast` mode skip the prompt for safe commands and
 *   still gate dangerous ones through `askYesNo`. In `planning` mode every
 *   command routes through `askYesNo`.
 *
 * The `executionMode === 'fast'` check is the single source of truth for
 * "yolo-ish" behavior — the legacy `autoApproveShell` flag is migrated into
 * `executionMode === 'fast'` on first read of `preferencesStore`, so new
 * callers do not need to consult both.
 *
 * @param prefs Preferences carrying the current `executionMode`.
 * @param command Shell command the agent wants to run; classified with
 *   `isDangerousCommand`.
 * @param opts `silent` is true for a silent child that cannot answer a
 *   y/N prompt.
 * @returns `'auto-approve'`, `'ask'`, or `'deny-silent'` per the rules above.
 */
|
|
49
|
+
export declare function resolveRunCommandApproval(prefs: {
|
|
50
|
+
executionMode: 'planning' | 'fast';
|
|
51
|
+
}, command: string, opts: {
|
|
52
|
+
silent: boolean;
|
|
53
|
+
}): RunCommandApproval;
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single source of truth for "is this shell command destructive enough that we
|
|
3
|
+
* must confirm even in /mode fast?"
|
|
4
|
+
*
|
|
5
|
+
* Used by:
|
|
6
|
+
* - agent.ts `run_command`: in `executionMode === 'fast'` we skip the
|
|
7
|
+
* `askYesNo` prompt for everyday commands, but route through askYesNo
|
|
8
|
+
* anyway when this returns true.
|
|
9
|
+
* - tests: invariant that fast mode ≠ unconditional auto-approve.
|
|
10
|
+
*
|
|
11
|
+
* Heuristic, not a sandbox. The real blast-radius limiter is
|
|
12
|
+
* `BRAINROUTER_SANDBOX=on`. This list exists so that a typo
|
|
13
|
+
* (`rm -rf /` instead of `rm -rf ./build`) doesn't get auto-approved
|
|
14
|
+
* because the user happened to be in fast mode.
|
|
15
|
+
*
|
|
16
|
+
* Patterns are conservative on purpose: false-positives cost one extra y/N
|
|
17
|
+
* prompt; false-negatives cost a wiped disk. Add a pattern when you spot one
|
|
18
|
+
* — do not remove existing entries without a replacement.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Unanchored patterns; a command is dangerous when ANY of them matches
 * anywhere in the string. Entries are only ever added: each original pattern
 * is kept verbatim and the broader variants sit next to it, so nothing that
 * used to be gated can slip through after an edit.
 */
const DANGEROUS_PATTERNS = [
    // Recursive / forced deletions
    /\brm\s+(?:-[a-zA-Z]*[rRfF][a-zA-Z]*|--recursive\b|--force\b)/,
    // Same deletion flags when they follow other flags or operands
    // (`rm -v -rf build`, `rm build -rf`). Operands may not contain `|`, `;`
    // or `&`, so a flag of a later chained command is not attributed to `rm`.
    /\brm\s+(?:[^\s|;&]+\s+)+(?:-[a-zA-Z]*[rRfF][a-zA-Z]*|--recursive\b|--force\b)/,
    // Anything piped/awk'd into a shell — too easy to hide an `rm` inside.
    /\|\s*(?:sh|bash|zsh|fish)\b/,
    // Same, when the shell is named by path (`| /bin/sh`, `| ./bash`).
    /\|\s*\S*\/(?:sh|bash|zsh|fish)\b/,
    // Disk imaging / zeroing
    /\bdd\s+(?:if|of|bs|count)=/,
    /\bmkfs(?:\.[a-z0-9]+)?\b/,
    /\bfdisk\b/,
    /\bshred\b/,
    // Wide-open permission flips
    /\bchmod\s+(?:-R\s+)?(?:[0-7]*[7]{2,3}|a\+w)\b/,
    /\bchown\s+-R\b/,
    // Privilege escalation
    /\bsudo\b/,
    /\bsu\s+-/,
    // Forced or destructive git operations
    /\bgit\s+push\s+(?:-f|--force)/,
    // Force flag placed after the remote/refspec (`git push origin main --force`).
    /\bgit\s+push\b[^|;&]*\s(?:-[a-zA-Z]*f[a-zA-Z]*|--force)/,
    // A leading `+` on a refspec forces the update (`git push origin +main`).
    /\bgit\s+push\b[^|;&]*\s\+\S/,
    /\bgit\s+reset\s+--hard/,
    /\bgit\s+clean\s+-[a-zA-Z]*[fF]/,
    // Force flag after other flags, or spelled out (`git clean -d -f`, `--force`).
    /\bgit\s+clean\b[^|;&]*\s(?:-[a-zA-Z]*[fF]|--force)/,
    /\bgit\s+checkout\s+--\s/,
    /\bgit\s+branch\s+-D\b/,
    // Package-manager mutators that touch the global tree or remove deps
    /\bnpm\s+(?:uninstall|unpublish)\b/,
    /\b(?:yarn|pnpm)\s+remove\b/,
    // Process / system control
    /\bkillall\b/,
    /\bkill\s+-9\b/,
    // SIGKILL spelled by name or via `-s` / `-n` (`kill -KILL`, `kill -s 9`).
    /\bkill\s+-(?:[sn]\s+)?(?:9|(?:SIG)?KILL)\b/i,
    /\b(?:shutdown|reboot|halt|poweroff)\b/,
    // Outbound exec-from-network — the classic curl|sh exfil/exec pattern
    /\b(?:curl|wget|fetch)\b[^|]*\|\s*(?:sh|bash|zsh)\b/,
    // Database wipes
    /\bDROP\s+(?:DATABASE|TABLE|SCHEMA)\b/i,
    /\bTRUNCATE\s+TABLE\b/i,
    // Docker / k8s wipes
    /\bdocker\s+system\s+prune\b/,
    /\bdocker\s+(?:rm|rmi)\s+-f/,
    /\bkubectl\s+delete\b/,
];
/**
 * Returns true when the command matches any pattern that fast mode should
 * still gate through `askYesNo`. The check is a single-pass regex sweep
 * against the literal, trimmed command string — no shell parsing, no env
 * expansion.
 *
 * Patterns are unanchored, so a match anywhere in the string counts:
 * `rm -rf foo` matches, while `rm-rf` and `rmdir` do not, because the `rm`
 * patterns require whitespace between `rm` and what follows. When in doubt,
 * lean toward returning true: the cost of an extra y/N is much smaller than
 * the cost of accidentally letting a destructive command through.
 *
 * @param command Shell command string to inspect.
 * @returns `true` when any entry of `DANGEROUS_PATTERNS` matches; `false`
 *   otherwise, including for an empty or whitespace-only command.
 */
export function isDangerousCommand(command) {
    if (!command)
        return false;
    const normalized = command.trim();
    if (!normalized)
        return false;
    // None of the patterns carry the `g`/`y` flag, so `test` is stateless
    // and the shared regex objects are safe to reuse across calls.
    return DANGEROUS_PATTERNS.some((pattern) => pattern.test(normalized));
}
|
|
77
|
+
/**
 * Pure policy function answering "how should a `run_command` call from the
 * agent be handled?". Kept free of I/O so it can be unit-tested without a TTY.
 *
 * The prompt is skipped (`'auto-approve'`) only when `executionMode` is
 * `'fast'` AND `isDangerousCommand` does not flag the command. Every other
 * combination falls back to:
 *   - `'deny-silent'` for silent children, which have no human to answer a
 *     y/N prompt, so dangerous commands and all `planning`-mode commands are
 *     refused;
 *   - `'ask'` for interactive parents, which route through `askYesNo`.
 *
 * `executionMode === 'fast'` is the single switch for "yolo-ish" behavior;
 * the legacy `autoApproveShell` flag is migrated into it on first read of
 * `preferencesStore`, so callers never need to check both.
 *
 * @param prefs Preferences object; only `executionMode` is read.
 * @param command Shell command the agent wants to run.
 * @param opts Options object; `silent` marks a child that cannot prompt.
 * @returns `'auto-approve'`, `'ask'`, or `'deny-silent'`.
 */
export function resolveRunCommandApproval(prefs, command, opts) {
    const inFastMode = prefs.executionMode === 'fast';
    const isDestructive = isDangerousCommand(command);
    // What happens when the prompt cannot be skipped depends solely on
    // whether anyone is there to answer it.
    const fallback = opts.silent ? 'deny-silent' : 'ask';
    return inFastMode && !isDestructive ? 'auto-approve' : fallback;
}
|
|
@@ -10,10 +10,31 @@ export declare class McpClientWrapper {
|
|
|
10
10
|
* blowing up, which the agent's existing try/catch wrappers already handle.
|
|
11
11
|
*/
|
|
12
12
|
private connected;
|
|
13
|
+
/**
|
|
14
|
+
* 10a: cached identity. Set once by `detectMcpIdentity` after the first
|
|
15
|
+
* successful `listTools()` (or by `connect` if the config + URL gave us
|
|
16
|
+
* a clear signal). The value drives status surfaces and the brain-offline
|
|
17
|
+
* prompt swap — distinguishes "our brain went down" from "a random
|
|
18
|
+
* third-party MCP went down" once item 11's multi-MCP support lands.
|
|
19
|
+
*/
|
|
20
|
+
private identity;
|
|
21
|
+
private serverName?;
|
|
13
22
|
constructor();
|
|
14
23
|
/** Whether this wrapper has an active MCP transport. */
|
|
15
24
|
isConnected(): boolean;
|
|
16
|
-
|
|
25
|
+
/** 10a: who is this MCP? Set by `detectMcpIdentity`; 'unknown' before first list. */
|
|
26
|
+
getIdentity(): 'brainrouter' | 'third-party' | 'unknown';
|
|
27
|
+
/** 10a: profile name passed at connect (`brainrouter` / `local-http` / etc.). */
|
|
28
|
+
getServerName(): string | undefined;
|
|
29
|
+
/**
|
|
30
|
+
* 10a: connect with an optional `name` so the wrapper can render identity
|
|
31
|
+
* tags ("BrainRouter MCP offline" vs "third-party MCP offline") without
|
|
32
|
+
* the caller threading it through every error path. The pre-10a single-
|
|
33
|
+
* arg form remains supported — callers that don't pass a name fall back
|
|
34
|
+
* to URL-pattern detection.
|
|
35
|
+
*/
|
|
36
|
+
connect(serverConfig: ServerConfig, llmConfig?: LLMConfig, name?: string): Promise<void>;
|
|
37
|
+
private _connect;
|
|
17
38
|
listTools(): Promise<{
|
|
18
39
|
[x: string]: unknown;
|
|
19
40
|
tools: {
|
|
@@ -154,3 +175,19 @@ export declare class McpClientWrapper {
|
|
|
154
175
|
}>;
|
|
155
176
|
close(): Promise<void>;
|
|
156
177
|
}
|
|
178
|
+
/**
 * 10a: figure out who an MCP profile belongs to from config metadata, name
 * and URL alone, before any network call. An explicit `identity` wins;
 * otherwise the name prefix and URL host are checked. Returns 'unknown' when
 * nothing matches — the caller (currently `listTools`) then falls back to
 * tool-signature detection after the first successful enumeration.
 *
 * Detection cases:
 * - explicit `identity: 'brainrouter'` or `identity: 'third-party'` → that.
 * - profile name (case-insensitive) starts with `brainrouter` → brainrouter.
 * - http URL hostname matches `*.brainrouter.cloud` / `*.brainrouter.dev`
 *   / `*.brainrouter.io` / `*.kinqs.brainrouter.*` → brainrouter.
 * - stdio command basename matches `brainrouter` / `brainrouter-mcp` → brainrouter.
 * - otherwise → unknown (let the tool-signature fallback decide).
 *
 * @param serverConfig MCP server configuration to inspect (explicit
 *   `identity`, http URL, or stdio command, per the cases above).
 * @param name Optional profile name used for the name-prefix check.
 * @returns `'brainrouter'`, `'third-party'`, or `'unknown'` when no case
 *   matches.
 */
|
|
193
|
+
export declare function resolveIdentityFromConfig(serverConfig: ServerConfig, name?: string): 'brainrouter' | 'third-party' | 'unknown';
|