muonroi-cli 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +122 -122
- package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
- package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
- package/dist/src/agent-harness/mock-model.d.ts +11 -0
- package/dist/src/agent-harness/mock-model.js +21 -0
- package/dist/src/cli/cost-forensics.js +12 -12
- package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
- package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
- package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
- package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
- package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
- package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
- package/dist/src/council/clarifier.js +9 -1
- package/dist/src/council/debate.js +5 -1
- package/dist/src/council/decisions-lock.js +3 -3
- package/dist/src/council/index.js +12 -5
- package/dist/src/council/leader.d.ts +0 -17
- package/dist/src/council/leader.js +22 -15
- package/dist/src/council/planner.js +1 -1
- package/dist/src/council/prompts.js +63 -57
- package/dist/src/council/types.d.ts +7 -0
- package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
- package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
- package/dist/src/ee/artifact-cache.d.ts +56 -0
- package/dist/src/ee/artifact-cache.js +155 -0
- package/dist/src/ee/artifact-cache.test.d.ts +1 -0
- package/dist/src/ee/artifact-cache.test.js +69 -0
- package/dist/src/ee/auth.d.ts +9 -0
- package/dist/src/ee/auth.js +19 -0
- package/dist/src/ee/ee-onboarding.d.ts +5 -0
- package/dist/src/ee/ee-onboarding.js +76 -0
- package/dist/src/ee/search.js +7 -5
- package/dist/src/ee/search.test.d.ts +1 -0
- package/dist/src/ee/search.test.js +23 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/headless/output.js +6 -4
- package/dist/src/headless/output.test.js +4 -3
- package/dist/src/index.js +20 -1
- package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
- package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
- package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
- package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
- package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
- package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
- package/dist/src/mcp/auto-setup.js +56 -2
- package/dist/src/mcp/client-pool.d.ts +46 -0
- package/dist/src/mcp/client-pool.js +212 -0
- package/dist/src/mcp/oauth-callback.js +2 -2
- package/dist/src/mcp/parse-headers.test.js +14 -14
- package/dist/src/mcp/runtime.d.ts +28 -0
- package/dist/src/mcp/runtime.js +117 -51
- package/dist/src/mcp/self-verify-runner.d.ts +14 -0
- package/dist/src/mcp/self-verify-runner.js +38 -0
- package/dist/src/mcp/setup-guide-text.d.ts +9 -0
- package/dist/src/mcp/setup-guide-text.js +84 -0
- package/dist/src/mcp/smart-filter.js +49 -0
- package/dist/src/mcp/smoke.test.js +43 -43
- package/dist/src/mcp/tools-server.d.ts +7 -0
- package/dist/src/mcp/tools-server.js +19 -22
- package/dist/src/models/catalog.json +349 -349
- package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
- package/dist/src/ops/doctor.d.ts +3 -2
- package/dist/src/ops/doctor.js +47 -11
- package/dist/src/ops/doctor.test.js +4 -3
- package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
- package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
- package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
- package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
- package/dist/src/orchestrator/batch-turn-runner.js +7 -11
- package/dist/src/orchestrator/compaction.d.ts +2 -0
- package/dist/src/orchestrator/compaction.js +14 -1
- package/dist/src/orchestrator/compaction.test.js +25 -1
- package/dist/src/orchestrator/message-processor.js +72 -32
- package/dist/src/orchestrator/orchestrator.js +26 -0
- package/dist/src/orchestrator/prompts.d.ts +51 -0
- package/dist/src/orchestrator/prompts.js +257 -134
- package/dist/src/orchestrator/scope-ceiling.js +6 -1
- package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
- package/dist/src/orchestrator/scope-reminder.js +16 -0
- package/dist/src/orchestrator/scope-reminder.test.js +22 -1
- package/dist/src/orchestrator/stream-runner.js +23 -15
- package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
- package/dist/src/orchestrator/subagent-compactor.js +30 -8
- package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
- package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
- package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
- package/dist/src/pil/__tests__/config.test.js +1 -17
- package/dist/src/pil/__tests__/discovery.test.js +144 -11
- package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
- package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
- package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
- package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
- package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
- package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
- package/dist/src/pil/agent-operating-contract.d.ts +1 -1
- package/dist/src/pil/agent-operating-contract.js +2 -0
- package/dist/src/pil/agent-operating-contract.test.js +7 -2
- package/dist/src/pil/cheap-model-playbook.js +35 -35
- package/dist/src/pil/cheap-model-workbooks.js +16 -13
- package/dist/src/pil/clarity-gate.d.ts +21 -19
- package/dist/src/pil/clarity-gate.js +26 -153
- package/dist/src/pil/config.d.ts +9 -1
- package/dist/src/pil/config.js +15 -4
- package/dist/src/pil/discovery.js +211 -136
- package/dist/src/pil/layer1-intent.d.ts +12 -0
- package/dist/src/pil/layer1-intent.js +283 -38
- package/dist/src/pil/layer1-intent.test.js +210 -4
- package/dist/src/pil/layer16-clarity.d.ts +25 -11
- package/dist/src/pil/layer16-clarity.js +19 -306
- package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
- package/dist/src/pil/layer3-ee-injection.js +96 -4
- package/dist/src/pil/layer4-gsd.js +18 -6
- package/dist/src/pil/layer6-output.d.ts +2 -0
- package/dist/src/pil/layer6-output.js +151 -25
- package/dist/src/pil/llm-classify.d.ts +26 -0
- package/dist/src/pil/llm-classify.js +34 -5
- package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
- package/dist/src/pil/native-capabilities-workbook.js +82 -76
- package/dist/src/pil/pipeline.js +15 -9
- package/dist/src/pil/schema.d.ts +8 -0
- package/dist/src/pil/schema.js +12 -1
- package/dist/src/pil/task-tier-map.js +4 -0
- package/dist/src/pil/types.d.ts +11 -1
- package/dist/src/product-loop/done-gate.js +3 -3
- package/dist/src/product-loop/loop-driver.js +18 -18
- package/dist/src/product-loop/progress-snapshot.js +4 -4
- package/dist/src/providers/auth/gemini-oauth.js +6 -15
- package/dist/src/providers/auth/grok-oauth.js +6 -15
- package/dist/src/providers/auth/openai-oauth.js +6 -15
- package/dist/src/providers/mcp-vision-bridge.js +48 -48
- package/dist/src/reporter/index.js +1 -1
- package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
- package/dist/src/scaffold/bb-quality-gate.js +5 -5
- package/dist/src/scaffold/continuation-prompt.js +60 -60
- package/dist/src/scaffold/init-new.js +453 -453
- package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
- package/dist/src/self-qa/agentic-loop.js +24 -19
- package/dist/src/self-qa/spec-emitter.js +26 -23
- package/dist/src/storage/__tests__/migrations.test.js +2 -2
- package/dist/src/storage/interaction-log.js +5 -5
- package/dist/src/storage/migrations.js +122 -122
- package/dist/src/storage/sessions.js +42 -42
- package/dist/src/storage/transcript.js +91 -84
- package/dist/src/storage/usage.js +14 -14
- package/dist/src/storage/workspaces.js +12 -12
- package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
- package/dist/src/tools/__tests__/native-tools.test.js +53 -0
- package/dist/src/tools/git-safety.d.ts +61 -0
- package/dist/src/tools/git-safety.js +141 -0
- package/dist/src/tools/git-safety.test.d.ts +1 -0
- package/dist/src/tools/git-safety.test.js +111 -0
- package/dist/src/tools/native-tools.d.ts +31 -0
- package/dist/src/tools/native-tools.js +273 -0
- package/dist/src/tools/registry-ee-query.test.js +18 -1
- package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
- package/dist/src/tools/registry-git-safety.test.js +92 -0
- package/dist/src/tools/registry.js +52 -6
- package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
- package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
- package/dist/src/ui/app.js +0 -0
- package/dist/src/ui/components/message-view.js +4 -1
- package/dist/src/ui/components/structured-response-view.js +7 -3
- package/dist/src/ui/components/tool-group.js +7 -1
- package/dist/src/ui/markdown-render.d.ts +41 -0
- package/dist/src/ui/markdown-render.js +223 -0
- package/dist/src/ui/markdown.d.ts +10 -0
- package/dist/src/ui/markdown.js +12 -35
- package/dist/src/ui/slash/council-inspect.js +4 -4
- package/dist/src/ui/slash/export.js +4 -4
- package/dist/src/ui/utils/text.d.ts +8 -0
- package/dist/src/ui/utils/text.js +16 -0
- package/dist/src/ui/utils/text.test.d.ts +1 -0
- package/dist/src/ui/utils/text.test.js +23 -0
- package/dist/src/usage/ledger.js +48 -15
- package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
- package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
- package/dist/src/utils/clipboard-image.js +23 -23
- package/dist/src/utils/open-url.d.ts +56 -0
- package/dist/src/utils/open-url.js +58 -0
- package/dist/src/utils/open-url.test.d.ts +1 -0
- package/dist/src/utils/open-url.test.js +86 -0
- package/dist/src/utils/settings.d.ts +12 -0
- package/dist/src/utils/settings.js +48 -0
- package/dist/src/utils/side-question.js +2 -2
- package/dist/src/utils/skills.js +3 -3
- package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
- package/dist/src/verify/environment.js +2 -1
- package/package.json +1 -1
- package/dist/src/pil/layer16-clarity.test.js +0 -31
- /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
|
@@ -244,6 +244,27 @@ export class Agent {
|
|
|
244
244
|
this.pendingCalls = options.pendingCalls ?? null;
|
|
245
245
|
this.permissionMode = options.permissionMode ?? "safe";
|
|
246
246
|
ensureDefaultMcpServers();
|
|
247
|
+
// Pre-warm the always-on MCP servers in the BACKGROUND so they're pooled
|
|
248
|
+
// before the first user turn. npx stdio servers (filesystem/memory)
|
|
249
|
+
// cold-start >2.5s and would otherwise miss the first turn's build deadline
|
|
250
|
+
// (shown as "MCP unavailable: ... still connecting — available next turn").
|
|
251
|
+
// Empty-message smart-filter keeps only the baseline (drops browser/web
|
|
252
|
+
// categories) so we don't speculatively spawn playwright/tavily. Fire-and-
|
|
253
|
+
// forget; the pool handles errors and the per-turn acquire still connects on
|
|
254
|
+
// demand if this is skipped.
|
|
255
|
+
void (async () => {
|
|
256
|
+
try {
|
|
257
|
+
const [{ warmMcpClients }, { loadMcpServers }, { filterMcpServersByMessage }] = await Promise.all([
|
|
258
|
+
import("../mcp/client-pool.js"),
|
|
259
|
+
import("../utils/settings.js"),
|
|
260
|
+
import("../mcp/smart-filter.js"),
|
|
261
|
+
]);
|
|
262
|
+
warmMcpClients(filterMcpServersByMessage(loadMcpServers(), ""));
|
|
263
|
+
}
|
|
264
|
+
catch (err) {
|
|
265
|
+
console.error(`[orchestrator] MCP pre-warm skipped: ${err?.message}`);
|
|
266
|
+
}
|
|
267
|
+
})();
|
|
247
268
|
if (options.persistSession !== false) {
|
|
248
269
|
this.sessionStore = new SessionStore(this.bash.getCwd());
|
|
249
270
|
this.workspace = this.sessionStore.getWorkspace();
|
|
@@ -469,6 +490,11 @@ export class Agent {
|
|
|
469
490
|
this.bash.cleanup(),
|
|
470
491
|
shutdownWorkspaceLspManager(this.bash.getCwd()),
|
|
471
492
|
extractSession(this.messages, this.bash.getCwd(), "cli-exit", this.getSessionId()),
|
|
493
|
+
// Tear down pooled MCP clients (client-pool.ts). They persist across turns
|
|
494
|
+
// by design (no per-turn cold-spawn), so the only real teardown is here at
|
|
495
|
+
// session end. Stdio children would die with the process anyway, but close
|
|
496
|
+
// them gracefully on a clean exit.
|
|
497
|
+
import("../mcp/client-pool.js").then((m) => m.closeAllMcpClients()),
|
|
472
498
|
]);
|
|
473
499
|
}
|
|
474
500
|
// Tool-loop cap handler — set by the UI (app.tsx) at startup. Invoked from
|
|
@@ -3,6 +3,38 @@ import { type CustomSubagentConfig, type SandboxMode, type SandboxSettings } fro
|
|
|
3
3
|
export declare const MAX_TOOL_ROUNDS: number;
|
|
4
4
|
export declare const VISION_MODEL = "grok-4-1-fast-reasoning";
|
|
5
5
|
export declare const COMPUTER_MODEL = "grok-4.20-0309-reasoning";
|
|
6
|
+
/**
|
|
7
|
+
* Phase 5 Fix — Env-aware ENVIRONMENT block.
|
|
8
|
+
*
|
|
9
|
+
* Replaces the static rendering-only block with a dynamic block that
|
|
10
|
+
* tells the model exactly which OS + shell + cwd it's operating in.
|
|
11
|
+
* Without this the model historically emitted PowerShell cmdlets
|
|
12
|
+
* (Get-ChildItem, Select-Object, $null), cmd.exe syntax (del, if exist),
|
|
13
|
+
* or POSIX tools that aren't installed (hyperfine) — all of which fail
|
|
14
|
+
* silently in the bash tool and waste tokens on retry-cascades.
|
|
15
|
+
*
|
|
16
|
+
* Evidence: sessions f9a4cea1bf44, 9c63a38197f3, d0dc4a1f542a,
|
|
17
|
+
* 77cd2e11c6a5, 1bc27b79223c all logged shell-mismatch errors.
|
|
18
|
+
*
|
|
19
|
+
* The block is recomputed on each system-prompt assembly so settings
|
|
20
|
+
* changes (MUONROI_SHELL override, shell.kind config) are reflected
|
|
21
|
+
* without a CLI restart.
|
|
22
|
+
*/
|
|
23
|
+
/**
|
|
24
|
+
* Deterministically detect the project's stack from manifest/lockfile presence
|
|
25
|
+
* at the workspace root. Pure (no LLM), cheap (one readdir), zero-hardcode (no
|
|
26
|
+
* model/provider IDs — only ecosystem markers). Returns a compact one-line
|
|
27
|
+
* summary like "TypeScript · pkg: bun · tests: vitest · vcs: git", or "" when
|
|
28
|
+
* nothing recognizable is present (greenfield / unreadable dir).
|
|
29
|
+
*
|
|
30
|
+
* Motivation (2026-06-14 dogfood): the ENVIRONMENT block told the model its OS,
|
|
31
|
+
* shell, and cwd but never WHICH project it was in — so the model acted
|
|
32
|
+
* context-blind, assumed Python, and asked the user to describe the repo it was
|
|
33
|
+
* already running inside. This gives every model, on every turn, in every mode
|
|
34
|
+
* (agent/plan/ask) and for every provider (it is NOT in the strippable TOOLS
|
|
35
|
+
* section), a concrete self-model of the codebase it can act on.
|
|
36
|
+
*/
|
|
37
|
+
export declare function detectProjectStack(cwd: string): string;
|
|
6
38
|
export declare function findCustomSubagent(agent: string, subagents?: CustomSubagentConfig[]): CustomSubagentConfig | undefined;
|
|
7
39
|
export declare function formatCustomSubagentsPromptSection(subagents: CustomSubagentConfig[]): string;
|
|
8
40
|
export interface SystemPromptParts {
|
|
@@ -24,6 +56,25 @@ export interface SystemPromptOptions {
|
|
|
24
56
|
*/
|
|
25
57
|
chitchat?: boolean;
|
|
26
58
|
}
|
|
59
|
+
/**
|
|
60
|
+
* Render the LIVE per-turn MCP tool roster as a system-prompt block.
|
|
61
|
+
*
|
|
62
|
+
* The static prompt only states the mcp_<server>__<tool> naming convention; it
|
|
63
|
+
* never names the tools actually connected this turn, and the per-message smart
|
|
64
|
+
* filter can drop whole servers. The model therefore receives connected MCP
|
|
65
|
+
* tools ONLY as raw tool JSON, which it can overlook — live failure
|
|
66
|
+
* (session f6f7881a5fae): asked to call `setup_guide`, the agent said "I don't
|
|
67
|
+
* have a direct call_mcp tool" and drove the muonroi-docs server by hand over
|
|
68
|
+
* bash JSON-RPC, fabricating output. Surfacing the exact callable names in prose
|
|
69
|
+
* closes that gap.
|
|
70
|
+
*
|
|
71
|
+
* `toolNames` should be the keys of the FINAL assembled tool set for the turn
|
|
72
|
+
* (post smart-filter, post fs-dedup). Returns "" when no MCP tool is connected,
|
|
73
|
+
* so non-agent / chitchat / no-client-tools turns add nothing. The block is
|
|
74
|
+
* DYNAMIC (varies per turn) so callers must append it OUTSIDE the cached static
|
|
75
|
+
* prefix.
|
|
76
|
+
*/
|
|
77
|
+
export declare function buildMcpCapabilityBlock(toolNames: readonly string[]): string;
|
|
27
78
|
export declare function buildSystemPromptParts(cwd: string, mode: AgentMode, sandboxMode: SandboxMode, planContext?: string | null, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string, resumeDigest?: string | null, options?: SystemPromptOptions): SystemPromptParts;
|
|
28
79
|
export declare function buildSystemPrompt(cwd: string, mode: AgentMode, sandboxMode: SandboxMode, planContext?: string | null, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string, resumeDigest?: string | null, options?: SystemPromptOptions): string;
|
|
29
80
|
export declare function buildSubagentPrompt(request: TaskRequest, cwd: string, custom: CustomSubagentConfig | null, sandboxMode: SandboxMode, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string): string;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import * as fs from "node:fs";
|
|
1
2
|
import { getModelInfo } from "../models/registry.js";
|
|
2
3
|
import { buildContractSection } from "../pil/agent-operating-contract.js";
|
|
3
4
|
import { buildNativeCapabilitiesSection } from "../pil/native-capabilities-workbook.js";
|
|
@@ -38,6 +39,81 @@ export const COMPUTER_MODEL = "grok-4.20-0309-reasoning";
|
|
|
38
39
|
* changes (MUONROI_SHELL override, shell.kind config) are reflected
|
|
39
40
|
* without a CLI restart.
|
|
40
41
|
*/
|
|
42
|
+
/**
|
|
43
|
+
* Deterministically detect the project's stack from manifest/lockfile presence
|
|
44
|
+
* at the workspace root. Pure (no LLM), cheap (one readdir), zero-hardcode (no
|
|
45
|
+
* model/provider IDs — only ecosystem markers). Returns a compact one-line
|
|
46
|
+
* summary like "TypeScript · pkg: bun · tests: vitest · vcs: git", or "" when
|
|
47
|
+
* nothing recognizable is present (greenfield / unreadable dir).
|
|
48
|
+
*
|
|
49
|
+
* Motivation (2026-06-14 dogfood): the ENVIRONMENT block told the model its OS,
|
|
50
|
+
* shell, and cwd but never WHICH project it was in — so the model acted
|
|
51
|
+
* context-blind, assumed Python, and asked the user to describe the repo it was
|
|
52
|
+
* already running inside. This gives every model, on every turn, in every mode
|
|
53
|
+
* (agent/plan/ask) and for every provider (it is NOT in the strippable TOOLS
|
|
54
|
+
* section), a concrete self-model of the codebase it can act on.
|
|
55
|
+
*/
|
|
56
|
+
/**
 * Summarise the project stack found at a workspace root.
 *
 * Only the names returned by a single `fs.readdirSync(cwd)` are inspected; no
 * file is opened and no sub-directory is walked. Each category (language,
 * package manager, test runner) reports the FIRST rule that matches, so the
 * order of the rule tables below is significant.
 *
 * @param {string} cwd - Directory to inspect (the workspace root).
 * @returns {string} Segments joined by " · ", e.g.
 *   "TypeScript · pkg: bun · tests: vitest · vcs: git". Returns "" when no
 *   marker is recognised or when the directory cannot be read.
 */
export function detectProjectStack(cwd) {
    let entries;
    try {
        entries = fs.readdirSync(cwd);
    }
    catch (err) {
        // Best-effort enrichment: an unreadable cwd just omits the stack line.
        // The log is debug-gated so prompt assembly stays silent by default.
        if (process.env.MUONROI_DEBUG === "1") {
            console.error(`[orchestrator/prompts] detectProjectStack failed for ${cwd}: ${err?.message}`);
        }
        return "";
    }
    // Index the listing once: exact-name probes become Set lookups instead of
    // repeated linear scans, and extension probes reuse the lowercased names.
    const names = new Set(entries);
    const lowered = entries.map((entry) => entry.toLowerCase());
    const has = (...candidates) => candidates.some((name) => names.has(name));
    const hasExt = (...exts) => lowered.some((entry) => exts.some((ext) => entry.endsWith(ext)));
    const matchesAny = (pattern) => entries.some((entry) => pattern.test(entry));
    // Returns the label of the first rule whose predicate holds, else "".
    const firstMatch = (rules) => rules.find(([, matches]) => matches())?.[0] ?? "";
    const lang = firstMatch([
        ["TypeScript", () => has("tsconfig.json")],
        ["JavaScript/Node", () => has("package.json")],
        ["Rust", () => has("Cargo.toml")],
        ["Go", () => has("go.mod")],
        ["Python", () => has("pyproject.toml", "requirements.txt", "setup.py")],
        [".NET/C#", () => hasExt(".csproj", ".sln") || has("Directory.Build.props")],
        ["Java (Maven)", () => has("pom.xml")],
        ["Java/Kotlin (Gradle)", () => has("build.gradle", "build.gradle.kts")],
    ]);
    const pkg = firstMatch([
        ["bun", () => has("bun.lockb", "bun.lock")],
        ["pnpm", () => has("pnpm-lock.yaml")],
        ["yarn", () => has("yarn.lock")],
        ["npm", () => has("package-lock.json")],
    ]);
    const tests = firstMatch([
        ["vitest", () => matchesAny(/^vitest\.([\w.-]+\.)?config\.(ts|js|mjs|cjs|cts|mts)$/i)],
        ["jest", () => matchesAny(/^jest\.config\./i)],
        ["pytest", () => has("pytest.ini", "tox.ini")],
    ]);
    const vcs = has(".git") ? "git" : "";
    // Empty categories are dropped so the summary never contains blank segments.
    return [
        lang,
        pkg && `pkg: ${pkg}`,
        tests && `tests: ${tests}`,
        vcs && `vcs: ${vcs}`,
    ].filter(Boolean).join(" · ");
}
|
|
41
117
|
function buildEnvironmentBlock() {
|
|
42
118
|
const platform = process.platform;
|
|
43
119
|
const osName = platform === "win32" ? "Windows" : platform === "darwin" ? "macOS" : platform === "linux" ? "Linux" : platform;
|
|
@@ -74,11 +150,14 @@ function buildEnvironmentBlock() {
|
|
|
74
150
|
else if (shell.kind === "cmd") {
|
|
75
151
|
shellRules.push("- The bash tool runs cmd.exe. Use cmd.exe syntax: dir, type, copy, del, if exist, for %%.", "- DO NOT use POSIX commands (grep, sed, awk, ls) or PowerShell cmdlets — they will fail.", "- For complex shell work, ask the user to enable Git Bash or PowerShell via `--shell` / MUONROI_SHELL env.");
|
|
76
152
|
}
|
|
153
|
+
const projectStack = cwd === "<unknown>" ? "" : detectProjectStack(cwd);
|
|
77
154
|
return [
|
|
78
155
|
"ENVIRONMENT:",
|
|
79
156
|
`- OS: ${osName} (${platform})`,
|
|
80
157
|
`- Shell available via bash tool: ${shellKindLabel} (kind=${shell.kind})`,
|
|
81
158
|
`- Working directory: ${cwd}`,
|
|
159
|
+
...(projectStack ? [`- Project stack: ${projectStack}`] : []),
|
|
160
|
+
"- You are running INSIDE this repository: read and search it with your own tools instead of asking the user to describe its files, structure, or stack. You can act on what you find here directly.",
|
|
82
161
|
"",
|
|
83
162
|
"Terminal rendering:",
|
|
84
163
|
"- Your text output is rendered in a plain terminal — not a browser, not a rich text editor.",
|
|
@@ -95,138 +174,138 @@ function buildEnvironmentBlock() {
|
|
|
95
174
|
}
|
|
96
175
|
const ENVIRONMENT = buildEnvironmentBlock();
|
|
97
176
|
const MODE_PROMPTS = {
|
|
98
|
-
agent: `You are muonroi-cli in Agent mode — a powerful AI coding agent. You execute tasks directly using tools.
|
|
99
|
-
|
|
100
|
-
${ENVIRONMENT}
|
|
101
|
-
|
|
102
|
-
TOOLS:
|
|
103
|
-
- read_file: Read file contents with start_line/end_line for iterative reading. Use for examining code.
|
|
104
|
-
- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files. Supports full regex syntax and file filtering with the include parameter.
|
|
105
|
-
- lsp: Experimental semantic code intelligence for definitions, references, hover, symbols, implementations, and call hierarchy when a matching language server is available.
|
|
106
|
-
- write_file: Create new files or overwrite existing ones with full content.
|
|
107
|
-
- edit_file: Replace a unique string in a file with new content. The old_string must be unique — include enough context lines.
|
|
108
|
-
- bash: Execute shell commands. Set background=true for long-running processes (dev servers, watchers, builds). Returns a process ID immediately.
|
|
109
|
-
- process_logs: View recent output from a background process by ID.
|
|
110
|
-
- process_stop: Stop a background process by ID.
|
|
111
|
-
- process_list: List all background processes with status and uptime.
|
|
112
|
-
- wallet_info: Check the local wallet address, chain, and current ETH/USDC balances.
|
|
113
|
-
- wallet_history: Show recent x402 payment history from the audit log.
|
|
114
|
-
- fetch_payment_info: Inspect a URL for x402 payment requirements without paying. Returns payment options and a brin security score. Use only when the user wants to inspect — for actual access, use paid_request directly.
|
|
115
|
-
- paid_request: Access an x402-protected URL using the local wallet. Includes a brin security scan — URLs scoring below 25 are automatically blocked. The user will be prompted to approve the payment before it executes. Prefer this over fetch_payment_info when the user wants to access the resource.
|
|
116
|
-
- task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, computer for host desktop screenshot/input workflows, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
|
|
117
|
-
- delegate: Launch a read-only background agent for longer research while you continue working.
|
|
118
|
-
- delegation_read: Retrieve a completed background delegation result by ID.
|
|
119
|
-
- delegation_list: List running and completed background delegations. Do not poll it repeatedly.
|
|
120
|
-
- schedule_create: Create a recurring or one-time scheduled headless run.
|
|
121
|
-
- schedule_list: List saved schedules and their status.
|
|
122
|
-
- schedule_remove: Remove a saved schedule.
|
|
123
|
-
- schedule_read_log: Read recent log output from a schedule.
|
|
124
|
-
- schedule_daemon_status: Check whether the schedule daemon is running.
|
|
125
|
-
- schedule_daemon_start: Start the schedule daemon in the background.
|
|
126
|
-
- schedule_daemon_stop: Stop the schedule daemon.
|
|
127
|
-
- search_web: Search the web for current information, documentation, APIs, tutorials, etc.
|
|
128
|
-
- search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
|
|
129
|
-
- generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
|
|
130
|
-
- generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
|
|
131
|
-
- computer_snapshot: Capture an accessibility-tree snapshot with stable refs like @e1 for desktop interaction.
|
|
132
|
-
- computer_screenshot: Capture a host desktop screenshot for visual confirmation or fallback inspection.
|
|
133
|
-
- computer_click: Click a desktop element by ref, or coordinates as a fallback.
|
|
134
|
-
- computer_mouse_move: Hover a desktop element by ref, or coordinates as a fallback.
|
|
135
|
-
- computer_type: Type text into a specific desktop element ref.
|
|
136
|
-
- computer_press: Press a key or key chord in the focused host application.
|
|
137
|
-
- computer_scroll: Scroll a desktop element by ref.
|
|
138
|
-
- computer_launch: Launch an application and wait for its window to appear.
|
|
139
|
-
- computer_list_windows: List visible windows and their ids.
|
|
140
|
-
- computer_focus_window: Bring a target window to the front.
|
|
141
|
-
- computer_wait: Wait for time, elements, windows, or text during desktop workflows.
|
|
142
|
-
- computer_get: Read a property from a desktop element ref.
|
|
143
|
-
- MCP tools:
|
|
144
|
-
|
|
145
|
-
WORKFLOW:
|
|
146
|
-
1. Understand the request
|
|
147
|
-
2. Decide whether a sub-agent should handle the first investigation pass
|
|
148
|
-
3. Use read_file, grep, lsp, and bash to explore the codebase directly when the task is small or tightly scoped
|
|
149
|
-
4. Use bash with background=true for dev servers, watchers, or any long-running process — then continue working
|
|
150
|
-
5. Use delegate for read-only work that can run in parallel, then continue productive work
|
|
151
|
-
6. Use edit_file for targeted changes, write_file for new files or full rewrites
|
|
152
|
-
7. Verify changes by reading modified files
|
|
153
|
-
8. Run tests or builds with bash to confirm correctness
|
|
154
|
-
9. Use search_web or search_x when you need up-to-date information
|
|
155
|
-
|
|
156
|
-
DEFAULT DELEGATION POLICY:
|
|
157
|
-
- Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
|
|
158
|
-
- Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
|
|
159
|
-
- Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
|
|
160
|
-
- Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
|
|
161
|
-
- Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
|
|
162
|
-
- Use the computer sub-agent for host desktop interaction workflows that need screenshots, clicks, typing, keypresses, or scrolling.
|
|
163
|
-
- Use a matching custom sub-agent when the task fits one of the configured specializations.
|
|
164
|
-
- Never use delegate for tasks that should edit files or make shell changes.
|
|
165
|
-
- When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
|
|
166
|
-
- Do not wait for the user to explicitly ask for a sub-agent when delegation would clearly help.
|
|
167
|
-
- Skip delegation only when the task is trivial, single-file, or you already have the exact answer.
|
|
168
|
-
|
|
169
|
-
EXAMPLES:
|
|
170
|
-
- "review this change" -> delegate to explore first
|
|
171
|
-
- "research how auth works" -> delegate to explore first
|
|
172
|
-
- "investigate why this test fails" -> delegate to explore first, then continue with findings
|
|
173
|
-
- "refactor this module" -> delegate a focused part to general when helpful
|
|
174
|
-
- "verify this feature locally" -> use verify
|
|
175
|
-
- "open the host app and click through it" -> use computer
|
|
176
|
-
- "generate a logo" -> use generate_image
|
|
177
|
-
- "animate this still image" -> use generate_video
|
|
178
|
-
- Recurring specialized workflows -> use the matching custom sub-agent via task
|
|
179
|
-
- "every weekday at 9am run this check" -> use schedule_create with a cron expression
|
|
180
|
-
- "run this once automatically" -> use schedule_create with the right timing
|
|
181
|
-
- "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
|
|
182
|
-
|
|
183
|
-
IMPORTANT:
|
|
184
|
-
- Prefer edit_file for surgical changes to existing files — it shows a clean diff.
|
|
185
|
-
- Prefer grep over bash for searching file contents. Use bash only for find, ls, git, and other shell commands.
|
|
186
|
-
- Prefer lsp over text search when you need exact definitions, references, implementations, or call hierarchy and a server is available.
|
|
187
|
-
- Use write_file only for new files or when most of the file is changing. For very large files (>500 lines), split into multiple edit_file calls or write smaller chunks.
|
|
188
|
-
- Use read_file instead of cat/head/tail for reading files.
|
|
189
|
-
- When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
|
|
190
|
-
- After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
|
|
191
|
-
|
|
177
|
+
agent: `You are muonroi-cli in Agent mode — a powerful AI coding agent. You execute tasks directly using tools.
|
|
178
|
+
|
|
179
|
+
${ENVIRONMENT}
|
|
180
|
+
|
|
181
|
+
TOOLS:
|
|
182
|
+
- read_file: Read file contents with start_line/end_line for iterative reading. Use for examining code.
|
|
183
|
+
- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files. Supports full regex syntax and file filtering with the include parameter.
|
|
184
|
+
- lsp: Experimental semantic code intelligence for definitions, references, hover, symbols, implementations, and call hierarchy when a matching language server is available.
|
|
185
|
+
- write_file: Create new files or overwrite existing ones with full content.
|
|
186
|
+
- edit_file: Replace a unique string in a file with new content. The old_string must be unique — include enough context lines.
|
|
187
|
+
- bash: Execute shell commands. Set background=true for long-running processes (dev servers, watchers, builds). Returns a process ID immediately.
|
|
188
|
+
- process_logs: View recent output from a background process by ID.
|
|
189
|
+
- process_stop: Stop a background process by ID.
|
|
190
|
+
- process_list: List all background processes with status and uptime.
|
|
191
|
+
- wallet_info: Check the local wallet address, chain, and current ETH/USDC balances.
|
|
192
|
+
- wallet_history: Show recent x402 payment history from the audit log.
|
|
193
|
+
- fetch_payment_info: Inspect a URL for x402 payment requirements without paying. Returns payment options and a brin security score. Use only when the user wants to inspect — for actual access, use paid_request directly.
|
|
194
|
+
- paid_request: Access an x402-protected URL using the local wallet. Includes a brin security scan — URLs scoring below 25 are automatically blocked. The user will be prompted to approve the payment before it executes. Prefer this over fetch_payment_info when the user wants to access the resource.
|
|
195
|
+
- task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, computer for host desktop screenshot/input workflows, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
|
|
196
|
+
- delegate: Launch a read-only background agent for longer research while you continue working.
|
|
197
|
+
- delegation_read: Retrieve a completed background delegation result by ID.
|
|
198
|
+
- delegation_list: List running and completed background delegations. Do not poll it repeatedly.
|
|
199
|
+
- schedule_create: Create a recurring or one-time scheduled headless run.
|
|
200
|
+
- schedule_list: List saved schedules and their status.
|
|
201
|
+
- schedule_remove: Remove a saved schedule.
|
|
202
|
+
- schedule_read_log: Read recent log output from a schedule.
|
|
203
|
+
- schedule_daemon_status: Check whether the schedule daemon is running.
|
|
204
|
+
- schedule_daemon_start: Start the schedule daemon in the background.
|
|
205
|
+
- schedule_daemon_stop: Stop the schedule daemon.
|
|
206
|
+
- search_web: Search the web for current information, documentation, APIs, tutorials, etc.
|
|
207
|
+
- search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
|
|
208
|
+
- generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
|
|
209
|
+
- generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
|
|
210
|
+
- computer_snapshot: Capture an accessibility-tree snapshot with stable refs like @e1 for desktop interaction.
|
|
211
|
+
- computer_screenshot: Capture a host desktop screenshot for visual confirmation or fallback inspection.
|
|
212
|
+
- computer_click: Click a desktop element by ref, or coordinates as a fallback.
|
|
213
|
+
- computer_mouse_move: Hover a desktop element by ref, or coordinates as a fallback.
|
|
214
|
+
- computer_type: Type text into a specific desktop element ref.
|
|
215
|
+
- computer_press: Press a key or key chord in the focused host application.
|
|
216
|
+
- computer_scroll: Scroll a desktop element by ref.
|
|
217
|
+
- computer_launch: Launch an application and wait for its window to appear.
|
|
218
|
+
- computer_list_windows: List visible windows and their ids.
|
|
219
|
+
- computer_focus_window: Bring a target window to the front.
|
|
220
|
+
- computer_wait: Wait for time, elements, windows, or text during desktop workflows.
|
|
221
|
+
- computer_get: Read a property from a desktop element ref.
|
|
222
|
+
- MCP tools: connected servers appear as first-class tools named mcp_<server>__<tool>. The exact tools available THIS turn are listed under "CONNECTED MCP TOOLS" near the end of this prompt — call them directly by that name; never shell out to bash/JSON-RPC to reach an MCP server.
|
|
223
|
+
|
|
224
|
+
WORKFLOW:
|
|
225
|
+
1. Understand the request
|
|
226
|
+
2. Decide whether a sub-agent should handle the first investigation pass
|
|
227
|
+
3. Use read_file, grep, lsp, and bash to explore the codebase directly when the task is small or tightly scoped
|
|
228
|
+
4. Use bash with background=true for dev servers, watchers, or any long-running process — then continue working
|
|
229
|
+
5. Use delegate for read-only work that can run in parallel, then continue productive work
|
|
230
|
+
6. Use edit_file for targeted changes, write_file for new files or full rewrites
|
|
231
|
+
7. Verify changes by reading modified files
|
|
232
|
+
8. Run tests or builds with bash to confirm correctness
|
|
233
|
+
9. Use search_web or search_x when you need up-to-date information
|
|
234
|
+
|
|
235
|
+
DEFAULT DELEGATION POLICY:
|
|
236
|
+
- Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
|
|
237
|
+
- Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
|
|
238
|
+
- Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
|
|
239
|
+
- Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
|
|
240
|
+
- Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
|
|
241
|
+
- Use the computer sub-agent for host desktop interaction workflows that need screenshots, clicks, typing, keypresses, or scrolling.
|
|
242
|
+
- Use a matching custom sub-agent when the task fits one of the configured specializations.
|
|
243
|
+
- Never use delegate for tasks that should edit files or make shell changes.
|
|
244
|
+
- When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
|
|
245
|
+
- Do not wait for the user to explicitly ask for a sub-agent when delegation would clearly help.
|
|
246
|
+
- Skip delegation only when the task is trivial, single-file, or you already have the exact answer.
|
|
247
|
+
|
|
248
|
+
EXAMPLES:
|
|
249
|
+
- "review this change" -> delegate to explore first
|
|
250
|
+
- "research how auth works" -> delegate to explore first
|
|
251
|
+
- "investigate why this test fails" -> delegate to explore first, then continue with findings
|
|
252
|
+
- "refactor this module" -> delegate a focused part to general when helpful
|
|
253
|
+
- "verify this feature locally" -> use verify
|
|
254
|
+
- "open the host app and click through it" -> use computer
|
|
255
|
+
- "generate a logo" -> use generate_image
|
|
256
|
+
- "animate this still image" -> use generate_video
|
|
257
|
+
- Recurring specialized workflows -> use the matching custom sub-agent via task
|
|
258
|
+
- "every weekday at 9am run this check" -> use schedule_create with a cron expression
|
|
259
|
+
- "run this once automatically" -> use schedule_create with the right timing
|
|
260
|
+
- "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
|
|
261
|
+
|
|
262
|
+
IMPORTANT:
|
|
263
|
+
- Prefer edit_file for surgical changes to existing files — it shows a clean diff.
|
|
264
|
+
- Prefer grep over bash for searching file contents. Use bash only for find, ls, git, and other shell commands.
|
|
265
|
+
- Prefer lsp over text search when you need exact definitions, references, implementations, or call hierarchy and a server is available.
|
|
266
|
+
- Use write_file only for new files or when most of the file is changing. For very large files (>500 lines), split into multiple edit_file calls or write smaller chunks.
|
|
267
|
+
- Use read_file instead of cat/head/tail for reading files.
|
|
268
|
+
- When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
|
|
269
|
+
- After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
|
|
270
|
+
|
|
192
271
|
Be direct. Execute, don't just describe. Show results, not plans.`,
|
|
193
|
-
plan: `You are muonroi-cli in Plan mode — you analyze and plan but DO NOT execute changes.
|
|
194
|
-
|
|
195
|
-
${ENVIRONMENT}
|
|
196
|
-
|
|
197
|
-
TOOLS:
|
|
198
|
-
- read_file: Read file contents for analysis.
|
|
199
|
-
- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
|
|
200
|
-
- lsp: Experimental semantic code intelligence for read-only planning and research.
|
|
201
|
-
- bash: ONLY for searching (find, ls), git inspection — NEVER modify files.
|
|
202
|
-
- task: Delegate a focused task to a sub-agent when deeper research or specialized analysis would help.
|
|
203
|
-
- generate_plan: ALWAYS use this to present your plan. Creates an interactive UI with steps and questions.
|
|
204
|
-
|
|
205
|
-
BEHAVIOR:
|
|
206
|
-
- Explore the codebase first using read_file, grep, and bash to understand the current state
|
|
207
|
-
- Prefer lsp for exact symbol navigation when a matching server is available
|
|
208
|
-
- ALWAYS call generate_plan to present your plan — never just describe it in text
|
|
209
|
-
- Include clear, ordered steps with affected file paths
|
|
210
|
-
- Include questions when you need user input on approach, trade-offs, or preferences
|
|
211
|
-
- Use "select" questions for single-choice decisions, "multiselect" for picking multiple options, and "text" for free-form input
|
|
212
|
-
- Highlight potential risks, edge cases, and dependencies in the plan summary
|
|
272
|
+
plan: `You are muonroi-cli in Plan mode — you analyze and plan but DO NOT execute changes.
|
|
273
|
+
|
|
274
|
+
${ENVIRONMENT}
|
|
275
|
+
|
|
276
|
+
TOOLS:
|
|
277
|
+
- read_file: Read file contents for analysis.
|
|
278
|
+
- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
|
|
279
|
+
- lsp: Experimental semantic code intelligence for read-only planning and research.
|
|
280
|
+
- bash: ONLY for searching (find, ls), git inspection — NEVER modify files.
|
|
281
|
+
- task: Delegate a focused task to a sub-agent when deeper research or specialized analysis would help.
|
|
282
|
+
- generate_plan: ALWAYS use this to present your plan. Creates an interactive UI with steps and questions.
|
|
283
|
+
|
|
284
|
+
BEHAVIOR:
|
|
285
|
+
- Explore the codebase first using read_file, grep, and bash to understand the current state
|
|
286
|
+
- Prefer lsp for exact symbol navigation when a matching server is available
|
|
287
|
+
- ALWAYS call generate_plan to present your plan — never just describe it in text
|
|
288
|
+
- Include clear, ordered steps with affected file paths
|
|
289
|
+
- Include questions when you need user input on approach, trade-offs, or preferences
|
|
290
|
+
- Use "select" questions for single-choice decisions, "multiselect" for picking multiple options, and "text" for free-form input
|
|
291
|
+
- Highlight potential risks, edge cases, and dependencies in the plan summary
|
|
213
292
|
- NEVER create, modify, or delete files — only read and analyze`,
|
|
214
|
-
ask: `You are muonroi-cli in Ask mode — you answer questions clearly and thoroughly.
|
|
215
|
-
|
|
216
|
-
${ENVIRONMENT}
|
|
217
|
-
|
|
218
|
-
TOOLS:
|
|
219
|
-
- read_file: Read file contents for context.
|
|
220
|
-
- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
|
|
221
|
-
- lsp: Experimental semantic code intelligence for definitions, references, hover, and symbols.
|
|
222
|
-
- bash: ONLY for searching (find, ls), git inspection — NEVER modify.
|
|
223
|
-
- task: Delegate a focused task to a sub-agent when specialized analysis or deeper investigation would help.
|
|
224
|
-
|
|
225
|
-
BEHAVIOR:
|
|
226
|
-
- Answer the user's question directly and thoroughly
|
|
227
|
-
- Use tools to gather context when needed, preferring lsp for exact symbol questions when available
|
|
228
|
-
- Provide code examples when helpful
|
|
229
|
-
- NEVER create, modify, or delete files
|
|
293
|
+
ask: `You are muonroi-cli in Ask mode — you answer questions clearly and thoroughly.
|
|
294
|
+
|
|
295
|
+
${ENVIRONMENT}
|
|
296
|
+
|
|
297
|
+
TOOLS:
|
|
298
|
+
- read_file: Read file contents for context.
|
|
299
|
+
- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
|
|
300
|
+
- lsp: Experimental semantic code intelligence for definitions, references, hover, and symbols.
|
|
301
|
+
- bash: ONLY for searching (find, ls), git inspection — NEVER modify.
|
|
302
|
+
- task: Delegate a focused task to a sub-agent when specialized analysis or deeper investigation would help.
|
|
303
|
+
|
|
304
|
+
BEHAVIOR:
|
|
305
|
+
- Answer the user's question directly and thoroughly
|
|
306
|
+
- Use tools to gather context when needed, preferring lsp for exact symbol questions when available
|
|
307
|
+
- Provide code examples when helpful
|
|
308
|
+
- NEVER create, modify, or delete files
|
|
230
309
|
- Focus on explanation, not execution`,
|
|
231
310
|
};
|
|
232
311
|
export function findCustomSubagent(agent, subagents = loadValidSubAgents()) {
|
|
@@ -242,10 +321,10 @@ export function formatCustomSubagentsPromptSection(subagents) {
|
|
|
242
321
|
});
|
|
243
322
|
return `\n\nCUSTOM SUB-AGENTS:\nUser-defined foreground sub-agents from ~/.muonroi-cli/user-settings.json. When one matches the task, call the task tool with agent set to the exact name.\n\n${lines.join("\n\n")}\n`;
|
|
244
323
|
}
|
|
245
|
-
const NON_ANTHROPIC_TOOL_PREAMBLE = `\n\nIMPORTANT — TOOL CALLING:
|
|
246
|
-
You MUST invoke tools ONLY via the structured function calling API provided to you.
|
|
247
|
-
NEVER output XML tags like <tool_name>, <bash>, <read_file>, or <delegate> as text.
|
|
248
|
-
If you want to call a tool, use the function calling mechanism — do NOT write tool invocations as text in your response.
|
|
324
|
+
// Prompt text telling the model to use structured function calling and never
// emit XML-style tool tags as plain text. One array entry per line of the
// final text; the entries are joined with "\n".
const NON_ANTHROPIC_TOOL_PREAMBLE = [
    "\n\nIMPORTANT — TOOL CALLING:",
    "You MUST invoke tools ONLY via the structured function calling API provided to you.",
    "NEVER output XML tags like <tool_name>, <bash>, <read_file>, or <delegate> as text.",
    "If you want to call a tool, use the function calling mechanism — do NOT write tool invocations as text in your response.",
    "Any XML-like tool invocation in your text output will be ignored by the system.\n",
].join("\n");
|
|
250
329
|
/**
|
|
251
330
|
* Strip the TOOLS: listing section from system prompt.
|
|
@@ -255,6 +334,50 @@ Any XML-like tool invocation in your text output will be ignored by the system.\
|
|
|
255
334
|
export function stripToolsSection(text) {
    // Matches a "TOOLS:" heading line plus everything after it, lazily, up to
    // the newline that precedes the next recognised section heading (or the
    // end of the text). The heading itself is only looked ahead at, so it
    // stays in the output.
    const toolsListing = /\nTOOLS:\n[\s\S]*?\n(?=WORKFLOW:|BEHAVIOR:|IMPORTANT:|DEFAULT DELEGATION|EXAMPLES:|$)/g;
    // Each removed listing collapses to a single newline.
    const withoutListing = text.replace(toolsListing, "\n");
    return withoutListing;
}
|
|
337
|
+
/**
 * Build the per-turn "CONNECTED MCP TOOLS" prompt block from the live tool set.
 *
 * The static prompt only describes the mcp_<server>__<tool> naming scheme; it
 * does not name the tools that are actually connected, and the per-message
 * smart filter can remove whole servers. Without this block the model sees MCP
 * tools only as raw tool JSON and may overlook them (session f6f7881a5fae: the
 * agent claimed it had no direct call_mcp tool, drove the muonroi-docs server
 * through bash JSON-RPC and fabricated output). Listing the exact callable
 * names in prose closes that gap.
 *
 * `toolNames` should be the keys of the FINAL tool set for the turn (after the
 * smart filter and fs-dedup). Names that do not follow the
 * mcp_<server>__<tool> shape are ignored. The result is "" when no MCP tool is
 * present, so non-agent / chitchat / no-client-tools turns add nothing.
 *
 * The block varies per turn, so callers must append it OUTSIDE the cached
 * static prefix.
 */
export function buildMcpCapabilityBlock(toolNames) {
    // server id -> full tool names, in order of first appearance.
    const grouped = new Map();
    for (const toolName of toolNames) {
        // The lazy server group splits on the FIRST "__"; server ids rarely
        // contain "__" because they are sanitized from real ids such as
        // "muonroi-docs".
        const parsed = toolName.startsWith("mcp_") ? toolName.match(/^mcp_(.+?)__(.+)$/) : null;
        if (parsed === null) {
            continue;
        }
        const serverId = parsed[1];
        if (!grouped.has(serverId)) {
            grouped.set(serverId, []);
        }
        grouped.get(serverId).push(toolName);
    }
    if (grouped.size === 0) {
        return "";
    }
    // One bullet per server, with its tools listed alphabetically.
    const roster = Array.from(grouped, ([serverId, names]) => ` • ${serverId}: ${names.sort().join(", ")}`);
    const header = "\n\nCONNECTED MCP TOOLS (this turn) — these are available to you RIGHT NOW as " +
        "first-class tools. Call them directly by their exact name; do NOT shell out " +
        "to bash or hand-write JSON-RPC to reach an MCP server:\n";
    return header + roster.join("\n");
}
|
|
258
381
|
export function buildSystemPromptParts(cwd, mode, sandboxMode, planContext, subagents, sandboxSettings, providerId, resumeDigest, options) {
|
|
259
382
|
const chitchat = options?.chitchat === true;
|
|
260
383
|
const custom = loadCustomInstructions(cwd);
|
|
@@ -46,7 +46,12 @@ const KNOWN_TASK_TYPES = new Set(Object.keys(CEILING_MATRIX));
|
|
|
46
46
|
* graceful when PIL emits an out-of-band label or null.
|
|
47
47
|
*/
|
|
48
48
|
export function resolveCeiling(taskType, size) {
    // `build` (greenfield creation, PIL Pass-0) has no row of its own in the
    // LOCKED matrix. It is the highest-effort task, scaffolding many files, so
    // it reuses the `generate` ceiling (10/18/30) instead of dropping to the
    // tight `general` row (5/10/20), which would force-finalize a greenfield
    // build far too early.
    let row = taskType === "build" ? "generate" : taskType;
    // A missing or unrecognised label falls back to the `general` row.
    if (!row || !KNOWN_TASK_TYPES.has(row)) {
        row = "general";
    }
    return CEILING_MATRIX[row][size];
}
|
|
52
57
|
/**
|
|
@@ -100,3 +100,15 @@ export declare function attachReminderToMessages<T>(messages: ReadonlyArray<T>,
|
|
|
100
100
|
* Used by prepareStep / sub-agent paths after compaction.
|
|
101
101
|
*/
|
|
102
102
|
export declare function buildCheckpointReminder(iteration: number, hasEECheckpoint: boolean): string;
|
|
103
|
+
/**
|
|
104
|
+
* Pre-compaction "advance warning" gate. Fires when the prompt is approaching
|
|
105
|
+
* (default ≥78% of) the compaction threshold but compaction has NOT yet run this
|
|
106
|
+
* step — giving the agent one step to PRESERVE / finish before B3/B4 rewrites
|
|
107
|
+
* older tool results into stubs.
|
|
108
|
+
*
|
|
109
|
+
* `promptChars` MUST be the same quantity the compactor thresholds on (cumulative
|
|
110
|
+
* message chars + envelope chars), NOT the message COUNT. The original B4 wiring
|
|
111
|
+
* compared `stripped.length` (a message count, ~tens) against a char-scaled
|
|
112
|
+
* threshold (~156000), so the warning could never fire — session 2b7a10219499.
|
|
113
|
+
*/
|
|
114
|
+
export declare function shouldPreWarnCompaction(promptChars: number, thresholdChars: number, ratio?: number): boolean;
|
|
@@ -218,4 +218,20 @@ export function buildCheckpointReminder(iteration, hasEECheckpoint) {
|
|
|
218
218
|
return base;
|
|
219
219
|
return base.slice(0, 220);
|
|
220
220
|
}
|
|
221
|
+
/**
 * Decide whether to emit the pre-compaction "advance warning".
 *
 * Returns true once the prompt size has reached `ratio` (default 0.78) of the
 * compaction threshold, so the agent gets one step to PRESERVE / finish
 * before B3/B4 rewrites older tool results into stubs.
 *
 * `promptChars` MUST be the same quantity the compactor thresholds on
 * (cumulative message chars + envelope chars), NOT the message COUNT. The
 * original B4 wiring passed `stripped.length` (a message count, ~tens)
 * against a char-scaled threshold (~156000), so the warning could never fire
 * (session 2b7a10219499).
 *
 * @param promptChars    current prompt size in characters
 * @param thresholdChars compaction threshold in characters
 * @param ratio          fraction of the threshold at which to warn
 * @returns true when the warning should fire; always false when either size
 *          is zero or negative
 */
export function shouldPreWarnCompaction(promptChars, thresholdChars, ratio = 0.78) {
    const bothPositive = thresholdChars > 0 && promptChars > 0;
    if (!bothPositive) {
        return false;
    }
    // Round the trigger point down so floating-point noise in the product
    // cannot push it above the intended whole-character value.
    const warnAt = Math.floor(thresholdChars * ratio);
    return promptChars >= warnAt;
}
|
|
221
237
|
//# sourceMappingURL=scope-reminder.js.map
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
* - Reminder lives in tool_result/system message — never in system prompt
|
|
14
14
|
*/
|
|
15
15
|
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
16
|
-
import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
|
|
16
|
+
import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, shouldPreWarnCompaction, } from "./scope-reminder.js";
|
|
17
17
|
describe("cadenceForSize", () => {
|
|
18
18
|
it("locks 3/5/8 for small/medium/large with hard floor >= 3", () => {
|
|
19
19
|
expect(cadenceForSize("small")).toBe(3);
|
|
@@ -201,4 +201,25 @@ describe("attachReminderToMessages", () => {
|
|
|
201
201
|
expect(out).toEqual(messages);
|
|
202
202
|
});
|
|
203
203
|
});
|
|
204
|
+
// Regression suite for the pre-compaction warning gate.
describe("shouldPreWarnCompaction (regression: session 2b7a10219499 dead pre-warning)", () => {
    // Mirrors the MUONROI_TOP_LEVEL_COMPACT_THRESHOLD_CHARS default.
    const COMPACT_THRESHOLD_CHARS = 200_000;
    it("fires when prompt chars reach >=78% of the threshold (approaching compaction)", () => {
        const exactlyAtRatio = Math.floor(COMPACT_THRESHOLD_CHARS * 0.78);
        expect(shouldPreWarnCompaction(exactlyAtRatio, COMPACT_THRESHOLD_CHARS)).toBe(true);
        expect(shouldPreWarnCompaction(190_000, COMPACT_THRESHOLD_CHARS)).toBe(true);
    });
    it("does NOT fire while comfortably below the threshold", () => {
        expect(shouldPreWarnCompaction(100_000, COMPACT_THRESHOLD_CHARS)).toBe(false);
        expect(shouldPreWarnCompaction(0, COMPACT_THRESHOLD_CHARS)).toBe(false);
    });
    it("guards against the original bug: a message COUNT can never trip a char threshold", () => {
        // The dead wiring passed stripped.length (a message count, ~tens) where
        // a character total was expected. A count stays far below the
        // char-scaled threshold; a real character total crosses it.
        const plausibleMessageCount = 60;
        expect(shouldPreWarnCompaction(plausibleMessageCount, COMPACT_THRESHOLD_CHARS)).toBe(false);
        expect(shouldPreWarnCompaction(170_000, COMPACT_THRESHOLD_CHARS)).toBe(true);
    });
    it("is inert for a zero/negative threshold (no compaction configured)", () => {
        expect(shouldPreWarnCompaction(999_999, 0)).toBe(false);
    });
});
|
|
204
225
|
//# sourceMappingURL=scope-reminder.test.js.map
|